diff options
Diffstat (limited to 'dom/base/BodyUtil.cpp')
-rw-r--r-- | dom/base/BodyUtil.cpp | 475 |
1 files changed, 475 insertions, 0 deletions
diff --git a/dom/base/BodyUtil.cpp b/dom/base/BodyUtil.cpp new file mode 100644 index 0000000000..e5d86a3c36 --- /dev/null +++ b/dom/base/BodyUtil.cpp @@ -0,0 +1,475 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "BodyUtil.h" + +#include "nsError.h" +#include "nsString.h" +#include "nsIGlobalObject.h" +#include "mozilla/Encoding.h" +#include "mozilla/dom/MimeType.h" +#include "nsCRT.h" +#include "nsCharSeparatedTokenizer.h" +#include "nsDOMString.h" +#include "nsNetUtil.h" +#include "nsReadableUtils.h" +#include "nsStreamUtils.h" +#include "nsStringStream.h" +#include "nsURLHelper.h" + +#include "js/ArrayBuffer.h" // JS::NewArrayBufferWithContents +#include "js/JSON.h" +#include "mozilla/ErrorResult.h" +#include "mozilla/dom/Exceptions.h" +#include "mozilla/dom/FetchUtil.h" +#include "mozilla/dom/File.h" +#include "mozilla/dom/FormData.h" +#include "mozilla/dom/Headers.h" +#include "mozilla/dom/Promise.h" + +namespace mozilla::dom { + +namespace { + +// Reads over a CRLF and positions start after it. +static bool PushOverLine(nsACString::const_iterator& aStart, + const nsACString::const_iterator& aEnd) { + if (*aStart == nsCRT::CR && (aEnd - aStart > 1) && *(++aStart) == nsCRT::LF) { + ++aStart; // advance to after CRLF + return true; + } + + return false; +} + +/** + * A simple multipart/form-data parser as defined in RFC 2388 and RFC 2046. + * This does not respect any encoding specified per entry, using UTF-8 + * throughout. This is as the Fetch spec states in the consume body algorithm. + * Borrows some things from Necko's nsMultiMixedConv, but is simpler since + * unlike Necko we do not have to deal with receiving incomplete chunks of data. + * + * This parser will fail the entire parse on any invalid entry, so it will + * never return a partially filled FormData. + * The content-disposition header is used to figure out the name and filename + * entries. The inclusion of the filename parameter decides if the entry is + * inserted into the FormData as a string or a File. + * + * File blobs are copies of the underlying data string since we cannot adopt + * char* chunks embedded within the larger body without significant effort. + * FIXME(nsm): Bug 1127552 - We should add telemetry to calls to formData() and + * friends to figure out if Fetch ends up copying big blobs to see if this is + * worth optimizing. + */ +class MOZ_STACK_CLASS FormDataParser { + private: + RefPtr<FormData> mFormData; + nsCString mMimeType; + nsCString mMixedCaseMimeType; + nsCString mData; + + // Entry state, reset in START_PART. + nsCString mName; + nsCString mFilename; + nsCString mContentType; + + enum { + START_PART, + PARSE_HEADER, + PARSE_BODY, + } mState; + + nsIGlobalObject* mParentObject; + + // Reads over a boundary and sets start to the position after the end of the + // boundary. Returns false if no boundary is found immediately. + bool PushOverBoundary(const nsACString& aBoundaryString, + nsACString::const_iterator& aStart, + nsACString::const_iterator& aEnd) { + // We copy the end iterator to keep the original pointing to the real end + // of the string. + nsACString::const_iterator end(aEnd); + const char* beginning = aStart.get(); + if (FindInReadable(aBoundaryString, aStart, end)) { + // We either should find the body immediately, or after 2 chars with the + // 2 chars being '-', everything else is failure. + if ((aStart.get() - beginning) == 0) { + aStart.advance(aBoundaryString.Length()); + return true; + } + + if ((aStart.get() - beginning) == 2) { + if (*(--aStart) == '-' && *(--aStart) == '-') { + aStart.advance(aBoundaryString.Length() + 2); + return true; + } + } + } + + return false; + } + + bool ParseHeader(nsACString::const_iterator& aStart, + nsACString::const_iterator& aEnd, bool* aWasEmptyHeader) { + nsAutoCString headerName, headerValue; + if (!FetchUtil::ExtractHeader(aStart, aEnd, headerName, headerValue, + aWasEmptyHeader)) { + return false; + } + if (*aWasEmptyHeader) { + return true; + } + + if (headerName.LowerCaseEqualsLiteral("content-disposition")) { + bool seenFormData = false; + for (const nsACString& token : + nsCCharSeparatedTokenizer(headerValue, ';').ToRange()) { + if (token.IsEmpty()) { + continue; + } + + if (token.EqualsLiteral("form-data")) { + seenFormData = true; + continue; + } + + if (seenFormData && StringBeginsWith(token, "name="_ns)) { + mName = StringTail(token, token.Length() - 5); + mName.Trim(" \""); + continue; + } + + if (seenFormData && StringBeginsWith(token, "filename="_ns)) { + mFilename = StringTail(token, token.Length() - 9); + mFilename.Trim(" \""); + continue; + } + } + + if (mName.IsVoid()) { + // Could not parse a valid entry name. + return false; + } + } else if (headerName.LowerCaseEqualsLiteral("content-type")) { + mContentType = headerValue; + } + + return true; + } + + // The end of a body is marked by a CRLF followed by the boundary. So the + // CRLF is part of the boundary and not the body, but any prior CRLFs are + // part of the body. This will position the iterator at the beginning of the + // boundary (after the CRLF). + bool ParseBody(const nsACString& aBoundaryString, + nsACString::const_iterator& aStart, + nsACString::const_iterator& aEnd) { + const char* beginning = aStart.get(); + + // Find the boundary marking the end of the body. + nsACString::const_iterator end(aEnd); + if (!FindInReadable(aBoundaryString, aStart, end)) { + return false; + } + + // We found a boundary, strip the just prior CRLF, and consider + // everything else the body section. + if (aStart.get() - beginning < 2) { + // Only the first entry can have a boundary right at the beginning. Even + // an empty body will have a CRLF before the boundary. So this is + // a failure. + return false; + } + + // Check that there is a CRLF right before the boundary. + aStart.advance(-2); + + // Skip optional hyphens. + if (*aStart == '-' && *(aStart.get() + 1) == '-') { + if (aStart.get() - beginning < 2) { + return false; + } + + aStart.advance(-2); + } + + if (*aStart != nsCRT::CR || *(aStart.get() + 1) != nsCRT::LF) { + return false; + } + + nsAutoCString body(beginning, aStart.get() - beginning); + + // Restore iterator to after the \r\n as we promised. + // We do not need to handle the extra hyphens case since our boundary + // parser in PushOverBoundary() + aStart.advance(2); + + if (!mFormData) { + mFormData = new FormData(); + } + + NS_ConvertUTF8toUTF16 name(mName); + + if (mFilename.IsVoid()) { + ErrorResult rv; + mFormData->Append(name, NS_ConvertUTF8toUTF16(body), rv); + MOZ_ASSERT(!rv.Failed()); + } else { + // Unfortunately we've to copy the data first since all our strings are + // going to free it. We also need fallible alloc, so we can't just use + // ToNewCString(). + char* copy = static_cast<char*>(moz_xmalloc(body.Length())); + nsCString::const_iterator bodyIter, bodyEnd; + body.BeginReading(bodyIter); + body.EndReading(bodyEnd); + char* p = copy; + while (bodyIter != bodyEnd) { + *p++ = *bodyIter++; + } + p = nullptr; + + RefPtr<Blob> file = File::CreateMemoryFileWithCustomLastModified( + mParentObject, reinterpret_cast<void*>(copy), body.Length(), + NS_ConvertUTF8toUTF16(mFilename), NS_ConvertUTF8toUTF16(mContentType), + /* aLastModifiedDate */ 0); + if (NS_WARN_IF(!file)) { + return false; + } + + Optional<nsAString> dummy; + ErrorResult rv; + mFormData->Append(name, *file, dummy, rv); + if (NS_WARN_IF(rv.Failed())) { + rv.SuppressException(); + return false; + } + } + + return true; + } + + public: + FormDataParser(const nsACString& aMimeType, + const nsACString& aMixedCaseMimeType, const nsACString& aData, + nsIGlobalObject* aParent) + : mMimeType(aMimeType), + mMixedCaseMimeType(aMixedCaseMimeType), + mData(aData), + mState(START_PART), + mParentObject(aParent) {} + + bool Parse() { + if (mData.IsEmpty()) { + return false; + } + + // Determine boundary from mimetype. + UniquePtr<CMimeType> parsed = CMimeType::Parse(mMixedCaseMimeType); + if (!parsed) { + return false; + } + + nsAutoCString boundaryString; + if (!parsed->GetParameterValue("boundary"_ns, boundaryString)) { + return false; + } + + nsACString::const_iterator start, end; + mData.BeginReading(start); + // This should ALWAYS point to the end of data. + // Helpers make copies. + mData.EndReading(end); + + while (start != end) { + switch (mState) { + case START_PART: + mName.SetIsVoid(true); + mFilename.SetIsVoid(true); + mContentType = "text/plain"_ns; + + // MUST start with boundary. + if (!PushOverBoundary(boundaryString, start, end)) { + return false; + } + + if (start != end && *start == '-') { + // End of data. + if (!mFormData) { + mFormData = new FormData(); + } + return true; + } + + if (!PushOverLine(start, end)) { + return false; + } + mState = PARSE_HEADER; + break; + + case PARSE_HEADER: + bool emptyHeader; + if (!ParseHeader(start, end, &emptyHeader)) { + return false; + } + + if (emptyHeader && !PushOverLine(start, end)) { + return false; + } + + mState = emptyHeader ? PARSE_BODY : PARSE_HEADER; + break; + + case PARSE_BODY: + if (mName.IsVoid()) { + NS_WARNING( + "No content-disposition header with a valid name was " + "found. Failing at body parse."); + return false; + } + + if (!ParseBody(boundaryString, start, end)) { + return false; + } + + mState = START_PART; + break; + + default: + MOZ_CRASH("Invalid case"); + } + } + + MOZ_ASSERT_UNREACHABLE("Should never reach here."); + return false; + } + + already_AddRefed<FormData> GetFormData() { return mFormData.forget(); } +}; +} // namespace + +// static +void BodyUtil::ConsumeArrayBuffer(JSContext* aCx, + JS::MutableHandle<JSObject*> aValue, + uint32_t aInputLength, + UniquePtr<uint8_t[], JS::FreePolicy> aInput, + ErrorResult& aRv) { + JS::Rooted<JSObject*> arrayBuffer(aCx); + arrayBuffer = + JS::NewArrayBufferWithContents(aCx, aInputLength, std::move(aInput)); + if (!arrayBuffer) { + JS_ClearPendingException(aCx); + aRv.Throw(NS_ERROR_OUT_OF_MEMORY); + return; + } + aValue.set(arrayBuffer); +} + +// static +already_AddRefed<Blob> BodyUtil::ConsumeBlob(nsIGlobalObject* aParent, + const nsString& aMimeType, + uint32_t aInputLength, + uint8_t* aInput, + ErrorResult& aRv) { + RefPtr<Blob> blob = Blob::CreateMemoryBlob( + aParent, reinterpret_cast<void*>(aInput), aInputLength, aMimeType); + + if (!blob) { + aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR); + return nullptr; + } + return blob.forget(); +} + +// static +already_AddRefed<FormData> BodyUtil::ConsumeFormData( + nsIGlobalObject* aParent, const nsCString& aMimeType, + const nsACString& aMixedCaseMimeType, const nsCString& aStr, + ErrorResult& aRv) { + constexpr auto formDataMimeType = "multipart/form-data"_ns; + + // Allow semicolon separated boundary/encoding suffix like + // multipart/form-data; boundary= but disallow multipart/form-datafoobar. + bool isValidFormDataMimeType = StringBeginsWith(aMimeType, formDataMimeType); + + if (isValidFormDataMimeType && + aMimeType.Length() > formDataMimeType.Length()) { + isValidFormDataMimeType = aMimeType[formDataMimeType.Length()] == ';'; + } + + if (isValidFormDataMimeType) { + FormDataParser parser(aMimeType, aMixedCaseMimeType, aStr, aParent); + if (!parser.Parse()) { + aRv.ThrowTypeError<MSG_BAD_FORMDATA>(); + return nullptr; + } + + RefPtr<FormData> fd = parser.GetFormData(); + MOZ_ASSERT(fd); + return fd.forget(); + } + + constexpr auto urlDataMimeType = "application/x-www-form-urlencoded"_ns; + bool isValidUrlEncodedMimeType = StringBeginsWith(aMimeType, urlDataMimeType); + + if (isValidUrlEncodedMimeType && + aMimeType.Length() > urlDataMimeType.Length()) { + isValidUrlEncodedMimeType = aMimeType[urlDataMimeType.Length()] == ';'; + } + + if (isValidUrlEncodedMimeType) { + RefPtr<FormData> fd = new FormData(aParent); + DebugOnly<bool> status = URLParams::Parse( + aStr, [&fd](const nsAString& aName, const nsAString& aValue) { + ErrorResult rv; + fd->Append(aName, aValue, rv); + MOZ_ASSERT(!rv.Failed()); + return true; + }); + MOZ_ASSERT(status); + + return fd.forget(); + } + + aRv.ThrowTypeError<MSG_BAD_FORMDATA>(); + return nullptr; +} + +// static +nsresult BodyUtil::ConsumeText(uint32_t aInputLength, uint8_t* aInput, + nsString& aText) { + nsresult rv = + UTF_8_ENCODING->DecodeWithBOMRemoval(Span(aInput, aInputLength), aText); + if (NS_FAILED(rv)) { + return rv; + } + return NS_OK; +} + +// static +void BodyUtil::ConsumeJson(JSContext* aCx, JS::MutableHandle<JS::Value> aValue, + const nsString& aStr, ErrorResult& aRv) { + aRv.MightThrowJSException(); + + JS::Rooted<JS::Value> json(aCx); + if (!JS_ParseJSON(aCx, aStr.get(), aStr.Length(), &json)) { + if (!JS_IsExceptionPending(aCx)) { + aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR); + return; + } + + JS::Rooted<JS::Value> exn(aCx); + DebugOnly<bool> gotException = JS_GetPendingException(aCx, &exn); + MOZ_ASSERT(gotException); + + JS_ClearPendingException(aCx); + aRv.ThrowJSException(aCx, exn); + return; + } + + aValue.set(json); +} + +} // namespace mozilla::dom |