summaryrefslogtreecommitdiffstats
path: root/dom/base/BodyUtil.cpp
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--dom/base/BodyUtil.cpp474
1 files changed, 474 insertions, 0 deletions
diff --git a/dom/base/BodyUtil.cpp b/dom/base/BodyUtil.cpp
new file mode 100644
index 0000000000..206fa52ebf
--- /dev/null
+++ b/dom/base/BodyUtil.cpp
@@ -0,0 +1,474 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "BodyUtil.h"
+
+#include "nsError.h"
+#include "nsString.h"
+#include "nsIGlobalObject.h"
+#include "mozilla/Encoding.h"
+#include "mozilla/dom/MimeType.h"
+#include "nsCRT.h"
+#include "nsCharSeparatedTokenizer.h"
+#include "nsDOMString.h"
+#include "nsNetUtil.h"
+#include "nsReadableUtils.h"
+#include "nsStreamUtils.h"
+#include "nsStringStream.h"
+#include "nsURLHelper.h"
+
+#include "js/ArrayBuffer.h" // JS::NewArrayBufferWithContents
+#include "js/JSON.h"
+#include "mozilla/ErrorResult.h"
+#include "mozilla/dom/Exceptions.h"
+#include "mozilla/dom/FetchUtil.h"
+#include "mozilla/dom/File.h"
+#include "mozilla/dom/FormData.h"
+#include "mozilla/dom/Headers.h"
+#include "mozilla/dom/Promise.h"
+
+namespace mozilla::dom {
+
+namespace {
+
+// Consumes a CRLF pair located exactly at aStart.
+//
+// On success aStart is left pointing just past the LF and true is returned.
+// If the next two characters are not CR LF (including the case where fewer
+// than two characters remain before aEnd) false is returned and aStart is
+// left untouched, so a failed match never leaves the iterator half-advanced.
+static bool PushOverLine(nsACString::const_iterator& aStart,
+                         const nsACString::const_iterator& aEnd) {
+  const char* cursor = aStart.get();
+
+  // Make sure both characters exist before looking at either of them.
+  if (aEnd.get() - cursor < 2) {
+    return false;
+  }
+
+  if (cursor[0] != nsCRT::CR || cursor[1] != nsCRT::LF) {
+    return false;
+  }
+
+  aStart.advance(2);  // advance to after CRLF
+  return true;
+}
+
+/**
+ * A simple multipart/form-data parser as defined in RFC 2388 and RFC 2046.
+ * This does not respect any encoding specified per entry, using UTF-8
+ * throughout. This is as the Fetch spec states in the consume body algorithm.
+ * Borrows some things from Necko's nsMultiMixedConv, but is simpler since
+ * unlike Necko we do not have to deal with receiving incomplete chunks of data.
+ *
+ * This parser will fail the entire parse on any invalid entry, so it will
+ * never return a partially filled FormData.
+ * The content-disposition header is used to figure out the name and filename
+ * entries. The inclusion of the filename parameter decides if the entry is
+ * inserted into the FormData as a string or a File.
+ *
+ * File blobs are copies of the underlying data string since we cannot adopt
+ * char* chunks embedded within the larger body without significant effort.
+ * FIXME(nsm): Bug 1127552 - We should add telemetry to calls to formData() and
+ * friends to figure out if Fetch ends up copying big blobs to see if this is
+ * worth optimizing.
+ */
+class MOZ_STACK_CLASS FormDataParser {
+ private:
+  // Result being built. Created lazily by ParseBody() for the first entry, or
+  // by Parse() when the closing boundary is reached without any entry.
+  RefPtr<FormData> mFormData;
+  // Copy of the constructor's aMimeType; not read by any method of this class.
+  nsCString mMimeType;
+  // Copy of the constructor's aMixedCaseMimeType; Parse() extracts the
+  // "boundary" parameter from it.
+  nsCString mMixedCaseMimeType;
+  // The complete body text that Parse() walks over.
+  nsCString mData;
+
+  // Entry state, reset in START_PART.
+  nsCString mName;
+  nsCString mFilename;
+  nsCString mContentType;
+
+  // Parser state machine position, driven by Parse().
+  enum {
+    START_PART,
+    PARSE_HEADER,
+    PARSE_BODY,
+  } mState;
+
+  // Passed through to File creation for entries that carry a filename.
+  nsIGlobalObject* mParentObject;
+
+  // Reads over a boundary and sets start to the position after the end of the
+  // boundary. Returns false if no boundary is found immediately, i.e. the
+  // first occurrence of aBoundaryString is neither at aStart nor exactly two
+  // characters later with those two characters being "--".
+  bool PushOverBoundary(const nsACString& aBoundaryString,
+                        nsACString::const_iterator& aStart,
+                        nsACString::const_iterator& aEnd) {
+    // We copy the end iterator to keep the original pointing to the real end
+    // of the string.
+    nsACString::const_iterator end(aEnd);
+    const char* beginning = aStart.get();
+    if (FindInReadable(aBoundaryString, aStart, end)) {
+      // We either should find the body immediately, or after 2 chars with the
+      // 2 chars being '-', everything else is failure.
+      if ((aStart.get() - beginning) == 0) {
+        aStart.advance(aBoundaryString.Length());
+        return true;
+      }
+
+      if ((aStart.get() - beginning) == 2) {
+        // Step back over the two leading characters; when both are hyphens
+        // aStart is back at `beginning`, so skip hyphens plus boundary.
+        if (*(--aStart) == '-' && *(--aStart) == '-') {
+          aStart.advance(aBoundaryString.Length() + 2);
+          return true;
+        }
+      }
+    }
+
+    return false;
+  }
+
+  // Extracts one header line via FetchUtil::ExtractHeader and records the
+  // pieces this parser cares about:
+  //  - content-disposition: after a "form-data" token has been seen, a
+  //    "name=" token sets mName and a "filename=" token sets mFilename (both
+  //    with surrounding spaces and double quotes trimmed). The header is
+  //    rejected if mName is still void afterwards.
+  //  - content-type: the value is stored in mContentType.
+  // *aWasEmptyHeader is filled in by ExtractHeader; when it is true nothing
+  // else is inspected. Returns false on any failure.
+  bool ParseHeader(nsACString::const_iterator& aStart,
+                   nsACString::const_iterator& aEnd, bool* aWasEmptyHeader) {
+    nsAutoCString headerName, headerValue;
+    if (!FetchUtil::ExtractHeader(aStart, aEnd, headerName, headerValue,
+                                  aWasEmptyHeader)) {
+      return false;
+    }
+    if (*aWasEmptyHeader) {
+      return true;
+    }
+
+    if (headerName.LowerCaseEqualsLiteral("content-disposition")) {
+      bool seenFormData = false;
+      for (const nsACString& token :
+           nsCCharSeparatedTokenizer(headerValue, ';').ToRange()) {
+        if (token.IsEmpty()) {
+          continue;
+        }
+
+        if (token.EqualsLiteral("form-data")) {
+          seenFormData = true;
+          continue;
+        }
+
+        if (seenFormData && StringBeginsWith(token, "name="_ns)) {
+          // 5 == strlen("name=")
+          mName = StringTail(token, token.Length() - 5);
+          mName.Trim(" \"");
+          continue;
+        }
+
+        if (seenFormData && StringBeginsWith(token, "filename="_ns)) {
+          // 9 == strlen("filename=")
+          mFilename = StringTail(token, token.Length() - 9);
+          mFilename.Trim(" \"");
+          continue;
+        }
+      }
+
+      if (mName.IsVoid()) {
+        // Could not parse a valid entry name.
+        return false;
+      }
+    } else if (headerName.LowerCaseEqualsLiteral("content-type")) {
+      mContentType = headerValue;
+    }
+
+    return true;
+  }
+
+  // The end of a body is marked by a CRLF followed by the boundary. So the
+  // CRLF is part of the boundary and not the body, but any prior CRLFs are
+  // part of the body. This will position the iterator at the beginning of the
+  // boundary (after the CRLF).
+  //
+  // On success the body is appended to mFormData under mName: as a string
+  // when mFilename is void, otherwise as a File named mFilename with type
+  // mContentType. Returns false if no properly delimited body is found or the
+  // File entry cannot be created/appended.
+  bool ParseBody(const nsACString& aBoundaryString,
+                 nsACString::const_iterator& aStart,
+                 nsACString::const_iterator& aEnd) {
+    const char* beginning = aStart.get();
+
+    // Find the boundary marking the end of the body.
+    nsACString::const_iterator end(aEnd);
+    if (!FindInReadable(aBoundaryString, aStart, end)) {
+      return false;
+    }
+
+    // We found a boundary, strip the just prior CRLF, and consider
+    // everything else the body section.
+    if (aStart.get() - beginning < 2) {
+      // Only the first entry can have a boundary right at the beginning. Even
+      // an empty body will have a CRLF before the boundary. So this is
+      // a failure.
+      return false;
+    }
+
+    // Check that there is a CRLF right before the boundary.
+    aStart.advance(-2);
+
+    // Skip optional hyphens.
+    if (*aStart == '-' && *(aStart.get() + 1) == '-') {
+      if (aStart.get() - beginning < 2) {
+        return false;
+      }
+
+      aStart.advance(-2);
+    }
+
+    if (*aStart != nsCRT::CR || *(aStart.get() + 1) != nsCRT::LF) {
+      return false;
+    }
+
+    nsAutoCString body(beginning, aStart.get() - beginning);
+
+    // Restore iterator to after the \r\n as we promised.
+    // We do not need to handle the extra hyphens case here since
+    // PushOverBoundary() accepts a boundary preceded by two hyphens.
+    aStart.advance(2);
+
+    if (!mFormData) {
+      mFormData = new FormData();
+    }
+
+    NS_ConvertUTF8toUTF16 name(mName);
+
+    if (mFilename.IsVoid()) {
+      ErrorResult rv;
+      mFormData->Append(name, NS_ConvertUTF8toUTF16(body), rv);
+      MOZ_ASSERT(!rv.Failed());
+    } else {
+      // Unfortunately we've to copy the data first since all our strings are
+      // going to free it; presumably the created File takes ownership of
+      // `copy` (TODO confirm against File's memory-file API).
+      // NOTE(review): an earlier version of this comment said a fallible
+      // allocation was needed, but moz_xmalloc looks like the infallible
+      // allocator - confirm which one is intended.
+      char* copy = static_cast<char*>(moz_xmalloc(body.Length()));
+      nsCString::const_iterator bodyIter, bodyEnd;
+      body.BeginReading(bodyIter);
+      body.EndReading(bodyEnd);
+      char* p = copy;
+      while (bodyIter != bodyEnd) {
+        *p++ = *bodyIter++;
+      }
+
+      RefPtr<Blob> file = File::CreateMemoryFileWithCustomLastModified(
+          mParentObject, reinterpret_cast<void*>(copy), body.Length(),
+          NS_ConvertUTF8toUTF16(mFilename), NS_ConvertUTF8toUTF16(mContentType),
+          /* aLastModifiedDate */ 0);
+      if (NS_WARN_IF(!file)) {
+        return false;
+      }
+
+      Optional<nsAString> dummy;
+      ErrorResult rv;
+      mFormData->Append(name, *file, dummy, rv);
+      if (NS_WARN_IF(rv.Failed())) {
+        rv.SuppressException();
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+ public:
+  // Copies the mime type strings and the body text; parsing starts in the
+  // START_PART state. aParent is only stored, not dereferenced here.
+  FormDataParser(const nsACString& aMimeType,
+                 const nsACString& aMixedCaseMimeType, const nsACString& aData,
+                 nsIGlobalObject* aParent)
+      : mMimeType(aMimeType),
+        mMixedCaseMimeType(aMixedCaseMimeType),
+        mData(aData),
+        mState(START_PART),
+        mParentObject(aParent) {}
+
+  // Runs the state machine over the whole body.
+  //
+  // Returns true once the closing boundary (a boundary immediately followed
+  // by '-') has been reached; mFormData is guaranteed to be non-null then.
+  // Returns false for empty data, an unparsable mime type, a missing
+  // "boundary" parameter, any malformed part, or data that ends before the
+  // closing boundary.
+  bool Parse() {
+    if (mData.IsEmpty()) {
+      return false;
+    }
+
+    // Determine boundary from mimetype.
+    UniquePtr<CMimeType> parsed = CMimeType::Parse(mMixedCaseMimeType);
+    if (!parsed) {
+      return false;
+    }
+
+    nsAutoCString boundaryString;
+    if (!parsed->GetParameterValue("boundary"_ns, boundaryString)) {
+      return false;
+    }
+
+    nsACString::const_iterator start, end;
+    mData.BeginReading(start);
+    // This should ALWAYS point to the end of data.
+    // Helpers make copies.
+    mData.EndReading(end);
+
+    while (start != end) {
+      switch (mState) {
+        case START_PART:
+          mName.SetIsVoid(true);
+          mFilename.SetIsVoid(true);
+          mContentType = "text/plain"_ns;
+
+          // MUST start with boundary.
+          if (!PushOverBoundary(boundaryString, start, end)) {
+            return false;
+          }
+
+          if (start != end && *start == '-') {
+            // End of data.
+            if (!mFormData) {
+              mFormData = new FormData();
+            }
+            return true;
+          }
+
+          if (!PushOverLine(start, end)) {
+            return false;
+          }
+          mState = PARSE_HEADER;
+          break;
+
+        case PARSE_HEADER: {
+          // Scoped and initialized so no other case label can see it
+          // uninitialized.
+          bool emptyHeader = false;
+          if (!ParseHeader(start, end, &emptyHeader)) {
+            return false;
+          }
+
+          // An empty header line ends the header section; skip its CRLF.
+          if (emptyHeader && !PushOverLine(start, end)) {
+            return false;
+          }
+
+          mState = emptyHeader ? PARSE_BODY : PARSE_HEADER;
+          break;
+        }
+
+        case PARSE_BODY:
+          if (mName.IsVoid()) {
+            NS_WARNING(
+                "No content-disposition header with a valid name was "
+                "found. Failing at body parse.");
+            return false;
+          }
+
+          if (!ParseBody(boundaryString, start, end)) {
+            return false;
+          }
+
+          mState = START_PART;
+          break;
+
+        default:
+          MOZ_CRASH("Invalid case");
+      }
+    }
+
+    // The data ran out before a closing boundary was seen (for example the
+    // input stops right after a boundary line or in the middle of the
+    // headers). This is reachable with truncated input, so it is a parse
+    // failure rather than a programming error and must not assert.
+    return false;
+  }
+
+  // Hands the parsed FormData to the caller; the parser no longer holds it
+  // afterwards. Only meaningful after Parse() returned true.
+  already_AddRefed<FormData> GetFormData() { return mFormData.forget(); }
+};
+} // namespace
+
+// static
+// Creates an ArrayBuffer from the aInputLength bytes at aInput and stores it
+// in aValue. When the buffer cannot be created, the pending JS exception is
+// cleared, aRv is set to NS_ERROR_OUT_OF_MEMORY and aValue is left untouched.
+void BodyUtil::ConsumeArrayBuffer(JSContext* aCx,
+                                  JS::MutableHandle<JSObject*> aValue,
+                                  uint32_t aInputLength, uint8_t* aInput,
+                                  ErrorResult& aRv) {
+  void* contents = reinterpret_cast<void*>(aInput);
+  JS::Rooted<JSObject*> buffer(
+      aCx, JS::NewArrayBufferWithContents(aCx, aInputLength, contents));
+
+  if (buffer) {
+    aValue.set(buffer);
+    return;
+  }
+
+  // Creation failed: report it through aRv instead of the JS context.
+  JS_ClearPendingException(aCx);
+  aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
+}
+
+// static
+// Builds a memory-backed Blob of type aMimeType from the aInputLength bytes
+// at aInput. Returns null and sets aRv to NS_ERROR_DOM_UNKNOWN_ERR when the
+// blob cannot be created.
+already_AddRefed<Blob> BodyUtil::ConsumeBlob(nsIGlobalObject* aParent,
+                                             const nsString& aMimeType,
+                                             uint32_t aInputLength,
+                                             uint8_t* aInput,
+                                             ErrorResult& aRv) {
+  void* data = reinterpret_cast<void*>(aInput);
+  RefPtr<Blob> memoryBlob =
+      Blob::CreateMemoryBlob(aParent, data, aInputLength, aMimeType);
+
+  if (memoryBlob) {
+    return memoryBlob.forget();
+  }
+
+  aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR);
+  return nullptr;
+}
+
+// static
+// Turns the body text aStr into a FormData according to aMimeType:
+//  - "multipart/form-data" bodies go through FormDataParser (the boundary is
+//    taken from aMixedCaseMimeType);
+//  - "application/x-www-form-urlencoded" bodies go through URLParams::Parse.
+// Any other mime type, or a multipart body that fails to parse, throws a
+// MSG_BAD_FORMDATA TypeError on aRv and returns null.
+already_AddRefed<FormData> BodyUtil::ConsumeFormData(
+    nsIGlobalObject* aParent, const nsCString& aMimeType,
+    const nsACString& aMixedCaseMimeType, const nsCString& aStr,
+    ErrorResult& aRv) {
+  // True when aMimeType is exactly aEssence, or aEssence directly followed by
+  // a semicolon (parameter suffix such as "; boundary="). This allows
+  // multipart/form-data; boundary= but disallows multipart/form-datafoobar.
+  const auto hasEssence = [&aMimeType](const auto& aEssence) {
+    if (!StringBeginsWith(aMimeType, aEssence)) {
+      return false;
+    }
+    const auto essenceLength = aEssence.Length();
+    return aMimeType.Length() <= essenceLength ||
+           aMimeType[essenceLength] == ';';
+  };
+
+  constexpr auto formDataMimeType = "multipart/form-data"_ns;
+  if (hasEssence(formDataMimeType)) {
+    FormDataParser parser(aMimeType, aMixedCaseMimeType, aStr, aParent);
+    if (!parser.Parse()) {
+      aRv.ThrowTypeError<MSG_BAD_FORMDATA>();
+      return nullptr;
+    }
+
+    RefPtr<FormData> parsedForm = parser.GetFormData();
+    MOZ_ASSERT(parsedForm);
+    return parsedForm.forget();
+  }
+
+  constexpr auto urlDataMimeType = "application/x-www-form-urlencoded"_ns;
+  if (!hasEssence(urlDataMimeType)) {
+    // Neither of the two supported encodings.
+    aRv.ThrowTypeError<MSG_BAD_FORMDATA>();
+    return nullptr;
+  }
+
+  RefPtr<FormData> result = new FormData(aParent);
+  DebugOnly<bool> parsedOk = URLParams::Parse(
+      aStr, [&result](const nsAString& aName, const nsAString& aValue) {
+        ErrorResult appendRv;
+        result->Append(aName, aValue, appendRv);
+        MOZ_ASSERT(!appendRv.Failed());
+        // Keep going through all name/value pairs.
+        return true;
+      });
+  MOZ_ASSERT(parsedOk);
+
+  return result.forget();
+}
+
+// static
+// Decodes the aInputLength bytes at aInput as UTF-8 (with BOM removal) into
+// aText. A failure code from the decoder is returned as is; every non-failure
+// result is reported to the caller as plain NS_OK.
+nsresult BodyUtil::ConsumeText(uint32_t aInputLength, uint8_t* aInput,
+                               nsString& aText) {
+  const nsresult decodeResult =
+      UTF_8_ENCODING->DecodeWithBOMRemoval(Span(aInput, aInputLength), aText);
+  return NS_FAILED(decodeResult) ? decodeResult : NS_OK;
+}
+
+// static
+// Parses aStr as JSON and stores the resulting value in aValue.
+// On a parse failure aValue is left untouched and aRv is set: to the pending
+// JS exception when there is one (it is removed from aCx first), otherwise to
+// NS_ERROR_DOM_UNKNOWN_ERR.
+void BodyUtil::ConsumeJson(JSContext* aCx, JS::MutableHandle<JS::Value> aValue,
+                           const nsString& aStr, ErrorResult& aRv) {
+  aRv.MightThrowJSException();
+
+  JS::Rooted<JS::Value> parsed(aCx);
+  if (JS_ParseJSON(aCx, aStr.get(), aStr.Length(), &parsed)) {
+    aValue.set(parsed);
+    return;
+  }
+
+  // Parsing failed without leaving an exception behind.
+  if (!JS_IsExceptionPending(aCx)) {
+    aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR);
+    return;
+  }
+
+  // Move the pending exception from the JS context onto aRv.
+  JS::Rooted<JS::Value> pending(aCx);
+  DebugOnly<bool> fetched = JS_GetPendingException(aCx, &pending);
+  MOZ_ASSERT(fetched);
+
+  JS_ClearPendingException(aCx);
+  aRv.ThrowJSException(aCx, pending);
+}
+
+} // namespace mozilla::dom