1 files changed, 475 insertions, 0 deletions
diff --git a/dom/base/BodyUtil.cpp b/dom/base/BodyUtil.cpp
new file mode 100644
index 0000000000..e5d86a3c36
--- /dev/null
+++ b/dom/base/BodyUtil.cpp
@@ -0,0 +1,475 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "BodyUtil.h"
+
+#include "nsError.h"
+#include "nsString.h"
+#include "nsIGlobalObject.h"
+#include "mozilla/Encoding.h"
+#include "mozilla/dom/MimeType.h"
+#include "nsCRT.h"
+#include "nsCharSeparatedTokenizer.h"
+#include "nsDOMString.h"
+#include "nsNetUtil.h"
+#include "nsReadableUtils.h"
+#include "nsStreamUtils.h"
+#include "nsStringStream.h"
+#include "nsURLHelper.h"
+
+#include "js/ArrayBuffer.h"  // JS::NewArrayBufferWithContents
+#include "js/JSON.h"
+#include "mozilla/ErrorResult.h"
+#include "mozilla/dom/Exceptions.h"
+#include "mozilla/dom/FetchUtil.h"
+#include "mozilla/dom/File.h"
+#include "mozilla/dom/FormData.h"
+#include "mozilla/dom/Headers.h"
+#include "mozilla/dom/Promise.h"
+
+namespace mozilla::dom {
+
+namespace {
+
+// Reads over a CRLF and positions start after it.
+static bool PushOverLine(nsACString::const_iterator& aStart,
+                         const nsACString::const_iterator& aEnd) {
+  if (*aStart == nsCRT::CR && (aEnd - aStart > 1) && *(++aStart) == nsCRT::LF) {
+    ++aStart;  // advance to after CRLF
+    return true;
+  }
+
+  return false;
+}
+
+/**
+ * A simple multipart/form-data parser as defined in RFC 2388 and RFC 2046.
+ * This does not respect any encoding specified per entry, using UTF-8
+ * throughout. This is as the Fetch spec states in the consume body algorithm.
+ * Borrows some things from Necko's nsMultiMixedConv, but is simpler since
+ * unlike Necko we do not have to deal with receiving incomplete chunks of data.
+ *
+ * This parser will fail the entire parse on any invalid entry, so it will
+ * never return a partially filled FormData.
+ * The content-disposition header is used to figure out the name and filename
+ * entries. The inclusion of the filename parameter decides if the entry is
+ * inserted into the FormData as a string or a File.
+ *
+ * File blobs are copies of the underlying data string since we cannot adopt
+ * char* chunks embedded within the larger body without significant effort.
+ * FIXME(nsm): Bug 1127552 - We should add telemetry to calls to formData() and
+ * friends to figure out if Fetch ends up copying big blobs to see if this is
+ * worth optimizing.
+ */
+class MOZ_STACK_CLASS FormDataParser {
+ private:
+  RefPtr<FormData> mFormData;
+  nsCString mMimeType;
+  nsCString mMixedCaseMimeType;
+  nsCString mData;
+
+  // Entry state, reset in START_PART.
+  nsCString mName;
+  nsCString mFilename;
+  nsCString mContentType;
+
+  enum {
+    START_PART,
+    PARSE_HEADER,
+    PARSE_BODY,
+  } mState;
+
+  nsIGlobalObject* mParentObject;
+
+  // Reads over a boundary and sets start to the position after the end of the
+  // boundary. Returns false if no boundary is found immediately.
+  bool PushOverBoundary(const nsACString& aBoundaryString,
+                        nsACString::const_iterator& aStart,
+                        nsACString::const_iterator& aEnd) {
+    // We copy the end iterator to keep the original pointing to the real end
+    // of the string.
+    nsACString::const_iterator end(aEnd);
+    const char* beginning = aStart.get();
+    if (FindInReadable(aBoundaryString, aStart, end)) {
+      // We either should find the body immediately, or after 2 chars with the
+      // 2 chars being '-', everything else is failure.
+      if ((aStart.get() - beginning) == 0) {
+        aStart.advance(aBoundaryString.Length());
+        return true;
+      }
+
+      if ((aStart.get() - beginning) == 2) {
+        if (*(--aStart) == '-' && *(--aStart) == '-') {
+          aStart.advance(aBoundaryString.Length() + 2);
+          return true;
+        }
+      }
+    }
+
+    return false;
+  }
+
+  bool ParseHeader(nsACString::const_iterator& aStart,
+                   nsACString::const_iterator& aEnd, bool* aWasEmptyHeader) {
+    nsAutoCString headerName, headerValue;
+    if (!FetchUtil::ExtractHeader(aStart, aEnd, headerName, headerValue,
+                                  aWasEmptyHeader)) {
+      return false;
+    }
+    if (*aWasEmptyHeader) {
+      return true;
+    }
+
+    if (headerName.LowerCaseEqualsLiteral("content-disposition")) {
+      bool seenFormData = false;
+      for (const nsACString& token :
+           nsCCharSeparatedTokenizer(headerValue, ';').ToRange()) {
+        if (token.IsEmpty()) {
+          continue;
+        }
+
+        if (token.EqualsLiteral("form-data")) {
+          seenFormData = true;
+          continue;
+        }
+
+        if (seenFormData && StringBeginsWith(token, "name="_ns)) {
+          mName = StringTail(token, token.Length() - 5);
+          mName.Trim(" \"");
+          continue;
+        }
+
+        if (seenFormData && StringBeginsWith(token, "filename="_ns)) {
+          mFilename = StringTail(token, token.Length() - 9);
+          mFilename.Trim(" \"");
+          continue;
+        }
+      }
+
+      if (mName.IsVoid()) {
+        // Could not parse a valid entry name.
+        return false;
+      }
+    } else if (headerName.LowerCaseEqualsLiteral("content-type")) {
+      mContentType = headerValue;
+    }
+
+    return true;
+  }
+
+  // The end of a body is marked by a CRLF followed by the boundary. So the
+  // CRLF is part of the boundary and not the body, but any prior CRLFs are
+  // part of the body. This will position the iterator at the beginning of the
+  // boundary (after the CRLF).
+  bool ParseBody(const nsACString& aBoundaryString,
+                 nsACString::const_iterator& aStart,
+                 nsACString::const_iterator& aEnd) {
+    const char* beginning = aStart.get();
+
+    // Find the boundary marking the end of the body.
+    nsACString::const_iterator end(aEnd);
+    if (!FindInReadable(aBoundaryString, aStart, end)) {
+      return false;
+    }
+
+    // We found a boundary, strip the just prior CRLF, and consider
+    // everything else the body section.
+    if (aStart.get() - beginning < 2) {
+      // Only the first entry can have a boundary right at the beginning. Even
+      // an empty body will have a CRLF before the boundary. So this is
+      // a failure.
+      return false;
+    }
+
+    // Check that there is a CRLF right before the boundary.
+    aStart.advance(-2);
+
+    // Skip optional hyphens.
+    if (*aStart == '-' && *(aStart.get() + 1) == '-') {
+      if (aStart.get() - beginning < 2) {
+        return false;
+      }
+
+      aStart.advance(-2);
+    }
+
+    if (*aStart != nsCRT::CR || *(aStart.get() + 1) != nsCRT::LF) {
+      return false;
+    }
+
+    nsAutoCString body(beginning, aStart.get() - beginning);
+
+    // Restore iterator to after the \r\n as we promised.
+    // We do not need to handle the extra hyphens case since our boundary
+    // parser in PushOverBoundary()
+    aStart.advance(2);
+
+    if (!mFormData) {
+      mFormData = new FormData();
+    }
+
+    NS_ConvertUTF8toUTF16 name(mName);
+
+    if (mFilename.IsVoid()) {
+      ErrorResult rv;
+      mFormData->Append(name, NS_ConvertUTF8toUTF16(body), rv);
+      MOZ_ASSERT(!rv.Failed());
+    } else {
+      // Unfortunately we've to copy the data first since all our strings are
+      // going to free it. We also need fallible alloc, so we can't just use
+      // ToNewCString().
+      char* copy = static_cast<char*>(moz_xmalloc(body.Length()));
+      nsCString::const_iterator bodyIter, bodyEnd;
+      body.BeginReading(bodyIter);
+      body.EndReading(bodyEnd);
+      char* p = copy;
+      while (bodyIter != bodyEnd) {
+        *p++ = *bodyIter++;
+      }
+      p = nullptr;
+
+      RefPtr<Blob> file = File::CreateMemoryFileWithCustomLastModified(
+          mParentObject, reinterpret_cast<void*>(copy), body.Length(),
+          NS_ConvertUTF8toUTF16(mFilename), NS_ConvertUTF8toUTF16(mContentType),
+          /* aLastModifiedDate */ 0);
+      if (NS_WARN_IF(!file)) {
+        return false;
+      }
+
+      Optional<nsAString> dummy;
+      ErrorResult rv;
+      mFormData->Append(name, *file, dummy, rv);
+      if (NS_WARN_IF(rv.Failed())) {
+        rv.SuppressException();
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+ public:
+  FormDataParser(const nsACString& aMimeType,
+                 const nsACString& aMixedCaseMimeType, const nsACString& aData,
+                 nsIGlobalObject* aParent)
+      : mMimeType(aMimeType),
+        mMixedCaseMimeType(aMixedCaseMimeType),
+        mData(aData),
+        mState(START_PART),
+        mParentObject(aParent) {}
+
+  bool Parse() {
+    if (mData.IsEmpty()) {
+      return false;
+    }
+
+    // Determine boundary from mimetype.
+    UniquePtr<CMimeType> parsed = CMimeType::Parse(mMixedCaseMimeType);
+    if (!parsed) {
+      return false;
+    }
+
+    nsAutoCString boundaryString;
+    if (!parsed->GetParameterValue("boundary"_ns, boundaryString)) {
+      return false;
+    }
+
+    nsACString::const_iterator start, end;
+    mData.BeginReading(start);
+    // This should ALWAYS point to the end of data.
+    // Helpers make copies.
+    mData.EndReading(end);
+
+    while (start != end) {
+      switch (mState) {
+        case START_PART:
+          mName.SetIsVoid(true);
+          mFilename.SetIsVoid(true);
+          mContentType = "text/plain"_ns;
+
+          // MUST start with boundary.
+          if (!PushOverBoundary(boundaryString, start, end)) {
+            return false;
+          }
+
+          if (start != end && *start == '-') {
+            // End of data.
+            if (!mFormData) {
+              mFormData = new FormData();
+            }
+            return true;
+          }
+
+          if (!PushOverLine(start, end)) {
+            return false;
+          }
+          mState = PARSE_HEADER;
+          break;
+
+        case PARSE_HEADER:
+          bool emptyHeader;
+          if (!ParseHeader(start, end, &emptyHeader)) {
+            return false;
+          }
+
+          if (emptyHeader && !PushOverLine(start, end)) {
+            return false;
+          }
+
+          mState = emptyHeader ? PARSE_BODY : PARSE_HEADER;
+          break;
+
+        case PARSE_BODY:
+          if (mName.IsVoid()) {
+            NS_WARNING(
+                "No content-disposition header with a valid name was "
+                "found. Failing at body parse.");
+            return false;
+          }
+
+          if (!ParseBody(boundaryString, start, end)) {
+            return false;
+          }
+
+          mState = START_PART;
+          break;
+
+        default:
+          MOZ_CRASH("Invalid case");
+      }
+    }
+
+    MOZ_ASSERT_UNREACHABLE("Should never reach here.");
+    return false;
+  }
+
+  already_AddRefed<FormData> GetFormData() { return mFormData.forget(); }
+};
+}  // namespace
+
+// static
+void BodyUtil::ConsumeArrayBuffer(JSContext* aCx,
+                                  JS::MutableHandle<JSObject*> aValue,
+                                  uint32_t aInputLength,
+                                  UniquePtr<uint8_t[], JS::FreePolicy> aInput,
+                                  ErrorResult& aRv) {
+  JS::Rooted<JSObject*> arrayBuffer(aCx);
+  arrayBuffer =
+      JS::NewArrayBufferWithContents(aCx, aInputLength, std::move(aInput));
+  if (!arrayBuffer) {
+    JS_ClearPendingException(aCx);
+    aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
+    return;
+  }
+  aValue.set(arrayBuffer);
+}
+
+// static
+already_AddRefed<Blob> BodyUtil::ConsumeBlob(nsIGlobalObject* aParent,
+                                             const nsString& aMimeType,
+                                             uint32_t aInputLength,
+                                             uint8_t* aInput,
+                                             ErrorResult& aRv) {
+  RefPtr<Blob> blob = Blob::CreateMemoryBlob(
+      aParent, reinterpret_cast<void*>(aInput), aInputLength, aMimeType);
+
+  if (!blob) {
+    aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR);
+    return nullptr;
+  }
+  return blob.forget();
+}
+
+// static
+already_AddRefed<FormData> BodyUtil::ConsumeFormData(
+    nsIGlobalObject* aParent, const nsCString& aMimeType,
+    const nsACString& aMixedCaseMimeType, const nsCString& aStr,
+    ErrorResult& aRv) {
+  constexpr auto formDataMimeType = "multipart/form-data"_ns;
+
+  // Allow semicolon separated boundary/encoding suffix like
+  // multipart/form-data; boundary= but disallow multipart/form-datafoobar.
+  bool isValidFormDataMimeType = StringBeginsWith(aMimeType, formDataMimeType);
+
+  if (isValidFormDataMimeType &&
+      aMimeType.Length() > formDataMimeType.Length()) {
+    isValidFormDataMimeType = aMimeType[formDataMimeType.Length()] == ';';
+  }
+
+  if (isValidFormDataMimeType) {
+    FormDataParser parser(aMimeType, aMixedCaseMimeType, aStr, aParent);
+    if (!parser.Parse()) {
+      aRv.ThrowTypeError<MSG_BAD_FORMDATA>();
+      return nullptr;
+    }
+
+    RefPtr<FormData> fd = parser.GetFormData();
+    MOZ_ASSERT(fd);
+    return fd.forget();
+  }
+
+  constexpr auto urlDataMimeType = "application/x-www-form-urlencoded"_ns;
+  bool isValidUrlEncodedMimeType = StringBeginsWith(aMimeType, urlDataMimeType);
+
+  if (isValidUrlEncodedMimeType &&
+      aMimeType.Length() > urlDataMimeType.Length()) {
+    isValidUrlEncodedMimeType = aMimeType[urlDataMimeType.Length()] == ';';
+  }
+
+  if (isValidUrlEncodedMimeType) {
+    RefPtr<FormData> fd = new FormData(aParent);
+    DebugOnly<bool> status = URLParams::Parse(
+        aStr, [&fd](const nsAString& aName, const nsAString& aValue) {
+          ErrorResult rv;
+          fd->Append(aName, aValue, rv);
+          MOZ_ASSERT(!rv.Failed());
+          return true;
+        });
+    MOZ_ASSERT(status);
+
+    return fd.forget();
+  }
+
+  aRv.ThrowTypeError<MSG_BAD_FORMDATA>();
+  return nullptr;
+}
+
+// static
+nsresult BodyUtil::ConsumeText(uint32_t aInputLength, uint8_t* aInput,
+                               nsString& aText) {
+  nsresult rv =
+      UTF_8_ENCODING->DecodeWithBOMRemoval(Span(aInput, aInputLength), aText);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+  return NS_OK;
+}
+
+// static
+void BodyUtil::ConsumeJson(JSContext* aCx, JS::MutableHandle<JS::Value> aValue,
+                           const nsString& aStr, ErrorResult& aRv) {
+  aRv.MightThrowJSException();
+
+  JS::Rooted<JS::Value> json(aCx);
+  if (!JS_ParseJSON(aCx, aStr.get(), aStr.Length(), &json)) {
+    if (!JS_IsExceptionPending(aCx)) {
+      aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR);
+      return;
+    }
+
+    JS::Rooted<JS::Value> exn(aCx);
+    DebugOnly<bool> gotException = JS_GetPendingException(aCx, &exn);
+    MOZ_ASSERT(gotException);
+
+    JS_ClearPendingException(aCx);
+    aRv.ThrowJSException(aCx, exn);
+    return;
+  }
+
+  aValue.set(json);
+}
+
+}  // namespace mozilla::dom