Adding upstream version 124.0.1. (tag: upstream/124.0.1)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
commit: 26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree: f435a8308119effd964b339f76abb83a57c29483 /intl/uconv/nsConverterInputStream.cpp
parent: Initial commit. (diff)
download: firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
1 files changed, 256 insertions, 0 deletions
diff --git a/intl/uconv/nsConverterInputStream.cpp b/intl/uconv/nsConverterInputStream.cpp
new file mode 100644
index 0000000000..e3efdbc146
--- /dev/null
+++ b/intl/uconv/nsConverterInputStream.cpp
@@ -0,0 +1,256 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsConverterInputStream.h"
+#include "nsIInputStream.h"
+#include "nsReadLine.h"
+#include "nsStreamUtils.h"
+
+#include <algorithm>
+#include <tuple>
+
+using namespace mozilla;
+
+#define CONVERTER_BUFFER_SIZE 8192  // default for both buffers; see Init()
+
+NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream,
+                  nsIUnicharInputStream, nsIUnicharLineInputStream)
+
+NS_IMETHODIMP
+nsConverterInputStream::Init(nsIInputStream* aStream, const char* aCharset,
+                             int32_t aBufferSize, char16_t aReplacementChar) {
+  // Resolve the requested encoding; a null label selects UTF-8.
+  nsAutoCString charsetLabel;
+  if (aCharset) {
+    charsetLabel = aCharset;
+  } else {
+    charsetLabel.AssignLiteral("UTF-8");
+  }
+  auto encoding = Encoding::ForLabelNoReplacement(charsetLabel);
+  if (!encoding) {
+    return NS_ERROR_UCONV_NOCONV;
+  }
+  // Historical note: the previous implementation auto-switched only between
+  // the two UTF-16 variants, and only when initialized with an
+  // endianness-unspecific label.
+  mConverter = encoding->NewDecoder();
+
+  // A non-positive aBufferSize selects the default size for both buffers.
+  size_t utf16Capacity = CONVERTER_BUFFER_SIZE;
+  if (aBufferSize > 0) {
+    // NetUtil.jsm assumes that a buffer size equal to the input size lets
+    // the whole stream be processed as one readString. With encoding_rs
+    // that is not automatic, because it might want to see space for a
+    // surrogate pair, so compute a larger output buffer length.
+    CheckedInt<size_t> bound = mConverter->MaxUTF16BufferLength(aBufferSize);
+    if (!bound.isValid()) {
+      return NS_ERROR_OUT_OF_MEMORY;
+    }
+    utf16Capacity = bound.value();
+  } else {
+    aBufferSize = CONVERTER_BUFFER_SIZE;
+  }
+
+  // Allocate fallibly: capacity for the bytes, full length for the output.
+  if (!mByteData.SetCapacity(aBufferSize, mozilla::fallible)) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+  if (!mUnicharData.SetLength(utf16Capacity, mozilla::fallible)) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  mInput = aStream;
+  mErrorsAreFatal = (aReplacementChar == 0);
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsConverterInputStream::Close() {
+  const nsresult sourceStatus = mInput ? mInput->Close() : NS_OK;
+  mLineBuffer = nullptr;  // torn down even if closing the source failed
+  mInput = nullptr;       // makes a later Fill() report a closed stream
+  mConverter = nullptr;
+  mByteData.Clear();
+  mUnicharData.Clear();
+  return sourceStatus;  // NS_OK when there was no source stream
+}
+
+NS_IMETHODIMP
+nsConverterInputStream::Read(char16_t* aBuf, uint32_t aCount,
+                             uint32_t* aReadCount) {
+  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
+  // Serve buffered code units first; decode more only when none remain.
+  uint32_t pending = mUnicharDataLength - mUnicharDataOffset;
+  if (pending == 0) {
+    pending = Fill(&mLastErrorCode);
+    if (pending == 0) {
+      // End of stream (NS_OK) or a failure; either way nothing was read.
+      *aReadCount = 0;
+      return mLastErrorCode;
+    }
+  }
+  // Never hand out more than the caller asked for.
+  const uint32_t toCopy = std::min(pending, aCount);
+  const char16_t* from = mUnicharData.Elements() + mUnicharDataOffset;
+  memcpy(aBuf, from, toCopy * sizeof(char16_t));
+  mUnicharDataOffset += toCopy;
+  *aReadCount = toCopy;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
+                                     void* aClosure, uint32_t aCount,
+                                     uint32_t* aReadCount) {
+  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
+  // Start from whatever is still buffered; decode more only if nothing is.
+  uint32_t buffered = mUnicharDataLength - mUnicharDataOffset;
+  if (buffered == 0) {
+    buffered = Fill(&mLastErrorCode);
+    if (buffered == 0) {
+      // Nothing decoded: report Fill()'s status (NS_OK at end of stream).
+      *aReadCount = 0;
+      return mLastErrorCode;
+    }
+  }
+
+  // Cap the amount offered to the writer at the caller's limit.
+  uint32_t remaining = std::min(buffered, aCount);
+  uint32_t delivered = 0;
+
+  // Keep offering the unconsumed tail until the writer has taken all of it
+  // or reports a failure.
+  while (remaining != 0) {
+    const char16_t* segment = mUnicharData.Elements() + mUnicharDataOffset;
+    uint32_t accepted;
+    nsresult writerStatus =
+        aWriter(this, aClosure, segment, delivered, remaining, &accepted);
+    if (NS_FAILED(writerStatus)) {
+      // A writer failure only stops the loop; it is not returned to the
+      // caller, who still learns how much was delivered before it.
+      break;
+    }
+    mUnicharDataOffset += accepted;
+    delivered += accepted;
+    remaining -= accepted;
+  }
+
+  *aReadCount = delivered;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString,
+                                   uint32_t* aReadCount) {
+  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
+  // Use what is already decoded, refilling only when the buffer is drained.
+  uint32_t avail = mUnicharDataLength - mUnicharDataOffset;
+  if (avail == 0) {
+    avail = Fill(&mLastErrorCode);
+    if (avail == 0) {
+      // aString is left untouched when there is nothing to return.
+      *aReadCount = 0;
+      return mLastErrorCode;
+    }
+  }
+  // Clamp to the requested count, then replace aString with that prefix.
+  const uint32_t taken = std::min(avail, aCount);
+  const char16_t* start = mUnicharData.Elements() + mUnicharDataOffset;
+  aString.Assign(start, taken);
+  mUnicharDataOffset += taken;
+  *aReadCount = taken;
+  return NS_OK;
+}
+
+uint32_t nsConverterInputStream::Fill(nsresult* aErrorCode) {
+  if (!mInput) {
+    // No source stream (Close() nulls mInput): report the stream as closed.
+    *aErrorCode = NS_BASE_STREAM_CLOSED;
+    return 0;
+  }
+
+  if (NS_FAILED(mLastErrorCode)) {
+    // An earlier failure is sticky. The callers in this file pass
+    // &mLastErrorCode as aErrorCode, so read and decode errors both land here.
+    *aErrorCode = mLastErrorCode;
+    return 0;
+  }
+
+  // mUnicharData.Length() is the buffer length, not the fill status.
+  // mUnicharDataLength reflects the current fill status.
+  mUnicharDataLength = 0;
+  // Whenever we convert, mUnicharData is logically empty.
+  mUnicharDataOffset = 0;
+
+  // Decode input until at least one UTF-16 code unit is available or an
+  // error occurs, because returning 0 with NS_OK tells callers the stream
+  // is complete. Each pass reads more bytes and runs the decoder once.
+  //
+  // If the converter has been cleared, we've fully consumed the stream, and
+  // want to report EOF.
+  while (mUnicharDataLength == 0 && mConverter) {
+    // mLeftOverBytes is the number of bytes in mByteData that the previous
+    // decode call did not consume (e.g. when its result was kOutputFull).
+    // NS_FillArray is given that count and reports the number of newly read
+    // bytes in nb; presumably it keeps the leftover bytes ahead of the new
+    // ones (see nsStreamUtils.h). The assertion below checks the total.
+
+    uint32_t nb;
+    *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb);
+    if (NS_FAILED(*aErrorCode)) {
+      return 0;
+    }
+
+    NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(),
+                 "mByteData is lying to us somewhere");
+
+    // If `NS_FillArray` failed to read any new bytes, this is the last read,
+    // and we're at the end of the stream.
+    bool last = (nb == 0);
+
+    // Now convert as much of the byte buffer to UTF-16 as possible.
+    auto src = AsBytes(Span(mByteData));
+    auto dst = Span(mUnicharData);
+
+    // Truncation from size_t to uint32_t below is OK, because the sizes
+    // are bounded by the lengths of mByteData and mUnicharData.
+    uint32_t result;
+    size_t read;
+    size_t written;
+    if (mErrorsAreFatal) {  // Init() was given no replacement character
+      std::tie(result, read, written) =
+          mConverter->DecodeToUTF16WithoutReplacement(src, dst, last);
+    } else {
+      std::tie(result, read, written, std::ignore) =
+          mConverter->DecodeToUTF16(src, dst, last);
+    }
+    mLeftOverBytes = mByteData.Length() - read;
+    mUnicharDataLength = written;
+    // Clear `mConverter` if we reached the end of the stream, as we can't
+    // call methods on it anymore. This will also signal EOF to the caller
+    // through the loop condition.
+    if (last) {
+      MOZ_ASSERT(mLeftOverBytes == 0,
+                 "Failed to read all bytes on the last pass?");
+      mConverter = nullptr;
+    }
+    // Any other result is a decode error (expected only in fatal mode).
+    if (result != kInputEmpty && result != kOutputFull) {
+      MOZ_ASSERT(mErrorsAreFatal, "How come DecodeToUTF16() reported error?");
+      *aErrorCode = NS_ERROR_UDEC_ILLEGALINPUT;
+      return 0;
+    }
+  }
+  *aErrorCode = NS_OK;
+  return mUnicharDataLength;  // 0 here means end of stream
+}
+
+NS_IMETHODIMP
+nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult) {
+  // The line buffer is created on first use and kept until Close().
+  if (!mLineBuffer) mLineBuffer = MakeUnique<nsLineBuffer<char16_t>>();
+  auto* lineState = mLineBuffer.get();
+  return NS_ReadLine(this, lineState, aLine, aResult);
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
commit	26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree	f435a8308119effd964b339f76abb83a57c29483 /intl/uconv/nsConverterInputStream.cpp
parent	Initial commit. (diff)
download	firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip