summaryrefslogtreecommitdiffstats
path: root/intl/uconv/nsScriptableUConv.cpp
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--intl/uconv/nsScriptableUConv.cpp192
1 files changed, 192 insertions, 0 deletions
diff --git a/intl/uconv/nsScriptableUConv.cpp b/intl/uconv/nsScriptableUConv.cpp
new file mode 100644
index 0000000000..e56d578efd
--- /dev/null
+++ b/intl/uconv/nsScriptableUConv.cpp
@@ -0,0 +1,192 @@
+
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsString.h"
+#include "nsIScriptableUConv.h"
+#include "nsScriptableUConv.h"
+#include "nsComponentManagerUtils.h"
+
+#include <tuple>
+
+using namespace mozilla;
+
+/* Implementation file */
+NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter)
+
+nsScriptableUnicodeConverter::nsScriptableUnicodeConverter()
+ : mIsInternal(false) {}
+
+nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() = default;
+
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc,
+ nsACString& _retval) {
+ if (!mEncoder) return NS_ERROR_FAILURE;
+
+ // We can compute the length without replacement, because the
+ // the replacement is only one byte long and a mappable character
+ // would always output something, i.e. at least one byte.
+ // When encoding to ISO-2022-JP, unmappables shouldn't be able
+ // to cause more escape sequences to be emitted than the mappable
+ // worst case where every input character causes an escape into
+ // a different state.
+ CheckedInt<size_t> needed =
+ mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aSrc.Length());
+ if (!needed.isValid() || needed.value() > UINT32_MAX) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+
+ auto dstChars = _retval.GetMutableData(needed.value(), fallible);
+ if (!dstChars) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+
+ auto src = Span(aSrc);
+ auto dst = AsWritableBytes(*dstChars);
+ size_t totalWritten = 0;
+ for (;;) {
+ auto [result, read, written] =
+ mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, false);
+ if (result != kInputEmpty && result != kOutputFull) {
+ MOZ_RELEASE_ASSERT(written < dst.Length(),
+ "Unmappables with one-byte replacement should not "
+ "exceed mappable worst case.");
+ dst[written++] = '?';
+ }
+ totalWritten += written;
+ if (result == kInputEmpty) {
+ MOZ_ASSERT(totalWritten <= UINT32_MAX);
+ if (!_retval.SetLength(totalWritten, fallible)) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+ return NS_OK;
+ }
+ src = src.From(read);
+ dst = dst.From(written);
+ }
+}
+
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::Finish(nsACString& _retval) {
+ // The documentation for this method says it should be called after
+ // ConvertFromUnicode(). However, our own tests called it after
+ // convertFromByteArray(), i.e. when *decoding*.
+ // Assuming that there exists extensions that similarly call
+ // this at the wrong time, let's deal. In general, it is a design
+ // error for this class to handle conversions in both directions.
+ if (!mEncoder) {
+ _retval.Truncate();
+ mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
+ return NS_OK;
+ }
+ // If we are encoding to ISO-2022-JP, potentially
+ // transition back to the ASCII state. The buffer
+ // needs to be large enough for an additional NCR,
+ // though.
+ _retval.SetLength(13);
+ auto dst = AsWritableBytes(_retval.GetMutableData(13));
+ Span<char16_t> src(nullptr);
+ uint32_t result;
+ size_t read;
+ size_t written;
+ std::tie(result, read, written, std::ignore) =
+ mEncoder->EncodeFromUTF16(src, dst, true);
+ MOZ_ASSERT(!read);
+ MOZ_ASSERT(result == kInputEmpty);
+ _retval.SetLength(written);
+
+ mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
+ mEncoder->Encoding()->NewEncoderInto(*mEncoder);
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc,
+ nsAString& _retval) {
+ if (!mDecoder) return NS_ERROR_FAILURE;
+
+ uint32_t length = aSrc.Length();
+
+ CheckedInt<size_t> needed = mDecoder->MaxUTF16BufferLength(length);
+ if (!needed.isValid() || needed.value() > UINT32_MAX) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+
+ auto dst = _retval.GetMutableData(needed.value(), fallible);
+ if (!dst) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+
+ auto src =
+ Span(reinterpret_cast<const uint8_t*>(aSrc.BeginReading()), length);
+ uint32_t result;
+ size_t read;
+ size_t written;
+ // The UTF-8 decoder used to throw regardless of the error behavior.
+ // Simulating the old behavior for compatibility with legacy callers.
+ // If callers want control over the behavior, they should switch to
+ // TextDecoder.
+ if (mDecoder->Encoding() == UTF_8_ENCODING) {
+ std::tie(result, read, written) =
+ mDecoder->DecodeToUTF16WithoutReplacement(src, *dst, false);
+ if (result != kInputEmpty) {
+ return NS_ERROR_UDEC_ILLEGALINPUT;
+ }
+ } else {
+ std::tie(result, read, written, std::ignore) =
+ mDecoder->DecodeToUTF16(src, *dst, false);
+ }
+ MOZ_ASSERT(result == kInputEmpty);
+ MOZ_ASSERT(read == length);
+ MOZ_ASSERT(written <= needed.value());
+ if (!_retval.SetLength(written, fallible)) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::GetCharset(nsACString& aCharset) {
+ if (!mDecoder) {
+ aCharset.Truncate();
+ } else {
+ mDecoder->Encoding()->Name(aCharset);
+ }
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::SetCharset(const nsACString& aCharset) {
+ return InitConverter(aCharset);
+}
+
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::GetIsInternal(bool* aIsInternal) {
+ *aIsInternal = mIsInternal;
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal) {
+ mIsInternal = aIsInternal;
+ return NS_OK;
+}
+
+nsresult nsScriptableUnicodeConverter::InitConverter(
+ const nsACString& aCharset) {
+ mEncoder = nullptr;
+ mDecoder = nullptr;
+
+ auto encoding = Encoding::ForLabelNoReplacement(aCharset);
+ if (!encoding) {
+ return NS_ERROR_UCONV_NOCONV;
+ }
+ if (!(encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING)) {
+ mEncoder = encoding->NewEncoder();
+ }
+ mDecoder = encoding->NewDecoderWithBOMRemoval();
+ return NS_OK;
+}