summaryrefslogtreecommitdiffstats
path: root/intl/uconv/nsScriptableUConv.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/uconv/nsScriptableUConv.cpp')
-rw-r--r--intl/uconv/nsScriptableUConv.cpp261
1 files changed, 261 insertions, 0 deletions
diff --git a/intl/uconv/nsScriptableUConv.cpp b/intl/uconv/nsScriptableUConv.cpp
new file mode 100644
index 0000000000..942da4e7dc
--- /dev/null
+++ b/intl/uconv/nsScriptableUConv.cpp
@@ -0,0 +1,261 @@
+
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsString.h"
+#include "nsIScriptableUConv.h"
+#include "nsScriptableUConv.h"
+#include "nsIStringStream.h"
+#include "nsComponentManagerUtils.h"
+
+using namespace mozilla;
+
+/* Implementation file */
+NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter)
+
+nsScriptableUnicodeConverter::nsScriptableUnicodeConverter()
+ : mIsInternal(false) {}
+
+nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() = default;
+
+// Encodes the UTF-16 string aSrc with the current encoder and stores the
+// resulting bytes in _retval. Every character the encoder reports as
+// unmappable is written as a single '?' byte.
+//
+// Returns NS_ERROR_FAILURE when there is no encoder, and
+// NS_ERROR_OUT_OF_MEMORY when the required size is not representable or
+// _retval cannot be resized.
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc,
+                                                 nsACString& _retval) {
+  if (!mEncoder) {
+    return NS_ERROR_FAILURE;
+  }
+
+  // The length can be computed without accounting for replacement: the
+  // replacement is only one byte long, and a mappable character would
+  // always output at least one byte. When encoding to ISO-2022-JP,
+  // unmappables shouldn't be able to cause more escape sequences to be
+  // emitted than the mappable worst case, where every input character
+  // causes an escape into a different state.
+  CheckedInt<size_t> capacity =
+      mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aSrc.Length());
+  const bool capacityUsable =
+      capacity.isValid() && capacity.value() <= UINT32_MAX;
+  if (!capacityUsable || !_retval.SetLength(capacity.value(), fallible)) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  auto input = Span(aSrc);
+  auto output = AsWritableBytes(Span(_retval));
+  size_t outputLength = 0;
+  while (true) {
+    uint32_t status;
+    size_t consumed;
+    size_t produced;
+    Tie(status, consumed, produced) =
+        mEncoder->EncodeFromUTF16WithoutReplacement(input, output, false);
+
+    // Any status other than "input empty" or "output full" is treated as an
+    // unmappable character and replaced with '?'.
+    const bool sawUnmappable =
+        !(status == kInputEmpty || status == kOutputFull);
+    if (sawUnmappable) {
+      MOZ_RELEASE_ASSERT(produced < output.Length(),
+                         "Unmappables with one-byte replacement should not "
+                         "exceed mappable worst case.");
+      output[produced++] = '?';
+    }
+    outputLength += produced;
+
+    if (status != kInputEmpty) {
+      // More input remains: advance both views past what was handled.
+      input = input.From(consumed);
+      output = output.From(produced);
+      continue;
+    }
+
+    // All input consumed: shrink the buffer to the bytes actually written.
+    MOZ_ASSERT(outputLength <= UINT32_MAX);
+    if (!_retval.SetLength(outputLength, fallible)) {
+      return NS_ERROR_OUT_OF_MEMORY;
+    }
+    return NS_OK;
+  }
+}
+
+// Flushes any pending encoder output into _retval and resets the converter
+// state so the object can be used for a fresh conversion.
+//
+// Returns NS_ERROR_FAILURE when no converter has been set up (no decoder
+// exists); otherwise NS_OK. When only a decoder exists, _retval is emptied
+// and the decoder is reset.
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::Finish(nsACString& _retval) {
+  // InitConverter() clears both mEncoder and mDecoder before trying to set
+  // them up, so both can be null here (e.g. no charset was ever set). Every
+  // path below dereferences mDecoder, so bail out instead of dereferencing a
+  // null pointer. This matches how the conversion methods report a missing
+  // converter.
+  if (!mDecoder) {
+    return NS_ERROR_FAILURE;
+  }
+
+  // The documentation for this method says it should be called after
+  // ConvertFromUnicode(). However, our own tests called it after
+  // convertFromByteArray(), i.e. when *decoding*.
+  // Assuming that there exist extensions that similarly call
+  // this at the wrong time, let's deal. In general, it is a design
+  // error for this class to handle conversions in both directions.
+  if (!mEncoder) {
+    _retval.Truncate();
+    mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
+    return NS_OK;
+  }
+
+  // If we are encoding to ISO-2022-JP, potentially
+  // transition back to the ASCII state. The buffer
+  // needs to be large enough for an additional NCR,
+  // though.
+  _retval.SetLength(13);
+  Span<char16_t> src(nullptr);
+  uint32_t result;
+  size_t read;
+  size_t written;
+  bool hadErrors;
+  // Empty input with `last == true`: only the encoder's trailing output, if
+  // any, is produced.
+  Tie(result, read, written, hadErrors) =
+      mEncoder->EncodeFromUTF16(src, _retval, true);
+  Unused << hadErrors;
+  MOZ_ASSERT(!read);
+  MOZ_ASSERT(result == kInputEmpty);
+  _retval.SetLength(written);
+
+  // Reset both directions to their initial state.
+  mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
+  mEncoder->Encoding()->NewEncoderInto(*mEncoder);
+  return NS_OK;
+}
+
+// Decodes the bytes in aSrc with the current decoder and stores the UTF-16
+// result in _retval.
+//
+// Returns NS_ERROR_FAILURE when there is no decoder, NS_ERROR_OUT_OF_MEMORY
+// when the output buffer cannot be sized, and NS_ERROR_UDEC_ILLEGALINPUT
+// when the encoding is UTF-8 and decoding stops before the input is used up.
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc,
+                                               nsAString& _retval) {
+  if (!mDecoder) {
+    return NS_ERROR_FAILURE;
+  }
+
+  uint32_t srcLength = aSrc.Length();
+
+  // Size the output for the worst case; it is trimmed after decoding.
+  CheckedInt<size_t> capacity = mDecoder->MaxUTF16BufferLength(srcLength);
+  const bool capacityUsable =
+      capacity.isValid() && capacity.value() <= UINT32_MAX;
+  if (!capacityUsable || !_retval.SetLength(capacity.value(), fallible)) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  const auto* srcBytes = reinterpret_cast<const uint8_t*>(aSrc.BeginReading());
+  auto input = Span(srcBytes, srcLength);
+  uint32_t status;
+  size_t consumed;
+  size_t produced;
+  bool hadErrors;
+
+  const bool isUtf8 = mDecoder->Encoding() == UTF_8_ENCODING;
+  if (!isUtf8) {
+    Tie(status, consumed, produced, hadErrors) =
+        mDecoder->DecodeToUTF16(input, _retval, false);
+  } else {
+    // The UTF-8 decoder used to throw regardless of the error behavior.
+    // That old behavior is simulated here for compatibility with legacy
+    // callers. Callers that want control over the behavior should switch
+    // to TextDecoder.
+    Tie(status, consumed, produced) =
+        mDecoder->DecodeToUTF16WithoutReplacement(input, _retval, false);
+    if (status != kInputEmpty) {
+      return NS_ERROR_UDEC_ILLEGALINPUT;
+    }
+  }
+
+  MOZ_ASSERT(status == kInputEmpty);
+  MOZ_ASSERT(consumed == srcLength);
+  MOZ_ASSERT(produced <= capacity.value());
+  Unused << hadErrors;
+
+  // Trim the string down to the number of code units actually written.
+  if (!_retval.SetLength(produced, fallible)) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+  return NS_OK;
+}
+
+// Encodes the UTF-16 string aString with the current encoder into a buffer
+// obtained from malloc(). On success the buffer is handed out through _aData
+// and the number of bytes written through aLen; the buffer is not freed
+// here. Unmappable characters are written as a single '?' byte.
+//
+// Returns NS_ERROR_FAILURE when there is no encoder, and
+// NS_ERROR_OUT_OF_MEMORY when the size is not representable or the
+// allocation fails.
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::ConvertToByteArray(const nsAString& aString,
+                                                 uint32_t* aLen,
+                                                 uint8_t** _aData) {
+  if (!mEncoder) {
+    return NS_ERROR_FAILURE;
+  }
+
+  CheckedInt<size_t> capacity =
+      mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aString.Length());
+  if (!capacity.isValid() || capacity.value() > UINT32_MAX) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  auto* buffer = static_cast<uint8_t*>(malloc(capacity.value()));
+  if (!buffer) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  auto input = Span(aString);
+  auto output = Span(buffer, capacity.value());
+  size_t outputLength = 0;
+  while (true) {
+    uint32_t status;
+    size_t consumed;
+    size_t produced;
+    Tie(status, consumed, produced) =
+        mEncoder->EncodeFromUTF16WithoutReplacement(input, output, true);
+
+    const bool sawUnmappable =
+        !(status == kInputEmpty || status == kOutputFull);
+    if (sawUnmappable) {
+      // There's always room for one byte in the case of
+      // an unmappable character, because otherwise
+      // we'd have gotten `kOutputFull`.
+      output[produced++] = '?';
+    }
+    outputLength += produced;
+
+    if (status != kInputEmpty) {
+      // More input remains: advance both views past what was handled.
+      input = input.From(consumed);
+      output = output.From(produced);
+      continue;
+    }
+
+    // Everything was encoded: publish the buffer and its used length.
+    *_aData = buffer;
+    MOZ_ASSERT(outputLength <= UINT32_MAX);
+    *aLen = outputLength;
+    return NS_OK;
+  }
+}
+
+// Encodes aString via ConvertToByteArray() and returns the bytes wrapped in
+// a newly created string input stream through _retval. Any failure from
+// stream creation, encoding, or AdoptData() is returned unchanged.
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::ConvertToInputStream(const nsAString& aString,
+                                                   nsIInputStream** _retval) {
+  nsresult rv;
+  nsCOMPtr<nsIStringInputStream> stream =
+      do_CreateInstance("@mozilla.org/io/string-input-stream;1", &rv);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+
+  uint8_t* bytes;
+  uint32_t byteCount;
+  rv = ConvertToByteArray(aString, &byteCount, &bytes);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+
+  rv = stream->AdoptData(reinterpret_cast<char*>(bytes), byteCount);
+  if (NS_FAILED(rv)) {
+    // AdoptData() failed, so the buffer is released here.
+    free(bytes);
+    return rv;
+  }
+
+  *_retval = stream;
+  NS_ADDREF(*_retval);
+  return rv;
+}
+
+// Writes the name of the current decoder's encoding into aCharset, or
+// empties aCharset when there is no decoder. Always returns NS_OK.
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::GetCharset(nsACString& aCharset) {
+  if (mDecoder) {
+    mDecoder->Encoding()->Name(aCharset);
+    return NS_OK;
+  }
+
+  aCharset.Truncate();
+  return NS_OK;
+}
+
+// Selects the charset to convert with by delegating to InitConverter();
+// its result is returned unchanged.
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::SetCharset(const nsACString& aCharset) {
+  const nsresult status = InitConverter(aCharset);
+  return status;
+}
+
+// Copies the stored "internal" flag into *aIsInternal. Always returns NS_OK.
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::GetIsInternal(bool* aIsInternal) {
+  const bool currentFlag = mIsInternal;
+  *aIsInternal = currentFlag;
+  return NS_OK;
+}
+
+// Stores aIsInternal as the new "internal" flag. Always returns NS_OK.
+// Nothing else in this file reads the flag besides GetIsInternal().
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal) {
+  // Plain store; no converter state is touched.
+  mIsInternal = aIsInternal;
+
+  return NS_OK;
+}
+
+// Drops any existing encoder/decoder and creates new ones for the encoding
+// named by the label aCharset.
+//
+// Returns NS_ERROR_UCONV_NOCONV when the label does not resolve to an
+// encoding; in that case both converters stay null. For UTF-16LE and
+// UTF-16BE only a decoder is created, so mEncoder stays null.
+nsresult nsScriptableUnicodeConverter::InitConverter(
+    const nsACString& aCharset) {
+  // Reset first so that a failed lookup leaves no stale converters behind.
+  mEncoder = nullptr;
+  mDecoder = nullptr;
+
+  const auto encoding = Encoding::ForLabelNoReplacement(aCharset);
+  if (!encoding) {
+    return NS_ERROR_UCONV_NOCONV;
+  }
+
+  const bool isUtf16 =
+      encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING;
+  if (!isUtf16) {
+    mEncoder = encoding->NewEncoder();
+  }
+  mDecoder = encoding->NewDecoderWithBOMRemoval();
+
+  return NS_OK;
+}