diff options
Diffstat (limited to '')
-rw-r--r-- | intl/uconv/nsScriptableUConv.cpp | 192 |
1 files changed, 192 insertions, 0 deletions
diff --git a/intl/uconv/nsScriptableUConv.cpp b/intl/uconv/nsScriptableUConv.cpp new file mode 100644 index 0000000000..e56d578efd --- /dev/null +++ b/intl/uconv/nsScriptableUConv.cpp @@ -0,0 +1,192 @@ + +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsString.h" +#include "nsIScriptableUConv.h" +#include "nsScriptableUConv.h" +#include "nsComponentManagerUtils.h" + +#include <tuple> + +using namespace mozilla; + +/* Implementation file */ +NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter) + +nsScriptableUnicodeConverter::nsScriptableUnicodeConverter() + : mIsInternal(false) {} + +nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() = default; + +NS_IMETHODIMP +nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc, + nsACString& _retval) { + if (!mEncoder) return NS_ERROR_FAILURE; + + // We can compute the length without replacement, because the + // the replacement is only one byte long and a mappable character + // would always output something, i.e. at least one byte. + // When encoding to ISO-2022-JP, unmappables shouldn't be able + // to cause more escape sequences to be emitted than the mappable + // worst case where every input character causes an escape into + // a different state. + CheckedInt<size_t> needed = + mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aSrc.Length()); + if (!needed.isValid() || needed.value() > UINT32_MAX) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto dstChars = _retval.GetMutableData(needed.value(), fallible); + if (!dstChars) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto src = Span(aSrc); + auto dst = AsWritableBytes(*dstChars); + size_t totalWritten = 0; + for (;;) { + auto [result, read, written] = + mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, false); + if (result != kInputEmpty && result != kOutputFull) { + MOZ_RELEASE_ASSERT(written < dst.Length(), + "Unmappables with one-byte replacement should not " + "exceed mappable worst case."); + dst[written++] = '?'; + } + totalWritten += written; + if (result == kInputEmpty) { + MOZ_ASSERT(totalWritten <= UINT32_MAX); + if (!_retval.SetLength(totalWritten, fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + return NS_OK; + } + src = src.From(read); + dst = dst.From(written); + } +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::Finish(nsACString& _retval) { + // The documentation for this method says it should be called after + // ConvertFromUnicode(). However, our own tests called it after + // convertFromByteArray(), i.e. when *decoding*. + // Assuming that there exists extensions that similarly call + // this at the wrong time, let's deal. In general, it is a design + // error for this class to handle conversions in both directions. + if (!mEncoder) { + _retval.Truncate(); + mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder); + return NS_OK; + } + // If we are encoding to ISO-2022-JP, potentially + // transition back to the ASCII state. The buffer + // needs to be large enough for an additional NCR, + // though. + _retval.SetLength(13); + auto dst = AsWritableBytes(_retval.GetMutableData(13)); + Span<char16_t> src(nullptr); + uint32_t result; + size_t read; + size_t written; + std::tie(result, read, written, std::ignore) = + mEncoder->EncodeFromUTF16(src, dst, true); + MOZ_ASSERT(!read); + MOZ_ASSERT(result == kInputEmpty); + _retval.SetLength(written); + + mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder); + mEncoder->Encoding()->NewEncoderInto(*mEncoder); + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc, + nsAString& _retval) { + if (!mDecoder) return NS_ERROR_FAILURE; + + uint32_t length = aSrc.Length(); + + CheckedInt<size_t> needed = mDecoder->MaxUTF16BufferLength(length); + if (!needed.isValid() || needed.value() > UINT32_MAX) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto dst = _retval.GetMutableData(needed.value(), fallible); + if (!dst) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto src = + Span(reinterpret_cast<const uint8_t*>(aSrc.BeginReading()), length); + uint32_t result; + size_t read; + size_t written; + // The UTF-8 decoder used to throw regardless of the error behavior. + // Simulating the old behavior for compatibility with legacy callers. + // If callers want control over the behavior, they should switch to + // TextDecoder. + if (mDecoder->Encoding() == UTF_8_ENCODING) { + std::tie(result, read, written) = + mDecoder->DecodeToUTF16WithoutReplacement(src, *dst, false); + if (result != kInputEmpty) { + return NS_ERROR_UDEC_ILLEGALINPUT; + } + } else { + std::tie(result, read, written, std::ignore) = + mDecoder->DecodeToUTF16(src, *dst, false); + } + MOZ_ASSERT(result == kInputEmpty); + MOZ_ASSERT(read == length); + MOZ_ASSERT(written <= needed.value()); + if (!_retval.SetLength(written, fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::GetCharset(nsACString& aCharset) { + if (!mDecoder) { + aCharset.Truncate(); + } else { + mDecoder->Encoding()->Name(aCharset); + } + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::SetCharset(const nsACString& aCharset) { + return InitConverter(aCharset); +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::GetIsInternal(bool* aIsInternal) { + *aIsInternal = mIsInternal; + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal) { + mIsInternal = aIsInternal; + return NS_OK; +} + +nsresult nsScriptableUnicodeConverter::InitConverter( + const nsACString& aCharset) { + mEncoder = nullptr; + mDecoder = nullptr; + + auto encoding = Encoding::ForLabelNoReplacement(aCharset); + if (!encoding) { + return NS_ERROR_UCONV_NOCONV; + } + if (!(encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING)) { + mEncoder = encoding->NewEncoder(); + } + mDecoder = encoding->NewDecoderWithBOMRemoval(); + return NS_OK; +} |