diff options
Diffstat (limited to 'intl/uconv/nsScriptableUConv.cpp')
-rw-r--r-- | intl/uconv/nsScriptableUConv.cpp | 261 |
1 files changed, 261 insertions, 0 deletions
diff --git a/intl/uconv/nsScriptableUConv.cpp b/intl/uconv/nsScriptableUConv.cpp new file mode 100644 index 0000000000..942da4e7dc --- /dev/null +++ b/intl/uconv/nsScriptableUConv.cpp @@ -0,0 +1,261 @@ + +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsString.h" +#include "nsIScriptableUConv.h" +#include "nsScriptableUConv.h" +#include "nsIStringStream.h" +#include "nsComponentManagerUtils.h" + +using namespace mozilla; + +/* Implementation file */ +NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter) + +nsScriptableUnicodeConverter::nsScriptableUnicodeConverter() + : mIsInternal(false) {} + +nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() = default; + +NS_IMETHODIMP +nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc, + nsACString& _retval) { + if (!mEncoder) return NS_ERROR_FAILURE; + + // We can compute the length without replacement, because the + // the replacement is only one byte long and a mappable character + // would always output something, i.e. at least one byte. + // When encoding to ISO-2022-JP, unmappables shouldn't be able + // to cause more escape sequences to be emitted than the mappable + // worst case where every input character causes an escape into + // a different state. + CheckedInt<size_t> needed = + mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aSrc.Length()); + if (!needed.isValid() || needed.value() > UINT32_MAX) { + return NS_ERROR_OUT_OF_MEMORY; + } + + if (!_retval.SetLength(needed.value(), fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto src = Span(aSrc); + auto dst = AsWritableBytes(Span(_retval)); + size_t totalWritten = 0; + for (;;) { + uint32_t result; + size_t read; + size_t written; + Tie(result, read, written) = + mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, false); + if (result != kInputEmpty && result != kOutputFull) { + MOZ_RELEASE_ASSERT(written < dst.Length(), + "Unmappables with one-byte replacement should not " + "exceed mappable worst case."); + dst[written++] = '?'; + } + totalWritten += written; + if (result == kInputEmpty) { + MOZ_ASSERT(totalWritten <= UINT32_MAX); + if (!_retval.SetLength(totalWritten, fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + return NS_OK; + } + src = src.From(read); + dst = dst.From(written); + } +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::Finish(nsACString& _retval) { + // The documentation for this method says it should be called after + // ConvertFromUnicode(). However, our own tests called it after + // convertFromByteArray(), i.e. when *decoding*. + // Assuming that there exists extensions that similarly call + // this at the wrong time, let's deal. In general, it is a design + // error for this class to handle conversions in both directions. + if (!mEncoder) { + _retval.Truncate(); + mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder); + return NS_OK; + } + // If we are encoding to ISO-2022-JP, potentially + // transition back to the ASCII state. The buffer + // needs to be large enough for an additional NCR, + // though. + _retval.SetLength(13); + Span<char16_t> src(nullptr); + uint32_t result; + size_t read; + size_t written; + bool hadErrors; + Tie(result, read, written, hadErrors) = + mEncoder->EncodeFromUTF16(src, _retval, true); + Unused << hadErrors; + MOZ_ASSERT(!read); + MOZ_ASSERT(result == kInputEmpty); + _retval.SetLength(written); + + mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder); + mEncoder->Encoding()->NewEncoderInto(*mEncoder); + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc, + nsAString& _retval) { + if (!mDecoder) return NS_ERROR_FAILURE; + + uint32_t length = aSrc.Length(); + + CheckedInt<size_t> needed = mDecoder->MaxUTF16BufferLength(length); + if (!needed.isValid() || needed.value() > UINT32_MAX) { + return NS_ERROR_OUT_OF_MEMORY; + } + + if (!_retval.SetLength(needed.value(), fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto src = + Span(reinterpret_cast<const uint8_t*>(aSrc.BeginReading()), length); + uint32_t result; + size_t read; + size_t written; + bool hadErrors; + // The UTF-8 decoder used to throw regardless of the error behavior. + // Simulating the old behavior for compatibility with legacy callers. + // If callers want control over the behavior, they should switch to + // TextDecoder. + if (mDecoder->Encoding() == UTF_8_ENCODING) { + Tie(result, read, written) = + mDecoder->DecodeToUTF16WithoutReplacement(src, _retval, false); + if (result != kInputEmpty) { + return NS_ERROR_UDEC_ILLEGALINPUT; + } + } else { + Tie(result, read, written, hadErrors) = + mDecoder->DecodeToUTF16(src, _retval, false); + } + MOZ_ASSERT(result == kInputEmpty); + MOZ_ASSERT(read == length); + MOZ_ASSERT(written <= needed.value()); + Unused << hadErrors; + if (!_retval.SetLength(written, fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::ConvertToByteArray(const nsAString& aString, + uint32_t* aLen, + uint8_t** _aData) { + if (!mEncoder) return NS_ERROR_FAILURE; + + CheckedInt<size_t> needed = + mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aString.Length()); + if (!needed.isValid() || needed.value() > UINT32_MAX) { + return NS_ERROR_OUT_OF_MEMORY; + } + + uint8_t* data = (uint8_t*)malloc(needed.value()); + if (!data) { + return NS_ERROR_OUT_OF_MEMORY; + } + auto src = Span(aString); + auto dst = Span(data, needed.value()); + size_t totalWritten = 0; + for (;;) { + uint32_t result; + size_t read; + size_t written; + Tie(result, read, written) = + mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, true); + if (result != kInputEmpty && result != kOutputFull) { + // There's always room for one byte in the case of + // an unmappable character, because otherwise + // we'd have gotten `kOutputFull`. + dst[written++] = '?'; + } + totalWritten += written; + if (result == kInputEmpty) { + *_aData = data; + MOZ_ASSERT(totalWritten <= UINT32_MAX); + *aLen = totalWritten; + return NS_OK; + } + src = src.From(read); + dst = dst.From(written); + } +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::ConvertToInputStream(const nsAString& aString, + nsIInputStream** _retval) { + nsresult rv; + nsCOMPtr<nsIStringInputStream> inputStream = + do_CreateInstance("@mozilla.org/io/string-input-stream;1", &rv); + if (NS_FAILED(rv)) return rv; + + uint8_t* data; + uint32_t dataLen; + rv = ConvertToByteArray(aString, &dataLen, &data); + if (NS_FAILED(rv)) return rv; + + rv = inputStream->AdoptData(reinterpret_cast<char*>(data), dataLen); + if (NS_FAILED(rv)) { + free(data); + return rv; + } + + NS_ADDREF(*_retval = inputStream); + return rv; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::GetCharset(nsACString& aCharset) { + if (!mDecoder) { + aCharset.Truncate(); + } else { + mDecoder->Encoding()->Name(aCharset); + } + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::SetCharset(const nsACString& aCharset) { + return InitConverter(aCharset); +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::GetIsInternal(bool* aIsInternal) { + *aIsInternal = mIsInternal; + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal) { + mIsInternal = aIsInternal; + return NS_OK; +} + +nsresult nsScriptableUnicodeConverter::InitConverter( + const nsACString& aCharset) { + mEncoder = nullptr; + mDecoder = nullptr; + + auto encoding = Encoding::ForLabelNoReplacement(aCharset); + if (!encoding) { + return NS_ERROR_UCONV_NOCONV; + } + if (!(encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING)) { + mEncoder = encoding->NewEncoder(); + } + mDecoder = encoding->NewDecoderWithBOMRemoval(); + return NS_OK; +} |