/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsString.h"
#include "nsIScriptableUConv.h"
#include "nsScriptableUConv.h"
#include "nsIStringStream.h"
#include "nsComponentManagerUtils.h"

#include <tuple>  // std::tie, std::ignore

using namespace mozilla;

/* Implementation file */
NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter)

nsScriptableUnicodeConverter::nsScriptableUnicodeConverter()
    : mIsInternal(false) {}

nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() = default;

/**
 * Encodes aSrc with the current encoder and stores the bytes in _retval.
 * Every unmappable character is written as a single '?' byte.
 *
 * @return NS_ERROR_FAILURE when no encoder is set; NS_ERROR_OUT_OF_MEMORY
 *         when the worst-case length is invalid / exceeds UINT32_MAX or
 *         when _retval cannot be resized; NS_OK otherwise.
 */
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc,
                                                 nsACString& _retval) {
  if (!mEncoder) {
    return NS_ERROR_FAILURE;
  }

  // We can compute the length without replacement, because the
  // replacement is only one byte long and a mappable character
  // would always output something, i.e. at least one byte.
  // When encoding to ISO-2022-JP, unmappables shouldn't be able
  // to cause more escape sequences to be emitted than the mappable
  // worst case where every input character causes an escape into
  // a different state.
  CheckedInt<size_t> needed =
      mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aSrc.Length());
  if (!needed.isValid() || needed.value() > UINT32_MAX) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  auto dstChars = _retval.GetMutableData(needed.value(), fallible);
  if (!dstChars) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  auto src = Span(aSrc);
  auto dst = AsWritableBytes(*dstChars);
  size_t totalWritten = 0;
  for (;;) {
    auto [result, read, written] =
        mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, false);
    // Any result other than "input empty" / "output full" is treated as an
    // unmappable character and replaced by '?'.
    if (result != kInputEmpty && result != kOutputFull) {
      MOZ_RELEASE_ASSERT(written < dst.Length(),
                         "Unmappables with one-byte replacement should not "
                         "exceed mappable worst case.");
      dst[written++] = '?';
    }
    totalWritten += written;
    if (result == kInputEmpty) {
      MOZ_ASSERT(totalWritten <= UINT32_MAX);
      // Shrink the worst-case buffer to what was actually produced.
      if (!_retval.SetLength(totalWritten, fallible)) {
        return NS_ERROR_OUT_OF_MEMORY;
      }
      return NS_OK;
    }
    // Continue with the unread input and the unused output.
    src = src.From(read);
    dst = dst.From(written);
  }
}

/**
 * Flushes the encoder (if there is one) into _retval and reinitializes the
 * decoder and encoder through their NewDecoderWithBOMRemovalInto /
 * NewEncoderInto calls.
 *
 * @return NS_ERROR_FAILURE when no charset has been set successfully (there
 *         is neither a decoder nor an encoder); NS_OK otherwise.
 */
NS_IMETHODIMP
nsScriptableUnicodeConverter::Finish(nsACString& _retval) {
  // InitConverter() always creates a decoder on success, so a missing
  // decoder means the converter was never (successfully) initialized.
  // Bail out instead of dereferencing a null decoder below.
  if (!mDecoder) {
    return NS_ERROR_FAILURE;
  }

  // The documentation for this method says it should be called after
  // ConvertFromUnicode(). However, our own tests called it after
  // convertFromByteArray(), i.e. when *decoding*.
  // Assuming that there exists extensions that similarly call
  // this at the wrong time, let's deal. In general, it is a design
  // error for this class to handle conversions in both directions.
  if (!mEncoder) {
    _retval.Truncate();
    mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
    return NS_OK;
  }

  // If we are encoding to ISO-2022-JP, potentially
  // transition back to the ASCII state. The buffer
  // needs to be large enough for an additional NCR,
  // though.
  constexpr uint32_t kFinishBufferLength = 13;
  _retval.SetLength(kFinishBufferLength);
  auto dst = AsWritableBytes(_retval.GetMutableData(kFinishBufferLength));
  Span<const char16_t> src(nullptr);
  uint32_t result;
  size_t read;
  size_t written;
  // Empty input with last == true: only pending encoder state is emitted.
  std::tie(result, read, written, std::ignore) =
      mEncoder->EncodeFromUTF16(src, dst, true);
  MOZ_ASSERT(!read);
  MOZ_ASSERT(result == kInputEmpty);
  _retval.SetLength(written);

  mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
  mEncoder->Encoding()->NewEncoderInto(*mEncoder);
  return NS_OK;
}

/**
 * Decodes the bytes in aSrc with the current decoder and stores the UTF-16
 * result in _retval.
 *
 * @return NS_ERROR_FAILURE when no decoder is set; NS_ERROR_OUT_OF_MEMORY
 *         when the worst-case length is invalid / exceeds UINT32_MAX or
 *         when _retval cannot be resized; NS_ERROR_UDEC_ILLEGALINPUT when
 *         the encoding is UTF-8 and decoding without replacement does not
 *         consume all input cleanly; NS_OK otherwise.
 */
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc,
                                               nsAString& _retval) {
  if (!mDecoder) {
    return NS_ERROR_FAILURE;
  }

  uint32_t length = aSrc.Length();

  CheckedInt<size_t> needed = mDecoder->MaxUTF16BufferLength(length);
  if (!needed.isValid() || needed.value() > UINT32_MAX) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  auto dst = _retval.GetMutableData(needed.value(), fallible);
  if (!dst) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  auto src =
      Span(reinterpret_cast<const uint8_t*>(aSrc.BeginReading()), length);
  uint32_t result;
  size_t read;
  size_t written;
  // The UTF-8 decoder used to throw regardless of the error behavior.
  // Simulating the old behavior for compatibility with legacy callers.
  // If callers want control over the behavior, they should switch to
  // TextDecoder.
  if (mDecoder->Encoding() == UTF_8_ENCODING) {
    std::tie(result, read, written) =
        mDecoder->DecodeToUTF16WithoutReplacement(src, *dst, false);
    if (result != kInputEmpty) {
      return NS_ERROR_UDEC_ILLEGALINPUT;
    }
  } else {
    std::tie(result, read, written, std::ignore) =
        mDecoder->DecodeToUTF16(src, *dst, false);
  }
  MOZ_ASSERT(result == kInputEmpty);
  MOZ_ASSERT(read == length);
  MOZ_ASSERT(written <= needed.value());
  // Shrink the worst-case buffer to what was actually produced.
  if (!_retval.SetLength(written, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  return NS_OK;
}

/**
 * Encodes aString with the current encoder into a freshly malloc()ed byte
 * buffer. Every unmappable character is written as a single '?' byte.
 *
 * @param aString input UTF-16 text.
 * @param aLen    receives the number of bytes written.
 * @param _aData  receives the malloc()ed buffer on success.
 * @return NS_ERROR_FAILURE when no encoder is set; NS_ERROR_OUT_OF_MEMORY
 *         when the worst-case length is invalid / exceeds UINT32_MAX or
 *         malloc() fails; NS_OK otherwise.
 */
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertToByteArray(const nsAString& aString,
                                                 uint32_t* aLen,
                                                 uint8_t** _aData) {
  if (!mEncoder) {
    return NS_ERROR_FAILURE;
  }

  CheckedInt<size_t> needed =
      mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aString.Length());
  if (!needed.isValid() || needed.value() > UINT32_MAX) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  uint8_t* data = static_cast<uint8_t*>(malloc(needed.value()));
  if (!data) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  auto src = Span(aString);
  auto dst = Span(data, needed.value());
  size_t totalWritten = 0;
  for (;;) {
    // Unlike ConvertFromUnicode(), this passes last == true to the encoder.
    auto [result, read, written] =
        mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, true);
    if (result != kInputEmpty && result != kOutputFull) {
      // There's always room for one byte in the case of
      // an unmappable character, because otherwise
      // we'd have gotten `kOutputFull`.
      dst[written++] = '?';
    }
    totalWritten += written;
    if (result == kInputEmpty) {
      *_aData = data;
      MOZ_ASSERT(totalWritten <= UINT32_MAX);
      *aLen = totalWritten;
      return NS_OK;
    }
    // Continue with the unread input and the unused output.
    src = src.From(read);
    dst = dst.From(written);
  }
}

/**
 * Encodes aString via ConvertToByteArray() and wraps the resulting bytes in
 * a string input stream created from
 * "@mozilla.org/io/string-input-stream;1".
 *
 * @return the failure code of instance creation, ConvertToByteArray() or
 *         AdoptData() if any of them fails; otherwise the (successful)
 *         result of AdoptData().
 */
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertToInputStream(const nsAString& aString,
                                                   nsIInputStream** _retval) {
  nsresult rv;
  nsCOMPtr<nsIStringInputStream> inputStream =
      do_CreateInstance("@mozilla.org/io/string-input-stream;1", &rv);
  if (NS_FAILED(rv)) {
    return rv;
  }

  uint8_t* data;
  uint32_t dataLen;
  rv = ConvertToByteArray(aString, &dataLen, &data);
  if (NS_FAILED(rv)) {
    return rv;
  }

  rv = inputStream->AdoptData(reinterpret_cast<char*>(data), dataLen);
  if (NS_FAILED(rv)) {
    // The stream did not adopt the buffer, so it is still ours to release.
    free(data);
    return rv;
  }

  NS_ADDREF(*_retval = inputStream);
  return rv;
}

/**
 * Writes the name of the decoder's encoding into aCharset, or truncates
 * aCharset when no decoder is set. Always returns NS_OK.
 */
NS_IMETHODIMP
nsScriptableUnicodeConverter::GetCharset(nsACString& aCharset) {
  if (!mDecoder) {
    aCharset.Truncate();
  } else {
    mDecoder->Encoding()->Name(aCharset);
  }
  return NS_OK;
}

/** Forwards aCharset to InitConverter() and returns its result. */
NS_IMETHODIMP
nsScriptableUnicodeConverter::SetCharset(const nsACString& aCharset) {
  return InitConverter(aCharset);
}

/** Copies mIsInternal into *aIsInternal. Always returns NS_OK. */
NS_IMETHODIMP
nsScriptableUnicodeConverter::GetIsInternal(bool* aIsInternal) {
  *aIsInternal = mIsInternal;
  return NS_OK;
}

/** Stores aIsInternal in mIsInternal. Always returns NS_OK. */
NS_IMETHODIMP
nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal) {
  mIsInternal = aIsInternal;
  return NS_OK;
}

/**
 * Drops any existing encoder/decoder and creates new ones for the encoding
 * that aCharset resolves to via Encoding::ForLabelNoReplacement().
 *
 * No encoder is created for UTF-16LE / UTF-16BE; a decoder is always
 * created when the label resolves.
 *
 * @return NS_ERROR_UCONV_NOCONV when the label does not resolve (both
 *         members stay null); NS_OK otherwise.
 */
nsresult nsScriptableUnicodeConverter::InitConverter(
    const nsACString& aCharset) {
  mEncoder = nullptr;
  mDecoder = nullptr;

  auto encoding = Encoding::ForLabelNoReplacement(aCharset);
  if (!encoding) {
    return NS_ERROR_UCONV_NOCONV;
  }
  if (!(encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING)) {
    mEncoder = encoding->NewEncoder();
  }
  mDecoder = encoding->NewDecoderWithBOMRemoval();
  return NS_OK;
}