summaryrefslogtreecommitdiffstats
path: root/intl/uconv
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 17:32:43 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 17:32:43 +0000
commit6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch)
treea68f146d7fa01f0134297619fbe7e33db084e0aa /intl/uconv
parentInitial commit. (diff)
downloadthunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz
thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'intl/uconv')
-rw-r--r--intl/uconv/components.conf34
-rw-r--r--intl/uconv/crashtests/563618.html12
-rw-r--r--intl/uconv/crashtests/crashtests.list2
-rw-r--r--intl/uconv/crashtests/omt-non-utf-8-jsurl.html14
-rw-r--r--intl/uconv/directory.txt32
-rw-r--r--intl/uconv/moz.build32
-rw-r--r--intl/uconv/nsConverterInputStream.cpp256
-rw-r--r--intl/uconv/nsConverterInputStream.h64
-rw-r--r--intl/uconv/nsConverterOutputStream.cpp115
-rw-r--r--intl/uconv/nsConverterOutputStream.h39
-rw-r--r--intl/uconv/nsIScriptableUConv.idl79
-rw-r--r--intl/uconv/nsITextToSubURI.idl60
-rw-r--r--intl/uconv/nsScriptableUConv.cpp256
-rw-r--r--intl/uconv/nsScriptableUConv.h34
-rw-r--r--intl/uconv/nsTextToSubURI.cpp178
-rw-r--r--intl/uconv/nsTextToSubURI.h36
-rw-r--r--intl/uconv/tests/gtest/TestShortRead.cpp109
-rw-r--r--intl/uconv/tests/gtest/moz.build11
-rw-r--r--intl/uconv/tests/mochitest.ini14
-rw-r--r--intl/uconv/tests/moz.build13
-rw-r--r--intl/uconv/tests/stressgb.pl23
-rw-r--r--intl/uconv/tests/test_big5_encoder.html43
-rw-r--r--intl/uconv/tests/test_bug335816.html40
-rw-r--r--intl/uconv/tests/test_bug843434.html27
-rw-r--r--intl/uconv/tests/test_bug959058-1.html28
-rw-r--r--intl/uconv/tests/test_bug959058-2.html28
-rw-r--r--intl/uconv/tests/test_ncr_fallback.html74
-rw-r--r--intl/uconv/tests/test_singlebyte_overconsumption.html33
-rw-r--r--intl/uconv/tests/test_unicode_noncharacterescapes.html303
-rw-r--r--intl/uconv/tests/test_unicode_noncharacters_gb18030.html305
-rw-r--r--intl/uconv/tests/test_unicode_noncharacters_utf8.html303
-rw-r--r--intl/uconv/tests/test_utf8_overconsumption.html39
32 files changed, 2636 insertions, 0 deletions
diff --git a/intl/uconv/components.conf b/intl/uconv/components.conf
new file mode 100644
index 0000000000..00686f661a
--- /dev/null
+++ b/intl/uconv/components.conf
@@ -0,0 +1,34 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+Classes = [
+ {
+ 'cid': '{2bc2ad62-ad5d-4b7b-a9db-f74ae203c527}',
+ 'contract_ids': ['@mozilla.org/intl/converter-input-stream;1'],
+ 'type': 'nsConverterInputStream',
+ 'headers': ['nsConverterInputStream.h'],
+ },
+ {
+ 'cid': '{ff8780a5-bbb1-4bc5-8ee7-057e7bc5c925}',
+ 'contract_ids': ['@mozilla.org/intl/converter-output-stream;1'],
+ 'type': 'nsConverterOutputStream',
+ 'headers': ['/intl/uconv/nsConverterOutputStream.h'],
+ },
+ {
+ 'cid': '{0a698c44-3bff-11d4-9649-00c0ca135b4e}',
+ 'contract_ids': ['@mozilla.org/intl/scriptableunicodeconverter'],
+ 'type': 'nsScriptableUnicodeConverter',
+ 'headers': ['/intl/uconv/nsScriptableUConv.h'],
+ },
+ {
+ 'js_name': 'textToSubURI',
+ 'cid': '{8b042e22-6f87-11d3-b3c8-00805f8a6670}',
+ 'contract_ids': ['@mozilla.org/intl/texttosuburi;1'],
+ 'interfaces': ['nsITextToSubURI'],
+ 'type': 'nsTextToSubURI',
+ 'headers': ['/intl/uconv/nsTextToSubURI.h'],
+ },
+]
diff --git a/intl/uconv/crashtests/563618.html b/intl/uconv/crashtests/563618.html
new file mode 100644
index 0000000000..e36b664762
--- /dev/null
+++ b/intl/uconv/crashtests/563618.html
@@ -0,0 +1,12 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html>
+<head>
+ <meta content="text/html; charset=euc-jp"
+ http-equiv="content-type">
+ <title>Serbian Glyph Test</title>
+</head>
+<body>
+
+ <p style="font-size: 20pt;"></p>
+</body>
+</html>
diff --git a/intl/uconv/crashtests/crashtests.list b/intl/uconv/crashtests/crashtests.list
new file mode 100644
index 0000000000..6c54a699c1
--- /dev/null
+++ b/intl/uconv/crashtests/crashtests.list
@@ -0,0 +1,2 @@
+load 563618.html
+load omt-non-utf-8-jsurl.html
diff --git a/intl/uconv/crashtests/omt-non-utf-8-jsurl.html b/intl/uconv/crashtests/omt-non-utf-8-jsurl.html
new file mode 100644
index 0000000000..033e38a280
--- /dev/null
+++ b/intl/uconv/crashtests/omt-non-utf-8-jsurl.html
@@ -0,0 +1,14 @@
+<!DOCTYPE html>
+<html class=reftest-wait>
+<head>
+ <meta charset=iso-8859-1><!-- must be non-UTF-8 -->
+ <title>Test for off the main thread non-UTF-8 javascript: URL</title>
+</head>
+<body>
+<script>
+new Worker("javascript:foo").onerror = () => {
+ document.documentElement.className = "";
+};
+</script>
+</body>
+</html>
diff --git a/intl/uconv/directory.txt b/intl/uconv/directory.txt
new file mode 100644
index 0000000000..2b6be7af7f
--- /dev/null
+++ b/intl/uconv/directory.txt
@@ -0,0 +1,32 @@
+Directory Structure :
+================================
+
+idl - public .idl files
+public - public header file
+src - source directory of charset converter manager and utilities, and
+ charset converters for ISO-8859-1, CP1252, MacRoman and UTF-8
+tests - test programs and applications for the charset converters
+tests/unit - xpcshell tests
+tools - tools to build the tables used by the converters
+util - utility functions used by the converters
+
+The following directories contain different charset converters:
+
+ucvcn - Simplified Chinese charsets - GB2312, HZ, ISO-2022-CN, GBK, GB18030
+ucvja - Japanese charsets - Shift-JIS, ISO-2022-JP, EUC-JP
+ucvko - Korean charsets - ISO-2022-KR, EUC-KR, Johab
+ucvlatin - Latin charsets and others - ISO-8859-x, CP1250-1258
+ CP866, 874, KOI8,
+ Mac charsets, TIS620, UTF16
+ucvtw - Traditional Chinese charsets Set 1 - Big5
+ucvtw2 - Traditional Chinese charsets Set 2 - EUC-TW
+
+Within the directories containing charset converters:
+
+*.ut - tables used to convert to Unicode from a charset
+*.uf - tables used to convert to a charset from Unicode
+
+The following directories are obsolete and should not be used:
+
+ucvth
+ucvvt
diff --git a/intl/uconv/moz.build b/intl/uconv/moz.build
new file mode 100644
index 0000000000..f21e4055f9
--- /dev/null
+++ b/intl/uconv/moz.build
@@ -0,0 +1,32 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+TEST_DIRS += ["tests"]
+
+XPIDL_SOURCES += [
+ "nsIScriptableUConv.idl",
+ "nsITextToSubURI.idl",
+]
+
+XPIDL_MODULE = "uconv"
+
+EXPORTS += [
+ "nsConverterInputStream.h",
+ "nsTextToSubURI.h",
+]
+
+UNIFIED_SOURCES += [
+ "nsConverterInputStream.cpp",
+ "nsConverterOutputStream.cpp",
+ "nsScriptableUConv.cpp",
+ "nsTextToSubURI.cpp",
+]
+
+XPCOM_MANIFESTS += [
+ "components.conf",
+]
+
+FINAL_LIBRARY = "xul"
diff --git a/intl/uconv/nsConverterInputStream.cpp b/intl/uconv/nsConverterInputStream.cpp
new file mode 100644
index 0000000000..e3efdbc146
--- /dev/null
+++ b/intl/uconv/nsConverterInputStream.cpp
@@ -0,0 +1,256 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsConverterInputStream.h"
+#include "nsIInputStream.h"
+#include "nsReadLine.h"
+#include "nsStreamUtils.h"
+
+#include <algorithm>
+#include <tuple>
+
+using namespace mozilla;
+
+#define CONVERTER_BUFFER_SIZE 8192
+
+NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream,
+ nsIUnicharInputStream, nsIUnicharLineInputStream)
+
+// Binds this stream to aStream and creates the decoder and buffers used by
+// the Read* methods.
+//
+// aStream:          byte source to decode.
+// aCharset:         encoding label; a null pointer selects "UTF-8".
+// aBufferSize:      capacity of the byte buffer; values <= 0 select
+//                   CONVERTER_BUFFER_SIZE for both buffers.
+// aReplacementChar: only tested for zero vs. non-zero. Zero makes decode
+//                   errors fatal; the character value itself is not stored.
+//
+// Returns NS_ERROR_UCONV_NOCONV when the label resolves to no encoding,
+// NS_ERROR_OUT_OF_MEMORY when the output size computation overflows or a
+// buffer cannot be allocated, and NS_OK otherwise.
+NS_IMETHODIMP
+nsConverterInputStream::Init(nsIInputStream* aStream, const char* aCharset,
+ int32_t aBufferSize, char16_t aReplacementChar) {
+ nsAutoCString label;
+ if (!aCharset) {
+ label.AssignLiteral("UTF-8");
+ } else {
+ label = aCharset;
+ }
+
+ auto encoding = Encoding::ForLabelNoReplacement(label);
+ if (!encoding) {
+ return NS_ERROR_UCONV_NOCONV;
+ }
+ // Previously, the implementation auto-switched only
+ // between the two UTF-16 variants and only when
+ // initialized with an endianness-unspecific label.
+ mConverter = encoding->NewDecoder();
+
+ size_t outputBufferSize;
+ if (aBufferSize <= 0) {
+ aBufferSize = CONVERTER_BUFFER_SIZE;
+ outputBufferSize = CONVERTER_BUFFER_SIZE;
+ } else {
+ // NetUtil.jsm assumes that if buffer size equals
+ // the input size, the whole stream will be processed
+ // as one readString. This is not true with encoding_rs,
+ // because encoding_rs might want to see space for a
+ // surrogate pair, so let's compute a larger output
+ // buffer length.
+ CheckedInt<size_t> needed = mConverter->MaxUTF16BufferLength(aBufferSize);
+ if (!needed.isValid()) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+ outputBufferSize = needed.value();
+ }
+
+ // set up our buffers.
+ // The byte buffer only reserves capacity, while the UTF-16 buffer is given
+ // its full length up front.
+ if (!mByteData.SetCapacity(aBufferSize, mozilla::fallible) ||
+ !mUnicharData.SetLength(outputBufferSize, mozilla::fallible)) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+
+ mInput = aStream;
+ // A zero replacement character requests fatal decode errors.
+ mErrorsAreFatal = !aReplacementChar;
+ return NS_OK;
+}
+
+// Closes the wrapped stream (if any) and releases the decoder, the line
+// buffer and both data buffers. Returns the wrapped stream's Close() result,
+// or NS_OK when there was no stream left to close.
+NS_IMETHODIMP
+nsConverterInputStream::Close() {
+  nsresult status = NS_OK;
+  if (mInput) {
+    status = mInput->Close();
+  }
+
+  // Everything below is released regardless of how closing the source went.
+  mLineBuffer = nullptr;
+  mInput = nullptr;
+  mConverter = nullptr;
+  mByteData.Clear();
+  mUnicharData.Clear();
+
+  return status;
+}
+
+// Copies up to aCount buffered UTF-16 code units into aBuf, refilling the
+// buffer from the source first when it is empty.
+//
+// On success *aReadCount is the number of code units copied and NS_OK is
+// returned. When nothing could be decoded, *aReadCount is 0 and the status
+// recorded by Fill() in mLastErrorCode is returned.
+NS_IMETHODIMP
+nsConverterInputStream::Read(char16_t* aBuf, uint32_t aCount,
+                             uint32_t* aReadCount) {
+  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
+
+  uint32_t available = mUnicharDataLength - mUnicharDataOffset;
+  if (available == 0) {
+    // Nothing buffered: try to decode another chunk.
+    available = Fill(&mLastErrorCode);
+    if (available == 0) {
+      *aReadCount = 0;
+      return mLastErrorCode;
+    }
+  }
+
+  const uint32_t toCopy = std::min(available, aCount);
+  const char16_t* from = mUnicharData.Elements() + mUnicharDataOffset;
+  memcpy(aBuf, from, toCopy * sizeof(char16_t));
+
+  mUnicharDataOffset += toCopy;
+  *aReadCount = toCopy;
+  return NS_OK;
+}
+
+// Hands buffered UTF-16 code units to aWriter, refilling the buffer from the
+// source first when it is empty.
+//
+// aWriter:    callback invoked with (this, aClosure, segment start, number of
+//             code units delivered so far, number offered, out: number
+//             consumed).
+// aClosure:   opaque pointer passed through to aWriter unchanged.
+// aCount:     upper bound on the number of code units to deliver.
+// aReadCount: receives the total number of code units aWriter consumed.
+//
+// Returns mLastErrorCode (with *aReadCount == 0) when nothing could be
+// decoded, NS_OK otherwise. A failure reported by aWriter ends delivery but
+// is deliberately not propagated to the caller.
+NS_IMETHODIMP
+nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
+                                     void* aClosure, uint32_t aCount,
+                                     uint32_t* aReadCount) {
+  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
+  uint32_t codeUnitsToWrite = mUnicharDataLength - mUnicharDataOffset;
+  if (0 == codeUnitsToWrite) {
+    // Fill the unichar buffer
+    codeUnitsToWrite = Fill(&mLastErrorCode);
+    if (codeUnitsToWrite == 0) {
+      *aReadCount = 0;
+      return mLastErrorCode;
+    }
+  }
+
+  if (codeUnitsToWrite > aCount) {
+    codeUnitsToWrite = aCount;
+  }
+
+  uint32_t totalCodeUnitsWritten = 0;
+
+  while (codeUnitsToWrite) {
+    // Start from zero so a writer that leaves the out-param untouched is
+    // treated as having made no progress instead of reading garbage.
+    uint32_t codeUnitsWritten = 0;
+    nsresult rv =
+        aWriter(this, aClosure, mUnicharData.Elements() + mUnicharDataOffset,
+                totalCodeUnitsWritten, codeUnitsToWrite, &codeUnitsWritten);
+    if (NS_FAILED(rv)) {
+      // don't propagate errors to the caller
+      break;
+    }
+
+    if (codeUnitsWritten == 0) {
+      // A writer that succeeds without consuming anything would otherwise
+      // keep this loop spinning forever; stop and report what was delivered.
+      break;
+    }
+
+    // Never account for more than was offered: an over-reporting writer would
+    // otherwise underflow the remaining count and push the offset past the
+    // decoded data.
+    codeUnitsWritten = std::min(codeUnitsWritten, codeUnitsToWrite);
+
+    codeUnitsToWrite -= codeUnitsWritten;
+    totalCodeUnitsWritten += codeUnitsWritten;
+    mUnicharDataOffset += codeUnitsWritten;
+  }
+
+  *aReadCount = totalCodeUnitsWritten;
+
+  return NS_OK;
+}
+
+// Assigns up to aCount buffered UTF-16 code units to aString, refilling the
+// buffer from the source first when it is empty.
+//
+// On success *aReadCount is the number of code units assigned and NS_OK is
+// returned. When nothing could be decoded, *aReadCount is 0 and the status
+// recorded by Fill() in mLastErrorCode is returned.
+NS_IMETHODIMP
+nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString,
+                                   uint32_t* aReadCount) {
+  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
+
+  uint32_t available = mUnicharDataLength - mUnicharDataOffset;
+  if (available == 0) {
+    // Nothing buffered: try to decode another chunk.
+    available = Fill(&mLastErrorCode);
+    if (available == 0) {
+      *aReadCount = 0;
+      return mLastErrorCode;
+    }
+  }
+
+  const uint32_t toTake = std::min(available, aCount);
+  aString.Assign(mUnicharData.Elements() + mUnicharDataOffset, toTake);
+
+  mUnicharDataOffset += toTake;
+  *aReadCount = toTake;
+  return NS_OK;
+}
+
+// Refills mUnicharData by reading bytes from mInput and decoding them.
+//
+// Returns the number of UTF-16 code units now buffered (the same value is
+// stored in mUnicharDataLength) and sets *aErrorCode to NS_OK. A return of 0
+// with NS_OK means end of stream. A return of 0 with any other *aErrorCode
+// means the stream was closed, an earlier error is being repeated, reading
+// from mInput failed, or a fatal decode error occurred.
+//
+// Callers in this file pass &mLastErrorCode, so an error reported here is
+// what the next call sees in the mLastErrorCode check below.
+uint32_t nsConverterInputStream::Fill(nsresult* aErrorCode) {
+ if (!mInput) {
+ // We already closed the stream!
+ *aErrorCode = NS_BASE_STREAM_CLOSED;
+ return 0;
+ }
+
+ if (NS_FAILED(mLastErrorCode)) {
+ // We failed to completely convert last time, and error-recovery
+ // is disabled. We will fare no better this time, so...
+ *aErrorCode = mLastErrorCode;
+ return 0;
+ }
+
+ // mUnicharData.Length() is the buffer length, not the fill status.
+ // mUnicharDataLength reflects the current fill status.
+ mUnicharDataLength = 0;
+ // Whenever we convert, mUnicharData is logically empty.
+ mUnicharDataOffset = 0;
+
+ // Continue trying to read from the source stream until we successfully decode
+ // a character or encounter an error, as returning `0` here implies that the
+ // stream is complete.
+ //
+ // If the converter has been cleared, we've fully consumed the stream, and
+ // want to report EOF.
+ while (mUnicharDataLength == 0 && mConverter) {
+ // We assume a many to one conversion and are using equal sizes for
+ // the two buffers. However if an error happens at the very start
+ // of a byte buffer we may end up in a situation where n bytes lead
+ // to n+1 unicode chars. Thus we need to keep track of the leftover
+ // bytes as we convert.
+
+ uint32_t nb;
+ *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb);
+ if (NS_FAILED(*aErrorCode)) {
+ return 0;
+ }
+
+ NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(),
+ "mByteData is lying to us somewhere");
+
+ // If `NS_FillArray` failed to read any new bytes, this is the last read,
+ // and we're at the end of the stream.
+ bool last = (nb == 0);
+
+ // Now convert as much of the byte buffer to unicode as possible
+ auto src = AsBytes(Span(mByteData));
+ auto dst = Span(mUnicharData);
+
+ // Truncation from size_t to uint32_t below is OK, because the sizes
+ // are bounded by the lengths of mByteData and mUnicharData.
+ uint32_t result;
+ size_t read;
+ size_t written;
+ if (mErrorsAreFatal) {
+ std::tie(result, read, written) =
+ mConverter->DecodeToUTF16WithoutReplacement(src, dst, last);
+ } else {
+ std::tie(result, read, written, std::ignore) =
+ mConverter->DecodeToUTF16(src, dst, last);
+ }
+ // Bytes the decoder did not consume are carried over to the next
+ // NS_FillArray call.
+ mLeftOverBytes = mByteData.Length() - read;
+ mUnicharDataLength = written;
+ // Clear `mConverter` if we reached the end of the stream, as we can't
+ // call methods on it anymore. This will also signal EOF to the caller
+ // through the loop condition.
+ if (last) {
+ MOZ_ASSERT(mLeftOverBytes == 0,
+ "Failed to read all bytes on the last pass?");
+ mConverter = nullptr;
+ }
+ // If we got a decode error, we're done.
+ // Note that mUnicharDataLength keeps whatever was decoded before the
+ // error even though 0 is returned here.
+ if (result != kInputEmpty && result != kOutputFull) {
+ MOZ_ASSERT(mErrorsAreFatal, "How come DecodeToUTF16() reported error?");
+ *aErrorCode = NS_ERROR_UDEC_ILLEGALINPUT;
+ return 0;
+ }
+ }
+ *aErrorCode = NS_OK;
+ return mUnicharDataLength;
+}
+
+// Reads one line into aLine by delegating to NS_ReadLine(); aResult is filled
+// in by that helper. The line buffer is created on first use and kept until
+// Close().
+NS_IMETHODIMP
+nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult) {
+  if (mLineBuffer == nullptr) {
+    mLineBuffer = MakeUnique<nsLineBuffer<char16_t>>();
+  }
+
+  nsLineBuffer<char16_t>* lineBuffer = mLineBuffer.get();
+  return NS_ReadLine(this, lineBuffer, aLine, aResult);
+}
diff --git a/intl/uconv/nsConverterInputStream.h b/intl/uconv/nsConverterInputStream.h
new file mode 100644
index 0000000000..55555fc679
--- /dev/null
+++ b/intl/uconv/nsConverterInputStream.h
@@ -0,0 +1,64 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsConverterInputStream_h
+#define nsConverterInputStream_h
+
+#include "nsIInputStream.h"
+#include "nsIConverterInputStream.h"
+#include "nsIUnicharLineInputStream.h"
+#include "nsTArray.h"
+#include "nsCOMPtr.h"
+#include "nsReadLine.h"
+#include "mozilla/Encoding.h"
+#include "mozilla/UniquePtr.h"
+
+#define NS_CONVERTERINPUTSTREAM_CONTRACTID \
+ "@mozilla.org/intl/converter-input-stream;1"
+
+// {2BC2AD62-AD5D-4b7b-A9DB-F74AE203C527}
+#define NS_CONVERTERINPUTSTREAM_CID \
+ { \
+ 0x2bc2ad62, 0xad5d, 0x4b7b, { \
+ 0xa9, 0xdb, 0xf7, 0x4a, 0xe2, 0x3, 0xc5, 0x27 \
+ } \
+ }
+
+// Unichar input stream that decodes bytes read from a wrapped nsIInputStream
+// into UTF-16 using a mozilla::Decoder, with optional line-oriented reading.
+class nsConverterInputStream : public nsIConverterInputStream,
+ public nsIUnicharLineInputStream {
+ public:
+ NS_DECL_ISUPPORTS
+ NS_DECL_NSIUNICHARINPUTSTREAM
+ NS_DECL_NSIUNICHARLINEINPUTSTREAM
+ NS_DECL_NSICONVERTERINPUTSTREAM
+
+ nsConverterInputStream()
+ : mLastErrorCode(NS_OK),
+ mLeftOverBytes(0),
+ mUnicharDataOffset(0),
+ mUnicharDataLength(0),
+ mErrorsAreFatal(false),
+ mLineBuffer(nullptr) {}
+
+ private:
+ // Closes the wrapped stream and releases all buffers on destruction.
+ virtual ~nsConverterInputStream() { Close(); }
+
+ // Reads and decodes another chunk; returns the number of UTF-16 code units
+ // made available and reports the status through aErrorCode.
+ uint32_t Fill(nsresult* aErrorCode);
+
+ // Decoder created by Init(); reset to null by Close() and once the end of
+ // the stream has been decoded.
+ mozilla::UniquePtr<mozilla::Decoder> mConverter;
+ // Raw bytes read from mInput, including bytes left over from the last pass.
+ FallibleTArray<char> mByteData;
+ // Decoded UTF-16 output; its Length() is the capacity, not the fill level.
+ FallibleTArray<char16_t> mUnicharData;
+ // Source stream; null before Init() and after Close().
+ nsCOMPtr<nsIInputStream> mInput;
+
+ // Status of the most recent Fill(); a failure here makes later Fill() calls
+ // return immediately.
+ nsresult mLastErrorCode;
+ // Number of bytes in mByteData the decoder has not consumed yet.
+ uint32_t mLeftOverBytes;
+ // Index of the next unread code unit in mUnicharData.
+ uint32_t mUnicharDataOffset;
+ // Number of valid code units currently in mUnicharData.
+ uint32_t mUnicharDataLength;
+ // True when Init() was given a zero replacement character.
+ bool mErrorsAreFatal;
+
+ // Created lazily by ReadLine().
+ mozilla::UniquePtr<nsLineBuffer<char16_t> > mLineBuffer;
+};
+
+#endif
diff --git a/intl/uconv/nsConverterOutputStream.cpp b/intl/uconv/nsConverterOutputStream.cpp
new file mode 100644
index 0000000000..a24adb0377
--- /dev/null
+++ b/intl/uconv/nsConverterOutputStream.cpp
@@ -0,0 +1,115 @@
+/* vim:set expandtab ts=4 sw=2 sts=2 cin: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsCOMPtr.h"
+#include "nsIOutputStream.h"
+#include "nsString.h"
+#include "nsConverterOutputStream.h"
+#include "mozilla/Encoding.h"
+
+using namespace mozilla;
+
+NS_IMPL_ISUPPORTS(nsConverterOutputStream, nsIUnicharOutputStream,
+ nsIConverterOutputStream)
+
+// Flushes pending output and closes the wrapped stream if it is still open;
+// Close() returns early when the stream was already closed.
+nsConverterOutputStream::~nsConverterOutputStream() { Close(); }
+
+// Binds this stream to aOutStream and creates an encoder for aCharset.
+//
+// aOutStream: destination for the encoded bytes; asserted to be non-null.
+// aCharset:   encoding label; a null pointer selects UTF-8.
+//
+// Returns NS_ERROR_UCONV_NOCONV when the label is unknown or names UTF-16LE
+// or UTF-16BE, NS_OK otherwise.
+NS_IMETHODIMP
+nsConverterOutputStream::Init(nsIOutputStream* aOutStream,
+                              const char* aCharset) {
+  MOZ_ASSERT(aOutStream, "Null output stream!");
+
+  const Encoding* encoding = UTF_8_ENCODING;
+  if (aCharset) {
+    encoding = Encoding::ForLabelNoReplacement(MakeStringSpan(aCharset));
+    if (!encoding) {
+      return NS_ERROR_UCONV_NOCONV;
+    }
+    if (encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING) {
+      return NS_ERROR_UCONV_NOCONV;
+    }
+  }
+
+  mConverter = encoding->NewEncoder();
+  mOutStream = aOutStream;
+  return NS_OK;
+}
+
+// Encodes aCount UTF-16 code units from aChars and writes the resulting bytes
+// to the wrapped stream, in chunks of at most 4096 bytes.
+//
+// *aSuccess is true only when every chunk was written completely. Returns
+// NS_BASE_STREAM_CLOSED when the stream is closed; otherwise the result of
+// the failing underlying Write(), or NS_OK. Note that a short write by the
+// underlying stream sets *aSuccess to false while the returned nsresult may
+// still be a success code.
+NS_IMETHODIMP
+nsConverterOutputStream::Write(uint32_t aCount, const char16_t* aChars,
+ bool* aSuccess) {
+ if (!mOutStream) {
+ NS_ASSERTION(!mConverter, "Closed streams shouldn't have converters");
+ return NS_BASE_STREAM_CLOSED;
+ }
+ MOZ_ASSERT(mConverter, "Must have a converter when not closed");
+ uint8_t buffer[4096];
+ auto dst = Span(buffer);
+ auto src = Span(aChars, aCount);
+ for (;;) {
+ uint32_t result;
+ size_t read;
+ size_t written;
+ // `false`: more input may follow; the final call is made from Flush().
+ std::tie(result, read, written, std::ignore) =
+ mConverter->EncodeFromUTF16(src, dst, false);
+ // Skip what the encoder consumed; the output buffer is reused from its
+ // start on every iteration.
+ src = src.From(read);
+ uint32_t streamWritten;
+ nsresult rv = mOutStream->Write(reinterpret_cast<char*>(dst.Elements()),
+ written, &streamWritten);
+ *aSuccess = NS_SUCCEEDED(rv) && written == streamWritten;
+ if (!(*aSuccess)) {
+ return rv;
+ }
+ if (result == kInputEmpty) {
+ return NS_OK;
+ }
+ }
+}
+
+// Convenience wrapper around Write() that takes the code units and their
+// count from aString.
+NS_IMETHODIMP
+nsConverterOutputStream::WriteString(const nsAString& aString, bool* aSuccess) {
+  const char16_t* chars = aString.BeginReading();
+  return Write(aString.Length(), chars, aSuccess);
+}
+
+// Finishes the current encoding run and writes any bytes the encoder still
+// has to emit. Returns NS_OK when the stream is already closed or nothing
+// needs to be written; otherwise the result of the underlying Write().
+NS_IMETHODIMP
+nsConverterOutputStream::Flush() {
+  if (!mOutStream) {
+    return NS_OK;  // Already closed.
+  }
+
+  // If we are encoding to ISO-2022-JP, potentially
+  // transition back to the ASCII state. The buffer
+  // needs to be large enough for an additional NCR,
+  // though.
+  uint8_t buffer[12];
+  auto dst = Span(buffer);
+  Span<char16_t> src(nullptr);
+
+  // Empty input with `true` tells the encoder this is the last call.
+  uint32_t result;
+  size_t written;
+  std::tie(result, std::ignore, written, std::ignore) =
+      mConverter->EncodeFromUTF16(src, dst, true);
+  MOZ_ASSERT(result == kInputEmpty);
+
+  if (written == 0) {
+    return NS_OK;
+  }
+
+  uint32_t streamWritten;
+  return mOutStream->Write(reinterpret_cast<char*>(dst.Elements()), written,
+                           &streamWritten);
+}
+
+// Flushes the encoder, closes the wrapped stream and drops both. A Flush()
+// failure takes precedence over the result of closing the stream. Closing an
+// already closed stream returns NS_OK.
+NS_IMETHODIMP
+nsConverterOutputStream::Close() {
+  if (!mOutStream) {
+    return NS_OK;  // Already closed.
+  }
+
+  const nsresult flushResult = Flush();
+  const nsresult closeResult = mOutStream->Close();
+
+  mOutStream = nullptr;
+  mConverter = nullptr;
+
+  if (NS_FAILED(flushResult)) {
+    return flushResult;
+  }
+  return closeResult;
+}
diff --git a/intl/uconv/nsConverterOutputStream.h b/intl/uconv/nsConverterOutputStream.h
new file mode 100644
index 0000000000..74b873acd5
--- /dev/null
+++ b/intl/uconv/nsConverterOutputStream.h
@@ -0,0 +1,39 @@
+/* vim:set expandtab ts=4 sw=2 sts=2 cin: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef NSCONVERTEROUTPUTSTREAM_H_
+#define NSCONVERTEROUTPUTSTREAM_H_
+
+#include "nsIConverterOutputStream.h"
+#include "nsCOMPtr.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/Encoding.h"
+
+class nsIOutputStream;
+
+/* ff8780a5-bbb1-4bc5-8ee7-057e7bc5c925 */
+#define NS_CONVERTEROUTPUTSTREAM_CID \
+ { \
+ 0xff8780a5, 0xbbb1, 0x4bc5, { \
+ 0x8e, 0xe7, 0x05, 0x7e, 0x7b, 0xc5, 0xc9, 0x25 \
+ } \
+ }
+
+// Unichar output stream that encodes UTF-16 input with a mozilla::Encoder and
+// writes the resulting bytes to a wrapped nsIOutputStream.
+class nsConverterOutputStream final : public nsIConverterOutputStream {
+ public:
+ nsConverterOutputStream() = default;
+
+ NS_DECL_ISUPPORTS
+ NS_DECL_NSIUNICHAROUTPUTSTREAM
+ NS_DECL_NSICONVERTEROUTPUTSTREAM
+
+ private:
+ // Calls Close(), which flushes and closes the wrapped stream if still open.
+ ~nsConverterOutputStream();
+
+ // Encoder created by Init(); reset to null by Close().
+ mozilla::UniquePtr<mozilla::Encoder> mConverter;
+ // Destination stream; null before Init() and after Close().
+ nsCOMPtr<nsIOutputStream> mOutStream;
+};
+
+#endif
diff --git a/intl/uconv/nsIScriptableUConv.idl b/intl/uconv/nsIScriptableUConv.idl
new file mode 100644
index 0000000000..f4557dce8b
--- /dev/null
+++ b/intl/uconv/nsIScriptableUConv.idl
@@ -0,0 +1,79 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.idl"
+
+interface nsIInputStream;
+
+%{C++
+// {0A698C44-3BFF-11d4-9649-00C0CA135B4E}
+#define NS_ISCRIPTABLEUNICODECONVERTER_CID { 0x0A698C44, 0x3BFF, 0x11d4, { 0x96, 0x49, 0x00, 0xC0, 0xCA, 0x13, 0x5B, 0x4E } }
+#define NS_ISCRIPTABLEUNICODECONVERTER_CONTRACTID "@mozilla.org/intl/scriptableunicodeconverter"
+%}
+
+/**
+ * In new code, please use the WebIDL TextDecoder and TextEncoder
+ * instead. They represent bytes as Uint8Array (or as view to such
+ * array), which is the current best practice for representing bytes
+ * in JavaScript.
+ *
+ * This interface converts between UTF-16 in JavaScript strings
+ * and bytes transported as the unsigned value of each byte
+ * transported in a code unit of the same numeric value in
+ * a JavaScript string.
+ *
+ * @created 8/Jun/2000
+ * @author Makoto Kato [m_kato@ga2.so-net.ne.jp]
+ */
+[scriptable, uuid(f36ee324-5c1c-437f-ba10-2b4db7a18031)]
+interface nsIScriptableUnicodeConverter : nsISupports
+{
+ /**
+ * Converts the data from Unicode to the current charset.
+ * Returns the converted string. After converting, Finish should be called
+ * and its return value appended to this return value.
+ * In the C++ implementation, characters that cannot be encoded are
+ * replaced with '?'.
+ */
+ ACString ConvertFromUnicode(in AString aSrc);
+
+ /**
+ * Returns the terminator string.
+ * Should be called after ConvertFromUnicode() and appended to that
+ * function's return value.
+ */
+ ACString Finish();
+
+ /**
+ * Converts the data from the current charset to Unicode.
+ */
+ AString ConvertToUnicode(in ACString aSrc);
+
+ /**
+ * Convert a unicode string to an array of bytes. Finish does not need to be
+ * called.
+ */
+ void convertToByteArray(in AString aString,
+ [optional] out unsigned long aLen,
+ [array, size_is(aLen),retval] out octet aData);
+
+ /**
+ * Converts a unicode string to an input stream. The bytes in the stream are
+ * encoded according to the charset attribute.
+ * The returned stream will be nonblocking.
+ */
+ nsIInputStream convertToInputStream(in AString aString);
+
+ /**
+ * Current character set.
+ *
+ * @throw NS_ERROR_UCONV_NOCONV
+ * The requested charset is not supported.
+ */
+ attribute ACString charset;
+
+ /**
+ * Meaningless: the C++ implementation only stores and returns this value.
+ */
+ attribute boolean isInternal;
+};
diff --git a/intl/uconv/nsITextToSubURI.idl b/intl/uconv/nsITextToSubURI.idl
new file mode 100644
index 0000000000..3bb404e414
--- /dev/null
+++ b/intl/uconv/nsITextToSubURI.idl
@@ -0,0 +1,60 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.idl"
+
+
+%{C++
+// {8B042E22-6F87-11d3-B3C8-00805F8A6670}
+#define NS_TEXTTOSUBURI_CID { 0x8b042e22, 0x6f87, 0x11d3, { 0xb3, 0xc8, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
+#define NS_ITEXTTOSUBURI_CONTRACTID "@mozilla.org/intl/texttosuburi;1"
+%}
+
+[scriptable, uuid(8B042E24-6F87-11d3-B3C8-00805F8A6670)]
+interface nsITextToSubURI : nsISupports
+{
+ ACString ConvertAndEscape(in ACString charset, in AString text);
+ AString UnEscapeAndConvert(in ACString charset, in ACString text);
+
+ /**
+ * Unescapes the given URI fragment (for UI purposes only)
+ * Note:
+ * <ul>
+ * <li> escaping back the result (unescaped string) is not guaranteed to
+ * give the original escaped string
+ * <li> The URI fragment (escaped) is assumed to be in UTF-8 and converted
+ * to AString (UTF-16)
+ * <li> In case of successful conversion any resulting character listed
+ * in netwerk/dns/IDNCharacterBlocklist.inc (except space) is escaped
+ * <li> Always succeeds (callers don't need to do error checking)
+ * </ul>
+ *
+ * @param aURIFragment the URI (or URI fragment) to unescape
+ * @param aDontEscape whether to escape IDN blocklisted characters
+ * @return Unescaped aURIFragment converted to unicode
+ */
+ AString unEscapeURIForUI(in AUTF8String aURIFragment,
+ [optional] in boolean aDontEscape);
+%{C++
+ // C++ convenience overload: forwards with aDontEscape = false.
+ nsresult UnEscapeURIForUI(const nsACString& aURIFragment,
+ nsAString& _retval) {
+ return UnEscapeURIForUI(aURIFragment, false, _retval);
+ }
+%}
+
+ /**
+ * Unescapes only non-ASCII characters in the given URI fragment
+ * note: this method assumes the URI is UTF-8 and falls back to the given
+ * charset if the charset is an ASCII superset
+ *
+ * @param aCharset the charset to convert from
+ * @param aURIFragment the URI (or URI fragment) to unescape
+ * @return Unescaped aURIFragment converted to unicode
+ * @throws NS_ERROR_UCONV_NOCONV when there is no decoder for aCharset
+ * or NS_ERROR_UDEC_ILLEGALINPUT in case of conversion failure
+ */
+ [binaryname(UnEscapeNonAsciiURIJS)]
+ AString unEscapeNonAsciiURI(in ACString aCharset, in AUTF8String aURIFragment);
+};
diff --git a/intl/uconv/nsScriptableUConv.cpp b/intl/uconv/nsScriptableUConv.cpp
new file mode 100644
index 0000000000..8a9638f2ce
--- /dev/null
+++ b/intl/uconv/nsScriptableUConv.cpp
@@ -0,0 +1,256 @@
+
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsString.h"
+#include "nsIScriptableUConv.h"
+#include "nsScriptableUConv.h"
+#include "nsIStringStream.h"
+#include "nsComponentManagerUtils.h"
+
+#include <tuple>
+
+using namespace mozilla;
+
+/* Implementation file */
+NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter)
+
+// Starts without an encoder or decoder; they are created when the charset
+// attribute is set (see InitConverter()).
+nsScriptableUnicodeConverter::nsScriptableUnicodeConverter()
+ : mIsInternal(false) {}
+
+nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() = default;
+
+// Encodes aSrc with the current encoder into _retval, writing '?' for every
+// character the encoder reports as unmappable.
+//
+// Returns NS_ERROR_FAILURE when there is no encoder, NS_ERROR_OUT_OF_MEMORY
+// when the size computation overflows or an allocation fails, and NS_OK
+// otherwise. The encoder is not finished here (every call passes `false`);
+// callers are expected to call Finish() afterwards.
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc,
+ nsACString& _retval) {
+ if (!mEncoder) return NS_ERROR_FAILURE;
+
+ // We can compute the length without replacement, because the
+ // replacement is only one byte long and a mappable character
+ // would always output something, i.e. at least one byte.
+ // When encoding to ISO-2022-JP, unmappables shouldn't be able
+ // to cause more escape sequences to be emitted than the mappable
+ // worst case where every input character causes an escape into
+ // a different state.
+ CheckedInt<size_t> needed =
+ mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aSrc.Length());
+ if (!needed.isValid() || needed.value() > UINT32_MAX) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+
+ auto dstChars = _retval.GetMutableData(needed.value(), fallible);
+ if (!dstChars) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+
+ auto src = Span(aSrc);
+ auto dst = AsWritableBytes(*dstChars);
+ size_t totalWritten = 0;
+ for (;;) {
+ auto [result, read, written] =
+ mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, false);
+ // Any result other than "input empty" or "output full" is treated as an
+ // unmappable character and replaced with '?'.
+ if (result != kInputEmpty && result != kOutputFull) {
+ MOZ_RELEASE_ASSERT(written < dst.Length(),
+ "Unmappables with one-byte replacement should not "
+ "exceed mappable worst case.");
+ dst[written++] = '?';
+ }
+ totalWritten += written;
+ if (result == kInputEmpty) {
+ MOZ_ASSERT(totalWritten <= UINT32_MAX);
+ // Shrink the string from the worst-case size to what was written.
+ if (!_retval.SetLength(totalWritten, fallible)) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+ return NS_OK;
+ }
+ // Continue after the consumed input and the produced output.
+ src = src.From(read);
+ dst = dst.From(written);
+ }
+}
+
+// Finishes the current encoding run and returns the terminator bytes in
+// _retval, then resets the decoder and encoder so the converter can be
+// reused.
+//
+// Returns NS_ERROR_FAILURE (with _retval emptied) when no charset has been
+// set, i.e. there is neither an encoder nor a decoder; NS_OK otherwise. When
+// only a decoder exists, _retval is emptied and just the decoder is reset.
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::Finish(nsACString& _retval) {
+  // Without a successful charset assignment both converters are null.
+  // InitConverter() always creates the decoder when it creates the encoder,
+  // so checking the decoder covers both. Dereferencing it below would crash.
+  if (!mDecoder) {
+    _retval.Truncate();
+    return NS_ERROR_FAILURE;
+  }
+
+  // The documentation for this method says it should be called after
+  // ConvertFromUnicode(). However, our own tests called it after
+  // convertFromByteArray(), i.e. when *decoding*.
+  // Assuming that there exist extensions that similarly call
+  // this at the wrong time, let's deal. In general, it is a design
+  // error for this class to handle conversions in both directions.
+  if (!mEncoder) {
+    _retval.Truncate();
+    mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
+    return NS_OK;
+  }
+  // If we are encoding to ISO-2022-JP, potentially
+  // transition back to the ASCII state. The buffer
+  // needs to be large enough for an additional NCR,
+  // though.
+  _retval.SetLength(13);
+  auto dst = AsWritableBytes(_retval.GetMutableData(13));
+  Span<char16_t> src(nullptr);
+  uint32_t result;
+  size_t read;
+  size_t written;
+  // Empty input with `true` tells the encoder this is the last call.
+  std::tie(result, read, written, std::ignore) =
+      mEncoder->EncodeFromUTF16(src, dst, true);
+  MOZ_ASSERT(!read);
+  MOZ_ASSERT(result == kInputEmpty);
+  _retval.SetLength(written);
+
+  // Reset both directions for the next conversion.
+  mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
+  mEncoder->Encoding()->NewEncoderInto(*mEncoder);
+  return NS_OK;
+}
+
+// Decodes the bytes in aSrc with the current decoder into _retval.
+//
+// Returns NS_ERROR_FAILURE when there is no decoder, NS_ERROR_OUT_OF_MEMORY
+// when the size computation overflows or an allocation fails, and
+// NS_ERROR_UDEC_ILLEGALINPUT when the charset is UTF-8 and the input cannot
+// be decoded without replacement. Other charsets go through DecodeToUTF16().
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc,
+ nsAString& _retval) {
+ if (!mDecoder) return NS_ERROR_FAILURE;
+
+ uint32_t length = aSrc.Length();
+
+ CheckedInt<size_t> needed = mDecoder->MaxUTF16BufferLength(length);
+ if (!needed.isValid() || needed.value() > UINT32_MAX) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+
+ auto dst = _retval.GetMutableData(needed.value(), fallible);
+ if (!dst) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+
+ auto src =
+ Span(reinterpret_cast<const uint8_t*>(aSrc.BeginReading()), length);
+ uint32_t result;
+ size_t read;
+ size_t written;
+ // The UTF-8 decoder used to throw regardless of the error behavior.
+ // Simulating the old behavior for compatibility with legacy callers.
+ // If callers want control over the behavior, they should switch to
+ // TextDecoder.
+ if (mDecoder->Encoding() == UTF_8_ENCODING) {
+ std::tie(result, read, written) =
+ mDecoder->DecodeToUTF16WithoutReplacement(src, *dst, false);
+ if (result != kInputEmpty) {
+ return NS_ERROR_UDEC_ILLEGALINPUT;
+ }
+ } else {
+ std::tie(result, read, written, std::ignore) =
+ mDecoder->DecodeToUTF16(src, *dst, false);
+ }
+ MOZ_ASSERT(result == kInputEmpty);
+ MOZ_ASSERT(read == length);
+ MOZ_ASSERT(written <= needed.value());
+ // Shrink the string from the worst-case size to what was written.
+ if (!_retval.SetLength(written, fallible)) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+ return NS_OK;
+}
+
+// Encodes aString with the current encoder into a freshly malloc()ed buffer,
+// writing '?' for every character the encoder reports as unmappable.
+//
+// On success *_aData receives the buffer and *aLen the number of bytes
+// written; the buffer is handed to the caller (ConvertToInputStream()
+// releases it with free() on its error path). Returns NS_ERROR_FAILURE when
+// there is no encoder and NS_ERROR_OUT_OF_MEMORY when the size computation
+// overflows or the allocation fails.
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::ConvertToByteArray(const nsAString& aString,
+ uint32_t* aLen,
+ uint8_t** _aData) {
+ if (!mEncoder) return NS_ERROR_FAILURE;
+
+ CheckedInt<size_t> needed =
+ mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aString.Length());
+ if (!needed.isValid() || needed.value() > UINT32_MAX) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+
+ uint8_t* data = (uint8_t*)malloc(needed.value());
+ if (!data) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+ auto src = Span(aString);
+ auto dst = Span(data, needed.value());
+ size_t totalWritten = 0;
+ for (;;) {
+ // Unlike ConvertFromUnicode(), this passes `true` (last), so no separate
+ // Finish() call is needed.
+ auto [result, read, written] =
+ mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, true);
+ if (result != kInputEmpty && result != kOutputFull) {
+ // There's always room for one byte in the case of
+ // an unmappable character, because otherwise
+ // we'd have gotten `kOutputFull`.
+ dst[written++] = '?';
+ }
+ totalWritten += written;
+ if (result == kInputEmpty) {
+ *_aData = data;
+ MOZ_ASSERT(totalWritten <= UINT32_MAX);
+ *aLen = totalWritten;
+ return NS_OK;
+ }
+ // Continue after the consumed input and the produced output.
+ src = src.From(read);
+ dst = dst.From(written);
+ }
+}
+
+// Encodes aString via ConvertToByteArray() and returns a string input stream
+// that adopts the resulting buffer.
+//
+// Returns the failure from creating the stream, from the conversion, or from
+// AdoptData(); in the last case the buffer is freed here because the stream
+// did not take it over.
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::ConvertToInputStream(const nsAString& aString,
+                                                   nsIInputStream** _retval) {
+  nsresult rv;
+  nsCOMPtr<nsIStringInputStream> stream =
+      do_CreateInstance("@mozilla.org/io/string-input-stream;1", &rv);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+
+  uint8_t* bytes;
+  uint32_t byteCount;
+  rv = ConvertToByteArray(aString, &byteCount, &bytes);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+
+  rv = stream->AdoptData(reinterpret_cast<char*>(bytes), byteCount);
+  if (NS_FAILED(rv)) {
+    free(bytes);
+    return rv;
+  }
+
+  NS_ADDREF(*_retval = stream);
+  return rv;
+}
+
+// Writes the name of the decoder's encoding into aCharset, or empties it
+// when no charset has been set. Always returns NS_OK.
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::GetCharset(nsACString& aCharset) {
+  if (mDecoder) {
+    mDecoder->Encoding()->Name(aCharset);
+  } else {
+    aCharset.Truncate();
+  }
+  return NS_OK;
+}
+
+// Setter for the charset attribute: replaces the encoder and decoder via
+// InitConverter() and returns its result.
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::SetCharset(const nsACString& aCharset) {
+ return InitConverter(aCharset);
+}
+
+// Returns the stored isInternal flag; nothing else in this file reads it.
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::GetIsInternal(bool* aIsInternal) {
+ *aIsInternal = mIsInternal;
+ return NS_OK;
+}
+
+// Stores the isInternal flag; it does not influence any conversion in this
+// file.
+NS_IMETHODIMP
+nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal) {
+ mIsInternal = aIsInternal;
+ return NS_OK;
+}
+
+// Drops the current converters and creates new ones for aCharset.
+//
+// Returns NS_ERROR_UCONV_NOCONV (leaving both converters null) when the label
+// resolves to no encoding. For UTF-16LE and UTF-16BE only a decoder is
+// created; every other encoding gets both an encoder and a decoder.
+nsresult nsScriptableUnicodeConverter::InitConverter(
+    const nsACString& aCharset) {
+  mEncoder = nullptr;
+  mDecoder = nullptr;
+
+  auto encoding = Encoding::ForLabelNoReplacement(aCharset);
+  if (!encoding) {
+    return NS_ERROR_UCONV_NOCONV;
+  }
+
+  if (encoding != UTF_16LE_ENCODING && encoding != UTF_16BE_ENCODING) {
+    mEncoder = encoding->NewEncoder();
+  }
+  mDecoder = encoding->NewDecoderWithBOMRemoval();
+
+  return NS_OK;
+}
diff --git a/intl/uconv/nsScriptableUConv.h b/intl/uconv/nsScriptableUConv.h
new file mode 100644
index 0000000000..059a4b430c
--- /dev/null
+++ b/intl/uconv/nsScriptableUConv.h
@@ -0,0 +1,34 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __nsScriptableUConv_h_
+#define __nsScriptableUConv_h_
+
+#include "nsIScriptableUConv.h"
+#include "nsCOMPtr.h"
+#include "mozilla/Encoding.h"
+
+// Implementation of nsIScriptableUnicodeConverter. It holds the
+// encoder/decoder pair that InitConverter() creates for a charset label.
+class nsScriptableUnicodeConverter : public nsIScriptableUnicodeConverter {
+ public:
+ NS_DECL_ISUPPORTS
+ NS_DECL_NSISCRIPTABLEUNICODECONVERTER
+
+ nsScriptableUnicodeConverter();
+
+ protected:
+ virtual ~nsScriptableUnicodeConverter();
+
+ // Converter state. Both are reset and recreated by InitConverter(); either
+ // may be null (no charset set yet, or no encoder for the chosen encoding).
+ mozilla::UniquePtr<mozilla::Encoder> mEncoder;
+ mozilla::UniquePtr<mozilla::Decoder> mDecoder;
+ // Backing store for the isInternal attribute getter/setter.
+ bool mIsInternal;
+
+ nsresult FinishWithLength(char** _retval, int32_t* aLength);
+ nsresult ConvertFromUnicodeWithLength(const nsAString& aSrc, int32_t* aOutLen,
+ char** _retval);
+
+ // Looks up aCharset and replaces mEncoder/mDecoder accordingly.
+ nsresult InitConverter(const nsACString& aCharset);
+};
+
+#endif
diff --git a/intl/uconv/nsTextToSubURI.cpp b/intl/uconv/nsTextToSubURI.cpp
new file mode 100644
index 0000000000..e70d9ccbd8
--- /dev/null
+++ b/intl/uconv/nsTextToSubURI.cpp
@@ -0,0 +1,178 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "nsString.h"
+#include "nsITextToSubURI.h"
+#include "nsEscape.h"
+#include "nsTextToSubURI.h"
+#include "nsCRT.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Encoding.h"
+#include "mozilla/Preferences.h"
+#include "mozilla/TextUtils.h"
+#include "mozilla/Utf8.h"
+
+using namespace mozilla;
+
+nsTextToSubURI::~nsTextToSubURI() = default;
+
+NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI)
+
+// Encodes aText into the encoding named by aCharset and then escapes the
+// encoded bytes with the url_XPAlphas mask, writing the result to aOut.
+//
+// On every failure path aOut is emptied. Returns NS_ERROR_UCONV_NOCONV for an
+// unknown label, the encoder's error if encoding fails, and
+// NS_ERROR_OUT_OF_MEMORY if escaping fails.
+NS_IMETHODIMP
+nsTextToSubURI::ConvertAndEscape(const nsACString& aCharset,
+                                 const nsAString& aText, nsACString& aOut) {
+  auto encoding = Encoding::ForLabelNoReplacement(aCharset);
+  if (!encoding) {
+    aOut.Truncate();
+    return NS_ERROR_UCONV_NOCONV;
+  }
+
+  nsresult status;
+  nsAutoCString encoded;
+  // Only the status is of interest; the second tuple member is discarded.
+  std::tie(status, std::ignore) = encoding->Encode(aText, encoded);
+  if (NS_FAILED(status)) {
+    aOut.Truncate();
+    return status;
+  }
+
+  if (NS_Escape(encoded, aOut, url_XPAlphas)) {
+    return NS_OK;
+  }
+
+  aOut.Truncate();
+  return NS_ERROR_OUT_OF_MEMORY;
+}
+
+// Percent-unescapes aText and decodes the resulting bytes from the encoding
+// named by aCharset into aOut, without BOM handling.
+//
+// Any success code from the decoder is reported as plain NS_OK; failure codes
+// are passed through unchanged. An unknown label empties aOut and yields
+// NS_ERROR_UCONV_NOCONV.
+NS_IMETHODIMP
+nsTextToSubURI::UnEscapeAndConvert(const nsACString& aCharset,
+                                   const nsACString& aText, nsAString& aOut) {
+  auto encoding = Encoding::ForLabelNoReplacement(aCharset);
+  if (!encoding) {
+    aOut.Truncate();
+    return NS_ERROR_UCONV_NOCONV;
+  }
+
+  nsAutoCString bytes(aText);
+  NS_UnescapeURL(bytes);
+
+  auto status = encoding->DecodeWithoutBOMHandling(bytes, aOut);
+  return NS_FAILED(status) ? status : NS_OK;
+}
+
+// Returns true when `charset` names one of the stateful encodings this file
+// special-cases: any label starting with "ISO-2022-", "UTF-7", or
+// "HZ-GB-2312". All comparisons are case-insensitive.
+static bool statefulCharset(const char* charset) {
+  // HZ, UTF-7 and the CN and KR ISO-2022 variants are no longer in
+  // mozilla-central but keeping them here just in case for the benefit of
+  // comm-central.
+  const bool isIso2022 =
+      nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-") - 1) == 0;
+  return isIso2022 || nsCRT::strcasecmp(charset, "UTF-7") == 0 ||
+         nsCRT::strcasecmp(charset, "HZ-GB-2312") == 0;
+}
+
+// static
+// Converts the already-unescaped bytes in aURI to UTF-16 in aOut.
+//
+// For charsets that are not stateful, pure-ASCII input and valid UTF-8 input
+// are copied directly without consulting aCharset. Otherwise the bytes are
+// decoded from aCharset without BOM handling and without replacement.
+// Returns NS_ERROR_INVALID_ARG for an empty aCharset on the slow path and
+// NS_ERROR_UCONV_NOCONV (with aOut emptied) for an unknown label.
+nsresult nsTextToSubURI::convertURItoUnicode(const nsCString& aCharset,
+ const nsCString& aURI,
+ nsAString& aOut) {
+ // Stateful 7-bit encodings can yield non-ASCII text from ASCII-only bytes,
+ // so the ASCII/UTF-8 shortcuts below must be skipped for them.
+ bool isStatefulCharset = statefulCharset(aCharset.get());
+
+ if (!isStatefulCharset) {
+ if (IsAscii(aURI)) {
+ CopyASCIItoUTF16(aURI, aOut);
+ return NS_OK;
+ }
+ if (IsUtf8(aURI)) {
+ CopyUTF8toUTF16(aURI, aOut);
+ return NS_OK;
+ }
+ }
+
+ // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.
+ NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG);
+
+ auto encoding = Encoding::ForLabelNoReplacement(aCharset);
+ if (!encoding) {
+ aOut.Truncate();
+ return NS_ERROR_UCONV_NOCONV;
+ }
+ return encoding->DecodeWithoutBOMHandlingAndWithoutReplacement(aURI, aOut);
+}
+
+// Produces a display form of aURIFragment in _retval.
+//
+// The fragment is percent-unescaped (control octets are left escaped) and
+// converted to UTF-16 as UTF-8. If that conversion does not return exactly
+// NS_OK, the original, still-escaped fragment is used instead. Unless
+// aDontEscape is set, characters found in the IDN blocklist are then escaped
+// again; SPACE and IDEOGRAPHIC SPACE are exempt. Always returns NS_OK.
+NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString& aURIFragment,
+ bool aDontEscape,
+ nsAString& _retval) {
+ nsAutoCString unescapedSpec;
+ // skip control octets (0x00 - 0x1f and 0x7f) when unescaping
+ NS_UnescapeURL(PromiseFlatCString(aURIFragment),
+ esc_SkipControl | esc_AlwaysCopy, unescapedSpec);
+
+ // in case of failure, return escaped URI
+ // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte
+ // sequences are also considered failure in this context
+ if (convertURItoUnicode("UTF-8"_ns, unescapedSpec, _retval) != NS_OK) {
+ // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8
+ CopyUTF8toUTF16(aURIFragment, _retval);
+ }
+
+ if (aDontEscape) {
+ return NS_OK;
+ }
+
+ // If there are any characters that are unsafe for URIs, reescape those.
+ // The blocklist is built lazily, the first time it is found empty.
+ if (mIDNBlocklist.IsEmpty()) {
+ mozilla::net::InitializeBlocklist(mIDNBlocklist);
+ // we allow SPACE and IDEOGRAPHIC SPACE in this method
+ mozilla::net::RemoveCharFromBlocklist(u' ', mIDNBlocklist);
+ mozilla::net::RemoveCharFromBlocklist(0x3000, mIDNBlocklist);
+ }
+
+ MOZ_ASSERT(!mIDNBlocklist.IsEmpty());
+ const nsPromiseFlatString& unescapedResult = PromiseFlatString(_retval);
+ nsString reescapedSpec;
+ // The lambda decides per UTF-16 code unit whether it must be escaped;
+ // reescapedSpec is the scratch buffer handed to NS_EscapeURL.
+ _retval = NS_EscapeURL(
+ unescapedResult,
+ [&](char16_t aChar) -> bool {
+ return mozilla::net::CharInBlocklist(aChar, mIDNBlocklist);
+ },
+ reescapedSpec);
+
+ return NS_OK;
+}
+
+// Interface entry point that simply forwards to the static
+// UnEscapeNonAsciiURI() with the same arguments.
+NS_IMETHODIMP
+nsTextToSubURI::UnEscapeNonAsciiURIJS(const nsACString& aCharset,
+ const nsACString& aURIFragment,
+ nsAString& _retval) {
+ return UnEscapeNonAsciiURI(aCharset, aURIFragment, _retval);
+}
+
+// static
+// Unescapes only the non-ASCII percent-escapes in aURIFragment and converts
+// the result to UTF-16 in _retval, using aCharset when the bytes are neither
+// ASCII nor UTF-8.
+//
+// If the unescaped bytes are not UTF-8 and aCharset is one of the listed
+// UTF-16/UTF-7 labels, the fragment is returned as-is with NS_OK.
+// NS_OK_UDEC_MOREINPUT from the conversion is reported as
+// NS_ERROR_UDEC_ILLEGALINPUT.
+nsresult nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString& aCharset,
+ const nsACString& aURIFragment,
+ nsAString& _retval) {
+ nsAutoCString unescapedSpec;
+ NS_UnescapeURL(PromiseFlatCString(aURIFragment),
+ esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec);
+ // leave the URI as it is if it's not UTF-8 and aCharset is not an ASCII
+ // superset since converting "http:" with such an encoding is always a bad
+ // idea.
+ if (!IsUtf8(unescapedSpec) &&
+ (aCharset.LowerCaseEqualsLiteral("utf-16") ||
+ aCharset.LowerCaseEqualsLiteral("utf-16be") ||
+ aCharset.LowerCaseEqualsLiteral("utf-16le") ||
+ aCharset.LowerCaseEqualsLiteral("utf-7") ||
+ aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))) {
+ CopyASCIItoUTF16(aURIFragment, _retval);
+ return NS_OK;
+ }
+
+ nsresult rv =
+ convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, _retval);
+ // NS_OK_UDEC_MOREINPUT is a success code, so caller can't catch the error
+ // if the string ends with a valid (but incomplete) sequence.
+ return rv == NS_OK_UDEC_MOREINPUT ? NS_ERROR_UDEC_ILLEGALINPUT : rv;
+}
+
+//----------------------------------------------------------------------
diff --git a/intl/uconv/nsTextToSubURI.h b/intl/uconv/nsTextToSubURI.h
new file mode 100644
index 0000000000..1eaeb554dc
--- /dev/null
+++ b/intl/uconv/nsTextToSubURI.h
@@ -0,0 +1,36 @@
+// -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vim: set ts=2 et sw=2 tw=80:
+// This Source Code is subject to the terms of the Mozilla Public License
+// version 2.0 (the "License"). You can obtain a copy of the License at
+// http://mozilla.org/MPL/2.0/.
+#ifndef nsTextToSubURI_h__
+#define nsTextToSubURI_h__
+
+#include "nsITextToSubURI.h"
+#include "nsString.h"
+#include "nsTArray.h"
+#include "mozilla/net/IDNBlocklistUtils.h"
+
+// Implementation of nsITextToSubURI: helpers that combine URL
+// escaping/unescaping with character-encoding conversion.
+class nsTextToSubURI : public nsITextToSubURI {
+ NS_DECL_ISUPPORTS
+ NS_DECL_NSITEXTTOSUBURI
+
+ // Thread-safe function for C++ callers
+ static nsresult UnEscapeNonAsciiURI(const nsACString& aCharset,
+ const nsACString& aURIFragment,
+ nsAString& _retval);
+
+ private:
+ virtual ~nsTextToSubURI();
+
+ // We assume that the URI is encoded as UTF-8.
+ // aCharset is only used to decode aURI when that assumption does not hold
+ // (or when aCharset names a stateful encoding).
+ static nsresult convertURItoUnicode(const nsCString& aCharset,
+ const nsCString& aURI,
+ nsAString& _retval);
+
+ // Characters defined in netwerk/dns/IDNCharacterBlocklist.inc or via the
+ // network.IDN.extra_allowed_chars and network.IDN.extra_blocked_chars prefs.
+ // Filled lazily by UnEscapeURIForUI() the first time it needs to re-escape.
+ nsTArray<mozilla::net::BlocklistRange> mIDNBlocklist;
+};
+
+#endif // nsTextToSubURI_h__
diff --git a/intl/uconv/tests/gtest/TestShortRead.cpp b/intl/uconv/tests/gtest/TestShortRead.cpp
new file mode 100644
index 0000000000..393f5e0027
--- /dev/null
+++ b/intl/uconv/tests/gtest/TestShortRead.cpp
@@ -0,0 +1,109 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "gtest/gtest.h"
+#include "mozilla/ErrorNames.h"
+#include "nsCOMPtr.h"
+#include "nsConverterInputStream.h"
+#include "nsIInputStream.h"
+#include "nsISupports.h"
+#include "nsStringStream.h"
+
+namespace {
+
+// Test-only nsIInputStream that wraps another stream and caps successive
+// Read() calls at the byte counts listed in aShortReads, one entry per
+// successful read. Once the list is used up, reads pass through uncapped.
+class ShortReadWrapper final : public nsIInputStream {
+ public:
+ NS_DECL_THREADSAFE_ISUPPORTS
+ NS_DECL_NSIINPUTSTREAM
+
+ // Only pointers into aShortReads are stored, so the array must outlive this
+ // object.
+ template <size_t N>
+ ShortReadWrapper(const uint32_t (&aShortReads)[N],
+ nsIInputStream* aBaseStream)
+ : mShortReadIter(std::begin(aShortReads)),
+ mShortReadEnd(std::end(aShortReads)),
+ mBaseStream(aBaseStream) {}
+
+ ShortReadWrapper(const ShortReadWrapper&) = delete;
+ ShortReadWrapper& operator=(const ShortReadWrapper&) = delete;
+
+ private:
+ ~ShortReadWrapper() = default;
+
+ // Next cap to apply, and one-past-the-last cap.
+ const uint32_t* mShortReadIter;
+ const uint32_t* mShortReadEnd;
+ // The stream all calls are forwarded to.
+ nsCOMPtr<nsIInputStream> mBaseStream;
+};
+
+NS_IMPL_ISUPPORTS(ShortReadWrapper, nsIInputStream)
+
+// Forwards Close() to the wrapped stream.
+NS_IMETHODIMP
+ShortReadWrapper::Close() { return mBaseStream->Close(); }
+
+// Reports the wrapped stream's available byte count, clamped to the current
+// short-read cap while caps remain. Errors from the wrapped stream are
+// returned as-is.
+NS_IMETHODIMP
+ShortReadWrapper::Available(uint64_t* aAvailable) {
+  nsresult rv = mBaseStream->Available(aAvailable);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  if (mShortReadIter == mShortReadEnd) {
+    // No caps left: report the base stream's figure untouched.
+    return NS_OK;
+  }
+
+  const uint64_t cap = *mShortReadIter;
+  if (cap < *aAvailable) {
+    *aAvailable = cap;
+  }
+  return NS_OK;
+}
+
+// Forwards StreamStatus() to the wrapped stream.
+NS_IMETHODIMP
+ShortReadWrapper::StreamStatus() { return mBaseStream->StreamStatus(); }
+
+// Reads from the wrapped stream, asking for at most the current short-read
+// cap while caps remain. A successful read consumes one cap; a failed read
+// leaves the cap in place for the next attempt.
+NS_IMETHODIMP
+ShortReadWrapper::Read(char* aBuf, uint32_t aCount, uint32_t* _retval) {
+  const bool capped = mShortReadIter != mShortReadEnd;
+  const uint32_t request =
+      capped ? std::min(*mShortReadIter, aCount) : aCount;
+
+  nsresult rv = mBaseStream->Read(aBuf, request, _retval);
+  if (capped && NS_SUCCEEDED(rv)) {
+    ++mShortReadIter;
+  }
+  return rv;
+}
+
+// Segment-based reading is not supported by this wrapper; all arguments are
+// ignored and NS_ERROR_NOT_IMPLEMENTED is returned.
+NS_IMETHODIMP
+ShortReadWrapper::ReadSegments(nsWriteSegmentFun aWriter, void* aClosure,
+ uint32_t aCount, uint32_t* _retval) {
+ return NS_ERROR_NOT_IMPLEMENTED;
+}
+
+// Forwards IsNonBlocking() to the wrapped stream.
+NS_IMETHODIMP
+ShortReadWrapper::IsNonBlocking(bool* _retval) {
+ return mBaseStream->IsNonBlocking(_retval);
+}
+
+} // namespace
+
+// Feeds a four-byte UTF-16BE surrogate pair to nsConverterInputStream through
+// a stream that delivers it in reads of 1, 2 and 1 bytes, and checks that the
+// converter still produces the single character U+1D420.
+TEST(ConverterStreamShortRead, ShortRead)
+{
+ // UTF-16BE code units 0xD835 0xDC20, i.e. the surrogate pair for U+1D420.
+ uint8_t bytes[] = {0xd8, 0x35, 0xdc, 0x20};
+ nsCOMPtr<nsIInputStream> baseStream;
+ ASSERT_TRUE(NS_SUCCEEDED(NS_NewByteInputStream(getter_AddRefs(baseStream),
+ AsChars(mozilla::Span(bytes)),
+ NS_ASSIGNMENT_COPY)));
+
+ // The caps split the input inside a code unit and inside the pair.
+ // static: ShortReadWrapper keeps pointers into this array.
+ static const uint32_t kShortReads[] = {1, 2, 1};
+ nsCOMPtr<nsIInputStream> shortStream =
+ new ShortReadWrapper(kShortReads, baseStream);
+
+ RefPtr<nsConverterInputStream> unicharStream = new nsConverterInputStream();
+ ASSERT_TRUE(NS_SUCCEEDED(
+ unicharStream->Init(shortStream, "UTF-16BE", 4096,
+ nsIConverterInputStream::ERRORS_ARE_FATAL)));
+
+ uint32_t read;
+ nsAutoString result;
+ ASSERT_TRUE(
+ NS_SUCCEEDED(unicharStream->ReadString(UINT32_MAX, result, &read)));
+
+ // Two UTF-16 code units are expected: the high and low surrogate.
+ ASSERT_EQ(read, 2u);
+ ASSERT_TRUE(result == u"\U0001d420");
+}
diff --git a/intl/uconv/tests/gtest/moz.build b/intl/uconv/tests/gtest/moz.build
new file mode 100644
index 0000000000..969fb52c7e
--- /dev/null
+++ b/intl/uconv/tests/gtest/moz.build
@@ -0,0 +1,11 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Sources of the gtest binary for this directory.
+UNIFIED_SOURCES += [
+ "TestShortRead.cpp",
+]
+
+# Library these sources are linked into.
+FINAL_LIBRARY = "xul-gtest"
diff --git a/intl/uconv/tests/mochitest.ini b/intl/uconv/tests/mochitest.ini
new file mode 100644
index 0000000000..0f13e77971
--- /dev/null
+++ b/intl/uconv/tests/mochitest.ini
@@ -0,0 +1,14 @@
+[DEFAULT]
+
+[test_bug335816.html]
+[test_bug843434.html]
+[test_bug959058-1.html]
+[test_bug959058-2.html]
+[test_long_doc.html]
+[test_singlebyte_overconsumption.html]
+[test_unicode_noncharacterescapes.html]
+[test_unicode_noncharacters_gb18030.html]
+[test_unicode_noncharacters_utf8.html]
+[test_utf8_overconsumption.html]
+[test_big5_encoder.html]
+[test_ncr_fallback.html]
diff --git a/intl/uconv/tests/moz.build b/intl/uconv/tests/moz.build
new file mode 100644
index 0000000000..888186fb26
--- /dev/null
+++ b/intl/uconv/tests/moz.build
@@ -0,0 +1,13 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Subdirectories that contain further test build definitions.
+TEST_DIRS += [
+ "gtest",
+]
+
+# Manifest listing the xpcshell tests (kept in unit/).
+XPCSHELL_TESTS_MANIFESTS += ["unit/xpcshell.ini"]
+
+# Manifest listing the mochitests in this directory.
+MOCHITEST_MANIFESTS += ["mochitest.ini"]
diff --git a/intl/uconv/tests/stressgb.pl b/intl/uconv/tests/stressgb.pl
new file mode 100644
index 0000000000..5b37fb63fb
--- /dev/null
+++ b/intl/uconv/tests/stressgb.pl
@@ -0,0 +1,23 @@
+#!/usr/bin/perl
+# GB18030 round-trip stress script.
+#
+# For every page number from 10 to 908 it:
+#   1. downloads the test page into in<N>.txt (by redirecting STDOUT and
+#      letting LWP::Simple's getprint write to it),
+#   2. runs nsconv to convert in<N>.txt from GB18030 to GB18030 into
+#      out<N>.txt,
+#   3. appends a unified diff of the two files to alldiff.txt.
+# Each step and its exit status is recorded in resultlog.txt.
+use LWP::Simple;
+use IO::Handle;
+$stdout = *STDOUT;
+open(RES , ">resultlog.txt") || die "cannot open result log file";
+#system("rm alldiff.txt in*.txt out*.txt");
+for($i=10;$i<909;$i++)
+{
+  RES->printf("Test Page %d \n", $i);
+  $url = "http://people.netscape.com/ftang/testscript/gb18030/gbtext.cgi?page=" . $i;
+  RES->printf( "URL = %s\n", $url);
+  # The leading "> " makes the two-argument open() below open for writing.
+  $tmpfile = "> in". $i . ".txt";
+  # The call must be parenthesized: without parentheses `||` binds tighter
+  # than the list operator, i.e. open(STDOUT, ($tmpfile || ...)), and a failed
+  # open was never logged.
+  open(STDOUT, $tmpfile) || RES->print("cannot open " . $tmpfile . "\n");
+  getprint $url;
+  $cmd2 = "../../../dist/win32_d.obj/bin/nsconv -f GB18030 -t GB18030 in" . $i . ".txt out" . $i . ".txt >err";
+  $cmd3 = "diff -u in" . $i . ".txt out" . $i . ".txt >> alldiff.txt";
+  RES->printf( "Run '%s'\n", $cmd2);
+  $st2 = system($cmd2);
+  RES->printf( "result = '%d'\n", $st2);
+  RES->printf( "Run '%s'\n", $cmd3);
+  $st3 = system($cmd3);
+  RES->printf( "result = '%d'\n", $st3);
+}
diff --git a/intl/uconv/tests/test_big5_encoder.html b/intl/uconv/tests/test_big5_encoder.html
new file mode 100644
index 0000000000..7e86683f00
--- /dev/null
+++ b/intl/uconv/tests/test_big5_encoder.html
@@ -0,0 +1,43 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=912470
+-->
+<head>
+  <meta http-equiv="Content-type" content="text/html; charset=UTF-8">
+  <title>Test for the big5 encoder</title>
+  <script src="/tests/SimpleTest/SimpleTest.js"></script>
+  <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" />
+</head>
+<body onload="test()">
+<pre id="test">
+<script class="testbody" type="text/javascript">
+/* NOTE:
+ * When we make our data: URL origin work as in Blink, this test will fail.
+ * Hopefully, by that time our URL parser has become spec-compliant, so that
+ * we'll pass the Web Platform Test for the big5 encoder
+ * (testing/web-platform/tests/encoding/big5-encoder.html) and this test can
+ * simply be removed.
+ */
+SimpleTest.waitForExplicitFinish();
+
+/* Submits the form inside the big5-encoded iframe and, once the iframe has
+ * navigated, compares the query string after "?foo=" with the expected
+ * percent-encoded form of the input's value.
+ */
+function test() {
+  var f = document.getElementsByTagName("iframe")[0];
+  f.onload = function() {
+    var href = SpecialPowers.wrap(f).contentWindow.location.href;
+    var index = href.indexOf("?foo=");
+    // Skip past "?foo=" (5 characters) to get the encoded value.
+    var actual = href.substring(index + 5);
+    var expected = "h%26%2340614%3Bi%26%23156267%3Bj%A1%40k%A3%E1l%A4%40m%C8%A4n%C8%CDo%FE%FEp%26%238365%3Bq%FDjr%F9%F9s%26%23128169%3Bt";
+    is(actual, expected, "Should have gotten the expected encode.");
+    SimpleTest.finish();
+  };
+  SpecialPowers.wrap(f).contentDocument.forms[0].submit();
+}
+</script>
+</pre>
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=912470">Mozilla Bug 912470</a>
+<p id="display"></p>
+<!-- The iframe must be closed explicitly; otherwise the rest of the document
+     is consumed as the iframe element's text content. -->
+<div id="content" style="display: none"><iframe src="data:text/html;charset=big5,<form><input name=foo value=h&amp;%23x9EA6;i&amp;%23x2626B;j&amp;%23x3000;k&amp;%23x20AC;l&amp;%23x4E00;m&amp;%23x27607;n&amp;%23xFFE2;o&amp;%23x79D4;p&amp;%23x20AD;q&amp;%23x203B5;r&amp;%23x2550;s&amp;%23x1F4A9;t></form>"></iframe>
+</div>
+</body>
+</html>
diff --git a/intl/uconv/tests/test_bug335816.html b/intl/uconv/tests/test_bug335816.html
new file mode 100644
index 0000000000..58fe538b5d
--- /dev/null
+++ b/intl/uconv/tests/test_bug335816.html
@@ -0,0 +1,40 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=335816
+-->
+<head>
+ <meta http-equiv="Content-type" content="text/html; charset=UTF-8">
+ <title>Test for Bug 335816</title>
+ <script src="/tests/SimpleTest/SimpleTest.js"></script>
+ <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" />
+</head>
+<body onload="test()">
+<pre id="test">
+<script class="testbody" type="text/javascript">
+
+/** Test for Bug 335816 **/
+// Runs on body load. Passes when #display does not contain the string that
+// WontYouPleaseArrangeIt() writes, i.e. that function was never called.
+function test()
+{
+ ok($("display").innerHTML != "Keep that breathless charm", "Hidden script not executed");
+ SimpleTest.finish();
+}
+
+// Overwrites #display with the marker string that test() checks for. It is
+// only referenced from the markup near the end of the body; the test expects
+// that reference never to run.
+function WontYouPleaseArrangeIt()
+{
+ $("display").innerHTML = "Keep that breathless charm";
+}
+
+SimpleTest.waitForExplicitFinish();
+</script>
+</pre>
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=335816">Mozilla Bug 335816</a>
+<p id="display">Lovely ... Never, ever change.</p>
+
+<!-- There is a UTF-8 BOM just here - - -
+ | which should not be stripped
+ v -->
+<div id="content" style="display: none"><script>WontYouPleaseArrangeIt();</script></div>
+</body>
+</html>
+
diff --git a/intl/uconv/tests/test_bug843434.html b/intl/uconv/tests/test_bug843434.html
new file mode 100644
index 0000000000..d79ad70e6a
--- /dev/null
+++ b/intl/uconv/tests/test_bug843434.html
@@ -0,0 +1,27 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=843434
+-->
+<head>
+ <meta http-equiv="Content-type" content="text/html; charset=UTF-8">
+ <title>Test for Bug 843434</title>
+ <script src="/tests/SimpleTest/SimpleTest.js"></script>
+ <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" />
+ <!-- This next 'link' is part of the test, it may cause an assertion. -->
+ <link rel=stylesheet href="data:text/css;charset=ISO-2022-JP,%cc">
+</head>
+<body onload="test()">
+<pre id="test">
+<script class="testbody" type="text/javascript">
+// Runs on body load. Reaching this point is the whole test: it records an
+// unconditional pass and finishes, so the only way to fail is an assertion
+// raised earlier while loading the stylesheet link in <head>.
+function test()
+{
+ ok(true, "Test for no prior assertion.");
+ SimpleTest.finish();
+}
+SimpleTest.waitForExplicitFinish();
+</script>
+</pre>
+</body>
+</html>
+
diff --git a/intl/uconv/tests/test_bug959058-1.html b/intl/uconv/tests/test_bug959058-1.html
new file mode 100644
index 0000000000..85d45a8841
--- /dev/null
+++ b/intl/uconv/tests/test_bug959058-1.html
@@ -0,0 +1,28 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=959058
+-->
+<head>
+ <meta charset="gbk">
+ <title>Test for Bug 959058</title>
+ <script src="/tests/SimpleTest/SimpleTest.js"></script>
+ <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/>
+ <script type="application/javascript">
+
+ /** Test for Bug 959058 **/
+
+ is("92", "\uD83C\uDF54", "Should have gotten a hamburger.");
+
+ </script>
+</head>
+<body>
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=959058">Mozilla Bug 959058</a>
+<p id="display"></p>
+<div id="content" style="display: none">
+
+</div>
+<pre id="test">
+</pre>
+</body>
+</html>
diff --git a/intl/uconv/tests/test_bug959058-2.html b/intl/uconv/tests/test_bug959058-2.html
new file mode 100644
index 0000000000..86bf500e1b
--- /dev/null
+++ b/intl/uconv/tests/test_bug959058-2.html
@@ -0,0 +1,28 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=959058
+-->
+<head>
+ <meta charset="gbk">
+ <title>Test for Bug 959058</title>
+ <script src="/tests/SimpleTest/SimpleTest.js"></script>
+ <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/>
+ <script type="application/javascript">
+
+ /** Test for Bug 959058 **/
+
+ is("", "\u20AC", "Should have gotten euro.");
+
+ </script>
+</head>
+<body>
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=959058">Mozilla Bug 959058</a>
+<p id="display"></p>
+<div id="content" style="display: none">
+
+</div>
+<pre id="test">
+</pre>
+</body>
+</html>
diff --git a/intl/uconv/tests/test_ncr_fallback.html b/intl/uconv/tests/test_ncr_fallback.html
new file mode 100644
index 0000000000..846f18be8f
--- /dev/null
+++ b/intl/uconv/tests/test_ncr_fallback.html
@@ -0,0 +1,74 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=1202366
+-->
+<head>
+ <meta http-equiv="Content-type" content="text/html; charset=UTF-8">
+ <title>Test for unpaired surrogates</title>
+ <script src="/tests/SimpleTest/SimpleTest.js"></script>
+ <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" />
+</head>
+<body onload="step()">
+<pre id="test">
+<script class="testbody" type="text/javascript">
+/* NOTE:
+ * When we make our data: URL origin work as in Blink, this test will fail.
+ * Don't let this test block alignment of data: URL origin with Blink.
+ */
+SimpleTest.waitForExplicitFinish();
+
+var expectations = [
+ "%26%2365533%3B",
+ "a%26%2365533%3B",
+ "%26%2365533%3Ba",
+ "a%26%2365533%3Ba",
+ "%26%2365533%3B",
+ "a%26%2365533%3B",
+ "%26%2365533%3Ba",
+ "a%26%2365533%3Ba",
+ "%26%23128169%3B",
+ "%26%23128169%3B",
+ "%1B%24B%22%29%1B%28B",
+ "%1B%24B%22%29%1B%28B%26%23128169%3B",
+];
+
+var i = 0;
+
+function step() {
+ var f = document.getElementsByTagName("iframe")[i];
+ f.onload = function() {
+ var href = SpecialPowers.wrap(f).contentWindow.location.href;
+ var index = href.indexOf("?foo=");
+ var actual = href.substring(index + 5);
+ var expected = expectations[i];
+ is(actual, expected, "Should have gotten the expected encode.");
+ i++
+ if (i == document.getElementsByTagName("iframe").length) {
+ SimpleTest.finish();
+ } else {
+ step();
+ }
+ }
+ SpecialPowers.wrap(f).contentDocument.forms[0].submit();
+}
+</script>
+</pre>
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1202366">Mozilla Bug 1202366</a>
+<p id="display"></p>
+<div id="content" style="display: none">
+<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=\uD83D></form>');</script>"></iframe>
+<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=a\uD83D></form>');</script>"></iframe>
+<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=\uD83Da></form>');</script>"></iframe>
+<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=a\uD83Da></form>');</script>"></iframe>
+<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=\uDCA9></form>');</script>"></iframe>
+<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=a\uDCA9></form>');</script>"></iframe>
+<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=\uDCA9a></form>');</script>"></iframe>
+<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=a\uDCA9a></form>');</script>"></iframe>
+<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=\uD83D\uDCA9></form>');</script>"></iframe>
+<iframe src="data:text/html;charset=iso-2022-jp,<script>document.write('<form><input name=foo value=\uD83D\uDCA9></form>');</script>"></iframe>
+<iframe src="data:text/html;charset=iso-2022-jp,<script>document.write('<form><input name=foo value=\u3012></form>');</script>"></iframe>
+<iframe src="data:text/html;charset=iso-2022-jp,<script>document.write('<form><input name=foo value=\u3012\uD83D\uDCA9></form>');</script>"></iframe>
+</div>
+</body>
+</html>
diff --git a/intl/uconv/tests/test_singlebyte_overconsumption.html b/intl/uconv/tests/test_singlebyte_overconsumption.html
new file mode 100644
index 0000000000..3aeeb928ec
--- /dev/null
+++ b/intl/uconv/tests/test_singlebyte_overconsumption.html
@@ -0,0 +1,33 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=564679
+-->
+<head>
+ <meta http-equiv="Content-type" content="text/html; charset=windows-1253">
+ <title>Test for undefined codepoints</title>
+ <script src="/tests/SimpleTest/SimpleTest.js"></script>
+ <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" />
+</head>
+<body onload="test()">
+<pre id="test">
+<script class="testbody" type="text/javascript">
+
+/** test that single byte decoding resynchronizes after undefined codepoints */
+// Runs on body load. #display starts out as "Evil." in the markup and is only
+// changed to "All good." by the inline script near the end of the body, so
+// this passes only if that script was parsed and executed.
+function test()
+{
+ is($("display").innerHTML, "All good.", "No overconsumption");
+ SimpleTest.finish();
+}
+
+ SimpleTest.waitForExplicitFinish();
+</script>
+</pre>
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=564679">Mozilla Bug 564679</a>
+<p id="display">Evil.</p>
+<div id="content" style="display: none"></div>
+ <script type="text/javascript">
+ $("display").innerHTML = "All good.";
+ </script> ->
+</body>
+</html>
diff --git a/intl/uconv/tests/test_unicode_noncharacterescapes.html b/intl/uconv/tests/test_unicode_noncharacterescapes.html
new file mode 100644
index 0000000000..e44f8d782b
--- /dev/null
+++ b/intl/uconv/tests/test_unicode_noncharacterescapes.html
@@ -0,0 +1,303 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=445886
+-->
+<head>
+ <meta http-equiv="Content-type" content="text/html; charset=UTF-8">
+ <title>Test for Unicode non-characters</title>
+ <script src="/tests/SimpleTest/SimpleTest.js"></script>
+ <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" />
+</head>
+<body onload="Inject()">
+<pre id="test">
+<script class="testbody" type="text/javascript">
+
+/* eslint-disable no-eval */
+
+/** Test that unicode non-characters are not discarded **/
+// Called at the end of Inject(). Passes when #display still reads exactly
+// "All good.", i.e. none of the eval'd fragments managed to append to it.
+function test()
+{
+ is($("display").innerHTML,"All good.", "Noncharacters not stripped");
+ SimpleTest.finish();
+}
+
+// Runs on body load. For each code point listed below it evals a script
+// fragment in which that code point, written as a \u escape, sits in the
+// middle of the property name `innerHTML`. If the code point were dropped,
+// the fragment would become a valid `innerHTML +=` statement and append
+// " U+XXXX is evil" to #display. Exceptions from eval are swallowed. The
+// outcome is checked by test(), which is called last.
+// eslint-disable-next-line complexity
+function Inject()
+{
+ // script fragments containing Unicode non-characters
+ try {
+ // U+FDD0
+ eval("$(\"display\").inner\ufdd0HTML += \" U+FDD0 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDD1
+ eval("$(\"display\").inner\ufdd1HTML += \" U+FDD1 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDD2
+ eval("$(\"display\").inner\ufdd2HTML += \" U+FDD2 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDD3
+ eval("$(\"display\").inner\ufdd3HTML += \" U+FDD3 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDD4
+ eval("$(\"display\").inner\ufdd4HTML += \" U+FDD4 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDD5
+ eval("$(\"display\").inner\ufdd5HTML += \" U+FDD5 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDD6
+ eval("$(\"display\").inner\ufdd6HTML += \" U+FDD6 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDD7
+ eval("$(\"display\").inner\ufdd7HTML += \" U+FDD7 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDD8
+ eval("$(\"display\").inner\ufdd8HTML += \" U+FDD8 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDD9
+ eval("$(\"display\").inner\ufdd9HTML += \" U+FDD9 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDDA
+ eval("$(\"display\").inner\ufddaHTML += \" U+FDDA is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDDB
+ eval("$(\"display\").inner\ufddbHTML += \" U+FDDB is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDDC
+ eval("$(\"display\").inner\ufddcHTML += \" U+FDDC is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDDD
+ eval("$(\"display\").inner\ufdddHTML += \" U+FDDD is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDDE
+ eval("$(\"display\").inner\ufddeHTML += \" U+FDDE is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDDF
+ eval("$(\"display\").inner\ufddfHTML += \" U+FDDF is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDE0
+ eval("$(\"display\").inner\ufde0HTML += \" U+FDE0 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDE1
+ eval("$(\"display\").inner\ufde1HTML += \" U+FDE1 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDE2
+ eval("$(\"display\").inner\ufde2HTML += \" U+FDE2 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDE3
+ eval("$(\"display\").inner\ufde3HTML += \" U+FDE3 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDE4
+ eval("$(\"display\").inner\ufde4HTML += \" U+FDE4 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDE5
+ eval("$(\"display\").inner\ufde5HTML += \" U+FDE5 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDE6
+ eval("$(\"display\").inner\ufde6HTML += \" U+FDE6 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDE7
+ eval("$(\"display\").inner\ufde7HTML += \" U+FDE7 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDE8
+ eval("$(\"display\").inner\ufde8HTML += \" U+FDE8 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDE9
+ eval("$(\"display\").inner\ufde9HTML += \" U+FDE9 is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDEA
+ eval("$(\"display\").inner\ufdeaHTML += \" U+FDEA is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDEB
+ eval("$(\"display\").inner\ufdebHTML += \" U+FDEB is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDEC
+ eval("$(\"display\").inner\ufdecHTML += \" U+FDEC is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDED
+ eval("$(\"display\").inner\ufdedHTML += \" U+FDED is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDEE
+ eval("$(\"display\").inner\ufdeeHTML += \" U+FDEE is evil\"");
+ } catch(e) {}
+ try {
+ // U+FDEF
+ eval("$(\"display\").inner\ufdefHTML += \" U+FDEF is evil\"");
+ } catch(e) {}
+ try {
+ // U+FFFE
+ eval("$(\"display\").inner\ufffeHTML += \" U+FFFE is evil\"");
+ } catch(e) {}
+ try {
+ // U+FFFF
+ eval("$(\"display\").inner\uffffHTML += \" U+FFFF is evil\"");
+ } catch(e) {}
+ try {
+ // U+1FFFE
+ eval("$(\"display\").inner\ud83f\udffeHTML += \" U+1FFFE is evil\"");
+ } catch(e) {}
+ try {
+ // U+1FFFF
+ eval("$(\"display\").inner\ud83f\udfffHTML += \" U+1FFFF is evil\"");
+ } catch(e) {}
+ try {
+ // U+2FFFE
+ eval("$(\"display\").inner\ud87f\udffeHTML += \" U+2FFFE is evil\"");
+ } catch(e) {}
+ try {
+ // U+2FFFF
+ eval("$(\"display\").inner\ud87f\udfffHTML += \" U+2FFFF is evil\"");
+ } catch(e) {}
+ try {
+ // U+3FFFE
+ eval("$(\"display\").inner\ud8bf\udffeHTML += \" U+3FFFE is evil\"");
+ } catch(e) {}
+ try {
+ // U+3FFFF
+ eval("$(\"display\").inner\ud8bf\udfffHTML += \" U+3FFFF is evil\"");
+ } catch(e) {}
+ try {
+ // U+4FFFE
+ eval("$(\"display\").inner\ud8ff\udffeHTML += \" U+4FFFE is evil\"");
+ } catch(e) {}
+ try {
+ // U+4FFFF
+ eval("$(\"display\").inner\ud8ff\udfffHTML += \" U+4FFFF is evil\"");
+ } catch(e) {}
+ try {
+ // U+5FFFE
+ eval("$(\"display\").inner\ud93f\udffeHTML += \" U+5FFFE is evil\"");
+ } catch(e) {}
+ try {
+ // U+5FFFF
+ eval("$(\"display\").inner\ud93f\udfffHTML += \" U+5FFFF is evil\"");
+ } catch(e) {}
+ try {
+ // U+6FFFE
+ eval("$(\"display\").inner\ud97f\udffeHTML += \" U+6FFFE is evil\"");
+ } catch(e) {}
+ try {
+ // U+6FFFF
+ eval("$(\"display\").inner\ud97f\udfffHTML += \" U+6FFFF is evil\"");
+ } catch(e) {}
+ try {
+ // U+7FFFE
+ eval("$(\"display\").inner\ud9bf\udffeHTML += \" U+7FFFE is evil\"");
+ } catch(e) {}
+ try {
+ // U+7FFFF
+ eval("$(\"display\").inner\ud9bf\udfffHTML += \" U+7FFFF is evil\"");
+ } catch(e) {}
+ try {
+ // U+8FFFE
+ eval("$(\"display\").inner\ud9ff\udffeHTML += \" U+8FFFE is evil\"");
+ } catch(e) {}
+ try {
+ // U+8FFFF
+ eval("$(\"display\").inner\ud9ff\udfffHTML += \" U+8FFFF is evil\"");
+ } catch(e) {}
+ try {
+ // U+9FFFE
+ eval("$(\"display\").inner\uda3f\udffeHTML += \" U+9FFFE is evil\"");
+ } catch(e) {}
+ try {
+ // U+9FFFF
+ eval("$(\"display\").inner\uda3f\udfffHTML += \" U+9FFFF is evil\"");
+ } catch(e) {}
+ try {
+ // U+AFFFE
+ eval("$(\"display\").inner\uda7f\udffeHTML += \" U+AFFFE is evil\"");
+ } catch(e) {}
+ try {
+ // U+AFFFF
+ eval("$(\"display\").inner\uda7f\udfffHTML += \" U+AFFFF is evil\"");
+ } catch(e) {}
+ try {
+ // U+BFFFE
+ eval("$(\"display\").inner\udabf\udffeHTML += \" U+BFFFE is evil\"");
+ } catch(e) {}
+ try {
+ // U+BFFFF
+ eval("$(\"display\").inner\udabf\udfffHTML += \" U+BFFFF is evil\"");
+ } catch(e) {}
+ try {
+ // U+CFFFE
+ eval("$(\"display\").inner\udaff\udffeHTML += \" U+CFFFE is evil\"");
+ } catch(e) {}
+ try {
+ // U+CFFFF
+ eval("$(\"display\").inner\udaff\udfffHTML += \" U+CFFFF is evil\"");
+ } catch(e) {}
+ try {
+ // U+DFFFE
+ eval("$(\"display\").inner\udb3f\udffeHTML += \" U+DFFFE is evil\"");
+ } catch(e) {}
+ try {
+ // U+DFFFF
+ eval("$(\"display\").inner\udb3f\udfffHTML += \" U+DFFFF is evil\"");
+ } catch(e) {}
+ try {
+ // U+EFFFE
+ eval("$(\"display\").inner\udb7f\udffeHTML += \" U+EFFFE is evil\"");
+ } catch(e) {}
+ try {
+ // U+EFFFF
+ eval("$(\"display\").inner\udb7f\udfffHTML += \" U+EFFFF is evil\"");
+ } catch(e) {}
+ try {
+ // U+FFFFE
+ eval("$(\"display\").inner\udbbf\udffeHTML += \" U+FFFFE is evil\"");
+ } catch(e) {}
+ try {
+ // U+FFFFF
+ eval("$(\"display\").inner\udbbf\udfffHTML += \" U+FFFFF is evil\"");
+ } catch(e) {}
+ try {
+ // U+10FFFE
+ eval("$(\"display\").inner\udbff\udffeHTML += \" U+10FFFE is evil\"");
+ } catch(e) {}
+ try {
+ // U+10FFFF
+ eval("$(\"display\").inner\udbff\udfffHTML += \" U+10FFFF is evil\"");
+ } catch(e) {}
+ test();
+}
+
+ SimpleTest.waitForExplicitFinish();
+</script>
+</pre>
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=445886">Mozilla Bug 445886</a>
+<p id="display">All good.</p>
+<div id="content" style="display: none"></div>
+</body>
+</html>
diff --git a/intl/uconv/tests/test_unicode_noncharacters_gb18030.html b/intl/uconv/tests/test_unicode_noncharacters_gb18030.html
new file mode 100644
index 0000000000..0c9156d9e3
--- /dev/null
+++ b/intl/uconv/tests/test_unicode_noncharacters_gb18030.html
@@ -0,0 +1,305 @@
+<!DOCTYPE HTML>
+<html>
+<head>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=445886
+-->
+ <meta http-equiv="Content-type" content="text/html; charset=gb18030">
+ <title>Test for Unicode non-characters</title>
+ <script src="/tests/SimpleTest/SimpleTest.js"></script>
+ <link rel="stylesheet" type="text/css"
+ href="/tests/SimpleTest/test.css">
+</head>
+<body onload="Inject()">
+<pre id="test"><script class="testbody" type="text/javascript">
+
+/* eslint-disable no-eval */
+
+/** Passes only if #display still reads "All good.", i.e. no injected fragment altered it. **/
+function test() {
+ var shown = $("display").innerHTML;
+ is(shown, "All good.", "Noncharacters not stripped");
+ SimpleTest.finish();
+}
+
+// eslint-disable-next-line complexity
+function Inject()
+{
+ // Script fragments that spell "inner<non-character>HTML"; if the non-character were stripped from the source, eval would append to #display and test() would fail. Exceptions are ignored (empty catch).
+ try {
+ // U+FDD0
+ eval("$(\"display\").inner�0�2HTML += \" U+FDD0 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDD1
+ eval("$(\"display\").inner�0�3HTML += \" U+FDD1 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDD2
+ eval("$(\"display\").inner�0�4HTML += \" U+FDD2 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDD3
+ eval("$(\"display\").inner�0�5HTML += \" U+FDD3 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDD4
+ eval("$(\"display\").inner�0�6HTML += \" U+FDD4 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDD5
+ eval("$(\"display\").inner�0�7HTML += \" U+FDD5 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDD6
+ eval("$(\"display\").inner�0�8HTML += \" U+FDD6 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDD7
+ eval("$(\"display\").inner�0�9HTML += \" U+FDD7 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDD8
+ eval("$(\"display\").inner�0�0HTML += \" U+FDD8 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDD9
+ eval("$(\"display\").inner�0�1HTML += \" U+FDD9 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDDA
+ eval("$(\"display\").inner�0�2HTML += \" U+FDDA is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDDB
+ eval("$(\"display\").inner�0�3HTML += \" U+FDDB is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDDC
+ eval("$(\"display\").inner�0�4HTML += \" U+FDDC is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDDD
+ eval("$(\"display\").inner�0�5HTML += \" U+FDDD is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDDE
+ eval("$(\"display\").inner�0�6HTML += \" U+FDDE is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDDF
+ eval("$(\"display\").inner�0�7HTML += \" U+FDDF is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE0
+ eval("$(\"display\").inner�0�8HTML += \" U+FDE0 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE1
+ eval("$(\"display\").inner�0�9HTML += \" U+FDE1 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE2
+ eval("$(\"display\").inner�0�0HTML += \" U+FDE2 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE3
+ eval("$(\"display\").inner�0�1HTML += \" U+FDE3 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE4
+ eval("$(\"display\").inner�0�2HTML += \" U+FDE4 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE5
+ eval("$(\"display\").inner�0�3HTML += \" U+FDE5 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE6
+ eval("$(\"display\").inner�0�4HTML += \" U+FDE6 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE7
+ eval("$(\"display\").inner�0�5HTML += \" U+FDE7 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE8
+ eval("$(\"display\").inner�0�6HTML += \" U+FDE8 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE9
+ eval("$(\"display\").inner�0�7HTML += \" U+FDE9 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDEA
+ eval("$(\"display\").inner�0�8HTML += \" U+FDEA is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDEB
+ eval("$(\"display\").inner�0�9HTML += \" U+FDEB is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDEC
+ eval("$(\"display\").inner�0�0HTML += \" U+FDEC is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDED
+ eval("$(\"display\").inner�0�1HTML += \" U+FDED is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDEE
+ eval("$(\"display\").inner�0�2HTML += \" U+FDEE is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDEF
+ eval("$(\"display\").inner�0�3HTML += \" U+FDEF is evil \"");
+ } catch(e) {}
+ try {
+ // U+FFFE
+ eval("$(\"display\").inner�1�8HTML += \" U+FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+FFFF
+ eval("$(\"display\").inner�1�9HTML += \" U+FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+1FFFE
+ eval("$(\"display\").inner�2�4HTML += \" U+1FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+1FFFF
+ eval("$(\"display\").inner�2�5HTML += \" U+1FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+2FFFE
+ eval("$(\"display\").inner�4�0HTML += \" U+2FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+2FFFF
+ eval("$(\"display\").inner�4�1HTML += \" U+2FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+3FFFE
+ eval("$(\"display\").inner�6�6HTML += \" U+3FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+3FFFF
+ eval("$(\"display\").inner�6�7HTML += \" U+3FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+4FFFE
+ eval("$(\"display\").inner�8�2HTML += \" U+4FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+4FFFF
+ eval("$(\"display\").inner�8�3HTML += \" U+4FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+5FFFE
+ eval("$(\"display\").inner�0�8HTML += \" U+5FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+5FFFF
+ eval("$(\"display\").inner�0�9HTML += \" U+5FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+6FFFE
+ eval("$(\"display\").inner�2�4HTML += \" U+6FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+6FFFF
+ eval("$(\"display\").inner�2�5HTML += \" U+6FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+7FFFE
+ eval("$(\"display\").inner�4�0HTML += \" U+7FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+7FFFF
+ eval("$(\"display\").inner�4�1HTML += \" U+7FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+8FFFE
+ eval("$(\"display\").inner�6�6HTML += \" U+8FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+8FFFF
+ eval("$(\"display\").inner�6�7HTML += \" U+8FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+9FFFE
+ eval("$(\"display\").inner�8�2HTML += \" U+9FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+9FFFF
+ eval("$(\"display\").inner�8�3HTML += \" U+9FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+AFFFE
+ eval("$(\"display\").inner�0�8HTML += \" U+AFFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+AFFFF
+ eval("$(\"display\").inner�0�9HTML += \" U+AFFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+BFFFE
+ eval("$(\"display\").inner�2�4HTML += \" U+BFFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+BFFFF
+ eval("$(\"display\").inner�2�5HTML += \" U+BFFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+CFFFE
+ eval("$(\"display\").inner�4�0HTML += \" U+CFFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+CFFFF
+ eval("$(\"display\").inner�4�1HTML += \" U+CFFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+DFFFE
+ eval("$(\"display\").inner�6�6HTML += \" U+DFFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+DFFFF
+ eval("$(\"display\").inner�6�7HTML += \" U+DFFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+EFFFE
+ eval("$(\"display\").inner�8�2HTML += \" U+EFFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+EFFFF
+ eval("$(\"display\").inner�8�3HTML += \" U+EFFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+FFFFE
+ eval("$(\"display\").inner�0�8HTML += \" U+FFFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+FFFFF
+ eval("$(\"display\").inner�0�9HTML += \" U+FFFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+10FFFE
+ eval("$(\"display\").inner�2�4HTML += \" U+10FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+10FFFF
+ eval("$(\"display\").inner�2�5HTML += \" U+10FFFF is evil \"");
+ } catch(e) {}
+ test();
+}
+
+ SimpleTest.waitForExplicitFinish(); // the run is ended explicitly by test() via SimpleTest.finish()
+</script>
+</pre>
+<a target="_blank"
+ href="https://bugzilla.mozilla.org/show_bug.cgi?id=445886">Mozilla Bug
+445886</a>
+<p id="display">All good.</p>
+<div id="content" style="display: none;"></div>
+</body>
+</html>
diff --git a/intl/uconv/tests/test_unicode_noncharacters_utf8.html b/intl/uconv/tests/test_unicode_noncharacters_utf8.html
new file mode 100644
index 0000000000..ecfdbeae09
--- /dev/null
+++ b/intl/uconv/tests/test_unicode_noncharacters_utf8.html
@@ -0,0 +1,303 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=445886
+-->
+<head>
+ <meta http-equiv="Content-type" content="text/html; charset=UTF-8">
+ <title>Test for Unicode non-characters</title>
+ <script src="/tests/SimpleTest/SimpleTest.js"></script>
+ <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" />
+</head>
+<body onload="Inject()">
+<pre id="test">
+<script class="testbody" type="text/javascript">
+
+/* eslint-disable no-eval */
+
+/** Passes only if #display still reads "All good.", i.e. no injected fragment altered it. **/
+function test() {
+ var shown = $("display").innerHTML;
+ is(shown, "All good.", "Noncharacters not stripped");
+ SimpleTest.finish();
+}
+
+// eslint-disable-next-line complexity
+function Inject()
+{
+ // Script fragments that spell "inner<non-character>HTML"; if the non-character were stripped from the source, eval would append to #display and test() would fail. Exceptions are ignored (empty catch).
+ try {
+ // U+FDD0
+ eval("$(\"display\").inner﷐HTML += \" U+FDD0 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDD1
+ eval("$(\"display\").inner﷑HTML += \" U+FDD1 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDD2
+ eval("$(\"display\").inner﷒HTML += \" U+FDD2 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDD3
+ eval("$(\"display\").inner﷓HTML += \" U+FDD3 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDD4
+ eval("$(\"display\").inner﷔HTML += \" U+FDD4 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDD5
+ eval("$(\"display\").inner﷕HTML += \" U+FDD5 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDD6
+ eval("$(\"display\").inner﷖HTML += \" U+FDD6 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDD7
+ eval("$(\"display\").inner﷗HTML += \" U+FDD7 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDD8
+ eval("$(\"display\").inner﷘HTML += \" U+FDD8 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDD9
+ eval("$(\"display\").inner﷙HTML += \" U+FDD9 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDDA
+ eval("$(\"display\").inner﷚HTML += \" U+FDDA is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDDB
+ eval("$(\"display\").inner﷛HTML += \" U+FDDB is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDDC
+ eval("$(\"display\").inner﷜HTML += \" U+FDDC is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDDD
+ eval("$(\"display\").inner﷝HTML += \" U+FDDD is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDDE
+ eval("$(\"display\").inner﷞HTML += \" U+FDDE is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDDF
+ eval("$(\"display\").inner﷟HTML += \" U+FDDF is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE0
+ eval("$(\"display\").inner﷠HTML += \" U+FDE0 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE1
+ eval("$(\"display\").inner﷡HTML += \" U+FDE1 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE2
+ eval("$(\"display\").inner﷢HTML += \" U+FDE2 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE3
+ eval("$(\"display\").inner﷣HTML += \" U+FDE3 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE4
+ eval("$(\"display\").inner﷤HTML += \" U+FDE4 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE5
+ eval("$(\"display\").inner﷥HTML += \" U+FDE5 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE6
+ eval("$(\"display\").inner﷦HTML += \" U+FDE6 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE7
+ eval("$(\"display\").inner﷧HTML += \" U+FDE7 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE8
+ eval("$(\"display\").inner﷨HTML += \" U+FDE8 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDE9
+ eval("$(\"display\").inner﷩HTML += \" U+FDE9 is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDEA
+ eval("$(\"display\").inner﷪HTML += \" U+FDEA is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDEB
+ eval("$(\"display\").inner﷫HTML += \" U+FDEB is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDEC
+ eval("$(\"display\").inner﷬HTML += \" U+FDEC is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDED
+ eval("$(\"display\").inner﷭HTML += \" U+FDED is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDEE
+ eval("$(\"display\").inner﷮HTML += \" U+FDEE is evil \"");
+ } catch(e) {}
+ try {
+ // U+FDEF
+ eval("$(\"display\").inner﷯HTML += \" U+FDEF is evil \"");
+ } catch(e) {}
+ try {
+ // U+FFFE
+ eval("$(\"display\").inner￾HTML += \" U+FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+FFFF
+ eval("$(\"display\").inner￿HTML += \" U+FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+1FFFE
+ eval("$(\"display\").inner🿾HTML += \" U+1FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+1FFFF
+ eval("$(\"display\").inner🿿HTML += \" U+1FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+2FFFE
+ eval("$(\"display\").inner𯿾HTML += \" U+2FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+2FFFF
+ eval("$(\"display\").inner𯿿HTML += \" U+2FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+3FFFE
+ eval("$(\"display\").inner𿿾HTML += \" U+3FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+3FFFF
+ eval("$(\"display\").inner𿿿HTML += \" U+3FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+4FFFE
+ eval("$(\"display\").inner񏿾HTML += \" U+4FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+4FFFF
+ eval("$(\"display\").inner񏿿HTML += \" U+4FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+5FFFE
+ eval("$(\"display\").inner񟿾HTML += \" U+5FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+5FFFF
+ eval("$(\"display\").inner񟿿HTML += \" U+5FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+6FFFE
+ eval("$(\"display\").inner񯿾HTML += \" U+6FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+6FFFF
+ eval("$(\"display\").inner񯿿HTML += \" U+6FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+7FFFE
+ eval("$(\"display\").inner񿿾HTML += \" U+7FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+7FFFF
+ eval("$(\"display\").inner񿿿HTML += \" U+7FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+8FFFE
+ eval("$(\"display\").inner򏿾HTML += \" U+8FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+8FFFF
+ eval("$(\"display\").inner򏿿HTML += \" U+8FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+9FFFE
+ eval("$(\"display\").inner򟿾HTML += \" U+9FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+9FFFF
+ eval("$(\"display\").inner򟿿HTML += \" U+9FFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+AFFFE
+ eval("$(\"display\").inner򯿾HTML += \" U+AFFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+AFFFF
+ eval("$(\"display\").inner򯿿HTML += \" U+AFFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+BFFFE
+ eval("$(\"display\").inner򿿾HTML += \" U+BFFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+BFFFF
+ eval("$(\"display\").inner򿿿HTML += \" U+BFFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+CFFFE
+ eval("$(\"display\").inner󏿾HTML += \" U+CFFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+CFFFF
+ eval("$(\"display\").inner󏿿HTML += \" U+CFFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+DFFFE
+ eval("$(\"display\").inner󟿾HTML += \" U+DFFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+DFFFF
+ eval("$(\"display\").inner󟿿HTML += \" U+DFFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+EFFFE
+ eval("$(\"display\").inner󯿾HTML += \" U+EFFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+EFFFF
+ eval("$(\"display\").inner󯿿HTML += \" U+EFFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+FFFFE
+ eval("$(\"display\").inner󿿾HTML += \" U+FFFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+FFFFF
+ eval("$(\"display\").inner󿿿HTML += \" U+FFFFF is evil \"");
+ } catch(e) {}
+ try {
+ // U+10FFFE
+ eval("$(\"display\").inner􏿾HTML += \" U+10FFFE is evil \"");
+ } catch(e) {}
+ try {
+ // U+10FFFF
+ eval("$(\"display\").inner􏿿HTML += \" U+10FFFF is evil \"");
+ } catch(e) {}
+ test();
+}
+
+ SimpleTest.waitForExplicitFinish(); // the run is ended explicitly by test() via SimpleTest.finish()
+</script>
+</pre>
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=445886">Mozilla Bug 445886</a>
+<p id="display">All good.</p>
+<div id="content" style="display: none"></div>
+</body>
+</html>
diff --git a/intl/uconv/tests/test_utf8_overconsumption.html b/intl/uconv/tests/test_utf8_overconsumption.html
new file mode 100644
index 0000000000..25c4a273ea
--- /dev/null
+++ b/intl/uconv/tests/test_utf8_overconsumption.html
@@ -0,0 +1,39 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=445886
+-->
+<head>
+ <meta http-equiv="Content-type" content="text/html; charset=UTF-8">
+ <title>Test for UTF-8 decoder overconsumption</title>
+ <script src="/tests/SimpleTest/SimpleTest.js"></script>
+ <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" />
+ <script type="text/javascript">
+function Inject() { // NOTE(review): presumably reachable only if the onload="Inject()" text after the body tag's ">" gets parsed as an attribute - confirm
+ var display = $("display");
+ display.innerHTML = "Evil"; // test() expects "All good.", so the test fails if this ever runs
+}
+ </script>
+</head>
+<body >onload="Inject()">
+<pre id="test">
+<script class="testbody" type="text/javascript">
+
+/** UTF-8 decoding must resynchronize after an incomplete sequence: #display has to still read "All good." (Inject() never ran). */
+function test() {
+ var shown = $("display").innerHTML;
+ is(shown, "All good.", "No overconsumption");
+ SimpleTest.finish();
+}
+
+ addLoadEvent(function runCheckAfterLoad() { // addLoadEvent is not defined in this file; presumably provided by SimpleTest.js
+ setTimeout(test, 0); // queue test() as a zero-delay timeout rather than calling it inline
+ });
+ SimpleTest.waitForExplicitFinish(); // the run is ended explicitly by test() via SimpleTest.finish()
+</script>
+</pre>
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=445886">Mozilla Bug 445886</a>
+<p id="display">All good.</p>
+<div id="content" style="display: none"></div>
+</body>
+</html>