diff options
Diffstat (limited to 'intl/uconv')
32 files changed, 2636 insertions, 0 deletions
diff --git a/intl/uconv/components.conf b/intl/uconv/components.conf new file mode 100644 index 0000000000..00686f661a --- /dev/null +++ b/intl/uconv/components.conf @@ -0,0 +1,34 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Classes = [ + { + 'cid': '{2bc2ad62-ad5d-4b7b-a9db-f74ae203c527}', + 'contract_ids': ['@mozilla.org/intl/converter-input-stream;1'], + 'type': 'nsConverterInputStream', + 'headers': ['nsConverterInputStream.h'], + }, + { + 'cid': '{ff8780a5-bbb1-4bc5-8ee7-057e7bc5c925}', + 'contract_ids': ['@mozilla.org/intl/converter-output-stream;1'], + 'type': 'nsConverterOutputStream', + 'headers': ['/intl/uconv/nsConverterOutputStream.h'], + }, + { + 'cid': '{0a698c44-3bff-11d4-9649-00c0ca135b4e}', + 'contract_ids': ['@mozilla.org/intl/scriptableunicodeconverter'], + 'type': 'nsScriptableUnicodeConverter', + 'headers': ['/intl/uconv/nsScriptableUConv.h'], + }, + { + 'js_name': 'textToSubURI', + 'cid': '{8b042e22-6f87-11d3-b3c8-00805f8a6670}', + 'contract_ids': ['@mozilla.org/intl/texttosuburi;1'], + 'interfaces': ['nsITextToSubURI'], + 'type': 'nsTextToSubURI', + 'headers': ['/intl/uconv/nsTextToSubURI.h'], + }, +] diff --git a/intl/uconv/crashtests/563618.html b/intl/uconv/crashtests/563618.html new file mode 100644 index 0000000000..e36b664762 --- /dev/null +++ b/intl/uconv/crashtests/563618.html @@ -0,0 +1,12 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html> +<head> + <meta content="text/html; charset=euc-jp" + http-equiv="content-type"> + <title>Serbian Glyph Test</title> +</head> +<body> + + <p style="font-size: 20pt;"></p> +</body> +</html> diff --git a/intl/uconv/crashtests/crashtests.list b/intl/uconv/crashtests/crashtests.list 
new file mode 100644 index 0000000000..6c54a699c1 --- /dev/null +++ b/intl/uconv/crashtests/crashtests.list @@ -0,0 +1,2 @@ +load 563618.html +load omt-non-utf-8-jsurl.html diff --git a/intl/uconv/crashtests/omt-non-utf-8-jsurl.html b/intl/uconv/crashtests/omt-non-utf-8-jsurl.html new file mode 100644 index 0000000000..033e38a280 --- /dev/null +++ b/intl/uconv/crashtests/omt-non-utf-8-jsurl.html @@ -0,0 +1,14 @@ +<!DOCTYPE html> +<html class=reftest-wait> +<head> + <meta charset=iso-8859-1><!-- must be non-UTF-8 --> + <title>Test for off the main thread non-UTF-8 javascript: URL</title> +</head> +<body> +<script> +new Worker("javascript:foo").onerror = () => { + document.documentElement.className = ""; +}; +</script> +</body> +</html> diff --git a/intl/uconv/directory.txt b/intl/uconv/directory.txt new file mode 100644 index 0000000000..2b6be7af7f --- /dev/null +++ b/intl/uconv/directory.txt @@ -0,0 +1,32 @@ +Directory Structure : +================================ + +idl - public .idl files +public - public header file +src - source directory of charset converter manager and utilities, and + charset converters for ISO-8859-1, CP1252, MacRoman and UTF-8 +tests - tests program and application for charset converter +tests/unit - xpcshell tests +tools - tools to build the tables used by the converters +util - utility functions used by the converters + +The following directories contain different charset converters: + +ucvcn - Simplified Chinese charsets - GB2312, HZ, ISO-2022-CN, GBK, GB18030 +ucvja - Japanese charsets - Shift-JIS, ISO-2022-JP, EUC-JP +ucvko - Korean charsets - ISO-2022-KR, EUC-KR, Johab +ucvlatin - Latin charsets and others - ISO-8859-x, CP1250-1258 + CP866, 874, KOI8, + Mac charsets, TIS620, UTF16 +ucvtw - Traditional Chinese charsets Set 1 - Big5 +ucvtw2 - Traditional Chinese charsets Set 2 - EUC-TW + +Within the directories containing charset converters: + +*.ut - tables used to convert to Unicode from a charset +*.uf - tables used to convert to a 
charset from Unicode + +The following directories are obsolete and should not be used: + +ucvth +ucvvt diff --git a/intl/uconv/moz.build b/intl/uconv/moz.build new file mode 100644 index 0000000000..f21e4055f9 --- /dev/null +++ b/intl/uconv/moz.build @@ -0,0 +1,32 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +TEST_DIRS += ["tests"] + +XPIDL_SOURCES += [ + "nsIScriptableUConv.idl", + "nsITextToSubURI.idl", +] + +XPIDL_MODULE = "uconv" + +EXPORTS += [ + "nsConverterInputStream.h", + "nsTextToSubURI.h", +] + +UNIFIED_SOURCES += [ + "nsConverterInputStream.cpp", + "nsConverterOutputStream.cpp", + "nsScriptableUConv.cpp", + "nsTextToSubURI.cpp", +] + +XPCOM_MANIFESTS += [ + "components.conf", +] + +FINAL_LIBRARY = "xul" diff --git a/intl/uconv/nsConverterInputStream.cpp b/intl/uconv/nsConverterInputStream.cpp new file mode 100644 index 0000000000..e3efdbc146 --- /dev/null +++ b/intl/uconv/nsConverterInputStream.cpp @@ -0,0 +1,256 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsConverterInputStream.h" +#include "nsIInputStream.h" +#include "nsReadLine.h" +#include "nsStreamUtils.h" + +#include <algorithm> +#include <tuple> + +using namespace mozilla; + +#define CONVERTER_BUFFER_SIZE 8192 + +NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream, + nsIUnicharInputStream, nsIUnicharLineInputStream) + +NS_IMETHODIMP +nsConverterInputStream::Init(nsIInputStream* aStream, const char* aCharset, + int32_t aBufferSize, char16_t aReplacementChar) { + nsAutoCString label; + if (!aCharset) { + label.AssignLiteral("UTF-8"); + } else { + label = aCharset; + } + + auto encoding = Encoding::ForLabelNoReplacement(label); + if (!encoding) { + return NS_ERROR_UCONV_NOCONV; + } + // Previously, the implementation auto-switched only + // between the two UTF-16 variants and only when + // initialized with an endianness-unspecific label. + mConverter = encoding->NewDecoder(); + + size_t outputBufferSize; + if (aBufferSize <= 0) { + aBufferSize = CONVERTER_BUFFER_SIZE; + outputBufferSize = CONVERTER_BUFFER_SIZE; + } else { + // NetUtil.jsm assumes that if buffer size equals + // the input size, the whole stream will be processed + // as one readString. This is not true with encoding_rs, + // because encoding_rs might want to see space for a + // surrogate pair, so let's compute a larger output + // buffer length. + CheckedInt<size_t> needed = mConverter->MaxUTF16BufferLength(aBufferSize); + if (!needed.isValid()) { + return NS_ERROR_OUT_OF_MEMORY; + } + outputBufferSize = needed.value(); + } + + // set up our buffers. + if (!mByteData.SetCapacity(aBufferSize, mozilla::fallible) || + !mUnicharData.SetLength(outputBufferSize, mozilla::fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + + mInput = aStream; + mErrorsAreFatal = !aReplacementChar; + return NS_OK; +} + +NS_IMETHODIMP +nsConverterInputStream::Close() { + nsresult rv = mInput ? 
mInput->Close() : NS_OK; + mLineBuffer = nullptr; + mInput = nullptr; + mConverter = nullptr; + mByteData.Clear(); + mUnicharData.Clear(); + return rv; +} + +NS_IMETHODIMP +nsConverterInputStream::Read(char16_t* aBuf, uint32_t aCount, + uint32_t* aReadCount) { + NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); + uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; + if (0 == readCount) { + // Fill the unichar buffer + readCount = Fill(&mLastErrorCode); + if (readCount == 0) { + *aReadCount = 0; + return mLastErrorCode; + } + } + if (readCount > aCount) { + readCount = aCount; + } + memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset, + readCount * sizeof(char16_t)); + mUnicharDataOffset += readCount; + *aReadCount = readCount; + return NS_OK; +} + +NS_IMETHODIMP +nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, + void* aClosure, uint32_t aCount, + uint32_t* aReadCount) { + NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); + uint32_t codeUnitsToWrite = mUnicharDataLength - mUnicharDataOffset; + if (0 == codeUnitsToWrite) { + // Fill the unichar buffer + codeUnitsToWrite = Fill(&mLastErrorCode); + if (codeUnitsToWrite == 0) { + *aReadCount = 0; + return mLastErrorCode; + } + } + + if (codeUnitsToWrite > aCount) { + codeUnitsToWrite = aCount; + } + + uint32_t codeUnitsWritten; + uint32_t totalCodeUnitsWritten = 0; + + while (codeUnitsToWrite) { + nsresult rv = + aWriter(this, aClosure, mUnicharData.Elements() + mUnicharDataOffset, + totalCodeUnitsWritten, codeUnitsToWrite, &codeUnitsWritten); + if (NS_FAILED(rv)) { + // don't propagate errors to the caller + break; + } + + codeUnitsToWrite -= codeUnitsWritten; + totalCodeUnitsWritten += codeUnitsWritten; + mUnicharDataOffset += codeUnitsWritten; + } + + *aReadCount = totalCodeUnitsWritten; + + return NS_OK; +} + +NS_IMETHODIMP +nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString, + uint32_t* aReadCount) { + 
NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); + uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; + if (0 == readCount) { + // Fill the unichar buffer + readCount = Fill(&mLastErrorCode); + if (readCount == 0) { + *aReadCount = 0; + return mLastErrorCode; + } + } + if (readCount > aCount) { + readCount = aCount; + } + const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset; + aString.Assign(buf, readCount); + mUnicharDataOffset += readCount; + *aReadCount = readCount; + return NS_OK; +} + +uint32_t nsConverterInputStream::Fill(nsresult* aErrorCode) { + if (!mInput) { + // We already closed the stream! + *aErrorCode = NS_BASE_STREAM_CLOSED; + return 0; + } + + if (NS_FAILED(mLastErrorCode)) { + // We failed to completely convert last time, and error-recovery + // is disabled. We will fare no better this time, so... + *aErrorCode = mLastErrorCode; + return 0; + } + + // mUnicharData.Length() is the buffer length, not the fill status. + // mUnicharDataLength reflects the current fill status. + mUnicharDataLength = 0; + // Whenever we convert, mUnicharData is logically empty. + mUnicharDataOffset = 0; + + // Continue trying to read from the source stream until we successfully decode + // a character or encounter an error, as returning `0` here implies that the + // stream is complete. + // + // If the converter has been cleared, we've fully consumed the stream, and + // want to report EOF. + while (mUnicharDataLength == 0 && mConverter) { + // We assume a many to one conversion and are using equal sizes for + // the two buffers. However if an error happens at the very start + // of a byte buffer we may end up in a situation where n bytes lead + // to n+1 unicode chars. Thus we need to keep track of the leftover + // bytes as we convert. 
+ + uint32_t nb; + *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb); + if (NS_FAILED(*aErrorCode)) { + return 0; + } + + NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(), + "mByteData is lying to us somewhere"); + + // If `NS_FillArray` failed to read any new bytes, this is the last read, + // and we're at the end of the stream. + bool last = (nb == 0); + + // Now convert as much of the byte buffer to unicode as possible + auto src = AsBytes(Span(mByteData)); + auto dst = Span(mUnicharData); + + // Truncation from size_t to uint32_t below is OK, because the sizes + // are bounded by the lengths of mByteData and mUnicharData. + uint32_t result; + size_t read; + size_t written; + if (mErrorsAreFatal) { + std::tie(result, read, written) = + mConverter->DecodeToUTF16WithoutReplacement(src, dst, last); + } else { + std::tie(result, read, written, std::ignore) = + mConverter->DecodeToUTF16(src, dst, last); + } + mLeftOverBytes = mByteData.Length() - read; + mUnicharDataLength = written; + // Clear `mConverter` if we reached the end of the stream, as we can't + // call methods on it anymore. This will also signal EOF to the caller + // through the loop condition. + if (last) { + MOZ_ASSERT(mLeftOverBytes == 0, + "Failed to read all bytes on the last pass?"); + mConverter = nullptr; + } + // If we got a decode error, we're done. 
+ if (result != kInputEmpty && result != kOutputFull) { + MOZ_ASSERT(mErrorsAreFatal, "How come DecodeToUTF16() reported error?"); + *aErrorCode = NS_ERROR_UDEC_ILLEGALINPUT; + return 0; + } + } + *aErrorCode = NS_OK; + return mUnicharDataLength; +} + +NS_IMETHODIMP +nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult) { + if (!mLineBuffer) { + mLineBuffer = MakeUnique<nsLineBuffer<char16_t>>(); + } + return NS_ReadLine(this, mLineBuffer.get(), aLine, aResult); +} diff --git a/intl/uconv/nsConverterInputStream.h b/intl/uconv/nsConverterInputStream.h new file mode 100644 index 0000000000..55555fc679 --- /dev/null +++ b/intl/uconv/nsConverterInputStream.h @@ -0,0 +1,64 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsConverterInputStream_h +#define nsConverterInputStream_h + +#include "nsIInputStream.h" +#include "nsIConverterInputStream.h" +#include "nsIUnicharLineInputStream.h" +#include "nsTArray.h" +#include "nsCOMPtr.h" +#include "nsReadLine.h" +#include "mozilla/Encoding.h" +#include "mozilla/UniquePtr.h" + +#define NS_CONVERTERINPUTSTREAM_CONTRACTID \ + "@mozilla.org/intl/converter-input-stream;1" + +// {2BC2AD62-AD5D-4b7b-A9DB-F74AE203C527} +#define NS_CONVERTERINPUTSTREAM_CID \ + { \ + 0x2bc2ad62, 0xad5d, 0x4b7b, { \ + 0xa9, 0xdb, 0xf7, 0x4a, 0xe2, 0x3, 0xc5, 0x27 \ + } \ + } + +class nsConverterInputStream : public nsIConverterInputStream, + public nsIUnicharLineInputStream { + public: + NS_DECL_ISUPPORTS + NS_DECL_NSIUNICHARINPUTSTREAM + NS_DECL_NSIUNICHARLINEINPUTSTREAM + NS_DECL_NSICONVERTERINPUTSTREAM + + nsConverterInputStream() + : mLastErrorCode(NS_OK), + mLeftOverBytes(0), + mUnicharDataOffset(0), + mUnicharDataLength(0), + mErrorsAreFatal(false), + mLineBuffer(nullptr) {} + + private: + 
virtual ~nsConverterInputStream() { Close(); } + + uint32_t Fill(nsresult* aErrorCode); + + mozilla::UniquePtr<mozilla::Decoder> mConverter; + FallibleTArray<char> mByteData; + FallibleTArray<char16_t> mUnicharData; + nsCOMPtr<nsIInputStream> mInput; + + nsresult mLastErrorCode; + uint32_t mLeftOverBytes; + uint32_t mUnicharDataOffset; + uint32_t mUnicharDataLength; + bool mErrorsAreFatal; + + mozilla::UniquePtr<nsLineBuffer<char16_t> > mLineBuffer; +}; + +#endif diff --git a/intl/uconv/nsConverterOutputStream.cpp b/intl/uconv/nsConverterOutputStream.cpp new file mode 100644 index 0000000000..a24adb0377 --- /dev/null +++ b/intl/uconv/nsConverterOutputStream.cpp @@ -0,0 +1,115 @@ +/* vim:set expandtab ts=4 sw=2 sts=2 cin: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsCOMPtr.h" +#include "nsIOutputStream.h" +#include "nsString.h" +#include "nsConverterOutputStream.h" +#include "mozilla/Encoding.h" + +using namespace mozilla; + +NS_IMPL_ISUPPORTS(nsConverterOutputStream, nsIUnicharOutputStream, + nsIConverterOutputStream) + +nsConverterOutputStream::~nsConverterOutputStream() { Close(); } + +NS_IMETHODIMP +nsConverterOutputStream::Init(nsIOutputStream* aOutStream, + const char* aCharset) { + MOZ_ASSERT(aOutStream, "Null output stream!"); + + const Encoding* encoding; + if (!aCharset) { + encoding = UTF_8_ENCODING; + } else { + encoding = Encoding::ForLabelNoReplacement(MakeStringSpan(aCharset)); + if (!encoding || encoding == UTF_16LE_ENCODING || + encoding == UTF_16BE_ENCODING) { + return NS_ERROR_UCONV_NOCONV; + } + } + + mConverter = encoding->NewEncoder(); + + mOutStream = aOutStream; + + return NS_OK; +} + +NS_IMETHODIMP +nsConverterOutputStream::Write(uint32_t aCount, const char16_t* aChars, + bool* aSuccess) { + if (!mOutStream) { + NS_ASSERTION(!mConverter, "Closed streams shouldn't 
have converters"); + return NS_BASE_STREAM_CLOSED; + } + MOZ_ASSERT(mConverter, "Must have a converter when not closed"); + uint8_t buffer[4096]; + auto dst = Span(buffer); + auto src = Span(aChars, aCount); + for (;;) { + uint32_t result; + size_t read; + size_t written; + std::tie(result, read, written, std::ignore) = + mConverter->EncodeFromUTF16(src, dst, false); + src = src.From(read); + uint32_t streamWritten; + nsresult rv = mOutStream->Write(reinterpret_cast<char*>(dst.Elements()), + written, &streamWritten); + *aSuccess = NS_SUCCEEDED(rv) && written == streamWritten; + if (!(*aSuccess)) { + return rv; + } + if (result == kInputEmpty) { + return NS_OK; + } + } +} + +NS_IMETHODIMP +nsConverterOutputStream::WriteString(const nsAString& aString, bool* aSuccess) { + int32_t inLen = aString.Length(); + nsAString::const_iterator i; + aString.BeginReading(i); + return Write(inLen, i.get(), aSuccess); +} + +NS_IMETHODIMP +nsConverterOutputStream::Flush() { + if (!mOutStream) return NS_OK; // Already closed. + + // If we are encoding to ISO-2022-JP, potentially + // transition back to the ASCII state. The buffer + // needs to be large enough for an additional NCR, + // though. + uint8_t buffer[12]; + auto dst = Span(buffer); + Span<char16_t> src(nullptr); + uint32_t result; + size_t written; + std::tie(result, std::ignore, written, std::ignore) = + mConverter->EncodeFromUTF16(src, dst, true); + MOZ_ASSERT(result == kInputEmpty); + uint32_t streamWritten; + if (!written) { + return NS_OK; + } + return mOutStream->Write(reinterpret_cast<char*>(dst.Elements()), written, + &streamWritten); +} + +NS_IMETHODIMP +nsConverterOutputStream::Close() { + if (!mOutStream) return NS_OK; // Already closed. + + nsresult rv1 = Flush(); + + nsresult rv2 = mOutStream->Close(); + mOutStream = nullptr; + mConverter = nullptr; + return NS_FAILED(rv1) ? 
rv1 : rv2; +} diff --git a/intl/uconv/nsConverterOutputStream.h b/intl/uconv/nsConverterOutputStream.h new file mode 100644 index 0000000000..74b873acd5 --- /dev/null +++ b/intl/uconv/nsConverterOutputStream.h @@ -0,0 +1,39 @@ +/* vim:set expandtab ts=4 sw=2 sts=2 cin: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef NSCONVERTEROUTPUTSTREAM_H_ +#define NSCONVERTEROUTPUTSTREAM_H_ + +#include "nsIConverterOutputStream.h" +#include "nsCOMPtr.h" +#include "mozilla/Attributes.h" +#include "mozilla/Encoding.h" + +class nsIOutputStream; + +/* ff8780a5-bbb1-4bc5-8ee7-057e7bc5c925 */ +#define NS_CONVERTEROUTPUTSTREAM_CID \ + { \ + 0xff8780a5, 0xbbb1, 0x4bc5, { \ + 0x8e, 0xe7, 0x05, 0x7e, 0x7b, 0xc5, 0xc9, 0x25 \ + } \ + } + +class nsConverterOutputStream final : public nsIConverterOutputStream { + public: + nsConverterOutputStream() = default; + + NS_DECL_ISUPPORTS + NS_DECL_NSIUNICHAROUTPUTSTREAM + NS_DECL_NSICONVERTEROUTPUTSTREAM + + private: + ~nsConverterOutputStream(); + + mozilla::UniquePtr<mozilla::Encoder> mConverter; + nsCOMPtr<nsIOutputStream> mOutStream; +}; + +#endif diff --git a/intl/uconv/nsIScriptableUConv.idl b/intl/uconv/nsIScriptableUConv.idl new file mode 100644 index 0000000000..f4557dce8b --- /dev/null +++ b/intl/uconv/nsIScriptableUConv.idl @@ -0,0 +1,79 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsISupports.idl" + +interface nsIInputStream; + +%{C++ +// {0A698C44-3BFF-11d4-9649-00C0CA135B4E} +#define NS_ISCRIPTABLEUNICODECONVERTER_CID { 0x0A698C44, 0x3BFF, 0x11d4, { 0x96, 0x49, 0x00, 0xC0, 0xCA, 0x13, 0x5B, 0x4E } } +#define NS_ISCRIPTABLEUNICODECONVERTER_CONTRACTID "@mozilla.org/intl/scriptableunicodeconverter" +%} + +/** + * In new code, please use the WebIDL TextDecoder and TextEncoder + * instead. They represent bytes as Uint8Array (or as view to such + * array), which is the current best practice for representing bytes + * in JavaScript. + * + * This interface converts between UTF-16 in JavaScript strings + * and bytes transported as the unsigned value of each byte + * transported in a code unit of the same numeric value in + * a JavaScript string. + * + * @created 8/Jun/2000 + * @author Makoto Kato [m_kato@ga2.so-net.ne.jp] + */ +[scriptable, uuid(f36ee324-5c1c-437f-ba10-2b4db7a18031)] +interface nsIScriptableUnicodeConverter : nsISupports +{ + /** + * Converts the data from Unicode to one Charset. + * Returns the converted string. After converting, Finish should be called + * and its return value appended to this return value. + */ + ACString ConvertFromUnicode(in AString aSrc); + + /** + * Returns the terminator string. + * Should be called after ConvertFromUnicode() and appended to that + * function's return value. + */ + ACString Finish(); + + /** + * Converts the data from one Charset to Unicode. + */ + AString ConvertToUnicode(in ACString aSrc); + + /** + * Convert a unicode string to an array of bytes. Finish does not need to be + * called. + */ + void convertToByteArray(in AString aString, + [optional] out unsigned long aLen, + [array, size_is(aLen),retval] out octet aData); + + /** + * Converts a unicode string to an input stream. The bytes in the stream are + * encoded according to the charset attribute. + * The returned stream will be nonblocking. 
+ */ + nsIInputStream convertToInputStream(in AString aString); + + /** + * Current character set. + * + * @throw NS_ERROR_UCONV_NOCONV + * The requested charset is not supported. + */ + attribute ACString charset; + + /** + * Meaningless + */ + attribute boolean isInternal; +}; diff --git a/intl/uconv/nsITextToSubURI.idl b/intl/uconv/nsITextToSubURI.idl new file mode 100644 index 0000000000..3bb404e414 --- /dev/null +++ b/intl/uconv/nsITextToSubURI.idl @@ -0,0 +1,60 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + + +%{C++ +// {8B042E22-6F87-11d3-B3C8-00805F8A6670} +#define NS_TEXTTOSUBURI_CID { 0x8b042e22, 0x6f87, 0x11d3, { 0xb3, 0xc8, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } } +#define NS_ITEXTTOSUBURI_CONTRACTID "@mozilla.org/intl/texttosuburi;1" +%} + +[scriptable, uuid(8B042E24-6F87-11d3-B3C8-00805F8A6670)] +interface nsITextToSubURI : nsISupports +{ + ACString ConvertAndEscape(in ACString charset, in AString text); + AString UnEscapeAndConvert(in ACString charset, in ACString text); + + /** + * Unescapes the given URI fragment (for UI purpose only) + * Note: + * <ul> + * <li> escaping back the result (unescaped string) is not guaranteed to + * give the original escaped string + * <li> The URI fragment (escaped) is assumed to be in UTF-8 and converted + * to AString (UTF-16) + * <li> In case of successful conversion any resulting character listed + * in netwerk/dns/IDNCharacterBlocklist.inc (except space) is escaped + * <li> Always succeeds (callers don't need to do error checking) + * </ul> + * + * @param aURIFragment the URI (or URI fragment) to unescape + * @param aDontEscape whether to escape IDN blocklisted characters + * @return Unescaped aURIFragment converted to unicode + */ + AString 
unEscapeURIForUI(in AUTF8String aURIFragment, + [optional] in boolean aDontEscape); +%{C++ + nsresult UnEscapeURIForUI(const nsACString& aURIFragment, + nsAString& _retval) { + return UnEscapeURIForUI(aURIFragment, false, _retval); + } +%} + + /** + * Unescapes only non-ASCII characters in the given URI fragment + * note: this method assumes the URI is UTF-8 and falls back to the given + * charset if the charset is an ASCII superset + * + * @param aCharset the charset to convert from + * @param aURIFragment the URI (or URI fragment) to unescape + * @return Unescaped aURIFragment converted to unicode + * @throws NS_ERROR_UCONV_NOCONV when there is no decoder for aCharset + * or NS_ERROR_UDEC_ILLEGALINPUT in case of conversion failure + */ + [binaryname(UnEscapeNonAsciiURIJS)] + AString unEscapeNonAsciiURI(in ACString aCharset, in AUTF8String aURIFragment); +}; diff --git a/intl/uconv/nsScriptableUConv.cpp b/intl/uconv/nsScriptableUConv.cpp new file mode 100644 index 0000000000..8a9638f2ce --- /dev/null +++ b/intl/uconv/nsScriptableUConv.cpp @@ -0,0 +1,256 @@ + +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsString.h" +#include "nsIScriptableUConv.h" +#include "nsScriptableUConv.h" +#include "nsIStringStream.h" +#include "nsComponentManagerUtils.h" + +#include <tuple> + +using namespace mozilla; + +/* Implementation file */ +NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter) + +nsScriptableUnicodeConverter::nsScriptableUnicodeConverter() + : mIsInternal(false) {} + +nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() = default; + +NS_IMETHODIMP +nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc, + nsACString& _retval) { + if (!mEncoder) return NS_ERROR_FAILURE; + + // We can compute the length without replacement, because + // the replacement is only one byte long and a mappable character + // would always output something, i.e. at least one byte. + // When encoding to ISO-2022-JP, unmappables shouldn't be able + // to cause more escape sequences to be emitted than the mappable + // worst case where every input character causes an escape into + // a different state. 
+ CheckedInt<size_t> needed = + mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aSrc.Length()); + if (!needed.isValid() || needed.value() > UINT32_MAX) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto dstChars = _retval.GetMutableData(needed.value(), fallible); + if (!dstChars) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto src = Span(aSrc); + auto dst = AsWritableBytes(*dstChars); + size_t totalWritten = 0; + for (;;) { + auto [result, read, written] = + mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, false); + if (result != kInputEmpty && result != kOutputFull) { + MOZ_RELEASE_ASSERT(written < dst.Length(), + "Unmappables with one-byte replacement should not " + "exceed mappable worst case."); + dst[written++] = '?'; + } + totalWritten += written; + if (result == kInputEmpty) { + MOZ_ASSERT(totalWritten <= UINT32_MAX); + if (!_retval.SetLength(totalWritten, fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + return NS_OK; + } + src = src.From(read); + dst = dst.From(written); + } +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::Finish(nsACString& _retval) { + // The documentation for this method says it should be called after + // ConvertFromUnicode(). However, our own tests called it after + // convertFromByteArray(), i.e. when *decoding*. + // Assuming that there exists extensions that similarly call + // this at the wrong time, let's deal. In general, it is a design + // error for this class to handle conversions in both directions. + if (!mEncoder) { + _retval.Truncate(); + mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder); + return NS_OK; + } + // If we are encoding to ISO-2022-JP, potentially + // transition back to the ASCII state. The buffer + // needs to be large enough for an additional NCR, + // though. 
+ _retval.SetLength(13); + auto dst = AsWritableBytes(_retval.GetMutableData(13)); + Span<char16_t> src(nullptr); + uint32_t result; + size_t read; + size_t written; + std::tie(result, read, written, std::ignore) = + mEncoder->EncodeFromUTF16(src, dst, true); + MOZ_ASSERT(!read); + MOZ_ASSERT(result == kInputEmpty); + _retval.SetLength(written); + + mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder); + mEncoder->Encoding()->NewEncoderInto(*mEncoder); + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc, + nsAString& _retval) { + if (!mDecoder) return NS_ERROR_FAILURE; + + uint32_t length = aSrc.Length(); + + CheckedInt<size_t> needed = mDecoder->MaxUTF16BufferLength(length); + if (!needed.isValid() || needed.value() > UINT32_MAX) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto dst = _retval.GetMutableData(needed.value(), fallible); + if (!dst) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto src = + Span(reinterpret_cast<const uint8_t*>(aSrc.BeginReading()), length); + uint32_t result; + size_t read; + size_t written; + // The UTF-8 decoder used to throw regardless of the error behavior. + // Simulating the old behavior for compatibility with legacy callers. + // If callers want control over the behavior, they should switch to + // TextDecoder. 
+ if (mDecoder->Encoding() == UTF_8_ENCODING) { + std::tie(result, read, written) = + mDecoder->DecodeToUTF16WithoutReplacement(src, *dst, false); + if (result != kInputEmpty) { + return NS_ERROR_UDEC_ILLEGALINPUT; + } + } else { + std::tie(result, read, written, std::ignore) = + mDecoder->DecodeToUTF16(src, *dst, false); + } + MOZ_ASSERT(result == kInputEmpty); + MOZ_ASSERT(read == length); + MOZ_ASSERT(written <= needed.value()); + if (!_retval.SetLength(written, fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::ConvertToByteArray(const nsAString& aString, + uint32_t* aLen, + uint8_t** _aData) { + if (!mEncoder) return NS_ERROR_FAILURE; + + CheckedInt<size_t> needed = + mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aString.Length()); + if (!needed.isValid() || needed.value() > UINT32_MAX) { + return NS_ERROR_OUT_OF_MEMORY; + } + + uint8_t* data = (uint8_t*)malloc(needed.value()); + if (!data) { + return NS_ERROR_OUT_OF_MEMORY; + } + auto src = Span(aString); + auto dst = Span(data, needed.value()); + size_t totalWritten = 0; + for (;;) { + auto [result, read, written] = + mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, true); + if (result != kInputEmpty && result != kOutputFull) { + // There's always room for one byte in the case of + // an unmappable character, because otherwise + // we'd have gotten `kOutputFull`. 
+ dst[written++] = '?'; + } + totalWritten += written; + if (result == kInputEmpty) { + *_aData = data; + MOZ_ASSERT(totalWritten <= UINT32_MAX); + *aLen = totalWritten; + return NS_OK; + } + src = src.From(read); + dst = dst.From(written); + } +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::ConvertToInputStream(const nsAString& aString, + nsIInputStream** _retval) { + nsresult rv; + nsCOMPtr<nsIStringInputStream> inputStream = + do_CreateInstance("@mozilla.org/io/string-input-stream;1", &rv); + if (NS_FAILED(rv)) return rv; + + uint8_t* data; + uint32_t dataLen; + rv = ConvertToByteArray(aString, &dataLen, &data); + if (NS_FAILED(rv)) return rv; + + rv = inputStream->AdoptData(reinterpret_cast<char*>(data), dataLen); + if (NS_FAILED(rv)) { + free(data); + return rv; + } + + NS_ADDREF(*_retval = inputStream); + return rv; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::GetCharset(nsACString& aCharset) { + if (!mDecoder) { + aCharset.Truncate(); + } else { + mDecoder->Encoding()->Name(aCharset); + } + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::SetCharset(const nsACString& aCharset) { + return InitConverter(aCharset); +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::GetIsInternal(bool* aIsInternal) { + *aIsInternal = mIsInternal; + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal) { + mIsInternal = aIsInternal; + return NS_OK; +} + +nsresult nsScriptableUnicodeConverter::InitConverter( + const nsACString& aCharset) { + mEncoder = nullptr; + mDecoder = nullptr; + + auto encoding = Encoding::ForLabelNoReplacement(aCharset); + if (!encoding) { + return NS_ERROR_UCONV_NOCONV; + } + if (!(encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING)) { + mEncoder = encoding->NewEncoder(); + } + mDecoder = encoding->NewDecoderWithBOMRemoval(); + return NS_OK; +} diff --git a/intl/uconv/nsScriptableUConv.h b/intl/uconv/nsScriptableUConv.h new file mode 100644 index 0000000000..059a4b430c 
--- /dev/null +++ b/intl/uconv/nsScriptableUConv.h @@ -0,0 +1,34 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __nsScriptableUConv_h_ +#define __nsScriptableUConv_h_ + +#include "nsIScriptableUConv.h" +#include "nsCOMPtr.h" +#include "mozilla/Encoding.h" + +class nsScriptableUnicodeConverter : public nsIScriptableUnicodeConverter { + public: + NS_DECL_ISUPPORTS + NS_DECL_NSISCRIPTABLEUNICODECONVERTER + + nsScriptableUnicodeConverter(); + + protected: + virtual ~nsScriptableUnicodeConverter(); + + mozilla::UniquePtr<mozilla::Encoder> mEncoder; + mozilla::UniquePtr<mozilla::Decoder> mDecoder; + bool mIsInternal; + + nsresult FinishWithLength(char** _retval, int32_t* aLength); + nsresult ConvertFromUnicodeWithLength(const nsAString& aSrc, int32_t* aOutLen, + char** _retval); + + nsresult InitConverter(const nsACString& aCharset); +}; + +#endif diff --git a/intl/uconv/nsTextToSubURI.cpp b/intl/uconv/nsTextToSubURI.cpp new file mode 100644 index 0000000000..e70d9ccbd8 --- /dev/null +++ b/intl/uconv/nsTextToSubURI.cpp @@ -0,0 +1,178 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#include "nsString.h" +#include "nsITextToSubURI.h" +#include "nsEscape.h" +#include "nsTextToSubURI.h" +#include "nsCRT.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/Encoding.h" +#include "mozilla/Preferences.h" +#include "mozilla/TextUtils.h" +#include "mozilla/Utf8.h" + +using namespace mozilla; + +nsTextToSubURI::~nsTextToSubURI() = default; + +NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI) + +NS_IMETHODIMP +nsTextToSubURI::ConvertAndEscape(const nsACString& aCharset, + const nsAString& aText, nsACString& aOut) { + auto encoding = Encoding::ForLabelNoReplacement(aCharset); + if (!encoding) { + aOut.Truncate(); + return NS_ERROR_UCONV_NOCONV; + } + nsresult rv; + nsAutoCString intermediate; + std::tie(rv, std::ignore) = encoding->Encode(aText, intermediate); + if (NS_FAILED(rv)) { + aOut.Truncate(); + return rv; + } + bool ok = NS_Escape(intermediate, aOut, url_XPAlphas); + if (!ok) { + aOut.Truncate(); + return NS_ERROR_OUT_OF_MEMORY; + } + return NS_OK; +} + +NS_IMETHODIMP +nsTextToSubURI::UnEscapeAndConvert(const nsACString& aCharset, + const nsACString& aText, nsAString& aOut) { + auto encoding = Encoding::ForLabelNoReplacement(aCharset); + if (!encoding) { + aOut.Truncate(); + return NS_ERROR_UCONV_NOCONV; + } + nsAutoCString unescaped(aText); + NS_UnescapeURL(unescaped); + auto rv = encoding->DecodeWithoutBOMHandling(unescaped, aOut); + if (NS_SUCCEEDED(rv)) { + return NS_OK; + } + return rv; +} + +static bool statefulCharset(const char* charset) { + // HZ, UTF-7 and the CN and KR ISO-2022 variants are no longer in + // mozilla-central but keeping them here just in case for the benefit of + // comm-central. 
+ if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-") - 1) || + !nsCRT::strcasecmp(charset, "UTF-7") || + !nsCRT::strcasecmp(charset, "HZ-GB-2312")) + return true; + + return false; +} + +// static +nsresult nsTextToSubURI::convertURItoUnicode(const nsCString& aCharset, + const nsCString& aURI, + nsAString& aOut) { + // check for a stateful 7-bit encoding: with one of those, the data may not + // be ASCII after we decode, so the ASCII/UTF-8 shortcuts must be skipped + bool isStatefulCharset = statefulCharset(aCharset.get()); + + if (!isStatefulCharset) { + if (IsAscii(aURI)) { + CopyASCIItoUTF16(aURI, aOut); + return NS_OK; + } + if (IsUtf8(aURI)) { + CopyUTF8toUTF16(aURI, aOut); + return NS_OK; + } + } + + // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8. + NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG); + + auto encoding = Encoding::ForLabelNoReplacement(aCharset); + if (!encoding) { + aOut.Truncate(); + return NS_ERROR_UCONV_NOCONV; + } + return encoding->DecodeWithoutBOMHandlingAndWithoutReplacement(aURI, aOut); +} + +NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString& aURIFragment, + bool aDontEscape, + nsAString& _retval) { + nsAutoCString unescapedSpec; + // skip control octets (0x00 - 0x1f and 0x7f) when unescaping + NS_UnescapeURL(PromiseFlatCString(aURIFragment), + esc_SkipControl | esc_AlwaysCopy, unescapedSpec); + + // in case of failure, return escaped URI + // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte + // sequences are also considered failure in this context + if (convertURItoUnicode("UTF-8"_ns, unescapedSpec, _retval) != NS_OK) { + // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8 + CopyUTF8toUTF16(aURIFragment, _retval); + } + + if (aDontEscape) { + return NS_OK; + } + + // If there are any characters that are unsafe for URIs, reescape those. 
+ if (mIDNBlocklist.IsEmpty()) { + mozilla::net::InitializeBlocklist(mIDNBlocklist); + // we allow SPACE and IDEOGRAPHIC SPACE in this method + mozilla::net::RemoveCharFromBlocklist(u' ', mIDNBlocklist); + mozilla::net::RemoveCharFromBlocklist(0x3000, mIDNBlocklist); + } + + MOZ_ASSERT(!mIDNBlocklist.IsEmpty()); + const nsPromiseFlatString& unescapedResult = PromiseFlatString(_retval); + nsString reescapedSpec; + _retval = NS_EscapeURL( + unescapedResult, + [&](char16_t aChar) -> bool { + return mozilla::net::CharInBlocklist(aChar, mIDNBlocklist); + }, + reescapedSpec); + + return NS_OK; +} + +NS_IMETHODIMP +nsTextToSubURI::UnEscapeNonAsciiURIJS(const nsACString& aCharset, + const nsACString& aURIFragment, + nsAString& _retval) { + return UnEscapeNonAsciiURI(aCharset, aURIFragment, _retval); +} + +// static +nsresult nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString& aCharset, + const nsACString& aURIFragment, + nsAString& _retval) { + nsAutoCString unescapedSpec; + NS_UnescapeURL(PromiseFlatCString(aURIFragment), + esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec); + // leave the URI as it is if it's not UTF-8 and aCharset is not an ASCII + // superset since converting "http:" with such an encoding is always a bad + // idea. + if (!IsUtf8(unescapedSpec) && + (aCharset.LowerCaseEqualsLiteral("utf-16") || + aCharset.LowerCaseEqualsLiteral("utf-16be") || + aCharset.LowerCaseEqualsLiteral("utf-16le") || + aCharset.LowerCaseEqualsLiteral("utf-7") || + aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))) { + CopyASCIItoUTF16(aURIFragment, _retval); + return NS_OK; + } + + nsresult rv = + convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, _retval); + // NS_OK_UDEC_MOREINPUT is a success code, so caller can't catch the error + // if the string ends with a valid (but incomplete) sequence. + return rv == NS_OK_UDEC_MOREINPUT ? 
NS_ERROR_UDEC_ILLEGALINPUT : rv; +} + +//---------------------------------------------------------------------- diff --git a/intl/uconv/nsTextToSubURI.h b/intl/uconv/nsTextToSubURI.h new file mode 100644 index 0000000000..1eaeb554dc --- /dev/null +++ b/intl/uconv/nsTextToSubURI.h @@ -0,0 +1,36 @@ +// -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- +// vim: set ts=2 et sw=2 tw=80: +// This Source Code is subject to the terms of the Mozilla Public License +// version 2.0 (the "License"). You can obtain a copy of the License at +// http://mozilla.org/MPL/2.0/. +#ifndef nsTextToSubURI_h__ +#define nsTextToSubURI_h__ + +#include "nsITextToSubURI.h" +#include "nsString.h" +#include "nsTArray.h" +#include "mozilla/net/IDNBlocklistUtils.h" + +class nsTextToSubURI : public nsITextToSubURI { + NS_DECL_ISUPPORTS + NS_DECL_NSITEXTTOSUBURI + + // Thread-safe function for C++ callers + static nsresult UnEscapeNonAsciiURI(const nsACString& aCharset, + const nsACString& aURIFragment, + nsAString& _retval); + + private: + virtual ~nsTextToSubURI(); + + // We assume that the URI is encoded as UTF-8. + static nsresult convertURItoUnicode(const nsCString& aCharset, + const nsCString& aURI, + nsAString& _retval); + + // Characters defined in netwerk/dns/IDNCharacterBlocklist.inc or via the + // network.IDN.extra_allowed_chars and network.IDN.extra_blocked_chars prefs. + nsTArray<mozilla::net::BlocklistRange> mIDNBlocklist; +}; + +#endif // nsTextToSubURI_h__ diff --git a/intl/uconv/tests/gtest/TestShortRead.cpp b/intl/uconv/tests/gtest/TestShortRead.cpp new file mode 100644 index 0000000000..393f5e0027 --- /dev/null +++ b/intl/uconv/tests/gtest/TestShortRead.cpp @@ -0,0 +1,109 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "gtest/gtest.h" +#include "mozilla/ErrorNames.h" +#include "nsCOMPtr.h" +#include "nsConverterInputStream.h" +#include "nsIInputStream.h" +#include "nsISupports.h" +#include "nsStringStream.h" + +namespace { + +class ShortReadWrapper final : public nsIInputStream { + public: + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSIINPUTSTREAM + + template <size_t N> + ShortReadWrapper(const uint32_t (&aShortReads)[N], + nsIInputStream* aBaseStream) + : mShortReadIter(std::begin(aShortReads)), + mShortReadEnd(std::end(aShortReads)), + mBaseStream(aBaseStream) {} + + ShortReadWrapper(const ShortReadWrapper&) = delete; + ShortReadWrapper& operator=(const ShortReadWrapper&) = delete; + + private: + ~ShortReadWrapper() = default; + + const uint32_t* mShortReadIter; + const uint32_t* mShortReadEnd; + nsCOMPtr<nsIInputStream> mBaseStream; +}; + +NS_IMPL_ISUPPORTS(ShortReadWrapper, nsIInputStream) + +NS_IMETHODIMP +ShortReadWrapper::Close() { return mBaseStream->Close(); } + +NS_IMETHODIMP +ShortReadWrapper::Available(uint64_t* aAvailable) { + nsresult rv = mBaseStream->Available(aAvailable); + NS_ENSURE_SUCCESS(rv, rv); + + if (mShortReadIter != mShortReadEnd) { + *aAvailable = std::min(uint64_t(*mShortReadIter), *aAvailable); + } + return NS_OK; +} + +NS_IMETHODIMP +ShortReadWrapper::StreamStatus() { return mBaseStream->StreamStatus(); } + +NS_IMETHODIMP +ShortReadWrapper::Read(char* aBuf, uint32_t aCount, uint32_t* _retval) { + if (mShortReadIter != mShortReadEnd) { + aCount = std::min(*mShortReadIter, aCount); + } + + nsresult rv = mBaseStream->Read(aBuf, aCount, _retval); + if (NS_SUCCEEDED(rv) && mShortReadIter != mShortReadEnd) { + ++mShortReadIter; + } + return rv; +} + +NS_IMETHODIMP +ShortReadWrapper::ReadSegments(nsWriteSegmentFun aWriter, void* aClosure, + uint32_t aCount, uint32_t* _retval) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP 
+ShortReadWrapper::IsNonBlocking(bool* _retval) { + return mBaseStream->IsNonBlocking(_retval); +} + +} // namespace + +TEST(ConverterStreamShortRead, ShortRead) +{ + uint8_t bytes[] = {0xd8, 0x35, 0xdc, 0x20}; + nsCOMPtr<nsIInputStream> baseStream; + ASSERT_TRUE(NS_SUCCEEDED(NS_NewByteInputStream(getter_AddRefs(baseStream), + AsChars(mozilla::Span(bytes)), + NS_ASSIGNMENT_COPY))); + + static const uint32_t kShortReads[] = {1, 2, 1}; + nsCOMPtr<nsIInputStream> shortStream = + new ShortReadWrapper(kShortReads, baseStream); + + RefPtr<nsConverterInputStream> unicharStream = new nsConverterInputStream(); + ASSERT_TRUE(NS_SUCCEEDED( + unicharStream->Init(shortStream, "UTF-16BE", 4096, + nsIConverterInputStream::ERRORS_ARE_FATAL))); + + uint32_t read; + nsAutoString result; + ASSERT_TRUE( + NS_SUCCEEDED(unicharStream->ReadString(UINT32_MAX, result, &read))); + + ASSERT_EQ(read, 2u); + ASSERT_TRUE(result == u"\U0001d420"); +} diff --git a/intl/uconv/tests/gtest/moz.build b/intl/uconv/tests/gtest/moz.build new file mode 100644 index 0000000000..969fb52c7e --- /dev/null +++ b/intl/uconv/tests/gtest/moz.build @@ -0,0 +1,11 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +UNIFIED_SOURCES += [ + "TestShortRead.cpp", +] + +FINAL_LIBRARY = "xul-gtest" diff --git a/intl/uconv/tests/mochitest.ini b/intl/uconv/tests/mochitest.ini new file mode 100644 index 0000000000..0f13e77971 --- /dev/null +++ b/intl/uconv/tests/mochitest.ini @@ -0,0 +1,14 @@ +[DEFAULT] + +[test_bug335816.html] +[test_bug843434.html] +[test_bug959058-1.html] +[test_bug959058-2.html] +[test_long_doc.html] +[test_singlebyte_overconsumption.html] +[test_unicode_noncharacterescapes.html] +[test_unicode_noncharacters_gb18030.html] +[test_unicode_noncharacters_utf8.html] +[test_utf8_overconsumption.html] +[test_big5_encoder.html] +[test_ncr_fallback.html] diff --git a/intl/uconv/tests/moz.build b/intl/uconv/tests/moz.build new file mode 100644 index 0000000000..888186fb26 --- /dev/null +++ b/intl/uconv/tests/moz.build @@ -0,0 +1,13 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +TEST_DIRS += [ + "gtest", +] + +XPCSHELL_TESTS_MANIFESTS += ["unit/xpcshell.ini"] + +MOCHITEST_MANIFESTS += ["mochitest.ini"] diff --git a/intl/uconv/tests/stressgb.pl b/intl/uconv/tests/stressgb.pl new file mode 100644 index 0000000000..5b37fb63fb --- /dev/null +++ b/intl/uconv/tests/stressgb.pl @@ -0,0 +1,23 @@ +#!/usr/bin/perl +use LWP::Simple; +use IO::Handle; +$stdout = *STDOUT; +open(RES , ">resultlog.txt") || die "cannot open result log file"; +#system("rm alldiff.txt in*.txt out*.txt"); +for($i=10;$i<909;$i++) +{ + RES->printf("Test Page %d \n", $i); + $url = "http://people.netscape.com/ftang/testscript/gb18030/gbtext.cgi?page=" . $i; + RES->printf( "URL = %s\n", $url); + $tmpfile = "> in". $i . ".txt"; + open STDOUT, $tmpfile || RES->print("cannot open " . $tmpfile . 
"\n"); + getprint $url; + $cmd2 = "../../../dist/win32_d.obj/bin/nsconv -f GB18030 -t GB18030 in" . $i . ".txt out" . $i . ".txt >err"; + $cmd3 = "diff -u in" . $i . ".txt out" . $i . ".txt >> alldiff.txt"; + RES->printf( "Run '%s'\n", $cmd2); + $st2 = system($cmd2); + RES->printf( "result = '%d'\n", $st2); + RES->printf( "Run '%s'\n", $cmd3); + $st3 = system($cmd3); + RES->printf( "result = '%d'\n", $st3); +} diff --git a/intl/uconv/tests/test_big5_encoder.html b/intl/uconv/tests/test_big5_encoder.html new file mode 100644 index 0000000000..7e86683f00 --- /dev/null +++ b/intl/uconv/tests/test_big5_encoder.html @@ -0,0 +1,43 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=912470 +--> +<head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8"> + <title>Test for Unicode non-characters</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" /> +</head> +<body onload="test()"> +<pre id="test"> +<script class="testbody" type="text/javascript"> +/* NOTE: + * When we make our data: URL origin work as in Blink, this test will fail. + * Hopefully, by that time our URL parser has become spec-compliant, so that + * we'll pass the Web Platform Test for the big5 encoder + * (testing/web-platform/tests/encoding/big5-encoder.html) and this test can + * simply be removed. 
+ */ +SimpleTest.waitForExplicitFinish(); + +function test() { + var f = document.getElementsByTagName("iframe")[0]; + f.onload = function() { + var href = SpecialPowers.wrap(f).contentWindow.location.href; + var index = href.indexOf("?foo="); + var actual = href.substring(index + 5); + var expected = "h%26%2340614%3Bi%26%23156267%3Bj%A1%40k%A3%E1l%A4%40m%C8%A4n%C8%CDo%FE%FEp%26%238365%3Bq%FDjr%F9%F9s%26%23128169%3Bt"; + is(actual, expected, "Should have gotten the expected encode."); + SimpleTest.finish(); + } + SpecialPowers.wrap(f).contentDocument.forms[0].submit(); +} +</script> +</pre> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=912470">Mozilla Bug 912470</a> +<p id="display"></p> +<div id="content" style="display: none"><iframe src="data:text/html;charset=big5,<form><input name=foo value=h&%23x9EA6;i&%23x2626B;j&%23x3000;k&%23x20AC;l&%23x4E00;m&%23x27607;n&%23xFFE2;o&%23x79D4;p&%23x20AD;q&%23x203B5;r&%23x2550;s&%23x1F4A9;t></form>"> +</div> +</body> +</html> diff --git a/intl/uconv/tests/test_bug335816.html b/intl/uconv/tests/test_bug335816.html new file mode 100644 index 0000000000..58fe538b5d --- /dev/null +++ b/intl/uconv/tests/test_bug335816.html @@ -0,0 +1,40 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=335816 +--> +<head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8"> + <title>Test for Bug 335816</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" /> +</head> +<body onload="test()"> +<pre id="test"> +<script class="testbody" type="text/javascript"> + +/** Test for Bug 335816 **/ +function test() +{ + ok($("display").innerHTML != "Keep that breathless charm", "Hidden script not executed"); + SimpleTest.finish(); +} + +function WontYouPleaseArrangeIt() +{ + $("display").innerHTML = "Keep that breathless charm"; +} + +SimpleTest.waitForExplicitFinish(); +</script> +</pre> +<a 
target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=335816">Mozilla Bug 335816</a> +<p id="display">Lovely ... Never, ever change.</p> + +<!-- There is a UTF-8 BOM just here - - - + | which should not be stripped + v --> +<div id="content" style="display: none"><script>WontYouPleaseArrangeIt();</script></div> +</body> +</html> + diff --git a/intl/uconv/tests/test_bug843434.html b/intl/uconv/tests/test_bug843434.html new file mode 100644 index 0000000000..d79ad70e6a --- /dev/null +++ b/intl/uconv/tests/test_bug843434.html @@ -0,0 +1,27 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=843434 +--> +<head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8"> + <title>Test for Bug 843434</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" /> + <!-- This next 'link' is part of the test, it may cause an assertion. --> + <link rel=stylesheet href="data:text/css;charset=ISO-2022-JP,%cc"> +</head> +<body onload="test()"> +<pre id="test"> +<script class="testbody" type="text/javascript"> +function test() +{ + ok(true, "Test for no prior assertion."); + SimpleTest.finish(); +} +SimpleTest.waitForExplicitFinish(); +</script> +</pre> +</body> +</html> + diff --git a/intl/uconv/tests/test_bug959058-1.html b/intl/uconv/tests/test_bug959058-1.html new file mode 100644 index 0000000000..85d45a8841 --- /dev/null +++ b/intl/uconv/tests/test_bug959058-1.html @@ -0,0 +1,28 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=959058 +--> +<head> + <meta charset="gbk"> + <title>Test for Bug 959058</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> + <script type="application/javascript"> + + /** Test for Bug 959058 **/ + + is("92", "\uD83C\uDF54", "Should have gotten a hamburger."); + + </script> +</head> +<body> +<a 
target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=959058">Mozilla Bug 959058</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +</pre> +</body> +</html> diff --git a/intl/uconv/tests/test_bug959058-2.html b/intl/uconv/tests/test_bug959058-2.html new file mode 100644 index 0000000000..86bf500e1b --- /dev/null +++ b/intl/uconv/tests/test_bug959058-2.html @@ -0,0 +1,28 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=959058 +--> +<head> + <meta charset="gbk"> + <title>Test for Bug 959058</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> + <script type="application/javascript"> + + /** Test for Bug 959058 **/ + + is("", "\u20AC", "Should have gotten euro."); + + </script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=959058">Mozilla Bug 959058</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +</pre> +</body> +</html> diff --git a/intl/uconv/tests/test_ncr_fallback.html b/intl/uconv/tests/test_ncr_fallback.html new file mode 100644 index 0000000000..846f18be8f --- /dev/null +++ b/intl/uconv/tests/test_ncr_fallback.html @@ -0,0 +1,74 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1202366 +--> +<head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8"> + <title>Test for unpaired surrogates</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" /> +</head> +<body onload="step()"> +<pre id="test"> +<script class="testbody" type="text/javascript"> +/* NOTE: + * When we make our data: URL origin work as in Blink, this test will fail. + * Don't let this test block alignment of data: URL origin with Blink. 
+ */ +SimpleTest.waitForExplicitFinish(); + +var expectations = [ + "%26%2365533%3B", + "a%26%2365533%3B", + "%26%2365533%3Ba", + "a%26%2365533%3Ba", + "%26%2365533%3B", + "a%26%2365533%3B", + "%26%2365533%3Ba", + "a%26%2365533%3Ba", + "%26%23128169%3B", + "%26%23128169%3B", + "%1B%24B%22%29%1B%28B", + "%1B%24B%22%29%1B%28B%26%23128169%3B", +]; + +var i = 0; + +function step() { + var f = document.getElementsByTagName("iframe")[i]; + f.onload = function() { + var href = SpecialPowers.wrap(f).contentWindow.location.href; + var index = href.indexOf("?foo="); + var actual = href.substring(index + 5); + var expected = expectations[i]; + is(actual, expected, "Should have gotten the expected encode."); + i++ + if (i == document.getElementsByTagName("iframe").length) { + SimpleTest.finish(); + } else { + step(); + } + } + SpecialPowers.wrap(f).contentDocument.forms[0].submit(); +} +</script> +</pre> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1202366">Mozilla Bug 1202366</a> +<p id="display"></p> +<div id="content" style="display: none"> +<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=\uD83D></form>');</script>"></iframe> +<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=a\uD83D></form>');</script>"></iframe> +<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=\uD83Da></form>');</script>"></iframe> +<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=a\uD83Da></form>');</script>"></iframe> +<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=\uDCA9></form>');</script>"></iframe> +<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=a\uDCA9></form>');</script>"></iframe> +<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo 
value=\uDCA9a></form>');</script>"></iframe> +<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=a\uDCA9a></form>');</script>"></iframe> +<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=\uD83D\uDCA9></form>');</script>"></iframe> +<iframe src="data:text/html;charset=iso-2022-jp,<script>document.write('<form><input name=foo value=\uD83D\uDCA9></form>');</script>"></iframe> +<iframe src="data:text/html;charset=iso-2022-jp,<script>document.write('<form><input name=foo value=\u3012></form>');</script>"></iframe> +<iframe src="data:text/html;charset=iso-2022-jp,<script>document.write('<form><input name=foo value=\u3012\uD83D\uDCA9></form>');</script>"></iframe> +</div> +</body> +</html> diff --git a/intl/uconv/tests/test_singlebyte_overconsumption.html b/intl/uconv/tests/test_singlebyte_overconsumption.html new file mode 100644 index 0000000000..3aeeb928ec --- /dev/null +++ b/intl/uconv/tests/test_singlebyte_overconsumption.html @@ -0,0 +1,33 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=564679 +--> +<head> + <meta http-equiv="Content-type" content="text/html; charset=windows-1253"> + <title>Test for undefined codepoints</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" /> +</head> +<body onload="test()"> +<pre id="test"> +<script class="testbody" type="text/javascript"> + +/** test that single byte decoding resynchronizes after undefined codepoints */ +function test() +{ + is($("display").innerHTML, "All good.", "No overconsumption"); + SimpleTest.finish(); +} + + SimpleTest.waitForExplicitFinish(); +</script> +</pre> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=564679">Mozilla Bug 564679</a> +<p id="display">Evil.</p> +<div id="content" style="display: none"></div> + <script type="text/javascript"> + $("display").innerHTML = "All 
good."; + </script> -> +</body> +</html> diff --git a/intl/uconv/tests/test_unicode_noncharacterescapes.html b/intl/uconv/tests/test_unicode_noncharacterescapes.html new file mode 100644 index 0000000000..e44f8d782b --- /dev/null +++ b/intl/uconv/tests/test_unicode_noncharacterescapes.html @@ -0,0 +1,303 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=445886 +--> +<head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8"> + <title>Test for Unicode non-characters</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" /> +</head> +<body onload="Inject()"> +<pre id="test"> +<script class="testbody" type="text/javascript"> + +/* eslint-disable no-eval */ + +/** Test that unicode non-characters are not discarded **/ +function test() +{ + is($("display").innerHTML,"All good.", "Noncharacters not stripped"); + SimpleTest.finish(); +} + +// eslint-disable-next-line complexity +function Inject() +{ + // script fragments containing Unicode non-characters + try { + // U+FDD0 + eval("$(\"display\").inner\ufdd0HTML += \" U+FDD0 is evil\""); + } catch(e) {} + try { + // U+FDD1 + eval("$(\"display\").inner\ufdd1HTML += \" U+FDD1 is evil\""); + } catch(e) {} + try { + // U+FDD2 + eval("$(\"display\").inner\ufdd2HTML += \" U+FDD2 is evil\""); + } catch(e) {} + try { + // U+FDD3 + eval("$(\"display\").inner\ufdd3HTML += \" U+FDD3 is evil\""); + } catch(e) {} + try { + // U+FDD4 + eval("$(\"display\").inner\ufdd4HTML += \" U+FDD4 is evil\""); + } catch(e) {} + try { + // U+FDD5 + eval("$(\"display\").inner\ufdd5HTML += \" U+FDD5 is evil\""); + } catch(e) {} + try { + // U+FDD6 + eval("$(\"display\").inner\ufdd6HTML += \" U+FDD6 is evil\""); + } catch(e) {} + try { + // U+FDD7 + eval("$(\"display\").inner\ufdd7HTML += \" U+FDD7 is evil\""); + } catch(e) {} + try { + // U+FDD8 + eval("$(\"display\").inner\ufdd8HTML += \" U+FDD8 is evil\""); + } 
catch(e) {} + try { + // U+FDD9 + eval("$(\"display\").inner\ufdd9HTML += \" U+FDD9 is evil\""); + } catch(e) {} + try { + // U+FDDA + eval("$(\"display\").inner\ufddaHTML += \" U+FDDA is evil\""); + } catch(e) {} + try { + // U+FDDB + eval("$(\"display\").inner\ufddbHTML += \" U+FDDB is evil\""); + } catch(e) {} + try { + // U+FDDC + eval("$(\"display\").inner\ufddcHTML += \" U+FDDC is evil\""); + } catch(e) {} + try { + // U+FDDD + eval("$(\"display\").inner\ufdddHTML += \" U+FDDD is evil\""); + } catch(e) {} + try { + // U+FDDE + eval("$(\"display\").inner\ufddeHTML += \" U+FDDE is evil\""); + } catch(e) {} + try { + // U+FDDF + eval("$(\"display\").inner\ufddfHTML += \" U+FDDF is evil\""); + } catch(e) {} + try { + // U+FDE0 + eval("$(\"display\").inner\ufde0HTML += \" U+FDE0 is evil\""); + } catch(e) {} + try { + // U+FDE1 + eval("$(\"display\").inner\ufde1HTML += \" U+FDE1 is evil\""); + } catch(e) {} + try { + // U+FDE2 + eval("$(\"display\").inner\ufde2HTML += \" U+FDE2 is evil\""); + } catch(e) {} + try { + // U+FDE3 + eval("$(\"display\").inner\ufde3HTML += \" U+FDE3 is evil\""); + } catch(e) {} + try { + // U+FDE4 + eval("$(\"display\").inner\ufde4HTML += \" U+FDE4 is evil\""); + } catch(e) {} + try { + // U+FDE5 + eval("$(\"display\").inner\ufde5HTML += \" U+FDE5 is evil\""); + } catch(e) {} + try { + // U+FDE6 + eval("$(\"display\").inner\ufde6HTML += \" U+FDE6 is evil\""); + } catch(e) {} + try { + // U+FDE7 + eval("$(\"display\").inner\ufde7HTML += \" U+FDE7 is evil\""); + } catch(e) {} + try { + // U+FDE8 + eval("$(\"display\").inner\ufde8HTML += \" U+FDE8 is evil\""); + } catch(e) {} + try { + // U+FDE9 + eval("$(\"display\").inner\ufde9HTML += \" U+FDE9 is evil\""); + } catch(e) {} + try { + // U+FDEA + eval("$(\"display\").inner\ufdeaHTML += \" U+FDEA is evil\""); + } catch(e) {} + try { + // U+FDEB + eval("$(\"display\").inner\ufdebHTML += \" U+FDEB is evil\""); + } catch(e) {} + try { + // U+FDEC + eval("$(\"display\").inner\ufdecHTML += \" 
U+FDEC is evil\""); + } catch(e) {} + try { + // U+FDED + eval("$(\"display\").inner\ufdedHTML += \" U+FDED is evil\""); + } catch(e) {} + try { + // U+FDEE + eval("$(\"display\").inner\ufdeeHTML += \" U+FDEE is evil\""); + } catch(e) {} + try { + // U+FDEF + eval("$(\"display\").inner\ufdefHTML += \" U+FDEF is evil\""); + } catch(e) {} + try { + // U+FFFE + eval("$(\"display\").inner\ufffeHTML += \" U+FFFE is evil\""); + } catch(e) {} + try { + // U+FFFF + eval("$(\"display\").inner\uffffHTML += \" U+FFFF is evil\""); + } catch(e) {} + try { + // U+1FFFE + eval("$(\"display\").inner\ud83f\udffeHTML += \" U+1FFFE is evil\""); + } catch(e) {} + try { + // U+1FFFF + eval("$(\"display\").inner\ud83f\udfffHTML += \" U+1FFFF is evil\""); + } catch(e) {} + try { + // U+2FFFE + eval("$(\"display\").inner\ud87f\udffeHTML += \" U+2FFFE is evil\""); + } catch(e) {} + try { + // U+2FFFF + eval("$(\"display\").inner\ud87f\udfffHTML += \" U+2FFFF is evil\""); + } catch(e) {} + try { + // U+3FFFE + eval("$(\"display\").inner\ud8bf\udffeHTML += \" U+3FFFE is evil\""); + } catch(e) {} + try { + // U+3FFFF + eval("$(\"display\").inner\ud8bf\udfffHTML += \" U+3FFFF is evil\""); + } catch(e) {} + try { + // U+4FFFE + eval("$(\"display\").inner\ud8ff\udffeHTML += \" U+4FFFE is evil\""); + } catch(e) {} + try { + // U+4FFFF + eval("$(\"display\").inner\ud8ff\udfffHTML += \" U+4FFFF is evil\""); + } catch(e) {} + try { + // U+5FFFE + eval("$(\"display\").inner\ud93f\udffeHTML += \" U+5FFFE is evil\""); + } catch(e) {} + try { + // U+5FFFF + eval("$(\"display\").inner\ud93f\udfffHTML += \" U+5FFFF is evil\""); + } catch(e) {} + try { + // U+6FFFE + eval("$(\"display\").inner\ud97f\udffeHTML += \" U+6FFFE is evil\""); + } catch(e) {} + try { + // U+6FFFF + eval("$(\"display\").inner\ud97f\udfffHTML += \" U+6FFFF is evil\""); + } catch(e) {} + try { + // U+7FFFE + eval("$(\"display\").inner\ud9bf\udffeHTML += \" U+7FFFE is evil\""); + } catch(e) {} + try { + // U+7FFFF + 
eval("$(\"display\").inner\ud9bf\udfffHTML += \" U+7FFFF is evil\""); + } catch(e) {} + try { + // U+8FFFE + eval("$(\"display\").inner\ud9ff\udffeHTML += \" U+8FFFE is evil\""); + } catch(e) {} + try { + // U+8FFFF + eval("$(\"display\").inner\ud9ff\udfffHTML += \" U+8FFFF is evil\""); + } catch(e) {} + try { + // U+9FFFE + eval("$(\"display\").inner\uda3f\udffeHTML += \" U+9FFFE is evil\""); + } catch(e) {} + try { + // U+9FFFF + eval("$(\"display\").inner\uda3f\udfffHTML += \" U+9FFFF is evil\""); + } catch(e) {} + try { + // U+AFFFE + eval("$(\"display\").inner\uda7f\udffeHTML += \" U+AFFFE is evil\""); + } catch(e) {} + try { + // U+AFFFF + eval("$(\"display\").inner\uda7f\udfffHTML += \" U+AFFFF is evil\""); + } catch(e) {} + try { + // U+BFFFE + eval("$(\"display\").inner\udabf\udffeHTML += \" U+BFFFE is evil\""); + } catch(e) {} + try { + // U+BFFFF + eval("$(\"display\").inner\udabf\udfffHTML += \" U+BFFFF is evil\""); + } catch(e) {} + try { + // U+CFFFE + eval("$(\"display\").inner\udaff\udffeHTML += \" U+CFFFE is evil\""); + } catch(e) {} + try { + // U+CFFFF + eval("$(\"display\").inner\udaff\udfffHTML += \" U+CFFFF is evil\""); + } catch(e) {} + try { + // U+DFFFE + eval("$(\"display\").inner\udb3f\udffeHTML += \" U+DFFFE is evil\""); + } catch(e) {} + try { + // U+DFFFF + eval("$(\"display\").inner\udb3f\udfffHTML += \" U+DFFFF is evil\""); + } catch(e) {} + try { + // U+EFFFE + eval("$(\"display\").inner\udb7f\udffeHTML += \" U+EFFFE is evil\""); + } catch(e) {} + try { + // U+EFFFF + eval("$(\"display\").inner\udb7f\udfffHTML += \" U+EFFFF is evil\""); + } catch(e) {} + try { + // U+FFFFE + eval("$(\"display\").inner\udbbf\udffeHTML += \" U+FFFFE is evil\""); + } catch(e) {} + try { + // U+FFFFF + eval("$(\"display\").inner\udbbf\udfffHTML += \" U+FFFFF is evil\""); + } catch(e) {} + try { + // U+10FFFE + eval("$(\"display\").inner\udbff\udffeHTML += \" U+10FFFE is evil\""); + } catch(e) {} + try { + // U+10FFFF + 
eval("$(\"display\").inner\udbff\udfffHTML += \" U+10FFFF is evil\""); + } catch(e) {} + test(); +} + + SimpleTest.waitForExplicitFinish(); +</script> +</pre> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=445886">Mozilla Bug 445886</a> +<p id="display">All good.</p> +<div id="content" style="display: none"></div> +</body> +</html> diff --git a/intl/uconv/tests/test_unicode_noncharacters_gb18030.html b/intl/uconv/tests/test_unicode_noncharacters_gb18030.html new file mode 100644 index 0000000000..0c9156d9e3 --- /dev/null +++ b/intl/uconv/tests/test_unicode_noncharacters_gb18030.html @@ -0,0 +1,305 @@ +<!DOCTYPE HTML> +<html> +<head> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=445886 +--> + <meta http-equiv="Content-type" content="text/html; charset=gb18030"> + <title>Test for Unicode non-characters</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" + href="/tests/SimpleTest/test.css"> +</head> +<body onload="Inject()"> +<pre id="test"><script class="testbody" type="text/javascript"> + +/* eslint-disable no-eval */ + +/** Test that unicode non-characters are not discarded **/ +function test() +{ + is($("display").innerHTML, "All good.", "Noncharacters not stripped"); + SimpleTest.finish(); +} + +// eslint-disable-next-line complexity +function Inject() +{ + // script fragments containing Unicode non-characters + try { + // U+FDD0 + eval("$(\"display\").inner�0�2HTML += \" U+FDD0 is evil \""); + } catch(e) {} + try { + // U+FDD1 + eval("$(\"display\").inner�0�3HTML += \" U+FDD1 is evil \""); + } catch(e) {} + try { + // U+FDD2 + eval("$(\"display\").inner�0�4HTML += \" U+FDD2 is evil \""); + } catch(e) {} + try { + // U+FDD3 + eval("$(\"display\").inner�0�5HTML += \" U+FDD3 is evil \""); + } catch(e) {} + try { + // U+FDD4 + eval("$(\"display\").inner�0�6HTML += \" U+FDD4 is evil \""); + } catch(e) {} + try { + // U+FDD5 + eval("$(\"display\").inner�0�7HTML += \" U+FDD5 is evil 
\""); + } catch(e) {} + try { + // U+FDD6 + eval("$(\"display\").inner�0�8HTML += \" U+FDD6 is evil \""); + } catch(e) {} + try { + // U+FDD7 + eval("$(\"display\").inner�0�9HTML += \" U+FDD7 is evil \""); + } catch(e) {} + try { + // U+FDD8 + eval("$(\"display\").inner�0�0HTML += \" U+FDD8 is evil \""); + } catch(e) {} + try { + // U+FDD9 + eval("$(\"display\").inner�0�1HTML += \" U+FDD9 is evil \""); + } catch(e) {} + try { + // U+FDDA + eval("$(\"display\").inner�0�2HTML += \" U+FDDA is evil \""); + } catch(e) {} + try { + // U+FDDB + eval("$(\"display\").inner�0�3HTML += \" U+FDDB is evil \""); + } catch(e) {} + try { + // U+FDDC + eval("$(\"display\").inner�0�4HTML += \" U+FDDC is evil \""); + } catch(e) {} + try { + // U+FDDD + eval("$(\"display\").inner�0�5HTML += \" U+FDDD is evil \""); + } catch(e) {} + try { + // U+FDDE + eval("$(\"display\").inner�0�6HTML += \" U+FDDE is evil \""); + } catch(e) {} + try { + // U+FDDF + eval("$(\"display\").inner�0�7HTML += \" U+FDDF is evil \""); + } catch(e) {} + try { + // U+FDE0 + eval("$(\"display\").inner�0�8HTML += \" U+FDE0 is evil \""); + } catch(e) {} + try { + // U+FDE1 + eval("$(\"display\").inner�0�9HTML += \" U+FDE1 is evil \""); + } catch(e) {} + try { + // U+FDE2 + eval("$(\"display\").inner�0�0HTML += \" U+FDE2 is evil \""); + } catch(e) {} + try { + // U+FDE3 + eval("$(\"display\").inner�0�1HTML += \" U+FDE3 is evil \""); + } catch(e) {} + try { + // U+FDE4 + eval("$(\"display\").inner�0�2HTML += \" U+FDE4 is evil \""); + } catch(e) {} + try { + // U+FDE5 + eval("$(\"display\").inner�0�3HTML += \" U+FDE5 is evil \""); + } catch(e) {} + try { + // U+FDE6 + eval("$(\"display\").inner�0�4HTML += \" U+FDE6 is evil \""); + } catch(e) {} + try { + // U+FDE7 + eval("$(\"display\").inner�0�5HTML += \" U+FDE7 is evil \""); + } catch(e) {} + try { + // U+FDE8 + eval("$(\"display\").inner�0�6HTML += \" U+FDE8 is evil \""); + } catch(e) {} + try { + // U+FDE9 + eval("$(\"display\").inner�0�7HTML += \" U+FDE9 is evil 
\""); + } catch(e) {} + try { + // U+FDEA + eval("$(\"display\").inner�0�8HTML += \" U+FDEA is evil \""); + } catch(e) {} + try { + // U+FDEB + eval("$(\"display\").inner�0�9HTML += \" U+FDEB is evil \""); + } catch(e) {} + try { + // U+FDEC + eval("$(\"display\").inner�0�0HTML += \" U+FDEC is evil \""); + } catch(e) {} + try { + // U+FDED + eval("$(\"display\").inner�0�1HTML += \" U+FDED is evil \""); + } catch(e) {} + try { + // U+FDEE + eval("$(\"display\").inner�0�2HTML += \" U+FDEE is evil \""); + } catch(e) {} + try { + // U+FDEF + eval("$(\"display\").inner�0�3HTML += \" U+FDEF is evil \""); + } catch(e) {} + try { + // U+FFFE + eval("$(\"display\").inner�1�8HTML += \" U+FFFE is evil \""); + } catch(e) {} + try { + // U+FFFF + eval("$(\"display\").inner�1�9HTML += \" U+FFFF is evil \""); + } catch(e) {} + try { + // U+1FFFE + eval("$(\"display\").inner�2�4HTML += \" U+1FFFE is evil \""); + } catch(e) {} + try { + // U+1FFFF + eval("$(\"display\").inner�2�5HTML += \" U+1FFFF is evil \""); + } catch(e) {} + try { + // U+2FFFE + eval("$(\"display\").inner�4�0HTML += \" U+2FFFE is evil \""); + } catch(e) {} + try { + // U+2FFFF + eval("$(\"display\").inner�4�1HTML += \" U+2FFFF is evil \""); + } catch(e) {} + try { + // U+3FFFE + eval("$(\"display\").inner�6�6HTML += \" U+3FFFE is evil \""); + } catch(e) {} + try { + // U+3FFFF + eval("$(\"display\").inner�6�7HTML += \" U+3FFFF is evil \""); + } catch(e) {} + try { + // U+4FFFE + eval("$(\"display\").inner�8�2HTML += \" U+4FFFE is evil \""); + } catch(e) {} + try { + // U+4FFFF + eval("$(\"display\").inner�8�3HTML += \" U+4FFFF is evil \""); + } catch(e) {} + try { + // U+5FFFE + eval("$(\"display\").inner�0�8HTML += \" U+5FFFE is evil \""); + } catch(e) {} + try { + // U+5FFFF + eval("$(\"display\").inner�0�9HTML += \" U+5FFFF is evil \""); + } catch(e) {} + try { + // U+6FFFE + eval("$(\"display\").inner�2�4HTML += \" U+6FFFE is evil \""); + } catch(e) {} + try { + // U+6FFFF + 
eval("$(\"display\").inner�2�5HTML += \" U+6FFFF is evil \""); + } catch(e) {} + try { + // U+7FFFE + eval("$(\"display\").inner�4�0HTML += \" U+7FFFE is evil \""); + } catch(e) {} + try { + // U+7FFFF + eval("$(\"display\").inner�4�1HTML += \" U+7FFFF is evil \""); + } catch(e) {} + try { + // U+8FFFE + eval("$(\"display\").inner�6�6HTML += \" U+8FFFE is evil \""); + } catch(e) {} + try { + // U+8FFFF + eval("$(\"display\").inner�6�7HTML += \" U+8FFFF is evil \""); + } catch(e) {} + try { + // U+9FFFE + eval("$(\"display\").inner�8�2HTML += \" U+9FFFE is evil \""); + } catch(e) {} + try { + // U+9FFFF + eval("$(\"display\").inner�8�3HTML += \" U+9FFFF is evil \""); + } catch(e) {} + try { + // U+AFFFE + eval("$(\"display\").inner�0�8HTML += \" U+AFFFE is evil \""); + } catch(e) {} + try { + // U+AFFFF + eval("$(\"display\").inner�0�9HTML += \" U+AFFFF is evil \""); + } catch(e) {} + try { + // U+BFFFE + eval("$(\"display\").inner�2�4HTML += \" U+BFFFE is evil \""); + } catch(e) {} + try { + // U+BFFFF + eval("$(\"display\").inner�2�5HTML += \" U+BFFFF is evil \""); + } catch(e) {} + try { + // U+CFFFE + eval("$(\"display\").inner�4�0HTML += \" U+CFFFE is evil \""); + } catch(e) {} + try { + // U+CFFFF + eval("$(\"display\").inner�4�1HTML += \" U+CFFFF is evil \""); + } catch(e) {} + try { + // U+DFFFE + eval("$(\"display\").inner�6�6HTML += \" U+DFFFE is evil \""); + } catch(e) {} + try { + // U+DFFFF + eval("$(\"display\").inner�6�7HTML += \" U+DFFFF is evil \""); + } catch(e) {} + try { + // U+EFFFE + eval("$(\"display\").inner�8�2HTML += \" U+EFFFE is evil \""); + } catch(e) {} + try { + // U+EFFFF + eval("$(\"display\").inner�8�3HTML += \" U+EFFFF is evil \""); + } catch(e) {} + try { + // U+FFFFE + eval("$(\"display\").inner�0�8HTML += \" U+FFFFE is evil \""); + } catch(e) {} + try { + // U+FFFFF + eval("$(\"display\").inner�0�9HTML += \" U+FFFFF is evil \""); + } catch(e) {} + try { + // U+10FFFE + eval("$(\"display\").inner�2�4HTML += \" U+10FFFE is evil 
\""); + } catch(e) {} + try { + // U+10FFFF + eval("$(\"display\").inner�2�5HTML += \" U+10FFFF is evil \""); + } catch(e) {} + test(); +} + + SimpleTest.waitForExplicitFinish(); +</script> +</pre> +<a target="_blank" + href="https://bugzilla.mozilla.org/show_bug.cgi?id=445886">Mozilla Bug +445886</a> +<p id="display">All good.</p> +<div id="content" style="display: none;"></div> +</body> +</html> diff --git a/intl/uconv/tests/test_unicode_noncharacters_utf8.html b/intl/uconv/tests/test_unicode_noncharacters_utf8.html new file mode 100644 index 0000000000..ecfdbeae09 --- /dev/null +++ b/intl/uconv/tests/test_unicode_noncharacters_utf8.html @@ -0,0 +1,303 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=445886 +--> +<head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8"> + <title>Test for Unicode non-characters</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" /> +</head> +<body onload="Inject()"> +<pre id="test"> +<script class="testbody" type="text/javascript"> + +/* eslint-disable no-eval */ + +/** Test that unicode non-characters are not discarded **/ +function test() +{ + is($("display").innerHTML, "All good.", "Noncharacters not stripped"); + SimpleTest.finish(); +} + +// eslint-disable-next-line complexity +function Inject() +{ + // script fragments containing Unicode non-characters + try { + // U+FDD0 + eval("$(\"display\").innerHTML += \" U+FDD0 is evil \""); + } catch(e) {} + try { + // U+FDD1 + eval("$(\"display\").innerHTML += \" U+FDD1 is evil \""); + } catch(e) {} + try { + // U+FDD2 + eval("$(\"display\").innerHTML += \" U+FDD2 is evil \""); + } catch(e) {} + try { + // U+FDD3 + eval("$(\"display\").innerHTML += \" U+FDD3 is evil \""); + } catch(e) {} + try { + // U+FDD4 + eval("$(\"display\").innerHTML += \" U+FDD4 is evil \""); + } catch(e) {} + try { + // U+FDD5 + eval("$(\"display\").innerHTML += \" U+FDD5 
is evil \""); + } catch(e) {} + try { + // U+FDD6 + eval("$(\"display\").innerHTML += \" U+FDD6 is evil \""); + } catch(e) {} + try { + // U+FDD7 + eval("$(\"display\").innerHTML += \" U+FDD7 is evil \""); + } catch(e) {} + try { + // U+FDD8 + eval("$(\"display\").innerHTML += \" U+FDD8 is evil \""); + } catch(e) {} + try { + // U+FDD9 + eval("$(\"display\").innerHTML += \" U+FDD9 is evil \""); + } catch(e) {} + try { + // U+FDDA + eval("$(\"display\").innerHTML += \" U+FDDA is evil \""); + } catch(e) {} + try { + // U+FDDB + eval("$(\"display\").innerHTML += \" U+FDDB is evil \""); + } catch(e) {} + try { + // U+FDDC + eval("$(\"display\").innerHTML += \" U+FDDC is evil \""); + } catch(e) {} + try { + // U+FDDD + eval("$(\"display\").innerHTML += \" U+FDDD is evil \""); + } catch(e) {} + try { + // U+FDDE + eval("$(\"display\").innerHTML += \" U+FDDE is evil \""); + } catch(e) {} + try { + // U+FDDF + eval("$(\"display\").innerHTML += \" U+FDDF is evil \""); + } catch(e) {} + try { + // U+FDE0 + eval("$(\"display\").innerHTML += \" U+FDE0 is evil \""); + } catch(e) {} + try { + // U+FDE1 + eval("$(\"display\").innerHTML += \" U+FDE1 is evil \""); + } catch(e) {} + try { + // U+FDE2 + eval("$(\"display\").innerHTML += \" U+FDE2 is evil \""); + } catch(e) {} + try { + // U+FDE3 + eval("$(\"display\").innerHTML += \" U+FDE3 is evil \""); + } catch(e) {} + try { + // U+FDE4 + eval("$(\"display\").innerHTML += \" U+FDE4 is evil \""); + } catch(e) {} + try { + // U+FDE5 + eval("$(\"display\").innerHTML += \" U+FDE5 is evil \""); + } catch(e) {} + try { + // U+FDE6 + eval("$(\"display\").innerHTML += \" U+FDE6 is evil \""); + } catch(e) {} + try { + // U+FDE7 + eval("$(\"display\").innerHTML += \" U+FDE7 is evil \""); + } catch(e) {} + try { + // U+FDE8 + eval("$(\"display\").innerHTML += \" U+FDE8 is evil \""); + } catch(e) {} + try { + // U+FDE9 + eval("$(\"display\").innerHTML += \" U+FDE9 is evil \""); + } catch(e) {} + try { + // U+FDEA + 
eval("$(\"display\").innerHTML += \" U+FDEA is evil \""); + } catch(e) {} + try { + // U+FDEB + eval("$(\"display\").innerHTML += \" U+FDEB is evil \""); + } catch(e) {} + try { + // U+FDEC + eval("$(\"display\").innerHTML += \" U+FDEC is evil \""); + } catch(e) {} + try { + // U+FDED + eval("$(\"display\").innerHTML += \" U+FDED is evil \""); + } catch(e) {} + try { + // U+FDEE + eval("$(\"display\").innerHTML += \" U+FDEE is evil \""); + } catch(e) {} + try { + // U+FDEF + eval("$(\"display\").innerHTML += \" U+FDEF is evil \""); + } catch(e) {} + try { + // U+FFFE + eval("$(\"display\").innerHTML += \" U+FFFE is evil \""); + } catch(e) {} + try { + // U+FFFF + eval("$(\"display\").innerHTML += \" U+FFFF is evil \""); + } catch(e) {} + try { + // U+1FFFE + eval("$(\"display\").innerHTML += \" U+1FFFE is evil \""); + } catch(e) {} + try { + // U+1FFFF + eval("$(\"display\").innerHTML += \" U+1FFFF is evil \""); + } catch(e) {} + try { + // U+2FFFE + eval("$(\"display\").innerHTML += \" U+2FFFE is evil \""); + } catch(e) {} + try { + // U+2FFFF + eval("$(\"display\").innerHTML += \" U+2FFFF is evil \""); + } catch(e) {} + try { + // U+3FFFE + eval("$(\"display\").innerHTML += \" U+3FFFE is evil \""); + } catch(e) {} + try { + // U+3FFFF + eval("$(\"display\").innerHTML += \" U+3FFFF is evil \""); + } catch(e) {} + try { + // U+4FFFE + eval("$(\"display\").innerHTML += \" U+4FFFE is evil \""); + } catch(e) {} + try { + // U+4FFFF + eval("$(\"display\").innerHTML += \" U+4FFFF is evil \""); + } catch(e) {} + try { + // U+5FFFE + eval("$(\"display\").innerHTML += \" U+5FFFE is evil \""); + } catch(e) {} + try { + // U+5FFFF + eval("$(\"display\").innerHTML += \" U+5FFFF is evil \""); + } catch(e) {} + try { + // U+6FFFE + eval("$(\"display\").innerHTML += \" U+6FFFE is evil \""); + } catch(e) {} + try { + // U+6FFFF + eval("$(\"display\").innerHTML += \" U+6FFFF is evil \""); + } catch(e) {} + try { + // U+7FFFE + eval("$(\"display\").innerHTML += \" U+7FFFE is evil 
\""); + } catch(e) {} + try { + // U+7FFFF + eval("$(\"display\").innerHTML += \" U+7FFFF is evil \""); + } catch(e) {} + try { + // U+8FFFE + eval("$(\"display\").innerHTML += \" U+8FFFE is evil \""); + } catch(e) {} + try { + // U+8FFFF + eval("$(\"display\").innerHTML += \" U+8FFFF is evil \""); + } catch(e) {} + try { + // U+9FFFE + eval("$(\"display\").innerHTML += \" U+9FFFE is evil \""); + } catch(e) {} + try { + // U+9FFFF + eval("$(\"display\").innerHTML += \" U+9FFFF is evil \""); + } catch(e) {} + try { + // U+AFFFE + eval("$(\"display\").innerHTML += \" U+AFFFE is evil \""); + } catch(e) {} + try { + // U+AFFFF + eval("$(\"display\").innerHTML += \" U+AFFFF is evil \""); + } catch(e) {} + try { + // U+BFFFE + eval("$(\"display\").innerHTML += \" U+BFFFE is evil \""); + } catch(e) {} + try { + // U+BFFFF + eval("$(\"display\").innerHTML += \" U+BFFFF is evil \""); + } catch(e) {} + try { + // U+CFFFE + eval("$(\"display\").innerHTML += \" U+CFFFE is evil \""); + } catch(e) {} + try { + // U+CFFFF + eval("$(\"display\").innerHTML += \" U+CFFFF is evil \""); + } catch(e) {} + try { + // U+DFFFE + eval("$(\"display\").innerHTML += \" U+DFFFE is evil \""); + } catch(e) {} + try { + // U+DFFFF + eval("$(\"display\").innerHTML += \" U+DFFFF is evil \""); + } catch(e) {} + try { + // U+EFFFE + eval("$(\"display\").innerHTML += \" U+EFFFE is evil \""); + } catch(e) {} + try { + // U+EFFFF + eval("$(\"display\").innerHTML += \" U+EFFFF is evil \""); + } catch(e) {} + try { + // U+FFFFE + eval("$(\"display\").innerHTML += \" U+FFFFE is evil \""); + } catch(e) {} + try { + // U+FFFFF + eval("$(\"display\").innerHTML += \" U+FFFFF is evil \""); + } catch(e) {} + try { + // U+10FFFE + eval("$(\"display\").innerHTML += \" U+10FFFE is evil \""); + } catch(e) {} + try { + // U+10FFFF + eval("$(\"display\").innerHTML += \" U+10FFFF is evil \""); + } catch(e) {} + test(); +} + + SimpleTest.waitForExplicitFinish(); +</script> +</pre> +<a target="_blank" 
href="https://bugzilla.mozilla.org/show_bug.cgi?id=445886">Mozilla Bug 445886</a> +<p id="display">All good.</p> +<div id="content" style="display: none"></div> +</body> +</html> diff --git a/intl/uconv/tests/test_utf8_overconsumption.html b/intl/uconv/tests/test_utf8_overconsumption.html new file mode 100644 index 0000000000..25c4a273ea --- /dev/null +++ b/intl/uconv/tests/test_utf8_overconsumption.html @@ -0,0 +1,39 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=445886 +--> +<head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8"> + <title>Test for Unicode non-characters</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" /> + <script type="text/javascript"> +function Inject() +{ + $("display").innerHTML = "Evil"; +} + </script> +</head> +<body >onload="Inject()"> +<pre id="test"> +<script class="testbody" type="text/javascript"> + +/** test that UTF-8 decoding resynchronizes after incomplete sequences */ +function test() +{ + is($("display").innerHTML, "All good.", "No overconsumption"); + SimpleTest.finish(); +} + + addLoadEvent(function() { + setTimeout(test, 0); + }); + SimpleTest.waitForExplicitFinish(); +</script> +</pre> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=445886">Mozilla Bug 445886</a> +<p id="display">All good.</p> +<div id="content" style="display: none"></div> +</body> +</html> |