diff options
Diffstat (limited to 'intl/uconv')
122 files changed, 6028 insertions, 0 deletions
diff --git a/intl/uconv/components.conf b/intl/uconv/components.conf new file mode 100644 index 0000000000..00686f661a --- /dev/null +++ b/intl/uconv/components.conf @@ -0,0 +1,34 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Classes = [ + { + 'cid': '{2bc2ad62-ad5d-4b7b-a9db-f74ae203c527}', + 'contract_ids': ['@mozilla.org/intl/converter-input-stream;1'], + 'type': 'nsConverterInputStream', + 'headers': ['nsConverterInputStream.h'], + }, + { + 'cid': '{ff8780a5-bbb1-4bc5-8ee7-057e7bc5c925}', + 'contract_ids': ['@mozilla.org/intl/converter-output-stream;1'], + 'type': 'nsConverterOutputStream', + 'headers': ['/intl/uconv/nsConverterOutputStream.h'], + }, + { + 'cid': '{0a698c44-3bff-11d4-9649-00c0ca135b4e}', + 'contract_ids': ['@mozilla.org/intl/scriptableunicodeconverter'], + 'type': 'nsScriptableUnicodeConverter', + 'headers': ['/intl/uconv/nsScriptableUConv.h'], + }, + { + 'js_name': 'textToSubURI', + 'cid': '{8b042e22-6f87-11d3-b3c8-00805f8a6670}', + 'contract_ids': ['@mozilla.org/intl/texttosuburi;1'], + 'interfaces': ['nsITextToSubURI'], + 'type': 'nsTextToSubURI', + 'headers': ['/intl/uconv/nsTextToSubURI.h'], + }, +] diff --git a/intl/uconv/crashtests/563618.html b/intl/uconv/crashtests/563618.html new file mode 100644 index 0000000000..e36b664762 --- /dev/null +++ b/intl/uconv/crashtests/563618.html @@ -0,0 +1,12 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html> +<head> + <meta content="text/html; charset=euc-jp" + http-equiv="content-type"> + <title>Serbian Glyph Test</title> +</head> +<body> + + <p style="font-size: 20pt;">Ž</p> +</body> +</html> diff --git a/intl/uconv/crashtests/crashtests.list b/intl/uconv/crashtests/crashtests.list new file mode 100644 index 0000000000..6c54a699c1 --- /dev/null +++ b/intl/uconv/crashtests/crashtests.list @@ -0,0 +1,2 @@ +load 563618.html +load omt-non-utf-8-jsurl.html diff --git a/intl/uconv/crashtests/omt-non-utf-8-jsurl.html b/intl/uconv/crashtests/omt-non-utf-8-jsurl.html new file mode 100644 index 0000000000..033e38a280 --- /dev/null +++ b/intl/uconv/crashtests/omt-non-utf-8-jsurl.html @@ -0,0 +1,14 @@ +<!DOCTYPE html> +<html class=reftest-wait> +<head> + <meta charset=iso-8859-1><!-- must be non-UTF-8 --> + <title>Test for off the main thread non-UTF-8 javascript: URL</title> +</head> +<body> +<script> +new Worker("javascript:foo").onerror = () => { + document.documentElement.className = ""; +}; +</script> +</body> +</html> diff --git a/intl/uconv/directory.txt b/intl/uconv/directory.txt new file mode 100644 index 0000000000..2b6be7af7f --- /dev/null +++ b/intl/uconv/directory.txt @@ -0,0 +1,32 @@ +Directory Structure : +================================ + +idl - public .idl files +public - public header file +src - source directory of charset converter manager and utilities, and + charset converters for ISO-8859-1, CP1252, MacRoman and UTF-8 +tests - tests program and application for charset converter +tests/unit - xpcshell tests +tools - tools to build the tables used by the converters +util - utility functions used by the converters + +The following directories contain different charset converters: + +ucvcn - Simplified Chinese charsets - GB2312, HZ, ISO-2022-CN, GBK, GB18030 +ucvja - Japanese charsets - Shift-JIS, ISO-2022-JP, EUC-JP +ucvko - Korean charsets - ISO-2022-KR, EUC-KR, Johab +ucvlatin - Latin charsets and others - ISO-8859-x, CP1250-1258 + CP866, 874, KOI8, + Mac charsets, TIS620, UTF16 +ucvtw - Traditional Chinese charsets Set 1 - Big5 +ucvtw2 - Traditional Chinese charsets Set 2 - EUC-TW + +Within the directories containing charset converters: + +*.ut - tables used to convert to Unicode from a charset +*.uf - tables used to convert to a charset from Unicode + +The following directories are obsolete and should not be used: + +ucvth +ucvvt diff --git a/intl/uconv/moz.build b/intl/uconv/moz.build new file mode 100644 index 0000000000..f21e4055f9 --- /dev/null +++ b/intl/uconv/moz.build @@ -0,0 +1,32 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +TEST_DIRS += ["tests"] + +XPIDL_SOURCES += [ + "nsIScriptableUConv.idl", + "nsITextToSubURI.idl", +] + +XPIDL_MODULE = "uconv" + +EXPORTS += [ + "nsConverterInputStream.h", + "nsTextToSubURI.h", +] + +UNIFIED_SOURCES += [ + "nsConverterInputStream.cpp", + "nsConverterOutputStream.cpp", + "nsScriptableUConv.cpp", + "nsTextToSubURI.cpp", +] + +XPCOM_MANIFESTS += [ + "components.conf", +] + +FINAL_LIBRARY = "xul" diff --git a/intl/uconv/nsConverterInputStream.cpp b/intl/uconv/nsConverterInputStream.cpp new file mode 100644 index 0000000000..e3efdbc146 --- /dev/null +++ b/intl/uconv/nsConverterInputStream.cpp @@ -0,0 +1,256 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsConverterInputStream.h" +#include "nsIInputStream.h" +#include "nsReadLine.h" +#include "nsStreamUtils.h" + +#include <algorithm> +#include <tuple> + +using namespace mozilla; + +#define CONVERTER_BUFFER_SIZE 8192 + +NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream, + nsIUnicharInputStream, nsIUnicharLineInputStream) + +NS_IMETHODIMP +nsConverterInputStream::Init(nsIInputStream* aStream, const char* aCharset, + int32_t aBufferSize, char16_t aReplacementChar) { + nsAutoCString label; + if (!aCharset) { + label.AssignLiteral("UTF-8"); + } else { + label = aCharset; + } + + auto encoding = Encoding::ForLabelNoReplacement(label); + if (!encoding) { + return NS_ERROR_UCONV_NOCONV; + } + // Previously, the implementation auto-switched only + // between the two UTF-16 variants and only when + // initialized with an endianness-unspecific label. + mConverter = encoding->NewDecoder(); + + size_t outputBufferSize; + if (aBufferSize <= 0) { + aBufferSize = CONVERTER_BUFFER_SIZE; + outputBufferSize = CONVERTER_BUFFER_SIZE; + } else { + // NetUtil.jsm assumes that if buffer size equals + // the input size, the whole stream will be processed + // as one readString. This is not true with encoding_rs, + // because encoding_rs might want to see space for a + // surrogate pair, so let's compute a larger output + // buffer length. + CheckedInt<size_t> needed = mConverter->MaxUTF16BufferLength(aBufferSize); + if (!needed.isValid()) { + return NS_ERROR_OUT_OF_MEMORY; + } + outputBufferSize = needed.value(); + } + + // set up our buffers. + if (!mByteData.SetCapacity(aBufferSize, mozilla::fallible) || + !mUnicharData.SetLength(outputBufferSize, mozilla::fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + + mInput = aStream; + mErrorsAreFatal = !aReplacementChar; + return NS_OK; +} + +NS_IMETHODIMP +nsConverterInputStream::Close() { + nsresult rv = mInput ? mInput->Close() : NS_OK; + mLineBuffer = nullptr; + mInput = nullptr; + mConverter = nullptr; + mByteData.Clear(); + mUnicharData.Clear(); + return rv; +} + +NS_IMETHODIMP +nsConverterInputStream::Read(char16_t* aBuf, uint32_t aCount, + uint32_t* aReadCount) { + NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); + uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; + if (0 == readCount) { + // Fill the unichar buffer + readCount = Fill(&mLastErrorCode); + if (readCount == 0) { + *aReadCount = 0; + return mLastErrorCode; + } + } + if (readCount > aCount) { + readCount = aCount; + } + memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset, + readCount * sizeof(char16_t)); + mUnicharDataOffset += readCount; + *aReadCount = readCount; + return NS_OK; +} + +NS_IMETHODIMP +nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, + void* aClosure, uint32_t aCount, + uint32_t* aReadCount) { + NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); + uint32_t codeUnitsToWrite = mUnicharDataLength - mUnicharDataOffset; + if (0 == codeUnitsToWrite) { + // Fill the unichar buffer + codeUnitsToWrite = Fill(&mLastErrorCode); + if (codeUnitsToWrite == 0) { + *aReadCount = 0; + return mLastErrorCode; + } + } + + if (codeUnitsToWrite > aCount) { + codeUnitsToWrite = aCount; + } + + uint32_t codeUnitsWritten; + uint32_t totalCodeUnitsWritten = 0; + + while (codeUnitsToWrite) { + nsresult rv = + aWriter(this, aClosure, mUnicharData.Elements() + mUnicharDataOffset, + totalCodeUnitsWritten, codeUnitsToWrite, &codeUnitsWritten); + if (NS_FAILED(rv)) { + // don't propagate errors to the caller + break; + } + + codeUnitsToWrite -= codeUnitsWritten; + totalCodeUnitsWritten += codeUnitsWritten; + mUnicharDataOffset += codeUnitsWritten; + } + + *aReadCount = totalCodeUnitsWritten; + + return NS_OK; +} + +NS_IMETHODIMP +nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString, + uint32_t* aReadCount) { + NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); + uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; + if (0 == readCount) { + // Fill the unichar buffer + readCount = Fill(&mLastErrorCode); + if (readCount == 0) { + *aReadCount = 0; + return mLastErrorCode; + } + } + if (readCount > aCount) { + readCount = aCount; + } + const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset; + aString.Assign(buf, readCount); + mUnicharDataOffset += readCount; + *aReadCount = readCount; + return NS_OK; +} + +uint32_t nsConverterInputStream::Fill(nsresult* aErrorCode) { + if (!mInput) { + // We already closed the stream! + *aErrorCode = NS_BASE_STREAM_CLOSED; + return 0; + } + + if (NS_FAILED(mLastErrorCode)) { + // We failed to completely convert last time, and error-recovery + // is disabled. We will fare no better this time, so... + *aErrorCode = mLastErrorCode; + return 0; + } + + // mUnicharData.Length() is the buffer length, not the fill status. + // mUnicharDataLength reflects the current fill status. + mUnicharDataLength = 0; + // Whenever we convert, mUnicharData is logically empty. + mUnicharDataOffset = 0; + + // Continue trying to read from the source stream until we successfully decode + // a character or encounter an error, as returning `0` here implies that the + // stream is complete. + // + // If the converter has been cleared, we've fully consumed the stream, and + // want to report EOF. + while (mUnicharDataLength == 0 && mConverter) { + // We assume a many to one conversion and are using equal sizes for + // the two buffers. However if an error happens at the very start + // of a byte buffer we may end up in a situation where n bytes lead + // to n+1 unicode chars. Thus we need to keep track of the leftover + // bytes as we convert. + + uint32_t nb; + *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb); + if (NS_FAILED(*aErrorCode)) { + return 0; + } + + NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(), + "mByteData is lying to us somewhere"); + + // If `NS_FillArray` failed to read any new bytes, this is the last read, + // and we're at the end of the stream. + bool last = (nb == 0); + + // Now convert as much of the byte buffer to unicode as possible + auto src = AsBytes(Span(mByteData)); + auto dst = Span(mUnicharData); + + // Truncation from size_t to uint32_t below is OK, because the sizes + // are bounded by the lengths of mByteData and mUnicharData. + uint32_t result; + size_t read; + size_t written; + if (mErrorsAreFatal) { + std::tie(result, read, written) = + mConverter->DecodeToUTF16WithoutReplacement(src, dst, last); + } else { + std::tie(result, read, written, std::ignore) = + mConverter->DecodeToUTF16(src, dst, last); + } + mLeftOverBytes = mByteData.Length() - read; + mUnicharDataLength = written; + // Clear `mConverter` if we reached the end of the stream, as we can't + // call methods on it anymore. This will also signal EOF to the caller + // through the loop condition. + if (last) { + MOZ_ASSERT(mLeftOverBytes == 0, + "Failed to read all bytes on the last pass?"); + mConverter = nullptr; + } + // If we got a decode error, we're done. + if (result != kInputEmpty && result != kOutputFull) { + MOZ_ASSERT(mErrorsAreFatal, "How come DecodeToUTF16() reported error?"); + *aErrorCode = NS_ERROR_UDEC_ILLEGALINPUT; + return 0; + } + } + *aErrorCode = NS_OK; + return mUnicharDataLength; +} + +NS_IMETHODIMP +nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult) { + if (!mLineBuffer) { + mLineBuffer = MakeUnique<nsLineBuffer<char16_t>>(); + } + return NS_ReadLine(this, mLineBuffer.get(), aLine, aResult); +} diff --git a/intl/uconv/nsConverterInputStream.h b/intl/uconv/nsConverterInputStream.h new file mode 100644 index 0000000000..55555fc679 --- /dev/null +++ b/intl/uconv/nsConverterInputStream.h @@ -0,0 +1,64 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsConverterInputStream_h +#define nsConverterInputStream_h + +#include "nsIInputStream.h" +#include "nsIConverterInputStream.h" +#include "nsIUnicharLineInputStream.h" +#include "nsTArray.h" +#include "nsCOMPtr.h" +#include "nsReadLine.h" +#include "mozilla/Encoding.h" +#include "mozilla/UniquePtr.h" + +#define NS_CONVERTERINPUTSTREAM_CONTRACTID \ + "@mozilla.org/intl/converter-input-stream;1" + +// {2BC2AD62-AD5D-4b7b-A9DB-F74AE203C527} +#define NS_CONVERTERINPUTSTREAM_CID \ + { \ + 0x2bc2ad62, 0xad5d, 0x4b7b, { \ + 0xa9, 0xdb, 0xf7, 0x4a, 0xe2, 0x3, 0xc5, 0x27 \ + } \ + } + +class nsConverterInputStream : public nsIConverterInputStream, + public nsIUnicharLineInputStream { + public: + NS_DECL_ISUPPORTS + NS_DECL_NSIUNICHARINPUTSTREAM + NS_DECL_NSIUNICHARLINEINPUTSTREAM + NS_DECL_NSICONVERTERINPUTSTREAM + + nsConverterInputStream() + : mLastErrorCode(NS_OK), + mLeftOverBytes(0), + mUnicharDataOffset(0), + mUnicharDataLength(0), + mErrorsAreFatal(false), + mLineBuffer(nullptr) {} + + private: + virtual ~nsConverterInputStream() { Close(); } + + uint32_t Fill(nsresult* aErrorCode); + + mozilla::UniquePtr<mozilla::Decoder> mConverter; + FallibleTArray<char> mByteData; + FallibleTArray<char16_t> mUnicharData; + nsCOMPtr<nsIInputStream> mInput; + + nsresult mLastErrorCode; + uint32_t mLeftOverBytes; + uint32_t mUnicharDataOffset; + uint32_t mUnicharDataLength; + bool mErrorsAreFatal; + + mozilla::UniquePtr<nsLineBuffer<char16_t> > mLineBuffer; +}; + +#endif diff --git a/intl/uconv/nsConverterOutputStream.cpp b/intl/uconv/nsConverterOutputStream.cpp new file mode 100644 index 0000000000..a24adb0377 --- /dev/null +++ b/intl/uconv/nsConverterOutputStream.cpp @@ -0,0 +1,115 @@ +/* vim:set expandtab ts=4 sw=2 sts=2 cin: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsCOMPtr.h" +#include "nsIOutputStream.h" +#include "nsString.h" +#include "nsConverterOutputStream.h" +#include "mozilla/Encoding.h" + +using namespace mozilla; + +NS_IMPL_ISUPPORTS(nsConverterOutputStream, nsIUnicharOutputStream, + nsIConverterOutputStream) + +nsConverterOutputStream::~nsConverterOutputStream() { Close(); } + +NS_IMETHODIMP +nsConverterOutputStream::Init(nsIOutputStream* aOutStream, + const char* aCharset) { + MOZ_ASSERT(aOutStream, "Null output stream!"); + + const Encoding* encoding; + if (!aCharset) { + encoding = UTF_8_ENCODING; + } else { + encoding = Encoding::ForLabelNoReplacement(MakeStringSpan(aCharset)); + if (!encoding || encoding == UTF_16LE_ENCODING || + encoding == UTF_16BE_ENCODING) { + return NS_ERROR_UCONV_NOCONV; + } + } + + mConverter = encoding->NewEncoder(); + + mOutStream = aOutStream; + + return NS_OK; +} + +NS_IMETHODIMP +nsConverterOutputStream::Write(uint32_t aCount, const char16_t* aChars, + bool* aSuccess) { + if (!mOutStream) { + NS_ASSERTION(!mConverter, "Closed streams shouldn't have converters"); + return NS_BASE_STREAM_CLOSED; + } + MOZ_ASSERT(mConverter, "Must have a converter when not closed"); + uint8_t buffer[4096]; + auto dst = Span(buffer); + auto src = Span(aChars, aCount); + for (;;) { + uint32_t result; + size_t read; + size_t written; + std::tie(result, read, written, std::ignore) = + mConverter->EncodeFromUTF16(src, dst, false); + src = src.From(read); + uint32_t streamWritten; + nsresult rv = mOutStream->Write(reinterpret_cast<char*>(dst.Elements()), + written, &streamWritten); + *aSuccess = NS_SUCCEEDED(rv) && written == streamWritten; + if (!(*aSuccess)) { + return rv; + } + if (result == kInputEmpty) { + return NS_OK; + } + } +} + +NS_IMETHODIMP +nsConverterOutputStream::WriteString(const nsAString& aString, bool* aSuccess) { + int32_t inLen = aString.Length(); + nsAString::const_iterator i; + aString.BeginReading(i); + return Write(inLen, i.get(), aSuccess); +} + +NS_IMETHODIMP +nsConverterOutputStream::Flush() { + if (!mOutStream) return NS_OK; // Already closed. + + // If we are encoding to ISO-2022-JP, potentially + // transition back to the ASCII state. The buffer + // needs to be large enough for an additional NCR, + // though. + uint8_t buffer[12]; + auto dst = Span(buffer); + Span<char16_t> src(nullptr); + uint32_t result; + size_t written; + std::tie(result, std::ignore, written, std::ignore) = + mConverter->EncodeFromUTF16(src, dst, true); + MOZ_ASSERT(result == kInputEmpty); + uint32_t streamWritten; + if (!written) { + return NS_OK; + } + return mOutStream->Write(reinterpret_cast<char*>(dst.Elements()), written, + &streamWritten); +} + +NS_IMETHODIMP +nsConverterOutputStream::Close() { + if (!mOutStream) return NS_OK; // Already closed. + + nsresult rv1 = Flush(); + + nsresult rv2 = mOutStream->Close(); + mOutStream = nullptr; + mConverter = nullptr; + return NS_FAILED(rv1) ? rv1 : rv2; +} diff --git a/intl/uconv/nsConverterOutputStream.h b/intl/uconv/nsConverterOutputStream.h new file mode 100644 index 0000000000..74b873acd5 --- /dev/null +++ b/intl/uconv/nsConverterOutputStream.h @@ -0,0 +1,39 @@ +/* vim:set expandtab ts=4 sw=2 sts=2 cin: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef NSCONVERTEROUTPUTSTREAM_H_ +#define NSCONVERTEROUTPUTSTREAM_H_ + +#include "nsIConverterOutputStream.h" +#include "nsCOMPtr.h" +#include "mozilla/Attributes.h" +#include "mozilla/Encoding.h" + +class nsIOutputStream; + +/* ff8780a5-bbb1-4bc5-8ee7-057e7bc5c925 */ +#define NS_CONVERTEROUTPUTSTREAM_CID \ + { \ + 0xff8780a5, 0xbbb1, 0x4bc5, { \ + 0x8e, 0xe7, 0x05, 0x7e, 0x7b, 0xc5, 0xc9, 0x25 \ + } \ + } + +class nsConverterOutputStream final : public nsIConverterOutputStream { + public: + nsConverterOutputStream() = default; + + NS_DECL_ISUPPORTS + NS_DECL_NSIUNICHAROUTPUTSTREAM + NS_DECL_NSICONVERTEROUTPUTSTREAM + + private: + ~nsConverterOutputStream(); + + mozilla::UniquePtr<mozilla::Encoder> mConverter; + nsCOMPtr<nsIOutputStream> mOutStream; +}; + +#endif diff --git a/intl/uconv/nsIScriptableUConv.idl b/intl/uconv/nsIScriptableUConv.idl new file mode 100644 index 0000000000..7f1334c0c3 --- /dev/null +++ b/intl/uconv/nsIScriptableUConv.idl @@ -0,0 +1,64 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + +interface nsIInputStream; + +%{C++ +// {0A698C44-3BFF-11d4-9649-00C0CA135B4E} +#define NS_ISCRIPTABLEUNICODECONVERTER_CID { 0x0A698C44, 0x3BFF, 0x11d4, { 0x96, 0x49, 0x00, 0xC0, 0xCA, 0x13, 0x5B, 0x4E } } +#define NS_ISCRIPTABLEUNICODECONVERTER_CONTRACTID "@mozilla.org/intl/scriptableunicodeconverter" +%} + +/** + * In new code, please use the WebIDL TextDecoder and TextEncoder + * instead. They represent bytes as Uint8Array (or as view to such + * array), which is the current best practice for representing bytes + * in JavaScript. + * + * This interface converts between UTF-16 in JavaScript strings + * and bytes transported as the unsigned value of each byte + * transported in a code unit of the same numeric value in + * a JavaScript string. + * + * @created 8/Jun/2000 + * @author Makoto Kato [m_kato@ga2.so-net.ne.jp] + */ +[scriptable, uuid(f36ee324-5c1c-437f-ba10-2b4db7a18031)] +interface nsIScriptableUnicodeConverter : nsISupports +{ + /** + * Converts the data from Unicode to one Charset. + * Returns the converted string. After converting, Finish should be called + * and its return value appended to this return value. + */ + ACString ConvertFromUnicode(in AString aSrc); + + /** + * Returns the terminator string. + * Should be called after ConvertFromUnicode() and appended to that + * function's return value. + */ + ACString Finish(); + + /** + * Converts the data from one Charset to Unicode. + */ + AString ConvertToUnicode(in ACString aSrc); + + /** + * Current character set. + * + * @throw NS_ERROR_UCONV_NOCONV + * The requested charset is not supported. + */ + attribute ACString charset; + + /** + * Meaningless + */ + attribute boolean isInternal; +}; diff --git a/intl/uconv/nsITextToSubURI.idl b/intl/uconv/nsITextToSubURI.idl new file mode 100644 index 0000000000..3bb404e414 --- /dev/null +++ b/intl/uconv/nsITextToSubURI.idl @@ -0,0 +1,60 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + + +%{C++ +// {8B042E22-6F87-11d3-B3C8-00805F8A6670} +#define NS_TEXTTOSUBURI_CID { 0x8b042e22, 0x6f87, 0x11d3, { 0xb3, 0xc8, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } } +#define NS_ITEXTTOSUBURI_CONTRACTID "@mozilla.org/intl/texttosuburi;1" +%} + +[scriptable, uuid(8B042E24-6F87-11d3-B3C8-00805F8A6670)] +interface nsITextToSubURI : nsISupports +{ + ACString ConvertAndEscape(in ACString charset, in AString text); + AString UnEscapeAndConvert(in ACString charset, in ACString text); + + /** + * Unescapes the given URI fragment (for UI purpose only) + * Note: + * <ul> + * <li> escaping back the result (unescaped string) is not guaranteed to + * give the original escaped string + * <li> The URI fragment (escaped) is assumed to be in UTF-8 and converted + * to AString (UTF-16) + * <li> In case of successful conversion any resulting character listed + * in netwerk/dns/IDNCharacterBlocklist.inc (except space) is escaped + * <li> Always succeeeds (callers don't need to do error checking) + * </ul> + * + * @param aURIFragment the URI (or URI fragment) to unescape + * @param aDontEscape whether to escape IDN blocklisted characters + * @return Unescaped aURIFragment converted to unicode + */ + AString unEscapeURIForUI(in AUTF8String aURIFragment, + [optional] in boolean aDontEscape); +%{C++ + nsresult UnEscapeURIForUI(const nsACString& aURIFragment, + nsAString& _retval) { + return UnEscapeURIForUI(aURIFragment, false, _retval); + } +%} + + /** + * Unescapes only non ASCII characters in the given URI fragment + * note: this method assumes the URI as UTF-8 and fallbacks to the given + * charset if the charset is an ASCII superset + * + * @param aCharset the charset to convert from + * @param aURIFragment the URI (or URI fragment) to unescape + * @return Unescaped aURIFragment converted to unicode + * @throws NS_ERROR_UCONV_NOCONV when there is no decoder for aCharset + * or NS_ERROR_UDEC_ILLEGALINPUT in case of conversion failure + */ + [binaryname(UnEscapeNonAsciiURIJS)] + AString unEscapeNonAsciiURI(in ACString aCharset, in AUTF8String aURIFragment); +}; diff --git a/intl/uconv/nsScriptableUConv.cpp b/intl/uconv/nsScriptableUConv.cpp new file mode 100644 index 0000000000..e56d578efd --- /dev/null +++ b/intl/uconv/nsScriptableUConv.cpp @@ -0,0 +1,192 @@ + +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsString.h" +#include "nsIScriptableUConv.h" +#include "nsScriptableUConv.h" +#include "nsComponentManagerUtils.h" + +#include <tuple> + +using namespace mozilla; + +/* Implementation file */ +NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter) + +nsScriptableUnicodeConverter::nsScriptableUnicodeConverter() + : mIsInternal(false) {} + +nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() = default; + +NS_IMETHODIMP +nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc, + nsACString& _retval) { + if (!mEncoder) return NS_ERROR_FAILURE; + + // We can compute the length without replacement, because the + // the replacement is only one byte long and a mappable character + // would always output something, i.e. at least one byte. + // When encoding to ISO-2022-JP, unmappables shouldn't be able + // to cause more escape sequences to be emitted than the mappable + // worst case where every input character causes an escape into + // a different state. + CheckedInt<size_t> needed = + mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aSrc.Length()); + if (!needed.isValid() || needed.value() > UINT32_MAX) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto dstChars = _retval.GetMutableData(needed.value(), fallible); + if (!dstChars) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto src = Span(aSrc); + auto dst = AsWritableBytes(*dstChars); + size_t totalWritten = 0; + for (;;) { + auto [result, read, written] = + mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, false); + if (result != kInputEmpty && result != kOutputFull) { + MOZ_RELEASE_ASSERT(written < dst.Length(), + "Unmappables with one-byte replacement should not " + "exceed mappable worst case."); + dst[written++] = '?'; + } + totalWritten += written; + if (result == kInputEmpty) { + MOZ_ASSERT(totalWritten <= UINT32_MAX); + if (!_retval.SetLength(totalWritten, fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + return NS_OK; + } + src = src.From(read); + dst = dst.From(written); + } +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::Finish(nsACString& _retval) { + // The documentation for this method says it should be called after + // ConvertFromUnicode(). However, our own tests called it after + // convertFromByteArray(), i.e. when *decoding*. + // Assuming that there exists extensions that similarly call + // this at the wrong time, let's deal. In general, it is a design + // error for this class to handle conversions in both directions. + if (!mEncoder) { + _retval.Truncate(); + mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder); + return NS_OK; + } + // If we are encoding to ISO-2022-JP, potentially + // transition back to the ASCII state. The buffer + // needs to be large enough for an additional NCR, + // though. + _retval.SetLength(13); + auto dst = AsWritableBytes(_retval.GetMutableData(13)); + Span<char16_t> src(nullptr); + uint32_t result; + size_t read; + size_t written; + std::tie(result, read, written, std::ignore) = + mEncoder->EncodeFromUTF16(src, dst, true); + MOZ_ASSERT(!read); + MOZ_ASSERT(result == kInputEmpty); + _retval.SetLength(written); + + mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder); + mEncoder->Encoding()->NewEncoderInto(*mEncoder); + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc, + nsAString& _retval) { + if (!mDecoder) return NS_ERROR_FAILURE; + + uint32_t length = aSrc.Length(); + + CheckedInt<size_t> needed = mDecoder->MaxUTF16BufferLength(length); + if (!needed.isValid() || needed.value() > UINT32_MAX) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto dst = _retval.GetMutableData(needed.value(), fallible); + if (!dst) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto src = + Span(reinterpret_cast<const uint8_t*>(aSrc.BeginReading()), length); + uint32_t result; + size_t read; + size_t written; + // The UTF-8 decoder used to throw regardless of the error behavior. + // Simulating the old behavior for compatibility with legacy callers. + // If callers want control over the behavior, they should switch to + // TextDecoder. + if (mDecoder->Encoding() == UTF_8_ENCODING) { + std::tie(result, read, written) = + mDecoder->DecodeToUTF16WithoutReplacement(src, *dst, false); + if (result != kInputEmpty) { + return NS_ERROR_UDEC_ILLEGALINPUT; + } + } else { + std::tie(result, read, written, std::ignore) = + mDecoder->DecodeToUTF16(src, *dst, false); + } + MOZ_ASSERT(result == kInputEmpty); + MOZ_ASSERT(read == length); + MOZ_ASSERT(written <= needed.value()); + if (!_retval.SetLength(written, fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::GetCharset(nsACString& aCharset) { + if (!mDecoder) { + aCharset.Truncate(); + } else { + mDecoder->Encoding()->Name(aCharset); + } + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::SetCharset(const nsACString& aCharset) { + return InitConverter(aCharset); +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::GetIsInternal(bool* aIsInternal) { + *aIsInternal = mIsInternal; + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal) { + mIsInternal = aIsInternal; + return NS_OK; +} + +nsresult nsScriptableUnicodeConverter::InitConverter( + const nsACString& aCharset) { + mEncoder = nullptr; + mDecoder = nullptr; + + auto encoding = Encoding::ForLabelNoReplacement(aCharset); + if (!encoding) { + return NS_ERROR_UCONV_NOCONV; + } + if (!(encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING)) { + mEncoder = encoding->NewEncoder(); + } + mDecoder = encoding->NewDecoderWithBOMRemoval(); + return NS_OK; +} diff --git a/intl/uconv/nsScriptableUConv.h b/intl/uconv/nsScriptableUConv.h new file mode 100644 index 0000000000..059a4b430c --- /dev/null +++ b/intl/uconv/nsScriptableUConv.h @@ -0,0 +1,34 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __nsScriptableUConv_h_ +#define __nsScriptableUConv_h_ + +#include "nsIScriptableUConv.h" +#include "nsCOMPtr.h" +#include "mozilla/Encoding.h" + +class nsScriptableUnicodeConverter : public nsIScriptableUnicodeConverter { + public: + NS_DECL_ISUPPORTS + NS_DECL_NSISCRIPTABLEUNICODECONVERTER + + nsScriptableUnicodeConverter(); + + protected: + virtual ~nsScriptableUnicodeConverter(); + + mozilla::UniquePtr<mozilla::Encoder> mEncoder; + mozilla::UniquePtr<mozilla::Decoder> mDecoder; + bool mIsInternal; + + nsresult FinishWithLength(char** _retval, int32_t* aLength); + nsresult ConvertFromUnicodeWithLength(const nsAString& aSrc, int32_t* aOutLen, + char** _retval); + + nsresult InitConverter(const nsACString& aCharset); +}; + +#endif diff --git a/intl/uconv/nsTextToSubURI.cpp b/intl/uconv/nsTextToSubURI.cpp new file mode 100644 index 0000000000..e70d9ccbd8 --- /dev/null +++ b/intl/uconv/nsTextToSubURI.cpp @@ -0,0 +1,178 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include "nsString.h" +#include "nsITextToSubURI.h" +#include "nsEscape.h" +#include "nsTextToSubURI.h" +#include "nsCRT.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/Encoding.h" +#include "mozilla/Preferences.h" +#include "mozilla/TextUtils.h" +#include "mozilla/Utf8.h" + +using namespace mozilla; + +nsTextToSubURI::~nsTextToSubURI() = default; + +NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI) + +NS_IMETHODIMP +nsTextToSubURI::ConvertAndEscape(const nsACString& aCharset, + const nsAString& aText, nsACString& aOut) { + auto encoding = Encoding::ForLabelNoReplacement(aCharset); + if (!encoding) { + aOut.Truncate(); + return NS_ERROR_UCONV_NOCONV; + } + nsresult rv; + nsAutoCString intermediate; + std::tie(rv, std::ignore) = encoding->Encode(aText, intermediate); + if (NS_FAILED(rv)) { + aOut.Truncate(); + return rv; + } + bool ok = NS_Escape(intermediate, aOut, url_XPAlphas); + if (!ok) { + aOut.Truncate(); + return NS_ERROR_OUT_OF_MEMORY; + } + return NS_OK; +} + +NS_IMETHODIMP +nsTextToSubURI::UnEscapeAndConvert(const nsACString& aCharset, + const nsACString& aText, nsAString& aOut) { + auto encoding = Encoding::ForLabelNoReplacement(aCharset); + if (!encoding) { + aOut.Truncate(); + return NS_ERROR_UCONV_NOCONV; + } + nsAutoCString unescaped(aText); + NS_UnescapeURL(unescaped); + auto rv = encoding->DecodeWithoutBOMHandling(unescaped, aOut); + if (NS_SUCCEEDED(rv)) { + return NS_OK; + } + return rv; +} + +static bool statefulCharset(const char* charset) { + // HZ, UTF-7 and the CN and KR ISO-2022 variants are no longer in + // mozilla-central but keeping them here just in case for the benefit of + // comm-central. + if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-") - 1) || + !nsCRT::strcasecmp(charset, "UTF-7") || + !nsCRT::strcasecmp(charset, "HZ-GB-2312")) + return true; + + return false; +} + +// static +nsresult nsTextToSubURI::convertURItoUnicode(const nsCString& aCharset, + const nsCString& aURI, + nsAString& aOut) { + // check for 7bit encoding the data may not be ASCII after we decode + bool isStatefulCharset = statefulCharset(aCharset.get()); + + if (!isStatefulCharset) { + if (IsAscii(aURI)) { + CopyASCIItoUTF16(aURI, aOut); + return NS_OK; + } + if (IsUtf8(aURI)) { + CopyUTF8toUTF16(aURI, aOut); + return NS_OK; + } + } + + // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8. + NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG); + + auto encoding = Encoding::ForLabelNoReplacement(aCharset); + if (!encoding) { + aOut.Truncate(); + return NS_ERROR_UCONV_NOCONV; + } + return encoding->DecodeWithoutBOMHandlingAndWithoutReplacement(aURI, aOut); +} + +NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString& aURIFragment, + bool aDontEscape, + nsAString& _retval) { + nsAutoCString unescapedSpec; + // skip control octets (0x00 - 0x1f and 0x7f) when unescaping + NS_UnescapeURL(PromiseFlatCString(aURIFragment), + esc_SkipControl | esc_AlwaysCopy, unescapedSpec); + + // in case of failure, return escaped URI + // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte + // sequences are also considered failure in this context + if (convertURItoUnicode("UTF-8"_ns, unescapedSpec, _retval) != NS_OK) { + // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8 + CopyUTF8toUTF16(aURIFragment, _retval); + } + + if (aDontEscape) { + return NS_OK; + } + + // If there are any characters that are unsafe for URIs, reescape those. + if (mIDNBlocklist.IsEmpty()) { + mozilla::net::InitializeBlocklist(mIDNBlocklist); + // we allow SPACE and IDEOGRAPHIC SPACE in this method + mozilla::net::RemoveCharFromBlocklist(u' ', mIDNBlocklist); + mozilla::net::RemoveCharFromBlocklist(0x3000, mIDNBlocklist); + } + + MOZ_ASSERT(!mIDNBlocklist.IsEmpty()); + const nsPromiseFlatString& unescapedResult = PromiseFlatString(_retval); + nsString reescapedSpec; + _retval = NS_EscapeURL( + unescapedResult, + [&](char16_t aChar) -> bool { + return mozilla::net::CharInBlocklist(aChar, mIDNBlocklist); + }, + reescapedSpec); + + return NS_OK; +} + +NS_IMETHODIMP +nsTextToSubURI::UnEscapeNonAsciiURIJS(const nsACString& aCharset, + const nsACString& aURIFragment, + nsAString& _retval) { + return UnEscapeNonAsciiURI(aCharset, aURIFragment, _retval); +} + +// static +nsresult nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString& aCharset, + const nsACString& aURIFragment, + nsAString& _retval) { + nsAutoCString unescapedSpec; + NS_UnescapeURL(PromiseFlatCString(aURIFragment), + esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec); + // leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII + // superset since converting "http:" with such an encoding is always a bad + // idea. + if (!IsUtf8(unescapedSpec) && + (aCharset.LowerCaseEqualsLiteral("utf-16") || + aCharset.LowerCaseEqualsLiteral("utf-16be") || + aCharset.LowerCaseEqualsLiteral("utf-16le") || + aCharset.LowerCaseEqualsLiteral("utf-7") || + aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))) { + CopyASCIItoUTF16(aURIFragment, _retval); + return NS_OK; + } + + nsresult rv = + convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, _retval); + // NS_OK_UDEC_MOREINPUT is a success code, so caller can't catch the error + // if the string ends with a valid (but incomplete) sequence. + return rv == NS_OK_UDEC_MOREINPUT ? NS_ERROR_UDEC_ILLEGALINPUT : rv; +} + +//---------------------------------------------------------------------- diff --git a/intl/uconv/nsTextToSubURI.h b/intl/uconv/nsTextToSubURI.h new file mode 100644 index 0000000000..1eaeb554dc --- /dev/null +++ b/intl/uconv/nsTextToSubURI.h @@ -0,0 +1,36 @@ +// -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- +// vim: set ts=2 et sw=2 tw=80: +// This Source Code is subject to the terms of the Mozilla Public License +// version 2.0 (the "License"). You can obtain a copy of the License at +// http://mozilla.org/MPL/2.0/. +#ifndef nsTextToSubURI_h__ +#define nsTextToSubURI_h__ + +#include "nsITextToSubURI.h" +#include "nsString.h" +#include "nsTArray.h" +#include "mozilla/net/IDNBlocklistUtils.h" + +class nsTextToSubURI : public nsITextToSubURI { + NS_DECL_ISUPPORTS + NS_DECL_NSITEXTTOSUBURI + + // Thread-safe function for C++ callers + static nsresult UnEscapeNonAsciiURI(const nsACString& aCharset, + const nsACString& aURIFragment, + nsAString& _retval); + + private: + virtual ~nsTextToSubURI(); + + // We assume that the URI is encoded as UTF-8. + static nsresult convertURItoUnicode(const nsCString& aCharset, + const nsCString& aURI, + nsAString& _retval); + + // Characters defined in netwerk/dns/IDNCharacterBlocklist.inc or via the + // network.IDN.extra_allowed_chars and network.IDN.extra_blocked_chars prefs. + nsTArray<mozilla::net::BlocklistRange> mIDNBlocklist; +}; + +#endif // nsTextToSubURI_h__ diff --git a/intl/uconv/tests/gtest/TestShortRead.cpp b/intl/uconv/tests/gtest/TestShortRead.cpp new file mode 100644 index 0000000000..393f5e0027 --- /dev/null +++ b/intl/uconv/tests/gtest/TestShortRead.cpp @@ -0,0 +1,109 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "gtest/gtest.h" +#include "mozilla/ErrorNames.h" +#include "nsCOMPtr.h" +#include "nsConverterInputStream.h" +#include "nsIInputStream.h" +#include "nsISupports.h" +#include "nsStringStream.h" + +namespace { + +class ShortReadWrapper final : public nsIInputStream { + public: + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSIINPUTSTREAM + + template <size_t N> + ShortReadWrapper(const uint32_t (&aShortReads)[N], + nsIInputStream* aBaseStream) + : mShortReadIter(std::begin(aShortReads)), + mShortReadEnd(std::end(aShortReads)), + mBaseStream(aBaseStream) {} + + ShortReadWrapper(const ShortReadWrapper&) = delete; + ShortReadWrapper& operator=(const ShortReadWrapper&) = delete; + + private: + ~ShortReadWrapper() = default; + + const uint32_t* mShortReadIter; + const uint32_t* mShortReadEnd; + nsCOMPtr<nsIInputStream> mBaseStream; +}; + +NS_IMPL_ISUPPORTS(ShortReadWrapper, nsIInputStream) + +NS_IMETHODIMP +ShortReadWrapper::Close() { return mBaseStream->Close(); } + +NS_IMETHODIMP +ShortReadWrapper::Available(uint64_t* aAvailable) { + nsresult rv = mBaseStream->Available(aAvailable); + NS_ENSURE_SUCCESS(rv, rv); + + if (mShortReadIter != mShortReadEnd) { + *aAvailable = std::min(uint64_t(*mShortReadIter), *aAvailable); + } + return NS_OK; +} + +NS_IMETHODIMP +ShortReadWrapper::StreamStatus() { return mBaseStream->StreamStatus(); } + +NS_IMETHODIMP +ShortReadWrapper::Read(char* aBuf, uint32_t aCount, uint32_t* _retval) { + if (mShortReadIter != mShortReadEnd) { + aCount = std::min(*mShortReadIter, aCount); + } + + nsresult rv = mBaseStream->Read(aBuf, aCount, _retval); + if (NS_SUCCEEDED(rv) && mShortReadIter != mShortReadEnd) { + ++mShortReadIter; + } + return rv; +} + +NS_IMETHODIMP +ShortReadWrapper::ReadSegments(nsWriteSegmentFun aWriter, void* aClosure, + uint32_t aCount, uint32_t* _retval) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +ShortReadWrapper::IsNonBlocking(bool* _retval) { + return mBaseStream->IsNonBlocking(_retval); +} + +} // namespace + +TEST(ConverterStreamShortRead, ShortRead) +{ + uint8_t bytes[] = {0xd8, 0x35, 0xdc, 0x20}; + nsCOMPtr<nsIInputStream> baseStream; + ASSERT_TRUE(NS_SUCCEEDED(NS_NewByteInputStream(getter_AddRefs(baseStream), + AsChars(mozilla::Span(bytes)), + NS_ASSIGNMENT_COPY))); + + static const uint32_t kShortReads[] = {1, 2, 1}; + nsCOMPtr<nsIInputStream> shortStream = + new ShortReadWrapper(kShortReads, baseStream); + + RefPtr<nsConverterInputStream> unicharStream = new nsConverterInputStream(); + ASSERT_TRUE(NS_SUCCEEDED( + unicharStream->Init(shortStream, "UTF-16BE", 4096, + nsIConverterInputStream::ERRORS_ARE_FATAL))); + + uint32_t read; + nsAutoString result; + ASSERT_TRUE( + NS_SUCCEEDED(unicharStream->ReadString(UINT32_MAX, result, &read))); + + ASSERT_EQ(read, 2u); + ASSERT_TRUE(result == u"\U0001d420"); +} diff --git a/intl/uconv/tests/gtest/moz.build b/intl/uconv/tests/gtest/moz.build new file mode 100644 index 0000000000..969fb52c7e --- /dev/null +++ b/intl/uconv/tests/gtest/moz.build @@ -0,0 +1,11 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +UNIFIED_SOURCES += [ + "TestShortRead.cpp", +] + +FINAL_LIBRARY = "xul-gtest" diff --git a/intl/uconv/tests/mochitest.toml b/intl/uconv/tests/mochitest.toml new file mode 100644 index 0000000000..32afdba9e4 --- /dev/null +++ b/intl/uconv/tests/mochitest.toml @@ -0,0 +1,25 @@ +[DEFAULT] + +["test_big5_encoder.html"] + +["test_bug335816.html"] + +["test_bug843434.html"] + +["test_bug959058-1.html"] + +["test_bug959058-2.html"] + +["test_long_doc.html"] + +["test_ncr_fallback.html"] + +["test_singlebyte_overconsumption.html"] + +["test_unicode_noncharacterescapes.html"] + +["test_unicode_noncharacters_gb18030.html"] + +["test_unicode_noncharacters_utf8.html"] + +["test_utf8_overconsumption.html"] diff --git a/intl/uconv/tests/moz.build b/intl/uconv/tests/moz.build new file mode 100644 index 0000000000..b9cad75ed7 --- /dev/null +++ b/intl/uconv/tests/moz.build @@ -0,0 +1,13 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +TEST_DIRS += [ + "gtest", +] + +XPCSHELL_TESTS_MANIFESTS += ["unit/xpcshell.toml"] + +MOCHITEST_MANIFESTS += ["mochitest.toml"] diff --git a/intl/uconv/tests/stressgb.pl b/intl/uconv/tests/stressgb.pl new file mode 100644 index 0000000000..5b37fb63fb --- /dev/null +++ b/intl/uconv/tests/stressgb.pl @@ -0,0 +1,23 @@ +#!/usr/bin/perl +use LWP::Simple; +use IO::Handle; +$stdout = *STDOUT; +open(RES , ">resultlog.txt") || die "cannot open result log file"; +#system("rm alldiff.txt in*.txt out*.txt"); +for($i=10;$i<909;$i++) +{ + RES->printf("Test Page %d \n", $i); + $url = "http://people.netscape.com/ftang/testscript/gb18030/gbtext.cgi?page=" . $i; + RES->printf( "URL = %s\n", $url); + $tmpfile = "> in". $i . ".txt"; + open STDOUT, $tmpfile || RES->print("cannot open " . $tmpfile . "\n"); + getprint $url; + $cmd2 = "../../../dist/win32_d.obj/bin/nsconv -f GB18030 -t GB18030 in" . $i . ".txt out" . $i . ".txt >err"; + $cmd3 = "diff -u in" . $i . ".txt out" . $i . ".txt >> alldiff.txt"; + RES->printf( "Run '%s'\n", $cmd2); + $st2 = system($cmd2); + RES->printf( "result = '%d'\n", $st2); + RES->printf( "Run '%s'\n", $cmd3); + $st3 = system($cmd3); + RES->printf( "result = '%d'\n", $st3); +} diff --git a/intl/uconv/tests/test_big5_encoder.html b/intl/uconv/tests/test_big5_encoder.html new file mode 100644 index 0000000000..7e86683f00 --- /dev/null +++ b/intl/uconv/tests/test_big5_encoder.html @@ -0,0 +1,43 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=912470 +--> +<head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8"> + <title>Test for Unicode non-characters</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" /> +</head> +<body onload="test()"> +<pre id="test"> +<script class="testbody" type="text/javascript"> +/* NOTE: + * When we make our data: URL origin work as in Blink, this test will fail. + * Hopefully, by that time are URL parser has become spec-compliant, so that + * we'll pass the Web Platform Test for the big5 encoder + * (testing/web-platform/tests/encoding/big5-encoder.html) and this test can + * simply be removed. + */ +SimpleTest.waitForExplicitFinish(); + +function test() { + var f = document.getElementsByTagName("iframe")[0]; + f.onload = function() { + var href = SpecialPowers.wrap(f).contentWindow.location.href; + var index = href.indexOf("?foo="); + var actual = href.substring(index + 5); + var expected = "h%26%2340614%3Bi%26%23156267%3Bj%A1%40k%A3%E1l%A4%40m%C8%A4n%C8%CDo%FE%FEp%26%238365%3Bq%FDjr%F9%F9s%26%23128169%3Bt"; + is(actual, expected, "Should have gotten the expected encode."); + SimpleTest.finish(); + } + SpecialPowers.wrap(f).contentDocument.forms[0].submit(); +} +</script> +</pre> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=912470">Mozilla Bug 912470</a> +<p id="display"></p> +<div id="content" style="display: none"><iframe src="data:text/html;charset=big5,<form><input name=foo value=h&%23x9EA6;i&%23x2626B;j&%23x3000;k&%23x20AC;l&%23x4E00;m&%23x27607;n&%23xFFE2;o&%23x79D4;p&%23x20AD;q&%23x203B5;r&%23x2550;s&%23x1F4A9;t></form>"> +</div> +</body> +</html> diff --git a/intl/uconv/tests/test_bug335816.html b/intl/uconv/tests/test_bug335816.html new file mode 100644 index 0000000000..58fe538b5d --- /dev/null +++ b/intl/uconv/tests/test_bug335816.html @@ -0,0 +1,40 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=335816 +--> +<head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8"> + <title>Test for Bug 335816</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" /> +</head> +<body onload="test()"> +<pre id="test"> +<script class="testbody" type="text/javascript"> + +/** Test for Bug 335816 **/ +function test() +{ + ok($("display").innerHTML != "Keep that breathless charm", "Hidden script not executed"); + SimpleTest.finish(); +} + +function WontYouPleaseArrangeIt() +{ + $("display").innerHTML = "Keep that breathless charm"; +} + +SimpleTest.waitForExplicitFinish(); +</script> +</pre> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=335816">Mozilla Bug 335816</a> +<p id="display">Lovely ... Never, ever change.</p> + +<!-- There is a UTF-8 BOM just here - - - + | which should not be stripped + v --> +<div id="content" style="display: none"><script>WontYouPleaseArrangeIt();</script></div> +</body> +</html> + diff --git a/intl/uconv/tests/test_bug843434.html b/intl/uconv/tests/test_bug843434.html new file mode 100644 index 0000000000..d79ad70e6a --- /dev/null +++ b/intl/uconv/tests/test_bug843434.html @@ -0,0 +1,27 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=843434 +--> +<head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8"> + <title>Test for Bug 843434</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" /> + <!-- This next 'link' is part of the test, it may cause an assertion. --> + <link rel=stylesheet href="data:text/css;charset=ISO-2022-JP,%cc"> +</head> +<body onload="test()"> +<pre id="test"> +<script class="testbody" type="text/javascript"> +function test() +{ + ok(true, "Test for no prior assertion."); + SimpleTest.finish(); +} +SimpleTest.waitForExplicitFinish(); +</script> +</pre> +</body> +</html> + diff --git a/intl/uconv/tests/test_bug959058-1.html b/intl/uconv/tests/test_bug959058-1.html new file mode 100644 index 0000000000..85d45a8841 --- /dev/null +++ b/intl/uconv/tests/test_bug959058-1.html @@ -0,0 +1,28 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=959058 +--> +<head> + <meta charset="gbk"> + <title>Test for Bug 959058</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> + <script type="application/javascript"> + + /** Test for Bug 959058 **/ + + is("”9¸2", "\uD83C\uDF54", "Should have gotten a hamburger."); + + </script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=959058">Mozilla Bug 959058</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +</pre> +</body> +</html> diff --git a/intl/uconv/tests/test_bug959058-2.html b/intl/uconv/tests/test_bug959058-2.html new file mode 100644 index 0000000000..86bf500e1b --- /dev/null +++ b/intl/uconv/tests/test_bug959058-2.html @@ -0,0 +1,28 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=959058 +--> +<head> + <meta charset="gbk"> + <title>Test for Bug 959058</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> + <script type="application/javascript"> + + /** Test for Bug 959058 **/ + + is("€", "\u20AC", "Should have gotten euro."); + + </script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=959058">Mozilla Bug 959058</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +</pre> +</body> +</html> diff --git a/intl/uconv/tests/test_long_doc.html b/intl/uconv/tests/test_long_doc.html new file mode 100644 index 0000000000..cf07e7832f --- /dev/null +++ b/intl/uconv/tests/test_long_doc.html @@ -0,0 +1,98 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=718573 +--> +<head> + <title>Test for Bug 718573</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" /> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=718573">Mozilla Bug 718573</a> +<p id="display"></p> +<div id="content" style="display: none"> +</div> +<pre id="test"> +<script class="testbody" type="text/javascript"> +/** Test for Bug 718573 **/ + +/* Text longer than 2K bytes to ensure that it crosses a block boundary + while decoding */ +const inString = "Many years ago, I contracted an intimacy with a Mr. William Legrand. He was of an ancient Huguenot family, and had once been wealthy; but a series of misfortunes had reduced him to want. To avoid the mortification consequent upon his disasters, he left New Orleans, the city of his forefathers, and took up his residence at Sullivan's Island, near Charleston, South Carolina. This island is a very singular one. It consists of little else than the sea sand, and is about three miles long. Its breadth at no point exceeds a quarter of a mile. It is separated from the mainland by a scarcely perceptible creek, oozing its way through a wilderness of reeds and slime, a favorite resort of the marsh-hen. The vegetation, as might be supposed, is scant, or at least dwarfish. No trees of any magnitude are to be seen. Near the western extremity, where Fort Moultrie stands, and where are some miserable frame buildings, tenanted, during summer, by the fugitives from Charleston dust and fever, may be found, indeed, the bristly palmetto; but the whole island, with the exception of this western point, and a line of hard, white beach on the sea-coast, is covered with a dense undergrowth of the sweet myrtle so much prized by the horticulturists of England. The shrub here often attains the height of fifteen or twenty feet, and forms an almost impenetrable coppice, burthening the air with its fragrance. In the inmost recesses of this coppice, not far from the eastern or more remote end of the island, Legrand had built himself a small hut, which he occupied when I first, by mere accident, made his acquaintance. This soon ripened into friendship -- for there was much in the recluse to excite interest and esteem. I found him well educated, with unusual powers of mind, but infected with misanthropy, and subject to perverse moods of alternate enthusiasm and melancholy. He had with him many books, but rarely employed them. His chief amusements were gunning and fishing, or sauntering along the beach and through the myrtles, in quest of shells or entomological specimens -- his collection of the latter might have been envied by a Swammerdamm. In these excursions he was usually accompanied by an old negro, called Jupiter, who had been manumitted before the reverses of the family, but who could be induced, neither by threats nor by promises, to abandon what he considered his right of attendance upon the footsteps of his young 'Massa Will.' It is not improbable that the relatives of Legrand, conceiving him to be somewhat unsettled in intellect, had contrived to instil this obstinacy into Jupiter, with a view to the supervision and guardianship of the wanderer. "; + +const testContent = "<pre id='testPara'>" + inString; + +const Ci = SpecialPowers.Ci; +const Cc = SpecialPowers.Cc; + +var decoders = [ + "Big5", + "Big5-HKSCS", + "EUC-JP", + "EUC-KR", + "gb18030", + "IBM866", + "ISO-2022-JP", + "ISO-8859-3", + "ISO-8859-4", + "ISO-8859-5", + "ISO-8859-6", + "ISO-8859-7", + "ISO-8859-8", + "ISO-8859-8-I", + "ISO-8859-10", + "ISO-8859-13", + "ISO-8859-14", + "ISO-8859-15", + "ISO-8859-16", + "ISO-8859-2", + "KOI8-R", + "KOI8-U", + "Shift_JIS", + "windows-1250", + "windows-1251", + "windows-1252", + "windows-1253", + "windows-1254", + "windows-1255", + "windows-1256", + "windows-1257", + "windows-1258", + "windows-874", + "x-mac-cyrillic", + "UTF-8" +]; + +for (var i = 0; i < decoders.length; i++) { + var decoder = decoders[i]; + var data = encodeURI(testContent); + var dataURI = "data:text/html;charset=" + decoder + "," + data; + + var testFrame = document.createElement("iframe"); + let frameID = decoder; + testFrame.setAttribute("id", frameID); + var testFrameObj = document.body.appendChild(testFrame); + if (i < decoders.length) + testFrameObj.setAttribute("onload", "testDecoding('" + decoder + "')"); + else + testFrameObj.setAttribute("onload", "lastTest('" + decoder + "')"); + testFrameObj.contentDocument.location.assign(dataURI); +} + +function lastTest(frame) +{ + testDecoding(frame); + SimpleTest.finish(); +} + +function testDecoding(frame) +{ + var iframeDoc = SpecialPowers.wrap($(frame)).contentDocument; + var outString = iframeDoc.getElementById("testPara").innerHTML; + is(outString, inString, "content decoded as " + frame); +} +</script> +</pre> +</body> +</html> diff --git a/intl/uconv/tests/test_ncr_fallback.html b/intl/uconv/tests/test_ncr_fallback.html new file mode 100644 index 0000000000..846f18be8f --- /dev/null +++ b/intl/uconv/tests/test_ncr_fallback.html @@ -0,0 +1,74 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1202366 +--> +<head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8"> + <title>Test for unpaired surrogates</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" /> +</head> +<body onload="step()"> +<pre id="test"> +<script class="testbody" type="text/javascript"> +/* NOTE: + * When we make our data: URL origin work as in Blink, this test will fail. + * Don't let this test block alignment of data: URL origin with Blink. + */ +SimpleTest.waitForExplicitFinish(); + +var expectations = [ + "%26%2365533%3B", + "a%26%2365533%3B", + "%26%2365533%3Ba", + "a%26%2365533%3Ba", + "%26%2365533%3B", + "a%26%2365533%3B", + "%26%2365533%3Ba", + "a%26%2365533%3Ba", + "%26%23128169%3B", + "%26%23128169%3B", + "%1B%24B%22%29%1B%28B", + "%1B%24B%22%29%1B%28B%26%23128169%3B", +]; + +var i = 0; + +function step() { + var f = document.getElementsByTagName("iframe")[i]; + f.onload = function() { + var href = SpecialPowers.wrap(f).contentWindow.location.href; + var index = href.indexOf("?foo="); + var actual = href.substring(index + 5); + var expected = expectations[i]; + is(actual, expected, "Should have gotten the expected encode."); + i++ + if (i == document.getElementsByTagName("iframe").length) { + SimpleTest.finish(); + } else { + step(); + } + } + SpecialPowers.wrap(f).contentDocument.forms[0].submit(); +} +</script> +</pre> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1202366">Mozilla Bug 1202366</a> +<p id="display"></p> +<div id="content" style="display: none"> +<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=\uD83D></form>');</script>"></iframe> +<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=a\uD83D></form>');</script>"></iframe> +<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=\uD83Da></form>');</script>"></iframe> +<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=a\uD83Da></form>');</script>"></iframe> +<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=\uDCA9></form>');</script>"></iframe> +<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=a\uDCA9></form>');</script>"></iframe> +<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=\uDCA9a></form>');</script>"></iframe> +<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=a\uDCA9a></form>');</script>"></iframe> +<iframe src="data:text/html;charset=big5,<script>document.write('<form><input name=foo value=\uD83D\uDCA9></form>');</script>"></iframe> +<iframe src="data:text/html;charset=iso-2022-jp,<script>document.write('<form><input name=foo value=\uD83D\uDCA9></form>');</script>"></iframe> +<iframe src="data:text/html;charset=iso-2022-jp,<script>document.write('<form><input name=foo value=\u3012></form>');</script>"></iframe> +<iframe src="data:text/html;charset=iso-2022-jp,<script>document.write('<form><input name=foo value=\u3012\uD83D\uDCA9></form>');</script>"></iframe> +</div> +</body> +</html> diff --git a/intl/uconv/tests/test_singlebyte_overconsumption.html b/intl/uconv/tests/test_singlebyte_overconsumption.html new file mode 100644 index 0000000000..3aeeb928ec --- /dev/null +++ b/intl/uconv/tests/test_singlebyte_overconsumption.html @@ -0,0 +1,33 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=564679 +--> +<head> + <meta http-equiv="Content-type" content="text/html; charset=windows-1253"> + <title>Test for undefined codepoints</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" /> +</head> +<body onload="test()"> +<pre id="test"> +<script class="testbody" type="text/javascript"> + +/** test that single byte decoding resynchronizes after undefined codepoints */ +function test() +{ + is($("display").innerHTML, "All good.", "No overconsumption"); + SimpleTest.finish(); +} + + SimpleTest.waitForExplicitFinish(); +</script> +</pre> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=564679">Mozilla Bug 564679</a> +<p id="display">Evil.</p> +<div id="content" style="display: none"></div> + Ò<script type="text/javascript"> + $("display").innerHTML = "All good."; + </script> -> +</body> +</html> diff --git a/intl/uconv/tests/test_unicode_noncharacterescapes.html b/intl/uconv/tests/test_unicode_noncharacterescapes.html new file mode 100644 index 0000000000..e44f8d782b --- /dev/null +++ b/intl/uconv/tests/test_unicode_noncharacterescapes.html @@ -0,0 +1,303 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=445886 +--> +<head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8"> + <title>Test for Unicode non-characters</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" /> +</head> +<body onload="Inject()"> +<pre id="test"> +<script class="testbody" type="text/javascript"> + +/* eslint-disable no-eval */ + +/** Test that unicode non-characters are not discarded **/ +function test() +{ + is($("display").innerHTML,"All good.", "Noncharacters not stripped"); + SimpleTest.finish(); +} + +// eslint-disable-next-line complexity +function Inject() +{ + // script fragments containing Unicode non-characters + try { + // U+FDD0 + eval("$(\"display\").inner\ufdd0HTML += \" U+FDD0 is evil\""); + } catch(e) {} + try { + // U+FDD1 + eval("$(\"display\").inner\ufdd1HTML += \" U+FDD1 is evil\""); + } catch(e) {} + try { + // U+FDD2 + eval("$(\"display\").inner\ufdd2HTML += \" U+FDD2 is evil\""); + } catch(e) {} + try { + // U+FDD3 + eval("$(\"display\").inner\ufdd3HTML += \" U+FDD3 is evil\""); + } catch(e) {} + try { + // U+FDD4 + eval("$(\"display\").inner\ufdd4HTML += \" U+FDD4 is evil\""); + } catch(e) {} + try { + // U+FDD5 + eval("$(\"display\").inner\ufdd5HTML += \" U+FDD5 is evil\""); + } catch(e) {} + try { + // U+FDD6 + eval("$(\"display\").inner\ufdd6HTML += \" U+FDD6 is evil\""); + } catch(e) {} + try { + // U+FDD7 + eval("$(\"display\").inner\ufdd7HTML += \" U+FDD7 is evil\""); + } catch(e) {} + try { + // U+FDD8 + eval("$(\"display\").inner\ufdd8HTML += \" U+FDD8 is evil\""); + } catch(e) {} + try { + // U+FDD9 + eval("$(\"display\").inner\ufdd9HTML += \" U+FDD9 is evil\""); + } catch(e) {} + try { + // U+FDDA + eval("$(\"display\").inner\ufddaHTML += \" U+FDDA is evil\""); + } catch(e) {} + try { + // U+FDDB + eval("$(\"display\").inner\ufddbHTML += \" U+FDDB is evil\""); + } catch(e) {} + try { + // U+FDDC + eval("$(\"display\").inner\ufddcHTML += \" U+FDDC is evil\""); + } catch(e) {} + try { + // U+FDDD + eval("$(\"display\").inner\ufdddHTML += \" U+FDDD is evil\""); + } catch(e) {} + try { + // U+FDDE + eval("$(\"display\").inner\ufddeHTML += \" U+FDDE is evil\""); + } catch(e) {} + try { + // U+FDDF + eval("$(\"display\").inner\ufddfHTML += \" U+FDDF is evil\""); + } catch(e) {} + try { + // U+FDE0 + eval("$(\"display\").inner\ufde0HTML += \" U+FDE0 is evil\""); + } catch(e) {} + try { + // U+FDE1 + eval("$(\"display\").inner\ufde1HTML += \" U+FDE1 is evil\""); + } catch(e) {} + try { + // U+FDE2 + eval("$(\"display\").inner\ufde2HTML += \" U+FDE2 is evil\""); + } catch(e) {} + try { + // U+FDE3 + eval("$(\"display\").inner\ufde3HTML += \" U+FDE3 is evil\""); + } catch(e) {} + try { + // U+FDE4 + eval("$(\"display\").inner\ufde4HTML += \" U+FDE4 is evil\""); + } catch(e) {} + try { + // U+FDE5 + eval("$(\"display\").inner\ufde5HTML += \" U+FDE5 is evil\""); + } catch(e) {} + try { + // U+FDE6 + eval("$(\"display\").inner\ufde6HTML += \" U+FDE6 is evil\""); + } catch(e) {} + try { + // U+FDE7 + eval("$(\"display\").inner\ufde7HTML += \" U+FDE7 is evil\""); + } catch(e) {} + try { + // U+FDE8 + eval("$(\"display\").inner\ufde8HTML += \" U+FDE8 is evil\""); + } catch(e) {} + try { + // U+FDE9 + eval("$(\"display\").inner\ufde9HTML += \" U+FDE9 is evil\""); + } catch(e) {} + try { + // U+FDEA + eval("$(\"display\").inner\ufdeaHTML += \" U+FDEA is evil\""); + } catch(e) {} + try { + // U+FDEB + eval("$(\"display\").inner\ufdebHTML += \" U+FDEB is evil\""); + } catch(e) {} + try { + // U+FDEC + eval("$(\"display\").inner\ufdecHTML += \" U+FDEC is evil\""); + } catch(e) {} + try { + // U+FDED + eval("$(\"display\").inner\ufdedHTML += \" U+FDED is evil\""); + } catch(e) {} + try { + // U+FDEE + eval("$(\"display\").inner\ufdeeHTML += \" U+FDEE is evil\""); + } catch(e) {} + try { + // U+FDEF + eval("$(\"display\").inner\ufdefHTML += \" U+FDEF is evil\""); + } catch(e) {} + try { + // U+FFFE + eval("$(\"display\").inner\ufffeHTML += \" U+FFFE is evil\""); + } catch(e) {} + try { + // U+FFFF + eval("$(\"display\").inner\uffffHTML += \" U+FFFF is evil\""); + } catch(e) {} + try { + // U+1FFFE + eval("$(\"display\").inner\ud83f\udffeHTML += \" U+1FFFE is evil\""); + } catch(e) {} + try { + // U+1FFFF + eval("$(\"display\").inner\ud83f\udfffHTML += \" U+1FFFF is evil\""); + } catch(e) {} + try { + // U+2FFFE + eval("$(\"display\").inner\ud87f\udffeHTML += \" U+2FFFE is evil\""); + } catch(e) {} + try { + // U+2FFFF + eval("$(\"display\").inner\ud87f\udfffHTML += \" U+2FFFF is evil\""); + } catch(e) {} + try { + // U+3FFFE + eval("$(\"display\").inner\ud8bf\udffeHTML += \" U+3FFFE is evil\""); + } catch(e) {} + try { + // U+3FFFF + eval("$(\"display\").inner\ud8bf\udfffHTML += \" U+3FFFF is evil\""); + } catch(e) {} + try { + // U+4FFFE + eval("$(\"display\").inner\ud8ff\udffeHTML += \" U+4FFFE is evil\""); + } catch(e) {} + try { + // U+4FFFF + eval("$(\"display\").inner\ud8ff\udfffHTML += \" U+4FFFF is evil\""); + } catch(e) {} + try { + // U+5FFFE + eval("$(\"display\").inner\ud93f\udffeHTML += \" U+5FFFE is evil\""); + } catch(e) {} + try { + // U+5FFFF + eval("$(\"display\").inner\ud93f\udfffHTML += \" U+5FFFF is evil\""); + } catch(e) {} + try { + // U+6FFFE + eval("$(\"display\").inner\ud97f\udffeHTML += \" U+6FFFE is evil\""); + } catch(e) {} + try { + // U+6FFFF + eval("$(\"display\").inner\ud97f\udfffHTML += \" U+6FFFF is evil\""); + } catch(e) {} + try { + // U+7FFFE + eval("$(\"display\").inner\ud9bf\udffeHTML += \" U+7FFFE is evil\""); + } catch(e) {} + try { + // U+7FFFF + eval("$(\"display\").inner\ud9bf\udfffHTML += \" U+7FFFF is evil\""); + } catch(e) {} + try { + // U+8FFFE + eval("$(\"display\").inner\ud9ff\udffeHTML += \" U+8FFFE is evil\""); + } catch(e) {} + try { + // U+8FFFF + eval("$(\"display\").inner\ud9ff\udfffHTML += \" U+8FFFF is evil\""); + } catch(e) {} + try { + // U+9FFFE + eval("$(\"display\").inner\uda3f\udffeHTML += \" U+9FFFE is evil\""); + } catch(e) {} + try { + // U+9FFFF + eval("$(\"display\").inner\uda3f\udfffHTML += \" U+9FFFF is evil\""); + } catch(e) {} + try { + // U+AFFFE + eval("$(\"display\").inner\uda7f\udffeHTML += \" U+AFFFE is evil\""); + } catch(e) {} + try { + // U+AFFFF + eval("$(\"display\").inner\uda7f\udfffHTML += \" U+AFFFF is evil\""); + } catch(e) {} + try { + // U+BFFFE + eval("$(\"display\").inner\udabf\udffeHTML += \" U+BFFFE is evil\""); + } catch(e) {} + try { + // U+BFFFF + eval("$(\"display\").inner\udabf\udfffHTML += \" U+BFFFF is evil\""); + } catch(e) {} + try { + // U+CFFFE + eval("$(\"display\").inner\udaff\udffeHTML += \" U+CFFFE is evil\""); + } catch(e) {} + try { + // U+CFFFF + eval("$(\"display\").inner\udaff\udfffHTML += \" U+CFFFF is evil\""); + } catch(e) {} + try { + // U+DFFFE + eval("$(\"display\").inner\udb3f\udffeHTML += \" U+DFFFE is evil\""); + } catch(e) {} + try { + // U+DFFFF + eval("$(\"display\").inner\udb3f\udfffHTML += \" U+DFFFF is evil\""); + } catch(e) {} + try { + // U+EFFFE + eval("$(\"display\").inner\udb7f\udffeHTML += \" U+EFFFE is evil\""); + } catch(e) {} + try { + // U+EFFFF + eval("$(\"display\").inner\udb7f\udfffHTML += \" U+EFFFF is evil\""); + } catch(e) {} + try { + // U+FFFFE + eval("$(\"display\").inner\udbbf\udffeHTML += \" U+FFFFE is evil\""); + } catch(e) {} + try { + // U+FFFFF + eval("$(\"display\").inner\udbbf\udfffHTML += \" U+FFFFF is evil\""); + } catch(e) {} + try { + // U+10FFFE + eval("$(\"display\").inner\udbff\udffeHTML += \" U+10FFFE is evil\""); + } catch(e) {} + try { + // U+10FFFF + eval("$(\"display\").inner\udbff\udfffHTML += \" U+10FFFF is evil\""); + } catch(e) {} + test(); +} + + SimpleTest.waitForExplicitFinish(); +</script> +</pre> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=445886">Mozilla Bug 445886</a> +<p id="display">All good.</p> +<div id="content" style="display: none"></div> +</body> +</html> diff --git a/intl/uconv/tests/test_unicode_noncharacters_gb18030.html b/intl/uconv/tests/test_unicode_noncharacters_gb18030.html new file mode 100644 index 0000000000..0c9156d9e3 --- /dev/null +++ b/intl/uconv/tests/test_unicode_noncharacters_gb18030.html @@ -0,0 +1,305 @@ +<!DOCTYPE HTML> +<html> +<head> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=445886 +--> + <meta http-equiv="Content-type" content="text/html; charset=gb18030"> + <title>Test for Unicode non-characters</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" + href="/tests/SimpleTest/test.css"> +</head> +<body onload="Inject()"> +<pre id="test"><script class="testbody" type="text/javascript"> + +/* eslint-disable no-eval */ + +/** Test that unicode non-characters are not discarded **/ +function test() +{ + is($("display").innerHTML, "All good.", "Noncharacters not stripped"); + SimpleTest.finish(); +} + +// eslint-disable-next-line complexity +function Inject() +{ + // script fragments containing Unicode non-characters + try { + // U+FDD0 + eval("$(\"display\").inner�0�2HTML += \" U+FDD0 is evil \""); + } catch(e) {} + try { + // U+FDD1 + eval("$(\"display\").inner�0�3HTML += \" U+FDD1 is evil \""); + } catch(e) {} + try { + // U+FDD2 + eval("$(\"display\").inner�0�4HTML += \" U+FDD2 is evil \""); + } catch(e) {} + try { + // U+FDD3 + eval("$(\"display\").inner�0�5HTML += \" U+FDD3 is evil \""); + } catch(e) {} + try { + // U+FDD4 + eval("$(\"display\").inner�0�6HTML += \" U+FDD4 is evil \""); + } catch(e) {} + try { + // U+FDD5 + eval("$(\"display\").inner�0�7HTML += \" U+FDD5 is evil \""); + } catch(e) {} + try { + // U+FDD6 + eval("$(\"display\").inner�0�8HTML += \" U+FDD6 is evil \""); + } catch(e) {} + try { + // U+FDD7 + eval("$(\"display\").inner�0�9HTML += \" U+FDD7 is evil \""); + } catch(e) {} + try { + // U+FDD8 + eval("$(\"display\").inner�0�0HTML += \" U+FDD8 is evil \""); + } catch(e) {} + try { + // U+FDD9 + eval("$(\"display\").inner�0�1HTML += \" U+FDD9 is evil \""); + } catch(e) {} + try { + // U+FDDA + eval("$(\"display\").inner�0�2HTML += \" U+FDDA is evil \""); + } catch(e) {} + try { + // U+FDDB + eval("$(\"display\").inner�0�3HTML += \" U+FDDB is evil \""); + } catch(e) {} + try { + // U+FDDC + eval("$(\"display\").inner�0�4HTML += \" U+FDDC is evil \""); + } catch(e) {} + try { + // U+FDDD + eval("$(\"display\").inner�0�5HTML += \" U+FDDD is evil \""); + } catch(e) {} + try { + // U+FDDE + eval("$(\"display\").inner�0�6HTML += \" U+FDDE is evil \""); + } catch(e) {} + try { + // U+FDDF + eval("$(\"display\").inner�0�7HTML += \" U+FDDF is evil \""); + } catch(e) {} + try { + // U+FDE0 + eval("$(\"display\").inner�0�8HTML += \" U+FDE0 is evil \""); + } catch(e) {} + try { + // U+FDE1 + eval("$(\"display\").inner�0�9HTML += \" U+FDE1 is evil \""); + } catch(e) {} + try { + // U+FDE2 + eval("$(\"display\").inner�0�0HTML += \" U+FDE2 is evil \""); + } catch(e) {} + try { + // U+FDE3 + eval("$(\"display\").inner�0�1HTML += \" U+FDE3 is evil \""); + } catch(e) {} + try { + // U+FDE4 + eval("$(\"display\").inner�0�2HTML += \" U+FDE4 is evil \""); + } catch(e) {} + try { + // U+FDE5 + eval("$(\"display\").inner�0�3HTML += \" U+FDE5 is evil \""); + } catch(e) {} + try { + // U+FDE6 + eval("$(\"display\").inner�0�4HTML += \" U+FDE6 is evil \""); + } catch(e) {} + try { + // U+FDE7 + eval("$(\"display\").inner�0�5HTML += \" U+FDE7 is evil \""); + } catch(e) {} + try { + // U+FDE8 + eval("$(\"display\").inner�0�6HTML += \" U+FDE8 is evil \""); + } catch(e) {} + try { + // U+FDE9 + eval("$(\"display\").inner�0�7HTML += \" U+FDE9 is evil \""); + } catch(e) {} + try { + // U+FDEA + eval("$(\"display\").inner�0�8HTML += \" U+FDEA is evil \""); + } catch(e) {} + try { + // U+FDEB + eval("$(\"display\").inner�0�9HTML += \" U+FDEB is evil \""); + } catch(e) {} + try { + // U+FDEC + eval("$(\"display\").inner�0�0HTML += \" U+FDEC is evil \""); + } catch(e) {} + try { + // U+FDED + eval("$(\"display\").inner�0�1HTML += \" U+FDED is evil \""); + } catch(e) {} + try { + // U+FDEE + eval("$(\"display\").inner�0�2HTML += \" U+FDEE is evil \""); + } catch(e) {} + try { + // U+FDEF + eval("$(\"display\").inner�0�3HTML += \" U+FDEF is evil \""); + } catch(e) {} + try { + // U+FFFE + eval("$(\"display\").inner�1�8HTML += \" U+FFFE is evil \""); + } catch(e) {} + try { + // U+FFFF + eval("$(\"display\").inner�1�9HTML += \" U+FFFF is evil \""); + } catch(e) {} + try { + // U+1FFFE + eval("$(\"display\").inner�2�4HTML += \" U+1FFFE is evil \""); + } catch(e) {} + try { + // U+1FFFF + eval("$(\"display\").inner�2�5HTML += \" U+1FFFF is evil \""); + } catch(e) {} + try { + // U+2FFFE + eval("$(\"display\").inner�4�0HTML += \" U+2FFFE is evil \""); + } catch(e) {} + try { + // U+2FFFF + eval("$(\"display\").inner�4�1HTML += \" U+2FFFF is evil \""); + } catch(e) {} + try { + // U+3FFFE + eval("$(\"display\").inner�6�6HTML += \" U+3FFFE is evil \""); + } catch(e) {} + try { + // U+3FFFF + eval("$(\"display\").inner�6�7HTML += \" U+3FFFF is evil \""); + } catch(e) {} + try { + // U+4FFFE + eval("$(\"display\").inner�8�2HTML += \" U+4FFFE is evil \""); + } catch(e) {} + try { + // U+4FFFF + eval("$(\"display\").inner�8�3HTML += \" U+4FFFF is evil \""); + } catch(e) {} + try { + // U+5FFFE + eval("$(\"display\").inner�0�8HTML += \" U+5FFFE is evil \""); + } catch(e) {} + try { + // U+5FFFF + eval("$(\"display\").inner�0�9HTML += \" U+5FFFF is evil \""); + } catch(e) {} + try { + // U+6FFFE + eval("$(\"display\").inner�2�4HTML += \" U+6FFFE is evil \""); + } catch(e) {} + try { + // U+6FFFF + eval("$(\"display\").inner�2�5HTML += \" U+6FFFF is evil \""); + } catch(e) {} + try { + // U+7FFFE + eval("$(\"display\").inner�4�0HTML += \" U+7FFFE is evil \""); + } catch(e) {} + try { + // U+7FFFF + eval("$(\"display\").inner�4�1HTML += \" U+7FFFF is evil \""); + } catch(e) {} + try { + // U+8FFFE + eval("$(\"display\").inner�6�6HTML += \" U+8FFFE is evil \""); + } catch(e) {} + try { + // U+8FFFF + eval("$(\"display\").inner�6�7HTML += \" U+8FFFF is evil \""); + } catch(e) {} + try { + // U+9FFFE + eval("$(\"display\").inner�8�2HTML += \" U+9FFFE is evil \""); + } catch(e) {} + try { + // U+9FFFF + eval("$(\"display\").inner�8�3HTML += \" U+9FFFF is evil \""); + } catch(e) {} + try { + // U+AFFFE + eval("$(\"display\").inner�0�8HTML += \" U+AFFFE is evil \""); + } catch(e) {} + try { + // U+AFFFF + eval("$(\"display\").inner�0�9HTML += \" U+AFFFF is evil \""); + } catch(e) {} + try { + // U+BFFFE + eval("$(\"display\").inner�2�4HTML += \" U+BFFFE is evil \""); + } catch(e) {} + try { + // U+BFFFF + eval("$(\"display\").inner�2�5HTML += \" U+BFFFF is evil \""); + } catch(e) {} + try { + // U+CFFFE + eval("$(\"display\").inner�4�0HTML += \" U+CFFFE is evil \""); + } catch(e) {} + try { + // U+CFFFF + eval("$(\"display\").inner�4�1HTML += \" U+CFFFF is evil \""); + } catch(e) {} + try { + // U+DFFFE + eval("$(\"display\").inner�6�6HTML += \" U+DFFFE is evil \""); + } catch(e) {} + try { + // U+DFFFF + eval("$(\"display\").inner�6�7HTML += \" U+DFFFF is evil \""); + } catch(e) {} + try { + // U+EFFFE + eval("$(\"display\").inner�8�2HTML += \" U+EFFFE is evil \""); + } catch(e) {} + try { + // U+EFFFF + eval("$(\"display\").inner�8�3HTML += \" U+EFFFF is evil \""); + } catch(e) {} + try { + // U+FFFFE + eval("$(\"display\").inner�0�8HTML += \" U+FFFFE is evil \""); + } catch(e) {} + try { + // U+FFFFF + eval("$(\"display\").inner�0�9HTML += \" U+FFFFF is evil \""); + } catch(e) {} + try { + // U+10FFFE + eval("$(\"display\").inner�2�4HTML += \" U+10FFFE is evil \""); + } catch(e) {} + try { + // U+10FFFF + eval("$(\"display\").inner�2�5HTML += \" U+10FFFF is evil \""); + } catch(e) {} + test(); +} + + SimpleTest.waitForExplicitFinish(); +</script> +</pre> +<a target="_blank" + href="https://bugzilla.mozilla.org/show_bug.cgi?id=445886">Mozilla Bug +445886</a> +<p id="display">All good.</p> +<div id="content" style="display: none;"></div> +</body> +</html> diff --git a/intl/uconv/tests/test_unicode_noncharacters_utf8.html b/intl/uconv/tests/test_unicode_noncharacters_utf8.html new file mode 100644 index 0000000000..ecfdbeae09 --- /dev/null +++ b/intl/uconv/tests/test_unicode_noncharacters_utf8.html @@ -0,0 +1,303 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=445886 +--> +<head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8"> + <title>Test for Unicode non-characters</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" /> +</head> +<body onload="Inject()"> +<pre id="test"> +<script class="testbody" type="text/javascript"> + +/* eslint-disable no-eval */ + +/** Test that unicode non-characters are not discarded **/ +function test() +{ + is($("display").innerHTML, "All good.", "Noncharacters not stripped"); + SimpleTest.finish(); +} + +// eslint-disable-next-line complexity +function Inject() +{ + // script fragments containing Unicode non-characters + try { + // U+FDD0 + eval("$(\"display\").innerï·HTML += \" U+FDD0 is evil \""); + } catch(e) {} + try { + // U+FDD1 + eval("$(\"display\").innerï·‘HTML += \" U+FDD1 is evil \""); + } catch(e) {} + try { + // U+FDD2 + eval("$(\"display\").innerï·’HTML += \" U+FDD2 is evil \""); + } catch(e) {} + try { + // U+FDD3 + eval("$(\"display\").innerï·“HTML += \" U+FDD3 is evil \""); + } catch(e) {} + try { + // U+FDD4 + eval("$(\"display\").innerï·”HTML += \" U+FDD4 is evil \""); + } catch(e) {} + try { + // U+FDD5 + eval("$(\"display\").innerï·•HTML += \" U+FDD5 is evil \""); + } catch(e) {} + try { + // U+FDD6 + eval("$(\"display\").innerï·–HTML += \" U+FDD6 is evil \""); + } catch(e) {} + try { + // U+FDD7 + eval("$(\"display\").innerï·—HTML += \" U+FDD7 is evil \""); + } catch(e) {} + try { + // U+FDD8 + eval("$(\"display\").innerï·˜HTML += \" U+FDD8 is evil \""); + } catch(e) {} + try { + // U+FDD9 + eval("$(\"display\").innerï·™HTML += \" U+FDD9 is evil \""); + } catch(e) {} + try { + // U+FDDA + eval("$(\"display\").innerï·šHTML += \" U+FDDA is evil \""); + } catch(e) {} + try { + // U+FDDB + eval("$(\"display\").innerï·›HTML += \" U+FDDB is evil \""); + } catch(e) {} + try { + // U+FDDC + eval("$(\"display\").innerï·œHTML += \" U+FDDC is evil \""); + } catch(e) {} + try { + // U+FDDD + eval("$(\"display\").innerï·HTML += \" U+FDDD is evil \""); + } catch(e) {} + try { + // U+FDDE + eval("$(\"display\").innerï·žHTML += \" U+FDDE is evil \""); + } catch(e) {} + try { + // U+FDDF + eval("$(\"display\").innerï·ŸHTML += \" U+FDDF is evil \""); + } catch(e) {} + try { + // U+FDE0 + eval("$(\"display\").innerï· HTML += \" U+FDE0 is evil \""); + } catch(e) {} + try { + // U+FDE1 + eval("$(\"display\").innerï·¡HTML += \" U+FDE1 is evil \""); + } catch(e) {} + try { + // U+FDE2 + eval("$(\"display\").innerï·¢HTML += \" U+FDE2 is evil \""); + } catch(e) {} + try { + // U+FDE3 + eval("$(\"display\").innerï·£HTML += \" U+FDE3 is evil \""); + } catch(e) {} + try { + // U+FDE4 + eval("$(\"display\").innerï·¤HTML += \" U+FDE4 is evil \""); + } catch(e) {} + try { + // U+FDE5 + eval("$(\"display\").innerï·¥HTML += \" U+FDE5 is evil \""); + } catch(e) {} + try { + // U+FDE6 + eval("$(\"display\").innerï·¦HTML += \" U+FDE6 is evil \""); + } catch(e) {} + try { + // U+FDE7 + eval("$(\"display\").innerï·§HTML += \" U+FDE7 is evil \""); + } catch(e) {} + try { + // U+FDE8 + eval("$(\"display\").innerï·¨HTML += \" U+FDE8 is evil \""); + } catch(e) {} + try { + // U+FDE9 + eval("$(\"display\").innerï·©HTML += \" U+FDE9 is evil \""); + } catch(e) {} + try { + // U+FDEA + eval("$(\"display\").innerï·ªHTML += \" U+FDEA is evil \""); + } catch(e) {} + try { + // U+FDEB + eval("$(\"display\").innerï·«HTML += \" U+FDEB is evil \""); + } catch(e) {} + try { + // U+FDEC + eval("$(\"display\").innerï·¬HTML += \" U+FDEC is evil \""); + } catch(e) {} + try { + // U+FDED + eval("$(\"display\").innerï·HTML += \" U+FDED is evil \""); + } catch(e) {} + try { + // U+FDEE + eval("$(\"display\").innerï·®HTML += \" U+FDEE is evil \""); + } catch(e) {} + try { + // U+FDEF + eval("$(\"display\").innerï·¯HTML += \" U+FDEF is evil \""); + } catch(e) {} + try { + // U+FFFE + eval("$(\"display\").inner￾HTML += \" U+FFFE is evil \""); + } catch(e) {} + try { + // U+FFFF + eval("$(\"display\").innerï¿¿HTML += \" U+FFFF is evil \""); + } catch(e) {} + try { + // U+1FFFE + eval("$(\"display\").inner🿾HTML += \" U+1FFFE is evil \""); + } catch(e) {} + try { + // U+1FFFF + eval("$(\"display\").inner🿿HTML += \" U+1FFFF is evil \""); + } catch(e) {} + try { + // U+2FFFE + eval("$(\"display\").inner𯿾HTML += \" U+2FFFE is evil \""); + } catch(e) {} + try { + // U+2FFFF + eval("$(\"display\").inner𯿿HTML += \" U+2FFFF is evil \""); + } catch(e) {} + try { + // U+3FFFE + eval("$(\"display\").innerð¿¿¾HTML += \" U+3FFFE is evil \""); + } catch(e) {} + try { + // U+3FFFF + eval("$(\"display\").innerð¿¿¿HTML += \" U+3FFFF is evil \""); + } catch(e) {} + try { + // U+4FFFE + eval("$(\"display\").innerñ¿¾HTML += \" U+4FFFE is evil \""); + } catch(e) {} + try { + // U+4FFFF + eval("$(\"display\").innerñ¿¿HTML += \" U+4FFFF is evil \""); + } catch(e) {} + try { + // U+5FFFE + eval("$(\"display\").innerñŸ¿¾HTML += \" U+5FFFE is evil \""); + } catch(e) {} + try { + // U+5FFFF + eval("$(\"display\").innerñŸ¿¿HTML += \" U+5FFFF is evil \""); + } catch(e) {} + try { + // U+6FFFE + eval("$(\"display\").innerñ¯¿¾HTML += \" U+6FFFE is evil \""); + } catch(e) {} + try { + // U+6FFFF + eval("$(\"display\").innerñ¯¿¿HTML += \" U+6FFFF is evil \""); + } catch(e) {} + try { + // U+7FFFE + eval("$(\"display\").innerñ¿¿¾HTML += \" U+7FFFE is evil \""); + } catch(e) {} + try { + // U+7FFFF + eval("$(\"display\").innerñ¿¿¿HTML += \" U+7FFFF is evil \""); + } catch(e) {} + try { + // U+8FFFE + eval("$(\"display\").innerò¿¾HTML += \" U+8FFFE is evil \""); + } catch(e) {} + try { + // U+8FFFF + eval("$(\"display\").innerò¿¿HTML += \" U+8FFFF is evil \""); + } catch(e) {} + try { + // U+9FFFE + eval("$(\"display\").inneròŸ¿¾HTML += \" U+9FFFE is evil \""); + } catch(e) {} + try { + // U+9FFFF + eval("$(\"display\").inneròŸ¿¿HTML += \" U+9FFFF is evil \""); + } catch(e) {} + try { + // U+AFFFE + eval("$(\"display\").innerò¯¿¾HTML += \" U+AFFFE is evil \""); + } catch(e) {} + try { + // U+AFFFF + eval("$(\"display\").innerò¯¿¿HTML += \" U+AFFFF is evil \""); + } catch(e) {} + try { + // U+BFFFE + eval("$(\"display\").innerò¿¿¾HTML += \" U+BFFFE is evil \""); + } catch(e) {} + try { + // U+BFFFF + eval("$(\"display\").innerò¿¿¿HTML += \" U+BFFFF is evil \""); + } catch(e) {} + try { + // U+CFFFE + eval("$(\"display\").inneró¿¾HTML += \" U+CFFFE is evil \""); + } catch(e) {} + try { + // U+CFFFF + eval("$(\"display\").inneró¿¿HTML += \" U+CFFFF is evil \""); + } catch(e) {} + try { + // U+DFFFE + eval("$(\"display\").inner󟿾HTML += \" U+DFFFE is evil \""); + } catch(e) {} + try { + // U+DFFFF + eval("$(\"display\").inneróŸ¿¿HTML += \" U+DFFFF is evil \""); + } catch(e) {} + try { + // U+EFFFE + eval("$(\"display\").inner󯿾HTML += \" U+EFFFE is evil \""); + } catch(e) {} + try { + // U+EFFFF + eval("$(\"display\").inner󯿿HTML += \" U+EFFFF is evil \""); + } catch(e) {} + try { + // U+FFFFE + eval("$(\"display\").inneró¿¿¾HTML += \" U+FFFFE is evil \""); + } catch(e) {} + try { + // U+FFFFF + eval("$(\"display\").inneró¿¿¿HTML += \" U+FFFFF is evil \""); + } catch(e) {} + try { + // U+10FFFE + eval("$(\"display\").innerô¿¾HTML += \" U+10FFFE is evil \""); + } catch(e) {} + try { + // U+10FFFF + eval("$(\"display\").innerô¿¿HTML += \" U+10FFFF is evil \""); + } catch(e) {} + test(); +} + + SimpleTest.waitForExplicitFinish(); +</script> +</pre> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=445886">Mozilla Bug 445886</a> +<p id="display">All good.</p> +<div id="content" style="display: none"></div> +</body> +</html> diff --git a/intl/uconv/tests/test_utf8_overconsumption.html b/intl/uconv/tests/test_utf8_overconsumption.html new file mode 100644 index 0000000000..25c4a273ea --- /dev/null +++ b/intl/uconv/tests/test_utf8_overconsumption.html @@ -0,0 +1,39 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=445886 +--> +<head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8"> + <title>Test for Unicode non-characters</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" /> + <script type="text/javascript"> +function Inject() +{ + $("display").innerHTML = "Evil"; +} + </script> +</head> +<body Â>onload="Inject()"> +<pre id="test"> +<script class="testbody" type="text/javascript"> + +/** test that UTF-8 decoding resynchronizes after incomplete sequences */ +function test() +{ + is($("display").innerHTML, "All good.", "No overconsumption"); + SimpleTest.finish(); +} + + addLoadEvent(function() { + setTimeout(test, 0); + }); + SimpleTest.waitForExplicitFinish(); +</script> +</pre> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=445886">Mozilla Bug 445886</a> +<p id="display">All good.</p> +<div id="content" style="display: none"></div> +</body> +</html> diff --git a/intl/uconv/tests/unit/data/unicode-conversion.utf16.txt b/intl/uconv/tests/unit/data/unicode-conversion.utf16.txt Binary files differnew file mode 100644 index 0000000000..8a28caadfc --- /dev/null +++ b/intl/uconv/tests/unit/data/unicode-conversion.utf16.txt diff --git a/intl/uconv/tests/unit/data/unicode-conversion.utf16be.txt b/intl/uconv/tests/unit/data/unicode-conversion.utf16be.txt Binary files differnew file mode 100644 index 0000000000..e1c4e86dba --- /dev/null +++ b/intl/uconv/tests/unit/data/unicode-conversion.utf16be.txt diff --git a/intl/uconv/tests/unit/data/unicode-conversion.utf16le.txt b/intl/uconv/tests/unit/data/unicode-conversion.utf16le.txt Binary files differnew file mode 100644 index 0000000000..cde8acb70b --- /dev/null +++ b/intl/uconv/tests/unit/data/unicode-conversion.utf16le.txt diff --git a/intl/uconv/tests/unit/data/unicode-conversion.utf8.txt b/intl/uconv/tests/unit/data/unicode-conversion.utf8.txt new file mode 100644 index 0000000000..b45dff35d0 --- /dev/null +++ b/intl/uconv/tests/unit/data/unicode-conversion.utf8.txt @@ -0,0 +1,43 @@ +This is a Unicode converter test file containing Unicode data. Its encoding is +determined by the second-to-last dot-separated component of the filename. For +example, if this file is named foo.utf8.txt, its encoding is UTF-8; if this file +is named foo.utf16le.txt, its encoding is UTF-16LE. This file is marked as +binary in Mozilla's version control system so that it's not accidentally +"mangled". + +The contents of each file must differ ONLY by encoding, so if you edit this file +you must edit all files with the name of this file (with the encoding-specific +part changed). + +== BEGIN UNICODE TEST DATA == + +== U+000000 -- U+00007F == + +BELL: "" +DATA LINK ESCAPE: "" +DELETE: "" + +== U+000080 -- U+0007FF == + +CONTROL: "€" +NO-BREAK SPACE: " " +POUND SIGN: "£" +YEN SIGN: "Â¥" +CURRENCY SIGN: "¢" +LATIN SMALL LETTER SCHWA: "É™" +LATIN LETTER BILABIAL PERCUSSIVE: "ʬ" + +== U+000800 -- U+00FFFF == + +BUGINESE LETTER TA: "ᨈ" +BUGINESE LETTER DA: "ᨉ" +AIRPLANE: "✈" +ZERO WIDTH NO-BREAK SPACE: "" + + +== U+010000 -- U+10FFFF == + +SHAVIAN LETTER IAN: "ð‘¾" +MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE: "ð…¤" +CJK UNIFIED IDEOGRAPH-20000: "ð €€" +(private use U+10FEFF): "ô»¿" diff --git a/intl/uconv/tests/unit/head_charsetConversionTests.js b/intl/uconv/tests/unit/head_charsetConversionTests.js new file mode 100644 index 0000000000..4fbf6e9af7 --- /dev/null +++ b/intl/uconv/tests/unit/head_charsetConversionTests.js @@ -0,0 +1,110 @@ +var CC = Components.Constructor; + +function CreateScriptableConverter() { + var ScriptableUnicodeConverter = CC( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" + ); + + return new ScriptableUnicodeConverter(); +} + +function checkDecode(converter, charset, inText, expectedText) { + try { + converter.charset = charset; + } catch (e) { + converter.charset = "iso-8859-1"; + } + + dump("testing decoding from " + charset + " to Unicode.\n"); + try { + var outText = converter.ConvertToUnicode(inText); + } catch (e) { + outText = "\ufffd"; + } + + if (outText != expectedText) { + for (var i = 0; i < inText.length; ++i) { + var inn = inText[i]; + var out = outText[i]; + var expected = expectedText[i]; + if (out != expected) { + dump( + "Decoding error at position " + + i + + ": for input " + + escape(inn) + + " expected " + + escape(expected) + + " but got " + + escape(out) + + "\n" + ); + } + } + } + Assert.equal(outText, expectedText); +} + +function checkEncode(converter, charset, inText, expectedText) { + try { + converter.charset = charset; + } catch (e) { + converter.charset = "iso-8859-1"; + } + + dump("testing encoding from Unicode to " + charset + "\n"); + var outText = converter.ConvertFromUnicode(inText) + converter.Finish(); + + if (outText != expectedText) { + for (var i = 0; i < inText.length; ++i) { + var inn = inText[i]; + var out = outText[i]; + var expected = expectedText[i]; + if (out != expected) { + dump( + "Encoding error at position " + + i + + ": for input " + + escape(inn) + + " expected " + + escape(expected) + + " but got " + + escape(out) + + "\n" + ); + } + } + } + Assert.equal(outText, expectedText); +} + +function testDecodeAliases(aliases, inString, expectedString) { + var converter = CreateScriptableConverter(); + for (var i = 0; i < aliases.length; ++i) { + checkDecode(converter, aliases[i], inString, expectedString); + } +} + +function testEncodeAliases(aliases, inString, expectedString) { + var converter = CreateScriptableConverter(); + for (var i = 0; i < aliases.length; ++i) { + checkEncode(converter, aliases[i], inString, expectedString); + } +} + +function testDecodeAliasesInternal(aliases, inString, expectedString) { + var converter = CreateScriptableConverter(); + converter.isInternal = true; + for (var i = 0; i < aliases.length; ++i) { + checkDecode(converter, aliases[i], inString, expectedString); + } +} + +function testEncodeAliasesInternal(aliases, inString, expectedString) { + var converter = CreateScriptableConverter(); + converter.isInternal = true; + for (var i = 0; i < aliases.length; ++i) { + checkEncode(converter, aliases[i], inString, expectedString); + } +} diff --git a/intl/uconv/tests/unit/test_bug116882.js b/intl/uconv/tests/unit/test_bug116882.js new file mode 100644 index 0000000000..5e76b30aa4 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug116882.js @@ -0,0 +1,11 @@ +/* Tests conversion of undefined and illegal sequences from Shift-JIS + * to Unicode (bug 116882) + */ + +const inText = "\xfd\xfe\xff\x81\x20\x81\x3f\x86\x3c"; +const expectedText = "\ufffd\ufffd\ufffd\ufffd \ufffd?\ufffd<"; +const charset = "Shift_JIS"; + +function run_test() { + checkDecode(CreateScriptableConverter(), charset, inText, expectedText); +} diff --git a/intl/uconv/tests/unit/test_bug317216.js b/intl/uconv/tests/unit/test_bug317216.js new file mode 100644 index 0000000000..cc10ef313e --- /dev/null +++ b/intl/uconv/tests/unit/test_bug317216.js @@ -0,0 +1,165 @@ +/* Test case for bug 317216 + * + * Uses nsIConverterInputStream to decode UTF-16 text with valid surrogate + * pairs and lone surrogate characters + * + * Sample text is: "A" in Mathematical Bold Capitals (U+1D400) + * + * The test uses buffers of 4 different lengths to test end of buffer in mid- + * UTF16 character and mid-surrogate pair + */ + +const { NetUtil } = ChromeUtils.importESModule( + "resource://gre/modules/NetUtil.sys.mjs" +); + +const test = [ + // 0: Valid surrogate pair + [ + "%D8%35%DC%20%00%2D%00%2D", + // expected: surrogate pair + "\uD835\uDC20--", + ], + // 1: Lone high surrogate + [ + "%D8%35%00%2D%00%2D", + // expected: one replacement char + "\uFFFD--", + ], + // 2: Lone low surrogate + [ + "%DC%20%00%2D%00%2D", + // expected: one replacement char + "\uFFFD--", + ], + // 3: Two high surrogates + [ + "%D8%35%D8%35%00%2D%00%2D", + // expected: two replacement chars + "\uFFFD\uFFFD--", + ], + // 4: Two low surrogates + [ + "%DC%20%DC%20%00%2D%00%2D", + // expected: two replacement chars + "\uFFFD\uFFFD--", + ], + // 5: Low surrogate followed by high surrogate + [ + "%DC%20%D8%35%00%2D%00%2D", + // expected: two replacement chars + "\uFFFD\uFFFD--", + ], + // 6: Lone high surrogate followed by valid surrogate pair + [ + "%D8%35%D8%35%DC%20%00%2D%00%2D", + // expected: replacement char followed by surrogate pair + "\uFFFD\uD835\uDC20--", + ], + // 7: Lone low surrogate followed by valid surrogate pair + [ + "%DC%20%D8%35%DC%20%00%2D%00%2D", + // expected: replacement char followed by surrogate pair + "\uFFFD\uD835\uDC20--", + ], + // 8: Valid surrogate pair followed by lone high surrogate + [ + "%D8%35%DC%20%D8%35%00%2D%00%2D", + // expected: surrogate pair followed by replacement char + "\uD835\uDC20\uFFFD--", + ], + // 9: Valid surrogate pair followed by lone low surrogate + [ + "%D8%35%DC%20%DC%20%00%2D%00%2D", + // expected: surrogate pair followed by replacement char + "\uD835\uDC20\uFFFD--", + ], + // 10: Lone high surrogate at the end of the input + [ + "%D8%35%", + // expected: one replacement char + "\uFFFD", + ], + // 11: Half code unit at the end of the input + [ + "%D8", + // expected: one replacement char + "\uFFFD", + ], +]; + +const ConverterInputStream = Components.Constructor( + "@mozilla.org/intl/converter-input-stream;1", + "nsIConverterInputStream", + "init" +); + +function testCase(testText, expectedText, bufferLength, charset) { + var dataURI = "data:text/plain;charset=" + charset + "," + testText; + var channel = NetUtil.newChannel({ + uri: dataURI, + loadUsingSystemPrincipal: true, + }); + var testInputStream = channel.open(); + var testConverter = new ConverterInputStream( + testInputStream, + charset, + bufferLength, + 0xfffd + ); + + if (!(testConverter instanceof Ci.nsIUnicharLineInputStream)) { + throw new Error("not line input stream"); + } + + var outStr = ""; + var more; + do { + // read the line and check for eof + var line = {}; + more = testConverter.readLine(line); + outStr += line.value; + } while (more); + + // escape the strings before comparing for better readability + Assert.equal(escape(outStr), escape(expectedText)); +} + +// Add 32 dummy characters to the test text to work around the minimum buffer +// size of an ns*Buffer +const MINIMUM_BUFFER_SIZE = 32; +function padBytes(str) { + var padding = ""; + for (var i = 0; i < MINIMUM_BUFFER_SIZE; ++i) { + padding += "%00%2D"; + } + return padding + str; +} + +function padUnichars(str) { + var padding = ""; + for (var i = 0; i < MINIMUM_BUFFER_SIZE; ++i) { + padding += "-"; + } + return padding + str; +} + +// Byte-swap %-encoded utf-16 +function flip(str) { + return str.replace(/(%..)(%..)/g, "$2$1"); +} + +function run_test() { + for (var i = 0; i < 12; ++i) { + for ( + var bufferLength = MINIMUM_BUFFER_SIZE; + bufferLength < MINIMUM_BUFFER_SIZE + 4; + ++bufferLength + ) { + var testText = padBytes(test[i][0]); + var expectedText = padUnichars(test[i][1]); + testCase(testText, expectedText, bufferLength, "UTF-16BE"); + testCase(flip(testText), expectedText, bufferLength, "UTF-16LE"); + } + } +} diff --git a/intl/uconv/tests/unit/test_bug321379.js b/intl/uconv/tests/unit/test_bug321379.js new file mode 100644 index 0000000000..338f59688e --- /dev/null +++ b/intl/uconv/tests/unit/test_bug321379.js @@ -0,0 +1,35 @@ +// Tests that calling close on a converter in/output stream doesn't crash +// (bug 321379) + +function run_test() { + var StorageStream = Components.Constructor( + "@mozilla.org/storagestream;1", + "nsIStorageStream", + "init" + ); + var ConverterInputStream = Components.Constructor( + "@mozilla.org/intl/converter-input-stream;1", + "nsIConverterInputStream", + "init" + ); + var ConverterOutputStream = Components.Constructor( + "@mozilla.org/intl/converter-output-stream;1", + "nsIConverterOutputStream", + "init" + ); + + var storage = new StorageStream(1024, -1, null); + + // Output + var outStr = storage.getOutputStream(0); + var out = new ConverterOutputStream(outStr, "UTF-8"); + out.writeString("Foo."); + out.close(); + out.close(); // This line should not crash. It should just do nothing. + + // Input + var inStr = storage.newInputStream(0); + var inp = new ConverterInputStream(inStr, "UTF-8", 1024, 0xfffd); + inp.close(); + inp.close(); // This line should not crash. It should just do nothing. +} diff --git a/intl/uconv/tests/unit/test_bug340714.js b/intl/uconv/tests/unit/test_bug340714.js new file mode 100644 index 0000000000..fdd30543f2 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug340714.js @@ -0,0 +1,123 @@ +/* Test case for bug 340714 + * + * Uses nsIConverterInputStream to decode UTF-16 text with all combinations + * of UTF-16BE and UTF-16LE with and without BOM. + * + * Sample text is: "Ð’Ñе ÑчаÑтливые Ñемьи похожи друг на друга, ÐºÐ°Ð¶Ð´Ð°Ñ Ð½ÐµÑчаÑÑ‚Ð»Ð¸Ð²Ð°Ñ ÑÐµÐ¼ÑŒÑ Ð½ÐµÑчаÑтлива по-Ñвоему." + * + * The enclosing quotation marks are included in the sample text to test that + * UTF-16LE is recognized even when there is no BOM and the UTF-16LE decoder is + * not explicitly called. This only works when the first character of the text + * is an eight-bit character. + */ + +const { NetUtil } = ChromeUtils.importESModule( + "resource://gre/modules/NetUtil.sys.mjs" +); + +const beBOM = "%FE%FF"; +const leBOM = "%FF%FE"; +const sampleUTF16BE = + "%00%22%04%12%04%41%04%35%00%20%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%4B%04%35%00%20%04%41%04%35%04%3C%04%4C%04%38%00%20%04%3F%04%3E%04%45%04%3E%04%36%04%38%00%20%04%34%04%40%04%43%04%33%00%20%04%3D%04%30%00%20%04%34%04%40%04%43%04%33%04%30%00%2C%00%20%04%3A%04%30%04%36%04%34%04%30%04%4F%00%20%04%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%04%4F%00%20%04%41%04%35%04%3C%04%4C%04%4F%00%20%04%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%00%20%04%3F%04%3E%00%2D%04%41%04%32%04%3E%04%35%04%3C%04%43%00%2E%00%22"; +const sampleUTF16LE = + "%22%00%12%04%41%04%35%04%20%00%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%4B%04%35%04%20%00%41%04%35%04%3C%04%4C%04%38%04%20%00%3F%04%3E%04%45%04%3E%04%36%04%38%04%20%00%34%04%40%04%43%04%33%04%20%00%3D%04%30%04%20%00%34%04%40%04%43%04%33%04%30%04%2C%00%20%00%3A%04%30%04%36%04%34%04%30%04%4F%04%20%00%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%04%4F%04%20%00%41%04%35%04%3C%04%4C%04%4F%04%20%00%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%04%20%00%3F%04%3E%04%2D%00%41%04%32%04%3E%04%35%04%3C%04%43%04%2E%00%22%00"; +const expected = + '"\u0412\u0441\u0435 \u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u044B\u0435 \u0441\u0435\u043C\u044C\u0438 \u043F\u043E\u0445\u043E\u0436\u0438 \u0434\u0440\u0443\u0433 \u043D\u0430 \u0434\u0440\u0443\u0433\u0430, \u043A\u0430\u0436\u0434\u0430\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430\u044F \u0441\u0435\u043C\u044C\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430 \u043F\u043E-\u0441\u0432\u043E\u0435\u043C\u0443."'; + +Services.prefs.setBoolPref("security.allow_eval_with_system_principal", true); +registerCleanupFunction(() => { + Services.prefs.clearUserPref("security.allow_eval_with_system_principal"); +}); + +function makeText(withBOM, charset) { + const isBE = charset === "UTF16BE"; + const sampleText = isBE ? sampleUTF16BE : sampleUTF16LE; + const bom = isBE ? beBOM : leBOM; + return withBOM ? bom + sampleText : sampleText; +} + +function testCase(withBOM, charset, charsetDec, decoder, bufferLength) { + var dataURI = + "data:text/plain;charset=" + charsetDec + "," + makeText(withBOM, charset); + + var ConverterInputStream = Components.Constructor( + "@mozilla.org/intl/converter-input-stream;1", + "nsIConverterInputStream", + "init" + ); + + var channel = NetUtil.newChannel({ + uri: dataURI, + loadUsingSystemPrincipal: true, + }); + var testInputStream = channel.open(); + var testConverter = new ConverterInputStream( + testInputStream, + decoder, + bufferLength, + 0xfffd + ); + + if (!(testConverter instanceof Ci.nsIUnicharLineInputStream)) { + throw new Error("not line input stream"); + } + + var outStr = ""; + var more; + do { + // read the line and check for eof + var line = {}; + more = testConverter.readLine(line); + outStr += line.value; + } while (more); + + if (outStr != expected) { + dump( + "Failed with BOM = " + + withBOM + + "; charset = " + + charset + + "; charset declaration = " + + charsetDec + + "; decoder = " + + decoder + + "; bufferLength = " + + bufferLength + + "\n" + ); + if (outStr.length == expected.length) { + for (let i = 0; i < outStr.length; ++i) { + if (outStr.charCodeAt(i) != expected.charCodeAt(i)) { + dump( + i + + ": " + + outStr.charCodeAt(i).toString(16) + + " != " + + expected.charCodeAt(i).toString(16) + + "\n" + ); + } + } + } + } + + // escape the strings before comparing for better readability + Assert.equal(escape(outStr), escape(expected)); +} + +function run_test() { + /* BOM charset charset decoder buffer + declaration length */ + testCase(true, "UTF16LE", "UTF-16", "UTF-16BE", 64); + testCase(true, "UTF16BE", "UTF-16", "UTF-16LE", 64); + testCase(true, "UTF16LE", "UTF-16", "UTF-16LE", 64); + testCase(true, "UTF16BE", "UTF-16", "UTF-16BE", 64); + testCase(false, "UTF16LE", "UTF-16", "UTF-16LE", 64); + testCase(false, "UTF16BE", "UTF-16", "UTF-16BE", 64); + testCase(true, "UTF16LE", "UTF-16", "UTF-16BE", 65); + testCase(true, "UTF16BE", "UTF-16", "UTF-16LE", 65); + testCase(true, "UTF16LE", "UTF-16", "UTF-16LE", 65); + testCase(true, "UTF16BE", "UTF-16", "UTF-16BE", 65); + testCase(false, "UTF16LE", "UTF-16", "UTF-16LE", 65); + testCase(false, "UTF16BE", "UTF-16", "UTF-16BE", 65); +} diff --git a/intl/uconv/tests/unit/test_bug381412.Big5-HKSCS.js b/intl/uconv/tests/unit/test_bug381412.Big5-HKSCS.js new file mode 100644 index 0000000000..4108dc1090 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug381412.Big5-HKSCS.js @@ -0,0 +1,64 @@ +const charset = "Big5-HKSCS"; + +function dumpStrings(inString, outString) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = "<empty>"; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); +} + +function error(inString, outString, msg) { + dumpStrings(inString, outString); + do_throw("security risk: " + msg); +} + +function run_test() { + var ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" + ); + + var converter = new ScriptableUnicodeConverter(); + converter.charset = charset; + + var leadByte, trailByte; + var inString; + for (leadByte = 1; leadByte < 0x100; ++leadByte) { + for (trailByte = 1; trailByte < 0x100; ++trailByte) { + inString = String.fromCharCode(leadByte, trailByte, 65); + var outString = converter.ConvertToUnicode(inString) + converter.Finish(); + switch (outString.length) { + case 1: + error(inString, outString, "2 byte sequence eaten"); + break; + case 2: + if ( + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 1 ASCII"); + } + break; + case 3: + if ( + outString != inString && + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 2 ASCII"); + } + break; + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug381412.Big5.js b/intl/uconv/tests/unit/test_bug381412.Big5.js new file mode 100644 index 0000000000..45c8bafc83 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug381412.Big5.js @@ -0,0 +1,64 @@ +const charset = "Big5"; + +function dumpStrings(inString, outString) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = "<empty>"; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); +} + +function error(inString, outString, msg) { + dumpStrings(inString, outString); + do_throw("security risk: " + msg); +} + +function run_test() { + var ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" + ); + + var converter = new ScriptableUnicodeConverter(); + converter.charset = charset; + + var leadByte, trailByte; + var inString; + for (leadByte = 1; leadByte < 0x100; ++leadByte) { + for (trailByte = 1; trailByte < 0x100; ++trailByte) { + inString = String.fromCharCode(leadByte, trailByte, 65); + var outString = converter.ConvertToUnicode(inString) + converter.Finish(); + switch (outString.length) { + case 1: + error(inString, outString, "2 byte sequence eaten"); + break; + case 2: + if ( + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 1 ASCII"); + } + break; + case 3: + if ( + outString != inString && + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 2 ASCII"); + } + break; + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug381412.euc-kr.js b/intl/uconv/tests/unit/test_bug381412.euc-kr.js new file mode 100644 index 0000000000..58d36c76f4 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug381412.euc-kr.js @@ -0,0 +1,64 @@ +const charset = "EUC-KR"; + +function dumpStrings(inString, outString) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = "<empty>"; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); +} + +function error(inString, outString, msg) { + dumpStrings(inString, outString); + do_throw("security risk: " + msg); +} + +function run_test() { + var ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" + ); + + var converter = new ScriptableUnicodeConverter(); + converter.charset = charset; + + var leadByte, trailByte; + var inString; + for (leadByte = 1; leadByte < 0x100; ++leadByte) { + for (trailByte = 1; trailByte < 0x100; ++trailByte) { + inString = String.fromCharCode(leadByte, trailByte, 65); + var outString = converter.ConvertToUnicode(inString) + converter.Finish(); + switch (outString.length) { + case 1: + error(inString, outString, "2 byte sequence eaten"); + break; + case 2: + if ( + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 1 ASCII"); + } + break; + case 3: + if ( + outString != inString && + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 2 ASCII"); + } + break; + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug381412.euc_jp.js b/intl/uconv/tests/unit/test_bug381412.euc_jp.js new file mode 100644 index 0000000000..7e07eb9e69 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug381412.euc_jp.js @@ -0,0 +1,92 @@ +const charset = "EUC-JP"; +const ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" +); +var gConverter; + +function error(inString, outString, msg) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = "<empty>"; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); + do_throw("security risk: " + msg); +} + +function test(inString) { + var outString = gConverter.ConvertToUnicode(inString) + gConverter.Finish(); + + switch (outString.length) { + case 0: + case 1: + case 2: + error(inString, outString, "Unexpected error"); + break; + case 3: + error(inString, outString, "3 byte sequence eaten"); + break; + case 4: + if ( + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 && + outString.charCodeAt(2) < 0x80 && + outString.charCodeAt(3) < 0x80 + ) { + error(inString, outString, "3 byte sequence converted to 1 ASCII"); + } + break; + case 5: + if ( + outString != inString && + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 && + outString.charCodeAt(2) < 0x80 && + outString.charCodeAt(3) < 0x80 && + outString.charCodeAt(4) < 0x80 + ) { + error(inString, outString, "3 byte sequence converted to 2 ASCII"); + } + break; + case 6: + if ( + outString != inString && + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 && + outString.charCodeAt(2) < 0x80 && + outString.charCodeAt(3) < 0x80 && + outString.charCodeAt(4) < 0x80 && + outString.charCodeAt(5) < 0x80 + ) { + error(inString, outString, "3 byte sequence converted to 3 ASCII"); + } + break; + } +} + +function run_test() { + gConverter = new ScriptableUnicodeConverter(); + gConverter.charset = charset; + + var byte1, byte2, byte3; + for (byte1 = 1; byte1 < 0x100; ++byte1) { + for (byte2 = 1; byte2 < 0x100; ++byte2) { + if (byte1 == 0x8f) { + for (byte3 = 1; byte3 < 0x100; ++byte3) { + test(String.fromCharCode(byte1, byte2, byte3) + "foo"); + } + } else { + test(String.fromCharCode(byte1, byte2) + " foo"); + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug381412.gb2312.js b/intl/uconv/tests/unit/test_bug381412.gb2312.js new file mode 100644 index 0000000000..df680dadae --- /dev/null +++ b/intl/uconv/tests/unit/test_bug381412.gb2312.js @@ -0,0 +1,60 @@ +const charset = "GB2312"; + +function error(inString, outString, msg) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = "<empty>"; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); + do_throw("security risk: " + msg); +} + +function run_test() { + var ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" + ); + + var converter = new ScriptableUnicodeConverter(); + converter.charset = charset; + + var leadByte, trailByte; + var inString; + for (leadByte = 1; leadByte < 0x100; ++leadByte) { + for (trailByte = 1; trailByte < 0x100; ++trailByte) { + inString = String.fromCharCode(leadByte, trailByte, 65); + var outString = converter.ConvertToUnicode(inString) + converter.Finish(); + switch (outString.length) { + case 1: + error(inString, outString, "2 byte sequence eaten"); + break; + case 2: + if ( + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 1 ASCII"); + } + break; + case 3: + if ( + outString != inString && + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 2 ASCII"); + } + break; + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug381412.js b/intl/uconv/tests/unit/test_bug381412.js new file mode 100644 index 0000000000..89849bf5e6 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug381412.js @@ -0,0 +1,60 @@ +const charset = "Shift_JIS"; + +function error(inString, outString, msg) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = "<empty>"; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); + do_throw("security risk: " + msg); +} + +function run_test() { + var ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" + ); + + var converter = new ScriptableUnicodeConverter(); + converter.charset = charset; + + var leadByte, trailByte; + var inString; + for (leadByte = 1; leadByte < 0x100; ++leadByte) { + for (trailByte = 1; trailByte < 0x100; ++trailByte) { + inString = String.fromCharCode(leadByte, trailByte, 65); + var outString = converter.ConvertToUnicode(inString) + converter.Finish(); + switch (outString.length) { + case 1: + error(inString, outString, "2 byte sequence eaten"); + break; + case 2: + if ( + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 1 ASCII"); + } + break; + case 3: + if ( + outString != inString && + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 2 ASCII"); + } + break; + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug396637.js b/intl/uconv/tests/unit/test_bug396637.js new file mode 100644 index 0000000000..6aac53e5d1 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug396637.js @@ -0,0 +1,9 @@ +// Tests conversion of a single byte from UTF-16 to Unicode + +const inString = "A"; +const expectedString = ""; +const charset = "UTF-16BE"; + +function run_test() { + checkDecode(CreateScriptableConverter(), charset, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_bug399257.js b/intl/uconv/tests/unit/test_bug399257.js new file mode 100644 index 0000000000..9acd3e9b38 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug399257.js @@ -0,0 +1,80 @@ +// Tests encoding of characters below U+0020 +const inString = "Hello\u000aWorld"; +const expectedString = "Hello\nWorld"; + +function run_test() { + var failures = false; + var encodingConverter = CreateScriptableConverter(); + + var encoders = [ + "Big5", + "Big5-HKSCS", + "EUC-JP", + "EUC-KR", + "gb18030", + "gbk", + "IBM866", + "ISO-2022-JP", + "ISO-8859-1", + "ISO-8859-2", + "ISO-8859-3", + "ISO-8859-4", + "ISO-8859-5", + "ISO-8859-6", + "ISO-8859-7", + "ISO-8859-8", + "ISO-8859-8-I", + "ISO-8859-10", + "ISO-8859-13", + "ISO-8859-14", + "ISO-8859-15", + "ISO-8859-16", + "KOI8-R", + "KOI8-U", + "Shift_JIS", + "windows-1250", + "windows-1251", + "windows-1252", + "windows-1253", + "windows-1254", + "windows-1255", + "windows-1256", + "windows-1257", + "windows-1258", + "windows-874", + "macintosh", + "x-mac-cyrillic", + "x-user-defined", + "UTF-8", + ]; + + var counter = 0; + while (counter < encoders.length) { + var charset = encoders[counter++]; + dump("testing " + counter + " " + charset + "\n"); + + encodingConverter.charset = charset; + var codepageString = + encodingConverter.ConvertFromUnicode(inString) + + encodingConverter.Finish(); + if (codepageString != expectedString) { + dump(charset + " encoding failed\n"); + for (var i = 0; i < expectedString.length; ++i) { + if (codepageString.charAt(i) != expectedString.charAt(i)) { + dump( + i.toString(16) + + ": 0x" + + codepageString.charCodeAt(i).toString(16) + + " != " + + expectedString.charCodeAt(i).toString(16) + + "\n" + ); + } + } + failures = true; + } + } + if (failures) { + do_throw("test failed\n"); + } +} diff --git a/intl/uconv/tests/unit/test_bug457886.js b/intl/uconv/tests/unit/test_bug457886.js new file mode 100644 index 0000000000..21c3036901 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug457886.js @@ -0,0 +1,12 @@ +// Tests conversion from Unicode to ISO-2022-JP + +const inString = + "\u3042\u3044\u3046\u3048\u304A\u000D\u000A\u304B\u304D\u304F\u3051\u3053"; + +const expectedString = '\x1B$B$"$$$&$($*\x1B(B\x0D\x0A\x1B$B$+$-$/$1$3\x1B(B'; + +const charset = "ISO-2022-JP"; + +function run_test() { + checkEncode(CreateScriptableConverter(), charset, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_bug522931.js b/intl/uconv/tests/unit/test_bug522931.js new file mode 100644 index 0000000000..2dae8d72e9 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug522931.js @@ -0,0 +1,4 @@ +// crash test with invaild parameter (bug 522931) +function run_test() { + Assert.equal(Services.textToSubURI.UnEscapeAndConvert("UTF-8", null), ""); +} diff --git a/intl/uconv/tests/unit/test_bug563283.js b/intl/uconv/tests/unit/test_bug563283.js new file mode 100644 index 0000000000..49c13dcfcb --- /dev/null +++ b/intl/uconv/tests/unit/test_bug563283.js @@ -0,0 +1,53 @@ +// Tests conversion from Unicode to ISO-2022-JP with Hankaku characters + +const inStrings = [ + // 。「」、・ヲァィゥェォャï½ï½®ï½¯ï½°ï½±ï½²ï½³ï½´ï½µï½¶ï½·ï½¸ï½¹ï½ºï½»ï½¼ï½½ï½¾ï½¿ï¾€ï¾ï¾‚テトナニヌネノハヒフï¾ï¾Žï¾ï¾ï¾‘メモヤユヨラリルレロワï¾ï¾žï¾Ÿ + "\uFF61\uFF62\uFF63\uFF64\uFF65\uFF66\uFF67\uFF68\uFF69\uFF6A\uFF6B\uFF6C\uFF6D\uFF6E\uFF6F\uFF70\uFF71\uFF72\uFF73\uFF74\uFF75\uFF76\uFF77\uFF78\uFF79\uFF7A\uFF7B\uFF7C\uFF7D\uFF7E\uFF7F\uFF80\uFF81\uFF82\uFF83\uFF84\uFF85\uFF86\uFF87\uFF88\uFF89\uFF8A\uFF8B\uFF8C\uFF8D\uFF8E\uFF8F\uFF90\uFF91\uFF92\uFF93\uFF94\uFF95\uFF96\uFF97\uFF98\uFF99\uFF9A\uFF9B\uFF9C\uFF9D\uFF9E\uFF9F", + // equivalent to + // 。「ã€ã€ãƒ»ãƒ²ã‚¡ã‚£ã‚¥ã‚§ã‚©ãƒ£ãƒ¥ãƒ§ãƒƒãƒ¼ã‚¢ã‚¤ã‚¦ã‚¨ã‚ªã‚«ã‚クケコサシスセソタãƒãƒ„テトナニヌãƒãƒŽãƒãƒ’フヘホマミムメモヤユヨラリルレãƒãƒ¯ãƒ³ã‚›ã‚œ + // \u3002\u300c\u300d\u3001\u30fb\u30f2\u30a1\u30a3\u30a5\u30a7\u30a9\u30e3\u30e5\u30e7\u30c3\u30fc\u30a2\u30a4\u30a6\u30a8\u30aa\u30ab\u30ad\u30af\u30b1\u30b3\u30b5\u30b7\u30b9\u30bb\u30bd\u30bf\u30c1\u30c4\u30c6\u30c8\u30ca\u30cb\u30cc\u30cd\u30ce\u30cf\u30d2\u30d5\u30d8\u30db\u30de\u30df\u30e0\u30e1\u30e2\u30e4\u30e6\u30e8\u30e9\u30ea\u30eb\u30ec\u30ed\u30ef\u30f3\u309b\u309c" + + // ガギグゲゴザジズゼゾダï¾ï¾žï¾‚゙デドバビブï¾ï¾žï¾Žï¾ž + "\uFF76\uFF9E\uFF77\uFF9E\uFF78\uFF9E\uFF79\uFF9E\uFF7A\uFF9E\uFF7B\uFF9E\uFF7C\uFF9E\uFF7D\uFF9E\uFF7E\uFF9E\uFF7F\uFF9E\uFF80\uFF9E\uFF81\uFF9E\uFF82\uFF9E\uFF83\uFF9E\uFF84\uFF9E\uFF8A\uFF9E\uFF8B\uFF9E\uFF8C\uFF9E\uFF8D\uFF9E\uFF8E\uFF9E", + // equivalent to + // ã‚«ã‚›ã‚゛ク゛ケ゛コ゛サ゛シ゛ス゛セ゛ソ゛タ゛ãƒã‚›ãƒ„゛テ゛ト゛ãƒã‚›ãƒ’゛フ゛ヘ゛ホ゛ + // \u30AB\u309B\u30AD\u309B\u30AF\u309B\u30B1\u309B\u30B3\u309B\u30B5\u309B\u30B7\u309B\u30B9\u309B\u30BB\u309B\u30BD\u309B\u30BF\u309B\u30C1\u309B\u30C4\u309B\u30C6\u309B\u30C8\u309B\u30CF\u309B\u30D2\u309B\u30D5\u309B\u30D8\u309B\u30DB\u309B + + // パピプï¾ï¾Ÿï¾Žï¾Ÿ + "\uFF8A\uFF9F\uFF8B\uFF9F\uFF8C\uFF9F\uFF8D\uFF9F\uFF8E\uFF9F", + // equivalent to + // ãƒã‚œãƒ’゜フ゜ヘ゜ホ゜ + // \u30CF\u309C\u30D2\u309C\u30D5\u309C\u30D8\u309C\u30DB\u309C" + + // Hankaku preceded and followed by regular Katakana (no change of charset) + // フランツ・ヨーゼフ・ãƒã‚¤ãƒ‰ãƒ³ + "\u30D5\u30E9\u30F3\u30C4\u30FB\uFF96\uFF70\uFF7E\uFF9E\uFF8C\u30FB\u30CF\u30A4\u30C9\u30F3", + + // Hankaku preceded and followed by Roman (charset change) + // Mozilla (モジラ) Foundation + "Mozilla (\uFF93\uFF7C\uFF9E\uFF97) Foundation", + + // Hankaku preceded and followed by unencodable characters + // दिलà¥à¤²à¥€ï½¥ï¾ƒï¾žï¾˜ï½°ï½¥à¨¦à¨¿à©±à¨²à©€ + "\u0926\u093F\u0932\u094D\u0932\u0940\uFF65\uFF83\uFF9E\uFF98\uFF70\uFF65\u0A26\u0A3F\u0A71\u0A32\u0A40", +]; + +const expectedStrings = [ + "\x1B$B!#!V!W!\x22!&%r%!%#%%%'%)%c%e%g%C!<%\x22%$%&%(%*%+%-%/%1%3%5%7%9%;%=%?%A%D%F%H%J%K%L%M%N%O%R%U%X%[%^%_%`%a%b%d%f%h%i%j%k%l%m%o%s!+!,\x1B(B", + "\x1B$B%+!+%-!+%/!+%1!+%3!+%5!+%7!+%9!+%;!+%=!+%?!+%A!+%D!+%F!+%H!+%O!+%R!+%U!+%X!+%[!+\x1B(B", + "\x1B$B%O!,%R!,%U!,%X!,%[!,\x1B(B", + "\x1B$B%U%i%s%D!&%h!<%;!+%U!&%O%$%I%s\x1B(B", + "Mozilla (\x1B$B%b%7!+%i\x1B(B) Foundation", + "??????\x1B$B!&%F!+%j!<!&\x1B(B?????", +]; + +function run_test() { + for (var i = 0; i < inStrings.length; ++i) { + checkEncode( + CreateScriptableConverter(), + "ISO-2022-JP", + inStrings[i], + expectedStrings[i] + ); + } +} diff --git a/intl/uconv/tests/unit/test_bug563618.js b/intl/uconv/tests/unit/test_bug563618.js new file mode 100644 index 0000000000..f2d8a6e353 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug563618.js @@ -0,0 +1,99 @@ +/* Test case for bug 563618 + * + * Uses nsIConverterInputStream to decode invalid EUC-JP text + * + */ + +const { NetUtil } = ChromeUtils.importESModule( + "resource://gre/modules/NetUtil.sys.mjs" +); + +const test = [ + // 0: 0x8e followed by hi byte, not valid JIS X 0201 + [ + "abcdefghijklmnopqrstuvwxyz12test00%8e%80foobar", + // expected: one replacement character, invalid byte eaten + "abcdefghijklmnopqrstuvwxyz12test00\uFFFDfoobar", + ], + // 1: 0x8e followed by ASCII + [ + "abcdefghijklmnopqrstuvwxyz12test01%8efoobar", + // expected: one replacement character, invalid byte not eaten + "abcdefghijklmnopqrstuvwxyz12test01\uFFFDfoobar", + ], + // 2: JIS X 0208 lead byte followed by invalid hi byte + [ + "abcdefghijklmnopqrstuvwxyz12test02%bf%80foobar", + // expected: one replacement character, invalid byte eaten + "abcdefghijklmnopqrstuvwxyz12test02\uFFFDfoobar", + ], + // 3: JIS X 0208 lead byte followed by ASCII + [ + "abcdefghijklmnopqrstuvwxyz12test03%bffoobar", + // expected: one replacement character, invalid byte not eaten + "abcdefghijklmnopqrstuvwxyz12test03\uFFFDfoobar", + ], +]; + +const ConverterInputStream = Components.Constructor( + "@mozilla.org/intl/converter-input-stream;1", + "nsIConverterInputStream", + "init" +); + +function testCase(testText, expectedText, bufferLength, charset) { + var dataURI = "data:text/plain;charset=" + charset + "," + testText; + var channel = NetUtil.newChannel({ + uri: dataURI, + loadUsingSystemPrincipal: true, + }); + var testInputStream = channel.open(); + var testConverter = new ConverterInputStream( + testInputStream, + charset, + bufferLength, + 0xfffd + ); + + if (!(testConverter instanceof Ci.nsIUnicharLineInputStream)) { + throw new Error("not line input stream"); + } + + var outStr = ""; + var more; + do { + // read the line and check for eof + var line = {}; + more = testConverter.readLine(line); + outStr += line.value; + } while (more); + + if (outStr != expectedText) { + dump("Failed with bufferLength = " + bufferLength + "\n"); + if (outStr.length == expectedText.length) { + for (let i = 0; i < outStr.length; ++i) { + if (outStr.charCodeAt(i) != expectedText.charCodeAt(i)) { + dump( + i + + ": " + + outStr.charCodeAt(i).toString(16) + + " != " + + expectedText.charCodeAt(i).toString(16) + + "\n" + ); + } + } + } + } + + // escape the strings before comparing for better readability + Assert.equal(escape(outStr), escape(expectedText)); +} + +function run_test() { + for (var i = 0; i < test.length; ++i) { + for (var bufferLength = 32; bufferLength < 40; ++bufferLength) { + testCase(test[i][0], test[i][1], bufferLength, "EUC-JP"); + } + } +} diff --git a/intl/uconv/tests/unit/test_bug601429.js b/intl/uconv/tests/unit/test_bug601429.js new file mode 100644 index 0000000000..af5d357c50 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug601429.js @@ -0,0 +1,84 @@ +// Tests whether characters above 0x7F decode to ASCII characters liable to +// expose XSS vulnerabilities + +function run_test() { + var failures = false; + var decodingConverter = CreateScriptableConverter(); + + var decoders = [ + "Big5", + "Big5-HKSCS", + "EUC-JP", + "EUC-KR", + "gb18030", + "IBM866", + "ISO-2022-JP", + "ISO-8859-1", + "ISO-8859-2", + "ISO-8859-3", + "ISO-8859-4", + "ISO-8859-5", + "ISO-8859-6", + "ISO-8859-7", + "ISO-8859-8", + "ISO-8859-8-I", + "ISO-8859-10", + "ISO-8859-13", + "ISO-8859-14", + "ISO-8859-15", + "ISO-8859-16", + "KOI8-R", + "KOI8-U", + "Shift_JIS", + "windows-1250", + "windows-1251", + "windows-1252", + "windows-1253", + "windows-1254", + "windows-1255", + "windows-1256", + "windows-1257", + "windows-1258", + "windows-874", + "macintosh", + "x-mac-cyrillic", + "x-user-defined", + "UTF-8", + ]; + + var counter = 0; + while (counter < decoders.length) { + var charset = decoders[counter++]; + dump("testing " + counter + " " + charset + "\n"); + + decodingConverter.charset = charset; + for (var i = 0x80; i < 0x100; ++i) { + var inString = String.fromCharCode(i); + var outString; + try { + outString = + decodingConverter.ConvertToUnicode(inString) + + decodingConverter.Finish(); + } catch (e) { + outString = String.fromCharCode(0xfffd); + } + for (var n = 0; n < outString.length; ++n) { + var outChar = outString.charAt(n); + if (outChar == "<" || outChar == ">" || outChar == "/") { + dump( + charset + + " has a problem: " + + escape(inString) + + " decodes to '" + + outString + + "'\n" + ); + failures = true; + } + } + } + } + if (failures) { + do_throw("test failed\n"); + } +} diff --git a/intl/uconv/tests/unit/test_bug715319.dbcs.js b/intl/uconv/tests/unit/test_bug715319.dbcs.js new file mode 100644 index 0000000000..3ba405a925 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug715319.dbcs.js @@ -0,0 +1,56 @@ +// 2-byte charsets: +const charsets = ["Big5", "EUC-KR"]; +const ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" +); +var gConverter; + +function error(inString, outString, msg) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = "<empty>"; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); + do_throw("security risk: " + msg); +} + +function IsASCII(charCode) { + return charCode <= 0x7e; +} + +function test(inString) { + var outString = gConverter.ConvertToUnicode(inString) + gConverter.Finish(); + + var outLen = outString.length; + + if ( + IsASCII(inString.charCodeAt(1)) && + (outLen < 4 || outString.charCodeAt(outLen - 4) == 0xfffd) + ) { + error(inString, outString, "ASCII input eaten in " + gConverter.charset); + } +} + +function run_test() { + gConverter = new ScriptableUnicodeConverter(); + for (var i = 0; i < charsets.length; ++i) { + gConverter.charset = charsets[i]; + + var byte1, byte2; + for (byte1 = 1; byte1 < 0x100; ++byte1) { + for (byte2 = 1; byte2 < 0x100; ++byte2) { + test(String.fromCharCode(byte1, byte2) + "foo"); + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug715319.euc_jp.js b/intl/uconv/tests/unit/test_bug715319.euc_jp.js new file mode 100644 index 0000000000..537d073d14 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug715319.euc_jp.js @@ -0,0 +1,77 @@ +const charset = "EUC-JP"; +const ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" +); +var gConverter; + +function error(inString, outString, msg) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = "<empty>"; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); + do_throw("security risk: " + msg); +} + +function IsASCII(charCode) { + return charCode <= 0x7e; +} + +function IsNotGR(charCode) { + return charCode < 0xa1 || charCode > 0xfe; +} + +function test(inString) { + var outString = gConverter.ConvertToUnicode(inString) + gConverter.Finish(); + + var outLen = outString.length; + if ( + IsASCII(inString.charCodeAt(1)) && + inString.charCodeAt(1) != outString.charCodeAt(outLen - 5) + ) { + error(inString, outString, "ASCII second byte eaten"); + } else if ( + IsASCII(inString.charCodeAt(2)) && + inString.charCodeAt(2) != outString.charCodeAt(outLen - 4) + ) { + error(inString, outString, "ASCII third byte eaten"); + } else if ( + inString.charCodeAt(0) == 0x8f && + inString.charCodeAt(1) > 0x7f && + IsNotGR(inString.charCodeAt(2)) && + !( + outString.charCodeAt(outLen - 4) == 0xfffd || + outString.charCodeAt(outLen - 4) == inString.charCodeAt(2) + ) + ) { + error(inString, outString, "non-GR third byte eaten"); + } +} + +function run_test() { + gConverter = new ScriptableUnicodeConverter(); + gConverter.charset = charset; + + var byte1, byte2, byte3; + for (byte1 = 1; byte1 < 0x100; ++byte1) { + for (byte2 = 1; byte2 < 0x100; ++byte2) { + if (byte1 == 0x8f) { + for (byte3 = 1; byte3 < 0x100; ++byte3) { + test(String.fromCharCode(byte1, byte2, byte3) + "foo"); + } + } else { + test(String.fromCharCode(byte1, byte2) + " foo"); + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug715319.gb2312.js b/intl/uconv/tests/unit/test_bug715319.gb2312.js new file mode 100644 index 0000000000..f780ab81d1 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug715319.gb2312.js @@ -0,0 +1,87 @@ +const charset = "GB2312"; +const ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" +); +var gConverter; + +function error(inString, outString, msg) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = "<empty>"; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); + do_throw("security risk: " + msg); +} + +function IsASCII(charCode) { + return charCode <= 0x7e; +} + +function test(inString) { + var outString = gConverter.ConvertToUnicode(inString) + gConverter.Finish(); + + var outLen = outString.length; + for (var pos = 1; pos < 3; ++pos) { + let outPos = outLen - (9 - pos); + if (outPos < 0) { + outPos = 0; + } + let c0 = inString.charCodeAt(0); + let c1 = inString.charCodeAt(1); + let c2 = inString.charCodeAt(2); + let c3 = inString.charCodeAt(3); + if ( + IsASCII(inString.charCodeAt(pos)) && + !( + outString.charCodeAt(outPos) == inString.charCodeAt(pos) || + outString.charCodeAt(outPos) != 0xfffd || + // legal 4 byte range + (0x81 <= c0 && + c0 <= 0xfe && + 0x30 <= c1 && + c1 <= 0x39 && + 0x81 <= c2 && + c2 <= 0xfe && + 0x30 <= c3 && + c3 <= 0x39) + ) + ) { + dump("pos = " + pos + "; outPos = " + outPos + "\n"); + error(inString, outString, "ASCII input eaten"); + } + } +} + +function run_test() { + gConverter = new ScriptableUnicodeConverter(); + gConverter.charset = charset; + + var byte1, byte2, byte3, byte4; + + // 2-byte + for (byte1 = 1; byte1 < 0x100; ++byte1) { + for (byte2 = 1; byte2 < 0x100; ++byte2) { + test(String.fromCharCode(byte1, byte2) + " foo"); + } + } + // 4-byte (limited) + for (byte1 = 0x80; byte1 < 0x90; ++byte1) { + for (byte2 = 0x20; byte2 < 0x40; ++byte2) { + for (byte3 = 0x80; byte3 < 0x90; ++byte3) { + for (byte4 = 0x20; byte4 < 0x40; ++byte4) { + test(String.fromCharCode(byte1, byte2, byte3, byte4) + " foo"); + } + } + } + } +} diff --git a/intl/uconv/tests/unit/test_charset_conversion.js b/intl/uconv/tests/unit/test_charset_conversion.js new file mode 100644 index 0000000000..577a801081 --- /dev/null +++ b/intl/uconv/tests/unit/test_charset_conversion.js @@ -0,0 +1,373 @@ +const NS_ERROR_ILLEGAL_VALUE = Cr.NS_ERROR_ILLEGAL_VALUE; + +var BIS, BOS, _Pipe, COS, FIS, _SS, CIS; + +var dataDir; + +function run_test() { + BIS = Components.Constructor( + "@mozilla.org/binaryinputstream;1", + "nsIBinaryInputStream", + "setInputStream" + ); + BOS = Components.Constructor( + "@mozilla.org/binaryoutputstream;1", + "nsIBinaryOutputStream", + "setOutputStream" + ); + _Pipe = Components.Constructor("@mozilla.org/pipe;1", "nsIPipe", "init"); + COS = Components.Constructor( + "@mozilla.org/intl/converter-output-stream;1", + "nsIConverterOutputStream", + "init" + ); + FIS = Components.Constructor( + "@mozilla.org/network/file-input-stream;1", + "nsIFileInputStream", + "init" + ); + _SS = Components.Constructor( + "@mozilla.org/storagestream;1", + "nsIStorageStream", + "init" + ); + CIS = Components.Constructor( + "@mozilla.org/intl/converter-input-stream;1", + "nsIConverterInputStream", + "init" + ); + + dataDir = do_get_file("data/"); + + test_utf8_1(); + test_cross_conversion(); +} + +const UNICODE_STRINGS = [ + "\u00BD + \u00BE == \u00BD\u00B2 + \u00BC + \u00BE", + + "AZaz09 \u007F " + // U+000000 to U+00007F + "\u0080 \u0398 \u03BB \u0725 " + // U+000080 to U+0007FF + "\u0964 \u0F5F \u20AC \uFFFB", // U+000800 to U+00FFFF + + // there would be strings containing non-BMP code points here, but + // unfortunately JS strings are UCS-2 (and worse yet are treated as + // 16-bit values by the spec), so we have to do gymnastics to work + // with non-BMP -- manual surrogate decoding doesn't work because + // String.prototype.charCodeAt() ignores surrogate pairs and only + // returns 16-bit values +]; + +// test conversion equality -- keys are names of files containing equivalent +// Unicode data, values are the encoding of the file in the format expected by +// nsIConverter(In|Out)putStream.init +const UNICODE_FILES = { + "unicode-conversion.utf8.txt": "UTF-8", + "unicode-conversion.utf16.txt": "UTF-16", + "unicode-conversion.utf16le.txt": "UTF-16LE", + "unicode-conversion.utf16be.txt": "UTF-16BE", +}; + +function test_utf8_1() { + for (var i = 0; i < UNICODE_STRINGS.length; i++) { + var pipe = Pipe(); + var conv = new COS(pipe.outputStream, "UTF-8"); + Assert.ok(conv.writeString(UNICODE_STRINGS[i])); + conv.close(); + + if ( + !equalStreams( + new UTF8(pipe.inputStream), + stringToCodePoints(UNICODE_STRINGS[i]) + ) + ) { + do_throw("UNICODE_STRINGS[" + i + "] not handled correctly"); + } + } +} + +function test_cross_conversion() { + for (var fn1 in UNICODE_FILES) { + var fin = getBinaryInputStream(fn1); + var ss = StorageStream(); + + var bos = new BOS(ss.getOutputStream(0)); + var av; + while ((av = fin.available()) > 0) { + var data = fin.readByteArray(av); + bos.writeByteArray(data); + } + fin.close(); + bos.close(); + + for (var fn2 in UNICODE_FILES) { + var fin2 = getUnicharInputStream(fn2, UNICODE_FILES[fn2]); + var unichar = new CIS( + ss.newInputStream(0), + UNICODE_FILES[fn1], + 8192, + 0x0 + ); + + if (!equalUnicharStreams(unichar, fin2)) { + do_throw( + "unequal streams: " + UNICODE_FILES[fn1] + ", " + UNICODE_FILES[fn2] + ); + } + } + } +} + +// utility functions + +function StorageStream() { + return new _SS(8192, Math.pow(2, 32) - 1, null); +} + +function getUnicharInputStream(filename, encoding) { + var file = dataDir.clone(); + file.append(filename); + + const PR_RDONLY = 0x1; + var fis = new FIS( + file, + PR_RDONLY, + "0644", + Ci.nsIFileInputStream.CLOSE_ON_EOF + ); + return new CIS(fis, encoding, 8192, 0x0); +} + +function getBinaryInputStream(filename, encoding) { + var file = dataDir.clone(); + file.append(filename); + + const PR_RDONLY = 0x1; + var fis = new FIS( + file, + PR_RDONLY, + "0644", + Ci.nsIFileInputStream.CLOSE_ON_EOF + ); + return new BIS(fis); +} + +function equalStreams(stream, codePoints) { + var currIndex = 0; + while (true) { + var unit = stream.readUnit(); + if (unit < 0) { + return currIndex == codePoints.length; + } + if (unit !== codePoints[currIndex++]) { + return false; + } + } + // eslint-disable-next-line no-unreachable + do_throw("not reached"); + return false; +} + +function equalUnicharStreams(s1, s2) { + var r1, r2; + var str1 = {}, + str2 = {}; + while (true) { + r1 = s1.readString(1024, str1); + r2 = s2.readString(1024, str2); + + if (r1 != r2 || str1.value != str2.value) { + print("r1: " + r1 + ", r2: " + r2); + print(str1.value.length); + print(str2.value.length); + return false; + } + if (r1 == 0 && r2 == 0) { + return true; + } + } + + // not reached + // eslint-disable-next-line no-unreachable + return false; +} + +function stringToCodePoints(str) { + return str.split("").map(function (v) { + return v.charCodeAt(0); + }); +} + +function lowbits(n) { + return Math.pow(2, n) - 1; +} + +function Pipe() { + return new _Pipe(false, false, 1024, 10, null); +} + +// complex charset readers + +/** + * Wraps a UTF-8 stream to allow access to the Unicode code points in it. + * + * @param stream + * the stream to wrap + */ +function UTF8(stream) { + this._stream = new BIS(stream); +} +UTF8.prototype = { + // returns numeric code point at front of stream encoded in UTF-8, -1 if at + // end of stream, or throws if valid (and properly encoded!) code point not + // found + readUnit() { + var str = this._stream; + + var c, c2, c3, c4, rv; + + // if at end of stream, must distinguish failure to read any bytes + // (correct behavior) from failure to read some byte after the first + // in the character + try { + c = str.read8(); + } catch (e) { + return -1; + } + + if (c < 0x80) { + return c; + } + + if (c < 0xc0) { + // c < 11000000 + // byte doesn't have enough leading ones (must be at least two) + throw NS_ERROR_ILLEGAL_VALUE; + } + + c2 = str.read8(); + if (c2 >= 0xc0 || c2 < 0x80) { + throw NS_ERROR_ILLEGAL_VALUE; + } // not 10xxxxxx + + if (c < 0xe0) { + // c < 11100000 + // two-byte between U+000080 and U+0007FF + rv = ((lowbits(5) & c) << 6) + (lowbits(6) & c2); + // no upper bounds-check needed, by previous lines + if (rv >= 0x80) { + return rv; + } + throw NS_ERROR_ILLEGAL_VALUE; + } + + c3 = str.read8(); + if (c3 >= 0xc0 || c3 < 0x80) { + throw NS_ERROR_ILLEGAL_VALUE; + } // not 10xxxxxx + + if (c < 0xf0) { + // c < 11110000 + // three-byte between U+000800 and U+00FFFF + rv = + ((lowbits(4) & c) << 12) + ((lowbits(6) & c2) << 6) + (lowbits(6) & c3); + // no upper bounds-check needed, by previous lines + if (rv >= 0xe000 || (rv >= 0x800 && rv <= 0xd7ff)) { + return rv; + } + throw NS_ERROR_ILLEGAL_VALUE; + } + + c4 = str.read8(); + if (c4 >= 0xc0 || c4 < 0x80) { + throw NS_ERROR_ILLEGAL_VALUE; + } // not 10xxxxxx + + if (c < 0xf8) { + // c < 11111000 + // four-byte between U+010000 and U+10FFFF + rv = + ((lowbits(3) & c) << 18) + + ((lowbits(6) & c2) << 12) + + ((lowbits(6) & c3) << 6) + + (lowbits(6) & c4); + // need an upper bounds-check since 0x10FFFF isn't (2**n - 1) + if (rv >= 0x10000 && rv <= 0x10ffff) { + return rv; + } + throw NS_ERROR_ILLEGAL_VALUE; + } + + // 11111000 or greater -- no UTF-8 mapping + throw NS_ERROR_ILLEGAL_VALUE; + }, +}; + +/** + * Wraps a UTF-16 stream to allow access to the Unicode code points in it. + * + * @param stream + * the stream to wrap + * @param bigEndian + * true for UTF-16BE, false for UTF-16LE, not present at all for UTF-16 with + * a byte-order mark + */ +function UTF16(stream, bigEndian) { + this._stream = new BIS(stream); + if (arguments.length > 1) { + this._bigEndian = bigEndian; + } else { + var bom = this._stream.read16(); + if (bom == 0xfeff) { + this._bigEndian = true; + } else if (bom == 0xfffe) { + this._bigEndian = false; + } else { + do_throw("missing BOM: " + bom.toString(16).toUpperCase()); + } + } +} +UTF16.prototype = { + // returns numeric code point at front of stream encoded in UTF-16, + // -1 if at end of stream, or throws if UTF-16 code point not found + readUnit() { + var str = this._stream; + + // if at end of stream, must distinguish failure to read any bytes + // (correct behavior) from failure to read some byte after the first + // in the character + try { + var b1 = str.read8(); + } catch (e) { + return -1; + } + + var b2 = str.read8(); + + var w1 = this._bigEndian ? (b1 << 8) + b2 : (b2 << 8) + b1; + + if (w1 > 0xdbff && w1 < 0xe000) { + // second surrogate, but expecting none or first + throw NS_ERROR_ILLEGAL_VALUE; + } + + if (w1 > 0xd7ff && w1 < 0xdc00) { + // non-BMP, use surrogate pair + b1 = str.read8(); + b2 = str.read8(); + var w2 = this._bigEndian ? (b1 << 8) + b2 : (b2 << 8) + b1; + if (w2 < 0xdc00 || w2 > 0xdfff) { + throw NS_ERROR_ILLEGAL_VALUE; + } + + var rv = 0x100000 + ((lowbits(10) & w2) << 10) + (lowbits(10) & w1); + if (rv <= 0x10ffff) { + return rv; + } + throw NS_ERROR_ILLEGAL_VALUE; + } + + // non-surrogate + return w1; + }, +}; diff --git a/intl/uconv/tests/unit/test_decode_8859-1.js b/intl/uconv/tests/unit/test_decode_8859-1.js new file mode 100644 index 0000000000..d820b35ca8 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-1.js @@ -0,0 +1,26 @@ +// Tests conversion from ISO-8859-1 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff"; + +const aliases = [ + "ISO-8859-1", + "iso-8859-1", + "latin1", + "iso_8859-1", + "iso8859-1", + "iso-ir-100", + "l1", + "ibm819", + "cp819", + "csisolatin1", + "iso88591", + "iso_8859-1:1987", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-10.js b/intl/uconv/tests/unit/test_decode_8859-10.js new file mode 100644 index 0000000000..47e934817e --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-10.js @@ -0,0 +1,22 @@ +// Tests conversion from ISO-8859-10 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0104\u0112\u0122\u012a\u0128\u0136\u00a7\u013b\u0110\u0160\u0166\u017d\u00ad\u016a\u014a\u00b0\u0105\u0113\u0123\u012b\u0129\u0137\u00b7\u013c\u0111\u0161\u0167\u017e\u2015\u016b\u014b\u0100\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u012e\u010c\u00c9\u0118\u00cb\u0116\u00cd\u00ce\u00cf\u00d0\u0145\u014c\u00d3\u00d4\u00d5\u00d6\u0168\u00d8\u0172\u00da\u00db\u00dc\u00dd\u00de\u00df\u0101\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u012f\u010d\u00e9\u0119\u00eb\u0117\u00ed\u00ee\u00ef\u00f0\u0146\u014d\u00f3\u00f4\u00f5\u00f6\u0169\u00f8\u0173\u00fa\u00fb\u00fc\u00fd\u00fe\u0138"; + +const aliases = [ + "ISO-8859-10", + "iso-8859-10", + "iso8859-10", + "latin6", + "iso-ir-157", + "l6", + "csisolatin6", + "iso885910", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-11.js b/intl/uconv/tests/unit/test_decode_8859-11.js new file mode 100644 index 0000000000..b647ec503c --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-11.js @@ -0,0 +1,13 @@ +// Tests conversion from ISO-8859-11 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e24\u0e25\u0e26\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e2f\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35\u0e36\u0e37\u0e38\u0e39\u0e3a\u0e3f\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a\u0e4b\u0e4c\u0e4d\u0e4e\u0e4f\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57\u0e58\u0e59\u0e5a\u0e5b"; + +const aliases = ["ISO-8859-11", "iso-8859-11", "iso8859-11", "iso885911"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-13.js b/intl/uconv/tests/unit/test_decode_8859-13.js new file mode 100644 index 0000000000..91443e6bee --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-13.js @@ -0,0 +1,13 @@ +// Tests conversion from ISO-8859-13 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u201d\u00a2\u00a3\u00a4\u201e\u00a6\u00a7\u00d8\u00a9\u0156\u00ab\u00ac\u00ad\u00ae\u00c6\u00b0\u00b1\u00b2\u00b3\u201c\u00b5\u00b6\u00b7\u00f8\u00b9\u0157\u00bb\u00bc\u00bd\u00be\u00e6\u0104\u012e\u0100\u0106\u00c4\u00c5\u0118\u0112\u010c\u00c9\u0179\u0116\u0122\u0136\u012a\u013b\u0160\u0143\u0145\u00d3\u014c\u00d5\u00d6\u00d7\u0172\u0141\u015a\u016a\u00dc\u017b\u017d\u00df\u0105\u012f\u0101\u0107\u00e4\u00e5\u0119\u0113\u010d\u00e9\u017a\u0117\u0123\u0137\u012b\u013c\u0161\u0144\u0146\u00f3\u014d\u00f5\u00f6\u00f7\u0173\u0142\u015b\u016b\u00fc\u017c\u017e\u2019"; + +const aliases = ["ISO-8859-13", "iso-8859-13", "iso8859-13", "iso885913"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-14.js b/intl/uconv/tests/unit/test_decode_8859-14.js new file mode 100644 index 0000000000..d1fdcb204c --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-14.js @@ -0,0 +1,13 @@ +// Tests conversion from ISO-8859-14 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u1e02\u1e03\u00a3\u010a\u010b\u1e0a\u00a7\u1e80\u00a9\u1e82\u1e0b\u1ef2\u00ad\u00ae\u0178\u1e1e\u1e1f\u0120\u0121\u1e40\u1e41\u00b6\u1e56\u1e81\u1e57\u1e83\u1e60\u1ef3\u1e84\u1e85\u1e61\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u0174\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u1e6a\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u0176\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u0175\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u1e6b\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u0177\u00ff"; + +const aliases = ["ISO-8859-14", "iso-8859-14", "iso8859-14", "iso885914"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-15.js b/intl/uconv/tests/unit/test_decode_8859-15.js new file mode 100644 index 0000000000..7344fb55a8 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-15.js @@ -0,0 +1,21 @@ +// Tests conversion from ISO-8859-15 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a1\u00a2\u00a3\u20ac\u00a5\u0160\u00a7\u0161\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u017d\u00b5\u00b6\u00b7\u017e\u00b9\u00ba\u00bb\u0152\u0153\u0178\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff"; + +const aliases = [ + "ISO-8859-15", + "iso-8859-15", + "iso8859-15", + "iso_8859-15", + "iso885915", + "csisolatin9", + "l9", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-2.js b/intl/uconv/tests/unit/test_decode_8859-2.js new file mode 100644 index 0000000000..0e3c15bdee --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-2.js @@ -0,0 +1,24 @@ +// Tests conversion from ISO-8859-2 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0104\u02d8\u0141\u00a4\u013d\u015a\u00a7\u00a8\u0160\u015e\u0164\u0179\u00ad\u017d\u017b\u00b0\u0105\u02db\u0142\u00b4\u013e\u015b\u02c7\u00b8\u0161\u015f\u0165\u017a\u02dd\u017e\u017c\u0154\u00c1\u00c2\u0102\u00c4\u0139\u0106\u00c7\u010c\u00c9\u0118\u00cb\u011a\u00cd\u00ce\u010e\u0110\u0143\u0147\u00d3\u00d4\u0150\u00d6\u00d7\u0158\u016e\u00da\u0170\u00dc\u00dd\u0162\u00df\u0155\u00e1\u00e2\u0103\u00e4\u013a\u0107\u00e7\u010d\u00e9\u0119\u00eb\u011b\u00ed\u00ee\u010f\u0111\u0144\u0148\u00f3\u00f4\u0151\u00f6\u00f7\u0159\u016f\u00fa\u0171\u00fc\u00fd\u0163\u02d9"; + +const aliases = [ + "ISO-8859-2", + "iso-8859-2", + "latin2", + "iso_8859-2", + "iso8859-2", + "iso-ir-101", + "l2", + "csisolatin2", + "iso88592", + "iso_8859-2:1987", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-3.js b/intl/uconv/tests/unit/test_decode_8859-3.js new file mode 100644 index 0000000000..011f82de87 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-3.js @@ -0,0 +1,24 @@ +// Tests conversion from ISO-8859-3 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbf\xc0\xc1\xc2\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0126\u02d8\u00a3\u00a4\u0124\u00a7\u00a8\u0130\u015e\u011e\u0134\u00ad\u017b\u00b0\u0127\u00b2\u00b3\u00b4\u00b5\u0125\u00b7\u00b8\u0131\u015f\u011f\u0135\u00bd\u017c\u00c0\u00c1\u00c2\u00c4\u010a\u0108\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d1\u00d2\u00d3\u00d4\u0120\u00d6\u00d7\u011c\u00d9\u00da\u00db\u00dc\u016c\u015c\u00df\u00e0\u00e1\u00e2\u00e4\u010b\u0109\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f1\u00f2\u00f3\u00f4\u0121\u00f6\u00f7\u011d\u00f9\u00fa\u00fb\u00fc\u016d\u015d\u02d9"; + +const aliases = [ + "ISO-8859-3", + "iso-8859-3", + "latin3", + "iso_8859-3", + "iso8859-3", + "iso-ir-109", + "l3", + "csisolatin3", + "iso88593", + "iso_8859-3:1988", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-4.js b/intl/uconv/tests/unit/test_decode_8859-4.js new file mode 100644 index 0000000000..6a8b89c2ef --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-4.js @@ -0,0 +1,24 @@ +// Tests conversion from ISO-8859-4 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0104\u0138\u0156\u00a4\u0128\u013b\u00a7\u00a8\u0160\u0112\u0122\u0166\u00ad\u017d\u00af\u00b0\u0105\u02db\u0157\u00b4\u0129\u013c\u02c7\u00b8\u0161\u0113\u0123\u0167\u014a\u017e\u014b\u0100\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u012e\u010c\u00c9\u0118\u00cb\u0116\u00cd\u00ce\u012a\u0110\u0145\u014c\u0136\u00d4\u00d5\u00d6\u00d7\u00d8\u0172\u00da\u00db\u00dc\u0168\u016a\u00df\u0101\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u012f\u010d\u00e9\u0119\u00eb\u0117\u00ed\u00ee\u012b\u0111\u0146\u014d\u0137\u00f4\u00f5\u00f6\u00f7\u00f8\u0173\u00fa\u00fb\u00fc\u0169\u016b\u02d9"; + +const aliases = [ + "ISO-8859-4", + "iso-8859-4", + "latin4", + "iso_8859-4", + "iso8859-4", + "iso-ir-110", + "l4", + "csisolatin4", + "iso88594", + "iso_8859-4:1988", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-5.js b/intl/uconv/tests/unit/test_decode_8859-5.js new file mode 100644 index 0000000000..220a12ab3a --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-5.js @@ -0,0 +1,22 @@ +// Tests conversion from ISO-8859-5 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0401\u0402\u0403\u0404\u0405\u0406\u0407\u0408\u0409\u040a\u040b\u040c\u00ad\u040e\u040f\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u044f\u2116\u0451\u0452\u0453\u0454\u0455\u0456\u0457\u0458\u0459\u045a\u045b\u045c\u00a7\u045e\u045f"; + +const aliases = [ + "ISO-8859-5", + "iso-8859-5", + "iso_8859-5", + "iso8859-5", + "iso-ir-144", + "csisolatincyrillic", + "iso88595", + "iso_8859-5:1988", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-6.js b/intl/uconv/tests/unit/test_decode_8859-6.js new file mode 100644 index 0000000000..9c94ef8673 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-6.js @@ -0,0 +1,25 @@ +// Tests conversion from ISO-8859-6 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa4\xac\xad\xbb\xbf\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a4\u060c\u00ad\u061b\u061f\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u064b\u064c\u064d\u064e\u064f\u0650\u0651\u0652"; + +const aliases = [ + "ISO-8859-6", + "iso-8859-6", + "iso_8859-6", + "iso8859-6", + "arabic", + "iso-ir-127", + "ecma-114", + "asmo-708", + "csisolatinarabic", + "iso88596", + "iso_8859-6:1987", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-7.js b/intl/uconv/tests/unit/test_decode_8859-7.js new file mode 100644 index 0000000000..9d74342345 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-7.js @@ -0,0 +1,27 @@ +// Tests conversion from ISO-8859-7 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u2018\u2019\u00a3\u20ac\u20af\u00a6\u00a7\u00a8\u00a9\u037a\u00ab\u00ac\u00ad\u2015\u00b0\u00b1\u00b2\u00b3\u0384\u0385\u0386\u00b7\u0388\u0389\u038a\u00bb\u038c\u00bd\u038e\u038f\u0390\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039a\u039b\u039c\u039d\u039e\u039f\u03a0\u03a1\u03a3\u03a4\u03a5\u03a6\u03a7\u03a8\u03a9\u03aa\u03ab\u03ac\u03ad\u03ae\u03af\u03b0\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c2\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03ca\u03cb\u03cc\u03cd\u03ce"; + +const aliases = [ + "ISO-8859-7", + "iso-8859-7", + "greek", + "greek8", + "sun_eu_greek", + "iso_8859-7", + "iso8859-7", + "iso-ir-126", + "elot_928", + "ecma-118", + "csisolatingreek", + "iso88597", + "iso_8859-7:1987", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-8.js b/intl/uconv/tests/unit/test_decode_8859-8.js new file mode 100644 index 0000000000..c7b758bf03 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-8.js @@ -0,0 +1,24 @@ +// Tests conversion from ISO-8859-8 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfd\xfe"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00d7\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00f7\u00bb\u00bc\u00bd\u00be\u2017\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05da\u05db\u05dc\u05dd\u05de\u05df\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5\u05e6\u05e7\u05e8\u05e9\u05ea\u200e\u200f"; + +const aliases = [ + "ISO-8859-8", + "iso-8859-8", + "hebrew", + "visual", + "iso_8859-8", + "iso8859-8", + "iso-ir-138", + "csisolatinhebrew", + "iso88598", + "iso_8859-8:1988", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-9.js b/intl/uconv/tests/unit/test_decode_8859-9.js new file mode 100644 index 0000000000..1582e2093f --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-9.js @@ -0,0 +1,24 @@ +// Tests conversion from ISO-8859-9 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u011e\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u0130\u015e\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u011f\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u0131\u015f\u00ff"; + +const aliases = [ + "ISO-8859-9", + "iso-8859-9", + "latin5", + "iso_8859-9", + "iso8859-9", + "iso-ir-148", + "l5", + "csisolatin5", + "iso88599", + "iso_8859-9:1989", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1250.js b/intl/uconv/tests/unit/test_decode_CP1250.js new file mode 100644 index 0000000000..d044c67801 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1250.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1250 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0083\u201e\u2026\u2020\u2021\u0088\u2030\u0160\u2039\u015a\u0164\u017d\u0179\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u0161\u203a\u015b\u0165\u017e\u017a\u00a0\u02c7\u02d8\u0141\u00a4\u0104\u00a6\u00a7\u00a8\u00a9\u015e\u00ab\u00ac\u00ad\u00ae\u017b\u00b0\u00b1\u02db\u0142\u00b4\u00b5\u00b6\u00b7\u00b8\u0105\u015f\u00bb\u013d\u02dd\u013e\u017c\u0154\u00c1\u00c2\u0102\u00c4\u0139\u0106\u00c7\u010c\u00c9\u0118\u00cb\u011a\u00cd\u00ce\u010e\u0110\u0143\u0147\u00d3\u00d4\u0150\u00d6\u00d7\u0158\u016e\u00da\u0170\u00dc\u00dd\u0162\u00df\u0155\u00e1\u00e2\u0103\u00e4\u013a\u0107\u00e7\u010d\u00e9\u0119\u00eb\u011b\u00ed\u00ee\u010f\u0111\u0144\u0148\u00f3\u00f4\u0151\u00f6\u00f7\u0159\u016f\u00fa\u0171\u00fc\u00fd\u0163\u02d9"; + +const aliases = ["windows-1250", "x-cp1250", "cp1250"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1251.js b/intl/uconv/tests/unit/test_decode_CP1251.js new file mode 100644 index 0000000000..01153e8650 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1251.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1251 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0402\u0403\u201a\u0453\u201e\u2026\u2020\u2021\u20ac\u2030\u0409\u2039\u040a\u040c\u040b\u040f\u0452\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u0459\u203a\u045a\u045c\u045b\u045f\u00a0\u040e\u045e\u0408\u00a4\u0490\u00a6\u00a7\u0401\u00a9\u0404\u00ab\u00ac\u00ad\u00ae\u0407\u00b0\u00b1\u0406\u0456\u0491\u00b5\u00b6\u00b7\u0451\u2116\u0454\u00bb\u0458\u0405\u0455\u0457\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u044f"; + +const aliases = ["windows-1251", "x-cp1251", "cp1251"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1252.js b/intl/uconv/tests/unit/test_decode_CP1252.js new file mode 100644 index 0000000000..d41d7d72a3 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1252.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1252 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0160\u2039\u0152\u008d\u017d\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u0161\u203a\u0153\u009d\u017e\u0178\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff"; + +const aliases = ["windows-1252", "x-cp1252", "cp1252"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1253.js b/intl/uconv/tests/unit/test_decode_CP1253.js new file mode 100644 index 0000000000..880e2dae74 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1253.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1253 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u0088\u2030\u008a\u2039\u008c\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u009a\u203a\u009c\u009d\u009e\u009f\u00a0\u0385\u0386\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\ufffd\u00ab\u00ac\u00ad\u00ae\u2015\u00b0\u00b1\u00b2\u00b3\u0384\u00b5\u00b6\u00b7\u0388\u0389\u038a\u00bb\u038c\u00bd\u038e\u038f\u0390\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039a\u039b\u039c\u039d\u039e\u039f\u03a0\u03a1\ufffd\u03a3\u03a4\u03a5\u03a6\u03a7\u03a8\u03a9\u03aa\u03ab\u03ac\u03ad\u03ae\u03af\u03b0\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c2\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03ca\u03cb\u03cc\u03cd\u03ce\ufffd"; + +const aliases = ["windows-1253", "x-cp1253", "cp1253"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1254.js b/intl/uconv/tests/unit/test_decode_CP1254.js new file mode 100644 index 0000000000..f4af7e2088 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1254.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1254 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0160\u2039\u0152\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u0161\u203a\u0153\u009d\u009e\u0178\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u011e\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u0130\u015e\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u011f\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u0131\u015f\u00ff"; + +const aliases = ["windows-1254", "x-cp1254", "cp1254"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1255.js b/intl/uconv/tests/unit/test_decode_CP1255.js new file mode 100644 index 0000000000..57e1b54636 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1255.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1255 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u008a\u2039\u008c\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u009a\u203a\u009c\u009d\u009e\u009f\u00a0\u00a1\u00a2\u00a3\u20aa\u00a5\u00a6\u00a7\u00a8\u00a9\u00d7\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00f7\u00bb\u00bc\u00bd\u00be\u00bf\u05b0\u05b1\u05b2\u05b3\u05b4\u05b5\u05b6\u05b7\u05b8\u05b9\u05ba\u05bb\u05bc\u05bd\u05be\u05bf\u05c0\u05c1\u05c2\u05c3\u05f0\u05f1\u05f2\u05f3\u05f4\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05da\u05db\u05dc\u05dd\u05de\u05df\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5\u05e6\u05e7\u05e8\u05e9\u05ea\ufffd\ufffd\u200e\u200f\ufffd"; + +const aliases = ["windows-1255", "x-cp1255", "cp1255"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1256.js b/intl/uconv/tests/unit/test_decode_CP1256.js new file mode 100644 index 0000000000..b91e448a46 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1256.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1256 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u067e\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0679\u2039\u0152\u0686\u0698\u0688\u06af\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u06a9\u2122\u0691\u203a\u0153\u200c\u200d\u06ba\u00a0\u060c\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u06be\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u061b\u00bb\u00bc\u00bd\u00be\u061f\u06c1\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u00d7\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u00e0\u0644\u00e2\u0645\u0646\u0647\u0648\u00e7\u00e8\u00e9\u00ea\u00eb\u0649\u064a\u00ee\u00ef\u064b\u064c\u064d\u064e\u00f4\u064f\u0650\u00f7\u0651\u00f9\u0652\u00fb\u00fc\u200e\u200f\u06d2"; + +const aliases = ["windows-1256", "x-cp1256", "cp1256"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1257.js b/intl/uconv/tests/unit/test_decode_CP1257.js new file mode 100644 index 0000000000..a61bf2e870 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1257.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1257 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0083\u201e\u2026\u2020\u2021\u0088\u2030\u008a\u2039\u008c\u00a8\u02c7\u00b8\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u009a\u203a\u009c\u00af\u02db\u009f\u00a0\ufffd\u00a2\u00a3\u00a4\ufffd\u00a6\u00a7\u00d8\u00a9\u0156\u00ab\u00ac\u00ad\u00ae\u00c6\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00f8\u00b9\u0157\u00bb\u00bc\u00bd\u00be\u00e6\u0104\u012e\u0100\u0106\u00c4\u00c5\u0118\u0112\u010c\u00c9\u0179\u0116\u0122\u0136\u012a\u013b\u0160\u0143\u0145\u00d3\u014c\u00d5\u00d6\u00d7\u0172\u0141\u015a\u016a\u00dc\u017b\u017d\u00df\u0105\u012f\u0101\u0107\u00e4\u00e5\u0119\u0113\u010d\u00e9\u017a\u0117\u0123\u0137\u012b\u013c\u0161\u0144\u0146\u00f3\u014d\u00f5\u00f6\u00f7\u0173\u0142\u015b\u016b\u00fc\u017c\u017e\u02d9"; + +const aliases = ["windows-1257", "x-cp1257", "cp1257"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1258.js b/intl/uconv/tests/unit/test_decode_CP1258.js new file mode 100644 index 0000000000..422e9a7985 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1258.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1258 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u008a\u2039\u0152\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u009a\u203a\u0153\u009d\u009e\u0178\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u0102\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u0300\u00cd\u00ce\u00cf\u0110\u00d1\u0309\u00d3\u00d4\u01a0\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u01af\u0303\u00df\u00e0\u00e1\u00e2\u0103\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u0301\u00ed\u00ee\u00ef\u0111\u00f1\u0323\u00f3\u00f4\u01a1\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u01b0\u20ab\u00ff"; + +const aliases = ["windows-1258", "x-cp1258", "cp1258"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP874.js b/intl/uconv/tests/unit/test_decode_CP874.js new file mode 100644 index 0000000000..7f05e7669c --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP874.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-874 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u0082\u0083\u0084\u2026\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e24\u0e25\u0e26\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e2f\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35\u0e36\u0e37\u0e38\u0e39\u0e3a\ufffd\ufffd\ufffd\ufffd\u0e3f\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a\u0e4b\u0e4c\u0e4d\u0e4e\u0e4f\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57\u0e58\u0e59\u0e5a\u0e5b\ufffd\ufffd\ufffd\ufffd"; + +const aliases = ["windows-874", "dos-874"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_gb18030.js b/intl/uconv/tests/unit/test_decode_gb18030.js new file mode 100644 index 0000000000..ca5796bbaa --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_gb18030.js @@ -0,0 +1,16 @@ +// Tests conversion from gb18030 to Unicode +// This is a sniff test which doesn't cover the full gb18030 range: the test string +// includes only the ASCII range and the first 63 double byte characters +// and border values of 4 byte characters + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x81@\x81A\x81B\x81C\x81D\x81E\x81F\x81G\x81H\x81I\x81J\x81K\x81L\x81M\x81N\x81O\x81P\x81Q\x81R\x81S\x81T\x81U\x81V\x81W\x81X\x81Y\x81Z\x81[\x81\\\x81]\x81^\x81_\x81`\x81a\x81b\x81c\x81d\x81e\x81f\x81g\x81h\x81i\x81j\x81k\x81l\x81m\x81n\x81o\x81p\x81q\x81r\x81s\x81t\x81u\x81v\x81w\x81x\x81y\x81z\x81{\x81|\x81}\x81~\x810\x810\x841\xa46\x841\xa47\x849\xfe9\x850\x810\x859\xfe9\x860\x810\x8f9\xfe9\x900\x810\xe32\x9a5\xe32\x9a6\xe39\xfe9\xe40\x810\xfc9\xfe9\xfd0\x810\xfe9\xfe9\xa8\xa0\xa8\xbc\xa8\xbf\xa8\xc1\x815\xf46\x815\xf47\x815\xf48"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u4E02\u4E04\u4E05\u4E06\u4E0F\u4E12\u4E17\u4E1F\u4E20\u4E21\u4E23\u4E26\u4E29\u4E2E\u4E2F\u4E31\u4E33\u4E35\u4E37\u4E3C\u4E40\u4E41\u4E42\u4E44\u4E46\u4E4A\u4E51\u4E55\u4E57\u4E5A\u4E5B\u4E62\u4E63\u4E64\u4E65\u4E67\u4E68\u4E6A\u4E6B\u4E6C\u4E6D\u4E6E\u4E6F\u4E72\u4E74\u4E75\u4E76\u4E77\u4E78\u4E79\u4E7A\u4E7B\u4E7C\u4E7D\u4E7F\u4E80\u4E81\u4E82\u4E83\u4E84\u4E85\u4E87\u4E8A\x80\uFFFC\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uD800\uDC00\uDBFF\uDFFF\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uE7C6\u1E3F\u01F9\uE7C9\u1E3E\uE7C7\u1E40"; + +const aliases = ["gb18030"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_gbk.js b/intl/uconv/tests/unit/test_decode_gbk.js new file mode 100644 index 0000000000..6e4414722d --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_gbk.js @@ -0,0 +1,15 @@ +// Tests conversion from gbk to Unicode +// This is a sniff test which doesn't cover the full gbk range: the test string +// includes only the ASCII range and the first 63 double byte characters + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x81@\x81A\x81B\x81C\x81D\x81E\x81F\x81G\x81H\x81I\x81J\x81K\x81L\x81M\x81N\x81O\x81P\x81Q\x81R\x81S\x81T\x81U\x81V\x81W\x81X\x81Y\x81Z\x81[\x81\\\x81]\x81^\x81_\x81`\x81a\x81b\x81c\x81d\x81e\x81f\x81g\x81h\x81i\x81j\x81k\x81l\x81m\x81n\x81o\x81p\x81q\x81r\x81s\x81t\x81u\x81v\x81w\x81x\x81y\x81z\x81{\x81|\x81}\x81~"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u4E02\u4E04\u4E05\u4E06\u4E0F\u4E12\u4E17\u4E1F\u4E20\u4E21\u4E23\u4E26\u4E29\u4E2E\u4E2F\u4E31\u4E33\u4E35\u4E37\u4E3C\u4E40\u4E41\u4E42\u4E44\u4E46\u4E4A\u4E51\u4E55\u4E57\u4E5A\u4E5B\u4E62\u4E63\u4E64\u4E65\u4E67\u4E68\u4E6A\u4E6B\u4E6C\u4E6D\u4E6E\u4E6F\u4E72\u4E74\u4E75\u4E76\u4E77\u4E78\u4E79\u4E7A\u4E7B\u4E7C\u4E7D\u4E7F\u4E80\u4E81\u4E82\u4E83\u4E84\u4E85\u4E87\u4E8A"; + +const aliases = ["gbk", "x-gbk"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_macintosh.js b/intl/uconv/tests/unit/test_decode_macintosh.js new file mode 100644 index 0000000000..5504a630ec --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_macintosh.js @@ -0,0 +1,13 @@ +// Tests conversion from x-mac-roman to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u00c6\u00d8\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0\u222b\u00aa\u00ba\u03a9\u00e6\u00f8\u00bf\u00a1\u00ac\u221a\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca\u00ff\u0178\u2044\u20ac\u2039\u203a\ufb01\ufb02\u2021\u00b7\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\u0131\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7"; + +const aliases = ["csMacintosh", "mac", "macintosh", "x-mac-roman"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_x_mac_cyrillic.js b/intl/uconv/tests/unit/test_decode_x_mac_cyrillic.js new file mode 100644 index 0000000000..ad783b0e96 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_x_mac_cyrillic.js @@ -0,0 +1,13 @@ +// Tests conversion from x-mac-cyrillic to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u2020\u00b0\u0490\u00a3\u00a7\u2022\u00b6\u0406\u00ae\u00a9\u2122\u0402\u0452\u2260\u0403\u0453\u221e\u00b1\u2264\u2265\u0456\u00b5\u0491\u0408\u0404\u0454\u0407\u0457\u0409\u0459\u040a\u045a\u0458\u0405\u00ac\u221a\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u040b\u045b\u040c\u045c\u0455\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u201e\u040e\u045e\u040f\u045f\u2116\u0401\u0451\u044f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u20ac"; + +const aliases = ["x-mac-cyrillic"]; + +function run_test() { + testDecodeAliasesInternal(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_x_mac_ukrainian.js b/intl/uconv/tests/unit/test_decode_x_mac_ukrainian.js new file mode 100644 index 0000000000..b8b3d63018 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_x_mac_ukrainian.js @@ -0,0 +1,13 @@ +// Tests conversion from x-mac-ukrainian to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u2020\u00b0\u0490\u00a3\u00a7\u2022\u00b6\u0406\u00ae\u00a9\u2122\u0402\u0452\u2260\u0403\u0453\u221e\u00b1\u2264\u2265\u0456\u00b5\u0491\u0408\u0404\u0454\u0407\u0457\u0409\u0459\u040a\u045a\u0458\u0405\u00ac\u221a\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u040b\u045b\u040c\u045c\u0455\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u201e\u040e\u045e\u040f\u045f\u2116\u0401\u0451\u044f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u20ac"; + +const aliases = ["x-mac-ukrainian"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-1.js b/intl/uconv/tests/unit/test_encode_8859-1.js new file mode 100644 index 0000000000..f5a6559de1 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-1.js @@ -0,0 +1,26 @@ +// Tests conversion from Unicode to ISO-8859-1 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-1", + "iso-8859-1", + "latin1", + "iso_8859-1", + "iso8859-1", + "iso-ir-100", + "l1", + "ibm819", + "cp819", + "csisolatin1", + "iso88591", + "iso_8859-1:1987", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-10.js b/intl/uconv/tests/unit/test_encode_8859-10.js new file mode 100644 index 0000000000..2ecad6013f --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-10.js @@ -0,0 +1,22 @@ +// Tests conversion from Unicode to ISO-8859-10 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0104\u0112\u0122\u012a\u0128\u0136\u00a7\u013b\u0110\u0160\u0166\u017d\u00ad\u016a\u014a\u00b0\u0105\u0113\u0123\u012b\u0129\u0137\u00b7\u013c\u0111\u0161\u0167\u017e\u2015\u016b\u014b\u0100\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u012e\u010c\u00c9\u0118\u00cb\u0116\u00cd\u00ce\u00cf\u00d0\u0145\u014c\u00d3\u00d4\u00d5\u00d6\u0168\u00d8\u0172\u00da\u00db\u00dc\u00dd\u00de\u00df\u0101\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u012f\u010d\u00e9\u0119\u00eb\u0117\u00ed\u00ee\u00ef\u00f0\u0146\u014d\u00f3\u00f4\u00f5\u00f6\u0169\u00f8\u0173\u00fa\u00fb\u00fc\u00fd\u00fe\u0138"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-10", + "iso-8859-10", + "iso8859-10", + "latin6", + "iso-ir-157", + "l6", + "csisolatin6", + "iso885910", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-11.js b/intl/uconv/tests/unit/test_encode_8859-11.js new file mode 100644 index 0000000000..7011c26688 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-11.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to ISO-8859-11 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e24\u0e25\u0e26\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e2f\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35\u0e36\u0e37\u0e38\u0e39\u0e3a\u0e3f\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a\u0e4b\u0e4c\u0e4d\u0e4e\u0e4f\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57\u0e58\u0e59\u0e5a\u0e5b"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb"; + +const aliases = ["ISO-8859-11", "iso-8859-11", "iso8859-11", "iso885911"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-13.js b/intl/uconv/tests/unit/test_encode_8859-13.js new file mode 100644 index 0000000000..cf6ad98466 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-13.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to ISO-8859-13 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u201d\u00a2\u00a3\u00a4\u201e\u00a6\u00a7\u00d8\u00a9\u0156\u00ab\u00ac\u00ad\u00ae\u00c6\u00b0\u00b1\u00b2\u00b3\u201c\u00b5\u00b6\u00b7\u00f8\u00b9\u0157\u00bb\u00bc\u00bd\u00be\u00e6\u0104\u012e\u0100\u0106\u00c4\u00c5\u0118\u0112\u010c\u00c9\u0179\u0116\u0122\u0136\u012a\u013b\u0160\u0143\u0145\u00d3\u014c\u00d5\u00d6\u00d7\u0172\u0141\u015a\u016a\u00dc\u017b\u017d\u00df\u0105\u012f\u0101\u0107\u00e4\u00e5\u0119\u0113\u010d\u00e9\u017a\u0117\u0123\u0137\u012b\u013c\u0161\u0144\u0146\u00f3\u014d\u00f5\u00f6\u00f7\u0173\u0142\u015b\u016b\u00fc\u017c\u017e\u2019"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["ISO-8859-13", "iso-8859-13", "iso8859-13", "iso885913"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-14.js b/intl/uconv/tests/unit/test_encode_8859-14.js new file mode 100644 index 0000000000..9a0e8dc00a --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-14.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to ISO-8859-14 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u1e02\u1e03\u00a3\u010a\u010b\u1e0a\u00a7\u1e80\u00a9\u1e82\u1e0b\u1ef2\u00ad\u00ae\u0178\u1e1e\u1e1f\u0120\u0121\u1e40\u1e41\u00b6\u1e56\u1e81\u1e57\u1e83\u1e60\u1ef3\u1e84\u1e85\u1e61\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u0174\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u1e6a\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u0176\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u0175\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u1e6b\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u0177\u00ff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["ISO-8859-14", "iso-8859-14", "iso8859-14", "iso885914"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-15.js b/intl/uconv/tests/unit/test_encode_8859-15.js new file mode 100644 index 0000000000..ed5cd2ec90 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-15.js @@ -0,0 +1,21 @@ +// Tests conversion from Unicode to ISO-8859-15 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a1\u00a2\u00a3\u20ac\u00a5\u0160\u00a7\u0161\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u017d\u00b5\u00b6\u00b7\u017e\u00b9\u00ba\u00bb\u0152\u0153\u0178\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-15", + "iso-8859-15", + "iso8859-15", + "iso_8859-15", + "iso885915", + "csisolatin9", + "l9", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-2.js b/intl/uconv/tests/unit/test_encode_8859-2.js new file mode 100644 index 0000000000..1b34672bcb --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-2.js @@ -0,0 +1,24 @@ +// Tests conversion from Unicode to ISO-8859-2 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0104\u02d8\u0141\u00a4\u013d\u015a\u00a7\u00a8\u0160\u015e\u0164\u0179\u00ad\u017d\u017b\u00b0\u0105\u02db\u0142\u00b4\u013e\u015b\u02c7\u00b8\u0161\u015f\u0165\u017a\u02dd\u017e\u017c\u0154\u00c1\u00c2\u0102\u00c4\u0139\u0106\u00c7\u010c\u00c9\u0118\u00cb\u011a\u00cd\u00ce\u010e\u0110\u0143\u0147\u00d3\u00d4\u0150\u00d6\u00d7\u0158\u016e\u00da\u0170\u00dc\u00dd\u0162\u00df\u0155\u00e1\u00e2\u0103\u00e4\u013a\u0107\u00e7\u010d\u00e9\u0119\u00eb\u011b\u00ed\u00ee\u010f\u0111\u0144\u0148\u00f3\u00f4\u0151\u00f6\u00f7\u0159\u016f\u00fa\u0171\u00fc\u00fd\u0163\u02d9"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-2", + "iso-8859-2", + "latin2", + "iso_8859-2", + "iso8859-2", + "iso-ir-101", + "l2", + "csisolatin2", + "iso88592", + "iso_8859-2:1987", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-3.js b/intl/uconv/tests/unit/test_encode_8859-3.js new file mode 100644 index 0000000000..fff6243431 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-3.js @@ -0,0 +1,24 @@ +// Tests conversion from Unicode to ISO-8859-3 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0126\u02d8\u00a3\u00a4\u0124\u00a7\u00a8\u0130\u015e\u011e\u0134\u00ad\u017b\u00b0\u0127\u00b2\u00b3\u00b4\u00b5\u0125\u00b7\u00b8\u0131\u015f\u011f\u0135\u00bd\u017c\u00c0\u00c1\u00c2\u00c4\u010a\u0108\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d1\u00d2\u00d3\u00d4\u0120\u00d6\u00d7\u011c\u00d9\u00da\u00db\u00dc\u016c\u015c\u00df\u00e0\u00e1\u00e2\u00e4\u010b\u0109\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f1\u00f2\u00f3\u00f4\u0121\u00f6\u00f7\u011d\u00f9\u00fa\u00fb\u00fc\u016d\u015d\u02d9"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbf\xc0\xc1\xc2\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-3", + "iso-8859-3", + "latin3", + "iso_8859-3", + "iso8859-3", + "iso-ir-109", + "l3", + "csisolatin3", + "iso88593", + "iso_8859-3:1988", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-4.js b/intl/uconv/tests/unit/test_encode_8859-4.js new file mode 100644 index 0000000000..192d13fd52 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-4.js @@ -0,0 +1,24 @@ +// Tests conversion from Unicode to ISO-8859-4 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0104\u0138\u0156\u00a4\u0128\u013b\u00a7\u00a8\u0160\u0112\u0122\u0166\u00ad\u017d\u00af\u00b0\u0105\u02db\u0157\u00b4\u0129\u013c\u02c7\u00b8\u0161\u0113\u0123\u0167\u014a\u017e\u014b\u0100\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u012e\u010c\u00c9\u0118\u00cb\u0116\u00cd\u00ce\u012a\u0110\u0145\u014c\u0136\u00d4\u00d5\u00d6\u00d7\u00d8\u0172\u00da\u00db\u00dc\u0168\u016a\u00df\u0101\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u012f\u010d\u00e9\u0119\u00eb\u0117\u00ed\u00ee\u012b\u0111\u0146\u014d\u0137\u00f4\u00f5\u00f6\u00f7\u00f8\u0173\u00fa\u00fb\u00fc\u0169\u016b\u02d9"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-4", + "iso-8859-4", + "latin4", + "iso_8859-4", + "iso8859-4", + "iso-ir-110", + "l4", + "csisolatin4", + "iso88594", + "iso_8859-4:1988", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-5.js b/intl/uconv/tests/unit/test_encode_8859-5.js new file mode 100644 index 0000000000..fb2a05a693 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-5.js @@ -0,0 +1,22 @@ +// Tests conversion from Unicode to ISO-8859-5 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0401\u0402\u0403\u0404\u0405\u0406\u0407\u0408\u0409\u040a\u040b\u040c\u00ad\u040e\u040f\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u044f\u2116\u0451\u0452\u0453\u0454\u0455\u0456\u0457\u0458\u0459\u045a\u045b\u045c\u00a7\u045e\u045f"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-5", + "iso-8859-5", + "iso_8859-5", + "iso8859-5", + "iso-ir-144", + "csisolatincyrillic", + "iso88595", + "iso_8859-5:1988", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-6.js b/intl/uconv/tests/unit/test_encode_8859-6.js new file mode 100644 index 0000000000..1768b89d82 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-6.js @@ -0,0 +1,25 @@ +// Tests conversion from Unicode to ISO-8859-6 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a4\u060c\u00ad\u061b\u061f\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u064b\u064c\u064d\u064e\u064f\u0650\u0651\u0652"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa4\xac\xad\xbb\xbf\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2"; + +const aliases = [ + "ISO-8859-6", + "iso-8859-6", + "iso_8859-6", + "iso8859-6", + "arabic", + "iso-ir-127", + "ecma-114", + "asmo-708", + "csisolatinarabic", + "iso88596", + "iso_8859-6:1987", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-7.js b/intl/uconv/tests/unit/test_encode_8859-7.js new file mode 100644 index 0000000000..3452130e74 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-7.js @@ -0,0 +1,27 @@ +// Tests conversion from Unicode to ISO-8859-7 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u2018\u2019\u00a3\u20ac\u20af\u00a6\u00a7\u00a8\u00a9\u037a\u00ab\u00ac\u00ad\u2015\u00b0\u00b1\u00b2\u00b3\u0384\u0385\u0386\u00b7\u0388\u0389\u038a\u00bb\u038c\u00bd\u038e\u038f\u0390\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039a\u039b\u039c\u039d\u039e\u039f\u03a0\u03a1\u03a3\u03a4\u03a5\u03a6\u03a7\u03a8\u03a9\u03aa\u03ab\u03ac\u03ad\u03ae\u03af\u03b0\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c2\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03ca\u03cb\u03cc\u03cd\u03ce"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe"; + +const aliases = [ + "ISO-8859-7", + "iso-8859-7", + "greek", + "greek8", + "sun_eu_greek", + "iso_8859-7", + "iso8859-7", + "iso-ir-126", + "elot_928", + "ecma-118", + "csisolatingreek", + "iso88597", + "iso_8859-7:1987", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-8.js b/intl/uconv/tests/unit/test_encode_8859-8.js new file mode 100644 index 0000000000..12402dfb56 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-8.js @@ -0,0 +1,24 @@ +// Tests conversion from Unicode to ISO-8859-8 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00d7\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00f7\u00bb\u00bc\u00bd\u00be\u2017\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05da\u05db\u05dc\u05dd\u05de\u05df\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5\u05e6\u05e7\u05e8\u05e9\u05ea\u200e\u200f"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfd\xfe"; + +const aliases = [ + "ISO-8859-8", + "iso-8859-8", + "hebrew", + "visual", + "iso_8859-8", + "iso8859-8", + "iso-ir-138", + "csisolatinhebrew", + "iso88598", + "iso_8859-8:1988", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-9.js b/intl/uconv/tests/unit/test_encode_8859-9.js new file mode 100644 index 0000000000..7658ebe5ef --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-9.js @@ -0,0 +1,24 @@ +// Tests conversion from Unicode to ISO-8859-9 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u011e\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u0130\u015e\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u011f\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u0131\u015f\u00ff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-9", + "iso-8859-9", + "latin5", + "iso_8859-9", + "iso8859-9", + "iso-ir-148", + "l5", + "csisolatin5", + "iso88599", + "iso_8859-9:1989", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1250.js b/intl/uconv/tests/unit/test_encode_CP1250.js new file mode 100644 index 0000000000..5b5c0d2f0a --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1250.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1250 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0083\u201e\u2026\u2020\u2021\u0088\u2030\u0160\u2039\u015a\u0164\u017d\u0179\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u0161\u203a\u015b\u0165\u017e\u017a\u00a0\u02c7\u02d8\u0141\u00a4\u0104\u00a6\u00a7\u00a8\u00a9\u015e\u00ab\u00ac\u00ad\u00ae\u017b\u00b0\u00b1\u02db\u0142\u00b4\u00b5\u00b6\u00b7\u00b8\u0105\u015f\u00bb\u013d\u02dd\u013e\u017c\u0154\u00c1\u00c2\u0102\u00c4\u0139\u0106\u00c7\u010c\u00c9\u0118\u00cb\u011a\u00cd\u00ce\u010e\u0110\u0143\u0147\u00d3\u00d4\u0150\u00d6\u00d7\u0158\u016e\u00da\u0170\u00dc\u00dd\u0162\u00df\u0155\u00e1\u00e2\u0103\u00e4\u013a\u0107\u00e7\u010d\u00e9\u0119\u00eb\u011b\u00ed\u00ee\u010f\u0111\u0144\u0148\u00f3\u00f4\u0151\u00f6\u00f7\u0159\u016f\u00fa\u0171\u00fc\u00fd\u0163\u02d9"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1250", "x-cp1250", "cp1250"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1251.js b/intl/uconv/tests/unit/test_encode_CP1251.js new file mode 100644 index 0000000000..52d15d8731 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1251.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1251 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0402\u0403\u201a\u0453\u201e\u2026\u2020\u2021\u20ac\u2030\u0409\u2039\u040a\u040c\u040b\u040f\u0452\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u0459\u203a\u045a\u045c\u045b\u045f\u00a0\u040e\u045e\u0408\u00a4\u0490\u00a6\u00a7\u0401\u00a9\u0404\u00ab\u00ac\u00ad\u00ae\u0407\u00b0\u00b1\u0406\u0456\u0491\u00b5\u00b6\u00b7\u0451\u2116\u0454\u00bb\u0458\u0405\u0455\u0457\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u044f"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1251", "x-cp1251", "cp1251"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1252.js b/intl/uconv/tests/unit/test_encode_CP1252.js new file mode 100644 index 0000000000..2f99791408 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1252.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1252 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0160\u2039\u0152\u008d\u017d\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u0161\u203a\u0153\u009d\u017e\u0178\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1252", "x-cp1252", "cp1252"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1253.js b/intl/uconv/tests/unit/test_encode_CP1253.js new file mode 100644 index 0000000000..a7ba34cb9f --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1253.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1253 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u0088\u2030\u008a\u2039\u008c\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u009a\u203a\u009c\u009d\u009e\u009f\u00a0\u0385\u0386\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00ab\u00ac\u00ad\u00ae\u2015\u00b0\u00b1\u00b2\u00b3\u0384\u00b5\u00b6\u00b7\u0388\u0389\u038a\u00bb\u038c\u00bd\u038e\u038f\u0390\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039a\u039b\u039c\u039d\u039e\u039f\u03a0\u03a1\u03a3\u03a4\u03a5\u03a6\u03a7\u03a8\u03a9\u03aa\u03ab\u03ac\u03ad\u03ae\u03af\u03b0\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c2\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03ca\u03cb\u03cc\u03cd\u03ce"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe"; + +const aliases = ["windows-1253", "x-cp1253", "cp1253"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1254.js b/intl/uconv/tests/unit/test_encode_CP1254.js new file mode 100644 index 0000000000..593a33841e --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1254.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1254 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0160\u2039\u0152\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u0161\u203a\u0153\u009d\u009e\u0178\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u011e\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u0130\u015e\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u011f\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u0131\u015f\u00ff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1254", "x-cp1254", "cp1254"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1255.js b/intl/uconv/tests/unit/test_encode_CP1255.js new file mode 100644 index 0000000000..6da4cd53a2 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1255.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1255 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u008a\u2039\u008c\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u009a\u203a\u009c\u009d\u009e\u009f\u00a0\u00a1\u00a2\u00a3\u20aa\u00a5\u00a6\u00a7\u00a8\u00a9\u00d7\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00f7\u00bb\u00bc\u00bd\u00be\u00bf\u05b0\u05b1\u05b2\u05b3\u05b4\u05b5\u05b6\u05b7\u05b8\u05b9\u05ba\u05bb\u05bc\u05bd\u05be\u05bf\u05c0\u05c1\u05c2\u05c3\u05f0\u05f1\u05f2\u05f3\u05f4\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05da\u05db\u05dc\u05dd\u05de\u05df\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5\u05e6\u05e7\u05e8\u05e9\u05ea\u200e\u200f"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfd\xfe"; + +const aliases = ["windows-1255", "x-cp1255", "cp1255"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1256.js b/intl/uconv/tests/unit/test_encode_CP1256.js new file mode 100644 index 0000000000..c8fbbb6192 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1256.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1256 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u067e\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0679\u2039\u0152\u0686\u0698\u0688\u06af\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u06a9\u2122\u0691\u203a\u0153\u200c\u200d\u06ba\u00a0\u060c\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u06be\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u061b\u00bb\u00bc\u00bd\u00be\u061f\u06c1\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u00d7\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u00e0\u0644\u00e2\u0645\u0646\u0647\u0648\u00e7\u00e8\u00e9\u00ea\u00eb\u0649\u064a\u00ee\u00ef\u064b\u064c\u064d\u064e\u00f4\u064f\u0650\u00f7\u0651\u00f9\u0652\u00fb\u00fc\u200e\u200f\u06d2"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1256", "x-cp1256", "cp1256"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1257.js b/intl/uconv/tests/unit/test_encode_CP1257.js new file mode 100644 index 0000000000..d56241ef88 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1257.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1257 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0083\u201e\u2026\u2020\u2021\u0088\u2030\u008a\u2039\u008c\u00a8\u02c7\u00b8\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u009a\u203a\u009c\u00af\u02db\u009f\u00a0\u00a2\u00a3\u00a4\u00a6\u00a7\u00d8\u00a9\u0156\u00ab\u00ac\u00ad\u00ae\u00c6\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00f8\u00b9\u0157\u00bb\u00bc\u00bd\u00be\u00e6\u0104\u012e\u0100\u0106\u00c4\u00c5\u0118\u0112\u010c\u00c9\u0179\u0116\u0122\u0136\u012a\u013b\u0160\u0143\u0145\u00d3\u014c\u00d5\u00d6\u00d7\u0172\u0141\u015a\u016a\u00dc\u017b\u017d\u00df\u0105\u012f\u0101\u0107\u00e4\u00e5\u0119\u0113\u010d\u00e9\u017a\u0117\u0123\u0137\u012b\u013c\u0161\u0144\u0146\u00f3\u014d\u00f5\u00f6\u00f7\u0173\u0142\u015b\u016b\u00fc\u017c\u017e\u02d9"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa2\xa3\xa4\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1257", "x-cp1257", "cp1257"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1258.js b/intl/uconv/tests/unit/test_encode_CP1258.js new file mode 100644 index 0000000000..e60a2f79ba --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1258.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1258 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u008a\u2039\u0152\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u009a\u203a\u0153\u009d\u009e\u0178\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u0102\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u0300\u00cd\u00ce\u00cf\u0110\u00d1\u0309\u00d3\u00d4\u01a0\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u01af\u0303\u00df\u00e0\u00e1\u00e2\u0103\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u0301\u00ed\u00ee\u00ef\u0111\u00f1\u0323\u00f3\u00f4\u01a1\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u01b0\u20ab\u00ff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1258", "x-cp1258", "cp1258"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP874.js b/intl/uconv/tests/unit/test_encode_CP874.js new file mode 100644 index 0000000000..18158ef7e1 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP874.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-874 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u0082\u0083\u0084\u2026\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e24\u0e25\u0e26\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e2f\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35\u0e36\u0e37\u0e38\u0e39\u0e3a\u0e3f\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a\u0e4b\u0e4c\u0e4d\u0e4e\u0e4f\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57\u0e58\u0e59\u0e5a\u0e5b"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb"; + +const aliases = ["windows-874", "dos-874"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_gb18030.js b/intl/uconv/tests/unit/test_encode_gb18030.js new file mode 100644 index 0000000000..c080e67535 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_gb18030.js @@ -0,0 +1,15 @@ +// Tests conversion from Unicode to gb18030 +// This is a sniff test which doesn't cover the full gbk range: the test string +// includes only the ASCII range and the first 63 double byte characters + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20AC\u4E02\u4E04\u4E05\u4E06\u4E0F\u4E12\u4E17\u4E1F\u4E20\u4E21\u4E23\u4E26\u4E29\u4E2E\u4E2F\u4E31\u4E33\u4E35\u4E37\u4E3C\u4E40\u4E41\u4E42\u4E44\u4E46\u4E4A\u4E51\u4E55\u4E57\u4E5A\u4E5B\u4E62\u4E63\u4E64\u4E65\u4E67\u4E68\u4E6A\u4E6B\u4E6C\u4E6D\u4E6E\u4E6F\u4E72\u4E74\u4E75\u4E76\u4E77\u4E78\u4E79\u4E7A\u4E7B\u4E7C\u4E7D\u4E7F\u4E80\u4E81\u4E82\u4E83\u4E84\u4E85\u4E87\u4E8A\uFFFD\uE7C6\u1E3F\u01F9\uE7C9\u1E3E\uE7C7\u1E40"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa2\xe3\x81@\x81A\x81B\x81C\x81D\x81E\x81F\x81G\x81H\x81I\x81J\x81K\x81L\x81M\x81N\x81O\x81P\x81Q\x81R\x81S\x81T\x81U\x81V\x81W\x81X\x81Y\x81Z\x81[\x81\\\x81]\x81^\x81_\x81`\x81a\x81b\x81c\x81d\x81e\x81f\x81g\x81h\x81i\x81j\x81k\x81l\x81m\x81n\x81o\x81p\x81q\x81r\x81s\x81t\x81u\x81v\x81w\x81x\x81y\x81z\x81{\x81|\x81}\x81~\x84\x31\xa4\x37\xa8\xa0\xa8\xbc\xa8\xbf\xa8\xc1\x815\xf46\x815\xf47\x815\xf48"; + +const aliases = ["gb18030"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_gbk.js b/intl/uconv/tests/unit/test_encode_gbk.js new file mode 100644 index 0000000000..50763b9dab --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_gbk.js @@ -0,0 +1,15 @@ +// Tests conversion from Unicode to gbk +// This is a sniff test which doesn't cover the full gbk range: the test string +// includes only the ASCII range and the first 63 double byte characters + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20AC\u4E02\u4E04\u4E05\u4E06\u4E0F\u4E12\u4E17\u4E1F\u4E20\u4E21\u4E23\u4E26\u4E29\u4E2E\u4E2F\u4E31\u4E33\u4E35\u4E37\u4E3C\u4E40\u4E41\u4E42\u4E44\u4E46\u4E4A\u4E51\u4E55\u4E57\u4E5A\u4E5B\u4E62\u4E63\u4E64\u4E65\u4E67\u4E68\u4E6A\u4E6B\u4E6C\u4E6D\u4E6E\u4E6F\u4E72\u4E74\u4E75\u4E76\u4E77\u4E78\u4E79\u4E7A\u4E7B\u4E7C\u4E7D\u4E7F\u4E80\u4E81\u4E82\u4E83\u4E84\u4E85\u4E87\u4E8A\uFFFD"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81@\x81A\x81B\x81C\x81D\x81E\x81F\x81G\x81H\x81I\x81J\x81K\x81L\x81M\x81N\x81O\x81P\x81Q\x81R\x81S\x81T\x81U\x81V\x81W\x81X\x81Y\x81Z\x81[\x81\\\x81]\x81^\x81_\x81`\x81a\x81b\x81c\x81d\x81e\x81f\x81g\x81h\x81i\x81j\x81k\x81l\x81m\x81n\x81o\x81p\x81q\x81r\x81s\x81t\x81u\x81v\x81w\x81x\x81y\x81z\x81{\x81|\x81}\x81~?"; + +const aliases = ["gbk", "x-gbk"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_macintosh.js b/intl/uconv/tests/unit/test_encode_macintosh.js new file mode 100644 index 0000000000..949d0aaf7c --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_macintosh.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to x-mac-roman + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u00c6\u00d8\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0\u222b\u00aa\u00ba\u03a9\u00e6\u00f8\u00bf\u00a1\u00ac\u221a\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca\u00ff\u0178\u2044\u20ac\u2039\u203a\ufb01\ufb02\u2021\u00b7\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\u0131\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["csMacintosh", "mac", "macintosh", "x-mac-roman"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_x_mac_cyrillic.js b/intl/uconv/tests/unit/test_encode_x_mac_cyrillic.js new file mode 100644 index 0000000000..ac27c57cc4 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_x_mac_cyrillic.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to x-mac-cyrillic + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u2020\u00b0\u0490\u00a3\u00a7\u2022\u00b6\u0406\u00ae\u00a9\u2122\u0402\u0452\u2260\u0403\u0453\u221e\u00b1\u2264\u2265\u0456\u00b5\u0491\u0408\u0404\u0454\u0407\u0457\u0409\u0459\u040a\u045a\u0458\u0405\u00ac\u221a\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u040b\u045b\u040c\u045c\u0455\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u201e\u040e\u045e\u040f\u045f\u2116\u0401\u0451\u044f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u20ac"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["x-mac-cyrillic", "x-mac-ukrainian"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_input_stream.js b/intl/uconv/tests/unit/test_input_stream.js new file mode 100644 index 0000000000..b33fb98356 --- /dev/null +++ b/intl/uconv/tests/unit/test_input_stream.js @@ -0,0 +1,41 @@ +var CC = Components.Constructor; +var converter = Cc[ + "@mozilla.org/intl/scriptableunicodeconverter" +].createInstance(Ci.nsIScriptableUnicodeConverter); +converter.charset = "UTF-8"; + +var SIS = CC( + "@mozilla.org/scriptableinputstream;1", + "nsIScriptableInputStream", + "init" +); + +function test_char(code) { + dump("test_char(0x" + code.toString(16) + ")\n"); + var original = String.fromCharCode(code); + var nativeStream = Cc["@mozilla.org/io/string-input-stream;1"].createInstance( + Ci.nsIStringInputStream + ); + nativeStream.setUTF8Data(original); + var stream = new SIS(nativeStream); + var utf8Result = stream.read(stream.available()); + stream.close(); + var result = converter.ConvertToUnicode(utf8Result); + Assert.equal(escape(original), escape(result)); +} + +function run_test() { + // This is not a very comprehensive test. + for (var i = 0x007f - 2; i <= 0x007f; i++) { + test_char(i); + } + for (i = 0x07ff - 2; i <= 0x07ff; i++) { + test_char(i); + } + for (i = 0x1000 - 2; i <= 0x1000 + 2; i++) { + test_char(i); + } + for (i = 0xe000; i <= 0xe000 + 2; i++) { + test_char(i); + } +} diff --git a/intl/uconv/tests/unit/test_unEscapeNonAsciiURI.js b/intl/uconv/tests/unit/test_unEscapeNonAsciiURI.js new file mode 100644 index 0000000000..f447959244 --- /dev/null +++ b/intl/uconv/tests/unit/test_unEscapeNonAsciiURI.js @@ -0,0 +1,58 @@ +// Tests for nsITextToSubURI.unEscapeNonAsciiURI +function run_test() { + // Tests whether nsTextToSubURI does UTF-16 unescaping (it shouldn't) + const testURI = "data:text/html,%FE%FF"; + Assert.equal( + Services.textToSubURI.unEscapeNonAsciiURI("UTF-16", testURI), + testURI + ); + + // Tests whether incomplete multibyte sequences throw. + const tests = [ + { + input: "http://example.com/?p=%E9", + throws: Cr.NS_ERROR_ILLEGAL_INPUT, + }, + { + input: "http://example.com/?p=%E9%80", + throws: Cr.NS_ERROR_ILLEGAL_INPUT, + }, + { + input: "http://example.com/?p=%E9%80%80", + expected: "http://example.com/?p=\u9000", + }, + { + input: "http://example.com/?p=%E9e", + throws: Cr.NS_ERROR_ILLEGAL_INPUT, + }, + { + input: "http://example.com/?p=%E9%E9", + throws: Cr.NS_ERROR_ILLEGAL_INPUT, + }, + { + input: "http://example.com/?name=M%FCller/", + throws: Cr.NS_ERROR_ILLEGAL_INPUT, + }, + { + input: "http://example.com/?name=M%C3%BCller/", + expected: "http://example.com/?name=Müller/", + }, + ]; + + for (const t of tests) { + if (t.throws !== undefined) { + let thrown = undefined; + try { + Services.textToSubURI.unEscapeNonAsciiURI("UTF-8", t.input); + } catch (e) { + thrown = e.result; + } + Assert.equal(thrown, t.throws); + } else { + Assert.equal( + Services.textToSubURI.unEscapeNonAsciiURI("UTF-8", t.input), + t.expected + ); + } + } +} diff --git a/intl/uconv/tests/unit/test_unEscapeURIForUI.js b/intl/uconv/tests/unit/test_unEscapeURIForUI.js new file mode 100644 index 0000000000..7f2fb167cc --- /dev/null +++ b/intl/uconv/tests/unit/test_unEscapeURIForUI.js @@ -0,0 +1,23 @@ +// Tests for nsITextToSubURI.unEscapeURIForUI +function run_test() { + // Tests whether incomplete multibyte sequences throw. + const tests = [ + { + input: "http://example.com/?p=%E3%80%82", + //TODO: should be the same as input, bug 1248812 + expected: "http://example.com/?p=%u3002", + }, + { + input: "http://example.com/?name=%E3%80%82", + dontEscape: true, + expected: "http://example.com/?name=\u3002", + }, + ]; + + for (const t of tests) { + Assert.equal( + Services.textToSubURI.unEscapeURIForUI(t.input, t.dontEscape), + t.expected + ); + } +} diff --git a/intl/uconv/tests/unit/test_unmapped.js b/intl/uconv/tests/unit/test_unmapped.js new file mode 100644 index 0000000000..13ee13c20e --- /dev/null +++ b/intl/uconv/tests/unit/test_unmapped.js @@ -0,0 +1,86 @@ +// Tests encoding of unmapped characters +const inString = "\uE5E5"; +const expectedString = "?"; + +function run_test() { + var failures = false; + var encodingConverter = CreateScriptableConverter(); + + // this list excludes codepages that can represent all Unicode + var encoders = [ + "Big5", + "EUC-JP", + "EUC-KR", + "GBK", + "gb18030", + "IBM866", + "ISO-2022-JP", + "ISO-8859-3", + "ISO-8859-4", + "ISO-8859-5", + "ISO-8859-6", + "ISO-8859-7", + "ISO-8859-8", + "ISO-8859-8-I", + "ISO-8859-10", + "ISO-8859-13", + "ISO-8859-14", + "ISO-8859-15", + "ISO-8859-16", + "ISO-8859-2", + "KOI8-R", + "KOI8-U", + "Shift_JIS", + "windows-1250", + "windows-1251", + "windows-1252", + "windows-1253", + "windows-1254", + "windows-1255", + "windows-1256", + "windows-1257", + "windows-1258", + "windows-874", + "x-mac-cyrillic", + ]; + + var counter = 0; + while (counter < encoders.length) { + var charset = encoders[counter++]; + + dump("testing " + counter + " " + charset + "\n"); + encodingConverter.charset = charset; + var codepageString = + encodingConverter.ConvertFromUnicode(inString) + + encodingConverter.Finish(); + if (codepageString != expectedString) { + dump(charset + " encoding failed\n"); + for (var i = 0; i < expectedString.length; ++i) { + if (i >= codepageString.length) { + dump( + "output length " + + codepageString.length + + " less than expected length " + + expectedString.length + + "\n" + ); + break; + } + if (codepageString.charAt(i) != expectedString.charAt(i)) { + dump( + i.toString(16) + + ": 0x" + + codepageString.charCodeAt(i).toString(16) + + " != " + + expectedString.charCodeAt(i).toString(16) + + "\n" + ); + } + } + failures = true; + } + } + if (failures) { + do_throw("test failed\n"); + } +} diff --git a/intl/uconv/tests/unit/test_utf8_illegals.js b/intl/uconv/tests/unit/test_utf8_illegals.js new file mode 100644 index 0000000000..55aec6ab8c --- /dev/null +++ b/intl/uconv/tests/unit/test_utf8_illegals.js @@ -0,0 +1,164 @@ +// Tests illegal UTF-8 sequences + +var Cc = Components.Constructor; + +const { NetUtil } = ChromeUtils.importESModule( + "resource://gre/modules/NetUtil.sys.mjs" +); + +const tests = [ + { + inStrings: [ + "%80", // Illegal or incomplete sequences + "%8f", + "%90", + "%9f", + "%a0", + "%bf", + "%c0", + "%c1", + "%c2", + "%df", + "%e0", + "%e0%a0", + "%e0%bf", + "%ed%80", + "%ed%9f", + "%ef", + "%ef%bf", + "%f0", + "%f0%90", + "%f0%90%80", + "%f0%90%bf", + "%f0%bf", + "%f0%bf%80", + "%f0%bf%bf", + "%f4", + "%f4%80", + "%f4%80%80", + "%f4%80%bf", + "%f4%8f", + "%f4%8f%80", + "%f4%8f%bf", + "%f5", + "%f7", + "%f8", + "%fb", + "%fc", + "%fd", + ], + expected: "ABC\ufffdXYZ", + }, + + { + inStrings: [ + "%c0%af", // Illegal bytes in 2-octet + "%c1%af", + ], // sequences + expected: "ABC\ufffd\ufffdXYZ", + }, + + { + inStrings: [ + "%e0%80%80", // Illegal bytes in 3-octet + "%e0%80%af", // sequences + "%e0%9f%bf", + // long surrogates + "%ed%a0%80", // D800 + "%ed%ad%bf", // DB7F + "%ed%ae%80", // DB80 + "%ed%af%bf", // DBFF + "%ed%b0%80", // DC00 + "%ed%be%80", // DF80 + "%ed%bf%bf", + ], // DFFF + expected: "ABC\ufffd\ufffd\ufffdXYZ", + }, + + { + inStrings: [ + "%f0%80%80%80", // Illegal bytes in 4-octet + "%f0%80%80%af", // sequences + "%f0%8f%bf%bf", + "%f4%90%80%80", + "%f4%bf%bf%bf", + "%f5%80%80%80", + "%f7%bf%bf%bf", + ], + expected: "ABC\ufffd\ufffd\ufffd\ufffdXYZ", + }, + + { + inStrings: [ + "%f8%80%80%80%80", // Illegal bytes in 5-octet + "%f8%80%80%80%af", // sequences + "%fb%bf%bf%bf%bf", + ], + expected: "ABC\ufffd\ufffd\ufffd\ufffd\ufffdXYZ", + }, + + // Surrogate pairs + { + inStrings: [ + "%ed%a0%80%ed%b0%80", // D800 DC00 + "%ed%a0%80%ed%bf%bf", // D800 DFFF + "%ed%ad%bf%ed%b0%80", // DB7F DC00 + "%ed%ad%bf%ed%bf%bf", // DB7F DFFF + "%ed%ae%80%ed%b0%80", // DB80 DC00 + "%ed%ae%80%ed%bf%bf", // DB80 DFFF + "%ed%af%bf%ed%b0%80", // DBFF DC00 + "%ed%ad%bf%ed%bf%bf", // DBFF DFFF + "%fc%80%80%80%80%80", // Illegal bytes in 6-octet + "%fc%80%80%80%80%af", // sequences + "%fd%bf%bf%bf%bf%bf", + ], + expected: "ABC\ufffd\ufffd\ufffd\ufffd\ufffd\ufffdXYZ", + }, +]; + +function testCaseInputStream(inStr, expected) { + var dataURI = "data:text/plain; charset=UTF-8,ABC" + inStr + "XYZ"; + dump(inStr + "==>"); + + var ConverterInputStream = Cc( + "@mozilla.org/intl/converter-input-stream;1", + "nsIConverterInputStream", + "init" + ); + var channel = NetUtil.newChannel({ + uri: dataURI, + loadUsingSystemPrincipal: true, + }); + var testInputStream = channel.open(); + var testConverter = new ConverterInputStream( + testInputStream, + "UTF-8", + 16, + 0xfffd + ); + + if (!(testConverter instanceof Ci.nsIUnicharLineInputStream)) { + throw new Error("not line input stream"); + } + + var outStr = ""; + var more; + do { + // read the line and check for eof + var line = {}; + more = testConverter.readLine(line); + outStr += line.value; + } while (more); + + dump(outStr + "; expected=" + expected + "\n"); + Assert.equal(outStr, expected); + Assert.equal(outStr.length, expected.length); +} + +function run_test() { + for (var t of tests) { + for (var inStr of t.inStrings) { + testCaseInputStream(inStr, t.expected); + } + } +} diff --git a/intl/uconv/tests/unit/xpcshell.toml b/intl/uconv/tests/unit/xpcshell.toml new file mode 100644 index 0000000000..47362902ba --- /dev/null +++ b/intl/uconv/tests/unit/xpcshell.toml @@ -0,0 +1,175 @@ +[DEFAULT] +head = "head_charsetConversionTests.js" +support-files = [ + "data/unicode-conversion.utf16.txt", + "data/unicode-conversion.utf16be.txt", + "data/unicode-conversion.utf16le.txt", + "data/unicode-conversion.utf8.txt", +] + +["test_bug116882.js"] + +["test_bug317216.js"] + +["test_bug321379.js"] + +["test_bug340714.js"] + +["test_bug381412.Big5-HKSCS.js"] + +["test_bug381412.Big5.js"] + +["test_bug381412.euc-kr.js"] + +["test_bug381412.euc_jp.js"] + +["test_bug381412.gb2312.js"] + +["test_bug381412.js"] + +["test_bug396637.js"] + +["test_bug399257.js"] + +["test_bug457886.js"] + +["test_bug522931.js"] + +["test_bug563283.js"] + +["test_bug563618.js"] + +["test_bug601429.js"] + +["test_bug715319.dbcs.js"] + +["test_bug715319.euc_jp.js"] + +["test_bug715319.gb2312.js"] + +["test_charset_conversion.js"] + +["test_decode_8859-1.js"] + +["test_decode_8859-10.js"] + +["test_decode_8859-11.js"] + +["test_decode_8859-13.js"] + +["test_decode_8859-14.js"] + +["test_decode_8859-15.js"] + +["test_decode_8859-2.js"] + +["test_decode_8859-3.js"] + +["test_decode_8859-4.js"] + +["test_decode_8859-5.js"] + +["test_decode_8859-6.js"] + +["test_decode_8859-7.js"] + +["test_decode_8859-8.js"] + +["test_decode_8859-9.js"] + +["test_decode_CP1250.js"] + +["test_decode_CP1251.js"] + +["test_decode_CP1252.js"] + +["test_decode_CP1253.js"] + +["test_decode_CP1254.js"] + +["test_decode_CP1255.js"] + +["test_decode_CP1256.js"] + +["test_decode_CP1257.js"] + +["test_decode_CP1258.js"] + +["test_decode_CP874.js"] + +["test_decode_gb18030.js"] + +["test_decode_gbk.js"] + +["test_decode_macintosh.js"] + +["test_decode_x_mac_cyrillic.js"] + +["test_decode_x_mac_ukrainian.js"] + +["test_encode_8859-1.js"] + +["test_encode_8859-10.js"] + +["test_encode_8859-11.js"] + +["test_encode_8859-13.js"] + +["test_encode_8859-14.js"] + +["test_encode_8859-15.js"] + +["test_encode_8859-2.js"] + +["test_encode_8859-3.js"] + +["test_encode_8859-4.js"] + +["test_encode_8859-5.js"] + +["test_encode_8859-6.js"] + +["test_encode_8859-7.js"] + +["test_encode_8859-8.js"] + +["test_encode_8859-9.js"] + +["test_encode_CP1250.js"] + +["test_encode_CP1251.js"] + +["test_encode_CP1252.js"] + +["test_encode_CP1253.js"] + +["test_encode_CP1254.js"] + +["test_encode_CP1255.js"] + +["test_encode_CP1256.js"] + +["test_encode_CP1257.js"] + +["test_encode_CP1258.js"] + +["test_encode_CP874.js"] + +["test_encode_gb18030.js"] + +["test_encode_gbk.js"] + +["test_encode_macintosh.js"] + +["test_encode_x_mac_cyrillic.js"] + +["test_input_stream.js"] + +["test_unEscapeNonAsciiURI.js"] + +["test_unEscapeURIForUI.js"] + +["test_unmapped.js"] + +["test_utf8_illegals.js"] + |