From 2aa4a82499d4becd2284cdb482213d541b8804dd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 28 Apr 2024 16:29:10 +0200 Subject: Adding upstream version 86.0.1. Signed-off-by: Daniel Baumann --- intl/uconv/components.conf | 34 ++ intl/uconv/crashtests/563618.html | 12 + intl/uconv/crashtests/crashtests.list | 1 + intl/uconv/directory.txt | 32 ++ intl/uconv/moz.build | 34 ++ intl/uconv/nsConverterInputStream.cpp | 240 +++++++++++++ intl/uconv/nsConverterInputStream.h | 64 ++++ intl/uconv/nsConverterOutputStream.cpp | 123 +++++++ intl/uconv/nsConverterOutputStream.h | 39 +++ intl/uconv/nsIScriptableUConv.idl | 79 +++++ intl/uconv/nsITextToSubURI.idl | 51 +++ intl/uconv/nsScriptableUConv.cpp | 261 ++++++++++++++ intl/uconv/nsScriptableUConv.h | 34 ++ intl/uconv/nsTextToSubURI.cpp | 167 +++++++++ intl/uconv/nsTextToSubURI.h | 30 ++ intl/uconv/tests/.eslintrc.js | 5 + intl/uconv/tests/mochitest.ini | 14 + intl/uconv/tests/moz.build | 9 + intl/uconv/tests/stressgb.pl | 23 ++ intl/uconv/tests/test_big5_encoder.html | 43 +++ intl/uconv/tests/test_bug335816.html | 40 +++ intl/uconv/tests/test_bug843434.html | 27 ++ intl/uconv/tests/test_bug959058-1.html | 28 ++ intl/uconv/tests/test_bug959058-2.html | 28 ++ intl/uconv/tests/test_long_doc.html | 98 ++++++ intl/uconv/tests/test_ncr_fallback.html | 74 ++++ .../tests/test_singlebyte_overconsumption.html | 33 ++ .../tests/test_unicode_noncharacterescapes.html | 303 +++++++++++++++++ .../tests/test_unicode_noncharacters_gb18030.html | 305 +++++++++++++++++ .../tests/test_unicode_noncharacters_utf8.html | 303 +++++++++++++++++ intl/uconv/tests/test_utf8_overconsumption.html | 39 +++ .../tests/unit/data/unicode-conversion.utf16.txt | Bin 0 -> 2814 bytes .../tests/unit/data/unicode-conversion.utf16be.txt | Bin 0 -> 2812 bytes .../tests/unit/data/unicode-conversion.utf16le.txt | Bin 0 -> 2812 bytes .../tests/unit/data/unicode-conversion.utf8.txt | 43 +++ .../tests/unit/head_charsetConversionTests.js | 112 +++++++ 
intl/uconv/tests/unit/test_bug116882.js | 11 + intl/uconv/tests/unit/test_bug317216.js | 168 ++++++++++ intl/uconv/tests/unit/test_bug321379.js | 35 ++ intl/uconv/tests/unit/test_bug340714.js | 127 +++++++ intl/uconv/tests/unit/test_bug381412.Big5-HKSCS.js | 64 ++++ intl/uconv/tests/unit/test_bug381412.Big5.js | 64 ++++ intl/uconv/tests/unit/test_bug381412.euc-kr.js | 64 ++++ intl/uconv/tests/unit/test_bug381412.euc_jp.js | 92 +++++ intl/uconv/tests/unit/test_bug381412.gb2312.js | 60 ++++ intl/uconv/tests/unit/test_bug381412.js | 60 ++++ intl/uconv/tests/unit/test_bug396637.js | 9 + intl/uconv/tests/unit/test_bug399257.js | 80 +++++ intl/uconv/tests/unit/test_bug457886.js | 12 + intl/uconv/tests/unit/test_bug522931.js | 4 + intl/uconv/tests/unit/test_bug563283.js | 53 +++ intl/uconv/tests/unit/test_bug563618.js | 97 ++++++ intl/uconv/tests/unit/test_bug601429.js | 84 +++++ intl/uconv/tests/unit/test_bug715319.dbcs.js | 56 ++++ intl/uconv/tests/unit/test_bug715319.euc_jp.js | 77 +++++ intl/uconv/tests/unit/test_bug715319.gb2312.js | 87 +++++ intl/uconv/tests/unit/test_charset_conversion.js | 373 +++++++++++++++++++++ intl/uconv/tests/unit/test_decode_8859-1.js | 26 ++ intl/uconv/tests/unit/test_decode_8859-10.js | 22 ++ intl/uconv/tests/unit/test_decode_8859-11.js | 13 + intl/uconv/tests/unit/test_decode_8859-13.js | 13 + intl/uconv/tests/unit/test_decode_8859-14.js | 13 + intl/uconv/tests/unit/test_decode_8859-15.js | 21 ++ intl/uconv/tests/unit/test_decode_8859-2.js | 24 ++ intl/uconv/tests/unit/test_decode_8859-3.js | 24 ++ intl/uconv/tests/unit/test_decode_8859-4.js | 24 ++ intl/uconv/tests/unit/test_decode_8859-5.js | 22 ++ intl/uconv/tests/unit/test_decode_8859-6.js | 25 ++ intl/uconv/tests/unit/test_decode_8859-7.js | 27 ++ intl/uconv/tests/unit/test_decode_8859-8.js | 24 ++ intl/uconv/tests/unit/test_decode_8859-9.js | 24 ++ intl/uconv/tests/unit/test_decode_CP1250.js | 13 + intl/uconv/tests/unit/test_decode_CP1251.js | 13 + 
intl/uconv/tests/unit/test_decode_CP1252.js | 13 + intl/uconv/tests/unit/test_decode_CP1253.js | 13 + intl/uconv/tests/unit/test_decode_CP1254.js | 13 + intl/uconv/tests/unit/test_decode_CP1255.js | 13 + intl/uconv/tests/unit/test_decode_CP1256.js | 13 + intl/uconv/tests/unit/test_decode_CP1257.js | 13 + intl/uconv/tests/unit/test_decode_CP1258.js | 13 + intl/uconv/tests/unit/test_decode_CP874.js | 13 + intl/uconv/tests/unit/test_decode_gb18030.js | 16 + intl/uconv/tests/unit/test_decode_gbk.js | 15 + intl/uconv/tests/unit/test_decode_macintosh.js | 13 + .../uconv/tests/unit/test_decode_x_mac_cyrillic.js | 13 + .../tests/unit/test_decode_x_mac_ukrainian.js | 13 + intl/uconv/tests/unit/test_encode_8859-1.js | 26 ++ intl/uconv/tests/unit/test_encode_8859-10.js | 22 ++ intl/uconv/tests/unit/test_encode_8859-11.js | 13 + intl/uconv/tests/unit/test_encode_8859-13.js | 13 + intl/uconv/tests/unit/test_encode_8859-14.js | 13 + intl/uconv/tests/unit/test_encode_8859-15.js | 21 ++ intl/uconv/tests/unit/test_encode_8859-2.js | 24 ++ intl/uconv/tests/unit/test_encode_8859-3.js | 24 ++ intl/uconv/tests/unit/test_encode_8859-4.js | 24 ++ intl/uconv/tests/unit/test_encode_8859-5.js | 22 ++ intl/uconv/tests/unit/test_encode_8859-6.js | 25 ++ intl/uconv/tests/unit/test_encode_8859-7.js | 27 ++ intl/uconv/tests/unit/test_encode_8859-8.js | 24 ++ intl/uconv/tests/unit/test_encode_8859-9.js | 24 ++ intl/uconv/tests/unit/test_encode_CP1250.js | 13 + intl/uconv/tests/unit/test_encode_CP1251.js | 13 + intl/uconv/tests/unit/test_encode_CP1252.js | 13 + intl/uconv/tests/unit/test_encode_CP1253.js | 13 + intl/uconv/tests/unit/test_encode_CP1254.js | 13 + intl/uconv/tests/unit/test_encode_CP1255.js | 13 + intl/uconv/tests/unit/test_encode_CP1256.js | 13 + intl/uconv/tests/unit/test_encode_CP1257.js | 13 + intl/uconv/tests/unit/test_encode_CP1258.js | 13 + intl/uconv/tests/unit/test_encode_CP874.js | 13 + intl/uconv/tests/unit/test_encode_gb18030.js | 15 + 
intl/uconv/tests/unit/test_encode_gbk.js | 15 + intl/uconv/tests/unit/test_encode_macintosh.js | 13 + .../uconv/tests/unit/test_encode_x_mac_cyrillic.js | 13 + intl/uconv/tests/unit/test_input_stream.js | 38 +++ intl/uconv/tests/unit/test_unEscapeNonAsciiURI.js | 58 ++++ intl/uconv/tests/unit/test_unmapped.js | 86 +++++ intl/uconv/tests/unit/test_utf8_illegals.js | 162 +++++++++ intl/uconv/tests/unit/xpcshell.ini | 90 +++++ 119 files changed, 5829 insertions(+) create mode 100644 intl/uconv/components.conf create mode 100644 intl/uconv/crashtests/563618.html create mode 100644 intl/uconv/crashtests/crashtests.list create mode 100644 intl/uconv/directory.txt create mode 100644 intl/uconv/moz.build create mode 100644 intl/uconv/nsConverterInputStream.cpp create mode 100644 intl/uconv/nsConverterInputStream.h create mode 100644 intl/uconv/nsConverterOutputStream.cpp create mode 100644 intl/uconv/nsConverterOutputStream.h create mode 100644 intl/uconv/nsIScriptableUConv.idl create mode 100644 intl/uconv/nsITextToSubURI.idl create mode 100644 intl/uconv/nsScriptableUConv.cpp create mode 100644 intl/uconv/nsScriptableUConv.h create mode 100644 intl/uconv/nsTextToSubURI.cpp create mode 100644 intl/uconv/nsTextToSubURI.h create mode 100644 intl/uconv/tests/.eslintrc.js create mode 100644 intl/uconv/tests/mochitest.ini create mode 100644 intl/uconv/tests/moz.build create mode 100644 intl/uconv/tests/stressgb.pl create mode 100644 intl/uconv/tests/test_big5_encoder.html create mode 100644 intl/uconv/tests/test_bug335816.html create mode 100644 intl/uconv/tests/test_bug843434.html create mode 100644 intl/uconv/tests/test_bug959058-1.html create mode 100644 intl/uconv/tests/test_bug959058-2.html create mode 100644 intl/uconv/tests/test_long_doc.html create mode 100644 intl/uconv/tests/test_ncr_fallback.html create mode 100644 intl/uconv/tests/test_singlebyte_overconsumption.html create mode 100644 intl/uconv/tests/test_unicode_noncharacterescapes.html create mode 100644 
intl/uconv/tests/test_unicode_noncharacters_gb18030.html create mode 100644 intl/uconv/tests/test_unicode_noncharacters_utf8.html create mode 100644 intl/uconv/tests/test_utf8_overconsumption.html create mode 100644 intl/uconv/tests/unit/data/unicode-conversion.utf16.txt create mode 100644 intl/uconv/tests/unit/data/unicode-conversion.utf16be.txt create mode 100644 intl/uconv/tests/unit/data/unicode-conversion.utf16le.txt create mode 100644 intl/uconv/tests/unit/data/unicode-conversion.utf8.txt create mode 100644 intl/uconv/tests/unit/head_charsetConversionTests.js create mode 100644 intl/uconv/tests/unit/test_bug116882.js create mode 100644 intl/uconv/tests/unit/test_bug317216.js create mode 100644 intl/uconv/tests/unit/test_bug321379.js create mode 100644 intl/uconv/tests/unit/test_bug340714.js create mode 100644 intl/uconv/tests/unit/test_bug381412.Big5-HKSCS.js create mode 100644 intl/uconv/tests/unit/test_bug381412.Big5.js create mode 100644 intl/uconv/tests/unit/test_bug381412.euc-kr.js create mode 100644 intl/uconv/tests/unit/test_bug381412.euc_jp.js create mode 100644 intl/uconv/tests/unit/test_bug381412.gb2312.js create mode 100644 intl/uconv/tests/unit/test_bug381412.js create mode 100644 intl/uconv/tests/unit/test_bug396637.js create mode 100644 intl/uconv/tests/unit/test_bug399257.js create mode 100644 intl/uconv/tests/unit/test_bug457886.js create mode 100644 intl/uconv/tests/unit/test_bug522931.js create mode 100644 intl/uconv/tests/unit/test_bug563283.js create mode 100644 intl/uconv/tests/unit/test_bug563618.js create mode 100644 intl/uconv/tests/unit/test_bug601429.js create mode 100644 intl/uconv/tests/unit/test_bug715319.dbcs.js create mode 100644 intl/uconv/tests/unit/test_bug715319.euc_jp.js create mode 100644 intl/uconv/tests/unit/test_bug715319.gb2312.js create mode 100644 intl/uconv/tests/unit/test_charset_conversion.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-1.js create mode 100644 
intl/uconv/tests/unit/test_decode_8859-10.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-11.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-13.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-14.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-15.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-2.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-3.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-4.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-5.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-6.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-7.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-8.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-9.js create mode 100644 intl/uconv/tests/unit/test_decode_CP1250.js create mode 100644 intl/uconv/tests/unit/test_decode_CP1251.js create mode 100644 intl/uconv/tests/unit/test_decode_CP1252.js create mode 100644 intl/uconv/tests/unit/test_decode_CP1253.js create mode 100644 intl/uconv/tests/unit/test_decode_CP1254.js create mode 100644 intl/uconv/tests/unit/test_decode_CP1255.js create mode 100644 intl/uconv/tests/unit/test_decode_CP1256.js create mode 100644 intl/uconv/tests/unit/test_decode_CP1257.js create mode 100644 intl/uconv/tests/unit/test_decode_CP1258.js create mode 100644 intl/uconv/tests/unit/test_decode_CP874.js create mode 100644 intl/uconv/tests/unit/test_decode_gb18030.js create mode 100644 intl/uconv/tests/unit/test_decode_gbk.js create mode 100644 intl/uconv/tests/unit/test_decode_macintosh.js create mode 100644 intl/uconv/tests/unit/test_decode_x_mac_cyrillic.js create mode 100644 intl/uconv/tests/unit/test_decode_x_mac_ukrainian.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-1.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-10.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-11.js create mode 100644 
intl/uconv/tests/unit/test_encode_8859-13.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-14.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-15.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-2.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-3.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-4.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-5.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-6.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-7.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-8.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-9.js create mode 100644 intl/uconv/tests/unit/test_encode_CP1250.js create mode 100644 intl/uconv/tests/unit/test_encode_CP1251.js create mode 100644 intl/uconv/tests/unit/test_encode_CP1252.js create mode 100644 intl/uconv/tests/unit/test_encode_CP1253.js create mode 100644 intl/uconv/tests/unit/test_encode_CP1254.js create mode 100644 intl/uconv/tests/unit/test_encode_CP1255.js create mode 100644 intl/uconv/tests/unit/test_encode_CP1256.js create mode 100644 intl/uconv/tests/unit/test_encode_CP1257.js create mode 100644 intl/uconv/tests/unit/test_encode_CP1258.js create mode 100644 intl/uconv/tests/unit/test_encode_CP874.js create mode 100644 intl/uconv/tests/unit/test_encode_gb18030.js create mode 100644 intl/uconv/tests/unit/test_encode_gbk.js create mode 100644 intl/uconv/tests/unit/test_encode_macintosh.js create mode 100644 intl/uconv/tests/unit/test_encode_x_mac_cyrillic.js create mode 100644 intl/uconv/tests/unit/test_input_stream.js create mode 100644 intl/uconv/tests/unit/test_unEscapeNonAsciiURI.js create mode 100644 intl/uconv/tests/unit/test_unmapped.js create mode 100644 intl/uconv/tests/unit/test_utf8_illegals.js create mode 100644 intl/uconv/tests/unit/xpcshell.ini (limited to 'intl/uconv') diff --git a/intl/uconv/components.conf b/intl/uconv/components.conf new file mode 100644 index 
0000000000..00686f661a --- /dev/null +++ b/intl/uconv/components.conf @@ -0,0 +1,34 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Classes = [ + { + 'cid': '{2bc2ad62-ad5d-4b7b-a9db-f74ae203c527}', + 'contract_ids': ['@mozilla.org/intl/converter-input-stream;1'], + 'type': 'nsConverterInputStream', + 'headers': ['nsConverterInputStream.h'], + }, + { + 'cid': '{ff8780a5-bbb1-4bc5-8ee7-057e7bc5c925}', + 'contract_ids': ['@mozilla.org/intl/converter-output-stream;1'], + 'type': 'nsConverterOutputStream', + 'headers': ['/intl/uconv/nsConverterOutputStream.h'], + }, + { + 'cid': '{0a698c44-3bff-11d4-9649-00c0ca135b4e}', + 'contract_ids': ['@mozilla.org/intl/scriptableunicodeconverter'], + 'type': 'nsScriptableUnicodeConverter', + 'headers': ['/intl/uconv/nsScriptableUConv.h'], + }, + { + 'js_name': 'textToSubURI', + 'cid': '{8b042e22-6f87-11d3-b3c8-00805f8a6670}', + 'contract_ids': ['@mozilla.org/intl/texttosuburi;1'], + 'interfaces': ['nsITextToSubURI'], + 'type': 'nsTextToSubURI', + 'headers': ['/intl/uconv/nsTextToSubURI.h'], + }, +] diff --git a/intl/uconv/crashtests/563618.html b/intl/uconv/crashtests/563618.html new file mode 100644 index 0000000000..e36b664762 --- /dev/null +++ b/intl/uconv/crashtests/563618.html @@ -0,0 +1,12 @@ + + + + + Serbian Glyph Test + + + +

+ + diff --git a/intl/uconv/crashtests/crashtests.list b/intl/uconv/crashtests/crashtests.list new file mode 100644 index 0000000000..5d92626324 --- /dev/null +++ b/intl/uconv/crashtests/crashtests.list @@ -0,0 +1 @@ +load 563618.html diff --git a/intl/uconv/directory.txt b/intl/uconv/directory.txt new file mode 100644 index 0000000000..2b6be7af7f --- /dev/null +++ b/intl/uconv/directory.txt @@ -0,0 +1,32 @@ +Directory Structure : +================================ + +idl - public .idl files +public - public header file +src - source directory of charset converter manager and utilities, and + charset converters for ISO-8859-1, CP1252, MacRoman and UTF-8 +tests - tests program and application for charset converter +tests/unit - xpcshell tests +tools - tools to build the tables used by the converters +util - utility functions used by the converters + +The following directories contain different charset converters: + +ucvcn - Simplified Chinese charsets - GB2312, HZ, ISO-2022-CN, GBK, GB18030 +ucvja - Japanese charsets - Shift-JIS, ISO-2022-JP, EUC-JP +ucvko - Korean charsets - ISO-2022-KR, EUC-KR, Johab +ucvlatin - Latin charsets and others - ISO-8859-x, CP1250-1258 + CP866, 874, KOI8, + Mac charsets, TIS620, UTF16 +ucvtw - Traditional Chinese charsets Set 1 - Big5 +ucvtw2 - Traditional Chinese charsets Set 2 - EUC-TW + +Within the directories containing charset converters: + +*.ut - tables used to convert to Unicode from a charset +*.uf - tables used to convert to a charset from Unicode + +The following directories are obsolete and should not be used: + +ucvth +ucvvt diff --git a/intl/uconv/moz.build b/intl/uconv/moz.build new file mode 100644 index 0000000000..acad06dfa1 --- /dev/null +++ b/intl/uconv/moz.build @@ -0,0 +1,34 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +TEST_DIRS += ["tests"] + +XPIDL_SOURCES += [ + "nsIScriptableUConv.idl", + "nsITextToSubURI.idl", +] + +XPIDL_MODULE = "uconv" + +EXPORTS += [ + "nsConverterInputStream.h", +] + +UNIFIED_SOURCES += [ + "nsConverterInputStream.cpp", + "nsConverterOutputStream.cpp", + "nsScriptableUConv.cpp", + "nsTextToSubURI.cpp", +] + +XPCOM_MANIFESTS += [ + "components.conf", +] + +FINAL_LIBRARY = "xul" + +if CONFIG["CC_TYPE"] in ("clang", "gcc"): + CXXFLAGS += ["-Wno-error=shadow"] diff --git a/intl/uconv/nsConverterInputStream.cpp b/intl/uconv/nsConverterInputStream.cpp new file mode 100644 index 0000000000..d581164eb0 --- /dev/null +++ b/intl/uconv/nsConverterInputStream.cpp @@ -0,0 +1,240 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsConverterInputStream.h" +#include "nsIInputStream.h" +#include "nsReadLine.h" +#include "nsStreamUtils.h" +#include +#include "mozilla/Unused.h" + +using namespace mozilla; + +#define CONVERTER_BUFFER_SIZE 8192 + +NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream, + nsIUnicharInputStream, nsIUnicharLineInputStream) + +NS_IMETHODIMP +nsConverterInputStream::Init(nsIInputStream* aStream, const char* aCharset, + int32_t aBufferSize, char16_t aReplacementChar) { + nsAutoCString label; + if (!aCharset) { + label.AssignLiteral("UTF-8"); + } else { + label = aCharset; + } + + auto encoding = Encoding::ForLabelNoReplacement(label); + if (!encoding) { + return NS_ERROR_UCONV_NOCONV; + } + // Previously, the implementation auto-switched only + // between the two UTF-16 variants and only when + // initialized with an endianness-unspecific label. 
+ mConverter = encoding->NewDecoder(); + + size_t outputBufferSize; + if (aBufferSize <= 0) { + aBufferSize = CONVERTER_BUFFER_SIZE; + outputBufferSize = CONVERTER_BUFFER_SIZE; + } else { + // NetUtil.jsm assumes that if buffer size equals + // the input size, the whole stream will be processed + // as one readString. This is not true with encoding_rs, + // because encoding_rs might want to see space for a + // surrogate pair, so let's compute a larger output + // buffer length. + CheckedInt needed = mConverter->MaxUTF16BufferLength(aBufferSize); + if (!needed.isValid()) { + return NS_ERROR_OUT_OF_MEMORY; + } + outputBufferSize = needed.value(); + } + + // set up our buffers. + if (!mByteData.SetCapacity(aBufferSize, mozilla::fallible) || + !mUnicharData.SetLength(outputBufferSize, mozilla::fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + + mInput = aStream; + mErrorsAreFatal = !aReplacementChar; + return NS_OK; +} + +NS_IMETHODIMP +nsConverterInputStream::Close() { + nsresult rv = mInput ? 
mInput->Close() : NS_OK; + mLineBuffer = nullptr; + mInput = nullptr; + mConverter = nullptr; + mByteData.Clear(); + mUnicharData.Clear(); + return rv; +} + +NS_IMETHODIMP +nsConverterInputStream::Read(char16_t* aBuf, uint32_t aCount, + uint32_t* aReadCount) { + NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); + uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; + if (0 == readCount) { + // Fill the unichar buffer + readCount = Fill(&mLastErrorCode); + if (readCount == 0) { + *aReadCount = 0; + return mLastErrorCode; + } + } + if (readCount > aCount) { + readCount = aCount; + } + memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset, + readCount * sizeof(char16_t)); + mUnicharDataOffset += readCount; + *aReadCount = readCount; + return NS_OK; +} + +NS_IMETHODIMP +nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, + void* aClosure, uint32_t aCount, + uint32_t* aReadCount) { + NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); + uint32_t bytesToWrite = mUnicharDataLength - mUnicharDataOffset; + nsresult rv; + if (0 == bytesToWrite) { + // Fill the unichar buffer + bytesToWrite = Fill(&rv); + if (bytesToWrite <= 0) { + *aReadCount = 0; + return rv; + } + if (NS_FAILED(rv)) { + return rv; + } + } + + if (bytesToWrite > aCount) bytesToWrite = aCount; + + uint32_t bytesWritten; + uint32_t totalBytesWritten = 0; + + while (bytesToWrite) { + rv = aWriter(this, aClosure, mUnicharData.Elements() + mUnicharDataOffset, + totalBytesWritten, bytesToWrite, &bytesWritten); + if (NS_FAILED(rv)) { + // don't propagate errors to the caller + break; + } + + bytesToWrite -= bytesWritten; + totalBytesWritten += bytesWritten; + mUnicharDataOffset += bytesWritten; + } + + *aReadCount = totalBytesWritten; + + return NS_OK; +} + +NS_IMETHODIMP +nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString, + uint32_t* aReadCount) { + NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, 
"unsigned madness"); + uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; + if (0 == readCount) { + // Fill the unichar buffer + readCount = Fill(&mLastErrorCode); + if (readCount == 0) { + *aReadCount = 0; + return mLastErrorCode; + } + } + if (readCount > aCount) { + readCount = aCount; + } + const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset; + aString.Assign(buf, readCount); + mUnicharDataOffset += readCount; + *aReadCount = readCount; + return NS_OK; +} + +uint32_t nsConverterInputStream::Fill(nsresult* aErrorCode) { + if (nullptr == mInput) { + // We already closed the stream! + *aErrorCode = NS_BASE_STREAM_CLOSED; + return 0; + } + + if (NS_FAILED(mLastErrorCode)) { + // We failed to completely convert last time, and error-recovery + // is disabled. We will fare no better this time, so... + *aErrorCode = mLastErrorCode; + return 0; + } + + // We assume a many to one conversion and are using equal sizes for + // the two buffers. However if an error happens at the very start + // of a byte buffer we may end up in a situation where n bytes lead + // to n+1 unicode chars. Thus we need to keep track of the leftover + // bytes as we convert. + + uint32_t nb; + *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb); + if (nb == 0 && mLeftOverBytes == 0) { + // No more data + *aErrorCode = NS_OK; + return 0; + } + + NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(), + "mByteData is lying to us somewhere"); + + // Now convert as much of the byte buffer to unicode as possible + auto src = AsBytes(Span(mByteData)); + auto dst = Span(mUnicharData); + // mUnicharData.Length() is the buffer length, not the fill status. + // mUnicharDataLength reflects the current fill status. + mUnicharDataLength = 0; + // Whenever we convert, mUnicharData is logically empty. + mUnicharDataOffset = 0; + // Truncation from size_t to uint32_t below is OK, because the sizes + // are bounded by the lengths of mByteData and mUnicharData. 
+ uint32_t result; + size_t read; + size_t written; + bool hadErrors; + // The design of this class is fundamentally bogus in that trailing + // errors are ignored. Always passing false as the last argument to + // Decode* calls below. + if (mErrorsAreFatal) { + Tie(result, read, written) = + mConverter->DecodeToUTF16WithoutReplacement(src, dst, false); + } else { + Tie(result, read, written, hadErrors) = + mConverter->DecodeToUTF16(src, dst, false); + } + Unused << hadErrors; + mLeftOverBytes = mByteData.Length() - read; + mUnicharDataLength = written; + if (result == kInputEmpty || result == kOutputFull) { + *aErrorCode = NS_OK; + } else { + MOZ_ASSERT(mErrorsAreFatal, "How come DecodeToUTF16() reported error?"); + *aErrorCode = NS_ERROR_UDEC_ILLEGALINPUT; + } + return mUnicharDataLength; +} + +NS_IMETHODIMP +nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult) { + if (!mLineBuffer) { + mLineBuffer = MakeUnique>(); + } + return NS_ReadLine(this, mLineBuffer.get(), aLine, aResult); +} diff --git a/intl/uconv/nsConverterInputStream.h b/intl/uconv/nsConverterInputStream.h new file mode 100644 index 0000000000..55555fc679 --- /dev/null +++ b/intl/uconv/nsConverterInputStream.h @@ -0,0 +1,64 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsConverterInputStream_h +#define nsConverterInputStream_h + +#include "nsIInputStream.h" +#include "nsIConverterInputStream.h" +#include "nsIUnicharLineInputStream.h" +#include "nsTArray.h" +#include "nsCOMPtr.h" +#include "nsReadLine.h" +#include "mozilla/Encoding.h" +#include "mozilla/UniquePtr.h" + +#define NS_CONVERTERINPUTSTREAM_CONTRACTID \ + "@mozilla.org/intl/converter-input-stream;1" + +// {2BC2AD62-AD5D-4b7b-A9DB-F74AE203C527} +#define NS_CONVERTERINPUTSTREAM_CID \ + { \ + 0x2bc2ad62, 0xad5d, 0x4b7b, { \ + 0xa9, 0xdb, 0xf7, 0x4a, 0xe2, 0x3, 0xc5, 0x27 \ + } \ + } + +class nsConverterInputStream : public nsIConverterInputStream, + public nsIUnicharLineInputStream { + public: + NS_DECL_ISUPPORTS + NS_DECL_NSIUNICHARINPUTSTREAM + NS_DECL_NSIUNICHARLINEINPUTSTREAM + NS_DECL_NSICONVERTERINPUTSTREAM + + nsConverterInputStream() + : mLastErrorCode(NS_OK), + mLeftOverBytes(0), + mUnicharDataOffset(0), + mUnicharDataLength(0), + mErrorsAreFatal(false), + mLineBuffer(nullptr) {} + + private: + virtual ~nsConverterInputStream() { Close(); } + + uint32_t Fill(nsresult* aErrorCode); + + mozilla::UniquePtr mConverter; + FallibleTArray mByteData; + FallibleTArray mUnicharData; + nsCOMPtr mInput; + + nsresult mLastErrorCode; + uint32_t mLeftOverBytes; + uint32_t mUnicharDataOffset; + uint32_t mUnicharDataLength; + bool mErrorsAreFatal; + + mozilla::UniquePtr > mLineBuffer; +}; + +#endif diff --git a/intl/uconv/nsConverterOutputStream.cpp b/intl/uconv/nsConverterOutputStream.cpp new file mode 100644 index 0000000000..7ad7acfda2 --- /dev/null +++ b/intl/uconv/nsConverterOutputStream.cpp @@ -0,0 +1,123 @@ +/* vim:set expandtab ts=4 sw=2 sts=2 cin: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsCOMPtr.h" + +#include "nsIOutputStream.h" +#include "nsString.h" + +#include "nsConverterOutputStream.h" +#include "mozilla/Encoding.h" +#include "mozilla/Unused.h" + +using namespace mozilla; + +NS_IMPL_ISUPPORTS(nsConverterOutputStream, nsIUnicharOutputStream, + nsIConverterOutputStream) + +nsConverterOutputStream::~nsConverterOutputStream() { Close(); } + +NS_IMETHODIMP +nsConverterOutputStream::Init(nsIOutputStream* aOutStream, + const char* aCharset) { + MOZ_ASSERT(aOutStream, "Null output stream!"); + + const Encoding* encoding; + if (!aCharset) { + encoding = UTF_8_ENCODING; + } else { + encoding = Encoding::ForLabelNoReplacement(MakeStringSpan(aCharset)); + if (!encoding || encoding == UTF_16LE_ENCODING || + encoding == UTF_16BE_ENCODING) { + return NS_ERROR_UCONV_NOCONV; + } + } + + mConverter = encoding->NewEncoder(); + + mOutStream = aOutStream; + + return NS_OK; +} + +NS_IMETHODIMP +nsConverterOutputStream::Write(uint32_t aCount, const char16_t* aChars, + bool* aSuccess) { + if (!mOutStream) { + NS_ASSERTION(!mConverter, "Closed streams shouldn't have converters"); + return NS_BASE_STREAM_CLOSED; + } + MOZ_ASSERT(mConverter, "Must have a converter when not closed"); + uint8_t buffer[4096]; + auto dst = Span(buffer); + auto src = Span(aChars, aCount); + for (;;) { + uint32_t result; + size_t read; + size_t written; + bool hadErrors; + Tie(result, read, written, hadErrors) = + mConverter->EncodeFromUTF16(src, dst, false); + Unused << hadErrors; + src = src.From(read); + uint32_t streamWritten; + nsresult rv = mOutStream->Write(reinterpret_cast(dst.Elements()), + written, &streamWritten); + *aSuccess = NS_SUCCEEDED(rv) && written == streamWritten; + if (!(*aSuccess)) { + return rv; + } + if (result == kInputEmpty) { + return NS_OK; + } + } +} + +NS_IMETHODIMP +nsConverterOutputStream::WriteString(const nsAString& aString, bool* aSuccess) { + int32_t inLen = aString.Length(); + nsAString::const_iterator i; + aString.BeginReading(i); + 
return Write(inLen, i.get(), aSuccess); +} + +NS_IMETHODIMP +nsConverterOutputStream::Flush() { + if (!mOutStream) return NS_OK; // Already closed. + + // If we are encoding to ISO-2022-JP, potentially + // transition back to the ASCII state. The buffer + // needs to be large enough for an additional NCR, + // though. + uint8_t buffer[12]; + auto dst = Span(buffer); + Span src(nullptr); + uint32_t result; + size_t read; + size_t written; + bool hadErrors; + Tie(result, read, written, hadErrors) = + mConverter->EncodeFromUTF16(src, dst, true); + Unused << hadErrors; + MOZ_ASSERT(result == kInputEmpty); + uint32_t streamWritten; + if (!written) { + return NS_OK; + } + return mOutStream->Write(reinterpret_cast(dst.Elements()), written, + &streamWritten); +} + +NS_IMETHODIMP +nsConverterOutputStream::Close() { + if (!mOutStream) return NS_OK; // Already closed. + + nsresult rv1 = Flush(); + + nsresult rv2 = mOutStream->Close(); + mOutStream = nullptr; + mConverter = nullptr; + return NS_FAILED(rv1) ? rv1 : rv2; +} diff --git a/intl/uconv/nsConverterOutputStream.h b/intl/uconv/nsConverterOutputStream.h new file mode 100644 index 0000000000..74b873acd5 --- /dev/null +++ b/intl/uconv/nsConverterOutputStream.h @@ -0,0 +1,39 @@ +/* vim:set expandtab ts=4 sw=2 sts=2 cin: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef NSCONVERTEROUTPUTSTREAM_H_ +#define NSCONVERTEROUTPUTSTREAM_H_ + +#include "nsIConverterOutputStream.h" +#include "nsCOMPtr.h" +#include "mozilla/Attributes.h" +#include "mozilla/Encoding.h" + +class nsIOutputStream; + +/* ff8780a5-bbb1-4bc5-8ee7-057e7bc5c925 */ +#define NS_CONVERTEROUTPUTSTREAM_CID \ + { \ + 0xff8780a5, 0xbbb1, 0x4bc5, { \ + 0x8e, 0xe7, 0x05, 0x7e, 0x7b, 0xc5, 0xc9, 0x25 \ + } \ + } + +class nsConverterOutputStream final : public nsIConverterOutputStream { + public: + nsConverterOutputStream() = default; + + NS_DECL_ISUPPORTS + NS_DECL_NSIUNICHAROUTPUTSTREAM + NS_DECL_NSICONVERTEROUTPUTSTREAM + + private: + ~nsConverterOutputStream(); + + mozilla::UniquePtr mConverter; + nsCOMPtr mOutStream; +}; + +#endif diff --git a/intl/uconv/nsIScriptableUConv.idl b/intl/uconv/nsIScriptableUConv.idl new file mode 100644 index 0000000000..f4557dce8b --- /dev/null +++ b/intl/uconv/nsIScriptableUConv.idl @@ -0,0 +1,79 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + +interface nsIInputStream; + +%{C++ +// {0A698C44-3BFF-11d4-9649-00C0CA135B4E} +#define NS_ISCRIPTABLEUNICODECONVERTER_CID { 0x0A698C44, 0x3BFF, 0x11d4, { 0x96, 0x49, 0x00, 0xC0, 0xCA, 0x13, 0x5B, 0x4E } } +#define NS_ISCRIPTABLEUNICODECONVERTER_CONTRACTID "@mozilla.org/intl/scriptableunicodeconverter" +%} + +/** + * In new code, please use the WebIDL TextDecoder and TextEncoder + * instead. They represent bytes as Uint8Array (or as view to such + * array), which is the current best practice for representing bytes + * in JavaScript. 
+ * + * This interface converts between UTF-16 in JavaScript strings + * and bytes transported as the unsigned value of each byte + * transported in a code unit of the same numeric value in + * a JavaScript string. + * + * @created 8/Jun/2000 + * @author Makoto Kato [m_kato@ga2.so-net.ne.jp] + */ +[scriptable, uuid(f36ee324-5c1c-437f-ba10-2b4db7a18031)] +interface nsIScriptableUnicodeConverter : nsISupports +{ + /** + * Converts the data from Unicode to one Charset. + * Returns the converted string. After converting, Finish should be called + * and its return value appended to this return value. + */ + ACString ConvertFromUnicode(in AString aSrc); + + /** + * Returns the terminator string. + * Should be called after ConvertFromUnicode() and appended to that + * function's return value. + */ + ACString Finish(); + + /** + * Converts the data from one Charset to Unicode. + */ + AString ConvertToUnicode(in ACString aSrc); + + /** + * Convert a unicode string to an array of bytes. Finish does not need to be + * called. + */ + void convertToByteArray(in AString aString, + [optional] out unsigned long aLen, + [array, size_is(aLen),retval] out octet aData); + + /** + * Converts a unicode string to an input stream. The bytes in the stream are + * encoded according to the charset attribute. + * The returned stream will be nonblocking. + */ + nsIInputStream convertToInputStream(in AString aString); + + /** + * Current character set. + * + * @throw NS_ERROR_UCONV_NOCONV + * The requested charset is not supported. + */ + attribute ACString charset; + + /** + * Meaningless + */ + attribute boolean isInternal; +}; diff --git a/intl/uconv/nsITextToSubURI.idl b/intl/uconv/nsITextToSubURI.idl new file mode 100644 index 0000000000..ce53121a29 --- /dev/null +++ b/intl/uconv/nsITextToSubURI.idl @@ -0,0 +1,51 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + + +%{C++ +// {8B042E22-6F87-11d3-B3C8-00805F8A6670} +#define NS_TEXTTOSUBURI_CID { 0x8b042e22, 0x6f87, 0x11d3, { 0xb3, 0xc8, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } } +#define NS_ITEXTTOSUBURI_CONTRACTID "@mozilla.org/intl/texttosuburi;1" +%} + +[scriptable, uuid(8B042E24-6F87-11d3-B3C8-00805F8A6670)] +interface nsITextToSubURI : nsISupports +{ + ACString ConvertAndEscape(in ACString charset, in AString text); + AString UnEscapeAndConvert(in ACString charset, in ACString text); + + /** + * Unescapes the given URI fragment (for UI purpose only) + * Note: + *
    + *
  • escaping back the result (unescaped string) is not guaranteed to + * give the original escaped string + *
  • The URI fragment (escaped) is assumed to be in UTF-8 and converted + * to AString (UTF-16) + *
  • In case of successful conversion any resulting character listed + * in netwerk/dns/IDNCharacterBlocklist.inc (except space) is escaped + *
  • Always succeeds (callers don't need to do error checking) + *
+ * + * @param aURIFragment the URI (or URI fragment) to unescape + * @return Unescaped aURIFragment converted to unicode + */ + AString unEscapeURIForUI(in AUTF8String aURIFragment); + + /** + * Unescapes only non ASCII characters in the given URI fragment + * note: this method assumes the URI as UTF-8 and fallbacks to the given + * charset if the charset is an ASCII superset + * + * @param aCharset the charset to convert from + * @param aURIFragment the URI (or URI fragment) to unescape + * @return Unescaped aURIFragment converted to unicode + * @throws NS_ERROR_UCONV_NOCONV when there is no decoder for aCharset + * or NS_ERROR_UDEC_ILLEGALINPUT in case of conversion failure + */ + AString unEscapeNonAsciiURI(in ACString aCharset, in AUTF8String aURIFragment); +}; diff --git a/intl/uconv/nsScriptableUConv.cpp b/intl/uconv/nsScriptableUConv.cpp new file mode 100644 index 0000000000..942da4e7dc --- /dev/null +++ b/intl/uconv/nsScriptableUConv.cpp @@ -0,0 +1,261 @@ + +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsString.h" +#include "nsIScriptableUConv.h" +#include "nsScriptableUConv.h" +#include "nsIStringStream.h" +#include "nsComponentManagerUtils.h" + +using namespace mozilla; + +/* Implementation file */ +NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter) + +nsScriptableUnicodeConverter::nsScriptableUnicodeConverter() + : mIsInternal(false) {} + +nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() = default; + +NS_IMETHODIMP +nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc, + nsACString& _retval) { + if (!mEncoder) return NS_ERROR_FAILURE; + + // We can compute the length without replacement, because the + // the replacement is only one byte long and a mappable character + // would always output something, i.e. at least one byte. + // When encoding to ISO-2022-JP, unmappables shouldn't be able + // to cause more escape sequences to be emitted than the mappable + // worst case where every input character causes an escape into + // a different state. 
+ CheckedInt needed = + mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aSrc.Length()); + if (!needed.isValid() || needed.value() > UINT32_MAX) { + return NS_ERROR_OUT_OF_MEMORY; + } + + if (!_retval.SetLength(needed.value(), fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto src = Span(aSrc); + auto dst = AsWritableBytes(Span(_retval)); + size_t totalWritten = 0; + for (;;) { + uint32_t result; + size_t read; + size_t written; + Tie(result, read, written) = + mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, false); + if (result != kInputEmpty && result != kOutputFull) { + MOZ_RELEASE_ASSERT(written < dst.Length(), + "Unmappables with one-byte replacement should not " + "exceed mappable worst case."); + dst[written++] = '?'; + } + totalWritten += written; + if (result == kInputEmpty) { + MOZ_ASSERT(totalWritten <= UINT32_MAX); + if (!_retval.SetLength(totalWritten, fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + return NS_OK; + } + src = src.From(read); + dst = dst.From(written); + } +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::Finish(nsACString& _retval) { + // The documentation for this method says it should be called after + // ConvertFromUnicode(). However, our own tests called it after + // convertFromByteArray(), i.e. when *decoding*. + // Assuming that there exists extensions that similarly call + // this at the wrong time, let's deal. In general, it is a design + // error for this class to handle conversions in both directions. + if (!mEncoder) { + _retval.Truncate(); + mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder); + return NS_OK; + } + // If we are encoding to ISO-2022-JP, potentially + // transition back to the ASCII state. The buffer + // needs to be large enough for an additional NCR, + // though. 
+ _retval.SetLength(13); + Span src(nullptr); + uint32_t result; + size_t read; + size_t written; + bool hadErrors; + Tie(result, read, written, hadErrors) = + mEncoder->EncodeFromUTF16(src, _retval, true); + Unused << hadErrors; + MOZ_ASSERT(!read); + MOZ_ASSERT(result == kInputEmpty); + _retval.SetLength(written); + + mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder); + mEncoder->Encoding()->NewEncoderInto(*mEncoder); + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc, + nsAString& _retval) { + if (!mDecoder) return NS_ERROR_FAILURE; + + uint32_t length = aSrc.Length(); + + CheckedInt needed = mDecoder->MaxUTF16BufferLength(length); + if (!needed.isValid() || needed.value() > UINT32_MAX) { + return NS_ERROR_OUT_OF_MEMORY; + } + + if (!_retval.SetLength(needed.value(), fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto src = + Span(reinterpret_cast(aSrc.BeginReading()), length); + uint32_t result; + size_t read; + size_t written; + bool hadErrors; + // The UTF-8 decoder used to throw regardless of the error behavior. + // Simulating the old behavior for compatibility with legacy callers. + // If callers want control over the behavior, they should switch to + // TextDecoder. 
+ if (mDecoder->Encoding() == UTF_8_ENCODING) { + Tie(result, read, written) = + mDecoder->DecodeToUTF16WithoutReplacement(src, _retval, false); + if (result != kInputEmpty) { + return NS_ERROR_UDEC_ILLEGALINPUT; + } + } else { + Tie(result, read, written, hadErrors) = + mDecoder->DecodeToUTF16(src, _retval, false); + } + MOZ_ASSERT(result == kInputEmpty); + MOZ_ASSERT(read == length); + MOZ_ASSERT(written <= needed.value()); + Unused << hadErrors; + if (!_retval.SetLength(written, fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::ConvertToByteArray(const nsAString& aString, + uint32_t* aLen, + uint8_t** _aData) { + if (!mEncoder) return NS_ERROR_FAILURE; + + CheckedInt needed = + mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aString.Length()); + if (!needed.isValid() || needed.value() > UINT32_MAX) { + return NS_ERROR_OUT_OF_MEMORY; + } + + uint8_t* data = (uint8_t*)malloc(needed.value()); + if (!data) { + return NS_ERROR_OUT_OF_MEMORY; + } + auto src = Span(aString); + auto dst = Span(data, needed.value()); + size_t totalWritten = 0; + for (;;) { + uint32_t result; + size_t read; + size_t written; + Tie(result, read, written) = + mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, true); + if (result != kInputEmpty && result != kOutputFull) { + // There's always room for one byte in the case of + // an unmappable character, because otherwise + // we'd have gotten `kOutputFull`. 
+ dst[written++] = '?'; + } + totalWritten += written; + if (result == kInputEmpty) { + *_aData = data; + MOZ_ASSERT(totalWritten <= UINT32_MAX); + *aLen = totalWritten; + return NS_OK; + } + src = src.From(read); + dst = dst.From(written); + } +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::ConvertToInputStream(const nsAString& aString, + nsIInputStream** _retval) { + nsresult rv; + nsCOMPtr inputStream = + do_CreateInstance("@mozilla.org/io/string-input-stream;1", &rv); + if (NS_FAILED(rv)) return rv; + + uint8_t* data; + uint32_t dataLen; + rv = ConvertToByteArray(aString, &dataLen, &data); + if (NS_FAILED(rv)) return rv; + + rv = inputStream->AdoptData(reinterpret_cast(data), dataLen); + if (NS_FAILED(rv)) { + free(data); + return rv; + } + + NS_ADDREF(*_retval = inputStream); + return rv; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::GetCharset(nsACString& aCharset) { + if (!mDecoder) { + aCharset.Truncate(); + } else { + mDecoder->Encoding()->Name(aCharset); + } + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::SetCharset(const nsACString& aCharset) { + return InitConverter(aCharset); +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::GetIsInternal(bool* aIsInternal) { + *aIsInternal = mIsInternal; + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal) { + mIsInternal = aIsInternal; + return NS_OK; +} + +nsresult nsScriptableUnicodeConverter::InitConverter( + const nsACString& aCharset) { + mEncoder = nullptr; + mDecoder = nullptr; + + auto encoding = Encoding::ForLabelNoReplacement(aCharset); + if (!encoding) { + return NS_ERROR_UCONV_NOCONV; + } + if (!(encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING)) { + mEncoder = encoding->NewEncoder(); + } + mDecoder = encoding->NewDecoderWithBOMRemoval(); + return NS_OK; +} diff --git a/intl/uconv/nsScriptableUConv.h b/intl/uconv/nsScriptableUConv.h new file mode 100644 index 0000000000..059a4b430c --- /dev/null +++ 
b/intl/uconv/nsScriptableUConv.h @@ -0,0 +1,34 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __nsScriptableUConv_h_ +#define __nsScriptableUConv_h_ + +#include "nsIScriptableUConv.h" +#include "nsCOMPtr.h" +#include "mozilla/Encoding.h" + +class nsScriptableUnicodeConverter : public nsIScriptableUnicodeConverter { + public: + NS_DECL_ISUPPORTS + NS_DECL_NSISCRIPTABLEUNICODECONVERTER + + nsScriptableUnicodeConverter(); + + protected: + virtual ~nsScriptableUnicodeConverter(); + + mozilla::UniquePtr mEncoder; + mozilla::UniquePtr mDecoder; + bool mIsInternal; + + nsresult FinishWithLength(char** _retval, int32_t* aLength); + nsresult ConvertFromUnicodeWithLength(const nsAString& aSrc, int32_t* aOutLen, + char** _retval); + + nsresult InitConverter(const nsACString& aCharset); +}; + +#endif diff --git a/intl/uconv/nsTextToSubURI.cpp b/intl/uconv/nsTextToSubURI.cpp new file mode 100644 index 0000000000..b293a93ec5 --- /dev/null +++ b/intl/uconv/nsTextToSubURI.cpp @@ -0,0 +1,167 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#include "nsString.h" +#include "nsITextToSubURI.h" +#include "nsEscape.h" +#include "nsTextToSubURI.h" +#include "nsCRT.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/Encoding.h" +#include "mozilla/Preferences.h" +#include "mozilla/TextUtils.h" +#include "mozilla/Utf8.h" + +using namespace mozilla; + +nsTextToSubURI::~nsTextToSubURI() = default; + +NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI) + +NS_IMETHODIMP +nsTextToSubURI::ConvertAndEscape(const nsACString& aCharset, + const nsAString& aText, nsACString& aOut) { + auto encoding = Encoding::ForLabelNoReplacement(aCharset); + if (!encoding) { + aOut.Truncate(); + return NS_ERROR_UCONV_NOCONV; + } + nsresult rv; + const Encoding* actualEncoding; + nsAutoCString intermediate; + Tie(rv, actualEncoding) = encoding->Encode(aText, intermediate); + Unused << actualEncoding; + if (NS_FAILED(rv)) { + aOut.Truncate(); + return rv; + } + bool ok = NS_Escape(intermediate, aOut, url_XPAlphas); + if (!ok) { + aOut.Truncate(); + return NS_ERROR_OUT_OF_MEMORY; + } + return NS_OK; +} + +NS_IMETHODIMP +nsTextToSubURI::UnEscapeAndConvert(const nsACString& aCharset, + const nsACString& aText, nsAString& aOut) { + auto encoding = Encoding::ForLabelNoReplacement(aCharset); + if (!encoding) { + aOut.Truncate(); + return NS_ERROR_UCONV_NOCONV; + } + nsAutoCString unescaped(aText); + NS_UnescapeURL(unescaped); + auto rv = encoding->DecodeWithoutBOMHandling(unescaped, aOut); + if (NS_SUCCEEDED(rv)) { + return NS_OK; + } + return rv; +} + +static bool statefulCharset(const char* charset) { + // HZ, UTF-7 and the CN and KR ISO-2022 variants are no longer in + // mozilla-central but keeping them here just in case for the benefit of + // comm-central. 
+ if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-") - 1) || + !nsCRT::strcasecmp(charset, "UTF-7") || + !nsCRT::strcasecmp(charset, "HZ-GB-2312")) + return true; + + return false; +} + +nsresult nsTextToSubURI::convertURItoUnicode(const nsCString& aCharset, + const nsCString& aURI, + nsAString& aOut) { + // check for 7bit encoding the data may not be ASCII after we decode + bool isStatefulCharset = statefulCharset(aCharset.get()); + + if (!isStatefulCharset) { + if (IsAscii(aURI)) { + CopyASCIItoUTF16(aURI, aOut); + return NS_OK; + } + if (IsUtf8(aURI)) { + CopyUTF8toUTF16(aURI, aOut); + return NS_OK; + } + } + + // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8. + NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG); + + auto encoding = Encoding::ForLabelNoReplacement(aCharset); + if (!encoding) { + aOut.Truncate(); + return NS_ERROR_UCONV_NOCONV; + } + return encoding->DecodeWithoutBOMHandlingAndWithoutReplacement(aURI, aOut); +} + +NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString& aURIFragment, + nsAString& _retval) { + nsAutoCString unescapedSpec; + // skip control octets (0x00 - 0x1f and 0x7f) when unescaping + NS_UnescapeURL(PromiseFlatCString(aURIFragment), + esc_SkipControl | esc_AlwaysCopy, unescapedSpec); + + // in case of failure, return escaped URI + // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte + // sequences are also considered failure in this context + if (convertURItoUnicode("UTF-8"_ns, unescapedSpec, _retval) != NS_OK) { + // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8 + CopyUTF8toUTF16(aURIFragment, _retval); + } + + // If there are any characters that are unsafe for URIs, reescape those. 
+ if (mIDNBlocklist.IsEmpty()) { + mozilla::net::InitializeBlocklist(mIDNBlocklist); + // we allow SPACE and IDEOGRAPHIC SPACE in this method + mozilla::net::RemoveCharFromBlocklist(u' ', mIDNBlocklist); + mozilla::net::RemoveCharFromBlocklist(0x3000, mIDNBlocklist); + } + + MOZ_ASSERT(!mIDNBlocklist.IsEmpty()); + const nsPromiseFlatString& unescapedResult = PromiseFlatString(_retval); + nsString reescapedSpec; + _retval = NS_EscapeURL( + unescapedResult, + [&](char16_t aChar) -> bool { + return mozilla::net::CharInBlocklist(aChar, mIDNBlocklist); + }, + reescapedSpec); + + return NS_OK; +} + +NS_IMETHODIMP +nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString& aCharset, + const nsACString& aURIFragment, + nsAString& _retval) { + nsAutoCString unescapedSpec; + NS_UnescapeURL(PromiseFlatCString(aURIFragment), + esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec); + // leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII + // superset since converting "http:" with such an encoding is always a bad + // idea. + if (!IsUtf8(unescapedSpec) && + (aCharset.LowerCaseEqualsLiteral("utf-16") || + aCharset.LowerCaseEqualsLiteral("utf-16be") || + aCharset.LowerCaseEqualsLiteral("utf-16le") || + aCharset.LowerCaseEqualsLiteral("utf-7") || + aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))) { + CopyASCIItoUTF16(aURIFragment, _retval); + return NS_OK; + } + + nsresult rv = + convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, _retval); + // NS_OK_UDEC_MOREINPUT is a success code, so caller can't catch the error + // if the string ends with a valid (but incomplete) sequence. + return rv == NS_OK_UDEC_MOREINPUT ? 
NS_ERROR_UDEC_ILLEGALINPUT : rv; +} + +//---------------------------------------------------------------------- diff --git a/intl/uconv/nsTextToSubURI.h b/intl/uconv/nsTextToSubURI.h new file mode 100644 index 0000000000..6b2b742888 --- /dev/null +++ b/intl/uconv/nsTextToSubURI.h @@ -0,0 +1,30 @@ +// -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- +// vim: set ts=2 et sw=2 tw=80: +// This Source Code is subject to the terms of the Mozilla Public License +// version 2.0 (the "License"). You can obtain a copy of the License at +// http://mozilla.org/MPL/2.0/. +#ifndef nsTextToSubURI_h__ +#define nsTextToSubURI_h__ + +#include "nsITextToSubURI.h" +#include "nsString.h" +#include "nsTArray.h" +#include "mozilla/net/IDNBlocklistUtils.h" + +class nsTextToSubURI : public nsITextToSubURI { + NS_DECL_ISUPPORTS + NS_DECL_NSITEXTTOSUBURI + + private: + virtual ~nsTextToSubURI(); + + // We assume that the URI is encoded as UTF-8. + nsresult convertURItoUnicode(const nsCString& aCharset, const nsCString& aURI, + nsAString& _retval); + + // Characters defined in netwerk/dns/IDNCharacterBlocklist.inc or via the + // network.IDN.extra_allowed_chars and network.IDN.extra_blocked_chars prefs. 
+ nsTArray mIDNBlocklist; +}; + +#endif // nsTextToSubURI_h__ diff --git a/intl/uconv/tests/.eslintrc.js b/intl/uconv/tests/.eslintrc.js new file mode 100644 index 0000000000..845ed3f013 --- /dev/null +++ b/intl/uconv/tests/.eslintrc.js @@ -0,0 +1,5 @@ +"use strict"; + +module.exports = { + extends: ["plugin:mozilla/mochitest-test"], +}; diff --git a/intl/uconv/tests/mochitest.ini b/intl/uconv/tests/mochitest.ini new file mode 100644 index 0000000000..0f13e77971 --- /dev/null +++ b/intl/uconv/tests/mochitest.ini @@ -0,0 +1,14 @@ +[DEFAULT] + +[test_bug335816.html] +[test_bug843434.html] +[test_bug959058-1.html] +[test_bug959058-2.html] +[test_long_doc.html] +[test_singlebyte_overconsumption.html] +[test_unicode_noncharacterescapes.html] +[test_unicode_noncharacters_gb18030.html] +[test_unicode_noncharacters_utf8.html] +[test_utf8_overconsumption.html] +[test_big5_encoder.html] +[test_ncr_fallback.html] diff --git a/intl/uconv/tests/moz.build b/intl/uconv/tests/moz.build new file mode 100644 index 0000000000..4400a9849b --- /dev/null +++ b/intl/uconv/tests/moz.build @@ -0,0 +1,9 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +XPCSHELL_TESTS_MANIFESTS += ["unit/xpcshell.ini"] + +MOCHITEST_MANIFESTS += ["mochitest.ini"] diff --git a/intl/uconv/tests/stressgb.pl b/intl/uconv/tests/stressgb.pl new file mode 100644 index 0000000000..5b37fb63fb --- /dev/null +++ b/intl/uconv/tests/stressgb.pl @@ -0,0 +1,23 @@ +#!/usr/bin/perl +use LWP::Simple; +use IO::Handle; +$stdout = *STDOUT; +open(RES , ">resultlog.txt") || die "cannot open result log file"; +#system("rm alldiff.txt in*.txt out*.txt"); +for($i=10;$i<909;$i++) +{ + RES->printf("Test Page %d \n", $i); + $url = "http://people.netscape.com/ftang/testscript/gb18030/gbtext.cgi?page=" . $i; + RES->printf( "URL = %s\n", $url); + $tmpfile = "> in". $i . ".txt"; + open STDOUT, $tmpfile || RES->print("cannot open " . $tmpfile . "\n"); + getprint $url; + $cmd2 = "../../../dist/win32_d.obj/bin/nsconv -f GB18030 -t GB18030 in" . $i . ".txt out" . $i . ".txt >err"; + $cmd3 = "diff -u in" . $i . ".txt out" . $i . ".txt >> alldiff.txt"; + RES->printf( "Run '%s'\n", $cmd2); + $st2 = system($cmd2); + RES->printf( "result = '%d'\n", $st2); + RES->printf( "Run '%s'\n", $cmd3); + $st3 = system($cmd3); + RES->printf( "result = '%d'\n", $st3); +} diff --git a/intl/uconv/tests/test_big5_encoder.html b/intl/uconv/tests/test_big5_encoder.html new file mode 100644 index 0000000000..7e86683f00 --- /dev/null +++ b/intl/uconv/tests/test_big5_encoder.html @@ -0,0 +1,43 @@ + + + + + + Test for Unicode non-characters + + + + +
+
+
+Mozilla Bug 912470 +

+ + + diff --git a/intl/uconv/tests/test_singlebyte_overconsumption.html b/intl/uconv/tests/test_singlebyte_overconsumption.html new file mode 100644 index 0000000000..3aeeb928ec --- /dev/null +++ b/intl/uconv/tests/test_singlebyte_overconsumption.html @@ -0,0 +1,33 @@ + + + + + + Test for undefined codepoints + + + + +
+
+
+Mozilla Bug 564679 +

Evil.

+ + -> + + diff --git a/intl/uconv/tests/test_unicode_noncharacterescapes.html b/intl/uconv/tests/test_unicode_noncharacterescapes.html new file mode 100644 index 0000000000..e44f8d782b --- /dev/null +++ b/intl/uconv/tests/test_unicode_noncharacterescapes.html @@ -0,0 +1,303 @@ + + + + + + Test for Unicode non-characters + + + + +
+
+
+Mozilla Bug 445886 +

All good.

+ + + diff --git a/intl/uconv/tests/test_unicode_noncharacters_gb18030.html b/intl/uconv/tests/test_unicode_noncharacters_gb18030.html new file mode 100644 index 0000000000..0c9156d9e3 --- /dev/null +++ b/intl/uconv/tests/test_unicode_noncharacters_gb18030.html @@ -0,0 +1,305 @@ + + + + + + Test for Unicode non-characters + + + + +

+
+Mozilla Bug +445886 +

All good.

+ + + diff --git a/intl/uconv/tests/test_unicode_noncharacters_utf8.html b/intl/uconv/tests/test_unicode_noncharacters_utf8.html new file mode 100644 index 0000000000..ecfdbeae09 --- /dev/null +++ b/intl/uconv/tests/test_unicode_noncharacters_utf8.html @@ -0,0 +1,303 @@ + + + + + + Test for Unicode non-characters + + + + +
+
+
+Mozilla Bug 445886 +

All good.

+ + + diff --git a/intl/uconv/tests/test_utf8_overconsumption.html b/intl/uconv/tests/test_utf8_overconsumption.html new file mode 100644 index 0000000000..25c4a273ea --- /dev/null +++ b/intl/uconv/tests/test_utf8_overconsumption.html @@ -0,0 +1,39 @@ + + + + + + Test for Unicode non-characters + + + + +onload="Inject()"> +
+
+
+Mozilla Bug 445886 +

All good.

+ + + diff --git a/intl/uconv/tests/unit/data/unicode-conversion.utf16.txt b/intl/uconv/tests/unit/data/unicode-conversion.utf16.txt new file mode 100644 index 0000000000..8a28caadfc Binary files /dev/null and b/intl/uconv/tests/unit/data/unicode-conversion.utf16.txt differ diff --git a/intl/uconv/tests/unit/data/unicode-conversion.utf16be.txt b/intl/uconv/tests/unit/data/unicode-conversion.utf16be.txt new file mode 100644 index 0000000000..e1c4e86dba Binary files /dev/null and b/intl/uconv/tests/unit/data/unicode-conversion.utf16be.txt differ diff --git a/intl/uconv/tests/unit/data/unicode-conversion.utf16le.txt b/intl/uconv/tests/unit/data/unicode-conversion.utf16le.txt new file mode 100644 index 0000000000..cde8acb70b Binary files /dev/null and b/intl/uconv/tests/unit/data/unicode-conversion.utf16le.txt differ diff --git a/intl/uconv/tests/unit/data/unicode-conversion.utf8.txt b/intl/uconv/tests/unit/data/unicode-conversion.utf8.txt new file mode 100644 index 0000000000..b45dff35d0 --- /dev/null +++ b/intl/uconv/tests/unit/data/unicode-conversion.utf8.txt @@ -0,0 +1,43 @@ +This is a Unicode converter test file containing Unicode data. Its encoding is +determined by the second-to-last dot-separated component of the filename. For +example, if this file is named foo.utf8.txt, its encoding is UTF-8; if this file +is named foo.utf16le.txt, its encoding is UTF-16LE. This file is marked as +binary in Mozilla's version control system so that it's not accidentally +"mangled". + +The contents of each file must differ ONLY by encoding, so if you edit this file +you must edit all files with the name of this file (with the encoding-specific +part changed). 
+ +== BEGIN UNICODE TEST DATA == + +== U+000000 -- U+00007F == + +BELL: "" +DATA LINK ESCAPE: "" +DELETE: "" + +== U+000080 -- U+0007FF == + +CONTROL: "€" +NO-BREAK SPACE: " " +POUND SIGN: "£" +YEN SIGN: "¥" +CURRENCY SIGN: "¢" +LATIN SMALL LETTER SCHWA: "ə" +LATIN LETTER BILABIAL PERCUSSIVE: "ʬ" + +== U+000800 -- U+00FFFF == + +BUGINESE LETTER TA: "ᨈ" +BUGINESE LETTER DA: "ᨉ" +AIRPLANE: "✈" +ZERO WIDTH NO-BREAK SPACE: "" + + +== U+010000 -- U+10FFFF == + +SHAVIAN LETTER IAN: "𐑾" +MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE: "𝅘𝅥𝅲" +CJK UNIFIED IDEOGRAPH-20000: "𠀀" +(private use U+10FEFF): "􏻿" diff --git a/intl/uconv/tests/unit/head_charsetConversionTests.js b/intl/uconv/tests/unit/head_charsetConversionTests.js new file mode 100644 index 0000000000..1f983f917c --- /dev/null +++ b/intl/uconv/tests/unit/head_charsetConversionTests.js @@ -0,0 +1,112 @@ +const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm"); + +var CC = Components.Constructor; + +function CreateScriptableConverter() { + var ScriptableUnicodeConverter = CC( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" + ); + + return new ScriptableUnicodeConverter(); +} + +function checkDecode(converter, charset, inText, expectedText) { + try { + converter.charset = charset; + } catch (e) { + converter.charset = "iso-8859-1"; + } + + dump("testing decoding from " + charset + " to Unicode.\n"); + try { + var outText = converter.ConvertToUnicode(inText); + } catch (e) { + outText = "\ufffd"; + } + + if (outText != expectedText) { + for (var i = 0; i < inText.length; ++i) { + var inn = inText[i]; + var out = outText[i]; + var expected = expectedText[i]; + if (out != expected) { + dump( + "Decoding error at position " + + i + + ": for input " + + escape(inn) + + " expected " + + escape(expected) + + " but got " + + escape(out) + + "\n" + ); + } + } + } + Assert.equal(outText, expectedText); +} + +function checkEncode(converter, charset, inText, expectedText) 
{ + try { + converter.charset = charset; + } catch (e) { + converter.charset = "iso-8859-1"; + } + + dump("testing encoding from Unicode to " + charset + "\n"); + var outText = converter.ConvertFromUnicode(inText) + converter.Finish(); + + if (outText != expectedText) { + for (var i = 0; i < inText.length; ++i) { + var inn = inText[i]; + var out = outText[i]; + var expected = expectedText[i]; + if (out != expected) { + dump( + "Encoding error at position " + + i + + ": for input " + + escape(inn) + + " expected " + + escape(expected) + + " but got " + + escape(out) + + "\n" + ); + } + } + } + Assert.equal(outText, expectedText); +} + +function testDecodeAliases(aliases, inString, expectedString) { + var converter = CreateScriptableConverter(); + for (var i = 0; i < aliases.length; ++i) { + checkDecode(converter, aliases[i], inString, expectedString); + } +} + +function testEncodeAliases(aliases, inString, expectedString) { + var converter = CreateScriptableConverter(); + for (var i = 0; i < aliases.length; ++i) { + checkEncode(converter, aliases[i], inString, expectedString); + } +} + +function testDecodeAliasesInternal(aliases, inString, expectedString) { + var converter = CreateScriptableConverter(); + converter.isInternal = true; + for (var i = 0; i < aliases.length; ++i) { + checkDecode(converter, aliases[i], inString, expectedString); + } +} + +function testEncodeAliasesInternal(aliases, inString, expectedString) { + var converter = CreateScriptableConverter(); + converter.isInternal = true; + for (var i = 0; i < aliases.length; ++i) { + checkEncode(converter, aliases[i], inString, expectedString); + } +} diff --git a/intl/uconv/tests/unit/test_bug116882.js b/intl/uconv/tests/unit/test_bug116882.js new file mode 100644 index 0000000000..5e76b30aa4 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug116882.js @@ -0,0 +1,11 @@ +/* Tests conversion of undefined and illegal sequences from Shift-JIS + * to Unicode (bug 116882) + */ + +const inText = 
"\xfd\xfe\xff\x81\x20\x81\x3f\x86\x3c"; +const expectedText = "\ufffd\ufffd\ufffd\ufffd \ufffd?\ufffd<"; +const charset = "Shift_JIS"; + +function run_test() { + checkDecode(CreateScriptableConverter(), charset, inText, expectedText); +} diff --git a/intl/uconv/tests/unit/test_bug317216.js b/intl/uconv/tests/unit/test_bug317216.js new file mode 100644 index 0000000000..507e492bbe --- /dev/null +++ b/intl/uconv/tests/unit/test_bug317216.js @@ -0,0 +1,168 @@ +/* Test case for bug 317216 + * + * Uses nsIConverterInputStream to decode UTF-16 text with valid surrogate + * pairs and lone surrogate characters + * + * Sample text is: "A" in Mathematical Bold Capitals (U+1D400) + * + * The test uses buffers of 4 different lengths to test end of buffer in mid- + * UTF16 character and mid-surrogate pair + */ + +const { NetUtil } = ChromeUtils.import("resource://gre/modules/NetUtil.jsm"); + +const test = [ + // 0: Valid surrogate pair + [ + "%D8%35%DC%20%00%2D%00%2D", + // expected: surrogate pair + "\uD835\uDC20--", + ], + // 1: Lone high surrogate + [ + "%D8%35%00%2D%00%2D", + // expected: one replacement char + "\uFFFD--", + ], + // 2: Lone low surrogate + [ + "%DC%20%00%2D%00%2D", + // expected: one replacement char + "\uFFFD--", + ], + // 3: Two high surrogates + [ + "%D8%35%D8%35%00%2D%00%2D", + // expected: two replacement chars + "\uFFFD\uFFFD--", + ], + // 4: Two low surrogates + [ + "%DC%20%DC%20%00%2D%00%2D", + // expected: two replacement chars + "\uFFFD\uFFFD--", + ], + // 5: Low surrogate followed by high surrogate + [ + "%DC%20%D8%35%00%2D%00%2D", + // expected: two replacement chars + "\uFFFD\uFFFD--", + ], + // 6: Lone high surrogate followed by valid surrogate pair + [ + "%D8%35%D8%35%DC%20%00%2D%00%2D", + // expected: replacement char followed by surrogate pair + "\uFFFD\uD835\uDC20--", + ], + // 7: Lone low surrogate followed by valid surrogate pair + [ + "%DC%20%D8%35%DC%20%00%2D%00%2D", + // expected: replacement char followed by surrogate pair + 
"\uFFFD\uD835\uDC20--", + ], + // 8: Valid surrogate pair followed by lone high surrogate + [ + "%D8%35%DC%20%D8%35%00%2D%00%2D", + // expected: surrogate pair followed by replacement char + "\uD835\uDC20\uFFFD--", + ], + // 9: Valid surrogate pair followed by lone low surrogate + [ + "%D8%35%DC%20%DC%20%00%2D%00%2D", + // expected: surrogate pair followed by replacement char + "\uD835\uDC20\uFFFD--", + ], + // 10: Lone high surrogate at the end of the input + [ + "%D8%35%", + // expected: nothing + "", + ], + // 11: Half code unit at the end of the input + [ + "%D8", + // expected: nothing + "", + ], +]; + +const IOService = Components.Constructor( + "@mozilla.org/network/io-service;1", + "nsIIOService" +); +const ConverterInputStream = Components.Constructor( + "@mozilla.org/intl/converter-input-stream;1", + "nsIConverterInputStream", + "init" +); +const ios = new IOService(); + +function testCase(testText, expectedText, bufferLength, charset) { + var dataURI = "data:text/plain;charset=" + charset + "," + testText; + var channel = NetUtil.newChannel({ + uri: dataURI, + loadUsingSystemPrincipal: true, + }); + var testInputStream = channel.open(); + var testConverter = new ConverterInputStream( + testInputStream, + charset, + bufferLength, + 0xfffd + ); + + if (!(testConverter instanceof Ci.nsIUnicharLineInputStream)) { + throw new Error("not line input stream"); + } + + var outStr = ""; + var more; + do { + // read the line and check for eof + var line = {}; + more = testConverter.readLine(line); + outStr += line.value; + } while (more); + + // escape the strings before comparing for better readability + Assert.equal(escape(outStr), escape(expectedText)); +} + +// Add 32 dummy characters to the test text to work around the minimum buffer +// size of an ns*Buffer +const MINIMUM_BUFFER_SIZE = 32; +function padBytes(str) { + var padding = ""; + for (var i = 0; i < MINIMUM_BUFFER_SIZE; ++i) { + padding += "%00%2D"; + } + return padding + str; +} + +function 
padUnichars(str) { + var padding = ""; + for (var i = 0; i < MINIMUM_BUFFER_SIZE; ++i) { + padding += "-"; + } + return padding + str; +} + +// Byte-swap %-encoded utf-16 +function flip(str) { + return str.replace(/(%..)(%..)/g, "$2$1"); +} + +function run_test() { + for (var i = 0; i < 12; ++i) { + for ( + var bufferLength = MINIMUM_BUFFER_SIZE; + bufferLength < MINIMUM_BUFFER_SIZE + 4; + ++bufferLength + ) { + var testText = padBytes(test[i][0]); + var expectedText = padUnichars(test[i][1]); + testCase(testText, expectedText, bufferLength, "UTF-16BE"); + testCase(flip(testText), expectedText, bufferLength, "UTF-16LE"); + } + } +} diff --git a/intl/uconv/tests/unit/test_bug321379.js b/intl/uconv/tests/unit/test_bug321379.js new file mode 100644 index 0000000000..338f59688e --- /dev/null +++ b/intl/uconv/tests/unit/test_bug321379.js @@ -0,0 +1,35 @@ +// Tests that calling close on a converter in/output stream doesn't crash +// (bug 321379) + +function run_test() { + var StorageStream = Components.Constructor( + "@mozilla.org/storagestream;1", + "nsIStorageStream", + "init" + ); + var ConverterInputStream = Components.Constructor( + "@mozilla.org/intl/converter-input-stream;1", + "nsIConverterInputStream", + "init" + ); + var ConverterOutputStream = Components.Constructor( + "@mozilla.org/intl/converter-output-stream;1", + "nsIConverterOutputStream", + "init" + ); + + var storage = new StorageStream(1024, -1, null); + + // Output + var outStr = storage.getOutputStream(0); + var out = new ConverterOutputStream(outStr, "UTF-8"); + out.writeString("Foo."); + out.close(); + out.close(); // This line should not crash. It should just do nothing. + + // Input + var inStr = storage.newInputStream(0); + var inp = new ConverterInputStream(inStr, "UTF-8", 1024, 0xfffd); + inp.close(); + inp.close(); // This line should not crash. It should just do nothing. 
+} diff --git a/intl/uconv/tests/unit/test_bug340714.js b/intl/uconv/tests/unit/test_bug340714.js new file mode 100644 index 0000000000..feba35c37f --- /dev/null +++ b/intl/uconv/tests/unit/test_bug340714.js @@ -0,0 +1,127 @@ +/* Test case for bug 340714 + * + * Uses nsIConverterInputStream to decode UTF-16 text with all combinations + * of UTF-16BE and UTF-16LE with and without BOM. + * + * Sample text is: "Все счастливые семьи похожи друг на друга, каждая несчастливая семья несчастлива по-своему." + * + * The enclosing quotation marks are included in the sample text to test that + * UTF-16LE is recognized even when there is no BOM and the UTF-16LE decoder is + * not explicitly called. This only works when the first character of the text + * is an eight-bit character. + */ + +const { NetUtil } = ChromeUtils.import("resource://gre/modules/NetUtil.jsm"); + +const beBOM = "%FE%FF"; +const leBOM = "%FF%FE"; +const sampleUTF16BE = + "%00%22%04%12%04%41%04%35%00%20%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%4B%04%35%00%20%04%41%04%35%04%3C%04%4C%04%38%00%20%04%3F%04%3E%04%45%04%3E%04%36%04%38%00%20%04%34%04%40%04%43%04%33%00%20%04%3D%04%30%00%20%04%34%04%40%04%43%04%33%04%30%00%2C%00%20%04%3A%04%30%04%36%04%34%04%30%04%4F%00%20%04%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%04%4F%00%20%04%41%04%35%04%3C%04%4C%04%4F%00%20%04%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%00%20%04%3F%04%3E%00%2D%04%41%04%32%04%3E%04%35%04%3C%04%43%00%2E%00%22"; +const sampleUTF16LE = + 
"%22%00%12%04%41%04%35%04%20%00%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%4B%04%35%04%20%00%41%04%35%04%3C%04%4C%04%38%04%20%00%3F%04%3E%04%45%04%3E%04%36%04%38%04%20%00%34%04%40%04%43%04%33%04%20%00%3D%04%30%04%20%00%34%04%40%04%43%04%33%04%30%04%2C%00%20%00%3A%04%30%04%36%04%34%04%30%04%4F%04%20%00%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%04%4F%04%20%00%41%04%35%04%3C%04%4C%04%4F%04%20%00%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%04%20%00%3F%04%3E%04%2D%00%41%04%32%04%3E%04%35%04%3C%04%43%04%2E%00%22%00"; +const expected = + '"\u0412\u0441\u0435 \u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u044B\u0435 \u0441\u0435\u043C\u044C\u0438 \u043F\u043E\u0445\u043E\u0436\u0438 \u0434\u0440\u0443\u0433 \u043D\u0430 \u0434\u0440\u0443\u0433\u0430, \u043A\u0430\u0436\u0434\u0430\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430\u044F \u0441\u0435\u043C\u044C\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430 \u043F\u043E-\u0441\u0432\u043E\u0435\u043C\u0443."'; + +Services.prefs.setBoolPref("security.allow_eval_with_system_principal", true); +registerCleanupFunction(() => { + Services.prefs.clearUserPref("security.allow_eval_with_system_principal"); +}); + +function makeText(withBOM, charset) { + // eslint-disable-next-line no-eval + var theText = eval("sample" + charset); + if (withBOM) { + if (charset == "UTF16BE") { + theText = beBOM + theText; + } else { + theText = leBOM + theText; + } + } + return theText; +} + +function testCase(withBOM, charset, charsetDec, decoder, bufferLength) { + var dataURI = + "data:text/plain;charset=" + charsetDec + "," + makeText(withBOM, charset); + + var ConverterInputStream = Components.Constructor( + "@mozilla.org/intl/converter-input-stream;1", + "nsIConverterInputStream", + "init" + ); + + var channel = NetUtil.newChannel({ + uri: dataURI, + loadUsingSystemPrincipal: true, + }); + var testInputStream = channel.open(); + var testConverter = 
new ConverterInputStream( + testInputStream, + decoder, + bufferLength, + 0xfffd + ); + + if (!(testConverter instanceof Ci.nsIUnicharLineInputStream)) { + throw new Error("not line input stream"); + } + + var outStr = ""; + var more; + do { + // read the line and check for eof + var line = {}; + more = testConverter.readLine(line); + outStr += line.value; + } while (more); + + if (outStr != expected) { + dump( + "Failed with BOM = " + + withBOM + + "; charset = " + + charset + + "; charset declaration = " + + charsetDec + + "; decoder = " + + decoder + + "; bufferLength = " + + bufferLength + + "\n" + ); + if (outStr.length == expected.length) { + for (let i = 0; i < outStr.length; ++i) { + if (outStr.charCodeAt(i) != expected.charCodeAt(i)) { + dump( + i + + ": " + + outStr.charCodeAt(i).toString(16) + + " != " + + expected.charCodeAt(i).toString(16) + + "\n" + ); + } + } + } + } + + // escape the strings before comparing for better readability + Assert.equal(escape(outStr), escape(expected)); +} + +function run_test() { + /* BOM charset charset decoder buffer + declaration length */ + testCase(true, "UTF16LE", "UTF-16", "UTF-16BE", 64); + testCase(true, "UTF16BE", "UTF-16", "UTF-16LE", 64); + testCase(true, "UTF16LE", "UTF-16", "UTF-16LE", 64); + testCase(true, "UTF16BE", "UTF-16", "UTF-16BE", 64); + testCase(false, "UTF16LE", "UTF-16", "UTF-16LE", 64); + testCase(false, "UTF16BE", "UTF-16", "UTF-16BE", 64); + testCase(true, "UTF16LE", "UTF-16", "UTF-16BE", 65); + testCase(true, "UTF16BE", "UTF-16", "UTF-16LE", 65); + testCase(true, "UTF16LE", "UTF-16", "UTF-16LE", 65); + testCase(true, "UTF16BE", "UTF-16", "UTF-16BE", 65); + testCase(false, "UTF16LE", "UTF-16", "UTF-16LE", 65); + testCase(false, "UTF16BE", "UTF-16", "UTF-16BE", 65); +} diff --git a/intl/uconv/tests/unit/test_bug381412.Big5-HKSCS.js b/intl/uconv/tests/unit/test_bug381412.Big5-HKSCS.js new file mode 100644 index 0000000000..4108dc1090 --- /dev/null +++ 
b/intl/uconv/tests/unit/test_bug381412.Big5-HKSCS.js @@ -0,0 +1,64 @@ +const charset = "Big5-HKSCS"; + +function dumpStrings(inString, outString) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = ""; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); +} + +function error(inString, outString, msg) { + dumpStrings(inString, outString); + do_throw("security risk: " + msg); +} + +function run_test() { + var ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" + ); + + var converter = new ScriptableUnicodeConverter(); + converter.charset = charset; + + var leadByte, trailByte; + var inString; + for (leadByte = 1; leadByte < 0x100; ++leadByte) { + for (trailByte = 1; trailByte < 0x100; ++trailByte) { + inString = String.fromCharCode(leadByte, trailByte, 65); + var outString = converter.ConvertToUnicode(inString) + converter.Finish(); + switch (outString.length) { + case 1: + error(inString, outString, "2 byte sequence eaten"); + break; + case 2: + if ( + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 1 ASCII"); + } + break; + case 3: + if ( + outString != inString && + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 2 ASCII"); + } + break; + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug381412.Big5.js b/intl/uconv/tests/unit/test_bug381412.Big5.js new file mode 100644 index 0000000000..45c8bafc83 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug381412.Big5.js @@ -0,0 +1,64 @@ +const charset = "Big5"; + +function dumpStrings(inString, outString) { + var 
dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = ""; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); +} + +function error(inString, outString, msg) { + dumpStrings(inString, outString); + do_throw("security risk: " + msg); +} + +function run_test() { + var ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" + ); + + var converter = new ScriptableUnicodeConverter(); + converter.charset = charset; + + var leadByte, trailByte; + var inString; + for (leadByte = 1; leadByte < 0x100; ++leadByte) { + for (trailByte = 1; trailByte < 0x100; ++trailByte) { + inString = String.fromCharCode(leadByte, trailByte, 65); + var outString = converter.ConvertToUnicode(inString) + converter.Finish(); + switch (outString.length) { + case 1: + error(inString, outString, "2 byte sequence eaten"); + break; + case 2: + if ( + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 1 ASCII"); + } + break; + case 3: + if ( + outString != inString && + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 2 ASCII"); + } + break; + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug381412.euc-kr.js b/intl/uconv/tests/unit/test_bug381412.euc-kr.js new file mode 100644 index 0000000000..58d36c76f4 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug381412.euc-kr.js @@ -0,0 +1,64 @@ +const charset = "EUC-KR"; + +function dumpStrings(inString, outString) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if 
(!outString.length) { + dispOut = ""; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); +} + +function error(inString, outString, msg) { + dumpStrings(inString, outString); + do_throw("security risk: " + msg); +} + +function run_test() { + var ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" + ); + + var converter = new ScriptableUnicodeConverter(); + converter.charset = charset; + + var leadByte, trailByte; + var inString; + for (leadByte = 1; leadByte < 0x100; ++leadByte) { + for (trailByte = 1; trailByte < 0x100; ++trailByte) { + inString = String.fromCharCode(leadByte, trailByte, 65); + var outString = converter.ConvertToUnicode(inString) + converter.Finish(); + switch (outString.length) { + case 1: + error(inString, outString, "2 byte sequence eaten"); + break; + case 2: + if ( + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 1 ASCII"); + } + break; + case 3: + if ( + outString != inString && + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 2 ASCII"); + } + break; + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug381412.euc_jp.js b/intl/uconv/tests/unit/test_bug381412.euc_jp.js new file mode 100644 index 0000000000..7e07eb9e69 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug381412.euc_jp.js @@ -0,0 +1,92 @@ +const charset = "EUC-JP"; +const ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" +); +var gConverter; + +function error(inString, outString, msg) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + 
inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = ""; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); + do_throw("security risk: " + msg); +} + +function test(inString) { + var outString = gConverter.ConvertToUnicode(inString) + gConverter.Finish(); + + switch (outString.length) { + case 0: + case 1: + case 2: + error(inString, outString, "Unexpected error"); + break; + case 3: + error(inString, outString, "3 byte sequence eaten"); + break; + case 4: + if ( + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 && + outString.charCodeAt(2) < 0x80 && + outString.charCodeAt(3) < 0x80 + ) { + error(inString, outString, "3 byte sequence converted to 1 ASCII"); + } + break; + case 5: + if ( + outString != inString && + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 && + outString.charCodeAt(2) < 0x80 && + outString.charCodeAt(3) < 0x80 && + outString.charCodeAt(4) < 0x80 + ) { + error(inString, outString, "3 byte sequence converted to 2 ASCII"); + } + break; + case 6: + if ( + outString != inString && + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 && + outString.charCodeAt(2) < 0x80 && + outString.charCodeAt(3) < 0x80 && + outString.charCodeAt(4) < 0x80 && + outString.charCodeAt(5) < 0x80 + ) { + error(inString, outString, "3 byte sequence converted to 3 ASCII"); + } + break; + } +} + +function run_test() { + gConverter = new ScriptableUnicodeConverter(); + gConverter.charset = charset; + + var byte1, byte2, byte3; + for (byte1 = 1; byte1 < 0x100; ++byte1) { + for (byte2 = 1; byte2 < 0x100; ++byte2) { + if (byte1 == 0x8f) { + for (byte3 = 1; byte3 < 0x100; ++byte3) { + test(String.fromCharCode(byte1, byte2, byte3) + "foo"); + } + } else { + test(String.fromCharCode(byte1, byte2) + " foo"); + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug381412.gb2312.js 
b/intl/uconv/tests/unit/test_bug381412.gb2312.js new file mode 100644 index 0000000000..df680dadae --- /dev/null +++ b/intl/uconv/tests/unit/test_bug381412.gb2312.js @@ -0,0 +1,60 @@ +const charset = "GB2312"; + +function error(inString, outString, msg) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = ""; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); + do_throw("security risk: " + msg); +} + +function run_test() { + var ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" + ); + + var converter = new ScriptableUnicodeConverter(); + converter.charset = charset; + + var leadByte, trailByte; + var inString; + for (leadByte = 1; leadByte < 0x100; ++leadByte) { + for (trailByte = 1; trailByte < 0x100; ++trailByte) { + inString = String.fromCharCode(leadByte, trailByte, 65); + var outString = converter.ConvertToUnicode(inString) + converter.Finish(); + switch (outString.length) { + case 1: + error(inString, outString, "2 byte sequence eaten"); + break; + case 2: + if ( + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 1 ASCII"); + } + break; + case 3: + if ( + outString != inString && + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 2 ASCII"); + } + break; + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug381412.js b/intl/uconv/tests/unit/test_bug381412.js new file mode 100644 index 0000000000..89849bf5e6 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug381412.js @@ -0,0 +1,60 @@ +const charset = "Shift_JIS"; + +function error(inString, outString, 
msg) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = ""; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); + do_throw("security risk: " + msg); +} + +function run_test() { + var ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" + ); + + var converter = new ScriptableUnicodeConverter(); + converter.charset = charset; + + var leadByte, trailByte; + var inString; + for (leadByte = 1; leadByte < 0x100; ++leadByte) { + for (trailByte = 1; trailByte < 0x100; ++trailByte) { + inString = String.fromCharCode(leadByte, trailByte, 65); + var outString = converter.ConvertToUnicode(inString) + converter.Finish(); + switch (outString.length) { + case 1: + error(inString, outString, "2 byte sequence eaten"); + break; + case 2: + if ( + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 1 ASCII"); + } + break; + case 3: + if ( + outString != inString && + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 2 ASCII"); + } + break; + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug396637.js b/intl/uconv/tests/unit/test_bug396637.js new file mode 100644 index 0000000000..6aac53e5d1 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug396637.js @@ -0,0 +1,9 @@ +// Tests conversion of a single byte from UTF-16 to Unicode + +const inString = "A"; +const expectedString = ""; +const charset = "UTF-16BE"; + +function run_test() { + checkDecode(CreateScriptableConverter(), charset, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_bug399257.js 
b/intl/uconv/tests/unit/test_bug399257.js new file mode 100644 index 0000000000..9acd3e9b38 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug399257.js @@ -0,0 +1,80 @@ +// Tests encoding of characters below U+0020 +const inString = "Hello\u000aWorld"; +const expectedString = "Hello\nWorld"; + +function run_test() { + var failures = false; + var encodingConverter = CreateScriptableConverter(); + + var encoders = [ + "Big5", + "Big5-HKSCS", + "EUC-JP", + "EUC-KR", + "gb18030", + "gbk", + "IBM866", + "ISO-2022-JP", + "ISO-8859-1", + "ISO-8859-2", + "ISO-8859-3", + "ISO-8859-4", + "ISO-8859-5", + "ISO-8859-6", + "ISO-8859-7", + "ISO-8859-8", + "ISO-8859-8-I", + "ISO-8859-10", + "ISO-8859-13", + "ISO-8859-14", + "ISO-8859-15", + "ISO-8859-16", + "KOI8-R", + "KOI8-U", + "Shift_JIS", + "windows-1250", + "windows-1251", + "windows-1252", + "windows-1253", + "windows-1254", + "windows-1255", + "windows-1256", + "windows-1257", + "windows-1258", + "windows-874", + "macintosh", + "x-mac-cyrillic", + "x-user-defined", + "UTF-8", + ]; + + var counter = 0; + while (counter < encoders.length) { + var charset = encoders[counter++]; + dump("testing " + counter + " " + charset + "\n"); + + encodingConverter.charset = charset; + var codepageString = + encodingConverter.ConvertFromUnicode(inString) + + encodingConverter.Finish(); + if (codepageString != expectedString) { + dump(charset + " encoding failed\n"); + for (var i = 0; i < expectedString.length; ++i) { + if (codepageString.charAt(i) != expectedString.charAt(i)) { + dump( + i.toString(16) + + ": 0x" + + codepageString.charCodeAt(i).toString(16) + + " != " + + expectedString.charCodeAt(i).toString(16) + + "\n" + ); + } + } + failures = true; + } + } + if (failures) { + do_throw("test failed\n"); + } +} diff --git a/intl/uconv/tests/unit/test_bug457886.js b/intl/uconv/tests/unit/test_bug457886.js new file mode 100644 index 0000000000..21c3036901 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug457886.js @@ -0,0 +1,12 @@ +// 
Tests conversion from Unicode to ISO-2022-JP + +const inString = + "\u3042\u3044\u3046\u3048\u304A\u000D\u000A\u304B\u304D\u304F\u3051\u3053"; + +const expectedString = '\x1B$B$"$$$&$($*\x1B(B\x0D\x0A\x1B$B$+$-$/$1$3\x1B(B'; + +const charset = "ISO-2022-JP"; + +function run_test() { + checkEncode(CreateScriptableConverter(), charset, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_bug522931.js b/intl/uconv/tests/unit/test_bug522931.js new file mode 100644 index 0000000000..2dae8d72e9 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug522931.js @@ -0,0 +1,4 @@ +// crash test with invaild parameter (bug 522931) +function run_test() { + Assert.equal(Services.textToSubURI.UnEscapeAndConvert("UTF-8", null), ""); +} diff --git a/intl/uconv/tests/unit/test_bug563283.js b/intl/uconv/tests/unit/test_bug563283.js new file mode 100644 index 0000000000..49c13dcfcb --- /dev/null +++ b/intl/uconv/tests/unit/test_bug563283.js @@ -0,0 +1,53 @@ +// Tests conversion from Unicode to ISO-2022-JP with Hankaku characters + +const inStrings = [ + // 。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚ + "\uFF61\uFF62\uFF63\uFF64\uFF65\uFF66\uFF67\uFF68\uFF69\uFF6A\uFF6B\uFF6C\uFF6D\uFF6E\uFF6F\uFF70\uFF71\uFF72\uFF73\uFF74\uFF75\uFF76\uFF77\uFF78\uFF79\uFF7A\uFF7B\uFF7C\uFF7D\uFF7E\uFF7F\uFF80\uFF81\uFF82\uFF83\uFF84\uFF85\uFF86\uFF87\uFF88\uFF89\uFF8A\uFF8B\uFF8C\uFF8D\uFF8E\uFF8F\uFF90\uFF91\uFF92\uFF93\uFF94\uFF95\uFF96\uFF97\uFF98\uFF99\uFF9A\uFF9B\uFF9C\uFF9D\uFF9E\uFF9F", + // equivalent to + // 。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゛゜ + // \u3002\u300c\u300d\u3001\u30fb\u30f2\u30a1\u30a3\u30a5\u30a7\u30a9\u30e3\u30e5\u30e7\u30c3\u30fc\u30a2\u30a4\u30a6\u30a8\u30aa\u30ab\u30ad\u30af\u30b1\u30b3\u30b5\u30b7\u30b9\u30bb\u30bd\u30bf\u30c1\u30c4\u30c6\u30c8\u30ca\u30cb\u30cc\u30cd\u30ce\u30cf\u30d2\u30d5\u30d8\u30db\u30de\u30df\u30e0\u30e1\u30e2\u30e4\u30e6\u30e8\u30e9\u30ea\u30eb\u30ec\u30ed\u30ef\u30f3\u309b\u309c" + + // 
ガギグゲゴザジズゼゾダヂヅデドバビブベボ + "\uFF76\uFF9E\uFF77\uFF9E\uFF78\uFF9E\uFF79\uFF9E\uFF7A\uFF9E\uFF7B\uFF9E\uFF7C\uFF9E\uFF7D\uFF9E\uFF7E\uFF9E\uFF7F\uFF9E\uFF80\uFF9E\uFF81\uFF9E\uFF82\uFF9E\uFF83\uFF9E\uFF84\uFF9E\uFF8A\uFF9E\uFF8B\uFF9E\uFF8C\uFF9E\uFF8D\uFF9E\uFF8E\uFF9E", + // equivalent to + // カ゛キ゛ク゛ケ゛コ゛サ゛シ゛ス゛セ゛ソ゛タ゛チ゛ツ゛テ゛ト゛ハ゛ヒ゛フ゛ヘ゛ホ゛ + // \u30AB\u309B\u30AD\u309B\u30AF\u309B\u30B1\u309B\u30B3\u309B\u30B5\u309B\u30B7\u309B\u30B9\u309B\u30BB\u309B\u30BD\u309B\u30BF\u309B\u30C1\u309B\u30C4\u309B\u30C6\u309B\u30C8\u309B\u30CF\u309B\u30D2\u309B\u30D5\u309B\u30D8\u309B\u30DB\u309B + + // パピプペポ + "\uFF8A\uFF9F\uFF8B\uFF9F\uFF8C\uFF9F\uFF8D\uFF9F\uFF8E\uFF9F", + // equivalent to + // ハ゜ヒ゜フ゜ヘ゜ホ゜ + // \u30CF\u309C\u30D2\u309C\u30D5\u309C\u30D8\u309C\u30DB\u309C" + + // Hankaku preceded and followed by regular Katakana (no change of charset) + // フランツ・ヨーゼフ・ハイドン + "\u30D5\u30E9\u30F3\u30C4\u30FB\uFF96\uFF70\uFF7E\uFF9E\uFF8C\u30FB\u30CF\u30A4\u30C9\u30F3", + + // Hankaku preceded and followed by Roman (charset change) + // Mozilla (モジラ) Foundation + "Mozilla (\uFF93\uFF7C\uFF9E\uFF97) Foundation", + + // Hankaku preceded and followed by unencodable characters + // दिल्ली・デリー・ਦਿੱਲੀ + "\u0926\u093F\u0932\u094D\u0932\u0940\uFF65\uFF83\uFF9E\uFF98\uFF70\uFF65\u0A26\u0A3F\u0A71\u0A32\u0A40", +]; + +const expectedStrings = [ + "\x1B$B!#!V!W!\x22!&%r%!%#%%%'%)%c%e%g%C!<%\x22%$%&%(%*%+%-%/%1%3%5%7%9%;%=%?%A%D%F%H%J%K%L%M%N%O%R%U%X%[%^%_%`%a%b%d%f%h%i%j%k%l%m%o%s!+!,\x1B(B", + "\x1B$B%+!+%-!+%/!+%1!+%3!+%5!+%7!+%9!+%;!+%=!+%?!+%A!+%D!+%F!+%H!+%O!+%R!+%U!+%X!+%[!+\x1B(B", + "\x1B$B%O!,%R!,%U!,%X!,%[!,\x1B(B", + "\x1B$B%U%i%s%D!&%h!<%;!+%U!&%O%$%I%s\x1B(B", + "Mozilla (\x1B$B%b%7!+%i\x1B(B) Foundation", + "??????\x1B$B!&%F!+%j!" 
|| outChar == "/") { + dump( + charset + + " has a problem: " + + escape(inString) + + " decodes to '" + + outString + + "'\n" + ); + failures = true; + } + } + } + } + if (failures) { + do_throw("test failed\n"); + } +} diff --git a/intl/uconv/tests/unit/test_bug715319.dbcs.js b/intl/uconv/tests/unit/test_bug715319.dbcs.js new file mode 100644 index 0000000000..3ba405a925 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug715319.dbcs.js @@ -0,0 +1,56 @@ +// 2-byte charsets: +const charsets = ["Big5", "EUC-KR"]; +const ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" +); +var gConverter; + +function error(inString, outString, msg) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = ""; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); + do_throw("security risk: " + msg); +} + +function IsASCII(charCode) { + return charCode <= 0x7e; +} + +function test(inString) { + var outString = gConverter.ConvertToUnicode(inString) + gConverter.Finish(); + + var outLen = outString.length; + + if ( + IsASCII(inString.charCodeAt(1)) && + (outLen < 4 || outString.charCodeAt(outLen - 4) == 0xfffd) + ) { + error(inString, outString, "ASCII input eaten in " + gConverter.charset); + } +} + +function run_test() { + gConverter = new ScriptableUnicodeConverter(); + for (var i = 0; i < charsets.length; ++i) { + gConverter.charset = charsets[i]; + + var byte1, byte2; + for (byte1 = 1; byte1 < 0x100; ++byte1) { + for (byte2 = 1; byte2 < 0x100; ++byte2) { + test(String.fromCharCode(byte1, byte2) + "foo"); + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug715319.euc_jp.js b/intl/uconv/tests/unit/test_bug715319.euc_jp.js new file mode 100644 
index 0000000000..537d073d14
--- /dev/null
+++ b/intl/uconv/tests/unit/test_bug715319.euc_jp.js
@@ -0,0 +1,77 @@
+const charset = "EUC-JP";
+const ScriptableUnicodeConverter = Components.Constructor(
+  "@mozilla.org/intl/scriptableunicodeconverter",
+  "nsIScriptableUnicodeConverter"
+);
+var gConverter;
+// Dumps both strings as hex code units, then fails the test with the given msg.
+function error(inString, outString, msg) {
+  var dispIn = "";
+  var dispOut = "";
+  var i;
+  for (i = 0; i < inString.length; ++i) {
+    dispIn += " x" + inString.charCodeAt(i).toString(16);
+  }
+  if (!outString.length) {
+    dispOut = "";
+  } else {
+    for (i = 0; i < outString.length; ++i) {
+      dispOut += " x" + outString.charCodeAt(i).toString(16);
+    }
+  }
+  dump('"' + dispIn + '" ==> "' + dispOut + '"\n');
+  do_throw("security risk: " + msg);
+}
+// True for code units <= 0x7e (note: 0x7f is not treated as ASCII by this test).
+function IsASCII(charCode) {
+  return charCode <= 0x7e;
+}
+// True when charCode is outside the range 0xa1..0xfe.
+function IsNotGR(charCode) {
+  return charCode < 0xa1 || charCode > 0xfe;
+}
+// Decodes inString and fails if a checked input byte is missing at its output offset.
+function test(inString) {
+  var outString = gConverter.ConvertToUnicode(inString) + gConverter.Finish();
+
+  var outLen = outString.length;
+  if (
+    IsASCII(inString.charCodeAt(1)) &&
+    inString.charCodeAt(1) != outString.charCodeAt(outLen - 5)
+  ) {
+    error(inString, outString, "ASCII second byte eaten");
+  } else if (
+    IsASCII(inString.charCodeAt(2)) &&
+    inString.charCodeAt(2) != outString.charCodeAt(outLen - 4)
+  ) {
+    error(inString, outString, "ASCII third byte eaten");
+  } else if (
+    inString.charCodeAt(0) == 0x8f &&
+    inString.charCodeAt(1) > 0x7f &&
+    IsNotGR(inString.charCodeAt(2)) &&
+    !(
+      outString.charCodeAt(outLen - 4) == 0xfffd ||
+      outString.charCodeAt(outLen - 4) == inString.charCodeAt(2)
+    )
+  ) {
+    error(inString, outString, "non-GR third byte eaten");
+  }
+}
+// Tests all 2-byte inputs (3-byte when byte1 is 0x8f), bytes 1..0xff, with foo appended.
+function run_test() {
+  gConverter = new ScriptableUnicodeConverter();
+  gConverter.charset = charset;
+
+  var byte1, byte2, byte3;
+  for (byte1 = 1; byte1 < 0x100; ++byte1) {
+    for (byte2 = 1; byte2 < 0x100; ++byte2) {
+      if (byte1 == 0x8f) {
+        for (byte3 = 1; byte3 < 0x100; ++byte3) {
+          test(String.fromCharCode(byte1, byte2, byte3) + "foo");
+        }
+      } else {
+        test(String.fromCharCode(byte1, byte2) + " foo");
+      }
+    }
+  }
+}
diff --git a/intl/uconv/tests/unit/test_bug715319.gb2312.js b/intl/uconv/tests/unit/test_bug715319.gb2312.js
new file mode 100644
index 0000000000..f780ab81d1
--- /dev/null
+++ b/intl/uconv/tests/unit/test_bug715319.gb2312.js
@@ -0,0 +1,87 @@
+const charset = "GB2312";
+const ScriptableUnicodeConverter = Components.Constructor(
+  "@mozilla.org/intl/scriptableunicodeconverter",
+  "nsIScriptableUnicodeConverter"
+);
+var gConverter;
+// Dumps both strings as hex code units, then fails the test with the given msg.
+function error(inString, outString, msg) {
+  var dispIn = "";
+  var dispOut = "";
+  var i;
+  for (i = 0; i < inString.length; ++i) {
+    dispIn += " x" + inString.charCodeAt(i).toString(16);
+  }
+  if (!outString.length) {
+    dispOut = "";
+  } else {
+    for (i = 0; i < outString.length; ++i) {
+      dispOut += " x" + outString.charCodeAt(i).toString(16);
+    }
+  }
+  dump('"' + dispIn + '" ==> "' + dispOut + '"\n');
+  do_throw("security risk: " + msg);
+}
+// True for code units <= 0x7e (note: 0x7f is not treated as ASCII by this test).
+function IsASCII(charCode) {
+  return charCode <= 0x7e;
+}
+// Fails if ASCII byte 1 or 2 of inString decodes to U+FFFD (legal 4-byte inputs exempt).
+function test(inString) {
+  var outString = gConverter.ConvertToUnicode(inString) + gConverter.Finish();
+
+  var outLen = outString.length;
+  for (var pos = 1; pos < 3; ++pos) {
+    let outPos = outLen - (9 - pos);
+    if (outPos < 0) {
+      outPos = 0;
+    }
+    let c0 = inString.charCodeAt(0);
+    let c1 = inString.charCodeAt(1);
+    let c2 = inString.charCodeAt(2);
+    let c3 = inString.charCodeAt(3);
+    if (
+      IsASCII(inString.charCodeAt(pos)) &&
+      !(
+        outString.charCodeAt(outPos) == inString.charCodeAt(pos) ||
+        outString.charCodeAt(outPos) != 0xfffd ||
+        // legal 4 byte range
+        (0x81 <= c0 &&
+          c0 <= 0xfe &&
+          0x30 <= c1 &&
+          c1 <= 0x39 &&
+          0x81 <= c2 &&
+          c2 <= 0xfe &&
+          0x30 <= c3 &&
+          c3 <= 0x39)
+      )
+    ) {
+      dump("pos = " + pos + "; outPos = " + outPos + "\n");
+      error(inString, outString, "ASCII input eaten");
+    }
+  }
+}
+// Runs test() over all 2-byte inputs and a limited set of 4-byte inputs, each + " foo".
+function run_test() {
+  gConverter = new ScriptableUnicodeConverter();
+  gConverter.charset = charset;
+
+  var byte1, byte2, byte3, byte4;
+
+  // 2-byte
+  for (byte1 = 1; byte1 < 0x100; ++byte1) {
+    for (byte2 = 1; byte2 < 0x100; ++byte2) {
+      test(String.fromCharCode(byte1, byte2) + " foo");
+    }
+  }
+  // 4-byte (limited)
+  for (byte1 = 0x80; byte1 < 0x90; ++byte1) {
+    for (byte2 = 0x20; byte2 < 0x40; ++byte2) {
+      for (byte3 = 0x80; byte3 < 0x90; ++byte3) {
+        for (byte4 = 0x20; byte4 < 0x40; ++byte4) {
+          test(String.fromCharCode(byte1, byte2, byte3, byte4) + " foo");
+        }
+      }
+    }
+  }
+}
diff --git a/intl/uconv/tests/unit/test_charset_conversion.js b/intl/uconv/tests/unit/test_charset_conversion.js
new file mode 100644
index 0000000000..8f1793ca18
--- /dev/null
+++ b/intl/uconv/tests/unit/test_charset_conversion.js
@@ -0,0 +1,373 @@
+const NS_ERROR_ILLEGAL_VALUE = Cr.NS_ERROR_ILLEGAL_VALUE;
+
+var BIS, BOS, _Pipe, COS, FIS, _SS, CIS;
+
+var dataDir;
+// Entry point: binds the stream constructors, locates data/, then runs both tests.
+function run_test() {
+  BIS = Components.Constructor(
+    "@mozilla.org/binaryinputstream;1",
+    "nsIBinaryInputStream",
+    "setInputStream"
+  );
+  BOS = Components.Constructor(
+    "@mozilla.org/binaryoutputstream;1",
+    "nsIBinaryOutputStream",
+    "setOutputStream"
+  );
+  _Pipe = Components.Constructor("@mozilla.org/pipe;1", "nsIPipe", "init");
+  COS = Components.Constructor(
+    "@mozilla.org/intl/converter-output-stream;1",
+    "nsIConverterOutputStream",
+    "init"
+  );
+  FIS = Components.Constructor(
+    "@mozilla.org/network/file-input-stream;1",
+    "nsIFileInputStream",
+    "init"
+  );
+  _SS = Components.Constructor(
+    "@mozilla.org/storagestream;1",
+    "nsIStorageStream",
+    "init"
+  );
+  CIS = Components.Constructor(
+    "@mozilla.org/intl/converter-input-stream;1",
+    "nsIConverterInputStream",
+    "init"
+  );
+
+  dataDir = do_get_file("data/");
+
+  test_utf8_1();
+  test_cross_conversion();
+}
+// Sample strings that test_utf8_1 writes through a UTF-8 converter output stream.
+const UNICODE_STRINGS = [
+  "\u00BD + \u00BE == \u00BD\u00B2 + \u00BC + \u00BE",
+
+  "AZaz09 \u007F " + // U+000000 to U+00007F
+  "\u0080 \u0398 \u03BB \u0725 " + // U+000080 to U+0007FF
+    "\u0964 \u0F5F \u20AC \uFFFB", // U+000800 to U+00FFFF
+
+  // there would be strings containing non-BMP code points here, but
+  // unfortunately JS strings are UCS-2 (and worse yet are treated as
+  // 16-bit values by the spec), so we have to do gymnastics to work
+  // with non-BMP -- manual surrogate decoding doesn't work because
+  // String.prototype.charCodeAt() ignores surrogate pairs and only
+  // returns 16-bit values
+];
+
+// test conversion equality -- keys are names of files containing equivalent
+// Unicode data, values are the encoding of the file in the format expected by
+// nsIConverter(In|Out)putStream.init
+const UNICODE_FILES = {
+  "unicode-conversion.utf8.txt": "UTF-8",
+  "unicode-conversion.utf16.txt": "UTF-16",
+  "unicode-conversion.utf16le.txt": "UTF-16LE",
+  "unicode-conversion.utf16be.txt": "UTF-16BE",
+};
+// Writes each UNICODE_STRINGS entry as UTF-8 into a pipe and checks the decoded units.
+function test_utf8_1() {
+  for (var i = 0; i < UNICODE_STRINGS.length; i++) {
+    var pipe = Pipe();
+    var conv = new COS(pipe.outputStream, "UTF-8");
+    Assert.ok(conv.writeString(UNICODE_STRINGS[i]));
+    conv.close();
+
+    if (
+      !equalStreams(
+        new UTF8(pipe.inputStream),
+        stringToCodePoints(UNICODE_STRINGS[i])
+      )
+    ) {
+      do_throw("UNICODE_STRINGS[" + i + "] not handled correctly");
+    }
+  }
+}
+// Decodes every data file with its own encoding and compares it with every other file.
+function test_cross_conversion() {
+  for (var fn1 in UNICODE_FILES) {
+    var fin = getBinaryInputStream(fn1);
+    var ss = StorageStream();
+
+    var bos = new BOS(ss.getOutputStream(0));
+    var av;
+    while ((av = fin.available()) > 0) {
+      var data = fin.readByteArray(av);
+      bos.writeByteArray(data);
+    }
+    fin.close();
+    bos.close();
+
+    for (var fn2 in UNICODE_FILES) {
+      var fin2 = getUnicharInputStream(fn2, UNICODE_FILES[fn2]);
+      var unichar = new CIS(
+        ss.newInputStream(0),
+        UNICODE_FILES[fn1],
+        8192,
+        0x0
+      );
+
+      if (!equalUnicharStreams(unichar, fin2)) {
+        do_throw(
+          "unequal streams: " + UNICODE_FILES[fn1] + ", " + UNICODE_FILES[fn2]
+        );
+      }
+    }
+  }
+}
+
+// utility functions
+// StorageStream: builds a storage stream through _SS(8192, 2**32 - 1, null).
+function StorageStream() {
+  return new _SS(8192, Math.pow(2, 32) - 1, null);
+}
+// Opens data file `filename` and returns a converter input stream decoding `encoding`.
+function getUnicharInputStream(filename, encoding) {
+  var file = dataDir.clone();
+  file.append(filename);
+
+  const PR_RDONLY = 0x1;
+  var fis = new FIS(
+    file,
+    PR_RDONLY,
+    "0644",
+    Ci.nsIFileInputStream.CLOSE_ON_EOF
+  );
+  return new CIS(fis, encoding, 8192, 0x0);
+}
+// Opens data file `filename` as a raw binary input stream (`encoding` is unused).
+function getBinaryInputStream(filename, encoding) {
+  var file = dataDir.clone();
+  file.append(filename);
+
+  const PR_RDONLY = 0x1;
+  var fis = new FIS(
+    file,
+    PR_RDONLY,
+    "0644",
+    Ci.nsIFileInputStream.CLOSE_ON_EOF
+  );
+  return new BIS(fis);
+}
+// True when stream.readUnit() yields exactly codePoints and then a negative value.
+function equalStreams(stream, codePoints) {
+  var currIndex = 0;
+  while (true) {
+    var unit = stream.readUnit();
+    if (unit < 0) {
+      return currIndex == codePoints.length;
+    }
+    if (unit !== codePoints[currIndex++]) {
+      return false;
+    }
+  }
+  // eslint-disable-next-line no-unreachable
+  do_throw("not reached");
+  return false;
+}
+// Reads both streams in 1024-unit steps; true only if every read matches up to EOF.
+function equalUnicharStreams(s1, s2) {
+  var r1, r2;
+  var str1 = {},
+    str2 = {};
+  while (true) {
+    r1 = s1.readString(1024, str1);
+    r2 = s2.readString(1024, str2);
+
+    if (r1 != r2 || str1.value != str2.value) {
+      print("r1: " + r1 + ", r2: " + r2);
+      print(str1.value.length);
+      print(str2.value.length);
+      return false;
+    }
+    if (r1 == 0 && r2 == 0) {
+      return true;
+    }
+  }
+
+  // not reached
+  // eslint-disable-next-line no-unreachable
+  return false;
+}
+// Returns the numeric value of every UTF-16 code unit of str, as an array.
+function stringToCodePoints(str) {
+  return str.split("").map(function(v) {
+    return v.charCodeAt(0);
+  });
+}
+// Bit mask with the n lowest bits set, i.e. 2**n - 1.
+function lowbits(n) {
+  return Math.pow(2, n) - 1;
+}
+// Creates a pipe via the _Pipe constructor with (false, false, 1024, 10, null).
+function Pipe() {
+  return new _Pipe(false, false, 1024, 10, null);
+}
+
+// complex charset readers
+
+/**
+ * Wraps a UTF-8 stream to allow access to the Unicode code points in it.
+ *
+ * @param stream
+ *        the stream to wrap
+ */
+function UTF8(stream) {
+  this._stream = new BIS(stream);
+}
+UTF8.prototype = {
+  // returns numeric code point at front of stream encoded in UTF-8, -1 if at
+  // end of stream, or throws if valid (and properly encoded!) code point not
+  // found
+  readUnit() {
+    var str = this._stream;
+
+    var c, c2, c3, c4, rv;
+
+    // if at end of stream, must distinguish failure to read any bytes
+    // (correct behavior) from failure to read some byte after the first
+    // in the character
+    try {
+      c = str.read8();
+    } catch (e) {
+      return -1;
+    }
+
+    if (c < 0x80) {
+      return c;
+    }
+
+    if (c < 0xc0) {
+      // c < 11000000
+      // byte doesn't have enough leading ones (must be at least two)
+      throw NS_ERROR_ILLEGAL_VALUE;
+    }
+
+    c2 = str.read8();
+    if (c2 >= 0xc0 || c2 < 0x80) {
+      throw NS_ERROR_ILLEGAL_VALUE;
+    } // not 10xxxxxx
+
+    if (c < 0xe0) {
+      // c < 11100000
+      // two-byte between U+000080 and U+0007FF
+      rv = ((lowbits(5) & c) << 6) + (lowbits(6) & c2);
+      // no upper bounds-check needed, by previous lines
+      if (rv >= 0x80) {
+        return rv;
+      }
+      throw NS_ERROR_ILLEGAL_VALUE;
+    }
+
+    c3 = str.read8();
+    if (c3 >= 0xc0 || c3 < 0x80) {
+      throw NS_ERROR_ILLEGAL_VALUE;
+    } // not 10xxxxxx
+
+    if (c < 0xf0) {
+      // c < 11110000
+      // three-byte between U+000800 and U+00FFFF
+      rv =
+        ((lowbits(4) & c) << 12) + ((lowbits(6) & c2) << 6) + (lowbits(6) & c3);
+      // no upper bounds-check needed, by previous lines
+      if (rv >= 0xe000 || (rv >= 0x800 && rv <= 0xd7ff)) {
+        return rv;
+      }
+      throw NS_ERROR_ILLEGAL_VALUE;
+    }
+
+    c4 = str.read8();
+    if (c4 >= 0xc0 || c4 < 0x80) {
+      throw NS_ERROR_ILLEGAL_VALUE;
+    } // not 10xxxxxx
+
+    if (c < 0xf8) {
+      // c < 11111000
+      // four-byte between U+010000 and U+10FFFF
+      rv =
+        ((lowbits(3) & c) << 18) +
+        ((lowbits(6) & c2) << 12) +
+        ((lowbits(6) & c3) << 6) +
+        (lowbits(6) & c4);
+      // need an upper bounds-check since 0x10FFFF isn't (2**n - 1)
+      if (rv >= 0x10000 && rv <= 0x10ffff) {
+        return rv;
+      }
+      throw NS_ERROR_ILLEGAL_VALUE;
+    }
+
+    // 11111000 or greater -- no UTF-8 mapping
+    throw NS_ERROR_ILLEGAL_VALUE;
+  },
+};
+
+/**
+ * Wraps a UTF-16 stream to allow access to the Unicode code points in it.
+ *
+ * @param stream
+ *        the stream to wrap
+ * @param bigEndian
+ *        true for UTF-16BE, false for UTF-16LE, not present at all for UTF-16 with
+ *        a byte-order mark
+ */
+function UTF16(stream, bigEndian) {
+  this._stream = new BIS(stream);
+  if (arguments.length > 1) {
+    this._bigEndian = bigEndian;
+  } else {
+    var bom = this._stream.read16();
+    if (bom == 0xfeff) {
+      this._bigEndian = true;
+    } else if (bom == 0xfffe) {
+      this._bigEndian = false;
+    } else {
+      do_throw("missing BOM: " + bom.toString(16).toUpperCase());
+    }
+  }
+}
+UTF16.prototype = {
+  // returns numeric code point at front of stream encoded in UTF-16,
+  // -1 if at end of stream, or throws if UTF-16 code point not found
+  readUnit() {
+    var str = this._stream;
+
+    // if at end of stream, must distinguish failure to read any bytes
+    // (correct behavior) from failure to read some byte after the first
+    // in the character
+    try {
+      var b1 = str.read8();
+    } catch (e) {
+      return -1;
+    }
+
+    var b2 = str.read8();
+
+    var w1 = this._bigEndian ? (b1 << 8) + b2 : (b2 << 8) + b1;
+
+    if (w1 > 0xdbff && w1 < 0xe000) {
+      // second surrogate, but expecting none or first
+      throw NS_ERROR_ILLEGAL_VALUE;
+    }
+
+    if (w1 > 0xd7ff && w1 < 0xdc00) {
+      // non-BMP: w1 is the high surrogate, the next unit must be the low one
+      b1 = str.read8();
+      b2 = str.read8();
+      var w2 = this._bigEndian ? (b1 << 8) + b2 : (b2 << 8) + b1;
+      if (w2 < 0xdc00 || w2 > 0xdfff) {
+        throw NS_ERROR_ILLEGAL_VALUE;
+      }
+      // code point = 0x10000 + (low 10 bits of w1 << 10) + (low 10 bits of w2)
+      var rv = 0x10000 + ((lowbits(10) & w1) << 10) + (lowbits(10) & w2);
+      if (rv <= 0x10ffff) {
+        return rv;
+      }
+      throw NS_ERROR_ILLEGAL_VALUE;
+    }
+
+    // non-surrogate
+    return w1;
+  },
+};
diff --git a/intl/uconv/tests/unit/test_decode_8859-1.js b/intl/uconv/tests/unit/test_decode_8859-1.js
new file mode 100644
index 0000000000..d820b35ca8
--- /dev/null
+++ b/intl/uconv/tests/unit/test_decode_8859-1.js
@@ -0,0 +1,26 @@
+// Tests conversion from ISO-8859-1 to Unicode
+
+const inString =
+  " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
+
+const expectedString =
+  " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff";
+
+const aliases = [
+  "ISO-8859-1",
+  "iso-8859-1",
+  "latin1",
+  "iso_8859-1",
+  "iso8859-1",
+  "iso-ir-100",
+  "l1",
+  "ibm819",
+  "cp819",
+  "csisolatin1",
+  "iso88591",
+  "iso_8859-1:1987",
+];
+
+function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-10.js b/intl/uconv/tests/unit/test_decode_8859-10.js new file mode 100644 index 0000000000..47e934817e --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-10.js @@ -0,0 +1,22 @@ +// Tests conversion from ISO-8859-10 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0104\u0112\u0122\u012a\u0128\u0136\u00a7\u013b\u0110\u0160\u0166\u017d\u00ad\u016a\u014a\u00b0\u0105\u0113\u0123\u012b\u0129\u0137\u00b7\u013c\u0111\u0161\u0167\u017e\u2015\u016b\u014b\u0100\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u012e\u010c\u00c9\u0118\u00cb\u0116\u00cd\u00ce\u00cf\u00d0\u0145\u014c\u00d3\u00d4\u00d5\u00d6\u0168\u00d8\u0172\u00da\u00db\u00dc\u00dd\u00de\u00df\u0101\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u012f\u010d\u00e9\u0119\u00eb\u0117\u00ed\u00ee\u00ef\u00f0\u0146\u014d\u00f3\u00f4\u00f5\u00f6\u0169\u00f8\u0173\u00fa\u00fb\u00fc\u00fd\u00fe\u0138"; + +const aliases = [ + "ISO-8859-10", + "iso-8859-10", + "iso8859-10", + "latin6", + "iso-ir-157", + "l6", + "csisolatin6", + "iso885910", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-11.js b/intl/uconv/tests/unit/test_decode_8859-11.js new file mode 100644 index 0000000000..b647ec503c --- /dev/null +++ 
b/intl/uconv/tests/unit/test_decode_8859-11.js @@ -0,0 +1,13 @@ +// Tests conversion from ISO-8859-11 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e24\u0e25\u0e26\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e2f\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35\u0e36\u0e37\u0e38\u0e39\u0e3a\u0e3f\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a\u0e4b\u0e4c\u0e4d\u0e4e\u0e4f\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57\u0e58\u0e59\u0e5a\u0e5b"; + +const aliases = ["ISO-8859-11", "iso-8859-11", "iso8859-11", "iso885911"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-13.js b/intl/uconv/tests/unit/test_decode_8859-13.js new file mode 100644 index 0000000000..91443e6bee --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-13.js @@ -0,0 +1,13 @@ +// Tests conversion from ISO-8859-13 to Unicode + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u201d\u00a2\u00a3\u00a4\u201e\u00a6\u00a7\u00d8\u00a9\u0156\u00ab\u00ac\u00ad\u00ae\u00c6\u00b0\u00b1\u00b2\u00b3\u201c\u00b5\u00b6\u00b7\u00f8\u00b9\u0157\u00bb\u00bc\u00bd\u00be\u00e6\u0104\u012e\u0100\u0106\u00c4\u00c5\u0118\u0112\u010c\u00c9\u0179\u0116\u0122\u0136\u012a\u013b\u0160\u0143\u0145\u00d3\u014c\u00d5\u00d6\u00d7\u0172\u0141\u015a\u016a\u00dc\u017b\u017d\u00df\u0105\u012f\u0101\u0107\u00e4\u00e5\u0119\u0113\u010d\u00e9\u017a\u0117\u0123\u0137\u012b\u013c\u0161\u0144\u0146\u00f3\u014d\u00f5\u00f6\u00f7\u0173\u0142\u015b\u016b\u00fc\u017c\u017e\u2019"; + +const aliases = ["ISO-8859-13", "iso-8859-13", "iso8859-13", "iso885913"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-14.js b/intl/uconv/tests/unit/test_decode_8859-14.js new file mode 100644 index 0000000000..d1fdcb204c --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-14.js @@ -0,0 +1,13 @@ +// Tests conversion from ISO-8859-14 to Unicode + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u1e02\u1e03\u00a3\u010a\u010b\u1e0a\u00a7\u1e80\u00a9\u1e82\u1e0b\u1ef2\u00ad\u00ae\u0178\u1e1e\u1e1f\u0120\u0121\u1e40\u1e41\u00b6\u1e56\u1e81\u1e57\u1e83\u1e60\u1ef3\u1e84\u1e85\u1e61\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u0174\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u1e6a\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u0176\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u0175\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u1e6b\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u0177\u00ff"; + +const aliases = ["ISO-8859-14", "iso-8859-14", "iso8859-14", "iso885914"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-15.js b/intl/uconv/tests/unit/test_decode_8859-15.js new file mode 100644 index 0000000000..7344fb55a8 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-15.js @@ -0,0 +1,21 @@ +// Tests conversion from ISO-8859-15 to Unicode + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a1\u00a2\u00a3\u20ac\u00a5\u0160\u00a7\u0161\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u017d\u00b5\u00b6\u00b7\u017e\u00b9\u00ba\u00bb\u0152\u0153\u0178\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff"; + +const aliases = [ + "ISO-8859-15", + "iso-8859-15", + "iso8859-15", + "iso_8859-15", + "iso885915", + "csisolatin9", + "l9", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-2.js b/intl/uconv/tests/unit/test_decode_8859-2.js new file mode 100644 index 0000000000..0e3c15bdee --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-2.js @@ -0,0 +1,24 @@ +// Tests conversion from ISO-8859-2 to Unicode + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0104\u02d8\u0141\u00a4\u013d\u015a\u00a7\u00a8\u0160\u015e\u0164\u0179\u00ad\u017d\u017b\u00b0\u0105\u02db\u0142\u00b4\u013e\u015b\u02c7\u00b8\u0161\u015f\u0165\u017a\u02dd\u017e\u017c\u0154\u00c1\u00c2\u0102\u00c4\u0139\u0106\u00c7\u010c\u00c9\u0118\u00cb\u011a\u00cd\u00ce\u010e\u0110\u0143\u0147\u00d3\u00d4\u0150\u00d6\u00d7\u0158\u016e\u00da\u0170\u00dc\u00dd\u0162\u00df\u0155\u00e1\u00e2\u0103\u00e4\u013a\u0107\u00e7\u010d\u00e9\u0119\u00eb\u011b\u00ed\u00ee\u010f\u0111\u0144\u0148\u00f3\u00f4\u0151\u00f6\u00f7\u0159\u016f\u00fa\u0171\u00fc\u00fd\u0163\u02d9"; + +const aliases = [ + "ISO-8859-2", + "iso-8859-2", + "latin2", + "iso_8859-2", + "iso8859-2", + "iso-ir-101", + "l2", + "csisolatin2", + "iso88592", + "iso_8859-2:1987", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-3.js b/intl/uconv/tests/unit/test_decode_8859-3.js new file mode 100644 index 0000000000..011f82de87 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-3.js @@ -0,0 +1,24 @@ +// Tests conversion from ISO-8859-3 to Unicode + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbf\xc0\xc1\xc2\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0126\u02d8\u00a3\u00a4\u0124\u00a7\u00a8\u0130\u015e\u011e\u0134\u00ad\u017b\u00b0\u0127\u00b2\u00b3\u00b4\u00b5\u0125\u00b7\u00b8\u0131\u015f\u011f\u0135\u00bd\u017c\u00c0\u00c1\u00c2\u00c4\u010a\u0108\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d1\u00d2\u00d3\u00d4\u0120\u00d6\u00d7\u011c\u00d9\u00da\u00db\u00dc\u016c\u015c\u00df\u00e0\u00e1\u00e2\u00e4\u010b\u0109\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f1\u00f2\u00f3\u00f4\u0121\u00f6\u00f7\u011d\u00f9\u00fa\u00fb\u00fc\u016d\u015d\u02d9"; + +const aliases = [ + "ISO-8859-3", + "iso-8859-3", + "latin3", + "iso_8859-3", + "iso8859-3", + "iso-ir-109", + "l3", + "csisolatin3", + "iso88593", + "iso_8859-3:1988", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-4.js b/intl/uconv/tests/unit/test_decode_8859-4.js new file mode 100644 index 0000000000..6a8b89c2ef --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-4.js @@ -0,0 +1,24 @@ +// Tests conversion from ISO-8859-4 to Unicode + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0104\u0138\u0156\u00a4\u0128\u013b\u00a7\u00a8\u0160\u0112\u0122\u0166\u00ad\u017d\u00af\u00b0\u0105\u02db\u0157\u00b4\u0129\u013c\u02c7\u00b8\u0161\u0113\u0123\u0167\u014a\u017e\u014b\u0100\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u012e\u010c\u00c9\u0118\u00cb\u0116\u00cd\u00ce\u012a\u0110\u0145\u014c\u0136\u00d4\u00d5\u00d6\u00d7\u00d8\u0172\u00da\u00db\u00dc\u0168\u016a\u00df\u0101\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u012f\u010d\u00e9\u0119\u00eb\u0117\u00ed\u00ee\u012b\u0111\u0146\u014d\u0137\u00f4\u00f5\u00f6\u00f7\u00f8\u0173\u00fa\u00fb\u00fc\u0169\u016b\u02d9"; + +const aliases = [ + "ISO-8859-4", + "iso-8859-4", + "latin4", + "iso_8859-4", + "iso8859-4", + "iso-ir-110", + "l4", + "csisolatin4", + "iso88594", + "iso_8859-4:1988", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-5.js b/intl/uconv/tests/unit/test_decode_8859-5.js new file mode 100644 index 0000000000..220a12ab3a --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-5.js @@ -0,0 +1,22 @@ +// Tests conversion from ISO-8859-5 to Unicode + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0401\u0402\u0403\u0404\u0405\u0406\u0407\u0408\u0409\u040a\u040b\u040c\u00ad\u040e\u040f\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u044f\u2116\u0451\u0452\u0453\u0454\u0455\u0456\u0457\u0458\u0459\u045a\u045b\u045c\u00a7\u045e\u045f"; + +const aliases = [ + "ISO-8859-5", + "iso-8859-5", + "iso_8859-5", + "iso8859-5", + "iso-ir-144", + "csisolatincyrillic", + "iso88595", + "iso_8859-5:1988", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-6.js b/intl/uconv/tests/unit/test_decode_8859-6.js new file mode 100644 index 0000000000..9c94ef8673 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-6.js @@ -0,0 +1,25 @@ +// Tests conversion from ISO-8859-6 to Unicode + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa4\xac\xad\xbb\xbf\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a4\u060c\u00ad\u061b\u061f\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u064b\u064c\u064d\u064e\u064f\u0650\u0651\u0652"; + +const aliases = [ + "ISO-8859-6", + "iso-8859-6", + "iso_8859-6", + "iso8859-6", + "arabic", + "iso-ir-127", + "ecma-114", + "asmo-708", + "csisolatinarabic", + "iso88596", + "iso_8859-6:1987", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-7.js b/intl/uconv/tests/unit/test_decode_8859-7.js new file mode 100644 index 0000000000..9d74342345 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-7.js @@ -0,0 +1,27 @@ +// Tests conversion from ISO-8859-7 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u2018\u2019\u00a3\u20ac\u20af\u00a6\u00a7\u00a8\u00a9\u037a\u00ab\u00ac\u00ad\u2015\u00b0\u00b1\u00b2\u00b3\u0384\u0385\u0386\u00b7\u0388\u0389\u038a\u00bb\u038c\u00bd\u038e\u038f\u0390\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039a\u039b\u039c\u039d\u039e\u039f\u03a0\u03a1\u03a3\u03a4\u03a5\u03a6\u03a7\u03a8\u03a9\u03aa\u03ab\u03ac\u03ad\u03ae\u03af\u03b0\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c2\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03ca\u03cb\u03cc\u03cd\u03ce"; + +const aliases = [ + "ISO-8859-7", + "iso-8859-7", + "greek", + "greek8", + "sun_eu_greek", + "iso_8859-7", + "iso8859-7", + "iso-ir-126", + "elot_928", + "ecma-118", + "csisolatingreek", + "iso88597", + "iso_8859-7:1987", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-8.js b/intl/uconv/tests/unit/test_decode_8859-8.js new file mode 100644 index 0000000000..c7b758bf03 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-8.js @@ -0,0 +1,24 @@ +// Tests conversion from ISO-8859-8 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfd\xfe"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00d7\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00f7\u00bb\u00bc\u00bd\u00be\u2017\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05da\u05db\u05dc\u05dd\u05de\u05df\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5\u05e6\u05e7\u05e8\u05e9\u05ea\u200e\u200f"; + +const aliases = [ + "ISO-8859-8", + "iso-8859-8", + "hebrew", + "visual", + "iso_8859-8", + "iso8859-8", + "iso-ir-138", + "csisolatinhebrew", + "iso88598", + "iso_8859-8:1988", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-9.js b/intl/uconv/tests/unit/test_decode_8859-9.js new file mode 100644 index 0000000000..1582e2093f --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-9.js @@ -0,0 +1,24 @@ +// Tests conversion from ISO-8859-9 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u011e\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u0130\u015e\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u011f\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u0131\u015f\u00ff"; + +const aliases = [ + "ISO-8859-9", + "iso-8859-9", + "latin5", + "iso_8859-9", + "iso8859-9", + "iso-ir-148", + "l5", + "csisolatin5", + "iso88599", + "iso_8859-9:1989", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1250.js b/intl/uconv/tests/unit/test_decode_CP1250.js new file mode 100644 index 0000000000..d044c67801 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1250.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1250 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0083\u201e\u2026\u2020\u2021\u0088\u2030\u0160\u2039\u015a\u0164\u017d\u0179\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u0161\u203a\u015b\u0165\u017e\u017a\u00a0\u02c7\u02d8\u0141\u00a4\u0104\u00a6\u00a7\u00a8\u00a9\u015e\u00ab\u00ac\u00ad\u00ae\u017b\u00b0\u00b1\u02db\u0142\u00b4\u00b5\u00b6\u00b7\u00b8\u0105\u015f\u00bb\u013d\u02dd\u013e\u017c\u0154\u00c1\u00c2\u0102\u00c4\u0139\u0106\u00c7\u010c\u00c9\u0118\u00cb\u011a\u00cd\u00ce\u010e\u0110\u0143\u0147\u00d3\u00d4\u0150\u00d6\u00d7\u0158\u016e\u00da\u0170\u00dc\u00dd\u0162\u00df\u0155\u00e1\u00e2\u0103\u00e4\u013a\u0107\u00e7\u010d\u00e9\u0119\u00eb\u011b\u00ed\u00ee\u010f\u0111\u0144\u0148\u00f3\u00f4\u0151\u00f6\u00f7\u0159\u016f\u00fa\u0171\u00fc\u00fd\u0163\u02d9"; + +const aliases = ["windows-1250", "x-cp1250", "cp1250"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1251.js b/intl/uconv/tests/unit/test_decode_CP1251.js new file mode 100644 index 0000000000..01153e8650 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1251.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1251 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0402\u0403\u201a\u0453\u201e\u2026\u2020\u2021\u20ac\u2030\u0409\u2039\u040a\u040c\u040b\u040f\u0452\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u0459\u203a\u045a\u045c\u045b\u045f\u00a0\u040e\u045e\u0408\u00a4\u0490\u00a6\u00a7\u0401\u00a9\u0404\u00ab\u00ac\u00ad\u00ae\u0407\u00b0\u00b1\u0406\u0456\u0491\u00b5\u00b6\u00b7\u0451\u2116\u0454\u00bb\u0458\u0405\u0455\u0457\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u044f"; + +const aliases = ["windows-1251", "x-cp1251", "cp1251"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1252.js b/intl/uconv/tests/unit/test_decode_CP1252.js new file mode 100644 index 0000000000..d41d7d72a3 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1252.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1252 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0160\u2039\u0152\u008d\u017d\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u0161\u203a\u0153\u009d\u017e\u0178\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff"; + +const aliases = ["windows-1252", "x-cp1252", "cp1252"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1253.js b/intl/uconv/tests/unit/test_decode_CP1253.js new file mode 100644 index 0000000000..880e2dae74 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1253.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1253 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u0088\u2030\u008a\u2039\u008c\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u009a\u203a\u009c\u009d\u009e\u009f\u00a0\u0385\u0386\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\ufffd\u00ab\u00ac\u00ad\u00ae\u2015\u00b0\u00b1\u00b2\u00b3\u0384\u00b5\u00b6\u00b7\u0388\u0389\u038a\u00bb\u038c\u00bd\u038e\u038f\u0390\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039a\u039b\u039c\u039d\u039e\u039f\u03a0\u03a1\ufffd\u03a3\u03a4\u03a5\u03a6\u03a7\u03a8\u03a9\u03aa\u03ab\u03ac\u03ad\u03ae\u03af\u03b0\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c2\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03ca\u03cb\u03cc\u03cd\u03ce\ufffd"; + +const aliases = ["windows-1253", "x-cp1253", "cp1253"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1254.js b/intl/uconv/tests/unit/test_decode_CP1254.js new file mode 100644 index 0000000000..f4af7e2088 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1254.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1254 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0160\u2039\u0152\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u0161\u203a\u0153\u009d\u009e\u0178\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u011e\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u0130\u015e\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u011f\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u0131\u015f\u00ff"; + +const aliases = ["windows-1254", "x-cp1254", "cp1254"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1255.js b/intl/uconv/tests/unit/test_decode_CP1255.js new file mode 100644 index 0000000000..57e1b54636 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1255.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1255 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u008a\u2039\u008c\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u009a\u203a\u009c\u009d\u009e\u009f\u00a0\u00a1\u00a2\u00a3\u20aa\u00a5\u00a6\u00a7\u00a8\u00a9\u00d7\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00f7\u00bb\u00bc\u00bd\u00be\u00bf\u05b0\u05b1\u05b2\u05b3\u05b4\u05b5\u05b6\u05b7\u05b8\u05b9\u05ba\u05bb\u05bc\u05bd\u05be\u05bf\u05c0\u05c1\u05c2\u05c3\u05f0\u05f1\u05f2\u05f3\u05f4\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05da\u05db\u05dc\u05dd\u05de\u05df\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5\u05e6\u05e7\u05e8\u05e9\u05ea\ufffd\ufffd\u200e\u200f\ufffd"; + +const aliases = ["windows-1255", "x-cp1255", "cp1255"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1256.js b/intl/uconv/tests/unit/test_decode_CP1256.js new file mode 100644 index 0000000000..b91e448a46 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1256.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1256 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u067e\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0679\u2039\u0152\u0686\u0698\u0688\u06af\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u06a9\u2122\u0691\u203a\u0153\u200c\u200d\u06ba\u00a0\u060c\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u06be\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u061b\u00bb\u00bc\u00bd\u00be\u061f\u06c1\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u00d7\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u00e0\u0644\u00e2\u0645\u0646\u0647\u0648\u00e7\u00e8\u00e9\u00ea\u00eb\u0649\u064a\u00ee\u00ef\u064b\u064c\u064d\u064e\u00f4\u064f\u0650\u00f7\u0651\u00f9\u0652\u00fb\u00fc\u200e\u200f\u06d2"; + +const aliases = ["windows-1256", "x-cp1256", "cp1256"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1257.js b/intl/uconv/tests/unit/test_decode_CP1257.js new file mode 100644 index 0000000000..a61bf2e870 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1257.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1257 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0083\u201e\u2026\u2020\u2021\u0088\u2030\u008a\u2039\u008c\u00a8\u02c7\u00b8\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u009a\u203a\u009c\u00af\u02db\u009f\u00a0\ufffd\u00a2\u00a3\u00a4\ufffd\u00a6\u00a7\u00d8\u00a9\u0156\u00ab\u00ac\u00ad\u00ae\u00c6\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00f8\u00b9\u0157\u00bb\u00bc\u00bd\u00be\u00e6\u0104\u012e\u0100\u0106\u00c4\u00c5\u0118\u0112\u010c\u00c9\u0179\u0116\u0122\u0136\u012a\u013b\u0160\u0143\u0145\u00d3\u014c\u00d5\u00d6\u00d7\u0172\u0141\u015a\u016a\u00dc\u017b\u017d\u00df\u0105\u012f\u0101\u0107\u00e4\u00e5\u0119\u0113\u010d\u00e9\u017a\u0117\u0123\u0137\u012b\u013c\u0161\u0144\u0146\u00f3\u014d\u00f5\u00f6\u00f7\u0173\u0142\u015b\u016b\u00fc\u017c\u017e\u02d9"; + +const aliases = ["windows-1257", "x-cp1257", "cp1257"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1258.js b/intl/uconv/tests/unit/test_decode_CP1258.js new file mode 100644 index 0000000000..422e9a7985 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1258.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1258 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u008a\u2039\u0152\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u009a\u203a\u0153\u009d\u009e\u0178\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u0102\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u0300\u00cd\u00ce\u00cf\u0110\u00d1\u0309\u00d3\u00d4\u01a0\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u01af\u0303\u00df\u00e0\u00e1\u00e2\u0103\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u0301\u00ed\u00ee\u00ef\u0111\u00f1\u0323\u00f3\u00f4\u01a1\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u01b0\u20ab\u00ff"; + +const aliases = ["windows-1258", "x-cp1258", "cp1258"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP874.js b/intl/uconv/tests/unit/test_decode_CP874.js new file mode 100644 index 0000000000..7f05e7669c --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP874.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-874 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u0082\u0083\u0084\u2026\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e24\u0e25\u0e26\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e2f\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35\u0e36\u0e37\u0e38\u0e39\u0e3a\ufffd\ufffd\ufffd\ufffd\u0e3f\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a\u0e4b\u0e4c\u0e4d\u0e4e\u0e4f\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57\u0e58\u0e59\u0e5a\u0e5b\ufffd\ufffd\ufffd\ufffd"; + +const aliases = ["windows-874", "dos-874"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_gb18030.js b/intl/uconv/tests/unit/test_decode_gb18030.js new file mode 100644 index 0000000000..ca5796bbaa --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_gb18030.js @@ -0,0 +1,16 @@ +// Tests conversion from gb18030 to Unicode +// This is a sniff test which doesn't cover the full gb18030 range: the test string +// includes only the ASCII range and the first 63 double byte characters +// and border values of 4 byte characters + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x81@\x81A\x81B\x81C\x81D\x81E\x81F\x81G\x81H\x81I\x81J\x81K\x81L\x81M\x81N\x81O\x81P\x81Q\x81R\x81S\x81T\x81U\x81V\x81W\x81X\x81Y\x81Z\x81[\x81\\\x81]\x81^\x81_\x81`\x81a\x81b\x81c\x81d\x81e\x81f\x81g\x81h\x81i\x81j\x81k\x81l\x81m\x81n\x81o\x81p\x81q\x81r\x81s\x81t\x81u\x81v\x81w\x81x\x81y\x81z\x81{\x81|\x81}\x81~\x810\x810\x841\xa46\x841\xa47\x849\xfe9\x850\x810\x859\xfe9\x860\x810\x8f9\xfe9\x900\x810\xe32\x9a5\xe32\x9a6\xe39\xfe9\xe40\x810\xfc9\xfe9\xfd0\x810\xfe9\xfe9\xa8\xa0\xa8\xbc\xa8\xbf\xa8\xc1\x815\xf46\x815\xf47\x815\xf48"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u4E02\u4E04\u4E05\u4E06\u4E0F\u4E12\u4E17\u4E1F\u4E20\u4E21\u4E23\u4E26\u4E29\u4E2E\u4E2F\u4E31\u4E33\u4E35\u4E37\u4E3C\u4E40\u4E41\u4E42\u4E44\u4E46\u4E4A\u4E51\u4E55\u4E57\u4E5A\u4E5B\u4E62\u4E63\u4E64\u4E65\u4E67\u4E68\u4E6A\u4E6B\u4E6C\u4E6D\u4E6E\u4E6F\u4E72\u4E74\u4E75\u4E76\u4E77\u4E78\u4E79\u4E7A\u4E7B\u4E7C\u4E7D\u4E7F\u4E80\u4E81\u4E82\u4E83\u4E84\u4E85\u4E87\u4E8A\x80\uFFFC\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uD800\uDC00\uDBFF\uDFFF\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uE7C6\u1E3F\u01F9\uE7C9\u1E3E\uE7C7\u1E40"; + +const aliases = ["gb18030"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_gbk.js b/intl/uconv/tests/unit/test_decode_gbk.js new file mode 100644 index 0000000000..6e4414722d --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_gbk.js @@ -0,0 +1,15 @@ +// Tests conversion from gbk to Unicode +// This is a sniff test which doesn't cover the full gbk range: the test string +// includes only the ASCII range and the first 63 double byte characters + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x81@\x81A\x81B\x81C\x81D\x81E\x81F\x81G\x81H\x81I\x81J\x81K\x81L\x81M\x81N\x81O\x81P\x81Q\x81R\x81S\x81T\x81U\x81V\x81W\x81X\x81Y\x81Z\x81[\x81\\\x81]\x81^\x81_\x81`\x81a\x81b\x81c\x81d\x81e\x81f\x81g\x81h\x81i\x81j\x81k\x81l\x81m\x81n\x81o\x81p\x81q\x81r\x81s\x81t\x81u\x81v\x81w\x81x\x81y\x81z\x81{\x81|\x81}\x81~"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u4E02\u4E04\u4E05\u4E06\u4E0F\u4E12\u4E17\u4E1F\u4E20\u4E21\u4E23\u4E26\u4E29\u4E2E\u4E2F\u4E31\u4E33\u4E35\u4E37\u4E3C\u4E40\u4E41\u4E42\u4E44\u4E46\u4E4A\u4E51\u4E55\u4E57\u4E5A\u4E5B\u4E62\u4E63\u4E64\u4E65\u4E67\u4E68\u4E6A\u4E6B\u4E6C\u4E6D\u4E6E\u4E6F\u4E72\u4E74\u4E75\u4E76\u4E77\u4E78\u4E79\u4E7A\u4E7B\u4E7C\u4E7D\u4E7F\u4E80\u4E81\u4E82\u4E83\u4E84\u4E85\u4E87\u4E8A"; + +const aliases = ["gbk", "x-gbk"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_macintosh.js b/intl/uconv/tests/unit/test_decode_macintosh.js new file mode 100644 index 0000000000..5504a630ec --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_macintosh.js @@ -0,0 +1,13 @@ +// Tests conversion from x-mac-roman to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u00c6\u00d8\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0\u222b\u00aa\u00ba\u03a9\u00e6\u00f8\u00bf\u00a1\u00ac\u221a\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca\u00ff\u0178\u2044\u20ac\u2039\u203a\ufb01\ufb02\u2021\u00b7\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\u0131\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7"; + +const aliases = ["csMacintosh", "mac", "macintosh", "x-mac-roman"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_x_mac_cyrillic.js b/intl/uconv/tests/unit/test_decode_x_mac_cyrillic.js new file mode 100644 index 0000000000..ad783b0e96 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_x_mac_cyrillic.js @@ -0,0 +1,13 @@ +// Tests conversion from x-mac-cyrillic to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u2020\u00b0\u0490\u00a3\u00a7\u2022\u00b6\u0406\u00ae\u00a9\u2122\u0402\u0452\u2260\u0403\u0453\u221e\u00b1\u2264\u2265\u0456\u00b5\u0491\u0408\u0404\u0454\u0407\u0457\u0409\u0459\u040a\u045a\u0458\u0405\u00ac\u221a\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u040b\u045b\u040c\u045c\u0455\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u201e\u040e\u045e\u040f\u045f\u2116\u0401\u0451\u044f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u20ac"; + +const aliases = ["x-mac-cyrillic"]; + +function run_test() { + testDecodeAliasesInternal(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_x_mac_ukrainian.js b/intl/uconv/tests/unit/test_decode_x_mac_ukrainian.js new file mode 100644 index 0000000000..b8b3d63018 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_x_mac_ukrainian.js @@ -0,0 +1,13 @@ +// Tests conversion from x-mac-ukrainian to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u2020\u00b0\u0490\u00a3\u00a7\u2022\u00b6\u0406\u00ae\u00a9\u2122\u0402\u0452\u2260\u0403\u0453\u221e\u00b1\u2264\u2265\u0456\u00b5\u0491\u0408\u0404\u0454\u0407\u0457\u0409\u0459\u040a\u045a\u0458\u0405\u00ac\u221a\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u040b\u045b\u040c\u045c\u0455\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u201e\u040e\u045e\u040f\u045f\u2116\u0401\u0451\u044f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u20ac"; + +const aliases = ["x-mac-ukrainian"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-1.js b/intl/uconv/tests/unit/test_encode_8859-1.js new file mode 100644 index 0000000000..f5a6559de1 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-1.js @@ -0,0 +1,26 @@ +// Tests conversion from Unicode to ISO-8859-1 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff"; + +const expectedString = + 
" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-1", + "iso-8859-1", + "latin1", + "iso_8859-1", + "iso8859-1", + "iso-ir-100", + "l1", + "ibm819", + "cp819", + "csisolatin1", + "iso88591", + "iso_8859-1:1987", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-10.js b/intl/uconv/tests/unit/test_encode_8859-10.js new file mode 100644 index 0000000000..2ecad6013f --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-10.js @@ -0,0 +1,22 @@ +// Tests conversion from Unicode to ISO-8859-10 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0104\u0112\u0122\u012a\u0128\u0136\u00a7\u013b\u0110\u0160\u0166\u017d\u00ad\u016a\u014a\u00b0\u0105\u0113\u0123\u012b\u0129\u0137\u00b7\u013c\u0111\u0161\u0167\u017e\u2015\u016b\u014b\u0100\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u012e\u010c\u00c9\u0118\u00cb\u0116\u00cd\u00ce\u00cf\u00d0\u0145\u014c\u00d3\u00d4\u00d5\u00d6\u0168\u00d8\u0172\u00da\u00db\u00dc\u00dd\u00de\u00df\u0101\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u012f\u010d\u00e9\u0119\u00eb\u0117\u00ed\u00ee\u00ef\u00f0\u0146\u014d\u00f3\u00f4\u00f5\u00f6\u0169\u00f8\u0173\u00fa\u00fb\u00fc\u00fd\u00fe\u0138"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-10", + "iso-8859-10", + "iso8859-10", + "latin6", + "iso-ir-157", + "l6", + "csisolatin6", + "iso885910", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-11.js b/intl/uconv/tests/unit/test_encode_8859-11.js new file mode 100644 index 0000000000..7011c26688 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-11.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to ISO-8859-11 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e24\u0e25\u0e26\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e2f\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35\u0e36\u0e37\u0e38\u0e39\u0e3a\u0e3f\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a\u0e4b\u0e4c\u0e4d\u0e4e\u0e4f\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57\u0e58\u0e59\u0e5a\u0e5b"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb"; + +const aliases = ["ISO-8859-11", "iso-8859-11", "iso8859-11", "iso885911"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-13.js b/intl/uconv/tests/unit/test_encode_8859-13.js new file mode 100644 index 0000000000..cf6ad98466 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-13.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to ISO-8859-13 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u201d\u00a2\u00a3\u00a4\u201e\u00a6\u00a7\u00d8\u00a9\u0156\u00ab\u00ac\u00ad\u00ae\u00c6\u00b0\u00b1\u00b2\u00b3\u201c\u00b5\u00b6\u00b7\u00f8\u00b9\u0157\u00bb\u00bc\u00bd\u00be\u00e6\u0104\u012e\u0100\u0106\u00c4\u00c5\u0118\u0112\u010c\u00c9\u0179\u0116\u0122\u0136\u012a\u013b\u0160\u0143\u0145\u00d3\u014c\u00d5\u00d6\u00d7\u0172\u0141\u015a\u016a\u00dc\u017b\u017d\u00df\u0105\u012f\u0101\u0107\u00e4\u00e5\u0119\u0113\u010d\u00e9\u017a\u0117\u0123\u0137\u012b\u013c\u0161\u0144\u0146\u00f3\u014d\u00f5\u00f6\u00f7\u0173\u0142\u015b\u016b\u00fc\u017c\u017e\u2019"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["ISO-8859-13", "iso-8859-13", "iso8859-13", "iso885913"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-14.js b/intl/uconv/tests/unit/test_encode_8859-14.js new file mode 100644 index 0000000000..9a0e8dc00a --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-14.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to ISO-8859-14 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u1e02\u1e03\u00a3\u010a\u010b\u1e0a\u00a7\u1e80\u00a9\u1e82\u1e0b\u1ef2\u00ad\u00ae\u0178\u1e1e\u1e1f\u0120\u0121\u1e40\u1e41\u00b6\u1e56\u1e81\u1e57\u1e83\u1e60\u1ef3\u1e84\u1e85\u1e61\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u0174\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u1e6a\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u0176\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u0175\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u1e6b\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u0177\u00ff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["ISO-8859-14", "iso-8859-14", "iso8859-14", "iso885914"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-15.js b/intl/uconv/tests/unit/test_encode_8859-15.js new file mode 100644 index 0000000000..ed5cd2ec90 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-15.js @@ -0,0 +1,21 @@ +// Tests conversion from Unicode to ISO-8859-15 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a1\u00a2\u00a3\u20ac\u00a5\u0160\u00a7\u0161\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u017d\u00b5\u00b6\u00b7\u017e\u00b9\u00ba\u00bb\u0152\u0153\u0178\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-15", + "iso-8859-15", + "iso8859-15", + "iso_8859-15", + "iso885915", + "csisolatin9", + "l9", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-2.js b/intl/uconv/tests/unit/test_encode_8859-2.js new file mode 100644 index 0000000000..1b34672bcb --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-2.js @@ -0,0 +1,24 @@ +// Tests conversion from Unicode to ISO-8859-2 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0104\u02d8\u0141\u00a4\u013d\u015a\u00a7\u00a8\u0160\u015e\u0164\u0179\u00ad\u017d\u017b\u00b0\u0105\u02db\u0142\u00b4\u013e\u015b\u02c7\u00b8\u0161\u015f\u0165\u017a\u02dd\u017e\u017c\u0154\u00c1\u00c2\u0102\u00c4\u0139\u0106\u00c7\u010c\u00c9\u0118\u00cb\u011a\u00cd\u00ce\u010e\u0110\u0143\u0147\u00d3\u00d4\u0150\u00d6\u00d7\u0158\u016e\u00da\u0170\u00dc\u00dd\u0162\u00df\u0155\u00e1\u00e2\u0103\u00e4\u013a\u0107\u00e7\u010d\u00e9\u0119\u00eb\u011b\u00ed\u00ee\u010f\u0111\u0144\u0148\u00f3\u00f4\u0151\u00f6\u00f7\u0159\u016f\u00fa\u0171\u00fc\u00fd\u0163\u02d9"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-2", + "iso-8859-2", + "latin2", + "iso_8859-2", + "iso8859-2", + "iso-ir-101", + "l2", + "csisolatin2", + "iso88592", + "iso_8859-2:1987", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-3.js b/intl/uconv/tests/unit/test_encode_8859-3.js new file mode 100644 index 0000000000..fff6243431 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-3.js @@ -0,0 +1,24 @@ +// Tests conversion from Unicode to ISO-8859-3 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0126\u02d8\u00a3\u00a4\u0124\u00a7\u00a8\u0130\u015e\u011e\u0134\u00ad\u017b\u00b0\u0127\u00b2\u00b3\u00b4\u00b5\u0125\u00b7\u00b8\u0131\u015f\u011f\u0135\u00bd\u017c\u00c0\u00c1\u00c2\u00c4\u010a\u0108\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d1\u00d2\u00d3\u00d4\u0120\u00d6\u00d7\u011c\u00d9\u00da\u00db\u00dc\u016c\u015c\u00df\u00e0\u00e1\u00e2\u00e4\u010b\u0109\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f1\u00f2\u00f3\u00f4\u0121\u00f6\u00f7\u011d\u00f9\u00fa\u00fb\u00fc\u016d\u015d\u02d9"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbf\xc0\xc1\xc2\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-3", + "iso-8859-3", + "latin3", + "iso_8859-3", + "iso8859-3", + "iso-ir-109", + "l3", + "csisolatin3", + "iso88593", + "iso_8859-3:1988", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-4.js b/intl/uconv/tests/unit/test_encode_8859-4.js new file mode 100644 index 0000000000..192d13fd52 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-4.js @@ -0,0 +1,24 @@ +// Tests conversion from Unicode to ISO-8859-4 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0104\u0138\u0156\u00a4\u0128\u013b\u00a7\u00a8\u0160\u0112\u0122\u0166\u00ad\u017d\u00af\u00b0\u0105\u02db\u0157\u00b4\u0129\u013c\u02c7\u00b8\u0161\u0113\u0123\u0167\u014a\u017e\u014b\u0100\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u012e\u010c\u00c9\u0118\u00cb\u0116\u00cd\u00ce\u012a\u0110\u0145\u014c\u0136\u00d4\u00d5\u00d6\u00d7\u00d8\u0172\u00da\u00db\u00dc\u0168\u016a\u00df\u0101\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u012f\u010d\u00e9\u0119\u00eb\u0117\u00ed\u00ee\u012b\u0111\u0146\u014d\u0137\u00f4\u00f5\u00f6\u00f7\u00f8\u0173\u00fa\u00fb\u00fc\u0169\u016b\u02d9"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-4", + "iso-8859-4", + "latin4", + "iso_8859-4", + "iso8859-4", + "iso-ir-110", + "l4", + "csisolatin4", + "iso88594", + "iso_8859-4:1988", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-5.js b/intl/uconv/tests/unit/test_encode_8859-5.js new file mode 100644 index 0000000000..fb2a05a693 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-5.js @@ -0,0 +1,22 @@ +// Tests conversion from Unicode to ISO-8859-5 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0401\u0402\u0403\u0404\u0405\u0406\u0407\u0408\u0409\u040a\u040b\u040c\u00ad\u040e\u040f\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u044f\u2116\u0451\u0452\u0453\u0454\u0455\u0456\u0457\u0458\u0459\u045a\u045b\u045c\u00a7\u045e\u045f"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-5", + "iso-8859-5", + "iso_8859-5", + "iso8859-5", + "iso-ir-144", + "csisolatincyrillic", + "iso88595", + "iso_8859-5:1988", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-6.js b/intl/uconv/tests/unit/test_encode_8859-6.js new file mode 100644 index 0000000000..1768b89d82 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-6.js @@ -0,0 +1,25 @@ +// Tests conversion from Unicode to ISO-8859-6 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a4\u060c\u00ad\u061b\u061f\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u064b\u064c\u064d\u064e\u064f\u0650\u0651\u0652"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa4\xac\xad\xbb\xbf\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2"; + +const aliases = [ + "ISO-8859-6", + "iso-8859-6", + "iso_8859-6", + "iso8859-6", + "arabic", + "iso-ir-127", + "ecma-114", + "asmo-708", + "csisolatinarabic", + "iso88596", + "iso_8859-6:1987", +]; + +function run_test() { 
+ testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-7.js b/intl/uconv/tests/unit/test_encode_8859-7.js new file mode 100644 index 0000000000..3452130e74 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-7.js @@ -0,0 +1,27 @@ +// Tests conversion from Unicode to ISO-8859-7 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u2018\u2019\u00a3\u20ac\u20af\u00a6\u00a7\u00a8\u00a9\u037a\u00ab\u00ac\u00ad\u2015\u00b0\u00b1\u00b2\u00b3\u0384\u0385\u0386\u00b7\u0388\u0389\u038a\u00bb\u038c\u00bd\u038e\u038f\u0390\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039a\u039b\u039c\u039d\u039e\u039f\u03a0\u03a1\u03a3\u03a4\u03a5\u03a6\u03a7\u03a8\u03a9\u03aa\u03ab\u03ac\u03ad\u03ae\u03af\u03b0\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c2\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03ca\u03cb\u03cc\u03cd\u03ce"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe"; + +const aliases = [ + "ISO-8859-7", + "iso-8859-7", + "greek", + "greek8", + "sun_eu_greek", + "iso_8859-7", + "iso8859-7", + "iso-ir-126", + "elot_928", + "ecma-118", + "csisolatingreek", + "iso88597", + "iso_8859-7:1987", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-8.js b/intl/uconv/tests/unit/test_encode_8859-8.js new file mode 100644 index 0000000000..12402dfb56 --- 
/dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-8.js @@ -0,0 +1,24 @@ +// Tests conversion from Unicode to ISO-8859-8 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00d7\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00f7\u00bb\u00bc\u00bd\u00be\u2017\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05da\u05db\u05dc\u05dd\u05de\u05df\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5\u05e6\u05e7\u05e8\u05e9\u05ea\u200e\u200f"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfd\xfe"; + +const aliases = [ + "ISO-8859-8", + "iso-8859-8", + "hebrew", + "visual", + "iso_8859-8", + "iso8859-8", + "iso-ir-138", + "csisolatinhebrew", + "iso88598", + "iso_8859-8:1988", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-9.js b/intl/uconv/tests/unit/test_encode_8859-9.js new file mode 100644 index 0000000000..7658ebe5ef --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-9.js @@ -0,0 +1,24 @@ +// Tests conversion from Unicode to ISO-8859-9 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u011e\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u0130\u015e\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u011f\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u0131\u015f\u00ff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-9", + "iso-8859-9", + "latin5", + "iso_8859-9", + "iso8859-9", + "iso-ir-148", + "l5", + "csisolatin5", + "iso88599", + "iso_8859-9:1989", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1250.js b/intl/uconv/tests/unit/test_encode_CP1250.js new file mode 100644 index 0000000000..5b5c0d2f0a --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1250.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1250 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0083\u201e\u2026\u2020\u2021\u0088\u2030\u0160\u2039\u015a\u0164\u017d\u0179\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u0161\u203a\u015b\u0165\u017e\u017a\u00a0\u02c7\u02d8\u0141\u00a4\u0104\u00a6\u00a7\u00a8\u00a9\u015e\u00ab\u00ac\u00ad\u00ae\u017b\u00b0\u00b1\u02db\u0142\u00b4\u00b5\u00b6\u00b7\u00b8\u0105\u015f\u00bb\u013d\u02dd\u013e\u017c\u0154\u00c1\u00c2\u0102\u00c4\u0139\u0106\u00c7\u010c\u00c9\u0118\u00cb\u011a\u00cd\u00ce\u010e\u0110\u0143\u0147\u00d3\u00d4\u0150\u00d6\u00d7\u0158\u016e\u00da\u0170\u00dc\u00dd\u0162\u00df\u0155\u00e1\u00e2\u0103\u00e4\u013a\u0107\u00e7\u010d\u00e9\u0119\u00eb\u011b\u00ed\u00ee\u010f\u0111\u0144\u0148\u00f3\u00f4\u0151\u00f6\u00f7\u0159\u016f\u00fa\u0171\u00fc\u00fd\u0163\u02d9"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1250", "x-cp1250", "cp1250"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1251.js b/intl/uconv/tests/unit/test_encode_CP1251.js new file mode 100644 index 0000000000..52d15d8731 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1251.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1251 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0402\u0403\u201a\u0453\u201e\u2026\u2020\u2021\u20ac\u2030\u0409\u2039\u040a\u040c\u040b\u040f\u0452\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u0459\u203a\u045a\u045c\u045b\u045f\u00a0\u040e\u045e\u0408\u00a4\u0490\u00a6\u00a7\u0401\u00a9\u0404\u00ab\u00ac\u00ad\u00ae\u0407\u00b0\u00b1\u0406\u0456\u0491\u00b5\u00b6\u00b7\u0451\u2116\u0454\u00bb\u0458\u0405\u0455\u0457\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u044f"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1251", "x-cp1251", "cp1251"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1252.js b/intl/uconv/tests/unit/test_encode_CP1252.js new file mode 100644 index 0000000000..2f99791408 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1252.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1252 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0160\u2039\u0152\u008d\u017d\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u0161\u203a\u0153\u009d\u017e\u0178\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1252", "x-cp1252", "cp1252"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1253.js b/intl/uconv/tests/unit/test_encode_CP1253.js new file mode 100644 index 0000000000..a7ba34cb9f --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1253.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1253 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u0088\u2030\u008a\u2039\u008c\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u009a\u203a\u009c\u009d\u009e\u009f\u00a0\u0385\u0386\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00ab\u00ac\u00ad\u00ae\u2015\u00b0\u00b1\u00b2\u00b3\u0384\u00b5\u00b6\u00b7\u0388\u0389\u038a\u00bb\u038c\u00bd\u038e\u038f\u0390\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039a\u039b\u039c\u039d\u039e\u039f\u03a0\u03a1\u03a3\u03a4\u03a5\u03a6\u03a7\u03a8\u03a9\u03aa\u03ab\u03ac\u03ad\u03ae\u03af\u03b0\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c2\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03ca\u03cb\u03cc\u03cd\u03ce"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe"; + +const aliases = ["windows-1253", "x-cp1253", "cp1253"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1254.js b/intl/uconv/tests/unit/test_encode_CP1254.js new file mode 100644 index 0000000000..593a33841e --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1254.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1254 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0160\u2039\u0152\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u0161\u203a\u0153\u009d\u009e\u0178\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u011e\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u0130\u015e\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u011f\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u0131\u015f\u00ff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1254", "x-cp1254", "cp1254"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1255.js b/intl/uconv/tests/unit/test_encode_CP1255.js new file mode 100644 index 0000000000..6da4cd53a2 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1255.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1255 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u008a\u2039\u008c\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u009a\u203a\u009c\u009d\u009e\u009f\u00a0\u00a1\u00a2\u00a3\u20aa\u00a5\u00a6\u00a7\u00a8\u00a9\u00d7\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00f7\u00bb\u00bc\u00bd\u00be\u00bf\u05b0\u05b1\u05b2\u05b3\u05b4\u05b5\u05b6\u05b7\u05b8\u05b9\u05ba\u05bb\u05bc\u05bd\u05be\u05bf\u05c0\u05c1\u05c2\u05c3\u05f0\u05f1\u05f2\u05f3\u05f4\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05da\u05db\u05dc\u05dd\u05de\u05df\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5\u05e6\u05e7\u05e8\u05e9\u05ea\u200e\u200f"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfd\xfe"; + +const aliases = ["windows-1255", "x-cp1255", "cp1255"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1256.js b/intl/uconv/tests/unit/test_encode_CP1256.js new file mode 100644 index 0000000000..c8fbbb6192 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1256.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1256 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u067e\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0679\u2039\u0152\u0686\u0698\u0688\u06af\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u06a9\u2122\u0691\u203a\u0153\u200c\u200d\u06ba\u00a0\u060c\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u06be\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u061b\u00bb\u00bc\u00bd\u00be\u061f\u06c1\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u00d7\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u00e0\u0644\u00e2\u0645\u0646\u0647\u0648\u00e7\u00e8\u00e9\u00ea\u00eb\u0649\u064a\u00ee\u00ef\u064b\u064c\u064d\u064e\u00f4\u064f\u0650\u00f7\u0651\u00f9\u0652\u00fb\u00fc\u200e\u200f\u06d2"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1256", "x-cp1256", "cp1256"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1257.js b/intl/uconv/tests/unit/test_encode_CP1257.js new file mode 100644 index 0000000000..d56241ef88 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1257.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1257 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0083\u201e\u2026\u2020\u2021\u0088\u2030\u008a\u2039\u008c\u00a8\u02c7\u00b8\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u009a\u203a\u009c\u00af\u02db\u009f\u00a0\u00a2\u00a3\u00a4\u00a6\u00a7\u00d8\u00a9\u0156\u00ab\u00ac\u00ad\u00ae\u00c6\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00f8\u00b9\u0157\u00bb\u00bc\u00bd\u00be\u00e6\u0104\u012e\u0100\u0106\u00c4\u00c5\u0118\u0112\u010c\u00c9\u0179\u0116\u0122\u0136\u012a\u013b\u0160\u0143\u0145\u00d3\u014c\u00d5\u00d6\u00d7\u0172\u0141\u015a\u016a\u00dc\u017b\u017d\u00df\u0105\u012f\u0101\u0107\u00e4\u00e5\u0119\u0113\u010d\u00e9\u017a\u0117\u0123\u0137\u012b\u013c\u0161\u0144\u0146\u00f3\u014d\u00f5\u00f6\u00f7\u0173\u0142\u015b\u016b\u00fc\u017c\u017e\u02d9"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa2\xa3\xa4\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1257", "x-cp1257", "cp1257"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1258.js b/intl/uconv/tests/unit/test_encode_CP1258.js new file mode 100644 index 0000000000..e60a2f79ba --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1258.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1258 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u008a\u2039\u0152\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u009a\u203a\u0153\u009d\u009e\u0178\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u0102\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u0300\u00cd\u00ce\u00cf\u0110\u00d1\u0309\u00d3\u00d4\u01a0\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u01af\u0303\u00df\u00e0\u00e1\u00e2\u0103\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u0301\u00ed\u00ee\u00ef\u0111\u00f1\u0323\u00f3\u00f4\u01a1\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u01b0\u20ab\u00ff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1258", "x-cp1258", "cp1258"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP874.js b/intl/uconv/tests/unit/test_encode_CP874.js new file mode 100644 index 0000000000..18158ef7e1 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP874.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-874 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u0082\u0083\u0084\u2026\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e24\u0e25\u0e26\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e2f\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35\u0e36\u0e37\u0e38\u0e39\u0e3a\u0e3f\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a\u0e4b\u0e4c\u0e4d\u0e4e\u0e4f\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57\u0e58\u0e59\u0e5a\u0e5b"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb"; + +const aliases = ["windows-874", "dos-874"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_gb18030.js b/intl/uconv/tests/unit/test_encode_gb18030.js new file mode 100644 index 0000000000..c080e67535 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_gb18030.js @@ -0,0 +1,15 @@ +// Tests conversion from Unicode to gb18030 +// This is a sniff test which doesn't cover the full gbk range: the test string +// includes only the ASCII range and the first 63 double byte characters + 
+const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20AC\u4E02\u4E04\u4E05\u4E06\u4E0F\u4E12\u4E17\u4E1F\u4E20\u4E21\u4E23\u4E26\u4E29\u4E2E\u4E2F\u4E31\u4E33\u4E35\u4E37\u4E3C\u4E40\u4E41\u4E42\u4E44\u4E46\u4E4A\u4E51\u4E55\u4E57\u4E5A\u4E5B\u4E62\u4E63\u4E64\u4E65\u4E67\u4E68\u4E6A\u4E6B\u4E6C\u4E6D\u4E6E\u4E6F\u4E72\u4E74\u4E75\u4E76\u4E77\u4E78\u4E79\u4E7A\u4E7B\u4E7C\u4E7D\u4E7F\u4E80\u4E81\u4E82\u4E83\u4E84\u4E85\u4E87\u4E8A\uFFFD\uE7C6\u1E3F\u01F9\uE7C9\u1E3E\uE7C7\u1E40"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa2\xe3\x81@\x81A\x81B\x81C\x81D\x81E\x81F\x81G\x81H\x81I\x81J\x81K\x81L\x81M\x81N\x81O\x81P\x81Q\x81R\x81S\x81T\x81U\x81V\x81W\x81X\x81Y\x81Z\x81[\x81\\\x81]\x81^\x81_\x81`\x81a\x81b\x81c\x81d\x81e\x81f\x81g\x81h\x81i\x81j\x81k\x81l\x81m\x81n\x81o\x81p\x81q\x81r\x81s\x81t\x81u\x81v\x81w\x81x\x81y\x81z\x81{\x81|\x81}\x81~\x84\x31\xa4\x37\xa8\xa0\xa8\xbc\xa8\xbf\xa8\xc1\x815\xf46\x815\xf47\x815\xf48"; + +const aliases = ["gb18030"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_gbk.js b/intl/uconv/tests/unit/test_encode_gbk.js new file mode 100644 index 0000000000..50763b9dab --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_gbk.js @@ -0,0 +1,15 @@ +// Tests conversion from Unicode to gbk +// This is a sniff test which doesn't cover the full gbk range: the test string +// includes only the ASCII range and the first 63 double byte characters + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20AC\u4E02\u4E04\u4E05\u4E06\u4E0F\u4E12\u4E17\u4E1F\u4E20\u4E21\u4E23\u4E26\u4E29\u4E2E\u4E2F\u4E31\u4E33\u4E35\u4E37\u4E3C\u4E40\u4E41\u4E42\u4E44\u4E46\u4E4A\u4E51\u4E55\u4E57\u4E5A\u4E5B\u4E62\u4E63\u4E64\u4E65\u4E67\u4E68\u4E6A\u4E6B\u4E6C\u4E6D\u4E6E\u4E6F\u4E72\u4E74\u4E75\u4E76\u4E77\u4E78\u4E79\u4E7A\u4E7B\u4E7C\u4E7D\u4E7F\u4E80\u4E81\u4E82\u4E83\u4E84\u4E85\u4E87\u4E8A\uFFFD"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81@\x81A\x81B\x81C\x81D\x81E\x81F\x81G\x81H\x81I\x81J\x81K\x81L\x81M\x81N\x81O\x81P\x81Q\x81R\x81S\x81T\x81U\x81V\x81W\x81X\x81Y\x81Z\x81[\x81\\\x81]\x81^\x81_\x81`\x81a\x81b\x81c\x81d\x81e\x81f\x81g\x81h\x81i\x81j\x81k\x81l\x81m\x81n\x81o\x81p\x81q\x81r\x81s\x81t\x81u\x81v\x81w\x81x\x81y\x81z\x81{\x81|\x81}\x81~?"; + +const aliases = ["gbk", "x-gbk"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_macintosh.js b/intl/uconv/tests/unit/test_encode_macintosh.js new file mode 100644 index 0000000000..949d0aaf7c --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_macintosh.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to x-mac-roman + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u00c6\u00d8\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0\u222b\u00aa\u00ba\u03a9\u00e6\u00f8\u00bf\u00a1\u00ac\u221a\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca\u00ff\u0178\u2044\u20ac\u2039\u203a\ufb01\ufb02\u2021\u00b7\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\u0131\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["csMacintosh", "mac", "macintosh", "x-mac-roman"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_x_mac_cyrillic.js b/intl/uconv/tests/unit/test_encode_x_mac_cyrillic.js new file mode 100644 index 0000000000..ac27c57cc4 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_x_mac_cyrillic.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to x-mac-cyrillic + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u2020\u00b0\u0490\u00a3\u00a7\u2022\u00b6\u0406\u00ae\u00a9\u2122\u0402\u0452\u2260\u0403\u0453\u221e\u00b1\u2264\u2265\u0456\u00b5\u0491\u0408\u0404\u0454\u0407\u0457\u0409\u0459\u040a\u045a\u0458\u0405\u00ac\u221a\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u040b\u045b\u040c\u045c\u0455\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u201e\u040e\u045e\u040f\u045f\u2116\u0401\u0451\u044f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u20ac"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["x-mac-cyrillic", "x-mac-ukrainian"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_input_stream.js b/intl/uconv/tests/unit/test_input_stream.js new file mode 100644 index 0000000000..6d62fcccf3 --- /dev/null +++ b/intl/uconv/tests/unit/test_input_stream.js @@ -0,0 +1,38 @@ +var CC = Components.Constructor; +var converter = Cc[ + "@mozilla.org/intl/scriptableunicodeconverter" 
+].createInstance(Ci.nsIScriptableUnicodeConverter); +converter.charset = "UTF-8"; + +var SIS = CC( + "@mozilla.org/scriptableinputstream;1", + "nsIScriptableInputStream", + "init" +); + +function test_char(code) { + dump("test_char(0x" + code.toString(16) + ")\n"); + var original = String.fromCharCode(code); + var nativeStream = converter.convertToInputStream(original); + var stream = new SIS(nativeStream); + var utf8Result = stream.read(stream.available()); + stream.close(); + var result = converter.ConvertToUnicode(utf8Result); + Assert.equal(escape(original), escape(result)); +} + +function run_test() { + // This is not a very comprehensive test. + for (var i = 0x007f - 2; i <= 0x007f; i++) { + test_char(i); + } + for (i = 0x07ff - 2; i <= 0x07ff; i++) { + test_char(i); + } + for (i = 0x1000 - 2; i <= 0x1000 + 2; i++) { + test_char(i); + } + for (i = 0xe000; i <= 0xe000 + 2; i++) { + test_char(i); + } +} diff --git a/intl/uconv/tests/unit/test_unEscapeNonAsciiURI.js b/intl/uconv/tests/unit/test_unEscapeNonAsciiURI.js new file mode 100644 index 0000000000..f447959244 --- /dev/null +++ b/intl/uconv/tests/unit/test_unEscapeNonAsciiURI.js @@ -0,0 +1,58 @@ +// Tests for nsITextToSubURI.unEscapeNonAsciiURI +function run_test() { + // Tests whether nsTextToSubURI does UTF-16 unescaping (it shouldn't) + const testURI = "data:text/html,%FE%FF"; + Assert.equal( + Services.textToSubURI.unEscapeNonAsciiURI("UTF-16", testURI), + testURI + ); + + // Tests whether incomplete multibyte sequences throw. 
+ const tests = [ + { + input: "http://example.com/?p=%E9", + throws: Cr.NS_ERROR_ILLEGAL_INPUT, + }, + { + input: "http://example.com/?p=%E9%80", + throws: Cr.NS_ERROR_ILLEGAL_INPUT, + }, + { + input: "http://example.com/?p=%E9%80%80", + expected: "http://example.com/?p=\u9000", + }, + { + input: "http://example.com/?p=%E9e", + throws: Cr.NS_ERROR_ILLEGAL_INPUT, + }, + { + input: "http://example.com/?p=%E9%E9", + throws: Cr.NS_ERROR_ILLEGAL_INPUT, + }, + { + input: "http://example.com/?name=M%FCller/", + throws: Cr.NS_ERROR_ILLEGAL_INPUT, + }, + { + input: "http://example.com/?name=M%C3%BCller/", + expected: "http://example.com/?name=Müller/", + }, + ]; + + for (const t of tests) { + if (t.throws !== undefined) { + let thrown = undefined; + try { + Services.textToSubURI.unEscapeNonAsciiURI("UTF-8", t.input); + } catch (e) { + thrown = e.result; + } + Assert.equal(thrown, t.throws); + } else { + Assert.equal( + Services.textToSubURI.unEscapeNonAsciiURI("UTF-8", t.input), + t.expected + ); + } + } +} diff --git a/intl/uconv/tests/unit/test_unmapped.js b/intl/uconv/tests/unit/test_unmapped.js new file mode 100644 index 0000000000..13ee13c20e --- /dev/null +++ b/intl/uconv/tests/unit/test_unmapped.js @@ -0,0 +1,86 @@ +// Tests encoding of unmapped characters +const inString = "\uE5E5"; +const expectedString = "?"; + +function run_test() { + var failures = false; + var encodingConverter = CreateScriptableConverter(); + + // this list excludes codepages that can represent all Unicode + var encoders = [ + "Big5", + "EUC-JP", + "EUC-KR", + "GBK", + "gb18030", + "IBM866", + "ISO-2022-JP", + "ISO-8859-3", + "ISO-8859-4", + "ISO-8859-5", + "ISO-8859-6", + "ISO-8859-7", + "ISO-8859-8", + "ISO-8859-8-I", + "ISO-8859-10", + "ISO-8859-13", + "ISO-8859-14", + "ISO-8859-15", + "ISO-8859-16", + "ISO-8859-2", + "KOI8-R", + "KOI8-U", + "Shift_JIS", + "windows-1250", + "windows-1251", + "windows-1252", + "windows-1253", + "windows-1254", + "windows-1255", + "windows-1256", + 
"windows-1257", + "windows-1258", + "windows-874", + "x-mac-cyrillic", + ]; + + var counter = 0; + while (counter < encoders.length) { + var charset = encoders[counter++]; + + dump("testing " + counter + " " + charset + "\n"); + encodingConverter.charset = charset; + var codepageString = + encodingConverter.ConvertFromUnicode(inString) + + encodingConverter.Finish(); + if (codepageString != expectedString) { + dump(charset + " encoding failed\n"); + for (var i = 0; i < expectedString.length; ++i) { + if (i >= codepageString.length) { + dump( + "output length " + + codepageString.length + + " less than expected length " + + expectedString.length + + "\n" + ); + break; + } + if (codepageString.charAt(i) != expectedString.charAt(i)) { + dump( + i.toString(16) + + ": 0x" + + codepageString.charCodeAt(i).toString(16) + + " != " + + expectedString.charCodeAt(i).toString(16) + + "\n" + ); + } + } + failures = true; + } + } + if (failures) { + do_throw("test failed\n"); + } +} diff --git a/intl/uconv/tests/unit/test_utf8_illegals.js b/intl/uconv/tests/unit/test_utf8_illegals.js new file mode 100644 index 0000000000..8712b4c3e8 --- /dev/null +++ b/intl/uconv/tests/unit/test_utf8_illegals.js @@ -0,0 +1,162 @@ +// Tests illegal UTF-8 sequences + +var Cc = Components.Constructor; + +const { NetUtil } = ChromeUtils.import("resource://gre/modules/NetUtil.jsm"); + +const tests = [ + { + inStrings: [ + "%80", // Illegal or incomplete sequences + "%8f", + "%90", + "%9f", + "%a0", + "%bf", + "%c0", + "%c1", + "%c2", + "%df", + "%e0", + "%e0%a0", + "%e0%bf", + "%ed%80", + "%ed%9f", + "%ef", + "%ef%bf", + "%f0", + "%f0%90", + "%f0%90%80", + "%f0%90%bf", + "%f0%bf", + "%f0%bf%80", + "%f0%bf%bf", + "%f4", + "%f4%80", + "%f4%80%80", + "%f4%80%bf", + "%f4%8f", + "%f4%8f%80", + "%f4%8f%bf", + "%f5", + "%f7", + "%f8", + "%fb", + "%fc", + "%fd", + ], + expected: "ABC\ufffdXYZ", + }, + + { + inStrings: [ + "%c0%af", // Illegal bytes in 2-octet + "%c1%af", + ], // sequences + expected: 
"ABC\ufffd\ufffdXYZ", + }, + + { + inStrings: [ + "%e0%80%80", // Illegal bytes in 3-octet + "%e0%80%af", // sequences + "%e0%9f%bf", + // long surrogates + "%ed%a0%80", // D800 + "%ed%ad%bf", // DB7F + "%ed%ae%80", // DB80 + "%ed%af%bf", // DBFF + "%ed%b0%80", // DC00 + "%ed%be%80", // DF80 + "%ed%bf%bf", // DFFF + ], + expected: "ABC\ufffd\ufffd\ufffdXYZ", + }, + + { + inStrings: [ + "%f0%80%80%80", // Illegal bytes in 4-octet + "%f0%80%80%af", // sequences + "%f0%8f%bf%bf", + "%f4%90%80%80", + "%f4%bf%bf%bf", + "%f5%80%80%80", + "%f7%bf%bf%bf", + ], + expected: "ABC\ufffd\ufffd\ufffd\ufffdXYZ", + }, + + { + inStrings: [ + "%f8%80%80%80%80", // Illegal bytes in 5-octet + "%f8%80%80%80%af", // sequences + "%fb%bf%bf%bf%bf", + ], + expected: "ABC\ufffd\ufffd\ufffd\ufffd\ufffdXYZ", + }, + + // Surrogate pairs + { + inStrings: [ + "%ed%a0%80%ed%b0%80", // D800 DC00 + "%ed%a0%80%ed%bf%bf", // D800 DFFF + "%ed%ad%bf%ed%b0%80", // DB7F DC00 + "%ed%ad%bf%ed%bf%bf", // DB7F DFFF + "%ed%ae%80%ed%b0%80", // DB80 DC00 + "%ed%ae%80%ed%bf%bf", // DB80 DFFF + "%ed%af%bf%ed%b0%80", // DBFF DC00 + "%ed%af%bf%ed%bf%bf", // DBFF DFFF + "%fc%80%80%80%80%80", // Illegal bytes in 6-octet + "%fc%80%80%80%80%af", // sequences + "%fd%bf%bf%bf%bf%bf", + ], + expected: "ABC\ufffd\ufffd\ufffd\ufffd\ufffd\ufffdXYZ", + }, +]; + +function testCaseInputStream(inStr, expected) { + var dataURI = "data:text/plain; charset=UTF-8,ABC" + inStr + "XYZ"; + dump(inStr + "==>"); + + var ConverterInputStream = Cc( + "@mozilla.org/intl/converter-input-stream;1", + "nsIConverterInputStream", + "init" + ); + var channel = NetUtil.newChannel({ + uri: dataURI, + loadUsingSystemPrincipal: true, + }); + var testInputStream = channel.open(); + var testConverter = new ConverterInputStream( + testInputStream, + "UTF-8", + 16, + 0xfffd + ); + + if (!(testConverter instanceof Ci.nsIUnicharLineInputStream)) { + throw new Error("not line input stream"); + } + + var outStr = ""; + var more; + do { + // read the line and 
check for eof + var line = {}; + more = testConverter.readLine(line); + outStr += line.value; + } while (more); + + dump(outStr + "; expected=" + expected + "\n"); + Assert.equal(outStr, expected); + Assert.equal(outStr.length, expected.length); +} + +function run_test() { + for (var t of tests) { + for (var inStr of t.inStrings) { + testCaseInputStream(inStr, t.expected); + } + } +} diff --git a/intl/uconv/tests/unit/xpcshell.ini b/intl/uconv/tests/unit/xpcshell.ini new file mode 100644 index 0000000000..a3fd57badd --- /dev/null +++ b/intl/uconv/tests/unit/xpcshell.ini @@ -0,0 +1,90 @@ +[DEFAULT] +head = head_charsetConversionTests.js +support-files = + data/unicode-conversion.utf16.txt + data/unicode-conversion.utf16be.txt + data/unicode-conversion.utf16le.txt + data/unicode-conversion.utf8.txt + +[test_bug116882.js] +[test_bug317216.js] +[test_bug321379.js] +[test_bug340714.js] +[test_bug381412.Big5-HKSCS.js] +[test_bug381412.Big5.js] +[test_bug381412.euc-kr.js] +[test_bug381412.euc_jp.js] +[test_bug381412.gb2312.js] +[test_bug381412.js] +[test_bug396637.js] +[test_bug399257.js] +[test_bug457886.js] +[test_bug522931.js] +[test_bug563283.js] +[test_bug563618.js] +[test_bug601429.js] +[test_bug715319.euc_jp.js] +[test_bug715319.gb2312.js] +[test_bug715319.dbcs.js] +[test_charset_conversion.js] +[test_decode_8859-1.js] +[test_decode_8859-10.js] +[test_decode_8859-11.js] +[test_decode_8859-13.js] +[test_decode_8859-14.js] +[test_decode_8859-15.js] +[test_decode_8859-2.js] +[test_decode_8859-3.js] +[test_decode_8859-4.js] +[test_decode_8859-5.js] +[test_decode_8859-6.js] +[test_decode_8859-7.js] +[test_decode_8859-8.js] +[test_decode_8859-9.js] +[test_decode_CP1250.js] +[test_decode_CP1251.js] +[test_decode_CP1252.js] +[test_decode_CP1253.js] +[test_decode_CP1254.js] +[test_decode_CP1255.js] +[test_decode_CP1256.js] +[test_decode_CP1257.js] +[test_decode_CP1258.js] +[test_decode_CP874.js] +[test_decode_gb18030.js] +[test_decode_gbk.js] 
+[test_decode_x_mac_cyrillic.js] +[test_decode_macintosh.js] +[test_decode_x_mac_ukrainian.js] +[test_encode_8859-1.js] +[test_encode_8859-10.js] +[test_encode_8859-11.js] +[test_encode_8859-13.js] +[test_encode_8859-14.js] +[test_encode_8859-15.js] +[test_encode_8859-2.js] +[test_encode_8859-3.js] +[test_encode_8859-4.js] +[test_encode_8859-5.js] +[test_encode_8859-6.js] +[test_encode_8859-7.js] +[test_encode_8859-8.js] +[test_encode_8859-9.js] +[test_encode_CP1250.js] +[test_encode_CP1251.js] +[test_encode_CP1252.js] +[test_encode_CP1253.js] +[test_encode_CP1254.js] +[test_encode_CP1255.js] +[test_encode_CP1256.js] +[test_encode_CP1257.js] +[test_encode_CP1258.js] +[test_encode_CP874.js] +[test_encode_gb18030.js] +[test_encode_gbk.js] +[test_encode_x_mac_cyrillic.js] +[test_encode_macintosh.js] +[test_input_stream.js] +[test_unEscapeNonAsciiURI.js] +[test_unmapped.js] +[test_utf8_illegals.js] -- cgit v1.2.3