From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- intl/uconv/components.conf | 34 ++ intl/uconv/crashtests/563618.html | 12 + intl/uconv/crashtests/crashtests.list | 2 + intl/uconv/crashtests/omt-non-utf-8-jsurl.html | 14 + intl/uconv/directory.txt | 32 ++ intl/uconv/moz.build | 32 ++ intl/uconv/nsConverterInputStream.cpp | 256 ++++++++++++++ intl/uconv/nsConverterInputStream.h | 64 ++++ intl/uconv/nsConverterOutputStream.cpp | 115 +++++++ intl/uconv/nsConverterOutputStream.h | 39 +++ intl/uconv/nsIScriptableUConv.idl | 64 ++++ intl/uconv/nsITextToSubURI.idl | 60 ++++ intl/uconv/nsScriptableUConv.cpp | 192 +++++++++++ intl/uconv/nsScriptableUConv.h | 34 ++ intl/uconv/nsTextToSubURI.cpp | 178 ++++++++++ intl/uconv/nsTextToSubURI.h | 36 ++ intl/uconv/tests/gtest/TestShortRead.cpp | 109 ++++++ intl/uconv/tests/gtest/moz.build | 11 + intl/uconv/tests/mochitest.toml | 25 ++ intl/uconv/tests/moz.build | 13 + intl/uconv/tests/stressgb.pl | 23 ++ intl/uconv/tests/test_big5_encoder.html | 43 +++ intl/uconv/tests/test_bug335816.html | 40 +++ intl/uconv/tests/test_bug843434.html | 27 ++ intl/uconv/tests/test_bug959058-1.html | 28 ++ intl/uconv/tests/test_bug959058-2.html | 28 ++ intl/uconv/tests/test_long_doc.html | 98 ++++++ intl/uconv/tests/test_ncr_fallback.html | 74 ++++ .../tests/test_singlebyte_overconsumption.html | 33 ++ .../tests/test_unicode_noncharacterescapes.html | 303 +++++++++++++++++ .../tests/test_unicode_noncharacters_gb18030.html | 305 +++++++++++++++++ .../tests/test_unicode_noncharacters_utf8.html | 303 +++++++++++++++++ intl/uconv/tests/test_utf8_overconsumption.html | 39 +++ .../tests/unit/data/unicode-conversion.utf16.txt | Bin 0 -> 2814 bytes .../tests/unit/data/unicode-conversion.utf16be.txt | Bin 0 -> 2812 bytes .../tests/unit/data/unicode-conversion.utf16le.txt | Bin 0 -> 2812 bytes 
.../tests/unit/data/unicode-conversion.utf8.txt | 43 +++ .../tests/unit/head_charsetConversionTests.js | 110 ++++++ intl/uconv/tests/unit/test_bug116882.js | 11 + intl/uconv/tests/unit/test_bug317216.js | 165 +++++++++ intl/uconv/tests/unit/test_bug321379.js | 35 ++ intl/uconv/tests/unit/test_bug340714.js | 123 +++++++ intl/uconv/tests/unit/test_bug381412.Big5-HKSCS.js | 64 ++++ intl/uconv/tests/unit/test_bug381412.Big5.js | 64 ++++ intl/uconv/tests/unit/test_bug381412.euc-kr.js | 64 ++++ intl/uconv/tests/unit/test_bug381412.euc_jp.js | 92 +++++ intl/uconv/tests/unit/test_bug381412.gb2312.js | 60 ++++ intl/uconv/tests/unit/test_bug381412.js | 60 ++++ intl/uconv/tests/unit/test_bug396637.js | 9 + intl/uconv/tests/unit/test_bug399257.js | 80 +++++ intl/uconv/tests/unit/test_bug457886.js | 12 + intl/uconv/tests/unit/test_bug522931.js | 4 + intl/uconv/tests/unit/test_bug563283.js | 53 +++ intl/uconv/tests/unit/test_bug563618.js | 99 ++++++ intl/uconv/tests/unit/test_bug601429.js | 84 +++++ intl/uconv/tests/unit/test_bug715319.dbcs.js | 56 ++++ intl/uconv/tests/unit/test_bug715319.euc_jp.js | 77 +++++ intl/uconv/tests/unit/test_bug715319.gb2312.js | 87 +++++ intl/uconv/tests/unit/test_charset_conversion.js | 373 +++++++++++++++++++++ intl/uconv/tests/unit/test_decode_8859-1.js | 26 ++ intl/uconv/tests/unit/test_decode_8859-10.js | 22 ++ intl/uconv/tests/unit/test_decode_8859-11.js | 13 + intl/uconv/tests/unit/test_decode_8859-13.js | 13 + intl/uconv/tests/unit/test_decode_8859-14.js | 13 + intl/uconv/tests/unit/test_decode_8859-15.js | 21 ++ intl/uconv/tests/unit/test_decode_8859-2.js | 24 ++ intl/uconv/tests/unit/test_decode_8859-3.js | 24 ++ intl/uconv/tests/unit/test_decode_8859-4.js | 24 ++ intl/uconv/tests/unit/test_decode_8859-5.js | 22 ++ intl/uconv/tests/unit/test_decode_8859-6.js | 25 ++ intl/uconv/tests/unit/test_decode_8859-7.js | 27 ++ intl/uconv/tests/unit/test_decode_8859-8.js | 24 ++ intl/uconv/tests/unit/test_decode_8859-9.js | 24 ++ 
intl/uconv/tests/unit/test_decode_CP1250.js | 13 + intl/uconv/tests/unit/test_decode_CP1251.js | 13 + intl/uconv/tests/unit/test_decode_CP1252.js | 13 + intl/uconv/tests/unit/test_decode_CP1253.js | 13 + intl/uconv/tests/unit/test_decode_CP1254.js | 13 + intl/uconv/tests/unit/test_decode_CP1255.js | 13 + intl/uconv/tests/unit/test_decode_CP1256.js | 13 + intl/uconv/tests/unit/test_decode_CP1257.js | 13 + intl/uconv/tests/unit/test_decode_CP1258.js | 13 + intl/uconv/tests/unit/test_decode_CP874.js | 13 + intl/uconv/tests/unit/test_decode_gb18030.js | 16 + intl/uconv/tests/unit/test_decode_gbk.js | 15 + intl/uconv/tests/unit/test_decode_macintosh.js | 13 + .../uconv/tests/unit/test_decode_x_mac_cyrillic.js | 13 + .../tests/unit/test_decode_x_mac_ukrainian.js | 13 + intl/uconv/tests/unit/test_encode_8859-1.js | 26 ++ intl/uconv/tests/unit/test_encode_8859-10.js | 22 ++ intl/uconv/tests/unit/test_encode_8859-11.js | 13 + intl/uconv/tests/unit/test_encode_8859-13.js | 13 + intl/uconv/tests/unit/test_encode_8859-14.js | 13 + intl/uconv/tests/unit/test_encode_8859-15.js | 21 ++ intl/uconv/tests/unit/test_encode_8859-2.js | 24 ++ intl/uconv/tests/unit/test_encode_8859-3.js | 24 ++ intl/uconv/tests/unit/test_encode_8859-4.js | 24 ++ intl/uconv/tests/unit/test_encode_8859-5.js | 22 ++ intl/uconv/tests/unit/test_encode_8859-6.js | 25 ++ intl/uconv/tests/unit/test_encode_8859-7.js | 27 ++ intl/uconv/tests/unit/test_encode_8859-8.js | 24 ++ intl/uconv/tests/unit/test_encode_8859-9.js | 24 ++ intl/uconv/tests/unit/test_encode_CP1250.js | 13 + intl/uconv/tests/unit/test_encode_CP1251.js | 13 + intl/uconv/tests/unit/test_encode_CP1252.js | 13 + intl/uconv/tests/unit/test_encode_CP1253.js | 13 + intl/uconv/tests/unit/test_encode_CP1254.js | 13 + intl/uconv/tests/unit/test_encode_CP1255.js | 13 + intl/uconv/tests/unit/test_encode_CP1256.js | 13 + intl/uconv/tests/unit/test_encode_CP1257.js | 13 + intl/uconv/tests/unit/test_encode_CP1258.js | 13 + 
intl/uconv/tests/unit/test_encode_CP874.js | 13 + intl/uconv/tests/unit/test_encode_gb18030.js | 15 + intl/uconv/tests/unit/test_encode_gbk.js | 15 + intl/uconv/tests/unit/test_encode_macintosh.js | 13 + .../uconv/tests/unit/test_encode_x_mac_cyrillic.js | 13 + intl/uconv/tests/unit/test_input_stream.js | 41 +++ intl/uconv/tests/unit/test_unEscapeNonAsciiURI.js | 58 ++++ intl/uconv/tests/unit/test_unEscapeURIForUI.js | 23 ++ intl/uconv/tests/unit/test_unmapped.js | 86 +++++ intl/uconv/tests/unit/test_utf8_illegals.js | 164 +++++++++ intl/uconv/tests/unit/xpcshell.toml | 175 ++++++++++ 122 files changed, 6028 insertions(+) create mode 100644 intl/uconv/components.conf create mode 100644 intl/uconv/crashtests/563618.html create mode 100644 intl/uconv/crashtests/crashtests.list create mode 100644 intl/uconv/crashtests/omt-non-utf-8-jsurl.html create mode 100644 intl/uconv/directory.txt create mode 100644 intl/uconv/moz.build create mode 100644 intl/uconv/nsConverterInputStream.cpp create mode 100644 intl/uconv/nsConverterInputStream.h create mode 100644 intl/uconv/nsConverterOutputStream.cpp create mode 100644 intl/uconv/nsConverterOutputStream.h create mode 100644 intl/uconv/nsIScriptableUConv.idl create mode 100644 intl/uconv/nsITextToSubURI.idl create mode 100644 intl/uconv/nsScriptableUConv.cpp create mode 100644 intl/uconv/nsScriptableUConv.h create mode 100644 intl/uconv/nsTextToSubURI.cpp create mode 100644 intl/uconv/nsTextToSubURI.h create mode 100644 intl/uconv/tests/gtest/TestShortRead.cpp create mode 100644 intl/uconv/tests/gtest/moz.build create mode 100644 intl/uconv/tests/mochitest.toml create mode 100644 intl/uconv/tests/moz.build create mode 100644 intl/uconv/tests/stressgb.pl create mode 100644 intl/uconv/tests/test_big5_encoder.html create mode 100644 intl/uconv/tests/test_bug335816.html create mode 100644 intl/uconv/tests/test_bug843434.html create mode 100644 intl/uconv/tests/test_bug959058-1.html create mode 100644 
intl/uconv/tests/test_bug959058-2.html create mode 100644 intl/uconv/tests/test_long_doc.html create mode 100644 intl/uconv/tests/test_ncr_fallback.html create mode 100644 intl/uconv/tests/test_singlebyte_overconsumption.html create mode 100644 intl/uconv/tests/test_unicode_noncharacterescapes.html create mode 100644 intl/uconv/tests/test_unicode_noncharacters_gb18030.html create mode 100644 intl/uconv/tests/test_unicode_noncharacters_utf8.html create mode 100644 intl/uconv/tests/test_utf8_overconsumption.html create mode 100644 intl/uconv/tests/unit/data/unicode-conversion.utf16.txt create mode 100644 intl/uconv/tests/unit/data/unicode-conversion.utf16be.txt create mode 100644 intl/uconv/tests/unit/data/unicode-conversion.utf16le.txt create mode 100644 intl/uconv/tests/unit/data/unicode-conversion.utf8.txt create mode 100644 intl/uconv/tests/unit/head_charsetConversionTests.js create mode 100644 intl/uconv/tests/unit/test_bug116882.js create mode 100644 intl/uconv/tests/unit/test_bug317216.js create mode 100644 intl/uconv/tests/unit/test_bug321379.js create mode 100644 intl/uconv/tests/unit/test_bug340714.js create mode 100644 intl/uconv/tests/unit/test_bug381412.Big5-HKSCS.js create mode 100644 intl/uconv/tests/unit/test_bug381412.Big5.js create mode 100644 intl/uconv/tests/unit/test_bug381412.euc-kr.js create mode 100644 intl/uconv/tests/unit/test_bug381412.euc_jp.js create mode 100644 intl/uconv/tests/unit/test_bug381412.gb2312.js create mode 100644 intl/uconv/tests/unit/test_bug381412.js create mode 100644 intl/uconv/tests/unit/test_bug396637.js create mode 100644 intl/uconv/tests/unit/test_bug399257.js create mode 100644 intl/uconv/tests/unit/test_bug457886.js create mode 100644 intl/uconv/tests/unit/test_bug522931.js create mode 100644 intl/uconv/tests/unit/test_bug563283.js create mode 100644 intl/uconv/tests/unit/test_bug563618.js create mode 100644 intl/uconv/tests/unit/test_bug601429.js create mode 100644 intl/uconv/tests/unit/test_bug715319.dbcs.js 
create mode 100644 intl/uconv/tests/unit/test_bug715319.euc_jp.js create mode 100644 intl/uconv/tests/unit/test_bug715319.gb2312.js create mode 100644 intl/uconv/tests/unit/test_charset_conversion.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-1.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-10.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-11.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-13.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-14.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-15.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-2.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-3.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-4.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-5.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-6.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-7.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-8.js create mode 100644 intl/uconv/tests/unit/test_decode_8859-9.js create mode 100644 intl/uconv/tests/unit/test_decode_CP1250.js create mode 100644 intl/uconv/tests/unit/test_decode_CP1251.js create mode 100644 intl/uconv/tests/unit/test_decode_CP1252.js create mode 100644 intl/uconv/tests/unit/test_decode_CP1253.js create mode 100644 intl/uconv/tests/unit/test_decode_CP1254.js create mode 100644 intl/uconv/tests/unit/test_decode_CP1255.js create mode 100644 intl/uconv/tests/unit/test_decode_CP1256.js create mode 100644 intl/uconv/tests/unit/test_decode_CP1257.js create mode 100644 intl/uconv/tests/unit/test_decode_CP1258.js create mode 100644 intl/uconv/tests/unit/test_decode_CP874.js create mode 100644 intl/uconv/tests/unit/test_decode_gb18030.js create mode 100644 intl/uconv/tests/unit/test_decode_gbk.js create mode 100644 intl/uconv/tests/unit/test_decode_macintosh.js create mode 100644 intl/uconv/tests/unit/test_decode_x_mac_cyrillic.js create mode 100644 
intl/uconv/tests/unit/test_decode_x_mac_ukrainian.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-1.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-10.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-11.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-13.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-14.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-15.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-2.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-3.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-4.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-5.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-6.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-7.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-8.js create mode 100644 intl/uconv/tests/unit/test_encode_8859-9.js create mode 100644 intl/uconv/tests/unit/test_encode_CP1250.js create mode 100644 intl/uconv/tests/unit/test_encode_CP1251.js create mode 100644 intl/uconv/tests/unit/test_encode_CP1252.js create mode 100644 intl/uconv/tests/unit/test_encode_CP1253.js create mode 100644 intl/uconv/tests/unit/test_encode_CP1254.js create mode 100644 intl/uconv/tests/unit/test_encode_CP1255.js create mode 100644 intl/uconv/tests/unit/test_encode_CP1256.js create mode 100644 intl/uconv/tests/unit/test_encode_CP1257.js create mode 100644 intl/uconv/tests/unit/test_encode_CP1258.js create mode 100644 intl/uconv/tests/unit/test_encode_CP874.js create mode 100644 intl/uconv/tests/unit/test_encode_gb18030.js create mode 100644 intl/uconv/tests/unit/test_encode_gbk.js create mode 100644 intl/uconv/tests/unit/test_encode_macintosh.js create mode 100644 intl/uconv/tests/unit/test_encode_x_mac_cyrillic.js create mode 100644 intl/uconv/tests/unit/test_input_stream.js create mode 100644 intl/uconv/tests/unit/test_unEscapeNonAsciiURI.js create mode 100644 
intl/uconv/tests/unit/test_unEscapeURIForUI.js create mode 100644 intl/uconv/tests/unit/test_unmapped.js create mode 100644 intl/uconv/tests/unit/test_utf8_illegals.js create mode 100644 intl/uconv/tests/unit/xpcshell.toml (limited to 'intl/uconv') diff --git a/intl/uconv/components.conf b/intl/uconv/components.conf new file mode 100644 index 0000000000..00686f661a --- /dev/null +++ b/intl/uconv/components.conf @@ -0,0 +1,34 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Classes = [ + { + 'cid': '{2bc2ad62-ad5d-4b7b-a9db-f74ae203c527}', + 'contract_ids': ['@mozilla.org/intl/converter-input-stream;1'], + 'type': 'nsConverterInputStream', + 'headers': ['nsConverterInputStream.h'], + }, + { + 'cid': '{ff8780a5-bbb1-4bc5-8ee7-057e7bc5c925}', + 'contract_ids': ['@mozilla.org/intl/converter-output-stream;1'], + 'type': 'nsConverterOutputStream', + 'headers': ['/intl/uconv/nsConverterOutputStream.h'], + }, + { + 'cid': '{0a698c44-3bff-11d4-9649-00c0ca135b4e}', + 'contract_ids': ['@mozilla.org/intl/scriptableunicodeconverter'], + 'type': 'nsScriptableUnicodeConverter', + 'headers': ['/intl/uconv/nsScriptableUConv.h'], + }, + { + 'js_name': 'textToSubURI', + 'cid': '{8b042e22-6f87-11d3-b3c8-00805f8a6670}', + 'contract_ids': ['@mozilla.org/intl/texttosuburi;1'], + 'interfaces': ['nsITextToSubURI'], + 'type': 'nsTextToSubURI', + 'headers': ['/intl/uconv/nsTextToSubURI.h'], + }, +] diff --git a/intl/uconv/crashtests/563618.html b/intl/uconv/crashtests/563618.html new file mode 100644 index 0000000000..e36b664762 --- /dev/null +++ b/intl/uconv/crashtests/563618.html @@ -0,0 +1,12 @@ + + + + + Serbian Glyph Test + + + +

Ž

+ + diff --git a/intl/uconv/crashtests/crashtests.list b/intl/uconv/crashtests/crashtests.list new file mode 100644 index 0000000000..6c54a699c1 --- /dev/null +++ b/intl/uconv/crashtests/crashtests.list @@ -0,0 +1,2 @@ +load 563618.html +load omt-non-utf-8-jsurl.html diff --git a/intl/uconv/crashtests/omt-non-utf-8-jsurl.html b/intl/uconv/crashtests/omt-non-utf-8-jsurl.html new file mode 100644 index 0000000000..033e38a280 --- /dev/null +++ b/intl/uconv/crashtests/omt-non-utf-8-jsurl.html @@ -0,0 +1,14 @@ + + + + + Test for off the main thread non-UTF-8 javascript: URL + + + + + diff --git a/intl/uconv/directory.txt b/intl/uconv/directory.txt new file mode 100644 index 0000000000..2b6be7af7f --- /dev/null +++ b/intl/uconv/directory.txt @@ -0,0 +1,32 @@ +Directory Structure : +================================ + +idl - public .idl files +public - public header file +src - source directory of charset converter manager and utilities, and + charset converters for ISO-8859-1, CP1252, MacRoman and UTF-8 +tests - tests program and application for charset converter +tests/unit - xpcshell tests +tools - tools to build the tables used by the converters +util - utility functions used by the converters + +The following directories contain different charset converters: + +ucvcn - Simplified Chinese charsets - GB2312, HZ, ISO-2022-CN, GBK, GB18030 +ucvja - Japanese charsets - Shift-JIS, ISO-2022-JP, EUC-JP +ucvko - Korean charsets - ISO-2022-KR, EUC-KR, Johab +ucvlatin - Latin charsets and others - ISO-8859-x, CP1250-1258 + CP866, 874, KOI8, + Mac charsets, TIS620, UTF16 +ucvtw - Traditional Chinese charsets Set 1 - Big5 +ucvtw2 - Traditional Chinese charsets Set 2 - EUC-TW + +Within the directories containing charset converters: + +*.ut - tables used to convert to Unicode from a charset +*.uf - tables used to convert to a charset from Unicode + +The following directories are obsolete and should not be used: + +ucvth +ucvvt diff --git a/intl/uconv/moz.build b/intl/uconv/moz.build 
new file mode 100644 index 0000000000..f21e4055f9 --- /dev/null +++ b/intl/uconv/moz.build @@ -0,0 +1,32 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +TEST_DIRS += ["tests"] + +XPIDL_SOURCES += [ + "nsIScriptableUConv.idl", + "nsITextToSubURI.idl", +] + +XPIDL_MODULE = "uconv" + +EXPORTS += [ + "nsConverterInputStream.h", + "nsTextToSubURI.h", +] + +UNIFIED_SOURCES += [ + "nsConverterInputStream.cpp", + "nsConverterOutputStream.cpp", + "nsScriptableUConv.cpp", + "nsTextToSubURI.cpp", +] + +XPCOM_MANIFESTS += [ + "components.conf", +] + +FINAL_LIBRARY = "xul" diff --git a/intl/uconv/nsConverterInputStream.cpp b/intl/uconv/nsConverterInputStream.cpp new file mode 100644 index 0000000000..e3efdbc146 --- /dev/null +++ b/intl/uconv/nsConverterInputStream.cpp @@ -0,0 +1,256 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsConverterInputStream.h" +#include "nsIInputStream.h" +#include "nsReadLine.h" +#include "nsStreamUtils.h" + +#include +#include + +using namespace mozilla; + +#define CONVERTER_BUFFER_SIZE 8192 + +NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream, + nsIUnicharInputStream, nsIUnicharLineInputStream) + +NS_IMETHODIMP +nsConverterInputStream::Init(nsIInputStream* aStream, const char* aCharset, + int32_t aBufferSize, char16_t aReplacementChar) { + nsAutoCString label; + if (!aCharset) { + label.AssignLiteral("UTF-8"); + } else { + label = aCharset; + } + + auto encoding = Encoding::ForLabelNoReplacement(label); + if (!encoding) { + return NS_ERROR_UCONV_NOCONV; + } + // Previously, the implementation auto-switched only + // between the two UTF-16 variants and only when + // initialized with an endianness-unspecific label. + mConverter = encoding->NewDecoder(); + + size_t outputBufferSize; + if (aBufferSize <= 0) { + aBufferSize = CONVERTER_BUFFER_SIZE; + outputBufferSize = CONVERTER_BUFFER_SIZE; + } else { + // NetUtil.jsm assumes that if buffer size equals + // the input size, the whole stream will be processed + // as one readString. This is not true with encoding_rs, + // because encoding_rs might want to see space for a + // surrogate pair, so let's compute a larger output + // buffer length. + CheckedInt needed = mConverter->MaxUTF16BufferLength(aBufferSize); + if (!needed.isValid()) { + return NS_ERROR_OUT_OF_MEMORY; + } + outputBufferSize = needed.value(); + } + + // set up our buffers. + if (!mByteData.SetCapacity(aBufferSize, mozilla::fallible) || + !mUnicharData.SetLength(outputBufferSize, mozilla::fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + + mInput = aStream; + mErrorsAreFatal = !aReplacementChar; + return NS_OK; +} + +NS_IMETHODIMP +nsConverterInputStream::Close() { + nsresult rv = mInput ? 
mInput->Close() : NS_OK; + mLineBuffer = nullptr; + mInput = nullptr; + mConverter = nullptr; + mByteData.Clear(); + mUnicharData.Clear(); + return rv; +} + +NS_IMETHODIMP +nsConverterInputStream::Read(char16_t* aBuf, uint32_t aCount, + uint32_t* aReadCount) { + NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); + uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; + if (0 == readCount) { + // Fill the unichar buffer + readCount = Fill(&mLastErrorCode); + if (readCount == 0) { + *aReadCount = 0; + return mLastErrorCode; + } + } + if (readCount > aCount) { + readCount = aCount; + } + memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset, + readCount * sizeof(char16_t)); + mUnicharDataOffset += readCount; + *aReadCount = readCount; + return NS_OK; +} + +NS_IMETHODIMP +nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, + void* aClosure, uint32_t aCount, + uint32_t* aReadCount) { + NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); + uint32_t codeUnitsToWrite = mUnicharDataLength - mUnicharDataOffset; + if (0 == codeUnitsToWrite) { + // Fill the unichar buffer + codeUnitsToWrite = Fill(&mLastErrorCode); + if (codeUnitsToWrite == 0) { + *aReadCount = 0; + return mLastErrorCode; + } + } + + if (codeUnitsToWrite > aCount) { + codeUnitsToWrite = aCount; + } + + uint32_t codeUnitsWritten; + uint32_t totalCodeUnitsWritten = 0; + + while (codeUnitsToWrite) { + nsresult rv = + aWriter(this, aClosure, mUnicharData.Elements() + mUnicharDataOffset, + totalCodeUnitsWritten, codeUnitsToWrite, &codeUnitsWritten); + if (NS_FAILED(rv)) { + // don't propagate errors to the caller + break; + } + + codeUnitsToWrite -= codeUnitsWritten; + totalCodeUnitsWritten += codeUnitsWritten; + mUnicharDataOffset += codeUnitsWritten; + } + + *aReadCount = totalCodeUnitsWritten; + + return NS_OK; +} + +NS_IMETHODIMP +nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString, + uint32_t* aReadCount) { + 
NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); + uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; + if (0 == readCount) { + // Fill the unichar buffer + readCount = Fill(&mLastErrorCode); + if (readCount == 0) { + *aReadCount = 0; + return mLastErrorCode; + } + } + if (readCount > aCount) { + readCount = aCount; + } + const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset; + aString.Assign(buf, readCount); + mUnicharDataOffset += readCount; + *aReadCount = readCount; + return NS_OK; +} + +uint32_t nsConverterInputStream::Fill(nsresult* aErrorCode) { + if (!mInput) { + // We already closed the stream! + *aErrorCode = NS_BASE_STREAM_CLOSED; + return 0; + } + + if (NS_FAILED(mLastErrorCode)) { + // We failed to completely convert last time, and error-recovery + // is disabled. We will fare no better this time, so... + *aErrorCode = mLastErrorCode; + return 0; + } + + // mUnicharData.Length() is the buffer length, not the fill status. + // mUnicharDataLength reflects the current fill status. + mUnicharDataLength = 0; + // Whenever we convert, mUnicharData is logically empty. + mUnicharDataOffset = 0; + + // Continue trying to read from the source stream until we successfully decode + // a character or encounter an error, as returning `0` here implies that the + // stream is complete. + // + // If the converter has been cleared, we've fully consumed the stream, and + // want to report EOF. + while (mUnicharDataLength == 0 && mConverter) { + // We assume a many to one conversion and are using equal sizes for + // the two buffers. However if an error happens at the very start + // of a byte buffer we may end up in a situation where n bytes lead + // to n+1 unicode chars. Thus we need to keep track of the leftover + // bytes as we convert. 
+ + uint32_t nb; + *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb); + if (NS_FAILED(*aErrorCode)) { + return 0; + } + + NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(), + "mByteData is lying to us somewhere"); + + // If `NS_FillArray` failed to read any new bytes, this is the last read, + // and we're at the end of the stream. + bool last = (nb == 0); + + // Now convert as much of the byte buffer to unicode as possible + auto src = AsBytes(Span(mByteData)); + auto dst = Span(mUnicharData); + + // Truncation from size_t to uint32_t below is OK, because the sizes + // are bounded by the lengths of mByteData and mUnicharData. + uint32_t result; + size_t read; + size_t written; + if (mErrorsAreFatal) { + std::tie(result, read, written) = + mConverter->DecodeToUTF16WithoutReplacement(src, dst, last); + } else { + std::tie(result, read, written, std::ignore) = + mConverter->DecodeToUTF16(src, dst, last); + } + mLeftOverBytes = mByteData.Length() - read; + mUnicharDataLength = written; + // Clear `mConverter` if we reached the end of the stream, as we can't + // call methods on it anymore. This will also signal EOF to the caller + // through the loop condition. + if (last) { + MOZ_ASSERT(mLeftOverBytes == 0, + "Failed to read all bytes on the last pass?"); + mConverter = nullptr; + } + // If we got a decode error, we're done. 
+ if (result != kInputEmpty && result != kOutputFull) { + MOZ_ASSERT(mErrorsAreFatal, "How come DecodeToUTF16() reported error?"); + *aErrorCode = NS_ERROR_UDEC_ILLEGALINPUT; + return 0; + } + } + *aErrorCode = NS_OK; + return mUnicharDataLength; +} + +NS_IMETHODIMP +nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult) { + if (!mLineBuffer) { + mLineBuffer = MakeUnique>(); + } + return NS_ReadLine(this, mLineBuffer.get(), aLine, aResult); +} diff --git a/intl/uconv/nsConverterInputStream.h b/intl/uconv/nsConverterInputStream.h new file mode 100644 index 0000000000..55555fc679 --- /dev/null +++ b/intl/uconv/nsConverterInputStream.h @@ -0,0 +1,64 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsConverterInputStream_h +#define nsConverterInputStream_h + +#include "nsIInputStream.h" +#include "nsIConverterInputStream.h" +#include "nsIUnicharLineInputStream.h" +#include "nsTArray.h" +#include "nsCOMPtr.h" +#include "nsReadLine.h" +#include "mozilla/Encoding.h" +#include "mozilla/UniquePtr.h" + +#define NS_CONVERTERINPUTSTREAM_CONTRACTID \ + "@mozilla.org/intl/converter-input-stream;1" + +// {2BC2AD62-AD5D-4b7b-A9DB-F74AE203C527} +#define NS_CONVERTERINPUTSTREAM_CID \ + { \ + 0x2bc2ad62, 0xad5d, 0x4b7b, { \ + 0xa9, 0xdb, 0xf7, 0x4a, 0xe2, 0x3, 0xc5, 0x27 \ + } \ + } + +class nsConverterInputStream : public nsIConverterInputStream, + public nsIUnicharLineInputStream { + public: + NS_DECL_ISUPPORTS + NS_DECL_NSIUNICHARINPUTSTREAM + NS_DECL_NSIUNICHARLINEINPUTSTREAM + NS_DECL_NSICONVERTERINPUTSTREAM + + nsConverterInputStream() + : mLastErrorCode(NS_OK), + mLeftOverBytes(0), + mUnicharDataOffset(0), + mUnicharDataLength(0), + mErrorsAreFatal(false), + mLineBuffer(nullptr) {} + + private: + virtual 
~nsConverterInputStream() { Close(); } + + uint32_t Fill(nsresult* aErrorCode); + + mozilla::UniquePtr mConverter; + FallibleTArray mByteData; + FallibleTArray mUnicharData; + nsCOMPtr mInput; + + nsresult mLastErrorCode; + uint32_t mLeftOverBytes; + uint32_t mUnicharDataOffset; + uint32_t mUnicharDataLength; + bool mErrorsAreFatal; + + mozilla::UniquePtr > mLineBuffer; +}; + +#endif diff --git a/intl/uconv/nsConverterOutputStream.cpp b/intl/uconv/nsConverterOutputStream.cpp new file mode 100644 index 0000000000..a24adb0377 --- /dev/null +++ b/intl/uconv/nsConverterOutputStream.cpp @@ -0,0 +1,115 @@ +/* vim:set expandtab ts=4 sw=2 sts=2 cin: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsCOMPtr.h" +#include "nsIOutputStream.h" +#include "nsString.h" +#include "nsConverterOutputStream.h" +#include "mozilla/Encoding.h" + +using namespace mozilla; + +NS_IMPL_ISUPPORTS(nsConverterOutputStream, nsIUnicharOutputStream, + nsIConverterOutputStream) + +nsConverterOutputStream::~nsConverterOutputStream() { Close(); } + +NS_IMETHODIMP +nsConverterOutputStream::Init(nsIOutputStream* aOutStream, + const char* aCharset) { + MOZ_ASSERT(aOutStream, "Null output stream!"); + + const Encoding* encoding; + if (!aCharset) { + encoding = UTF_8_ENCODING; + } else { + encoding = Encoding::ForLabelNoReplacement(MakeStringSpan(aCharset)); + if (!encoding || encoding == UTF_16LE_ENCODING || + encoding == UTF_16BE_ENCODING) { + return NS_ERROR_UCONV_NOCONV; + } + } + + mConverter = encoding->NewEncoder(); + + mOutStream = aOutStream; + + return NS_OK; +} + +NS_IMETHODIMP +nsConverterOutputStream::Write(uint32_t aCount, const char16_t* aChars, + bool* aSuccess) { + if (!mOutStream) { + NS_ASSERTION(!mConverter, "Closed streams shouldn't have converters"); + return NS_BASE_STREAM_CLOSED; + } + MOZ_ASSERT(mConverter, 
"Must have a converter when not closed"); + uint8_t buffer[4096]; + auto dst = Span(buffer); + auto src = Span(aChars, aCount); + for (;;) { + uint32_t result; + size_t read; + size_t written; + std::tie(result, read, written, std::ignore) = + mConverter->EncodeFromUTF16(src, dst, false); + src = src.From(read); + uint32_t streamWritten; + nsresult rv = mOutStream->Write(reinterpret_cast(dst.Elements()), + written, &streamWritten); + *aSuccess = NS_SUCCEEDED(rv) && written == streamWritten; + if (!(*aSuccess)) { + return rv; + } + if (result == kInputEmpty) { + return NS_OK; + } + } +} + +NS_IMETHODIMP +nsConverterOutputStream::WriteString(const nsAString& aString, bool* aSuccess) { + int32_t inLen = aString.Length(); + nsAString::const_iterator i; + aString.BeginReading(i); + return Write(inLen, i.get(), aSuccess); +} + +NS_IMETHODIMP +nsConverterOutputStream::Flush() { + if (!mOutStream) return NS_OK; // Already closed. + + // If we are encoding to ISO-2022-JP, potentially + // transition back to the ASCII state. The buffer + // needs to be large enough for an additional NCR, + // though. + uint8_t buffer[12]; + auto dst = Span(buffer); + Span src(nullptr); + uint32_t result; + size_t written; + std::tie(result, std::ignore, written, std::ignore) = + mConverter->EncodeFromUTF16(src, dst, true); + MOZ_ASSERT(result == kInputEmpty); + uint32_t streamWritten; + if (!written) { + return NS_OK; + } + return mOutStream->Write(reinterpret_cast(dst.Elements()), written, + &streamWritten); +} + +NS_IMETHODIMP +nsConverterOutputStream::Close() { + if (!mOutStream) return NS_OK; // Already closed. + + nsresult rv1 = Flush(); + + nsresult rv2 = mOutStream->Close(); + mOutStream = nullptr; + mConverter = nullptr; + return NS_FAILED(rv1) ? 
rv1 : rv2; +} diff --git a/intl/uconv/nsConverterOutputStream.h b/intl/uconv/nsConverterOutputStream.h new file mode 100644 index 0000000000..74b873acd5 --- /dev/null +++ b/intl/uconv/nsConverterOutputStream.h @@ -0,0 +1,39 @@ +/* vim:set expandtab ts=4 sw=2 sts=2 cin: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef NSCONVERTEROUTPUTSTREAM_H_ +#define NSCONVERTEROUTPUTSTREAM_H_ + +#include "nsIConverterOutputStream.h" +#include "nsCOMPtr.h" +#include "mozilla/Attributes.h" +#include "mozilla/Encoding.h" + +class nsIOutputStream; + +/* ff8780a5-bbb1-4bc5-8ee7-057e7bc5c925 */ +#define NS_CONVERTEROUTPUTSTREAM_CID \ + { \ + 0xff8780a5, 0xbbb1, 0x4bc5, { \ + 0x8e, 0xe7, 0x05, 0x7e, 0x7b, 0xc5, 0xc9, 0x25 \ + } \ + } + +class nsConverterOutputStream final : public nsIConverterOutputStream { + public: + nsConverterOutputStream() = default; + + NS_DECL_ISUPPORTS + NS_DECL_NSIUNICHAROUTPUTSTREAM + NS_DECL_NSICONVERTEROUTPUTSTREAM + + private: + ~nsConverterOutputStream(); + + mozilla::UniquePtr mConverter; + nsCOMPtr mOutStream; +}; + +#endif diff --git a/intl/uconv/nsIScriptableUConv.idl b/intl/uconv/nsIScriptableUConv.idl new file mode 100644 index 0000000000..7f1334c0c3 --- /dev/null +++ b/intl/uconv/nsIScriptableUConv.idl @@ -0,0 +1,64 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsISupports.idl" + +interface nsIInputStream; + +%{C++ +// {0A698C44-3BFF-11d4-9649-00C0CA135B4E} +#define NS_ISCRIPTABLEUNICODECONVERTER_CID { 0x0A698C44, 0x3BFF, 0x11d4, { 0x96, 0x49, 0x00, 0xC0, 0xCA, 0x13, 0x5B, 0x4E } } +#define NS_ISCRIPTABLEUNICODECONVERTER_CONTRACTID "@mozilla.org/intl/scriptableunicodeconverter" +%} + +/** + * In new code, please use the WebIDL TextDecoder and TextEncoder + * instead. They represent bytes as Uint8Array (or as view to such + * array), which is the current best practice for representing bytes + * in JavaScript. + * + * This interface converts between UTF-16 in JavaScript strings + * and bytes transported as the unsigned value of each byte + * transported in a code unit of the same numeric value in + * a JavaScript string. + * + * @created 8/Jun/2000 + * @author Makoto Kato [m_kato@ga2.so-net.ne.jp] + */ +[scriptable, uuid(f36ee324-5c1c-437f-ba10-2b4db7a18031)] +interface nsIScriptableUnicodeConverter : nsISupports +{ + /** + * Converts the data from Unicode to one Charset. + * Returns the converted string. After converting, Finish should be called + * and its return value appended to this return value. + */ + ACString ConvertFromUnicode(in AString aSrc); + + /** + * Returns the terminator string. + * Should be called after ConvertFromUnicode() and appended to that + * function's return value. + */ + ACString Finish(); + + /** + * Converts the data from one Charset to Unicode. + */ + AString ConvertToUnicode(in ACString aSrc); + + /** + * Current character set. + * + * @throw NS_ERROR_UCONV_NOCONV + * The requested charset is not supported. 
+ */ + attribute ACString charset; + + /** + * Meaningless + */ + attribute boolean isInternal; +}; diff --git a/intl/uconv/nsITextToSubURI.idl b/intl/uconv/nsITextToSubURI.idl new file mode 100644 index 0000000000..3bb404e414 --- /dev/null +++ b/intl/uconv/nsITextToSubURI.idl @@ -0,0 +1,60 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + + +%{C++ +// {8B042E22-6F87-11d3-B3C8-00805F8A6670} +#define NS_TEXTTOSUBURI_CID { 0x8b042e22, 0x6f87, 0x11d3, { 0xb3, 0xc8, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } } +#define NS_ITEXTTOSUBURI_CONTRACTID "@mozilla.org/intl/texttosuburi;1" +%} + +[scriptable, uuid(8B042E24-6F87-11d3-B3C8-00805F8A6670)] +interface nsITextToSubURI : nsISupports +{ + ACString ConvertAndEscape(in ACString charset, in AString text); + AString UnEscapeAndConvert(in ACString charset, in ACString text); + + /** + * Unescapes the given URI fragment (for UI purpose only) + * Note: + *
    + *
  • escaping back the result (unescaped string) is not guaranteed to + * give the original escaped string + *
  • The URI fragment (escaped) is assumed to be in UTF-8 and converted + * to AString (UTF-16) + *
  • In case of successful conversion any resulting character listed + * in netwerk/dns/IDNCharacterBlocklist.inc (except space) is escaped + *
  • Always succeeds (callers don't need to do error checking) + *
+ * + * @param aURIFragment the URI (or URI fragment) to unescape + * @param aDontEscape if true, IDN blocklisted characters are not re-escaped + * @return Unescaped aURIFragment converted to unicode + */ + AString unEscapeURIForUI(in AUTF8String aURIFragment, + [optional] in boolean aDontEscape); +%{C++ + nsresult UnEscapeURIForUI(const nsACString& aURIFragment, + nsAString& _retval) { + return UnEscapeURIForUI(aURIFragment, false, _retval); + } +%} + + /** + * Unescapes only non ASCII characters in the given URI fragment + * note: this method assumes the URI as UTF-8 and falls back to the given + * charset if the charset is an ASCII superset + * + * @param aCharset the charset to convert from + * @param aURIFragment the URI (or URI fragment) to unescape + * @return Unescaped aURIFragment converted to unicode + * @throws NS_ERROR_UCONV_NOCONV when there is no decoder for aCharset + * or NS_ERROR_UDEC_ILLEGALINPUT in case of conversion failure + */ + [binaryname(UnEscapeNonAsciiURIJS)] + AString unEscapeNonAsciiURI(in ACString aCharset, in AUTF8String aURIFragment); +}; diff --git a/intl/uconv/nsScriptableUConv.cpp b/intl/uconv/nsScriptableUConv.cpp new file mode 100644 index 0000000000..e56d578efd --- /dev/null +++ b/intl/uconv/nsScriptableUConv.cpp @@ -0,0 +1,192 @@ + +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsString.h" +#include "nsIScriptableUConv.h" +#include "nsScriptableUConv.h" +#include "nsComponentManagerUtils.h" + +#include + +using namespace mozilla; + +/* Implementation file */ +NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter) + +nsScriptableUnicodeConverter::nsScriptableUnicodeConverter() + : mIsInternal(false) {} + +nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() = default; + +NS_IMETHODIMP +nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc, + nsACString& _retval) { + if (!mEncoder) return NS_ERROR_FAILURE; + + // We can compute the length without replacement, because the + // the replacement is only one byte long and a mappable character + // would always output something, i.e. at least one byte. + // When encoding to ISO-2022-JP, unmappables shouldn't be able + // to cause more escape sequences to be emitted than the mappable + // worst case where every input character causes an escape into + // a different state. 
+ CheckedInt needed = + mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aSrc.Length()); + if (!needed.isValid() || needed.value() > UINT32_MAX) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto dstChars = _retval.GetMutableData(needed.value(), fallible); + if (!dstChars) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto src = Span(aSrc); + auto dst = AsWritableBytes(*dstChars); + size_t totalWritten = 0; + for (;;) { + auto [result, read, written] = + mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, false); + if (result != kInputEmpty && result != kOutputFull) { + MOZ_RELEASE_ASSERT(written < dst.Length(), + "Unmappables with one-byte replacement should not " + "exceed mappable worst case."); + dst[written++] = '?'; + } + totalWritten += written; + if (result == kInputEmpty) { + MOZ_ASSERT(totalWritten <= UINT32_MAX); + if (!_retval.SetLength(totalWritten, fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + return NS_OK; + } + src = src.From(read); + dst = dst.From(written); + } +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::Finish(nsACString& _retval) { + // The documentation for this method says it should be called after + // ConvertFromUnicode(). However, our own tests called it after + // convertFromByteArray(), i.e. when *decoding*. + // Assuming that there exists extensions that similarly call + // this at the wrong time, let's deal. In general, it is a design + // error for this class to handle conversions in both directions. + if (!mEncoder) { + _retval.Truncate(); + mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder); + return NS_OK; + } + // If we are encoding to ISO-2022-JP, potentially + // transition back to the ASCII state. The buffer + // needs to be large enough for an additional NCR, + // though. 
+ _retval.SetLength(13); + auto dst = AsWritableBytes(_retval.GetMutableData(13)); + Span src(nullptr); + uint32_t result; + size_t read; + size_t written; + std::tie(result, read, written, std::ignore) = + mEncoder->EncodeFromUTF16(src, dst, true); + MOZ_ASSERT(!read); + MOZ_ASSERT(result == kInputEmpty); + _retval.SetLength(written); + + mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder); + mEncoder->Encoding()->NewEncoderInto(*mEncoder); + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc, + nsAString& _retval) { + if (!mDecoder) return NS_ERROR_FAILURE; + + uint32_t length = aSrc.Length(); + + CheckedInt needed = mDecoder->MaxUTF16BufferLength(length); + if (!needed.isValid() || needed.value() > UINT32_MAX) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto dst = _retval.GetMutableData(needed.value(), fallible); + if (!dst) { + return NS_ERROR_OUT_OF_MEMORY; + } + + auto src = + Span(reinterpret_cast(aSrc.BeginReading()), length); + uint32_t result; + size_t read; + size_t written; + // The UTF-8 decoder used to throw regardless of the error behavior. + // Simulating the old behavior for compatibility with legacy callers. + // If callers want control over the behavior, they should switch to + // TextDecoder. 
+ if (mDecoder->Encoding() == UTF_8_ENCODING) { + std::tie(result, read, written) = + mDecoder->DecodeToUTF16WithoutReplacement(src, *dst, false); + if (result != kInputEmpty) { + return NS_ERROR_UDEC_ILLEGALINPUT; + } + } else { + std::tie(result, read, written, std::ignore) = + mDecoder->DecodeToUTF16(src, *dst, false); + } + MOZ_ASSERT(result == kInputEmpty); + MOZ_ASSERT(read == length); + MOZ_ASSERT(written <= needed.value()); + if (!_retval.SetLength(written, fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::GetCharset(nsACString& aCharset) { + if (!mDecoder) { + aCharset.Truncate(); + } else { + mDecoder->Encoding()->Name(aCharset); + } + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::SetCharset(const nsACString& aCharset) { + return InitConverter(aCharset); +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::GetIsInternal(bool* aIsInternal) { + *aIsInternal = mIsInternal; + return NS_OK; +} + +NS_IMETHODIMP +nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal) { + mIsInternal = aIsInternal; + return NS_OK; +} + +nsresult nsScriptableUnicodeConverter::InitConverter( + const nsACString& aCharset) { + mEncoder = nullptr; + mDecoder = nullptr; + + auto encoding = Encoding::ForLabelNoReplacement(aCharset); + if (!encoding) { + return NS_ERROR_UCONV_NOCONV; + } + if (!(encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING)) { + mEncoder = encoding->NewEncoder(); + } + mDecoder = encoding->NewDecoderWithBOMRemoval(); + return NS_OK; +} diff --git a/intl/uconv/nsScriptableUConv.h b/intl/uconv/nsScriptableUConv.h new file mode 100644 index 0000000000..059a4b430c --- /dev/null +++ b/intl/uconv/nsScriptableUConv.h @@ -0,0 +1,34 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __nsScriptableUConv_h_ +#define __nsScriptableUConv_h_ + +#include "nsIScriptableUConv.h" +#include "nsCOMPtr.h" +#include "mozilla/Encoding.h" + +class nsScriptableUnicodeConverter : public nsIScriptableUnicodeConverter { + public: + NS_DECL_ISUPPORTS + NS_DECL_NSISCRIPTABLEUNICODECONVERTER + + nsScriptableUnicodeConverter(); + + protected: + virtual ~nsScriptableUnicodeConverter(); + + mozilla::UniquePtr mEncoder; + mozilla::UniquePtr mDecoder; + bool mIsInternal; + + nsresult FinishWithLength(char** _retval, int32_t* aLength); + nsresult ConvertFromUnicodeWithLength(const nsAString& aSrc, int32_t* aOutLen, + char** _retval); + + nsresult InitConverter(const nsACString& aCharset); +}; + +#endif diff --git a/intl/uconv/nsTextToSubURI.cpp b/intl/uconv/nsTextToSubURI.cpp new file mode 100644 index 0000000000..e70d9ccbd8 --- /dev/null +++ b/intl/uconv/nsTextToSubURI.cpp @@ -0,0 +1,178 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#include "nsString.h" +#include "nsITextToSubURI.h" +#include "nsEscape.h" +#include "nsTextToSubURI.h" +#include "nsCRT.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/Encoding.h" +#include "mozilla/Preferences.h" +#include "mozilla/TextUtils.h" +#include "mozilla/Utf8.h" + +using namespace mozilla; + +nsTextToSubURI::~nsTextToSubURI() = default; + +NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI) + +NS_IMETHODIMP +nsTextToSubURI::ConvertAndEscape(const nsACString& aCharset, + const nsAString& aText, nsACString& aOut) { + auto encoding = Encoding::ForLabelNoReplacement(aCharset); + if (!encoding) { + aOut.Truncate(); + return NS_ERROR_UCONV_NOCONV; + } + nsresult rv; + nsAutoCString intermediate; + std::tie(rv, std::ignore) = encoding->Encode(aText, intermediate); + if (NS_FAILED(rv)) { + aOut.Truncate(); + return rv; + } + bool ok = NS_Escape(intermediate, aOut, url_XPAlphas); + if (!ok) { + aOut.Truncate(); + return NS_ERROR_OUT_OF_MEMORY; + } + return NS_OK; +} + +NS_IMETHODIMP +nsTextToSubURI::UnEscapeAndConvert(const nsACString& aCharset, + const nsACString& aText, nsAString& aOut) { + auto encoding = Encoding::ForLabelNoReplacement(aCharset); + if (!encoding) { + aOut.Truncate(); + return NS_ERROR_UCONV_NOCONV; + } + nsAutoCString unescaped(aText); + NS_UnescapeURL(unescaped); + auto rv = encoding->DecodeWithoutBOMHandling(unescaped, aOut); + if (NS_SUCCEEDED(rv)) { + return NS_OK; + } + return rv; +} + +static bool statefulCharset(const char* charset) { + // HZ, UTF-7 and the CN and KR ISO-2022 variants are no longer in + // mozilla-central but keeping them here just in case for the benefit of + // comm-central. 
+ if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-") - 1) || + !nsCRT::strcasecmp(charset, "UTF-7") || + !nsCRT::strcasecmp(charset, "HZ-GB-2312")) + return true; + + return false; +} + +// static +nsresult nsTextToSubURI::convertURItoUnicode(const nsCString& aCharset, + const nsCString& aURI, + nsAString& aOut) { + // check for 7bit encoding the data may not be ASCII after we decode + bool isStatefulCharset = statefulCharset(aCharset.get()); + + if (!isStatefulCharset) { + if (IsAscii(aURI)) { + CopyASCIItoUTF16(aURI, aOut); + return NS_OK; + } + if (IsUtf8(aURI)) { + CopyUTF8toUTF16(aURI, aOut); + return NS_OK; + } + } + + // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8. + NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG); + + auto encoding = Encoding::ForLabelNoReplacement(aCharset); + if (!encoding) { + aOut.Truncate(); + return NS_ERROR_UCONV_NOCONV; + } + return encoding->DecodeWithoutBOMHandlingAndWithoutReplacement(aURI, aOut); +} + +NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString& aURIFragment, + bool aDontEscape, + nsAString& _retval) { + nsAutoCString unescapedSpec; + // skip control octets (0x00 - 0x1f and 0x7f) when unescaping + NS_UnescapeURL(PromiseFlatCString(aURIFragment), + esc_SkipControl | esc_AlwaysCopy, unescapedSpec); + + // in case of failure, return escaped URI + // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte + // sequences are also considered failure in this context + if (convertURItoUnicode("UTF-8"_ns, unescapedSpec, _retval) != NS_OK) { + // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8 + CopyUTF8toUTF16(aURIFragment, _retval); + } + + if (aDontEscape) { + return NS_OK; + } + + // If there are any characters that are unsafe for URIs, reescape those. 
+ if (mIDNBlocklist.IsEmpty()) { + mozilla::net::InitializeBlocklist(mIDNBlocklist); + // we allow SPACE and IDEOGRAPHIC SPACE in this method + mozilla::net::RemoveCharFromBlocklist(u' ', mIDNBlocklist); + mozilla::net::RemoveCharFromBlocklist(0x3000, mIDNBlocklist); + } + + MOZ_ASSERT(!mIDNBlocklist.IsEmpty()); + const nsPromiseFlatString& unescapedResult = PromiseFlatString(_retval); + nsString reescapedSpec; + _retval = NS_EscapeURL( + unescapedResult, + [&](char16_t aChar) -> bool { + return mozilla::net::CharInBlocklist(aChar, mIDNBlocklist); + }, + reescapedSpec); + + return NS_OK; +} + +NS_IMETHODIMP +nsTextToSubURI::UnEscapeNonAsciiURIJS(const nsACString& aCharset, + const nsACString& aURIFragment, + nsAString& _retval) { + return UnEscapeNonAsciiURI(aCharset, aURIFragment, _retval); +} + +// static +nsresult nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString& aCharset, + const nsACString& aURIFragment, + nsAString& _retval) { + nsAutoCString unescapedSpec; + NS_UnescapeURL(PromiseFlatCString(aURIFragment), + esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec); + // leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII + // superset since converting "http:" with such an encoding is always a bad + // idea. + if (!IsUtf8(unescapedSpec) && + (aCharset.LowerCaseEqualsLiteral("utf-16") || + aCharset.LowerCaseEqualsLiteral("utf-16be") || + aCharset.LowerCaseEqualsLiteral("utf-16le") || + aCharset.LowerCaseEqualsLiteral("utf-7") || + aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))) { + CopyASCIItoUTF16(aURIFragment, _retval); + return NS_OK; + } + + nsresult rv = + convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, _retval); + // NS_OK_UDEC_MOREINPUT is a success code, so caller can't catch the error + // if the string ends with a valid (but incomplete) sequence. + return rv == NS_OK_UDEC_MOREINPUT ? 
NS_ERROR_UDEC_ILLEGALINPUT : rv; +} + +//---------------------------------------------------------------------- diff --git a/intl/uconv/nsTextToSubURI.h b/intl/uconv/nsTextToSubURI.h new file mode 100644 index 0000000000..1eaeb554dc --- /dev/null +++ b/intl/uconv/nsTextToSubURI.h @@ -0,0 +1,36 @@ +// -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- +// vim: set ts=2 et sw=2 tw=80: +// This Source Code is subject to the terms of the Mozilla Public License +// version 2.0 (the "License"). You can obtain a copy of the License at +// http://mozilla.org/MPL/2.0/. +#ifndef nsTextToSubURI_h__ +#define nsTextToSubURI_h__ + +#include "nsITextToSubURI.h" +#include "nsString.h" +#include "nsTArray.h" +#include "mozilla/net/IDNBlocklistUtils.h" + +class nsTextToSubURI : public nsITextToSubURI { + NS_DECL_ISUPPORTS + NS_DECL_NSITEXTTOSUBURI + + // Thread-safe function for C++ callers + static nsresult UnEscapeNonAsciiURI(const nsACString& aCharset, + const nsACString& aURIFragment, + nsAString& _retval); + + private: + virtual ~nsTextToSubURI(); + + // We assume that the URI is encoded as UTF-8. + static nsresult convertURItoUnicode(const nsCString& aCharset, + const nsCString& aURI, + nsAString& _retval); + + // Characters defined in netwerk/dns/IDNCharacterBlocklist.inc or via the + // network.IDN.extra_allowed_chars and network.IDN.extra_blocked_chars prefs. + nsTArray mIDNBlocklist; +}; + +#endif // nsTextToSubURI_h__ diff --git a/intl/uconv/tests/gtest/TestShortRead.cpp b/intl/uconv/tests/gtest/TestShortRead.cpp new file mode 100644 index 0000000000..393f5e0027 --- /dev/null +++ b/intl/uconv/tests/gtest/TestShortRead.cpp @@ -0,0 +1,109 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "gtest/gtest.h" +#include "mozilla/ErrorNames.h" +#include "nsCOMPtr.h" +#include "nsConverterInputStream.h" +#include "nsIInputStream.h" +#include "nsISupports.h" +#include "nsStringStream.h" + +namespace { + +class ShortReadWrapper final : public nsIInputStream { + public: + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSIINPUTSTREAM + + template + ShortReadWrapper(const uint32_t (&aShortReads)[N], + nsIInputStream* aBaseStream) + : mShortReadIter(std::begin(aShortReads)), + mShortReadEnd(std::end(aShortReads)), + mBaseStream(aBaseStream) {} + + ShortReadWrapper(const ShortReadWrapper&) = delete; + ShortReadWrapper& operator=(const ShortReadWrapper&) = delete; + + private: + ~ShortReadWrapper() = default; + + const uint32_t* mShortReadIter; + const uint32_t* mShortReadEnd; + nsCOMPtr mBaseStream; +}; + +NS_IMPL_ISUPPORTS(ShortReadWrapper, nsIInputStream) + +NS_IMETHODIMP +ShortReadWrapper::Close() { return mBaseStream->Close(); } + +NS_IMETHODIMP +ShortReadWrapper::Available(uint64_t* aAvailable) { + nsresult rv = mBaseStream->Available(aAvailable); + NS_ENSURE_SUCCESS(rv, rv); + + if (mShortReadIter != mShortReadEnd) { + *aAvailable = std::min(uint64_t(*mShortReadIter), *aAvailable); + } + return NS_OK; +} + +NS_IMETHODIMP +ShortReadWrapper::StreamStatus() { return mBaseStream->StreamStatus(); } + +NS_IMETHODIMP +ShortReadWrapper::Read(char* aBuf, uint32_t aCount, uint32_t* _retval) { + if (mShortReadIter != mShortReadEnd) { + aCount = std::min(*mShortReadIter, aCount); + } + + nsresult rv = mBaseStream->Read(aBuf, aCount, _retval); + if (NS_SUCCEEDED(rv) && mShortReadIter != mShortReadEnd) { + ++mShortReadIter; + } + return rv; +} + +NS_IMETHODIMP +ShortReadWrapper::ReadSegments(nsWriteSegmentFun aWriter, void* aClosure, + uint32_t aCount, uint32_t* _retval) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP 
+ShortReadWrapper::IsNonBlocking(bool* _retval) { + return mBaseStream->IsNonBlocking(_retval); +} + +} // namespace + +TEST(ConverterStreamShortRead, ShortRead) +{ + uint8_t bytes[] = {0xd8, 0x35, 0xdc, 0x20}; + nsCOMPtr baseStream; + ASSERT_TRUE(NS_SUCCEEDED(NS_NewByteInputStream(getter_AddRefs(baseStream), + AsChars(mozilla::Span(bytes)), + NS_ASSIGNMENT_COPY))); + + static const uint32_t kShortReads[] = {1, 2, 1}; + nsCOMPtr shortStream = + new ShortReadWrapper(kShortReads, baseStream); + + RefPtr unicharStream = new nsConverterInputStream(); + ASSERT_TRUE(NS_SUCCEEDED( + unicharStream->Init(shortStream, "UTF-16BE", 4096, + nsIConverterInputStream::ERRORS_ARE_FATAL))); + + uint32_t read; + nsAutoString result; + ASSERT_TRUE( + NS_SUCCEEDED(unicharStream->ReadString(UINT32_MAX, result, &read))); + + ASSERT_EQ(read, 2u); + ASSERT_TRUE(result == u"\U0001d420"); +} diff --git a/intl/uconv/tests/gtest/moz.build b/intl/uconv/tests/gtest/moz.build new file mode 100644 index 0000000000..969fb52c7e --- /dev/null +++ b/intl/uconv/tests/gtest/moz.build @@ -0,0 +1,11 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +UNIFIED_SOURCES += [ + "TestShortRead.cpp", +] + +FINAL_LIBRARY = "xul-gtest" diff --git a/intl/uconv/tests/mochitest.toml b/intl/uconv/tests/mochitest.toml new file mode 100644 index 0000000000..32afdba9e4 --- /dev/null +++ b/intl/uconv/tests/mochitest.toml @@ -0,0 +1,25 @@ +[DEFAULT] + +["test_big5_encoder.html"] + +["test_bug335816.html"] + +["test_bug843434.html"] + +["test_bug959058-1.html"] + +["test_bug959058-2.html"] + +["test_long_doc.html"] + +["test_ncr_fallback.html"] + +["test_singlebyte_overconsumption.html"] + +["test_unicode_noncharacterescapes.html"] + +["test_unicode_noncharacters_gb18030.html"] + +["test_unicode_noncharacters_utf8.html"] + +["test_utf8_overconsumption.html"] diff --git a/intl/uconv/tests/moz.build b/intl/uconv/tests/moz.build new file mode 100644 index 0000000000..b9cad75ed7 --- /dev/null +++ b/intl/uconv/tests/moz.build @@ -0,0 +1,13 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +TEST_DIRS += [ + "gtest", +] + +XPCSHELL_TESTS_MANIFESTS += ["unit/xpcshell.toml"] + +MOCHITEST_MANIFESTS += ["mochitest.toml"] diff --git a/intl/uconv/tests/stressgb.pl b/intl/uconv/tests/stressgb.pl new file mode 100644 index 0000000000..5b37fb63fb --- /dev/null +++ b/intl/uconv/tests/stressgb.pl @@ -0,0 +1,23 @@ +#!/usr/bin/perl +use LWP::Simple; +use IO::Handle; +$stdout = *STDOUT; +open(RES , ">resultlog.txt") || die "cannot open result log file"; +#system("rm alldiff.txt in*.txt out*.txt"); +for($i=10;$i<909;$i++) +{ + RES->printf("Test Page %d \n", $i); + $url = "http://people.netscape.com/ftang/testscript/gb18030/gbtext.cgi?page=" . $i; + RES->printf( "URL = %s\n", $url); + $tmpfile = "> in". $i . ".txt"; + open STDOUT, $tmpfile || RES->print("cannot open " . $tmpfile . 
"\n"); + getprint $url; + $cmd2 = "../../../dist/win32_d.obj/bin/nsconv -f GB18030 -t GB18030 in" . $i . ".txt out" . $i . ".txt >err"; + $cmd3 = "diff -u in" . $i . ".txt out" . $i . ".txt >> alldiff.txt"; + RES->printf( "Run '%s'\n", $cmd2); + $st2 = system($cmd2); + RES->printf( "result = '%d'\n", $st2); + RES->printf( "Run '%s'\n", $cmd3); + $st3 = system($cmd3); + RES->printf( "result = '%d'\n", $st3); +} diff --git a/intl/uconv/tests/test_big5_encoder.html b/intl/uconv/tests/test_big5_encoder.html new file mode 100644 index 0000000000..7e86683f00 --- /dev/null +++ b/intl/uconv/tests/test_big5_encoder.html @@ -0,0 +1,43 @@ + + + + + + Test for Unicode non-characters + + + + +
+
+
+Mozilla Bug 912470 +

+ + + diff --git a/intl/uconv/tests/test_singlebyte_overconsumption.html b/intl/uconv/tests/test_singlebyte_overconsumption.html new file mode 100644 index 0000000000..3aeeb928ec --- /dev/null +++ b/intl/uconv/tests/test_singlebyte_overconsumption.html @@ -0,0 +1,33 @@ + + + + + + Test for undefined codepoints + + + + +
+
+
+Mozilla Bug 564679 +

Evil.

+ + Ò -> + + diff --git a/intl/uconv/tests/test_unicode_noncharacterescapes.html b/intl/uconv/tests/test_unicode_noncharacterescapes.html new file mode 100644 index 0000000000..e44f8d782b --- /dev/null +++ b/intl/uconv/tests/test_unicode_noncharacterescapes.html @@ -0,0 +1,303 @@ + + + + + + Test for Unicode non-characters + + + + +
+
+
+Mozilla Bug 445886 +

All good.

+ + + diff --git a/intl/uconv/tests/test_unicode_noncharacters_gb18030.html b/intl/uconv/tests/test_unicode_noncharacters_gb18030.html new file mode 100644 index 0000000000..0c9156d9e3 --- /dev/null +++ b/intl/uconv/tests/test_unicode_noncharacters_gb18030.html @@ -0,0 +1,305 @@ + + + + + + Test for Unicode non-characters + + + + +

+
+Mozilla Bug +445886 +

All good.

+ + + diff --git a/intl/uconv/tests/test_unicode_noncharacters_utf8.html b/intl/uconv/tests/test_unicode_noncharacters_utf8.html new file mode 100644 index 0000000000..ecfdbeae09 --- /dev/null +++ b/intl/uconv/tests/test_unicode_noncharacters_utf8.html @@ -0,0 +1,303 @@ + + + + + + Test for Unicode non-characters + + + + +
+
+
+Mozilla Bug 445886 +

All good.

+ + + diff --git a/intl/uconv/tests/test_utf8_overconsumption.html b/intl/uconv/tests/test_utf8_overconsumption.html new file mode 100644 index 0000000000..25c4a273ea --- /dev/null +++ b/intl/uconv/tests/test_utf8_overconsumption.html @@ -0,0 +1,39 @@ + + + + + + Test for Unicode non-characters + + + + +onload="Inject()"> +
+
+
+Mozilla Bug 445886 +

All good.

+ + + diff --git a/intl/uconv/tests/unit/data/unicode-conversion.utf16.txt b/intl/uconv/tests/unit/data/unicode-conversion.utf16.txt new file mode 100644 index 0000000000..8a28caadfc Binary files /dev/null and b/intl/uconv/tests/unit/data/unicode-conversion.utf16.txt differ diff --git a/intl/uconv/tests/unit/data/unicode-conversion.utf16be.txt b/intl/uconv/tests/unit/data/unicode-conversion.utf16be.txt new file mode 100644 index 0000000000..e1c4e86dba Binary files /dev/null and b/intl/uconv/tests/unit/data/unicode-conversion.utf16be.txt differ diff --git a/intl/uconv/tests/unit/data/unicode-conversion.utf16le.txt b/intl/uconv/tests/unit/data/unicode-conversion.utf16le.txt new file mode 100644 index 0000000000..cde8acb70b Binary files /dev/null and b/intl/uconv/tests/unit/data/unicode-conversion.utf16le.txt differ diff --git a/intl/uconv/tests/unit/data/unicode-conversion.utf8.txt b/intl/uconv/tests/unit/data/unicode-conversion.utf8.txt new file mode 100644 index 0000000000..b45dff35d0 --- /dev/null +++ b/intl/uconv/tests/unit/data/unicode-conversion.utf8.txt @@ -0,0 +1,43 @@ +This is a Unicode converter test file containing Unicode data. Its encoding is +determined by the second-to-last dot-separated component of the filename. For +example, if this file is named foo.utf8.txt, its encoding is UTF-8; if this file +is named foo.utf16le.txt, its encoding is UTF-16LE. This file is marked as +binary in Mozilla's version control system so that it's not accidentally +"mangled". + +The contents of each file must differ ONLY by encoding, so if you edit this file +you must edit all files with the name of this file (with the encoding-specific +part changed). 
+ +== BEGIN UNICODE TEST DATA == + +== U+000000 -- U+00007F == + +BELL: "" +DATA LINK ESCAPE: "" +DELETE: "" + +== U+000080 -- U+0007FF == + +CONTROL: "€" +NO-BREAK SPACE: " " +POUND SIGN: "£" +YEN SIGN: "Â¥" +CURRENCY SIGN: "¢" +LATIN SMALL LETTER SCHWA: "É™" +LATIN LETTER BILABIAL PERCUSSIVE: "ʬ" + +== U+000800 -- U+00FFFF == + +BUGINESE LETTER TA: "ᨈ" +BUGINESE LETTER DA: "ᨉ" +AIRPLANE: "✈" +ZERO WIDTH NO-BREAK SPACE: "" + + +== U+010000 -- U+10FFFF == + +SHAVIAN LETTER IAN: "ð‘¾" +MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE: "ð…¤" +CJK UNIFIED IDEOGRAPH-20000: "ð €€" +(private use U+10FEFF): "ô»¿" diff --git a/intl/uconv/tests/unit/head_charsetConversionTests.js b/intl/uconv/tests/unit/head_charsetConversionTests.js new file mode 100644 index 0000000000..4fbf6e9af7 --- /dev/null +++ b/intl/uconv/tests/unit/head_charsetConversionTests.js @@ -0,0 +1,110 @@ +var CC = Components.Constructor; + +function CreateScriptableConverter() { + var ScriptableUnicodeConverter = CC( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" + ); + + return new ScriptableUnicodeConverter(); +} + +function checkDecode(converter, charset, inText, expectedText) { + try { + converter.charset = charset; + } catch (e) { + converter.charset = "iso-8859-1"; + } + + dump("testing decoding from " + charset + " to Unicode.\n"); + try { + var outText = converter.ConvertToUnicode(inText); + } catch (e) { + outText = "\ufffd"; + } + + if (outText != expectedText) { + for (var i = 0; i < inText.length; ++i) { + var inn = inText[i]; + var out = outText[i]; + var expected = expectedText[i]; + if (out != expected) { + dump( + "Decoding error at position " + + i + + ": for input " + + escape(inn) + + " expected " + + escape(expected) + + " but got " + + escape(out) + + "\n" + ); + } + } + } + Assert.equal(outText, expectedText); +} + +function checkEncode(converter, charset, inText, expectedText) { + try { + converter.charset = charset; + } catch (e) { + 
converter.charset = "iso-8859-1"; + } + + dump("testing encoding from Unicode to " + charset + "\n"); + var outText = converter.ConvertFromUnicode(inText) + converter.Finish(); + + if (outText != expectedText) { + for (var i = 0; i < inText.length; ++i) { + var inn = inText[i]; + var out = outText[i]; + var expected = expectedText[i]; + if (out != expected) { + dump( + "Encoding error at position " + + i + + ": for input " + + escape(inn) + + " expected " + + escape(expected) + + " but got " + + escape(out) + + "\n" + ); + } + } + } + Assert.equal(outText, expectedText); +} + +function testDecodeAliases(aliases, inString, expectedString) { + var converter = CreateScriptableConverter(); + for (var i = 0; i < aliases.length; ++i) { + checkDecode(converter, aliases[i], inString, expectedString); + } +} + +function testEncodeAliases(aliases, inString, expectedString) { + var converter = CreateScriptableConverter(); + for (var i = 0; i < aliases.length; ++i) { + checkEncode(converter, aliases[i], inString, expectedString); + } +} + +function testDecodeAliasesInternal(aliases, inString, expectedString) { + var converter = CreateScriptableConverter(); + converter.isInternal = true; + for (var i = 0; i < aliases.length; ++i) { + checkDecode(converter, aliases[i], inString, expectedString); + } +} + +function testEncodeAliasesInternal(aliases, inString, expectedString) { + var converter = CreateScriptableConverter(); + converter.isInternal = true; + for (var i = 0; i < aliases.length; ++i) { + checkEncode(converter, aliases[i], inString, expectedString); + } +} diff --git a/intl/uconv/tests/unit/test_bug116882.js b/intl/uconv/tests/unit/test_bug116882.js new file mode 100644 index 0000000000..5e76b30aa4 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug116882.js @@ -0,0 +1,11 @@ +/* Tests conversion of undefined and illegal sequences from Shift-JIS + * to Unicode (bug 116882) + */ + +const inText = "\xfd\xfe\xff\x81\x20\x81\x3f\x86\x3c"; +const expectedText = 
"\ufffd\ufffd\ufffd\ufffd \ufffd?\ufffd<"; +const charset = "Shift_JIS"; + +function run_test() { + checkDecode(CreateScriptableConverter(), charset, inText, expectedText); +} diff --git a/intl/uconv/tests/unit/test_bug317216.js b/intl/uconv/tests/unit/test_bug317216.js new file mode 100644 index 0000000000..cc10ef313e --- /dev/null +++ b/intl/uconv/tests/unit/test_bug317216.js @@ -0,0 +1,165 @@ +/* Test case for bug 317216 + * + * Uses nsIConverterInputStream to decode UTF-16 text with valid surrogate + * pairs and lone surrogate characters + * + * Sample text is: "A" in Mathematical Bold Capitals (U+1D400) + * + * The test uses buffers of 4 different lengths to test end of buffer in mid- + * UTF16 character and mid-surrogate pair + */ + +const { NetUtil } = ChromeUtils.importESModule( + "resource://gre/modules/NetUtil.sys.mjs" +); + +const test = [ + // 0: Valid surrogate pair + [ + "%D8%35%DC%20%00%2D%00%2D", + // expected: surrogate pair + "\uD835\uDC20--", + ], + // 1: Lone high surrogate + [ + "%D8%35%00%2D%00%2D", + // expected: one replacement char + "\uFFFD--", + ], + // 2: Lone low surrogate + [ + "%DC%20%00%2D%00%2D", + // expected: one replacement char + "\uFFFD--", + ], + // 3: Two high surrogates + [ + "%D8%35%D8%35%00%2D%00%2D", + // expected: two replacement chars + "\uFFFD\uFFFD--", + ], + // 4: Two low surrogates + [ + "%DC%20%DC%20%00%2D%00%2D", + // expected: two replacement chars + "\uFFFD\uFFFD--", + ], + // 5: Low surrogate followed by high surrogate + [ + "%DC%20%D8%35%00%2D%00%2D", + // expected: two replacement chars + "\uFFFD\uFFFD--", + ], + // 6: Lone high surrogate followed by valid surrogate pair + [ + "%D8%35%D8%35%DC%20%00%2D%00%2D", + // expected: replacement char followed by surrogate pair + "\uFFFD\uD835\uDC20--", + ], + // 7: Lone low surrogate followed by valid surrogate pair + [ + "%DC%20%D8%35%DC%20%00%2D%00%2D", + // expected: replacement char followed by surrogate pair + "\uFFFD\uD835\uDC20--", + ], + // 8: Valid surrogate 
pair followed by lone high surrogate + [ + "%D8%35%DC%20%D8%35%00%2D%00%2D", + // expected: surrogate pair followed by replacement char + "\uD835\uDC20\uFFFD--", + ], + // 9: Valid surrogate pair followed by lone low surrogate + [ + "%D8%35%DC%20%DC%20%00%2D%00%2D", + // expected: surrogate pair followed by replacement char + "\uD835\uDC20\uFFFD--", + ], + // 10: Lone high surrogate at the end of the input + [ + "%D8%35%", + // expected: one replacement char + "\uFFFD", + ], + // 11: Half code unit at the end of the input + [ + "%D8", + // expected: one replacement char + "\uFFFD", + ], +]; + +const ConverterInputStream = Components.Constructor( + "@mozilla.org/intl/converter-input-stream;1", + "nsIConverterInputStream", + "init" +); + +function testCase(testText, expectedText, bufferLength, charset) { + var dataURI = "data:text/plain;charset=" + charset + "," + testText; + var channel = NetUtil.newChannel({ + uri: dataURI, + loadUsingSystemPrincipal: true, + }); + var testInputStream = channel.open(); + var testConverter = new ConverterInputStream( + testInputStream, + charset, + bufferLength, + 0xfffd + ); + + if (!(testConverter instanceof Ci.nsIUnicharLineInputStream)) { + throw new Error("not line input stream"); + } + + var outStr = ""; + var more; + do { + // read the line and check for eof + var line = {}; + more = testConverter.readLine(line); + outStr += line.value; + } while (more); + + // escape the strings before comparing for better readability + Assert.equal(escape(outStr), escape(expectedText)); +} + +// Add 32 dummy characters to the test text to work around the minimum buffer +// size of an ns*Buffer +const MINIMUM_BUFFER_SIZE = 32; +function padBytes(str) { + var padding = ""; + for (var i = 0; i < MINIMUM_BUFFER_SIZE; ++i) { + padding += "%00%2D"; + } + return padding + str; +} + +function padUnichars(str) { + var padding = ""; + for (var i = 0; i < MINIMUM_BUFFER_SIZE; ++i) { + padding += "-"; + } + return padding + str; +} + +// Byte-swap 
%-encoded utf-16 +function flip(str) { + return str.replace(/(%..)(%..)/g, "$2$1"); +} + +function run_test() { + for (var i = 0; i < 12; ++i) { + for ( + var bufferLength = MINIMUM_BUFFER_SIZE; + bufferLength < MINIMUM_BUFFER_SIZE + 4; + ++bufferLength + ) { + var testText = padBytes(test[i][0]); + var expectedText = padUnichars(test[i][1]); + testCase(testText, expectedText, bufferLength, "UTF-16BE"); + testCase(flip(testText), expectedText, bufferLength, "UTF-16LE"); + } + } +} diff --git a/intl/uconv/tests/unit/test_bug321379.js b/intl/uconv/tests/unit/test_bug321379.js new file mode 100644 index 0000000000..338f59688e --- /dev/null +++ b/intl/uconv/tests/unit/test_bug321379.js @@ -0,0 +1,35 @@ +// Tests that calling close on a converter in/output stream doesn't crash +// (bug 321379) + +function run_test() { + var StorageStream = Components.Constructor( + "@mozilla.org/storagestream;1", + "nsIStorageStream", + "init" + ); + var ConverterInputStream = Components.Constructor( + "@mozilla.org/intl/converter-input-stream;1", + "nsIConverterInputStream", + "init" + ); + var ConverterOutputStream = Components.Constructor( + "@mozilla.org/intl/converter-output-stream;1", + "nsIConverterOutputStream", + "init" + ); + + var storage = new StorageStream(1024, -1, null); + + // Output + var outStr = storage.getOutputStream(0); + var out = new ConverterOutputStream(outStr, "UTF-8"); + out.writeString("Foo."); + out.close(); + out.close(); // This line should not crash. It should just do nothing. + + // Input + var inStr = storage.newInputStream(0); + var inp = new ConverterInputStream(inStr, "UTF-8", 1024, 0xfffd); + inp.close(); + inp.close(); // This line should not crash. It should just do nothing. 
+} diff --git a/intl/uconv/tests/unit/test_bug340714.js b/intl/uconv/tests/unit/test_bug340714.js new file mode 100644 index 0000000000..fdd30543f2 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug340714.js @@ -0,0 +1,123 @@ +/* Test case for bug 340714 + * + * Uses nsIConverterInputStream to decode UTF-16 text with all combinations + * of UTF-16BE and UTF-16LE with and without BOM. + * + * Sample text is: "Ð’Ñе ÑчаÑтливые Ñемьи похожи друг на друга, ÐºÐ°Ð¶Ð´Ð°Ñ Ð½ÐµÑчаÑÑ‚Ð»Ð¸Ð²Ð°Ñ ÑÐµÐ¼ÑŒÑ Ð½ÐµÑчаÑтлива по-Ñвоему." + * + * The enclosing quotation marks are included in the sample text to test that + * UTF-16LE is recognized even when there is no BOM and the UTF-16LE decoder is + * not explicitly called. This only works when the first character of the text + * is an eight-bit character. + */ + +const { NetUtil } = ChromeUtils.importESModule( + "resource://gre/modules/NetUtil.sys.mjs" +); + +const beBOM = "%FE%FF"; +const leBOM = "%FF%FE"; +const sampleUTF16BE = + "%00%22%04%12%04%41%04%35%00%20%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%4B%04%35%00%20%04%41%04%35%04%3C%04%4C%04%38%00%20%04%3F%04%3E%04%45%04%3E%04%36%04%38%00%20%04%34%04%40%04%43%04%33%00%20%04%3D%04%30%00%20%04%34%04%40%04%43%04%33%04%30%00%2C%00%20%04%3A%04%30%04%36%04%34%04%30%04%4F%00%20%04%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%04%4F%00%20%04%41%04%35%04%3C%04%4C%04%4F%00%20%04%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%00%20%04%3F%04%3E%00%2D%04%41%04%32%04%3E%04%35%04%3C%04%43%00%2E%00%22"; +const sampleUTF16LE = + 
"%22%00%12%04%41%04%35%04%20%00%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%4B%04%35%04%20%00%41%04%35%04%3C%04%4C%04%38%04%20%00%3F%04%3E%04%45%04%3E%04%36%04%38%04%20%00%34%04%40%04%43%04%33%04%20%00%3D%04%30%04%20%00%34%04%40%04%43%04%33%04%30%04%2C%00%20%00%3A%04%30%04%36%04%34%04%30%04%4F%04%20%00%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%04%4F%04%20%00%41%04%35%04%3C%04%4C%04%4F%04%20%00%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%04%20%00%3F%04%3E%04%2D%00%41%04%32%04%3E%04%35%04%3C%04%43%04%2E%00%22%00"; +const expected = + '"\u0412\u0441\u0435 \u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u044B\u0435 \u0441\u0435\u043C\u044C\u0438 \u043F\u043E\u0445\u043E\u0436\u0438 \u0434\u0440\u0443\u0433 \u043D\u0430 \u0434\u0440\u0443\u0433\u0430, \u043A\u0430\u0436\u0434\u0430\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430\u044F \u0441\u0435\u043C\u044C\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430 \u043F\u043E-\u0441\u0432\u043E\u0435\u043C\u0443."'; + +Services.prefs.setBoolPref("security.allow_eval_with_system_principal", true); +registerCleanupFunction(() => { + Services.prefs.clearUserPref("security.allow_eval_with_system_principal"); +}); + +function makeText(withBOM, charset) { + const isBE = charset === "UTF16BE"; + const sampleText = isBE ? sampleUTF16BE : sampleUTF16LE; + const bom = isBE ? beBOM : leBOM; + return withBOM ? 
bom + sampleText : sampleText; +} + +function testCase(withBOM, charset, charsetDec, decoder, bufferLength) { + var dataURI = + "data:text/plain;charset=" + charsetDec + "," + makeText(withBOM, charset); + + var ConverterInputStream = Components.Constructor( + "@mozilla.org/intl/converter-input-stream;1", + "nsIConverterInputStream", + "init" + ); + + var channel = NetUtil.newChannel({ + uri: dataURI, + loadUsingSystemPrincipal: true, + }); + var testInputStream = channel.open(); + var testConverter = new ConverterInputStream( + testInputStream, + decoder, + bufferLength, + 0xfffd + ); + + if (!(testConverter instanceof Ci.nsIUnicharLineInputStream)) { + throw new Error("not line input stream"); + } + + var outStr = ""; + var more; + do { + // read the line and check for eof + var line = {}; + more = testConverter.readLine(line); + outStr += line.value; + } while (more); + + if (outStr != expected) { + dump( + "Failed with BOM = " + + withBOM + + "; charset = " + + charset + + "; charset declaration = " + + charsetDec + + "; decoder = " + + decoder + + "; bufferLength = " + + bufferLength + + "\n" + ); + if (outStr.length == expected.length) { + for (let i = 0; i < outStr.length; ++i) { + if (outStr.charCodeAt(i) != expected.charCodeAt(i)) { + dump( + i + + ": " + + outStr.charCodeAt(i).toString(16) + + " != " + + expected.charCodeAt(i).toString(16) + + "\n" + ); + } + } + } + } + + // escape the strings before comparing for better readability + Assert.equal(escape(outStr), escape(expected)); +} + +function run_test() { + /* BOM charset charset decoder buffer + declaration length */ + testCase(true, "UTF16LE", "UTF-16", "UTF-16BE", 64); + testCase(true, "UTF16BE", "UTF-16", "UTF-16LE", 64); + testCase(true, "UTF16LE", "UTF-16", "UTF-16LE", 64); + testCase(true, "UTF16BE", "UTF-16", "UTF-16BE", 64); + testCase(false, "UTF16LE", "UTF-16", "UTF-16LE", 64); + testCase(false, "UTF16BE", "UTF-16", "UTF-16BE", 64); + testCase(true, "UTF16LE", "UTF-16", "UTF-16BE", 65); + 
testCase(true, "UTF16BE", "UTF-16", "UTF-16LE", 65); + testCase(true, "UTF16LE", "UTF-16", "UTF-16LE", 65); + testCase(true, "UTF16BE", "UTF-16", "UTF-16BE", 65); + testCase(false, "UTF16LE", "UTF-16", "UTF-16LE", 65); + testCase(false, "UTF16BE", "UTF-16", "UTF-16BE", 65); +} diff --git a/intl/uconv/tests/unit/test_bug381412.Big5-HKSCS.js b/intl/uconv/tests/unit/test_bug381412.Big5-HKSCS.js new file mode 100644 index 0000000000..4108dc1090 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug381412.Big5-HKSCS.js @@ -0,0 +1,64 @@ +const charset = "Big5-HKSCS"; + +function dumpStrings(inString, outString) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = ""; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); +} + +function error(inString, outString, msg) { + dumpStrings(inString, outString); + do_throw("security risk: " + msg); +} + +function run_test() { + var ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" + ); + + var converter = new ScriptableUnicodeConverter(); + converter.charset = charset; + + var leadByte, trailByte; + var inString; + for (leadByte = 1; leadByte < 0x100; ++leadByte) { + for (trailByte = 1; trailByte < 0x100; ++trailByte) { + inString = String.fromCharCode(leadByte, trailByte, 65); + var outString = converter.ConvertToUnicode(inString) + converter.Finish(); + switch (outString.length) { + case 1: + error(inString, outString, "2 byte sequence eaten"); + break; + case 2: + if ( + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 1 ASCII"); + } + break; + case 3: + if ( + outString != inString && + outString.charCodeAt(0) < 
0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 2 ASCII"); + } + break; + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug381412.Big5.js b/intl/uconv/tests/unit/test_bug381412.Big5.js new file mode 100644 index 0000000000..45c8bafc83 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug381412.Big5.js @@ -0,0 +1,64 @@ +const charset = "Big5"; + +function dumpStrings(inString, outString) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = ""; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); +} + +function error(inString, outString, msg) { + dumpStrings(inString, outString); + do_throw("security risk: " + msg); +} + +function run_test() { + var ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" + ); + + var converter = new ScriptableUnicodeConverter(); + converter.charset = charset; + + var leadByte, trailByte; + var inString; + for (leadByte = 1; leadByte < 0x100; ++leadByte) { + for (trailByte = 1; trailByte < 0x100; ++trailByte) { + inString = String.fromCharCode(leadByte, trailByte, 65); + var outString = converter.ConvertToUnicode(inString) + converter.Finish(); + switch (outString.length) { + case 1: + error(inString, outString, "2 byte sequence eaten"); + break; + case 2: + if ( + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 1 ASCII"); + } + break; + case 3: + if ( + outString != inString && + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 2 ASCII"); + } + break; + } + } + } +} diff --git 
a/intl/uconv/tests/unit/test_bug381412.euc-kr.js b/intl/uconv/tests/unit/test_bug381412.euc-kr.js new file mode 100644 index 0000000000..58d36c76f4 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug381412.euc-kr.js @@ -0,0 +1,64 @@ +const charset = "EUC-KR"; + +function dumpStrings(inString, outString) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = ""; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); +} + +function error(inString, outString, msg) { + dumpStrings(inString, outString); + do_throw("security risk: " + msg); +} + +function run_test() { + var ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" + ); + + var converter = new ScriptableUnicodeConverter(); + converter.charset = charset; + + var leadByte, trailByte; + var inString; + for (leadByte = 1; leadByte < 0x100; ++leadByte) { + for (trailByte = 1; trailByte < 0x100; ++trailByte) { + inString = String.fromCharCode(leadByte, trailByte, 65); + var outString = converter.ConvertToUnicode(inString) + converter.Finish(); + switch (outString.length) { + case 1: + error(inString, outString, "2 byte sequence eaten"); + break; + case 2: + if ( + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 1 ASCII"); + } + break; + case 3: + if ( + outString != inString && + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 2 ASCII"); + } + break; + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug381412.euc_jp.js b/intl/uconv/tests/unit/test_bug381412.euc_jp.js new file mode 100644 index 0000000000..7e07eb9e69 
--- /dev/null +++ b/intl/uconv/tests/unit/test_bug381412.euc_jp.js @@ -0,0 +1,92 @@ +const charset = "EUC-JP"; +const ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" +); +var gConverter; + +function error(inString, outString, msg) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = ""; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); + do_throw("security risk: " + msg); +} + +function test(inString) { + var outString = gConverter.ConvertToUnicode(inString) + gConverter.Finish(); + + switch (outString.length) { + case 0: + case 1: + case 2: + error(inString, outString, "Unexpected error"); + break; + case 3: + error(inString, outString, "3 byte sequence eaten"); + break; + case 4: + if ( + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 && + outString.charCodeAt(2) < 0x80 && + outString.charCodeAt(3) < 0x80 + ) { + error(inString, outString, "3 byte sequence converted to 1 ASCII"); + } + break; + case 5: + if ( + outString != inString && + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 && + outString.charCodeAt(2) < 0x80 && + outString.charCodeAt(3) < 0x80 && + outString.charCodeAt(4) < 0x80 + ) { + error(inString, outString, "3 byte sequence converted to 2 ASCII"); + } + break; + case 6: + if ( + outString != inString && + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 && + outString.charCodeAt(2) < 0x80 && + outString.charCodeAt(3) < 0x80 && + outString.charCodeAt(4) < 0x80 && + outString.charCodeAt(5) < 0x80 + ) { + error(inString, outString, "3 byte sequence converted to 3 ASCII"); + } + break; + } +} + +function run_test() { + gConverter = new 
ScriptableUnicodeConverter(); + gConverter.charset = charset; + + var byte1, byte2, byte3; + for (byte1 = 1; byte1 < 0x100; ++byte1) { + for (byte2 = 1; byte2 < 0x100; ++byte2) { + if (byte1 == 0x8f) { + for (byte3 = 1; byte3 < 0x100; ++byte3) { + test(String.fromCharCode(byte1, byte2, byte3) + "foo"); + } + } else { + test(String.fromCharCode(byte1, byte2) + " foo"); + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug381412.gb2312.js b/intl/uconv/tests/unit/test_bug381412.gb2312.js new file mode 100644 index 0000000000..df680dadae --- /dev/null +++ b/intl/uconv/tests/unit/test_bug381412.gb2312.js @@ -0,0 +1,60 @@ +const charset = "GB2312"; + +function error(inString, outString, msg) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = ""; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); + do_throw("security risk: " + msg); +} + +function run_test() { + var ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" + ); + + var converter = new ScriptableUnicodeConverter(); + converter.charset = charset; + + var leadByte, trailByte; + var inString; + for (leadByte = 1; leadByte < 0x100; ++leadByte) { + for (trailByte = 1; trailByte < 0x100; ++trailByte) { + inString = String.fromCharCode(leadByte, trailByte, 65); + var outString = converter.ConvertToUnicode(inString) + converter.Finish(); + switch (outString.length) { + case 1: + error(inString, outString, "2 byte sequence eaten"); + break; + case 2: + if ( + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 1 ASCII"); + } + break; + case 3: + if ( + outString != inString && + 
outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 2 ASCII"); + } + break; + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug381412.js b/intl/uconv/tests/unit/test_bug381412.js new file mode 100644 index 0000000000..89849bf5e6 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug381412.js @@ -0,0 +1,60 @@ +const charset = "Shift_JIS"; + +function error(inString, outString, msg) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = ""; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); + do_throw("security risk: " + msg); +} + +function run_test() { + var ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" + ); + + var converter = new ScriptableUnicodeConverter(); + converter.charset = charset; + + var leadByte, trailByte; + var inString; + for (leadByte = 1; leadByte < 0x100; ++leadByte) { + for (trailByte = 1; trailByte < 0x100; ++trailByte) { + inString = String.fromCharCode(leadByte, trailByte, 65); + var outString = converter.ConvertToUnicode(inString) + converter.Finish(); + switch (outString.length) { + case 1: + error(inString, outString, "2 byte sequence eaten"); + break; + case 2: + if ( + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 1 ASCII"); + } + break; + case 3: + if ( + outString != inString && + outString.charCodeAt(0) < 0x80 && + outString.charCodeAt(1) < 0x80 + ) { + error(inString, outString, "2 byte sequence converted to 2 ASCII"); + } + break; + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug396637.js 
b/intl/uconv/tests/unit/test_bug396637.js new file mode 100644 index 0000000000..6aac53e5d1 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug396637.js @@ -0,0 +1,9 @@ +// Tests conversion of a single byte from UTF-16 to Unicode + +const inString = "A"; +const expectedString = ""; +const charset = "UTF-16BE"; + +function run_test() { + checkDecode(CreateScriptableConverter(), charset, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_bug399257.js b/intl/uconv/tests/unit/test_bug399257.js new file mode 100644 index 0000000000..9acd3e9b38 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug399257.js @@ -0,0 +1,80 @@ +// Tests encoding of characters below U+0020 +const inString = "Hello\u000aWorld"; +const expectedString = "Hello\nWorld"; + +function run_test() { + var failures = false; + var encodingConverter = CreateScriptableConverter(); + + var encoders = [ + "Big5", + "Big5-HKSCS", + "EUC-JP", + "EUC-KR", + "gb18030", + "gbk", + "IBM866", + "ISO-2022-JP", + "ISO-8859-1", + "ISO-8859-2", + "ISO-8859-3", + "ISO-8859-4", + "ISO-8859-5", + "ISO-8859-6", + "ISO-8859-7", + "ISO-8859-8", + "ISO-8859-8-I", + "ISO-8859-10", + "ISO-8859-13", + "ISO-8859-14", + "ISO-8859-15", + "ISO-8859-16", + "KOI8-R", + "KOI8-U", + "Shift_JIS", + "windows-1250", + "windows-1251", + "windows-1252", + "windows-1253", + "windows-1254", + "windows-1255", + "windows-1256", + "windows-1257", + "windows-1258", + "windows-874", + "macintosh", + "x-mac-cyrillic", + "x-user-defined", + "UTF-8", + ]; + + var counter = 0; + while (counter < encoders.length) { + var charset = encoders[counter++]; + dump("testing " + counter + " " + charset + "\n"); + + encodingConverter.charset = charset; + var codepageString = + encodingConverter.ConvertFromUnicode(inString) + + encodingConverter.Finish(); + if (codepageString != expectedString) { + dump(charset + " encoding failed\n"); + for (var i = 0; i < expectedString.length; ++i) { + if (codepageString.charAt(i) != expectedString.charAt(i)) { 
+ dump( + i.toString(16) + + ": 0x" + + codepageString.charCodeAt(i).toString(16) + + " != " + + expectedString.charCodeAt(i).toString(16) + + "\n" + ); + } + } + failures = true; + } + } + if (failures) { + do_throw("test failed\n"); + } +} diff --git a/intl/uconv/tests/unit/test_bug457886.js b/intl/uconv/tests/unit/test_bug457886.js new file mode 100644 index 0000000000..21c3036901 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug457886.js @@ -0,0 +1,12 @@ +// Tests conversion from Unicode to ISO-2022-JP + +const inString = + "\u3042\u3044\u3046\u3048\u304A\u000D\u000A\u304B\u304D\u304F\u3051\u3053"; + +const expectedString = '\x1B$B$"$$$&$($*\x1B(B\x0D\x0A\x1B$B$+$-$/$1$3\x1B(B'; + +const charset = "ISO-2022-JP"; + +function run_test() { + checkEncode(CreateScriptableConverter(), charset, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_bug522931.js b/intl/uconv/tests/unit/test_bug522931.js new file mode 100644 index 0000000000..2dae8d72e9 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug522931.js @@ -0,0 +1,4 @@ +// crash test with invaild parameter (bug 522931) +function run_test() { + Assert.equal(Services.textToSubURI.UnEscapeAndConvert("UTF-8", null), ""); +} diff --git a/intl/uconv/tests/unit/test_bug563283.js b/intl/uconv/tests/unit/test_bug563283.js new file mode 100644 index 0000000000..49c13dcfcb --- /dev/null +++ b/intl/uconv/tests/unit/test_bug563283.js @@ -0,0 +1,53 @@ +// Tests conversion from Unicode to ISO-2022-JP with Hankaku characters + +const inStrings = [ + // 。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタï¾ï¾‚テトナニヌネノハヒフï¾ï¾Žï¾ï¾ï¾‘メモヤユヨラリルレロワï¾ï¾žï¾Ÿ + "\uFF61\uFF62\uFF63\uFF64\uFF65\uFF66\uFF67\uFF68\uFF69\uFF6A\uFF6B\uFF6C\uFF6D\uFF6E\uFF6F\uFF70\uFF71\uFF72\uFF73\uFF74\uFF75\uFF76\uFF77\uFF78\uFF79\uFF7A\uFF7B\uFF7C\uFF7D\uFF7E\uFF7F\uFF80\uFF81\uFF82\uFF83\uFF84\uFF85\uFF86\uFF87\uFF88\uFF89\uFF8A\uFF8B\uFF8C\uFF8D\uFF8E\uFF8F\uFF90\uFF91\uFF92\uFF93\uFF94\uFF95\uFF96\uFF97\uFF98\uFF99\uFF9A\uFF9B\uFF9C\uFF9D\uFF9E\uFF9F", + // 
equivalent to + // 。「ã€ã€ãƒ»ãƒ²ã‚¡ã‚£ã‚¥ã‚§ã‚©ãƒ£ãƒ¥ãƒ§ãƒƒãƒ¼ã‚¢ã‚¤ã‚¦ã‚¨ã‚ªã‚«ã‚­ã‚¯ã‚±ã‚³ã‚µã‚·ã‚¹ã‚»ã‚½ã‚¿ãƒãƒ„テトナニヌãƒãƒŽãƒãƒ’フヘホマミムメモヤユヨラリルレロワン゛゜ + // \u3002\u300c\u300d\u3001\u30fb\u30f2\u30a1\u30a3\u30a5\u30a7\u30a9\u30e3\u30e5\u30e7\u30c3\u30fc\u30a2\u30a4\u30a6\u30a8\u30aa\u30ab\u30ad\u30af\u30b1\u30b3\u30b5\u30b7\u30b9\u30bb\u30bd\u30bf\u30c1\u30c4\u30c6\u30c8\u30ca\u30cb\u30cc\u30cd\u30ce\u30cf\u30d2\u30d5\u30d8\u30db\u30de\u30df\u30e0\u30e1\u30e2\u30e4\u30e6\u30e8\u30e9\u30ea\u30eb\u30ec\u30ed\u30ef\u30f3\u309b\u309c" + + // ガギグゲゴザジズゼゾダï¾ï¾žï¾‚゙デドバビブï¾ï¾žï¾Žï¾ž + "\uFF76\uFF9E\uFF77\uFF9E\uFF78\uFF9E\uFF79\uFF9E\uFF7A\uFF9E\uFF7B\uFF9E\uFF7C\uFF9E\uFF7D\uFF9E\uFF7E\uFF9E\uFF7F\uFF9E\uFF80\uFF9E\uFF81\uFF9E\uFF82\uFF9E\uFF83\uFF9E\uFF84\uFF9E\uFF8A\uFF9E\uFF8B\uFF9E\uFF8C\uFF9E\uFF8D\uFF9E\uFF8E\uFF9E", + // equivalent to + // カ゛キ゛ク゛ケ゛コ゛サ゛シ゛ス゛セ゛ソ゛タ゛ãƒã‚›ãƒ„゛テ゛ト゛ãƒã‚›ãƒ’゛フ゛ヘ゛ホ゛ + // \u30AB\u309B\u30AD\u309B\u30AF\u309B\u30B1\u309B\u30B3\u309B\u30B5\u309B\u30B7\u309B\u30B9\u309B\u30BB\u309B\u30BD\u309B\u30BF\u309B\u30C1\u309B\u30C4\u309B\u30C6\u309B\u30C8\u309B\u30CF\u309B\u30D2\u309B\u30D5\u309B\u30D8\u309B\u30DB\u309B + + // パピプï¾ï¾Ÿï¾Žï¾Ÿ + "\uFF8A\uFF9F\uFF8B\uFF9F\uFF8C\uFF9F\uFF8D\uFF9F\uFF8E\uFF9F", + // equivalent to + // ãƒã‚œãƒ’゜フ゜ヘ゜ホ゜ + // \u30CF\u309C\u30D2\u309C\u30D5\u309C\u30D8\u309C\u30DB\u309C" + + // Hankaku preceded and followed by regular Katakana (no change of charset) + // フランツ・ヨーゼフ・ãƒã‚¤ãƒ‰ãƒ³ + "\u30D5\u30E9\u30F3\u30C4\u30FB\uFF96\uFF70\uFF7E\uFF9E\uFF8C\u30FB\u30CF\u30A4\u30C9\u30F3", + + // Hankaku preceded and followed by Roman (charset change) + // Mozilla (モジラ) Foundation + "Mozilla (\uFF93\uFF7C\uFF9E\uFF97) Foundation", + + // Hankaku preceded and followed by unencodable characters + // दिलà¥à¤²à¥€ï½¥ï¾ƒï¾žï¾˜ï½°ï½¥à¨¦à¨¿à©±à¨²à©€ + "\u0926\u093F\u0932\u094D\u0932\u0940\uFF65\uFF83\uFF9E\uFF98\uFF70\uFF65\u0A26\u0A3F\u0A71\u0A32\u0A40", +]; + +const expectedStrings = [ + 
"\x1B$B!#!V!W!\x22!&%r%!%#%%%'%)%c%e%g%C!<%\x22%$%&%(%*%+%-%/%1%3%5%7%9%;%=%?%A%D%F%H%J%K%L%M%N%O%R%U%X%[%^%_%`%a%b%d%f%h%i%j%k%l%m%o%s!+!,\x1B(B", + "\x1B$B%+!+%-!+%/!+%1!+%3!+%5!+%7!+%9!+%;!+%=!+%?!+%A!+%D!+%F!+%H!+%O!+%R!+%U!+%X!+%[!+\x1B(B", + "\x1B$B%O!,%R!,%U!,%X!,%[!,\x1B(B", + "\x1B$B%U%i%s%D!&%h!<%;!+%U!&%O%$%I%s\x1B(B", + "Mozilla (\x1B$B%b%7!+%i\x1B(B) Foundation", + "??????\x1B$B!&%F!+%j!" || outChar == "/") { + dump( + charset + + " has a problem: " + + escape(inString) + + " decodes to '" + + outString + + "'\n" + ); + failures = true; + } + } + } + } + if (failures) { + do_throw("test failed\n"); + } +} diff --git a/intl/uconv/tests/unit/test_bug715319.dbcs.js b/intl/uconv/tests/unit/test_bug715319.dbcs.js new file mode 100644 index 0000000000..3ba405a925 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug715319.dbcs.js @@ -0,0 +1,56 @@ +// 2-byte charsets: +const charsets = ["Big5", "EUC-KR"]; +const ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" +); +var gConverter; + +function error(inString, outString, msg) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = ""; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); + do_throw("security risk: " + msg); +} + +function IsASCII(charCode) { + return charCode <= 0x7e; +} + +function test(inString) { + var outString = gConverter.ConvertToUnicode(inString) + gConverter.Finish(); + + var outLen = outString.length; + + if ( + IsASCII(inString.charCodeAt(1)) && + (outLen < 4 || outString.charCodeAt(outLen - 4) == 0xfffd) + ) { + error(inString, outString, "ASCII input eaten in " + gConverter.charset); + } +} + +function run_test() { + gConverter = new 
ScriptableUnicodeConverter(); + for (var i = 0; i < charsets.length; ++i) { + gConverter.charset = charsets[i]; + + var byte1, byte2; + for (byte1 = 1; byte1 < 0x100; ++byte1) { + for (byte2 = 1; byte2 < 0x100; ++byte2) { + test(String.fromCharCode(byte1, byte2) + "foo"); + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug715319.euc_jp.js b/intl/uconv/tests/unit/test_bug715319.euc_jp.js new file mode 100644 index 0000000000..537d073d14 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug715319.euc_jp.js @@ -0,0 +1,77 @@ +const charset = "EUC-JP"; +const ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" +); +var gConverter; + +function error(inString, outString, msg) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = ""; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); + do_throw("security risk: " + msg); +} + +function IsASCII(charCode) { + return charCode <= 0x7e; +} + +function IsNotGR(charCode) { + return charCode < 0xa1 || charCode > 0xfe; +} + +function test(inString) { + var outString = gConverter.ConvertToUnicode(inString) + gConverter.Finish(); + + var outLen = outString.length; + if ( + IsASCII(inString.charCodeAt(1)) && + inString.charCodeAt(1) != outString.charCodeAt(outLen - 5) + ) { + error(inString, outString, "ASCII second byte eaten"); + } else if ( + IsASCII(inString.charCodeAt(2)) && + inString.charCodeAt(2) != outString.charCodeAt(outLen - 4) + ) { + error(inString, outString, "ASCII third byte eaten"); + } else if ( + inString.charCodeAt(0) == 0x8f && + inString.charCodeAt(1) > 0x7f && + IsNotGR(inString.charCodeAt(2)) && + !( + outString.charCodeAt(outLen - 4) == 0xfffd || + 
outString.charCodeAt(outLen - 4) == inString.charCodeAt(2) + ) + ) { + error(inString, outString, "non-GR third byte eaten"); + } +} + +function run_test() { + gConverter = new ScriptableUnicodeConverter(); + gConverter.charset = charset; + + var byte1, byte2, byte3; + for (byte1 = 1; byte1 < 0x100; ++byte1) { + for (byte2 = 1; byte2 < 0x100; ++byte2) { + if (byte1 == 0x8f) { + for (byte3 = 1; byte3 < 0x100; ++byte3) { + test(String.fromCharCode(byte1, byte2, byte3) + "foo"); + } + } else { + test(String.fromCharCode(byte1, byte2) + " foo"); + } + } + } +} diff --git a/intl/uconv/tests/unit/test_bug715319.gb2312.js b/intl/uconv/tests/unit/test_bug715319.gb2312.js new file mode 100644 index 0000000000..f780ab81d1 --- /dev/null +++ b/intl/uconv/tests/unit/test_bug715319.gb2312.js @@ -0,0 +1,87 @@ +const charset = "GB2312"; +const ScriptableUnicodeConverter = Components.Constructor( + "@mozilla.org/intl/scriptableunicodeconverter", + "nsIScriptableUnicodeConverter" +); +var gConverter; + +function error(inString, outString, msg) { + var dispIn = ""; + var dispOut = ""; + var i; + for (i = 0; i < inString.length; ++i) { + dispIn += " x" + inString.charCodeAt(i).toString(16); + } + if (!outString.length) { + dispOut = ""; + } else { + for (i = 0; i < outString.length; ++i) { + dispOut += " x" + outString.charCodeAt(i).toString(16); + } + } + dump('"' + dispIn + '" ==> "' + dispOut + '"\n'); + do_throw("security risk: " + msg); +} + +function IsASCII(charCode) { + return charCode <= 0x7e; +} + +function test(inString) { + var outString = gConverter.ConvertToUnicode(inString) + gConverter.Finish(); + + var outLen = outString.length; + for (var pos = 1; pos < 3; ++pos) { + let outPos = outLen - (9 - pos); + if (outPos < 0) { + outPos = 0; + } + let c0 = inString.charCodeAt(0); + let c1 = inString.charCodeAt(1); + let c2 = inString.charCodeAt(2); + let c3 = inString.charCodeAt(3); + if ( + IsASCII(inString.charCodeAt(pos)) && + !( + outString.charCodeAt(outPos) == 
inString.charCodeAt(pos) || + outString.charCodeAt(outPos) != 0xfffd || + // legal 4 byte range + (0x81 <= c0 && + c0 <= 0xfe && + 0x30 <= c1 && + c1 <= 0x39 && + 0x81 <= c2 && + c2 <= 0xfe && + 0x30 <= c3 && + c3 <= 0x39) + ) + ) { + dump("pos = " + pos + "; outPos = " + outPos + "\n"); + error(inString, outString, "ASCII input eaten"); + } + } +} + +function run_test() { + gConverter = new ScriptableUnicodeConverter(); + gConverter.charset = charset; + + var byte1, byte2, byte3, byte4; + + // 2-byte + for (byte1 = 1; byte1 < 0x100; ++byte1) { + for (byte2 = 1; byte2 < 0x100; ++byte2) { + test(String.fromCharCode(byte1, byte2) + " foo"); + } + } + // 4-byte (limited) + for (byte1 = 0x80; byte1 < 0x90; ++byte1) { + for (byte2 = 0x20; byte2 < 0x40; ++byte2) { + for (byte3 = 0x80; byte3 < 0x90; ++byte3) { + for (byte4 = 0x20; byte4 < 0x40; ++byte4) { + test(String.fromCharCode(byte1, byte2, byte3, byte4) + " foo"); + } + } + } + } +} diff --git a/intl/uconv/tests/unit/test_charset_conversion.js b/intl/uconv/tests/unit/test_charset_conversion.js new file mode 100644 index 0000000000..577a801081 --- /dev/null +++ b/intl/uconv/tests/unit/test_charset_conversion.js @@ -0,0 +1,373 @@ +const NS_ERROR_ILLEGAL_VALUE = Cr.NS_ERROR_ILLEGAL_VALUE; + +var BIS, BOS, _Pipe, COS, FIS, _SS, CIS; + +var dataDir; + +function run_test() { + BIS = Components.Constructor( + "@mozilla.org/binaryinputstream;1", + "nsIBinaryInputStream", + "setInputStream" + ); + BOS = Components.Constructor( + "@mozilla.org/binaryoutputstream;1", + "nsIBinaryOutputStream", + "setOutputStream" + ); + _Pipe = Components.Constructor("@mozilla.org/pipe;1", "nsIPipe", "init"); + COS = Components.Constructor( + "@mozilla.org/intl/converter-output-stream;1", + "nsIConverterOutputStream", + "init" + ); + FIS = Components.Constructor( + "@mozilla.org/network/file-input-stream;1", + "nsIFileInputStream", + "init" + ); + _SS = Components.Constructor( + "@mozilla.org/storagestream;1", + "nsIStorageStream", + "init" + 
); + CIS = Components.Constructor( + "@mozilla.org/intl/converter-input-stream;1", + "nsIConverterInputStream", + "init" + ); + + dataDir = do_get_file("data/"); + + test_utf8_1(); + test_cross_conversion(); +} + +const UNICODE_STRINGS = [ + "\u00BD + \u00BE == \u00BD\u00B2 + \u00BC + \u00BE", + + "AZaz09 \u007F " + // U+000000 to U+00007F + "\u0080 \u0398 \u03BB \u0725 " + // U+000080 to U+0007FF + "\u0964 \u0F5F \u20AC \uFFFB", // U+000800 to U+00FFFF + + // there would be strings containing non-BMP code points here, but + // unfortunately JS strings are UCS-2 (and worse yet are treated as + // 16-bit values by the spec), so we have to do gymnastics to work + // with non-BMP -- manual surrogate decoding doesn't work because + // String.prototype.charCodeAt() ignores surrogate pairs and only + // returns 16-bit values +]; + +// test conversion equality -- keys are names of files containing equivalent +// Unicode data, values are the encoding of the file in the format expected by +// nsIConverter(In|Out)putStream.init +const UNICODE_FILES = { + "unicode-conversion.utf8.txt": "UTF-8", + "unicode-conversion.utf16.txt": "UTF-16", + "unicode-conversion.utf16le.txt": "UTF-16LE", + "unicode-conversion.utf16be.txt": "UTF-16BE", +}; + +function test_utf8_1() { + for (var i = 0; i < UNICODE_STRINGS.length; i++) { + var pipe = Pipe(); + var conv = new COS(pipe.outputStream, "UTF-8"); + Assert.ok(conv.writeString(UNICODE_STRINGS[i])); + conv.close(); + + if ( + !equalStreams( + new UTF8(pipe.inputStream), + stringToCodePoints(UNICODE_STRINGS[i]) + ) + ) { + do_throw("UNICODE_STRINGS[" + i + "] not handled correctly"); + } + } +} + +function test_cross_conversion() { + for (var fn1 in UNICODE_FILES) { + var fin = getBinaryInputStream(fn1); + var ss = StorageStream(); + + var bos = new BOS(ss.getOutputStream(0)); + var av; + while ((av = fin.available()) > 0) { + var data = fin.readByteArray(av); + bos.writeByteArray(data); + } + fin.close(); + bos.close(); + + for (var fn2 in 
UNICODE_FILES) { + var fin2 = getUnicharInputStream(fn2, UNICODE_FILES[fn2]); + var unichar = new CIS( + ss.newInputStream(0), + UNICODE_FILES[fn1], + 8192, + 0x0 + ); + + if (!equalUnicharStreams(unichar, fin2)) { + do_throw( + "unequal streams: " + UNICODE_FILES[fn1] + ", " + UNICODE_FILES[fn2] + ); + } + } + } +} + +// utility functions + +function StorageStream() { + return new _SS(8192, Math.pow(2, 32) - 1, null); +} + +function getUnicharInputStream(filename, encoding) { + var file = dataDir.clone(); + file.append(filename); + + const PR_RDONLY = 0x1; + var fis = new FIS( + file, + PR_RDONLY, + "0644", + Ci.nsIFileInputStream.CLOSE_ON_EOF + ); + return new CIS(fis, encoding, 8192, 0x0); +} + +function getBinaryInputStream(filename, encoding) { + var file = dataDir.clone(); + file.append(filename); + + const PR_RDONLY = 0x1; + var fis = new FIS( + file, + PR_RDONLY, + "0644", + Ci.nsIFileInputStream.CLOSE_ON_EOF + ); + return new BIS(fis); +} + +function equalStreams(stream, codePoints) { + var currIndex = 0; + while (true) { + var unit = stream.readUnit(); + if (unit < 0) { + return currIndex == codePoints.length; + } + if (unit !== codePoints[currIndex++]) { + return false; + } + } + // eslint-disable-next-line no-unreachable + do_throw("not reached"); + return false; +} + +function equalUnicharStreams(s1, s2) { + var r1, r2; + var str1 = {}, + str2 = {}; + while (true) { + r1 = s1.readString(1024, str1); + r2 = s2.readString(1024, str2); + + if (r1 != r2 || str1.value != str2.value) { + print("r1: " + r1 + ", r2: " + r2); + print(str1.value.length); + print(str2.value.length); + return false; + } + if (r1 == 0 && r2 == 0) { + return true; + } + } + + // not reached + // eslint-disable-next-line no-unreachable + return false; +} + +function stringToCodePoints(str) { + return str.split("").map(function (v) { + return v.charCodeAt(0); + }); +} + +function lowbits(n) { + return Math.pow(2, n) - 1; +} + +function Pipe() { + return new _Pipe(false, false, 1024, 
10, null); +} + +// complex charset readers + +/** + * Wraps a UTF-8 stream to allow access to the Unicode code points in it. + * + * @param stream + * the stream to wrap + */ +function UTF8(stream) { + this._stream = new BIS(stream); +} +UTF8.prototype = { + // returns numeric code point at front of stream encoded in UTF-8, -1 if at + // end of stream, or throws if valid (and properly encoded!) code point not + // found + readUnit() { + var str = this._stream; + + var c, c2, c3, c4, rv; + + // if at end of stream, must distinguish failure to read any bytes + // (correct behavior) from failure to read some byte after the first + // in the character + try { + c = str.read8(); + } catch (e) { + return -1; + } + + if (c < 0x80) { + return c; + } + + if (c < 0xc0) { + // c < 11000000 + // byte doesn't have enough leading ones (must be at least two) + throw NS_ERROR_ILLEGAL_VALUE; + } + + c2 = str.read8(); + if (c2 >= 0xc0 || c2 < 0x80) { + throw NS_ERROR_ILLEGAL_VALUE; + } // not 10xxxxxx + + if (c < 0xe0) { + // c < 11100000 + // two-byte between U+000080 and U+0007FF + rv = ((lowbits(5) & c) << 6) + (lowbits(6) & c2); + // no upper bounds-check needed, by previous lines + if (rv >= 0x80) { + return rv; + } + throw NS_ERROR_ILLEGAL_VALUE; + } + + c3 = str.read8(); + if (c3 >= 0xc0 || c3 < 0x80) { + throw NS_ERROR_ILLEGAL_VALUE; + } // not 10xxxxxx + + if (c < 0xf0) { + // c < 11110000 + // three-byte between U+000800 and U+00FFFF + rv = + ((lowbits(4) & c) << 12) + ((lowbits(6) & c2) << 6) + (lowbits(6) & c3); + // no upper bounds-check needed, by previous lines + if (rv >= 0xe000 || (rv >= 0x800 && rv <= 0xd7ff)) { + return rv; + } + throw NS_ERROR_ILLEGAL_VALUE; + } + + c4 = str.read8(); + if (c4 >= 0xc0 || c4 < 0x80) { + throw NS_ERROR_ILLEGAL_VALUE; + } // not 10xxxxxx + + if (c < 0xf8) { + // c < 11111000 + // four-byte between U+010000 and U+10FFFF + rv = + ((lowbits(3) & c) << 18) + + ((lowbits(6) & c2) << 12) + + ((lowbits(6) & c3) << 6) + + (lowbits(6) & c4); 
+ // need an upper bounds-check since 0x10FFFF isn't (2**n - 1) + if (rv >= 0x10000 && rv <= 0x10ffff) { + return rv; + } + throw NS_ERROR_ILLEGAL_VALUE; + } + + // 11111000 or greater -- no UTF-8 mapping + throw NS_ERROR_ILLEGAL_VALUE; + }, +}; + +/** + * Wraps a UTF-16 stream to allow access to the Unicode code points in it. + * + * @param stream + * the stream to wrap + * @param bigEndian + * true for UTF-16BE, false for UTF-16LE, not present at all for UTF-16 with + * a byte-order mark + */ +function UTF16(stream, bigEndian) { + this._stream = new BIS(stream); + if (arguments.length > 1) { + this._bigEndian = bigEndian; + } else { + var bom = this._stream.read16(); + if (bom == 0xfeff) { + this._bigEndian = true; + } else if (bom == 0xfffe) { + this._bigEndian = false; + } else { + do_throw("missing BOM: " + bom.toString(16).toUpperCase()); + } + } +} +UTF16.prototype = { + // returns numeric code point at front of stream encoded in UTF-16, + // -1 if at end of stream, or throws if UTF-16 code point not found + readUnit() { + var str = this._stream; + + // if at end of stream, must distinguish failure to read any bytes + // (correct behavior) from failure to read some byte after the first + // in the character + try { + var b1 = str.read8(); + } catch (e) { + return -1; + } + + var b2 = str.read8(); + + var w1 = this._bigEndian ? (b1 << 8) + b2 : (b2 << 8) + b1; + + if (w1 > 0xdbff && w1 < 0xe000) { + // second surrogate, but expecting none or first + throw NS_ERROR_ILLEGAL_VALUE; + } + + if (w1 > 0xd7ff && w1 < 0xdc00) { + // non-BMP, use surrogate pair + b1 = str.read8(); + b2 = str.read8(); + var w2 = this._bigEndian ? 
(b1 << 8) + b2 : (b2 << 8) + b1; + if (w2 < 0xdc00 || w2 > 0xdfff) { + throw NS_ERROR_ILLEGAL_VALUE; + } + + var rv = 0x100000 + ((lowbits(10) & w2) << 10) + (lowbits(10) & w1); + if (rv <= 0x10ffff) { + return rv; + } + throw NS_ERROR_ILLEGAL_VALUE; + } + + // non-surrogate + return w1; + }, +}; diff --git a/intl/uconv/tests/unit/test_decode_8859-1.js b/intl/uconv/tests/unit/test_decode_8859-1.js new file mode 100644 index 0000000000..d820b35ca8 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-1.js @@ -0,0 +1,26 @@ +// Tests conversion from ISO-8859-1 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff"; + +const aliases = [ + "ISO-8859-1", + "iso-8859-1", + "latin1", + "iso_8859-1", + "iso8859-1", + "iso-ir-100", + "l1", + "ibm819", + "cp819", + "csisolatin1", + "iso88591", + "iso_8859-1:1987", +]; + 
+function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-10.js b/intl/uconv/tests/unit/test_decode_8859-10.js new file mode 100644 index 0000000000..47e934817e --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-10.js @@ -0,0 +1,22 @@ +// Tests conversion from ISO-8859-10 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0104\u0112\u0122\u012a\u0128\u0136\u00a7\u013b\u0110\u0160\u0166\u017d\u00ad\u016a\u014a\u00b0\u0105\u0113\u0123\u012b\u0129\u0137\u00b7\u013c\u0111\u0161\u0167\u017e\u2015\u016b\u014b\u0100\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u012e\u010c\u00c9\u0118\u00cb\u0116\u00cd\u00ce\u00cf\u00d0\u0145\u014c\u00d3\u00d4\u00d5\u00d6\u0168\u00d8\u0172\u00da\u00db\u00dc\u00dd\u00de\u00df\u0101\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u012f\u010d\u00e9\u0119\u00eb\u0117\u00ed\u00ee\u00ef\u00f0\u0146\u014d\u00f3\u00f4\u00f5\u00f6\u0169\u00f8\u0173\u00fa\u00fb\u00fc\u00fd\u00fe\u0138"; + +const aliases = [ + "ISO-8859-10", + "iso-8859-10", + "iso8859-10", + "latin6", + "iso-ir-157", + "l6", + "csisolatin6", + "iso885910", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-11.js b/intl/uconv/tests/unit/test_decode_8859-11.js new file mode 100644 index 0000000000..b647ec503c --- /dev/null +++ 
b/intl/uconv/tests/unit/test_decode_8859-11.js @@ -0,0 +1,13 @@ +// Tests conversion from ISO-8859-11 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e24\u0e25\u0e26\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e2f\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35\u0e36\u0e37\u0e38\u0e39\u0e3a\u0e3f\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a\u0e4b\u0e4c\u0e4d\u0e4e\u0e4f\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57\u0e58\u0e59\u0e5a\u0e5b"; + +const aliases = ["ISO-8859-11", "iso-8859-11", "iso8859-11", "iso885911"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-13.js b/intl/uconv/tests/unit/test_decode_8859-13.js new file mode 100644 index 0000000000..91443e6bee --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-13.js @@ -0,0 +1,13 @@ +// Tests conversion from ISO-8859-13 to Unicode + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u201d\u00a2\u00a3\u00a4\u201e\u00a6\u00a7\u00d8\u00a9\u0156\u00ab\u00ac\u00ad\u00ae\u00c6\u00b0\u00b1\u00b2\u00b3\u201c\u00b5\u00b6\u00b7\u00f8\u00b9\u0157\u00bb\u00bc\u00bd\u00be\u00e6\u0104\u012e\u0100\u0106\u00c4\u00c5\u0118\u0112\u010c\u00c9\u0179\u0116\u0122\u0136\u012a\u013b\u0160\u0143\u0145\u00d3\u014c\u00d5\u00d6\u00d7\u0172\u0141\u015a\u016a\u00dc\u017b\u017d\u00df\u0105\u012f\u0101\u0107\u00e4\u00e5\u0119\u0113\u010d\u00e9\u017a\u0117\u0123\u0137\u012b\u013c\u0161\u0144\u0146\u00f3\u014d\u00f5\u00f6\u00f7\u0173\u0142\u015b\u016b\u00fc\u017c\u017e\u2019"; + +const aliases = ["ISO-8859-13", "iso-8859-13", "iso8859-13", "iso885913"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-14.js b/intl/uconv/tests/unit/test_decode_8859-14.js new file mode 100644 index 0000000000..d1fdcb204c --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-14.js @@ -0,0 +1,13 @@ +// Tests conversion from ISO-8859-14 to Unicode + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u1e02\u1e03\u00a3\u010a\u010b\u1e0a\u00a7\u1e80\u00a9\u1e82\u1e0b\u1ef2\u00ad\u00ae\u0178\u1e1e\u1e1f\u0120\u0121\u1e40\u1e41\u00b6\u1e56\u1e81\u1e57\u1e83\u1e60\u1ef3\u1e84\u1e85\u1e61\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u0174\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u1e6a\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u0176\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u0175\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u1e6b\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u0177\u00ff"; + +const aliases = ["ISO-8859-14", "iso-8859-14", "iso8859-14", "iso885914"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-15.js b/intl/uconv/tests/unit/test_decode_8859-15.js new file mode 100644 index 0000000000..7344fb55a8 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-15.js @@ -0,0 +1,21 @@ +// Tests conversion from ISO-8859-15 to Unicode + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a1\u00a2\u00a3\u20ac\u00a5\u0160\u00a7\u0161\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u017d\u00b5\u00b6\u00b7\u017e\u00b9\u00ba\u00bb\u0152\u0153\u0178\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff"; + +const aliases = [ + "ISO-8859-15", + "iso-8859-15", + "iso8859-15", + "iso_8859-15", + "iso885915", + "csisolatin9", + "l9", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-2.js b/intl/uconv/tests/unit/test_decode_8859-2.js new file mode 100644 index 0000000000..0e3c15bdee --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-2.js @@ -0,0 +1,24 @@ +// Tests conversion from ISO-8859-2 to Unicode + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0104\u02d8\u0141\u00a4\u013d\u015a\u00a7\u00a8\u0160\u015e\u0164\u0179\u00ad\u017d\u017b\u00b0\u0105\u02db\u0142\u00b4\u013e\u015b\u02c7\u00b8\u0161\u015f\u0165\u017a\u02dd\u017e\u017c\u0154\u00c1\u00c2\u0102\u00c4\u0139\u0106\u00c7\u010c\u00c9\u0118\u00cb\u011a\u00cd\u00ce\u010e\u0110\u0143\u0147\u00d3\u00d4\u0150\u00d6\u00d7\u0158\u016e\u00da\u0170\u00dc\u00dd\u0162\u00df\u0155\u00e1\u00e2\u0103\u00e4\u013a\u0107\u00e7\u010d\u00e9\u0119\u00eb\u011b\u00ed\u00ee\u010f\u0111\u0144\u0148\u00f3\u00f4\u0151\u00f6\u00f7\u0159\u016f\u00fa\u0171\u00fc\u00fd\u0163\u02d9"; + +const aliases = [ + "ISO-8859-2", + "iso-8859-2", + "latin2", + "iso_8859-2", + "iso8859-2", + "iso-ir-101", + "l2", + "csisolatin2", + "iso88592", + "iso_8859-2:1987", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-3.js b/intl/uconv/tests/unit/test_decode_8859-3.js new file mode 100644 index 0000000000..011f82de87 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-3.js @@ -0,0 +1,24 @@ +// Tests conversion from ISO-8859-3 to Unicode + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbf\xc0\xc1\xc2\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0126\u02d8\u00a3\u00a4\u0124\u00a7\u00a8\u0130\u015e\u011e\u0134\u00ad\u017b\u00b0\u0127\u00b2\u00b3\u00b4\u00b5\u0125\u00b7\u00b8\u0131\u015f\u011f\u0135\u00bd\u017c\u00c0\u00c1\u00c2\u00c4\u010a\u0108\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d1\u00d2\u00d3\u00d4\u0120\u00d6\u00d7\u011c\u00d9\u00da\u00db\u00dc\u016c\u015c\u00df\u00e0\u00e1\u00e2\u00e4\u010b\u0109\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f1\u00f2\u00f3\u00f4\u0121\u00f6\u00f7\u011d\u00f9\u00fa\u00fb\u00fc\u016d\u015d\u02d9"; + +const aliases = [ + "ISO-8859-3", + "iso-8859-3", + "latin3", + "iso_8859-3", + "iso8859-3", + "iso-ir-109", + "l3", + "csisolatin3", + "iso88593", + "iso_8859-3:1988", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-4.js b/intl/uconv/tests/unit/test_decode_8859-4.js new file mode 100644 index 0000000000..6a8b89c2ef --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-4.js @@ -0,0 +1,24 @@ +// Tests conversion from ISO-8859-4 to Unicode + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0104\u0138\u0156\u00a4\u0128\u013b\u00a7\u00a8\u0160\u0112\u0122\u0166\u00ad\u017d\u00af\u00b0\u0105\u02db\u0157\u00b4\u0129\u013c\u02c7\u00b8\u0161\u0113\u0123\u0167\u014a\u017e\u014b\u0100\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u012e\u010c\u00c9\u0118\u00cb\u0116\u00cd\u00ce\u012a\u0110\u0145\u014c\u0136\u00d4\u00d5\u00d6\u00d7\u00d8\u0172\u00da\u00db\u00dc\u0168\u016a\u00df\u0101\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u012f\u010d\u00e9\u0119\u00eb\u0117\u00ed\u00ee\u012b\u0111\u0146\u014d\u0137\u00f4\u00f5\u00f6\u00f7\u00f8\u0173\u00fa\u00fb\u00fc\u0169\u016b\u02d9"; + +const aliases = [ + "ISO-8859-4", + "iso-8859-4", + "latin4", + "iso_8859-4", + "iso8859-4", + "iso-ir-110", + "l4", + "csisolatin4", + "iso88594", + "iso_8859-4:1988", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-5.js b/intl/uconv/tests/unit/test_decode_8859-5.js new file mode 100644 index 0000000000..220a12ab3a --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-5.js @@ -0,0 +1,22 @@ +// Tests conversion from ISO-8859-5 to Unicode + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0401\u0402\u0403\u0404\u0405\u0406\u0407\u0408\u0409\u040a\u040b\u040c\u00ad\u040e\u040f\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u044f\u2116\u0451\u0452\u0453\u0454\u0455\u0456\u0457\u0458\u0459\u045a\u045b\u045c\u00a7\u045e\u045f"; + +const aliases = [ + "ISO-8859-5", + "iso-8859-5", + "iso_8859-5", + "iso8859-5", + "iso-ir-144", + "csisolatincyrillic", + "iso88595", + "iso_8859-5:1988", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-6.js b/intl/uconv/tests/unit/test_decode_8859-6.js new file mode 100644 index 0000000000..9c94ef8673 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-6.js @@ -0,0 +1,25 @@ +// Tests conversion from ISO-8859-6 to Unicode + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa4\xac\xad\xbb\xbf\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a4\u060c\u00ad\u061b\u061f\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u064b\u064c\u064d\u064e\u064f\u0650\u0651\u0652"; + +const aliases = [ + "ISO-8859-6", + "iso-8859-6", + "iso_8859-6", + "iso8859-6", + "arabic", + "iso-ir-127", + "ecma-114", + "asmo-708", + "csisolatinarabic", + "iso88596", + "iso_8859-6:1987", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-7.js b/intl/uconv/tests/unit/test_decode_8859-7.js new file mode 100644 index 0000000000..9d74342345 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-7.js @@ -0,0 +1,27 @@ +// Tests conversion from ISO-8859-7 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u2018\u2019\u00a3\u20ac\u20af\u00a6\u00a7\u00a8\u00a9\u037a\u00ab\u00ac\u00ad\u2015\u00b0\u00b1\u00b2\u00b3\u0384\u0385\u0386\u00b7\u0388\u0389\u038a\u00bb\u038c\u00bd\u038e\u038f\u0390\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039a\u039b\u039c\u039d\u039e\u039f\u03a0\u03a1\u03a3\u03a4\u03a5\u03a6\u03a7\u03a8\u03a9\u03aa\u03ab\u03ac\u03ad\u03ae\u03af\u03b0\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c2\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03ca\u03cb\u03cc\u03cd\u03ce"; + +const aliases = [ + "ISO-8859-7", + "iso-8859-7", + "greek", + "greek8", + "sun_eu_greek", + "iso_8859-7", + "iso8859-7", + "iso-ir-126", + "elot_928", + "ecma-118", + "csisolatingreek", + "iso88597", + "iso_8859-7:1987", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-8.js b/intl/uconv/tests/unit/test_decode_8859-8.js new file mode 100644 index 0000000000..c7b758bf03 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-8.js @@ -0,0 +1,24 @@ +// Tests conversion from ISO-8859-8 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfd\xfe"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00d7\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00f7\u00bb\u00bc\u00bd\u00be\u2017\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05da\u05db\u05dc\u05dd\u05de\u05df\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5\u05e6\u05e7\u05e8\u05e9\u05ea\u200e\u200f"; + +const aliases = [ + "ISO-8859-8", + "iso-8859-8", + "hebrew", + "visual", + "iso_8859-8", + "iso8859-8", + "iso-ir-138", + "csisolatinhebrew", + "iso88598", + "iso_8859-8:1988", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_8859-9.js b/intl/uconv/tests/unit/test_decode_8859-9.js new file mode 100644 index 0000000000..1582e2093f --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_8859-9.js @@ -0,0 +1,24 @@ +// Tests conversion from ISO-8859-9 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u011e\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u0130\u015e\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u011f\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u0131\u015f\u00ff"; + +const aliases = [ + "ISO-8859-9", + "iso-8859-9", + "latin5", + "iso_8859-9", + "iso8859-9", + "iso-ir-148", + "l5", + "csisolatin5", + "iso88599", + "iso_8859-9:1989", +]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1250.js b/intl/uconv/tests/unit/test_decode_CP1250.js new file mode 100644 index 0000000000..d044c67801 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1250.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1250 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0083\u201e\u2026\u2020\u2021\u0088\u2030\u0160\u2039\u015a\u0164\u017d\u0179\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u0161\u203a\u015b\u0165\u017e\u017a\u00a0\u02c7\u02d8\u0141\u00a4\u0104\u00a6\u00a7\u00a8\u00a9\u015e\u00ab\u00ac\u00ad\u00ae\u017b\u00b0\u00b1\u02db\u0142\u00b4\u00b5\u00b6\u00b7\u00b8\u0105\u015f\u00bb\u013d\u02dd\u013e\u017c\u0154\u00c1\u00c2\u0102\u00c4\u0139\u0106\u00c7\u010c\u00c9\u0118\u00cb\u011a\u00cd\u00ce\u010e\u0110\u0143\u0147\u00d3\u00d4\u0150\u00d6\u00d7\u0158\u016e\u00da\u0170\u00dc\u00dd\u0162\u00df\u0155\u00e1\u00e2\u0103\u00e4\u013a\u0107\u00e7\u010d\u00e9\u0119\u00eb\u011b\u00ed\u00ee\u010f\u0111\u0144\u0148\u00f3\u00f4\u0151\u00f6\u00f7\u0159\u016f\u00fa\u0171\u00fc\u00fd\u0163\u02d9"; + +const aliases = ["windows-1250", "x-cp1250", "cp1250"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1251.js b/intl/uconv/tests/unit/test_decode_CP1251.js new file mode 100644 index 0000000000..01153e8650 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1251.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1251 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0402\u0403\u201a\u0453\u201e\u2026\u2020\u2021\u20ac\u2030\u0409\u2039\u040a\u040c\u040b\u040f\u0452\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u0459\u203a\u045a\u045c\u045b\u045f\u00a0\u040e\u045e\u0408\u00a4\u0490\u00a6\u00a7\u0401\u00a9\u0404\u00ab\u00ac\u00ad\u00ae\u0407\u00b0\u00b1\u0406\u0456\u0491\u00b5\u00b6\u00b7\u0451\u2116\u0454\u00bb\u0458\u0405\u0455\u0457\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u044f"; + +const aliases = ["windows-1251", "x-cp1251", "cp1251"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1252.js b/intl/uconv/tests/unit/test_decode_CP1252.js new file mode 100644 index 0000000000..d41d7d72a3 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1252.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1252 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0160\u2039\u0152\u008d\u017d\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u0161\u203a\u0153\u009d\u017e\u0178\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff"; + +const aliases = ["windows-1252", "x-cp1252", "cp1252"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1253.js b/intl/uconv/tests/unit/test_decode_CP1253.js new file mode 100644 index 0000000000..880e2dae74 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1253.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1253 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u0088\u2030\u008a\u2039\u008c\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u009a\u203a\u009c\u009d\u009e\u009f\u00a0\u0385\u0386\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\ufffd\u00ab\u00ac\u00ad\u00ae\u2015\u00b0\u00b1\u00b2\u00b3\u0384\u00b5\u00b6\u00b7\u0388\u0389\u038a\u00bb\u038c\u00bd\u038e\u038f\u0390\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039a\u039b\u039c\u039d\u039e\u039f\u03a0\u03a1\ufffd\u03a3\u03a4\u03a5\u03a6\u03a7\u03a8\u03a9\u03aa\u03ab\u03ac\u03ad\u03ae\u03af\u03b0\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c2\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03ca\u03cb\u03cc\u03cd\u03ce\ufffd"; + +const aliases = ["windows-1253", "x-cp1253", "cp1253"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1254.js b/intl/uconv/tests/unit/test_decode_CP1254.js new file mode 100644 index 0000000000..f4af7e2088 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1254.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1254 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0160\u2039\u0152\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u0161\u203a\u0153\u009d\u009e\u0178\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u011e\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u0130\u015e\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u011f\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u0131\u015f\u00ff"; + +const aliases = ["windows-1254", "x-cp1254", "cp1254"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1255.js b/intl/uconv/tests/unit/test_decode_CP1255.js new file mode 100644 index 0000000000..57e1b54636 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1255.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1255 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u008a\u2039\u008c\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u009a\u203a\u009c\u009d\u009e\u009f\u00a0\u00a1\u00a2\u00a3\u20aa\u00a5\u00a6\u00a7\u00a8\u00a9\u00d7\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00f7\u00bb\u00bc\u00bd\u00be\u00bf\u05b0\u05b1\u05b2\u05b3\u05b4\u05b5\u05b6\u05b7\u05b8\u05b9\u05ba\u05bb\u05bc\u05bd\u05be\u05bf\u05c0\u05c1\u05c2\u05c3\u05f0\u05f1\u05f2\u05f3\u05f4\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05da\u05db\u05dc\u05dd\u05de\u05df\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5\u05e6\u05e7\u05e8\u05e9\u05ea\ufffd\ufffd\u200e\u200f\ufffd"; + +const aliases = ["windows-1255", "x-cp1255", "cp1255"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1256.js b/intl/uconv/tests/unit/test_decode_CP1256.js new file mode 100644 index 0000000000..b91e448a46 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1256.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1256 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u067e\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0679\u2039\u0152\u0686\u0698\u0688\u06af\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u06a9\u2122\u0691\u203a\u0153\u200c\u200d\u06ba\u00a0\u060c\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u06be\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u061b\u00bb\u00bc\u00bd\u00be\u061f\u06c1\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u00d7\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u00e0\u0644\u00e2\u0645\u0646\u0647\u0648\u00e7\u00e8\u00e9\u00ea\u00eb\u0649\u064a\u00ee\u00ef\u064b\u064c\u064d\u064e\u00f4\u064f\u0650\u00f7\u0651\u00f9\u0652\u00fb\u00fc\u200e\u200f\u06d2"; + +const aliases = ["windows-1256", "x-cp1256", "cp1256"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1257.js b/intl/uconv/tests/unit/test_decode_CP1257.js new file mode 100644 index 0000000000..a61bf2e870 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1257.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1257 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0083\u201e\u2026\u2020\u2021\u0088\u2030\u008a\u2039\u008c\u00a8\u02c7\u00b8\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u009a\u203a\u009c\u00af\u02db\u009f\u00a0\ufffd\u00a2\u00a3\u00a4\ufffd\u00a6\u00a7\u00d8\u00a9\u0156\u00ab\u00ac\u00ad\u00ae\u00c6\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00f8\u00b9\u0157\u00bb\u00bc\u00bd\u00be\u00e6\u0104\u012e\u0100\u0106\u00c4\u00c5\u0118\u0112\u010c\u00c9\u0179\u0116\u0122\u0136\u012a\u013b\u0160\u0143\u0145\u00d3\u014c\u00d5\u00d6\u00d7\u0172\u0141\u015a\u016a\u00dc\u017b\u017d\u00df\u0105\u012f\u0101\u0107\u00e4\u00e5\u0119\u0113\u010d\u00e9\u017a\u0117\u0123\u0137\u012b\u013c\u0161\u0144\u0146\u00f3\u014d\u00f5\u00f6\u00f7\u0173\u0142\u015b\u016b\u00fc\u017c\u017e\u02d9"; + +const aliases = ["windows-1257", "x-cp1257", "cp1257"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP1258.js b/intl/uconv/tests/unit/test_decode_CP1258.js new file mode 100644 index 0000000000..422e9a7985 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP1258.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-1258 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u008a\u2039\u0152\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u009a\u203a\u0153\u009d\u009e\u0178\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u0102\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u0300\u00cd\u00ce\u00cf\u0110\u00d1\u0309\u00d3\u00d4\u01a0\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u01af\u0303\u00df\u00e0\u00e1\u00e2\u0103\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u0301\u00ed\u00ee\u00ef\u0111\u00f1\u0323\u00f3\u00f4\u01a1\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u01b0\u20ab\u00ff"; + +const aliases = ["windows-1258", "x-cp1258", "cp1258"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_CP874.js b/intl/uconv/tests/unit/test_decode_CP874.js new file mode 100644 index 0000000000..7f05e7669c --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_CP874.js @@ -0,0 +1,13 @@ +// Tests conversion from windows-874 to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u0082\u0083\u0084\u2026\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e24\u0e25\u0e26\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e2f\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35\u0e36\u0e37\u0e38\u0e39\u0e3a\ufffd\ufffd\ufffd\ufffd\u0e3f\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a\u0e4b\u0e4c\u0e4d\u0e4e\u0e4f\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57\u0e58\u0e59\u0e5a\u0e5b\ufffd\ufffd\ufffd\ufffd"; + +const aliases = ["windows-874", "dos-874"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_gb18030.js b/intl/uconv/tests/unit/test_decode_gb18030.js new file mode 100644 index 0000000000..ca5796bbaa --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_gb18030.js @@ -0,0 +1,16 @@ +// Tests conversion from gb18030 to Unicode +// This is a sniff test which doesn't cover the full gb18030 range: the test string +// includes only the ASCII range and the first 63 double byte characters +// and border values of 4 byte characters + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x81@\x81A\x81B\x81C\x81D\x81E\x81F\x81G\x81H\x81I\x81J\x81K\x81L\x81M\x81N\x81O\x81P\x81Q\x81R\x81S\x81T\x81U\x81V\x81W\x81X\x81Y\x81Z\x81[\x81\\\x81]\x81^\x81_\x81`\x81a\x81b\x81c\x81d\x81e\x81f\x81g\x81h\x81i\x81j\x81k\x81l\x81m\x81n\x81o\x81p\x81q\x81r\x81s\x81t\x81u\x81v\x81w\x81x\x81y\x81z\x81{\x81|\x81}\x81~\x810\x810\x841\xa46\x841\xa47\x849\xfe9\x850\x810\x859\xfe9\x860\x810\x8f9\xfe9\x900\x810\xe32\x9a5\xe32\x9a6\xe39\xfe9\xe40\x810\xfc9\xfe9\xfd0\x810\xfe9\xfe9\xa8\xa0\xa8\xbc\xa8\xbf\xa8\xc1\x815\xf46\x815\xf47\x815\xf48"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u4E02\u4E04\u4E05\u4E06\u4E0F\u4E12\u4E17\u4E1F\u4E20\u4E21\u4E23\u4E26\u4E29\u4E2E\u4E2F\u4E31\u4E33\u4E35\u4E37\u4E3C\u4E40\u4E41\u4E42\u4E44\u4E46\u4E4A\u4E51\u4E55\u4E57\u4E5A\u4E5B\u4E62\u4E63\u4E64\u4E65\u4E67\u4E68\u4E6A\u4E6B\u4E6C\u4E6D\u4E6E\u4E6F\u4E72\u4E74\u4E75\u4E76\u4E77\u4E78\u4E79\u4E7A\u4E7B\u4E7C\u4E7D\u4E7F\u4E80\u4E81\u4E82\u4E83\u4E84\u4E85\u4E87\u4E8A\x80\uFFFC\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uD800\uDC00\uDBFF\uDFFF\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uE7C6\u1E3F\u01F9\uE7C9\u1E3E\uE7C7\u1E40"; + +const aliases = ["gb18030"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_gbk.js b/intl/uconv/tests/unit/test_decode_gbk.js new file mode 100644 index 0000000000..6e4414722d --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_gbk.js @@ -0,0 +1,15 @@ +// Tests conversion from gbk to Unicode +// This is a sniff test which doesn't cover the full gbk range: the test string +// includes only the ASCII range and the first 63 double byte characters + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x81@\x81A\x81B\x81C\x81D\x81E\x81F\x81G\x81H\x81I\x81J\x81K\x81L\x81M\x81N\x81O\x81P\x81Q\x81R\x81S\x81T\x81U\x81V\x81W\x81X\x81Y\x81Z\x81[\x81\\\x81]\x81^\x81_\x81`\x81a\x81b\x81c\x81d\x81e\x81f\x81g\x81h\x81i\x81j\x81k\x81l\x81m\x81n\x81o\x81p\x81q\x81r\x81s\x81t\x81u\x81v\x81w\x81x\x81y\x81z\x81{\x81|\x81}\x81~"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u4E02\u4E04\u4E05\u4E06\u4E0F\u4E12\u4E17\u4E1F\u4E20\u4E21\u4E23\u4E26\u4E29\u4E2E\u4E2F\u4E31\u4E33\u4E35\u4E37\u4E3C\u4E40\u4E41\u4E42\u4E44\u4E46\u4E4A\u4E51\u4E55\u4E57\u4E5A\u4E5B\u4E62\u4E63\u4E64\u4E65\u4E67\u4E68\u4E6A\u4E6B\u4E6C\u4E6D\u4E6E\u4E6F\u4E72\u4E74\u4E75\u4E76\u4E77\u4E78\u4E79\u4E7A\u4E7B\u4E7C\u4E7D\u4E7F\u4E80\u4E81\u4E82\u4E83\u4E84\u4E85\u4E87\u4E8A"; + +const aliases = ["gbk", "x-gbk"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_macintosh.js b/intl/uconv/tests/unit/test_decode_macintosh.js new file mode 100644 index 0000000000..5504a630ec --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_macintosh.js @@ -0,0 +1,13 @@ +// Tests conversion from x-mac-roman to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u00c6\u00d8\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0\u222b\u00aa\u00ba\u03a9\u00e6\u00f8\u00bf\u00a1\u00ac\u221a\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca\u00ff\u0178\u2044\u20ac\u2039\u203a\ufb01\ufb02\u2021\u00b7\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\u0131\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7"; + +const aliases = ["csMacintosh", "mac", "macintosh", "x-mac-roman"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_x_mac_cyrillic.js b/intl/uconv/tests/unit/test_decode_x_mac_cyrillic.js new file mode 100644 index 0000000000..ad783b0e96 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_x_mac_cyrillic.js @@ -0,0 +1,13 @@ +// Tests conversion from x-mac-cyrillic to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u2020\u00b0\u0490\u00a3\u00a7\u2022\u00b6\u0406\u00ae\u00a9\u2122\u0402\u0452\u2260\u0403\u0453\u221e\u00b1\u2264\u2265\u0456\u00b5\u0491\u0408\u0404\u0454\u0407\u0457\u0409\u0459\u040a\u045a\u0458\u0405\u00ac\u221a\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u040b\u045b\u040c\u045c\u0455\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u201e\u040e\u045e\u040f\u045f\u2116\u0401\u0451\u044f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u20ac"; + +const aliases = ["x-mac-cyrillic"]; + +function run_test() { + testDecodeAliasesInternal(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_decode_x_mac_ukrainian.js b/intl/uconv/tests/unit/test_decode_x_mac_ukrainian.js new file mode 100644 index 0000000000..b8b3d63018 --- /dev/null +++ b/intl/uconv/tests/unit/test_decode_x_mac_ukrainian.js @@ -0,0 +1,13 @@ +// Tests conversion from x-mac-ukrainian to Unicode + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u2020\u00b0\u0490\u00a3\u00a7\u2022\u00b6\u0406\u00ae\u00a9\u2122\u0402\u0452\u2260\u0403\u0453\u221e\u00b1\u2264\u2265\u0456\u00b5\u0491\u0408\u0404\u0454\u0407\u0457\u0409\u0459\u040a\u045a\u0458\u0405\u00ac\u221a\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u040b\u045b\u040c\u045c\u0455\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u201e\u040e\u045e\u040f\u045f\u2116\u0401\u0451\u044f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u20ac"; + +const aliases = ["x-mac-ukrainian"]; + +function run_test() { + testDecodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-1.js b/intl/uconv/tests/unit/test_encode_8859-1.js new file mode 100644 index 0000000000..f5a6559de1 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-1.js @@ -0,0 +1,26 @@ +// Tests conversion from Unicode to ISO-8859-1 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff"; + +const expectedString = + 
" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-1", + "iso-8859-1", + "latin1", + "iso_8859-1", + "iso8859-1", + "iso-ir-100", + "l1", + "ibm819", + "cp819", + "csisolatin1", + "iso88591", + "iso_8859-1:1987", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-10.js b/intl/uconv/tests/unit/test_encode_8859-10.js new file mode 100644 index 0000000000..2ecad6013f --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-10.js @@ -0,0 +1,22 @@ +// Tests conversion from Unicode to ISO-8859-10 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0104\u0112\u0122\u012a\u0128\u0136\u00a7\u013b\u0110\u0160\u0166\u017d\u00ad\u016a\u014a\u00b0\u0105\u0113\u0123\u012b\u0129\u0137\u00b7\u013c\u0111\u0161\u0167\u017e\u2015\u016b\u014b\u0100\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u012e\u010c\u00c9\u0118\u00cb\u0116\u00cd\u00ce\u00cf\u00d0\u0145\u014c\u00d3\u00d4\u00d5\u00d6\u0168\u00d8\u0172\u00da\u00db\u00dc\u00dd\u00de\u00df\u0101\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u012f\u010d\u00e9\u0119\u00eb\u0117\u00ed\u00ee\u00ef\u00f0\u0146\u014d\u00f3\u00f4\u00f5\u00f6\u0169\u00f8\u0173\u00fa\u00fb\u00fc\u00fd\u00fe\u0138"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-10", + "iso-8859-10", + "iso8859-10", + "latin6", + "iso-ir-157", + "l6", + "csisolatin6", + "iso885910", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-11.js b/intl/uconv/tests/unit/test_encode_8859-11.js new file mode 100644 index 0000000000..7011c26688 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-11.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to ISO-8859-11 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e24\u0e25\u0e26\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e2f\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35\u0e36\u0e37\u0e38\u0e39\u0e3a\u0e3f\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a\u0e4b\u0e4c\u0e4d\u0e4e\u0e4f\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57\u0e58\u0e59\u0e5a\u0e5b"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb"; + +const aliases = ["ISO-8859-11", "iso-8859-11", "iso8859-11", "iso885911"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-13.js b/intl/uconv/tests/unit/test_encode_8859-13.js new file mode 100644 index 0000000000..cf6ad98466 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-13.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to ISO-8859-13 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u201d\u00a2\u00a3\u00a4\u201e\u00a6\u00a7\u00d8\u00a9\u0156\u00ab\u00ac\u00ad\u00ae\u00c6\u00b0\u00b1\u00b2\u00b3\u201c\u00b5\u00b6\u00b7\u00f8\u00b9\u0157\u00bb\u00bc\u00bd\u00be\u00e6\u0104\u012e\u0100\u0106\u00c4\u00c5\u0118\u0112\u010c\u00c9\u0179\u0116\u0122\u0136\u012a\u013b\u0160\u0143\u0145\u00d3\u014c\u00d5\u00d6\u00d7\u0172\u0141\u015a\u016a\u00dc\u017b\u017d\u00df\u0105\u012f\u0101\u0107\u00e4\u00e5\u0119\u0113\u010d\u00e9\u017a\u0117\u0123\u0137\u012b\u013c\u0161\u0144\u0146\u00f3\u014d\u00f5\u00f6\u00f7\u0173\u0142\u015b\u016b\u00fc\u017c\u017e\u2019"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["ISO-8859-13", "iso-8859-13", "iso8859-13", "iso885913"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-14.js b/intl/uconv/tests/unit/test_encode_8859-14.js new file mode 100644 index 0000000000..9a0e8dc00a --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-14.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to ISO-8859-14 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u1e02\u1e03\u00a3\u010a\u010b\u1e0a\u00a7\u1e80\u00a9\u1e82\u1e0b\u1ef2\u00ad\u00ae\u0178\u1e1e\u1e1f\u0120\u0121\u1e40\u1e41\u00b6\u1e56\u1e81\u1e57\u1e83\u1e60\u1ef3\u1e84\u1e85\u1e61\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u0174\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u1e6a\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u0176\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u0175\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u1e6b\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u0177\u00ff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["ISO-8859-14", "iso-8859-14", "iso8859-14", "iso885914"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-15.js b/intl/uconv/tests/unit/test_encode_8859-15.js new file mode 100644 index 0000000000..ed5cd2ec90 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-15.js @@ -0,0 +1,21 @@ +// Tests conversion from Unicode to ISO-8859-15 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a1\u00a2\u00a3\u20ac\u00a5\u0160\u00a7\u0161\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u017d\u00b5\u00b6\u00b7\u017e\u00b9\u00ba\u00bb\u0152\u0153\u0178\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-15", + "iso-8859-15", + "iso8859-15", + "iso_8859-15", + "iso885915", + "csisolatin9", + "l9", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-2.js b/intl/uconv/tests/unit/test_encode_8859-2.js new file mode 100644 index 0000000000..1b34672bcb --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-2.js @@ -0,0 +1,24 @@ +// Tests conversion from Unicode to ISO-8859-2 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0104\u02d8\u0141\u00a4\u013d\u015a\u00a7\u00a8\u0160\u015e\u0164\u0179\u00ad\u017d\u017b\u00b0\u0105\u02db\u0142\u00b4\u013e\u015b\u02c7\u00b8\u0161\u015f\u0165\u017a\u02dd\u017e\u017c\u0154\u00c1\u00c2\u0102\u00c4\u0139\u0106\u00c7\u010c\u00c9\u0118\u00cb\u011a\u00cd\u00ce\u010e\u0110\u0143\u0147\u00d3\u00d4\u0150\u00d6\u00d7\u0158\u016e\u00da\u0170\u00dc\u00dd\u0162\u00df\u0155\u00e1\u00e2\u0103\u00e4\u013a\u0107\u00e7\u010d\u00e9\u0119\u00eb\u011b\u00ed\u00ee\u010f\u0111\u0144\u0148\u00f3\u00f4\u0151\u00f6\u00f7\u0159\u016f\u00fa\u0171\u00fc\u00fd\u0163\u02d9"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-2", + "iso-8859-2", + "latin2", + "iso_8859-2", + "iso8859-2", + "iso-ir-101", + "l2", + "csisolatin2", + "iso88592", + "iso_8859-2:1987", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-3.js b/intl/uconv/tests/unit/test_encode_8859-3.js new file mode 100644 index 0000000000..fff6243431 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-3.js @@ -0,0 +1,24 @@ +// Tests conversion from Unicode to ISO-8859-3 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0126\u02d8\u00a3\u00a4\u0124\u00a7\u00a8\u0130\u015e\u011e\u0134\u00ad\u017b\u00b0\u0127\u00b2\u00b3\u00b4\u00b5\u0125\u00b7\u00b8\u0131\u015f\u011f\u0135\u00bd\u017c\u00c0\u00c1\u00c2\u00c4\u010a\u0108\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d1\u00d2\u00d3\u00d4\u0120\u00d6\u00d7\u011c\u00d9\u00da\u00db\u00dc\u016c\u015c\u00df\u00e0\u00e1\u00e2\u00e4\u010b\u0109\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f1\u00f2\u00f3\u00f4\u0121\u00f6\u00f7\u011d\u00f9\u00fa\u00fb\u00fc\u016d\u015d\u02d9"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbf\xc0\xc1\xc2\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-3", + "iso-8859-3", + "latin3", + "iso_8859-3", + "iso8859-3", + "iso-ir-109", + "l3", + "csisolatin3", + "iso88593", + "iso_8859-3:1988", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-4.js b/intl/uconv/tests/unit/test_encode_8859-4.js new file mode 100644 index 0000000000..192d13fd52 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-4.js @@ -0,0 +1,24 @@ +// Tests conversion from Unicode to ISO-8859-4 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0104\u0138\u0156\u00a4\u0128\u013b\u00a7\u00a8\u0160\u0112\u0122\u0166\u00ad\u017d\u00af\u00b0\u0105\u02db\u0157\u00b4\u0129\u013c\u02c7\u00b8\u0161\u0113\u0123\u0167\u014a\u017e\u014b\u0100\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u012e\u010c\u00c9\u0118\u00cb\u0116\u00cd\u00ce\u012a\u0110\u0145\u014c\u0136\u00d4\u00d5\u00d6\u00d7\u00d8\u0172\u00da\u00db\u00dc\u0168\u016a\u00df\u0101\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u012f\u010d\u00e9\u0119\u00eb\u0117\u00ed\u00ee\u012b\u0111\u0146\u014d\u0137\u00f4\u00f5\u00f6\u00f7\u00f8\u0173\u00fa\u00fb\u00fc\u0169\u016b\u02d9"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-4", + "iso-8859-4", + "latin4", + "iso_8859-4", + "iso8859-4", + "iso-ir-110", + "l4", + "csisolatin4", + "iso88594", + "iso_8859-4:1988", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-5.js b/intl/uconv/tests/unit/test_encode_8859-5.js new file mode 100644 index 0000000000..fb2a05a693 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-5.js @@ -0,0 +1,22 @@ +// Tests conversion from Unicode to ISO-8859-5 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u0401\u0402\u0403\u0404\u0405\u0406\u0407\u0408\u0409\u040a\u040b\u040c\u00ad\u040e\u040f\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u044f\u2116\u0451\u0452\u0453\u0454\u0455\u0456\u0457\u0458\u0459\u045a\u045b\u045c\u00a7\u045e\u045f"; + +const expectedString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-5", + "iso-8859-5", + "iso_8859-5", + "iso8859-5", + "iso-ir-144", + "csisolatincyrillic", + "iso88595", + "iso_8859-5:1988", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-6.js b/intl/uconv/tests/unit/test_encode_8859-6.js new file mode 100644 index 0000000000..1768b89d82 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-6.js @@ -0,0 +1,25 @@ +// Tests conversion from Unicode to ISO-8859-6 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a4\u060c\u00ad\u061b\u061f\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u064b\u064c\u064d\u064e\u064f\u0650\u0651\u0652"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa4\xac\xad\xbb\xbf\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2"; + +const aliases = [ + "ISO-8859-6", + "iso-8859-6", + "iso_8859-6", + "iso8859-6", + "arabic", + "iso-ir-127", + "ecma-114", + "asmo-708", + "csisolatinarabic", + "iso88596", + "iso_8859-6:1987", +]; + +function run_test() { 
+ testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-7.js b/intl/uconv/tests/unit/test_encode_8859-7.js new file mode 100644 index 0000000000..3452130e74 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-7.js @@ -0,0 +1,27 @@ +// Tests conversion from Unicode to ISO-8859-7 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u2018\u2019\u00a3\u20ac\u20af\u00a6\u00a7\u00a8\u00a9\u037a\u00ab\u00ac\u00ad\u2015\u00b0\u00b1\u00b2\u00b3\u0384\u0385\u0386\u00b7\u0388\u0389\u038a\u00bb\u038c\u00bd\u038e\u038f\u0390\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039a\u039b\u039c\u039d\u039e\u039f\u03a0\u03a1\u03a3\u03a4\u03a5\u03a6\u03a7\u03a8\u03a9\u03aa\u03ab\u03ac\u03ad\u03ae\u03af\u03b0\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c2\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03ca\u03cb\u03cc\u03cd\u03ce"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe"; + +const aliases = [ + "ISO-8859-7", + "iso-8859-7", + "greek", + "greek8", + "sun_eu_greek", + "iso_8859-7", + "iso8859-7", + "iso-ir-126", + "elot_928", + "ecma-118", + "csisolatingreek", + "iso88597", + "iso_8859-7:1987", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-8.js b/intl/uconv/tests/unit/test_encode_8859-8.js new file mode 100644 index 0000000000..12402dfb56 --- 
/dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-8.js @@ -0,0 +1,24 @@ +// Tests conversion from Unicode to ISO-8859-8 + +const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00d7\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00f7\u00bb\u00bc\u00bd\u00be\u2017\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05da\u05db\u05dc\u05dd\u05de\u05df\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5\u05e6\u05e7\u05e8\u05e9\u05ea\u200e\u200f"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfd\xfe"; + +const aliases = [ + "ISO-8859-8", + "iso-8859-8", + "hebrew", + "visual", + "iso_8859-8", + "iso8859-8", + "iso-ir-138", + "csisolatinhebrew", + "iso88598", + "iso_8859-8:1988", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_8859-9.js b/intl/uconv/tests/unit/test_encode_8859-9.js new file mode 100644 index 0000000000..7658ebe5ef --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_8859-9.js @@ -0,0 +1,24 @@ +// Tests conversion from Unicode to ISO-8859-9 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u011e\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u0130\u015e\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u011f\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u0131\u015f\u00ff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = [ + "ISO-8859-9", + "iso-8859-9", + "latin5", + "iso_8859-9", + "iso8859-9", + "iso-ir-148", + "l5", + "csisolatin5", + "iso88599", + "iso_8859-9:1989", +]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1250.js b/intl/uconv/tests/unit/test_encode_CP1250.js new file mode 100644 index 0000000000..5b5c0d2f0a --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1250.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1250 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0083\u201e\u2026\u2020\u2021\u0088\u2030\u0160\u2039\u015a\u0164\u017d\u0179\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u0161\u203a\u015b\u0165\u017e\u017a\u00a0\u02c7\u02d8\u0141\u00a4\u0104\u00a6\u00a7\u00a8\u00a9\u015e\u00ab\u00ac\u00ad\u00ae\u017b\u00b0\u00b1\u02db\u0142\u00b4\u00b5\u00b6\u00b7\u00b8\u0105\u015f\u00bb\u013d\u02dd\u013e\u017c\u0154\u00c1\u00c2\u0102\u00c4\u0139\u0106\u00c7\u010c\u00c9\u0118\u00cb\u011a\u00cd\u00ce\u010e\u0110\u0143\u0147\u00d3\u00d4\u0150\u00d6\u00d7\u0158\u016e\u00da\u0170\u00dc\u00dd\u0162\u00df\u0155\u00e1\u00e2\u0103\u00e4\u013a\u0107\u00e7\u010d\u00e9\u0119\u00eb\u011b\u00ed\u00ee\u010f\u0111\u0144\u0148\u00f3\u00f4\u0151\u00f6\u00f7\u0159\u016f\u00fa\u0171\u00fc\u00fd\u0163\u02d9"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1250", "x-cp1250", "cp1250"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1251.js b/intl/uconv/tests/unit/test_encode_CP1251.js new file mode 100644 index 0000000000..52d15d8731 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1251.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1251 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0402\u0403\u201a\u0453\u201e\u2026\u2020\u2021\u20ac\u2030\u0409\u2039\u040a\u040c\u040b\u040f\u0452\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u0459\u203a\u045a\u045c\u045b\u045f\u00a0\u040e\u045e\u0408\u00a4\u0490\u00a6\u00a7\u0401\u00a9\u0404\u00ab\u00ac\u00ad\u00ae\u0407\u00b0\u00b1\u0406\u0456\u0491\u00b5\u00b6\u00b7\u0451\u2116\u0454\u00bb\u0458\u0405\u0455\u0457\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u044f"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1251", "x-cp1251", "cp1251"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1252.js b/intl/uconv/tests/unit/test_encode_CP1252.js new file mode 100644 index 0000000000..2f99791408 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1252.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1252 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0160\u2039\u0152\u008d\u017d\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u0161\u203a\u0153\u009d\u017e\u0178\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1252", "x-cp1252", "cp1252"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1253.js b/intl/uconv/tests/unit/test_encode_CP1253.js new file mode 100644 index 0000000000..a7ba34cb9f --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1253.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1253 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u0088\u2030\u008a\u2039\u008c\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u009a\u203a\u009c\u009d\u009e\u009f\u00a0\u0385\u0386\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00ab\u00ac\u00ad\u00ae\u2015\u00b0\u00b1\u00b2\u00b3\u0384\u00b5\u00b6\u00b7\u0388\u0389\u038a\u00bb\u038c\u00bd\u038e\u038f\u0390\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039a\u039b\u039c\u039d\u039e\u039f\u03a0\u03a1\u03a3\u03a4\u03a5\u03a6\u03a7\u03a8\u03a9\u03aa\u03ab\u03ac\u03ad\u03ae\u03af\u03b0\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c2\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03ca\u03cb\u03cc\u03cd\u03ce"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe"; + +const aliases = ["windows-1253", "x-cp1253", "cp1253"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1254.js b/intl/uconv/tests/unit/test_encode_CP1254.js new file mode 100644 index 0000000000..593a33841e --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1254.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1254 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0160\u2039\u0152\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u0161\u203a\u0153\u009d\u009e\u0178\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u011e\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u0130\u015e\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u011f\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u0131\u015f\u00ff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1254", "x-cp1254", "cp1254"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1255.js b/intl/uconv/tests/unit/test_encode_CP1255.js new file mode 100644 index 0000000000..6da4cd53a2 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1255.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1255 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u008a\u2039\u008c\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u009a\u203a\u009c\u009d\u009e\u009f\u00a0\u00a1\u00a2\u00a3\u20aa\u00a5\u00a6\u00a7\u00a8\u00a9\u00d7\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00f7\u00bb\u00bc\u00bd\u00be\u00bf\u05b0\u05b1\u05b2\u05b3\u05b4\u05b5\u05b6\u05b7\u05b8\u05b9\u05ba\u05bb\u05bc\u05bd\u05be\u05bf\u05c0\u05c1\u05c2\u05c3\u05f0\u05f1\u05f2\u05f3\u05f4\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05da\u05db\u05dc\u05dd\u05de\u05df\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5\u05e6\u05e7\u05e8\u05e9\u05ea\u200e\u200f"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfd\xfe"; + +const aliases = ["windows-1255", "x-cp1255", "cp1255"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1256.js b/intl/uconv/tests/unit/test_encode_CP1256.js new file mode 100644 index 0000000000..c8fbbb6192 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1256.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1256 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u067e\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0679\u2039\u0152\u0686\u0698\u0688\u06af\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u06a9\u2122\u0691\u203a\u0153\u200c\u200d\u06ba\u00a0\u060c\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u06be\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u061b\u00bb\u00bc\u00bd\u00be\u061f\u06c1\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u00d7\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u00e0\u0644\u00e2\u0645\u0646\u0647\u0648\u00e7\u00e8\u00e9\u00ea\u00eb\u0649\u064a\u00ee\u00ef\u064b\u064c\u064d\u064e\u00f4\u064f\u0650\u00f7\u0651\u00f9\u0652\u00fb\u00fc\u200e\u200f\u06d2"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1256", "x-cp1256", "cp1256"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1257.js b/intl/uconv/tests/unit/test_encode_CP1257.js new file mode 100644 index 0000000000..d56241ef88 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1257.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1257 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0083\u201e\u2026\u2020\u2021\u0088\u2030\u008a\u2039\u008c\u00a8\u02c7\u00b8\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u2122\u009a\u203a\u009c\u00af\u02db\u009f\u00a0\u00a2\u00a3\u00a4\u00a6\u00a7\u00d8\u00a9\u0156\u00ab\u00ac\u00ad\u00ae\u00c6\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00f8\u00b9\u0157\u00bb\u00bc\u00bd\u00be\u00e6\u0104\u012e\u0100\u0106\u00c4\u00c5\u0118\u0112\u010c\u00c9\u0179\u0116\u0122\u0136\u012a\u013b\u0160\u0143\u0145\u00d3\u014c\u00d5\u00d6\u00d7\u0172\u0141\u015a\u016a\u00dc\u017b\u017d\u00df\u0105\u012f\u0101\u0107\u00e4\u00e5\u0119\u0113\u010d\u00e9\u017a\u0117\u0123\u0137\u012b\u013c\u0161\u0144\u0146\u00f3\u014d\u00f5\u00f6\u00f7\u0173\u0142\u015b\u016b\u00fc\u017c\u017e\u02d9"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa2\xa3\xa4\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1257", "x-cp1257", "cp1257"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP1258.js b/intl/uconv/tests/unit/test_encode_CP1258.js new file mode 100644 index 0000000000..e60a2f79ba --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP1258.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-1258 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u008a\u2039\u0152\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u009a\u203a\u0153\u009d\u009e\u0178\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00c0\u00c1\u00c2\u0102\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u0300\u00cd\u00ce\u00cf\u0110\u00d1\u0309\u00d3\u00d4\u01a0\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u01af\u0303\u00df\u00e0\u00e1\u00e2\u0103\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u0301\u00ed\u00ee\u00ef\u0111\u00f1\u0323\u00f3\u00f4\u01a1\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u01b0\u20ab\u00ff"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["windows-1258", "x-cp1258", "cp1258"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_CP874.js b/intl/uconv/tests/unit/test_encode_CP874.js new file mode 100644 index 0000000000..18158ef7e1 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_CP874.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to windows-874 + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20ac\u0081\u0082\u0083\u0084\u2026\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e24\u0e25\u0e26\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e2f\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35\u0e36\u0e37\u0e38\u0e39\u0e3a\u0e3f\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a\u0e4b\u0e4c\u0e4d\u0e4e\u0e4f\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57\u0e58\u0e59\u0e5a\u0e5b"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb"; + +const aliases = ["windows-874", "dos-874"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_gb18030.js b/intl/uconv/tests/unit/test_encode_gb18030.js new file mode 100644 index 0000000000..c080e67535 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_gb18030.js @@ -0,0 +1,15 @@ +// Tests conversion from Unicode to gb18030 +// This is a sniff test which doesn't cover the full gbk range: the test string +// includes only the ASCII range and the first 63 double byte characters + 
+const inString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20AC\u4E02\u4E04\u4E05\u4E06\u4E0F\u4E12\u4E17\u4E1F\u4E20\u4E21\u4E23\u4E26\u4E29\u4E2E\u4E2F\u4E31\u4E33\u4E35\u4E37\u4E3C\u4E40\u4E41\u4E42\u4E44\u4E46\u4E4A\u4E51\u4E55\u4E57\u4E5A\u4E5B\u4E62\u4E63\u4E64\u4E65\u4E67\u4E68\u4E6A\u4E6B\u4E6C\u4E6D\u4E6E\u4E6F\u4E72\u4E74\u4E75\u4E76\u4E77\u4E78\u4E79\u4E7A\u4E7B\u4E7C\u4E7D\u4E7F\u4E80\u4E81\u4E82\u4E83\u4E84\u4E85\u4E87\u4E8A\uFFFD\uE7C6\u1E3F\u01F9\uE7C9\u1E3E\uE7C7\u1E40"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\xa2\xe3\x81@\x81A\x81B\x81C\x81D\x81E\x81F\x81G\x81H\x81I\x81J\x81K\x81L\x81M\x81N\x81O\x81P\x81Q\x81R\x81S\x81T\x81U\x81V\x81W\x81X\x81Y\x81Z\x81[\x81\\\x81]\x81^\x81_\x81`\x81a\x81b\x81c\x81d\x81e\x81f\x81g\x81h\x81i\x81j\x81k\x81l\x81m\x81n\x81o\x81p\x81q\x81r\x81s\x81t\x81u\x81v\x81w\x81x\x81y\x81z\x81{\x81|\x81}\x81~\x84\x31\xa4\x37\xa8\xa0\xa8\xbc\xa8\xbf\xa8\xc1\x815\xf46\x815\xf47\x815\xf48"; + +const aliases = ["gb18030"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_gbk.js b/intl/uconv/tests/unit/test_encode_gbk.js new file mode 100644 index 0000000000..50763b9dab --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_gbk.js @@ -0,0 +1,15 @@ +// Tests conversion from Unicode to gbk +// This is a sniff test which doesn't cover the full gbk range: the test string +// includes only the ASCII range and the first 63 double byte characters + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u20AC\u4E02\u4E04\u4E05\u4E06\u4E0F\u4E12\u4E17\u4E1F\u4E20\u4E21\u4E23\u4E26\u4E29\u4E2E\u4E2F\u4E31\u4E33\u4E35\u4E37\u4E3C\u4E40\u4E41\u4E42\u4E44\u4E46\u4E4A\u4E51\u4E55\u4E57\u4E5A\u4E5B\u4E62\u4E63\u4E64\u4E65\u4E67\u4E68\u4E6A\u4E6B\u4E6C\u4E6D\u4E6E\u4E6F\u4E72\u4E74\u4E75\u4E76\u4E77\u4E78\u4E79\u4E7A\u4E7B\u4E7C\u4E7D\u4E7F\u4E80\u4E81\u4E82\u4E83\u4E84\u4E85\u4E87\u4E8A\uFFFD"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81@\x81A\x81B\x81C\x81D\x81E\x81F\x81G\x81H\x81I\x81J\x81K\x81L\x81M\x81N\x81O\x81P\x81Q\x81R\x81S\x81T\x81U\x81V\x81W\x81X\x81Y\x81Z\x81[\x81\\\x81]\x81^\x81_\x81`\x81a\x81b\x81c\x81d\x81e\x81f\x81g\x81h\x81i\x81j\x81k\x81l\x81m\x81n\x81o\x81p\x81q\x81r\x81s\x81t\x81u\x81v\x81w\x81x\x81y\x81z\x81{\x81|\x81}\x81~?"; + +const aliases = ["gbk", "x-gbk"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_macintosh.js b/intl/uconv/tests/unit/test_encode_macintosh.js new file mode 100644 index 0000000000..949d0aaf7c --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_macintosh.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to x-mac-roman + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u00c6\u00d8\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0\u222b\u00aa\u00ba\u03a9\u00e6\u00f8\u00bf\u00a1\u00ac\u221a\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca\u00ff\u0178\u2044\u20ac\u2039\u203a\ufb01\ufb02\u2021\u00b7\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\u0131\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["csMacintosh", "mac", "macintosh", "x-mac-roman"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_encode_x_mac_cyrillic.js b/intl/uconv/tests/unit/test_encode_x_mac_cyrillic.js new file mode 100644 index 0000000000..ac27c57cc4 --- /dev/null +++ b/intl/uconv/tests/unit/test_encode_x_mac_cyrillic.js @@ -0,0 +1,13 @@ +// Tests conversion from Unicode to x-mac-cyrillic + +const inString = + " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b\u042c\u042d\u042e\u042f\u2020\u00b0\u0490\u00a3\u00a7\u2022\u00b6\u0406\u00ae\u00a9\u2122\u0402\u0452\u2260\u0403\u0453\u221e\u00b1\u2264\u2265\u0456\u00b5\u0491\u0408\u0404\u0454\u0407\u0457\u0409\u0459\u040a\u045a\u0458\u0405\u00ac\u221a\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u040b\u045b\u040c\u045c\u0455\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u201e\u040e\u045e\u040f\u045f\u2116\u0401\u0451\u044f\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u20ac"; + +const expectedString = + " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; + +const aliases = ["x-mac-cyrillic", "x-mac-ukrainian"]; + +function run_test() { + testEncodeAliases(aliases, inString, expectedString); +} diff --git a/intl/uconv/tests/unit/test_input_stream.js b/intl/uconv/tests/unit/test_input_stream.js new file mode 100644 index 0000000000..b33fb98356 --- /dev/null +++ b/intl/uconv/tests/unit/test_input_stream.js @@ -0,0 +1,41 @@ +var CC = Components.Constructor; +var converter = Cc[ + "@mozilla.org/intl/scriptableunicodeconverter" 
+].createInstance(Ci.nsIScriptableUnicodeConverter); +converter.charset = "UTF-8"; + +var SIS = CC( + "@mozilla.org/scriptableinputstream;1", + "nsIScriptableInputStream", + "init" +); + +function test_char(code) { + dump("test_char(0x" + code.toString(16) + ")\n"); + var original = String.fromCharCode(code); + var nativeStream = Cc["@mozilla.org/io/string-input-stream;1"].createInstance( + Ci.nsIStringInputStream + ); + nativeStream.setUTF8Data(original); + var stream = new SIS(nativeStream); + var utf8Result = stream.read(stream.available()); + stream.close(); + var result = converter.ConvertToUnicode(utf8Result); + Assert.equal(escape(original), escape(result)); +} + +function run_test() { + // Not comprehensive: spot-checks the 1- and 2-byte UTF-8 limits, U+1000 and U+E000. + for (var i = 0x007f - 2; i <= 0x007f; i++) { + test_char(i); + } + for (i = 0x07ff - 2; i <= 0x07ff; i++) { + test_char(i); + } + for (i = 0x1000 - 2; i <= 0x1000 + 2; i++) { + test_char(i); + } + for (i = 0xe000; i <= 0xe000 + 2; i++) { + test_char(i); + } +} diff --git a/intl/uconv/tests/unit/test_unEscapeNonAsciiURI.js b/intl/uconv/tests/unit/test_unEscapeNonAsciiURI.js new file mode 100644 index 0000000000..f447959244 --- /dev/null +++ b/intl/uconv/tests/unit/test_unEscapeNonAsciiURI.js @@ -0,0 +1,58 @@ +// Tests for nsITextToSubURI.unEscapeNonAsciiURI +function run_test() { + // A UTF-16 charset must leave the URI escaped: the input is expected back unchanged. + const testURI = "data:text/html,%FE%FF"; + Assert.equal( + Services.textToSubURI.unEscapeNonAsciiURI("UTF-16", testURI), + testURI + ); + + // Incomplete or invalid UTF-8 sequences must throw; well-formed ones are unescaped. 
+ const tests = [ + { + input: "http://example.com/?p=%E9", + throws: Cr.NS_ERROR_ILLEGAL_INPUT, + }, + { + input: "http://example.com/?p=%E9%80", + throws: Cr.NS_ERROR_ILLEGAL_INPUT, + }, + { + input: "http://example.com/?p=%E9%80%80", + expected: "http://example.com/?p=\u9000", + }, + { + input: "http://example.com/?p=%E9e", + throws: Cr.NS_ERROR_ILLEGAL_INPUT, + }, + { + input: "http://example.com/?p=%E9%E9", + throws: Cr.NS_ERROR_ILLEGAL_INPUT, + }, + { + input: "http://example.com/?name=M%FCller/", + throws: Cr.NS_ERROR_ILLEGAL_INPUT, + }, + { + input: "http://example.com/?name=M%C3%BCller/", + expected: "http://example.com/?name=Müller/", + }, + ]; + + for (const t of tests) { + if (t.throws !== undefined) { + let thrown = undefined; + try { + Services.textToSubURI.unEscapeNonAsciiURI("UTF-8", t.input); + } catch (e) { + thrown = e.result; + } + Assert.equal(thrown, t.throws); + } else { + Assert.equal( + Services.textToSubURI.unEscapeNonAsciiURI("UTF-8", t.input), + t.expected + ); + } + } +} diff --git a/intl/uconv/tests/unit/test_unEscapeURIForUI.js b/intl/uconv/tests/unit/test_unEscapeURIForUI.js new file mode 100644 index 0000000000..7f2fb167cc --- /dev/null +++ b/intl/uconv/tests/unit/test_unEscapeURIForUI.js @@ -0,0 +1,23 @@ +// Tests for nsITextToSubURI.unEscapeURIForUI +function run_test() { + // Tests unescaping of a complete multibyte sequence, with and without dontEscape. 
+ const tests = [ + { + input: "http://example.com/?p=%E3%80%82", + // TODO: expected should be the same as input, bug 1248812 + expected: "http://example.com/?p=%u3002", + }, + { + input: "http://example.com/?name=%E3%80%82", + dontEscape: true, + expected: "http://example.com/?name=\u3002", + }, + ]; + + for (const t of tests) { + Assert.equal( + Services.textToSubURI.unEscapeURIForUI(t.input, t.dontEscape), + t.expected + ); + } +} diff --git a/intl/uconv/tests/unit/test_unmapped.js b/intl/uconv/tests/unit/test_unmapped.js new file mode 100644 index 0000000000..13ee13c20e --- /dev/null +++ b/intl/uconv/tests/unit/test_unmapped.js @@ -0,0 +1,86 @@ +// Tests that an unmapped character (U+E5E5) is encoded as "?" by every listed encoder +const inString = "\uE5E5"; +const expectedString = "?"; + +function run_test() { + var failures = false; + var encodingConverter = CreateScriptableConverter(); + + // this list excludes codepages that can represent all Unicode + var encoders = [ + "Big5", + "EUC-JP", + "EUC-KR", + "GBK", + "gb18030", + "IBM866", + "ISO-2022-JP", + "ISO-8859-3", + "ISO-8859-4", + "ISO-8859-5", + "ISO-8859-6", + "ISO-8859-7", + "ISO-8859-8", + "ISO-8859-8-I", + "ISO-8859-10", + "ISO-8859-13", + "ISO-8859-14", + "ISO-8859-15", + "ISO-8859-16", + "ISO-8859-2", + "KOI8-R", + "KOI8-U", + "Shift_JIS", + "windows-1250", + "windows-1251", + "windows-1252", + "windows-1253", + "windows-1254", + "windows-1255", + "windows-1256", + "windows-1257", + "windows-1258", + "windows-874", + "x-mac-cyrillic", + ]; + + var counter = 0; + while (counter < encoders.length) { + var charset = encoders[counter++]; + + dump("testing " + counter + " " + charset + "\n"); + encodingConverter.charset = charset; + var codepageString = + encodingConverter.ConvertFromUnicode(inString) + + encodingConverter.Finish(); + if (codepageString != expectedString) { + dump(charset + " encoding failed\n"); + for (var i = 0; i < expectedString.length; ++i) { + if (i >= 
codepageString.length) { + dump( + "output length " + + codepageString.length 
+ " less than expected length " + + expectedString.length + + "\n" + ); + break; + } + if (codepageString.charAt(i) != expectedString.charAt(i)) { + dump( + i.toString(16) + + ": 0x" + + codepageString.charCodeAt(i).toString(16) + + " != " + + expectedString.charCodeAt(i).toString(16) + + "\n" + ); + } + } + failures = true; + } + } + if (failures) { + do_throw("test failed\n"); + } +} diff --git a/intl/uconv/tests/unit/test_utf8_illegals.js b/intl/uconv/tests/unit/test_utf8_illegals.js new file mode 100644 index 0000000000..55aec6ab8c --- /dev/null +++ b/intl/uconv/tests/unit/test_utf8_illegals.js @@ -0,0 +1,164 @@ +// Tests illegal UTF-8 sequences + +var Cc = Components.Constructor; + +const { NetUtil } = ChromeUtils.importESModule( + "resource://gre/modules/NetUtil.sys.mjs" +); + +const tests = [ + { + inStrings: [ + "%80", // Illegal or incomplete sequences + "%8f", + "%90", + "%9f", + "%a0", + "%bf", + "%c0", + "%c1", + "%c2", + "%df", + "%e0", + "%e0%a0", + "%e0%bf", + "%ed%80", + "%ed%9f", + "%ef", + "%ef%bf", + "%f0", + "%f0%90", + "%f0%90%80", + "%f0%90%bf", + "%f0%bf", + "%f0%bf%80", + "%f0%bf%bf", + "%f4", + "%f4%80", + "%f4%80%80", + "%f4%80%bf", + "%f4%8f", + "%f4%8f%80", + "%f4%8f%bf", + "%f5", + "%f7", + "%f8", + "%fb", + "%fc", + "%fd", + ], + expected: "ABC\ufffdXYZ", + }, + + { + inStrings: [ + "%c0%af", // Illegal bytes in 2-octet + "%c1%af", + ], // sequences + expected: "ABC\ufffd\ufffdXYZ", + }, + + { + inStrings: [ + "%e0%80%80", // Illegal bytes in 3-octet + "%e0%80%af", // sequences + "%e0%9f%bf", + // long surrogates + "%ed%a0%80", // D800 + "%ed%ad%bf", // DB7F + "%ed%ae%80", // DB80 + "%ed%af%bf", // DBFF + "%ed%b0%80", // DC00 + "%ed%be%80", // DF80 + "%ed%bf%bf", + ], // DFFF + expected: "ABC\ufffd\ufffd\ufffdXYZ", + }, + + { + inStrings: [ + "%f0%80%80%80", // Illegal bytes in 4-octet + "%f0%80%80%af", // sequences + "%f0%8f%bf%bf", + "%f4%90%80%80", + "%f4%bf%bf%bf", + "%f5%80%80%80", + "%f7%bf%bf%bf", + ], + expected: 
"ABC\ufffd\ufffd\ufffd\ufffdXYZ", + }, + + { + inStrings: [ + "%f8%80%80%80%80", // Illegal bytes in 5-octet + "%f8%80%80%80%af", // sequences + "%fb%bf%bf%bf%bf", + ], + expected: "ABC\ufffd\ufffd\ufffd\ufffd\ufffdXYZ", + }, + + // Surrogate pairs + { + inStrings: [ + "%ed%a0%80%ed%b0%80", // D800 DC00 + "%ed%a0%80%ed%bf%bf", // D800 DFFF + "%ed%ad%bf%ed%b0%80", // DB7F DC00 + "%ed%ad%bf%ed%bf%bf", // DB7F DFFF + "%ed%ae%80%ed%b0%80", // DB80 DC00 + "%ed%ae%80%ed%bf%bf", // DB80 DFFF + "%ed%af%bf%ed%b0%80", // DBFF DC00 + "%ed%ad%bf%ed%bf%bf", // DBFF DFFF + "%fc%80%80%80%80%80", // Illegal bytes in 6-octet + "%fc%80%80%80%80%af", // sequences + "%fd%bf%bf%bf%bf%bf", + ], + expected: "ABC\ufffd\ufffd\ufffd\ufffd\ufffd\ufffdXYZ", + }, +]; + +function testCaseInputStream(inStr, expected) { + var dataURI = "data:text/plain; charset=UTF-8,ABC" + inStr + "XYZ"; + dump(inStr + "==>"); + + var ConverterInputStream = Cc( + "@mozilla.org/intl/converter-input-stream;1", + "nsIConverterInputStream", + "init" + ); + var channel = NetUtil.newChannel({ + uri: dataURI, + loadUsingSystemPrincipal: true, + }); + var testInputStream = channel.open(); + var testConverter = new ConverterInputStream( + testInputStream, + "UTF-8", + 16, + 0xfffd + ); + + if (!(testConverter instanceof Ci.nsIUnicharLineInputStream)) { + throw new Error("not line input stream"); + } + + var outStr = ""; + var more; + do { + // read the line and check for eof + var line = {}; + more = testConverter.readLine(line); + outStr += line.value; + } while (more); + + dump(outStr + "; expected=" + expected + "\n"); + Assert.equal(outStr, expected); + Assert.equal(outStr.length, expected.length); +} + +function run_test() { + for (var t of tests) { + for (var inStr of t.inStrings) { + testCaseInputStream(inStr, t.expected); + } + } +} diff --git a/intl/uconv/tests/unit/xpcshell.toml b/intl/uconv/tests/unit/xpcshell.toml new file mode 100644 index 0000000000..47362902ba --- /dev/null +++ 
b/intl/uconv/tests/unit/xpcshell.toml @@ -0,0 +1,175 @@ +[DEFAULT] +head = "head_charsetConversionTests.js" +support-files = [ + "data/unicode-conversion.utf16.txt", + "data/unicode-conversion.utf16be.txt", + "data/unicode-conversion.utf16le.txt", + "data/unicode-conversion.utf8.txt", +] + +["test_bug116882.js"] + +["test_bug317216.js"] + +["test_bug321379.js"] + +["test_bug340714.js"] + +["test_bug381412.Big5-HKSCS.js"] + +["test_bug381412.Big5.js"] + +["test_bug381412.euc-kr.js"] + +["test_bug381412.euc_jp.js"] + +["test_bug381412.gb2312.js"] + +["test_bug381412.js"] + +["test_bug396637.js"] + +["test_bug399257.js"] + +["test_bug457886.js"] + +["test_bug522931.js"] + +["test_bug563283.js"] + +["test_bug563618.js"] + +["test_bug601429.js"] + +["test_bug715319.dbcs.js"] + +["test_bug715319.euc_jp.js"] + +["test_bug715319.gb2312.js"] + +["test_charset_conversion.js"] + +["test_decode_8859-1.js"] + +["test_decode_8859-10.js"] + +["test_decode_8859-11.js"] + +["test_decode_8859-13.js"] + +["test_decode_8859-14.js"] + +["test_decode_8859-15.js"] + +["test_decode_8859-2.js"] + +["test_decode_8859-3.js"] + +["test_decode_8859-4.js"] + +["test_decode_8859-5.js"] + +["test_decode_8859-6.js"] + +["test_decode_8859-7.js"] + +["test_decode_8859-8.js"] + +["test_decode_8859-9.js"] + +["test_decode_CP1250.js"] + +["test_decode_CP1251.js"] + +["test_decode_CP1252.js"] + +["test_decode_CP1253.js"] + +["test_decode_CP1254.js"] + +["test_decode_CP1255.js"] + +["test_decode_CP1256.js"] + +["test_decode_CP1257.js"] + +["test_decode_CP1258.js"] + +["test_decode_CP874.js"] + +["test_decode_gb18030.js"] + +["test_decode_gbk.js"] + +["test_decode_macintosh.js"] + +["test_decode_x_mac_cyrillic.js"] + +["test_decode_x_mac_ukrainian.js"] + +["test_encode_8859-1.js"] + +["test_encode_8859-10.js"] + +["test_encode_8859-11.js"] + +["test_encode_8859-13.js"] + +["test_encode_8859-14.js"] + +["test_encode_8859-15.js"] + +["test_encode_8859-2.js"] + +["test_encode_8859-3.js"] + 
+["test_encode_8859-4.js"] + +["test_encode_8859-5.js"] + +["test_encode_8859-6.js"] + +["test_encode_8859-7.js"] + +["test_encode_8859-8.js"] + +["test_encode_8859-9.js"] + +["test_encode_CP1250.js"] + +["test_encode_CP1251.js"] + +["test_encode_CP1252.js"] + +["test_encode_CP1253.js"] + +["test_encode_CP1254.js"] + +["test_encode_CP1255.js"] + +["test_encode_CP1256.js"] + +["test_encode_CP1257.js"] + +["test_encode_CP1258.js"] + +["test_encode_CP874.js"] + +["test_encode_gb18030.js"] + +["test_encode_gbk.js"] + +["test_encode_macintosh.js"] + +["test_encode_x_mac_cyrillic.js"] + +["test_input_stream.js"] + +["test_unEscapeNonAsciiURI.js"] + +["test_unEscapeURIForUI.js"] + +["test_unmapped.js"] + +["test_utf8_illegals.js"] + -- cgit v1.2.3