summary | refs | log | tree | commit | diff | stats
path: root/testing/web-platform/tests/encoding/legacy-mb-schinese
diff options
context:
space:
mode:
Diffstat (limited to 'testing/web-platform/tests/encoding/legacy-mb-schinese')
-rw-r--r-- testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/gb18030-decoder.any.js | 63
-rw-r--r-- testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/gb18030-encoder.html | 48
-rw-r--r-- testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/resources/ranges.js | 210
-rw-r--r-- testing/web-platform/tests/encoding/legacy-mb-schinese/gbk/gbk-decoder.any.js | 28
-rw-r--r-- testing/web-platform/tests/encoding/legacy-mb-schinese/gbk/gbk-encoder.html | 26
5 files changed, 375 insertions, 0 deletions
diff --git a/testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/gb18030-decoder.any.js b/testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/gb18030-decoder.any.js
new file mode 100644
index 0000000000..99a0253ba6
--- /dev/null
+++ b/testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/gb18030-decoder.any.js
@@ -0,0 +1,63 @@
+// META: script=./resources/ranges.js
+
+// Registers one test asserting that bytes `input` decode to string `output`.
+const decode = (input, output, desc) => {
+  test(() => {
+    for (const encoding of ["gb18030", "gbk"]) { // same expectation under both labels
+      const decoded = new TextDecoder(encoding).decode(new Uint8Array(input));
+      // Name the label in the message so a failure shows which iteration broke.
+      assert_equals(decoded, output, "decoding as " + encoding);
+    }
+  }, "gb18030 decoder: " + desc);
+};
+
+// Hand-picked byte sequences; each call registers one test labelled by its last argument.
+decode([115], "s", "ASCII");
+decode([0x80], "\u20AC", "euro");
+decode([0xFF], "\uFFFD", "initial byte out of accepted ranges");
+decode([0x81], "\uFFFD", "end of queue, gb18030 first not 0");
+decode([0x81, 0x28], "\ufffd(", "two bytes 0x81 0x28");
+decode([0x81, 0x40], "\u4E02", "two bytes 0x81 0x40");
+decode([0x81, 0x7E], "\u4E8A", "two bytes 0x81 0x7e");
+decode([0x81, 0x7F], "\ufffd\u007f", "two bytes 0x81 0x7f");
+decode([0x81, 0x80], "\u4E90", "two bytes 0x81 0x80");
+decode([0x81, 0xFE], "\u4FA2", "two bytes 0x81 0xFE");
+decode([0x81, 0xFF], "\ufffd", "two bytes 0x81 0xFF");
+decode([0xFE, 0x40], "\uFA0C", "two bytes 0xFE 0x40");
+decode([0xFE, 0xFE], "\uE4C5", "two bytes 0xFE 0xFE");
+decode([0xFE, 0xFF], "\ufffd", "two bytes 0xFE 0xFF");
+decode([0x81, 0x30], "\ufffd", "two bytes 0x81 0x30");
+decode([0x81, 0x30, 0xFE], "\ufffd", "three bytes 0x81 0x30 0xFE");
+decode([0x81, 0x30, 0xFF], "\ufffd0\ufffd", "three bytes 0x81 0x30 0xFF");
+decode([0x81, 0x30, 0xFE, 0x29], "\ufffd0\ufffd)", "four bytes 0x81 0x30 0xFE 0x29");
+decode([0xFE, 0x39, 0xFE, 0x39], "\ufffd", "four bytes 0xFE 0x39 0xFE 0x39");
+// The tests below are labelled with the four-byte pointer of their input:
+//   (b1 - 0x81) * 12600 + (b2 - 0x30) * 1260 + (b3 - 0x81) * 10 + (b4 - 0x30)
+// e.g. 0x81 0x35 0xF4 0x36 is 5 * 1260 + 115 * 10 + 6 = 7456.
+decode([0x81, 0x35, 0xF4, 0x36], "\u1E3E", "pointer 7456");
+decode([0x81, 0x35, 0xF4, 0x37], "\ue7c7", "pointer 7457");
+decode([0x81, 0x35, 0xF4, 0x38], "\u1E40", "pointer 7458");
+decode([0x84, 0x31, 0xA4, 0x39], "\uffff", "pointer 39419");
+decode([0x84, 0x31, 0xA5, 0x30], "\ufffd", "pointer 39420");
+decode([0x8F, 0x39, 0xFE, 0x39], "\ufffd", "pointer 188999"); // 14 * 12600 + 9 * 1260 + 125 * 10 + 9
+decode([0x90, 0x30, 0x81, 0x30], "\u{10000}", "pointer 189000");
+decode([0xE3, 0x32, 0x9A, 0x35], "\u{10FFFF}", "pointer 1237575");
+decode([0xE3, 0x32, 0x9A, 0x36], "\ufffd", "pointer 1237576");
+decode([0x83, 0x36, 0xC8, 0x30], "\uE7C8", "legacy ICU special case 1");
+decode([0xA1, 0xAD], "\u2026", "legacy ICU special case 2");
+decode([0xA1, 0xAB], "\uFF5E", "legacy ICU special case 3");
+
+// One test per entry of `ranges` (loaded via the META script line): the entry's
+// pointer is split into the four bytes fed to the decoder.
+for (const [index, [pointer, expected]] of ranges.entries()) {
+  decode(
+    [
+      Math.floor(pointer / 12600) + 0x81,
+      Math.floor((pointer % 12600) / 1260) + 0x30,
+      Math.floor((pointer % 1260) / 10) + 0x81,
+      pointer % 10 + 0x30,
+    ],
+    expected,
+    "range " + index,
+  );
+}
diff --git a/testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/gb18030-encoder.html b/testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/gb18030-encoder.html
new file mode 100644
index 0000000000..a6570c8d2b
--- /dev/null
+++ b/testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/gb18030-encoder.html
@@ -0,0 +1,48 @@
+<!doctype html>
+<meta charset=gb18030>
+<script src=/resources/testharness.js></script>
+<script src=/resources/testharnessreport.js></script>
+<script src=resources/ranges.js></script>
+<script>
+ const encode = (input, output, desc) => { // one test: `input` in a URL query must serialize to `output`
+   test(() => {
+     const anchor = document.createElement("a"); // <a> uses document encoding for URL's query
+     anchor.href = "https://example.com/?" + input;
+     assert_equals(anchor.search.slice(1), output); // slice(1) drops the leading "?"
+   }, "gb18030 encoder: " + desc);
+ };
+
+ encode("s", "s", "very basic"); // each call: (input string, expected serialized query, test label)
+ encode("\u20AC", "%A2%E3", "Euro");
+ encode("\u4E02", "%81@", "character");
+ encode("\uE4C6", "%A1@", "PUA");
+ encode("\uE4C5", "%FE%FE", "PUA #2");
+ encode("\uE5E5", "%26%2358853%3B", "PUA #3"); // expected text is percent-encoded "&#58853;" (58853 === 0xE5E5)
+ encode("\ud83d\udca9", "%949%DA3", "poo"); // input is the surrogate pair for U+1F4A9
+ encode("\uE7C7", "%815%F47", "Ranges pointer special case");
+ encode("\uE7C8", "%836%C80", "legacy ICU special case 1");
+ encode("\u2026", "%A1%AD", "legacy ICU special case 2");
+ encode("\uFF5E", "%A1%AB", "legacy ICU special case 3");
+
+ // Floors x and renders it in upper-case hexadecimal; encodePointer passes
+ // byte / 16 and byte % 16 to format a byte one nibble at a time.
+ const upperCaseNibble = x => Math.floor(x).toString(16).toUpperCase();
+
+ // Expected query text for a pointer: bytes 1 and 3 percent-encoded, bytes 2 and 4 (ASCII digits) as characters.
+ const encodePointer = pointer => {
+   const percentEncode = byte => "%" + upperCaseNibble(byte / 16) + upperCaseNibble(byte % 16);
+   const bytes = [
+     Math.floor(pointer / 12600) + 0x81,
+     Math.floor((pointer % 12600) / 1260) + 0x30,
+     Math.floor((pointer % 1260) / 10) + 0x81,
+     pointer % 10 + 0x30,
+   ];
+   return percentEncode(bytes[0]) + String.fromCharCode(bytes[1])
+     + percentEncode(bytes[2]) + String.fromCharCode(bytes[3]);
+ };
+
+ // One encoder test per entry of `ranges`, numbered in array order.
+ for (const [index, [pointer, codePoint]] of ranges.entries()) {
+   encode(codePoint, encodePointer(pointer), "range " + index);
+ }
+</script>
diff --git a/testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/resources/ranges.js b/testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/resources/ranges.js
new file mode 100644
index 0000000000..5bbd553dc0
--- /dev/null
+++ b/testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/resources/ranges.js
@@ -0,0 +1,210 @@
+// Based on https://encoding.spec.whatwg.org/index-gb18030-ranges.txt
+const ranges = [ // entries are [pointer, character the tests pair with that pointer]
+ [0, "\u0080"],
+ [36, "\u00A5"],
+ [38, "\u00A9"],
+ [45, "\u00B2"],
+ [50, "\u00B8"],
+ [81, "\u00D8"],
+ [89, "\u00E2"],
+ [95, "\u00EB"],
+ [96, "\u00EE"],
+ [100, "\u00F4"],
+ [103, "\u00F8"],
+ [104, "\u00FB"],
+ [105, "\u00FD"],
+ [109, "\u0102"],
+ [126, "\u0114"],
+ [133, "\u011C"],
+ [148, "\u012C"],
+ [172, "\u0145"],
+ [175, "\u0149"],
+ [179, "\u014E"],
+ [208, "\u016C"],
+ [306, "\u01CF"],
+ [307, "\u01D1"],
+ [308, "\u01D3"],
+ [309, "\u01D5"],
+ [310, "\u01D7"],
+ [311, "\u01D9"],
+ [312, "\u01DB"],
+ [313, "\u01DD"],
+ [341, "\u01FA"],
+ [428, "\u0252"],
+ [443, "\u0262"],
+ [544, "\u02C8"],
+ [545, "\u02CC"],
+ [558, "\u02DA"],
+ [741, "\u03A2"],
+ [742, "\u03AA"],
+ [749, "\u03C2"],
+ [750, "\u03CA"],
+ [805, "\u0402"],
+ [819, "\u0450"],
+ [820, "\u0452"],
+ [7922, "\u2011"],
+ [7924, "\u2017"],
+ [7925, "\u201A"],
+ [7927, "\u201E"],
+ [7934, "\u2027"],
+ [7943, "\u2031"],
+ [7944, "\u2034"],
+ [7945, "\u2036"],
+ [7950, "\u203C"],
+ [8062, "\u20AD"],
+ [8148, "\u2104"],
+ [8149, "\u2106"],
+ [8152, "\u210A"],
+ [8164, "\u2117"],
+ [8174, "\u2122"],
+ [8236, "\u216C"],
+ [8240, "\u217A"],
+ [8262, "\u2194"],
+ [8264, "\u219A"],
+ [8374, "\u2209"],
+ [8380, "\u2210"],
+ [8381, "\u2212"],
+ [8384, "\u2216"],
+ [8388, "\u221B"],
+ [8390, "\u2221"],
+ [8392, "\u2224"],
+ [8393, "\u2226"],
+ [8394, "\u222C"],
+ [8396, "\u222F"],
+ [8401, "\u2238"],
+ [8406, "\u223E"],
+ [8416, "\u2249"],
+ [8419, "\u224D"],
+ [8424, "\u2253"],
+ [8437, "\u2262"],
+ [8439, "\u2268"],
+ [8445, "\u2270"],
+ [8482, "\u2296"],
+ [8485, "\u229A"],
+ [8496, "\u22A6"],
+ [8521, "\u22C0"],
+ [8603, "\u2313"],
+ [8936, "\u246A"],
+ [8946, "\u249C"],
+ [9046, "\u254C"],
+ [9050, "\u2574"],
+ [9063, "\u2590"],
+ [9066, "\u2596"],
+ [9076, "\u25A2"],
+ [9092, "\u25B4"],
+ [9100, "\u25BE"],
+ [9108, "\u25C8"],
+ [9111, "\u25CC"],
+ [9113, "\u25D0"],
+ [9131, "\u25E6"],
+ [9162, "\u2607"],
+ [9164, "\u260A"],
+ [9218, "\u2641"],
+ [9219, "\u2643"],
+ [11329, "\u2E82"],
+ [11331, "\u2E85"],
+ [11334, "\u2E89"],
+ [11336, "\u2E8D"],
+ [11346, "\u2E98"],
+ [11361, "\u2EA8"],
+ [11363, "\u2EAB"],
+ [11366, "\u2EAF"],
+ [11370, "\u2EB4"],
+ [11372, "\u2EB8"],
+ [11375, "\u2EBC"],
+ [11389, "\u2ECB"],
+ [11682, "\u2FFC"],
+ [11686, "\u3004"],
+ [11687, "\u3018"],
+ [11692, "\u301F"],
+ [11694, "\u302A"],
+ [11714, "\u303F"],
+ [11716, "\u3094"],
+ [11723, "\u309F"],
+ [11725, "\u30F7"],
+ [11730, "\u30FF"],
+ [11736, "\u312A"],
+ [11982, "\u322A"],
+ [11989, "\u3232"],
+ [12102, "\u32A4"],
+ [12336, "\u3390"],
+ [12348, "\u339F"],
+ [12350, "\u33A2"],
+ [12384, "\u33C5"],
+ [12393, "\u33CF"],
+ [12395, "\u33D3"],
+ [12397, "\u33D6"],
+ [12510, "\u3448"],
+ [12553, "\u3474"],
+ [12851, "\u359F"],
+ [12962, "\u360F"],
+ [12973, "\u361B"],
+ [13738, "\u3919"],
+ [13823, "\u396F"],
+ [13919, "\u39D1"],
+ [13933, "\u39E0"],
+ [14080, "\u3A74"],
+ [14298, "\u3B4F"],
+ [14585, "\u3C6F"],
+ [14698, "\u3CE1"],
+ [15583, "\u4057"],
+ [15847, "\u4160"],
+ [16318, "\u4338"],
+ [16434, "\u43AD"],
+ [16438, "\u43B2"],
+ [16481, "\u43DE"],
+ [16729, "\u44D7"],
+ [17102, "\u464D"],
+ [17122, "\u4662"],
+ [17315, "\u4724"],
+ [17320, "\u472A"],
+ [17402, "\u477D"],
+ [17418, "\u478E"],
+ [17859, "\u4948"],
+ [17909, "\u497B"],
+ [17911, "\u497E"],
+ [17915, "\u4984"],
+ [17916, "\u4987"],
+ [17936, "\u499C"],
+ [17939, "\u49A0"],
+ [17961, "\u49B8"],
+ [18664, "\u4C78"],
+ [18703, "\u4CA4"],
+ [18814, "\u4D1A"],
+ [18962, "\u4DAF"],
+ [19043, "\u9FA6"],
+ [33469, "\uE76C"],
+ [33470, "\uE7C8"],
+ [33471, "\uE7E7"],
+ [33484, "\uE815"],
+ [33485, "\uE819"],
+ [33490, "\uE81F"],
+ [33497, "\uE827"],
+ [33501, "\uE82D"],
+ [33505, "\uE833"],
+ [33513, "\uE83C"],
+ [33520, "\uE844"],
+ [33536, "\uE856"],
+ [33550, "\uE865"],
+ [37845, "\uF92D"],
+ [37921, "\uF97A"],
+ [37948, "\uF996"],
+ [38029, "\uF9E8"],
+ [38038, "\uF9F2"],
+ [38064, "\uFA10"],
+ [38065, "\uFA12"],
+ [38066, "\uFA15"],
+ [38069, "\uFA19"],
+ [38075, "\uFA22"],
+ [38076, "\uFA25"],
+ [38078, "\uFA2A"],
+ [39108, "\uFE32"],
+ [39109, "\uFE45"],
+ [39113, "\uFE53"],
+ [39114, "\uFE58"],
+ [39115, "\uFE67"],
+ [39116, "\uFE6C"],
+ [39265, "\uFF5F"],
+ [39394, "\uFFE6"],
+ [189000, "\u{10000}"] // the only entry whose character lies outside the BMP
+];
diff --git a/testing/web-platform/tests/encoding/legacy-mb-schinese/gbk/gbk-decoder.any.js b/testing/web-platform/tests/encoding/legacy-mb-schinese/gbk/gbk-decoder.any.js
new file mode 100644
index 0000000000..c0221480da
--- /dev/null
+++ b/testing/web-platform/tests/encoding/legacy-mb-schinese/gbk/gbk-decoder.any.js
@@ -0,0 +1,28 @@
+const gbkPointers = [ // index-aligned with codePoints; the loop at the end of this file turns each into a lead/trail byte pair
+ 6432, 7533, 7536, 7672, 7673, 7674, 7675, 7676, 7677, 7678, 7679, 7680, 7681, 7682, 7683, 7684,
+ 23766, 23770, 23771, 23772, 23773, 23774, 23776, 23777, 23778, 23779, 23780, 23781, 23782, 23784, 23785, 23786,
+ 23787, 23790, 23791, 23792, 23793, 23796, 23797, 23798, 23799, 23800, 23801, 23802, 23803, 23805, 23806, 23807,
+ 23808, 23809, 23810, 23811, 23813, 23814, 23815, 23816, 23817, 23818, 23819, 23820, 23821, 23822, 23823, 23824,
+ 23825, 23826, 23827, 23828, 23831, 23832, 23833, 23834, 23835, 23836, 23837, 23838, 23839, 23840, 23841, 23842,
+ 23843, 23844
+];
+const codePoints = [ // codePoints[i] is the UTF-16 code unit expected when the bytes for gbkPointers[i] are decoded
+ 0x20ac, 0x1e3f, 0x01f9, 0x303e, 0x2ff0, 0x2ff1, 0x2ff2, 0x2ff3, 0x2ff4, 0x2ff5, 0x2ff6, 0x2ff7, 0x2ff8, 0x2ff9, 0x2ffa, 0x2ffb,
+ 0x2e81, 0x2e84, 0x3473, 0x3447, 0x2e88, 0x2e8b, 0x359e, 0x361a, 0x360e, 0x2e8c, 0x2e97, 0x396e, 0x3918, 0x39cf, 0x39df, 0x3a73,
+ 0x39d0, 0x3b4e, 0x3c6e, 0x3ce0, 0x2ea7, 0x2eaa, 0x4056, 0x415f, 0x2eae, 0x4337, 0x2eb3, 0x2eb6, 0x2eb7, 0x43b1, 0x43ac, 0x2ebb,
+ 0x43dd, 0x44d6, 0x4661, 0x464c, 0x4723, 0x4729, 0x477c, 0x478d, 0x2eca, 0x4947, 0x497a, 0x497d, 0x4982, 0x4983, 0x4985, 0x4986,
+ 0x499f, 0x499b, 0x49b7, 0x49b6, 0x4ca3, 0x4c9f, 0x4ca0, 0x4ca1, 0x4c77, 0x4ca2, 0x4d13, 0x4d14, 0x4d15, 0x4d16, 0x4d17, 0x4d18,
+ 0x4d19, 0x4dae
+];
+
+// One test per pointer: build its two-byte GBK form and compare the decoded code unit.
+for (const [index, pointer] of gbkPointers.entries()) {
+  test(() => {
+    // Floor explicitly rather than leaning on Uint8Array's integer conversion.
+    const lead = Math.floor(pointer / 190) + 0x81;
+    const trail = pointer % 190;
+    const offset = trail < 0x3F ? 0x40 : 0x41; // trail bytes skip 0x7F
+    const bytes = new Uint8Array([lead, trail + offset]);
+    assert_equals(new TextDecoder("GBK").decode(bytes).charCodeAt(0), codePoints[index]);
+  }, "gbk pointer: " + pointer);
+}
diff --git a/testing/web-platform/tests/encoding/legacy-mb-schinese/gbk/gbk-encoder.html b/testing/web-platform/tests/encoding/legacy-mb-schinese/gbk/gbk-encoder.html
new file mode 100644
index 0000000000..e43cb73fea
--- /dev/null
+++ b/testing/web-platform/tests/encoding/legacy-mb-schinese/gbk/gbk-encoder.html
@@ -0,0 +1,26 @@
+<!doctype html>
+<meta charset=gbk> <!-- if the server overrides this, it is stupid, as this is a testsuite -->
+<script src=/resources/testharness.js></script>
+<script src=/resources/testharnessreport.js></script>
+<script>
+ function encode(input, output, desc) { // one test: `input` in a URL query must serialize to `output`
+   test(() => {
+     const anchor = document.createElement("a"); // <a> uses document encoding for URL's query
+     anchor.href = "https://example.com/?" + input;
+     assert_equals(anchor.search.slice(1), output); // slice(1) drops the leading "?"
+   }, "gbk encoder: " + desc);
+ }
+
+ encode("s", "s", "very basic"); // each call: (input string, expected serialized query, test label)
+ encode("\u20AC", "%80", "Euro");
+ encode("\u4E02", "%81@", "character");
+ encode("\uE4C6", "%A1@", "PUA");
+ encode("\uE4C5", "%FE%FE", "PUA #2");
+ encode("\ud83d\udca9", "%26%23128169%3B", "poo"); // expected text is percent-encoded "&#128169;" (128169 === 0x1F4A9)
+ encode("\uE7C8", "%26%2359336%3B", "legacy ICU special case 1"); // percent-encoded "&#59336;" (59336 === 0xE7C8)
+ encode("\u2026", "%A1%AD", "legacy ICU special case 2");
+ encode("\uFF5E", "%A1%AB", "legacy ICU special case 3");
+ encode("\u00A5", "%26%23165%3B", "legacy WebKit case 1"); // percent-encoded "&#165;" (165 === 0xA5)
+ encode("\u22EF", "%26%238943%3B", "legacy WebKit case 2"); // percent-encoded "&#8943;" (8943 === 0x22EF)
+ encode("\u301C", "%26%2312316%3B", "legacy WebKit case 3"); // percent-encoded "&#12316;" (12316 === 0x301C)
+</script>