diff options
Diffstat (limited to 'testing/web-platform/tests/encoding/legacy-mb-schinese')
5 files changed, 375 insertions, 0 deletions
diff --git a/testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/gb18030-decoder.any.js b/testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/gb18030-decoder.any.js
new file mode 100644
index 0000000000..99a0253ba6
--- /dev/null
+++ b/testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/gb18030-decoder.any.js
@@ -0,0 +1,68 @@
+// META: script=./resources/ranges.js
+
+// Registers one testharness test which asserts that the byte sequence `input`
+// decodes to the string `output` under both the "gb18030" and "gbk" labels.
+// `desc` is appended to the "gb18030 decoder: " test-name prefix.
+const decode = (input, output, desc) => {
+  test(() => {
+    const bytes = new Uint8Array(input);
+    for (const label of ["gb18030", "gbk"]) {
+      assert_equals(new TextDecoder(label).decode(bytes), output);
+    }
+  }, "gb18030 decoder: " + desc);
+};
+
+decode([115], "s", "ASCII");
+decode([0x80], "\u20AC", "euro");
+decode([0xFF], "\uFFFD", "initial byte out of accepted ranges");
+decode([0x81], "\uFFFD", "end of queue, gb18030 first not 0");
+decode([0x81, 0x28], "\ufffd(", "two bytes 0x81 0x28");
+decode([0x81, 0x40], "\u4E02", "two bytes 0x81 0x40");
+decode([0x81, 0x7E], "\u4E8A", "two bytes 0x81 0x7e");
+decode([0x81, 0x7F], "\ufffd\u007f", "two bytes 0x81 0x7f");
+decode([0x81, 0x80], "\u4E90", "two bytes 0x81 0x80");
+decode([0x81, 0xFE], "\u4FA2", "two bytes 0x81 0xFE");
+decode([0x81, 0xFF], "\ufffd", "two bytes 0x81 0xFF");
+decode([0xFE, 0x40], "\uFA0C", "two bytes 0xFE 0x40");
+decode([0xFE, 0xFE], "\uE4C5", "two bytes 0xFE 0xFE");
+decode([0xFE, 0xFF], "\ufffd", "two bytes 0xFE 0xFF");
+decode([0x81, 0x30], "\ufffd", "two bytes 0x81 0x30");
+decode([0x81, 0x30, 0xFE], "\ufffd", "three bytes 0x81 0x30 0xFE");
+decode([0x81, 0x30, 0xFF], "\ufffd0\ufffd", "three bytes 0x81 0x30 0xFF");
+decode(
+  [0x81, 0x30, 0xFE, 0x29],
+  "\ufffd0\ufffd)",
+  "four bytes 0x81 0x30 0xFE 0x29",
+);
+decode([0xFE, 0x39, 0xFE, 0x39], "\ufffd", "four bytes 0xFE 0x39 0xFE 0x39");
+// NOTE(review): the "pointer N" descriptions below are test names and are kept
+// verbatim. Inverting the four-byte formula used at the end of this file gives
+// 7456, 7457 and 7458 for the first three sequences and 188999 for
+// [0x8F, 0x39, 0xFE, 0x39], so some of the numbers look off - confirm upstream.
+decode([0x81, 0x35, 0xF4, 0x36], "\u1E3E", "pointer 7458");
+decode([0x81, 0x35, 0xF4, 0x37], "\ue7c7", "pointer 7457");
+decode([0x81, 0x35, 0xF4, 0x38], "\u1E40", "pointer 7459");
+decode([0x84, 0x31, 0xA4, 0x39], "\uffff", "pointer 39419");
+decode([0x84, 0x31, 0xA5, 0x30], "\ufffd", "pointer 39420");
+decode([0x8F, 0x39, 0xFE, 0x39], "\ufffd", "pointer 189999");
+decode([0x90, 0x30, 0x81, 0x30], "\u{10000}", "pointer 189000");
+decode([0xE3, 0x32, 0x9A, 0x35], "\u{10FFFF}", "pointer 1237575");
+decode([0xE3, 0x32, 0x9A, 0x36], "\ufffd", "pointer 1237576");
+decode([0x83, 0x36, 0xC8, 0x30], "\uE7C8", "legacy ICU special case 1");
+decode([0xA1, 0xAD], "\u2026", "legacy ICU special case 2");
+decode([0xA1, 0xAB], "\uFF5E", "legacy ICU special case 3");
+
+// One test per entry of `ranges` (loaded via the META line above): the entry's
+// pointer is turned into a four-byte sequence that must decode to its string.
+ranges.forEach(([pointer, codePoint], index) => {
+  decode(
+    [
+      Math.floor(pointer / 12600) + 0x81,
+      Math.floor((pointer % 12600) / 1260) + 0x30,
+      Math.floor((pointer % 1260) / 10) + 0x81,
+      (pointer % 10) + 0x30,
+    ],
+    codePoint,
+    "range " + index,
+  );
+});
diff --git a/testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/gb18030-encoder.html b/testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/gb18030-encoder.html
new file mode 100644
index 0000000000..a6570c8d2b
--- /dev/null
+++ b/testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/gb18030-encoder.html
@@ -0,0 +1,54 @@
+<!doctype html>
+<meta charset=gb18030>
+<script src=/resources/testharness.js></script>
+<script src=/resources/testharnessreport.js></script>
+<script src=resources/ranges.js></script>
+<script>
+  // Registers one testharness test which asserts that `input`, appended as the
+  // query of an <a> element's URL, is serialized as `output`. The query is
+  // encoded with the document encoding, declared as gb18030 by the <meta> above.
+  // `desc` is appended to the "gb18030 encoder: " test-name prefix.
+  const encode = (input, output, desc) => {
+    test(() => {
+      const anchor = document.createElement("a");
+      anchor.href = "https://example.com/?" + input;
+      // slice(1) drops the leading "?"; it replaces the deprecated substr(1).
+      assert_equals(anchor.search.slice(1), output);
+    }, "gb18030 encoder: " + desc);
+  };
+
+  encode("s", "s", "very basic");
+  encode("\u20AC", "%A2%E3", "Euro");
+  encode("\u4E02", "%81@", "character");
+  encode("\uE4C6", "%A1@", "PUA");
+  encode("\uE4C5", "%FE%FE", "PUA #2");
+  encode("\uE5E5", "%26%2358853%3B", "PUA #3");
+  encode("\ud83d\udca9", "%949%DA3", "poo");
+  encode("\uE7C7", "%815%F47", "Ranges pointer special case");
+  encode("\uE7C8", "%836%C80", "legacy ICU special case 1");
+  encode("\u2026", "%A1%AD", "legacy ICU special case 2");
+  encode("\uFF5E", "%A1%AB", "legacy ICU special case 3");
+
+  // Upper-case hexadecimal digits of `byte`. The callers below only pass bytes
+  // computed as `... + 0x81`, so the result always has two digits.
+  const hexByte = byte => byte.toString(16).toUpperCase();
+
+  // Expected query serialization of the four-byte sequence for `pointer`
+  // (same byte arithmetic as the decoder test): the first and third bytes are
+  // percent-encoded, the second and fourth are emitted as ASCII characters.
+  const encodePointer = pointer => {
+    const firstByte = Math.floor(pointer / 12600) + 0x81;
+    const secondByte = Math.floor((pointer % 12600) / 1260) + 0x30;
+    const thirdByte = Math.floor((pointer % 1260) / 10) + 0x81;
+    const fourthByte = (pointer % 10) + 0x30;
+    return (
+      "%" + hexByte(firstByte) + String.fromCharCode(secondByte) +
+      "%" + hexByte(thirdByte) + String.fromCharCode(fourthByte)
+    );
+  };
+
+  // One test per entry of `ranges`, named "range <index>".
+  ranges.forEach(([pointer, codePoint], index) => {
+    encode(codePoint, encodePointer(pointer), "range " + index);
+  });
+</script>
diff --git a/testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/resources/ranges.js b/testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/resources/ranges.js
new file mode 100644
index 0000000000..5bbd553dc0
--- /dev/null
+++ b/testing/web-platform/tests/encoding/legacy-mb-schinese/gb18030/resources/ranges.js
@@ -0,0 +1,213 @@
+// Based on https://encoding.spec.whatwg.org/index-gb18030-ranges.txt
+// Each entry is a [pointer, string] pair. The tests that load this file turn
+// the pointer into a four-byte gb18030 sequence and expect it to map to the
+// string, which holds a single code point.
+const ranges = [
+  [0, "\u0080"],
+  [36, "\u00A5"],
+  [38, "\u00A9"],
+  [45, "\u00B2"],
+  [50, "\u00B8"],
+  [81, "\u00D8"],
+  [89, "\u00E2"],
+  [95, "\u00EB"],
+  [96, "\u00EE"],
+  [100, "\u00F4"],
+  [103, "\u00F8"],
+  [104, "\u00FB"],
+  [105, "\u00FD"],
+  [109, "\u0102"],
+  [126, "\u0114"],
+  [133, "\u011C"],
+  [148, "\u012C"],
+  [172, "\u0145"],
+  [175, "\u0149"],
+  [179, "\u014E"],
+  [208, "\u016C"],
+  [306, "\u01CF"],
+  [307, "\u01D1"],
+  [308, "\u01D3"],
+  [309, "\u01D5"],
+  [310, "\u01D7"],
+  [311, "\u01D9"],
+  [312, "\u01DB"],
+  [313, "\u01DD"],
+  [341, "\u01FA"],
+  [428, "\u0252"],
+  [443, "\u0262"],
+  [544, "\u02C8"],
+  [545, "\u02CC"],
+  [558, "\u02DA"],
+  [741, "\u03A2"],
+  [742, "\u03AA"],
+  [749, "\u03C2"],
+  [750, "\u03CA"],
+  [805, "\u0402"],
+  [819, "\u0450"],
+  [820, "\u0452"],
+  [7922, "\u2011"],
+  [7924, "\u2017"],
+  [7925, "\u201A"],
+  [7927, "\u201E"],
+  [7934, "\u2027"],
+  [7943, "\u2031"],
+  [7944, "\u2034"],
+  [7945, "\u2036"],
+  [7950, "\u203C"],
+  [8062, "\u20AD"],
+  [8148, "\u2104"],
+  [8149, "\u2106"],
+  [8152, "\u210A"],
+  [8164, "\u2117"],
+  [8174, "\u2122"],
+  [8236, "\u216C"],
+  [8240, "\u217A"],
+  [8262, "\u2194"],
+  [8264, "\u219A"],
+  [8374, "\u2209"],
+  [8380, "\u2210"],
+  [8381, "\u2212"],
+  [8384, "\u2216"],
+  [8388, "\u221B"],
+  [8390, "\u2221"],
+  [8392, "\u2224"],
+  [8393, "\u2226"],
+  [8394, "\u222C"],
+  [8396, "\u222F"],
+  [8401, "\u2238"],
+  [8406, "\u223E"],
+  [8416, "\u2249"],
+  [8419, "\u224D"],
+  [8424, "\u2253"],
+  [8437, "\u2262"],
+  [8439, "\u2268"],
+  [8445, "\u2270"],
+  [8482, "\u2296"],
+  [8485, "\u229A"],
+  [8496, "\u22A6"],
+  [8521, "\u22C0"],
+  [8603, "\u2313"],
+  [8936, "\u246A"],
+  [8946, "\u249C"],
+  [9046, "\u254C"],
+  [9050, "\u2574"],
+  [9063, "\u2590"],
+  [9066, "\u2596"],
+  [9076, "\u25A2"],
+  [9092, "\u25B4"],
+  [9100, "\u25BE"],
+  [9108, "\u25C8"],
+  [9111, "\u25CC"],
+  [9113, "\u25D0"],
+  [9131, "\u25E6"],
+  [9162, "\u2607"],
+  [9164, "\u260A"],
+  [9218, "\u2641"],
+  [9219, "\u2643"],
+  [11329, "\u2E82"],
+  [11331, "\u2E85"],
+  [11334, "\u2E89"],
+  [11336, "\u2E8D"],
+  [11346, "\u2E98"],
+  [11361, "\u2EA8"],
+  [11363, "\u2EAB"],
+  [11366, "\u2EAF"],
+  [11370, "\u2EB4"],
+  [11372, "\u2EB8"],
+  [11375, "\u2EBC"],
+  [11389, "\u2ECB"],
+  [11682, "\u2FFC"],
+  [11686, "\u3004"],
+  [11687, "\u3018"],
+  [11692, "\u301F"],
+  [11694, "\u302A"],
+  [11714, "\u303F"],
+  [11716, "\u3094"],
+  [11723, "\u309F"],
+  [11725, "\u30F7"],
+  [11730, "\u30FF"],
+  [11736, "\u312A"],
+  [11982, "\u322A"],
+  [11989, "\u3232"],
+  [12102, "\u32A4"],
+  [12336, "\u3390"],
+  [12348, "\u339F"],
+  [12350, "\u33A2"],
+  [12384, "\u33C5"],
+  [12393, "\u33CF"],
+  [12395, "\u33D3"],
+  [12397, "\u33D6"],
+  [12510, "\u3448"],
+  [12553, "\u3474"],
+  [12851, "\u359F"],
+  [12962, "\u360F"],
+  [12973, "\u361B"],
+  [13738, "\u3919"],
+  [13823, "\u396F"],
+  [13919, "\u39D1"],
+  [13933, "\u39E0"],
+  [14080, "\u3A74"],
+  [14298, "\u3B4F"],
+  [14585, "\u3C6F"],
+  [14698, "\u3CE1"],
+  [15583, "\u4057"],
+  [15847, "\u4160"],
+  [16318, "\u4338"],
+  [16434, "\u43AD"],
+  [16438, "\u43B2"],
+  [16481, "\u43DE"],
+  [16729, "\u44D7"],
+  [17102, "\u464D"],
+  [17122, "\u4662"],
+  [17315, "\u4724"],
+  [17320, "\u472A"],
+  [17402, "\u477D"],
+  [17418, "\u478E"],
+  [17859, "\u4948"],
+  [17909, "\u497B"],
+  [17911, "\u497E"],
+  [17915, "\u4984"],
+  [17916, "\u4987"],
+  [17936, "\u499C"],
+  [17939, "\u49A0"],
+  [17961, "\u49B8"],
+  [18664, "\u4C78"],
+  [18703, "\u4CA4"],
+  [18814, "\u4D1A"],
+  [18962, "\u4DAF"],
+  [19043, "\u9FA6"],
+  [33469, "\uE76C"],
+  [33470, "\uE7C8"],
+  [33471, "\uE7E7"],
+  [33484, "\uE815"],
+  [33485, "\uE819"],
+  [33490, "\uE81F"],
+  [33497, "\uE827"],
+  [33501, "\uE82D"],
+  [33505, "\uE833"],
+  [33513, "\uE83C"],
+  [33520, "\uE844"],
+  [33536, "\uE856"],
+  [33550, "\uE865"],
+  [37845, "\uF92D"],
+  [37921, "\uF97A"],
+  [37948, "\uF996"],
+  [38029, "\uF9E8"],
+  [38038, "\uF9F2"],
+  [38064, "\uFA10"],
+  [38065, "\uFA12"],
+  [38066, "\uFA15"],
+  [38069, "\uFA19"],
+  [38075, "\uFA22"],
+  [38076, "\uFA25"],
+  [38078, "\uFA2A"],
+  [39108, "\uFE32"],
+  [39109, "\uFE45"],
+  [39113, "\uFE53"],
+  [39114, "\uFE58"],
+  [39115, "\uFE67"],
+  [39116, "\uFE6C"],
+  [39265, "\uFF5F"],
+  [39394, "\uFFE6"],
+  [189000, "\u{10000}"]
+];
diff --git a/testing/web-platform/tests/encoding/legacy-mb-schinese/gbk/gbk-decoder.any.js
b/testing/web-platform/tests/encoding/legacy-mb-schinese/gbk/gbk-decoder.any.js
new file mode 100644
index 0000000000..c0221480da
--- /dev/null
+++ b/testing/web-platform/tests/encoding/legacy-mb-schinese/gbk/gbk-decoder.any.js
@@ -0,0 +1,36 @@
+// Pointers exercised by this test; gbkPointers[i] is expected to decode to the
+// code unit codePoints[i].
+const gbkPointers = [
+  6432, 7533, 7536, 7672, 7673, 7674, 7675, 7676, 7677, 7678, 7679, 7680, 7681, 7682, 7683, 7684,
+  23766, 23770, 23771, 23772, 23773, 23774, 23776, 23777, 23778, 23779, 23780, 23781, 23782, 23784, 23785, 23786,
+  23787, 23790, 23791, 23792, 23793, 23796, 23797, 23798, 23799, 23800, 23801, 23802, 23803, 23805, 23806, 23807,
+  23808, 23809, 23810, 23811, 23813, 23814, 23815, 23816, 23817, 23818, 23819, 23820, 23821, 23822, 23823, 23824,
+  23825, 23826, 23827, 23828, 23831, 23832, 23833, 23834, 23835, 23836, 23837, 23838, 23839, 23840, 23841, 23842,
+  23843, 23844
+];
+// Expected UTF-16 code unit for each pointer above, in the same order.
+const codePoints = [
+  0x20ac, 0x1e3f, 0x01f9, 0x303e, 0x2ff0, 0x2ff1, 0x2ff2, 0x2ff3, 0x2ff4, 0x2ff5, 0x2ff6, 0x2ff7, 0x2ff8, 0x2ff9, 0x2ffa, 0x2ffb,
+  0x2e81, 0x2e84, 0x3473, 0x3447, 0x2e88, 0x2e8b, 0x359e, 0x361a, 0x360e, 0x2e8c, 0x2e97, 0x396e, 0x3918, 0x39cf, 0x39df, 0x3a73,
+  0x39d0, 0x3b4e, 0x3c6e, 0x3ce0, 0x2ea7, 0x2eaa, 0x4056, 0x415f, 0x2eae, 0x4337, 0x2eb3, 0x2eb6, 0x2eb7, 0x43b1, 0x43ac, 0x2ebb,
+  0x43dd, 0x44d6, 0x4661, 0x464c, 0x4723, 0x4729, 0x477c, 0x478d, 0x2eca, 0x4947, 0x497a, 0x497d, 0x4982, 0x4983, 0x4985, 0x4986,
+  0x499f, 0x499b, 0x49b7, 0x49b6, 0x4ca3, 0x4c9f, 0x4ca0, 0x4ca1, 0x4c77, 0x4ca2, 0x4d13, 0x4d14, 0x4d15, 0x4d16, 0x4d17, 0x4d18,
+  0x4d19, 0x4dae
+];
+
+// One test per pointer: build the two-byte sequence for the pointer and check
+// the first code unit that TextDecoder("GBK") produces for it.
+gbkPointers.forEach((pointer, index) => {
+  test(() => {
+    // Math.floor makes the integer division explicit, as the gb18030 tests do,
+    // instead of relying on Uint8Array truncating a fractional lead byte.
+    const lead = Math.floor(pointer / 190) + 0x81;
+    const trail = pointer % 190;
+    // Trail bytes skip 0x7F: offsets below 0x3F map to 0x40-0x7E, the rest to
+    // 0x80 and above.
+    const offset = trail < 0x3F ? 0x40 : 0x41;
+    const bytes = new Uint8Array([lead, trail + offset]);
+    const decoded = new TextDecoder("GBK").decode(bytes).charCodeAt(0);
+    assert_equals(decoded, codePoints[index]);
+  }, "gbk pointer: " + pointer);
+});
diff --git a/testing/web-platform/tests/encoding/legacy-mb-schinese/gbk/gbk-encoder.html b/testing/web-platform/tests/encoding/legacy-mb-schinese/gbk/gbk-encoder.html
new file mode 100644
index 0000000000..e43cb73fea
--- /dev/null
+++ b/testing/web-platform/tests/encoding/legacy-mb-schinese/gbk/gbk-encoder.html
@@ -0,0 +1,31 @@
+<!doctype html>
+<meta charset=gbk> <!-- the test depends on this encoding; the server must not override it -->
+<script src=/resources/testharness.js></script>
+<script src=/resources/testharnessreport.js></script>
+<script>
+  // Registers one testharness test which asserts that `input`, appended as the
+  // query of an <a> element's URL, is serialized as `output`. The query is
+  // encoded with the document encoding, declared as gbk by the <meta> above.
+  // `desc` is appended to the "gbk encoder: " test-name prefix.
+  function encode(input, output, desc) {
+    test(() => {
+      const anchor = document.createElement("a");
+      anchor.href = "https://example.com/?" + input;
+      // slice(1) drops the leading "?"; it replaces the deprecated substr(1).
+      assert_equals(anchor.search.slice(1), output);
+    }, "gbk encoder: " + desc);
+  }
+
+  encode("s", "s", "very basic");
+  encode("\u20AC", "%80", "Euro");
+  encode("\u4E02", "%81@", "character");
+  encode("\uE4C6", "%A1@", "PUA");
+  encode("\uE4C5", "%FE%FE", "PUA #2");
+  encode("\ud83d\udca9", "%26%23128169%3B", "poo");
+  encode("\uE7C8", "%26%2359336%3B", "legacy ICU special case 1");
+  encode("\u2026", "%A1%AD", "legacy ICU special case 2");
+  encode("\uFF5E", "%A1%AB", "legacy ICU special case 3");
+  encode("\u00A5", "%26%23165%3B", "legacy WebKit case 1");
+  encode("\u22EF", "%26%238943%3B", "legacy WebKit case 2");
+  encode("\u301C", "%26%2312316%3B", "legacy WebKit case 3");
+</script>