diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /testing/web-platform/tests/encoding/resources | |
parent | Initial commit. (diff) | |
download | firefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip |
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'testing/web-platform/tests/encoding/resources')
19 files changed, 774 insertions, 0 deletions
diff --git a/testing/web-platform/tests/encoding/resources/decode-common.js b/testing/web-platform/tests/encoding/resources/decode-common.js new file mode 100644 index 0000000000..19dd6939ac --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/decode-common.js @@ -0,0 +1,40 @@ +var tests = []; + +function iframeRef(frameRef) { + return frameRef.contentWindow + ? frameRef.contentWindow.document + : frameRef.contentDocument; +} + +function showNodes(decoder) { + var iframe = iframeRef(document.getElementById("scrwin")); + nodes = iframe.querySelectorAll("span"); + + for (var i = 0; i < nodes.length; i++) { + var test = subsetTest(async_test, + "U+" + + nodes[i].dataset.cp + + " " + + String.fromCodePoint(parseInt(nodes[i].dataset.cp, 16)) + + " " + + decoder(nodes[i].dataset.bytes) + + " " + + nodes[i].dataset.bytes + ); + if (test) { + tests[i] = test; + } + } + + for (var i = 0; i < nodes.length; i++) { + if (tests[i]) { + tests[i].step(function() { + assert_equals( + nodes[i].textContent, + decoder(nodes[i].dataset.bytes) + ); + }); + tests[i].done(); + } + } +} diff --git a/testing/web-platform/tests/encoding/resources/decoding-helpers.js b/testing/web-platform/tests/encoding/resources/decoding-helpers.js new file mode 100644 index 0000000000..78e52da014 --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/decoding-helpers.js @@ -0,0 +1,32 @@ +// Decode an URL encoded string, using XHR and data: URL. Returns a Promise. +function decode(label, url_encoded_string) { + return new Promise((resolve, reject) => { + const req = new XMLHttpRequest; + req.open('GET', `data:text/plain,${url_encoded_string}`); + req.overrideMimeType(`text/plain; charset="${label}"`); + req.send(); + req.onload = () => resolve(req.responseText); + req.onerror = () => reject(new Error(req.statusText)); + }); +} + +// Convert code units in a decoded string into: "U+0001/U+0002/...' +function to_code_units(string) { + return string.split('') + .map(unit => unit.charCodeAt(0)) + .map(code => 'U+' + ('0000' + code.toString(16).toUpperCase()).slice(-4)) + .join('/'); +} + +function decode_test(label, + url_encoded_input, + expected_code_units, + description) { + promise_test(() => { + return decode(label, url_encoded_input) + .then(decoded => to_code_units(decoded)) + .then(actual => { + assert_equals(actual, expected_code_units, `Decoding with ${label}`); + }); + }, description); +} diff --git a/testing/web-platform/tests/encoding/resources/encode-form-common.js b/testing/web-platform/tests/encoding/resources/encode-form-common.js new file mode 100644 index 0000000000..6f8777b39b --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/encode-form-common.js @@ -0,0 +1,140 @@ +// These are defined by the test: +// errors (boolean) +// encoder (function) +// ranges (array) +// separator (string) +// expect (function) + +var tests = []; +var cplist = []; +var numTests = null; +var numFrames = 2; +var chunkSize = 400; +var numChunks = null; +var frames = null; +var frames = null; +var forms = null; +var encodedSeparator = encodeURIComponent(separator); +var currentChunkIndex = 0; +var pageCharset = document.querySelector("meta[charset]").getAttribute("charset"); + +setup(function() { + // create a simple list of just those code points for which there is an encoding possible + codepoints = []; + for (var range of ranges) { + for (var i = range[0]; i < range[1]; i++) { + result = encoder(String.fromCodePoint(i)); + var success = !!result; + if (errors) { + success = !success; + } + if (success) { + var item = {}; + codepoints.push(item); + item.cp = i; + item.expected = expect(result, i); + item.desc = range[2]; + } + } + } + + // convert the information into a simple array of objects that can be easily traversed + var currentChunk = []; + var currentTests = []; + cplist = [currentChunk]; + tests = [currentTests]; + for (i = 0; i < codepoints.length; i++) { + if (currentChunk.length == chunkSize) { + currentChunk = []; + cplist.push(currentChunk); + currentTests = []; + tests.push(currentTests); + } + var item = {}; + currentChunk.push(item); + item.cp = codepoints[i].cp; + item.expected = codepoints[i].expected; + item.desc = codepoints[i].desc; + currentTests.push(subsetTest(async_test, + (item.desc ? item.desc + " " : "") + + "U+" + + item.cp.toString(16).toUpperCase() + + " " + + String.fromCodePoint(item.cp) + + " " + + item.expected + )); + } + + numChunks = cplist.length; + + for (var i = 0; i < numFrames; i++) { + var frame = document.createElement("iframe"); + frame.id = frame.name = "frame-" + i; + document.body.appendChild(frame); + var form = document.createElement("form"); + form.id = "form-" + i; + form.method = "GET"; + form.action = "/common/blank.html"; + form.acceptCharset = pageCharset; + form.target = frame.id; + var input = document.createElement("input"); + input.id = input.name = "input-" + i; + form.appendChild(input); + document.body.appendChild(form); + } + + addEventListener("load", function() { + frames = Array.prototype.slice.call( + document.getElementsByTagName("iframe") + ); + forms = Array.prototype.slice.call( + document.getElementsByTagName("form") + ); + inputs = Array.prototype.slice.call( + document.getElementsByTagName("input") + ); + for (var i = 0; i < Math.min(numFrames, numChunks); i++) { + runNext(i); + } + }); +}); + +function runNext(id) { + var i = currentChunkIndex; + currentChunkIndex += 1; + + var iframe = frames[id]; + var form = forms[id]; + var input = inputs[id]; + + input.value = cplist[i] + .map(function(x) { + return String.fromCodePoint(x.cp); + }) + .join(separator); + form.submit(); + + iframe.onload = function() { + var url = iframe.contentWindow.location; + var query = url.search; + var result_string = query.substr(query.indexOf("=") + 1); + var results = result_string.split(encodedSeparator); + + for (var j = 0; j < cplist[i].length; j++) { + var t = tests[i][j]; + if (t) { + t.step(function() { + assert_equals( + normalizeStr(results[j]), + normalizeStr(cplist[i][j].expected) + ); + }); + t.done(); + } + } + if (currentChunkIndex < numChunks) { + runNext(id); + } + }; +} diff --git a/testing/web-platform/tests/encoding/resources/encode-href-common.js b/testing/web-platform/tests/encoding/resources/encode-href-common.js new file mode 100644 index 0000000000..dc646fe8c1 --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/encode-href-common.js @@ -0,0 +1,57 @@ +// These are defined by the test: +// errors (boolean) +// encoder (function) +// ranges (array) +// expect (function) + +function encode(input, expected, desc) { + // tests whether a Unicode character is converted to an equivalent byte sequence by href + // input: a Unicode character + // expected: expected byte sequence + // desc: what's being tested + subsetTest(test, function() { + var a = document.createElement("a"); // <a> uses document encoding for URL's query + a.href = "https://example.com/?" + input; + result = a.search.substr(1); // remove leading "?" + assert_equals(normalizeStr(result), normalizeStr(expected)); + }, desc); +} + +// set up a simple array of unicode codepoints that are not encoded +var codepoints = []; + +for (var range of ranges) { + for (var i = range[0]; i < range[1]; i++) { + result = encoder(String.fromCodePoint(i)); + var success = !!result; + if (errors) { + success = !success; + } + if (success) { + var item = {}; + codepoints.push(item); + item.cp = i; + item.expected = expect(result, i); + item.desc = range[2] ? range[2] + " " : ""; + } + } +} + +// run the tests +for (var x = 0; x < codepoints.length; x++) { + encode( + String.fromCodePoint(codepoints[x].cp), + codepoints[x].expected, + codepoints[x].desc + + " U+" + + codepoints[x].cp.toString(16).toUpperCase() + + " " + + String.fromCodePoint(codepoints[x].cp) + + " " + + codepoints[x].expected + ); +} + +// NOTES +// this test relies on support for String.fromCodePoint, which appears to be supported by major desktop browsers +// the tests exclude ASCII characters diff --git a/testing/web-platform/tests/encoding/resources/encodings.js b/testing/web-platform/tests/encoding/resources/encodings.js new file mode 100644 index 0000000000..80933bf938 --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/encodings.js @@ -0,0 +1,468 @@ +// Straight from https://encoding.spec.whatwg.org/encodings.json +const encodings_table = +[ + { + "encodings": [ + { + "labels": [ + "unicode-1-1-utf-8", + "unicode11utf8", + "unicode20utf8", + "utf-8", + "utf8", + "x-unicode20utf8" + ], + "name": "UTF-8" + } + ], + "heading": "The Encoding" + }, + { + "encodings": [ + { + "labels": [ + "866", + "cp866", + "csibm866", + "ibm866" + ], + "name": "IBM866" + }, + { + "labels": [ + "csisolatin2", + "iso-8859-2", + "iso-ir-101", + "iso8859-2", + "iso88592", + "iso_8859-2", + "iso_8859-2:1987", + "l2", + "latin2" + ], + "name": "ISO-8859-2" + }, + { + "labels": [ + "csisolatin3", + "iso-8859-3", + "iso-ir-109", + "iso8859-3", + "iso88593", + "iso_8859-3", + "iso_8859-3:1988", + "l3", + "latin3" + ], + "name": "ISO-8859-3" + }, + { + "labels": [ + "csisolatin4", + "iso-8859-4", + "iso-ir-110", + "iso8859-4", + "iso88594", + "iso_8859-4", + "iso_8859-4:1988", + "l4", + "latin4" + ], + "name": "ISO-8859-4" + }, + { + "labels": [ + "csisolatincyrillic", + "cyrillic", + "iso-8859-5", + "iso-ir-144", + "iso8859-5", + "iso88595", + "iso_8859-5", + "iso_8859-5:1988" + ], + "name": "ISO-8859-5" + }, + { + "labels": [ + "arabic", + "asmo-708", + "csiso88596e", + "csiso88596i", + "csisolatinarabic", + "ecma-114", + "iso-8859-6", + "iso-8859-6-e", + "iso-8859-6-i", + "iso-ir-127", + "iso8859-6", + "iso88596", + "iso_8859-6", + "iso_8859-6:1987" + ], + "name": "ISO-8859-6" + }, + { + "labels": [ + "csisolatingreek", + "ecma-118", + "elot_928", + "greek", + "greek8", + "iso-8859-7", + "iso-ir-126", + "iso8859-7", + "iso88597", + "iso_8859-7", + "iso_8859-7:1987", + "sun_eu_greek" + ], + "name": "ISO-8859-7" + }, + { + "labels": [ + "csiso88598e", + "csisolatinhebrew", + "hebrew", + "iso-8859-8", + "iso-8859-8-e", + "iso-ir-138", + "iso8859-8", + "iso88598", + "iso_8859-8", + "iso_8859-8:1988", + "visual" + ], + "name": "ISO-8859-8" + }, + { + "labels": [ + "csiso88598i", + "iso-8859-8-i", + "logical" + ], + "name": "ISO-8859-8-I" + }, + { + "labels": [ + "csisolatin6", + "iso-8859-10", + "iso-ir-157", + "iso8859-10", + "iso885910", + "l6", + "latin6" + ], + "name": "ISO-8859-10" + }, + { + "labels": [ + "iso-8859-13", + "iso8859-13", + "iso885913" + ], + "name": "ISO-8859-13" + }, + { + "labels": [ + "iso-8859-14", + "iso8859-14", + "iso885914" + ], + "name": "ISO-8859-14" + }, + { + "labels": [ + "csisolatin9", + "iso-8859-15", + "iso8859-15", + "iso885915", + "iso_8859-15", + "l9" + ], + "name": "ISO-8859-15" + }, + { + "labels": [ + "iso-8859-16" + ], + "name": "ISO-8859-16" + }, + { + "labels": [ + "cskoi8r", + "koi", + "koi8", + "koi8-r", + "koi8_r" + ], + "name": "KOI8-R" + }, + { + "labels": [ + "koi8-ru", + "koi8-u" + ], + "name": "KOI8-U" + }, + { + "labels": [ + "csmacintosh", + "mac", + "macintosh", + "x-mac-roman" + ], + "name": "macintosh" + }, + { + "labels": [ + "dos-874", + "iso-8859-11", + "iso8859-11", + "iso885911", + "tis-620", + "windows-874" + ], + "name": "windows-874" + }, + { + "labels": [ + "cp1250", + "windows-1250", + "x-cp1250" + ], + "name": "windows-1250" + }, + { + "labels": [ + "cp1251", + "windows-1251", + "x-cp1251" + ], + "name": "windows-1251" + }, + { + "labels": [ + "ansi_x3.4-1968", + "ascii", + "cp1252", + "cp819", + "csisolatin1", + "ibm819", + "iso-8859-1", + "iso-ir-100", + "iso8859-1", + "iso88591", + "iso_8859-1", + "iso_8859-1:1987", + "l1", + "latin1", + "us-ascii", + "windows-1252", + "x-cp1252" + ], + "name": "windows-1252" + }, + { + "labels": [ + "cp1253", + "windows-1253", + "x-cp1253" + ], + "name": "windows-1253" + }, + { + "labels": [ + "cp1254", + "csisolatin5", + "iso-8859-9", + "iso-ir-148", + "iso8859-9", + "iso88599", + "iso_8859-9", + "iso_8859-9:1989", + "l5", + "latin5", + "windows-1254", + "x-cp1254" + ], + "name": "windows-1254" + }, + { + "labels": [ + "cp1255", + "windows-1255", + "x-cp1255" + ], + "name": "windows-1255" + }, + { + "labels": [ + "cp1256", + "windows-1256", + "x-cp1256" + ], + "name": "windows-1256" + }, + { + "labels": [ + "cp1257", + "windows-1257", + "x-cp1257" + ], + "name": "windows-1257" + }, + { + "labels": [ + "cp1258", + "windows-1258", + "x-cp1258" + ], + "name": "windows-1258" + }, + { + "labels": [ + "x-mac-cyrillic", + "x-mac-ukrainian" + ], + "name": "x-mac-cyrillic" + } + ], + "heading": "Legacy single-byte encodings" + }, + { + "encodings": [ + { + "labels": [ + "chinese", + "csgb2312", + "csiso58gb231280", + "gb2312", + "gb_2312", + "gb_2312-80", + "gbk", + "iso-ir-58", + "x-gbk" + ], + "name": "GBK" + }, + { + "labels": [ + "gb18030" + ], + "name": "gb18030" + } + ], + "heading": "Legacy multi-byte Chinese (simplified) encodings" + }, + { + "encodings": [ + { + "labels": [ + "big5", + "big5-hkscs", + "cn-big5", + "csbig5", + "x-x-big5" + ], + "name": "Big5" + } + ], + "heading": "Legacy multi-byte Chinese (traditional) encodings" + }, + { + "encodings": [ + { + "labels": [ + "cseucpkdfmtjapanese", + "euc-jp", + "x-euc-jp" + ], + "name": "EUC-JP" + }, + { + "labels": [ + "csiso2022jp", + "iso-2022-jp" + ], + "name": "ISO-2022-JP" + }, + { + "labels": [ + "csshiftjis", + "ms932", + "ms_kanji", + "shift-jis", + "shift_jis", + "sjis", + "windows-31j", + "x-sjis" + ], + "name": "Shift_JIS" + } + ], + "heading": "Legacy multi-byte Japanese encodings" + }, + { + "encodings": [ + { + "labels": [ + "cseuckr", + "csksc56011987", + "euc-kr", + "iso-ir-149", + "korean", + "ks_c_5601-1987", + "ks_c_5601-1989", + "ksc5601", + "ksc_5601", + "windows-949" + ], + "name": "EUC-KR" + } + ], + "heading": "Legacy multi-byte Korean encodings" + }, + { + "encodings": [ + { + "labels": [ + "csiso2022kr", + "hz-gb-2312", + "iso-2022-cn", + "iso-2022-cn-ext", + "iso-2022-kr", + "replacement" + ], + "name": "replacement" + }, + { + "labels": [ + "unicodefffe", + "utf-16be" + ], + "name": "UTF-16BE" + }, + { + "labels": [ + "csunicode", + "iso-10646-ucs-2", + "ucs-2", + "unicode", + "unicodefeff", + "utf-16", + "utf-16le" + ], + "name": "UTF-16LE" + }, + { + "labels": [ + "x-user-defined" + ], + "name": "x-user-defined" + } + ], + "heading": "Legacy miscellaneous encodings" + } +] +; diff --git a/testing/web-platform/tests/encoding/resources/ranges.js b/testing/web-platform/tests/encoding/resources/ranges.js new file mode 100644 index 0000000000..81dc711fc7 --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/ranges.js @@ -0,0 +1,28 @@ +var rangesHan = [ + [0x4e00, 0x9fba, "cjk"], + [0xf900, 0xfa6e, "compatibility"], + [0xfa70, 0xfada, "compatibility"], + [0x3400, 0x4dbf, "extension A"], +]; +var rangesHangul = [ + [0xac00, 0xd7af, "hangul"], +]; +var rangesMisc = [ + [0x80, 0x4ff, "latin, greek, cyrillic, etc"], + [0x2000, 0x23ff, "punctuation, currency, symbols"], + [0x2460, 0x26ff, "enclosed chars and boxes"], + [0x3000, 0x33ff, "various asian"], + [0xff00, 0xffef, "half/full width"], +]; +var rangesAll = [ + [0x80, 0xffff], +]; +var rangesExtBa = [ + [0x20000, 0x2536b, "extB (pt 1)"], +]; +var rangesExtBb = [ + [0x2536b, 0x2a6e0, "extB (pt 2)"], +]; +var rangesPua = [ + [0xe000, 0xf8ff, "pua"], +]; diff --git a/testing/web-platform/tests/encoding/resources/single-byte-raw.py b/testing/web-platform/tests/encoding/resources/single-byte-raw.py new file mode 100644 index 0000000000..7e6bb4d408 --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/single-byte-raw.py @@ -0,0 +1,3 @@ +def main(request, response): + response.headers.set(b"Content-Type", b"text/plain;charset=" + request.GET.first(b"label")) + response.content = bytes(range(255)) diff --git a/testing/web-platform/tests/encoding/resources/text-plain-charset.py b/testing/web-platform/tests/encoding/resources/text-plain-charset.py new file mode 100644 index 0000000000..d7e7882442 --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/text-plain-charset.py @@ -0,0 +1,3 @@ +def main(request, response): + response.headers.set(b"Content-Type", b"text/plain;charset=" + request.GET.first(b"label")) + response.content = b"hello encoding" diff --git a/testing/web-platform/tests/encoding/resources/two-boms-utf-16be.html b/testing/web-platform/tests/encoding/resources/two-boms-utf-16be.html new file mode 100644 index 0000000000..6a5b0a5517 --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/two-boms-utf-16be.html @@ -0,0 +1 @@ +þÿþÿ
\ No newline at end of file diff --git a/testing/web-platform/tests/encoding/resources/two-boms-utf-16le.html b/testing/web-platform/tests/encoding/resources/two-boms-utf-16le.html new file mode 100644 index 0000000000..535a40d398 --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/two-boms-utf-16le.html @@ -0,0 +1 @@ +ÿþÿþ
\ No newline at end of file diff --git a/testing/web-platform/tests/encoding/resources/two-boms-utf-8.html b/testing/web-platform/tests/encoding/resources/two-boms-utf-8.html new file mode 100644 index 0000000000..83ea941a53 --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/two-boms-utf-8.html @@ -0,0 +1 @@ +
\ No newline at end of file diff --git a/testing/web-platform/tests/encoding/resources/utf-32-big-endian-bom.html b/testing/web-platform/tests/encoding/resources/utf-32-big-endian-bom.html Binary files differnew file mode 100644 index 0000000000..db551fa894 --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/utf-32-big-endian-bom.html diff --git a/testing/web-platform/tests/encoding/resources/utf-32-big-endian-bom.xml b/testing/web-platform/tests/encoding/resources/utf-32-big-endian-bom.xml Binary files differnew file mode 100644 index 0000000000..c97662aa16 --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/utf-32-big-endian-bom.xml diff --git a/testing/web-platform/tests/encoding/resources/utf-32-big-endian-nobom.html b/testing/web-platform/tests/encoding/resources/utf-32-big-endian-nobom.html Binary files differnew file mode 100644 index 0000000000..fe32ab0408 --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/utf-32-big-endian-nobom.html diff --git a/testing/web-platform/tests/encoding/resources/utf-32-big-endian-nobom.xml b/testing/web-platform/tests/encoding/resources/utf-32-big-endian-nobom.xml Binary files differnew file mode 100644 index 0000000000..f704501cca --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/utf-32-big-endian-nobom.xml diff --git a/testing/web-platform/tests/encoding/resources/utf-32-little-endian-bom.html b/testing/web-platform/tests/encoding/resources/utf-32-little-endian-bom.html Binary files differnew file mode 100644 index 0000000000..432b96f26a --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/utf-32-little-endian-bom.html diff --git a/testing/web-platform/tests/encoding/resources/utf-32-little-endian-bom.xml b/testing/web-platform/tests/encoding/resources/utf-32-little-endian-bom.xml Binary files differnew file mode 100644 index 0000000000..f896b511c7 --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/utf-32-little-endian-bom.xml diff --git a/testing/web-platform/tests/encoding/resources/utf-32-little-endian-nobom.html b/testing/web-platform/tests/encoding/resources/utf-32-little-endian-nobom.html Binary files differnew file mode 100644 index 0000000000..2de355aab1 --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/utf-32-little-endian-nobom.html diff --git a/testing/web-platform/tests/encoding/resources/utf-32-little-endian-nobom.xml b/testing/web-platform/tests/encoding/resources/utf-32-little-endian-nobom.xml Binary files differnew file mode 100644 index 0000000000..465f44df5f --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/utf-32-little-endian-nobom.xml |