From 43a97878ce14b72f0981164f87f2e35e14151312 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 11:22:09 +0200 Subject: Adding upstream version 110.0.1. Signed-off-by: Daniel Baumann --- .../iso-2022-jp/iso2022jp-encoder.js | 246 +++++++++++++++++++++ 1 file changed, 246 insertions(+) create mode 100644 testing/web-platform/tests/encoding/legacy-mb-japanese/iso-2022-jp/iso2022jp-encoder.js (limited to 'testing/web-platform/tests/encoding/legacy-mb-japanese/iso-2022-jp/iso2022jp-encoder.js') diff --git a/testing/web-platform/tests/encoding/legacy-mb-japanese/iso-2022-jp/iso2022jp-encoder.js b/testing/web-platform/tests/encoding/legacy-mb-japanese/iso-2022-jp/iso2022jp-encoder.js new file mode 100644 index 0000000000..9f07d0b6f4 --- /dev/null +++ b/testing/web-platform/tests/encoding/legacy-mb-japanese/iso-2022-jp/iso2022jp-encoder.js @@ -0,0 +1,246 @@ +// set up a sparse array of all unicode codepoints listed in the index +// this will be used for lookup in iso2022jpEncoded +var jis0208CPs = []; // index is unicode cp, value is pointer +for (var p = 0; p < jis0208.length; p++) { + if (jis0208[p] != null && jis0208CPs[jis0208[p]] == null) { + jis0208CPs[jis0208[p]] = p; + } +} + +// set up mappings for half/full width katakana +// index is a katakana index pointer, value is Unicode codepoint (dec) +// this is copy-pasted from the json version of the index belonging to the Encoding spec +var iso2022jpkatakana = [ + 12290, + 12300, + 12301, + 12289, + 12539, + 12530, + 12449, + 12451, + 12453, + 12455, + 12457, + 12515, + 12517, + 12519, + 12483, + 12540, + 12450, + 12452, + 12454, + 12456, + 12458, + 12459, + 12461, + 12463, + 12465, + 12467, + 12469, + 12471, + 12473, + 12475, + 12477, + 12479, + 12481, + 12484, + 12486, + 12488, + 12490, + 12491, + 12492, + 12493, + 12494, + 12495, + 12498, + 12501, + 12504, + 12507, + 12510, + 12511, + 12512, + 12513, + 12514, + 12516, + 12518, + 12520, + 12521, + 12522, + 12523, + 12524, + 12525, + 12527, + 12531, + 12443, + 12444 +]; + +function chars2cps(chars) { + // this is needed because of javascript's handling of supplementary characters + // char: a string of unicode characters + // returns an array of decimal code point values + var haut = 0; + var out = []; + for (var i = 0; i < chars.length; i++) { + var b = chars.charCodeAt(i); + if (b < 0 || b > 0xffff) { + alert( + "Error in chars2cps: byte out of range " + b.toString(16) + "!" + ); + } + if (haut != 0) { + if (0xdc00 <= b && b <= 0xdfff) { + out.push(0x10000 + ((haut - 0xd800) << 10) + (b - 0xdc00)); + haut = 0; + continue; + } else { + alert( + "Error in chars2cps: surrogate out of range " + + haut.toString(16) + + "!" + ); + haut = 0; + } + } + if (0xd800 <= b && b <= 0xdbff) { + haut = b; + } else { + out.push(b); + } + } + return out; +} + +function iso2022jpEncoder(stream) { + var cps = chars2cps(stream); + var endofstream = 2000000; + var out = ""; + var encState = "ascii"; + var finished = false; + var cp, ptr; + + while (!finished) { + if (cps.length == 0) cp = endofstream; + else cp = cps.shift(); + if (cp == endofstream && encState != "ascii") { + cps.unshift(cp); + encState = "ascii"; + out += " 1B 28 42"; + continue; + } + if (cp == endofstream && encState == "ascii") { + finished = true; + continue; + } + if ( + (encState === "ascii" || encState === "roman") && + (cp === 0x0e || cp === 0x0f || cp === 0x1b) + ) { + //out += ' &#'+cp+';' + // continue + return null; + } + if (encState == "ascii" && cp >= 0x00 && cp <= 0x7f) { + out += " " + cp.toString(16).toUpperCase(); + continue; + } + if ( + encState == "roman" && + ((cp >= 0x00 && cp <= 0x7f && cp !== 0x5c && cp !== 0x7e) || + cp == 0xa5 || + cp == 0x203e) + ) { + if (cp >= 0x00 && cp <= 0x7f) { + // ASCII + out += " " + cp.toString(16).toUpperCase(); + continue; + } + if (cp == 0xa5) { + out += " 5C"; + continue; + } + if (cp == 0x203e) { + out += " 7E"; + continue; + } + } + if (encState != "ascii" && cp >= 0x00 && cp <= 0x7f) { + cps.unshift(cp); + encState = "ascii"; + out += " 1B 28 42"; + continue; + } + if ((cp == 0xa5 || cp == 0x203e) && encState != "roman") { + cps.unshift(cp); + encState = "roman"; + out += " 1B 28 4A"; + continue; + } + if (cp == 0x2212) cp = 0xff0d; + if (cp >= 0xff61 && cp <= 0xff9f) { + cp = iso2022jpkatakana[cp - 0xff61]; + } + ptr = jis0208CPs[cp]; + if (ptr == null) { + //out += ' &#'+cp+';' + //continue + return null; + } + if (encState != "jis0208") { + cps.unshift(cp); + encState = "jis0208"; + out += " 1B 24 42"; + continue; + } + var lead = Math.floor(ptr / 94) + 0x21; + var trail = ptr % 94 + 0x21; + out += + " " + + lead.toString(16).toUpperCase() + + " " + + trail.toString(16).toUpperCase(); + } + return out.trim(); +} + +function convertToHex(str) { + // converts a string of ASCII characters to hex byte codes + var out = ""; + var result; + for (var c = 0; c < str.length; c++) { + result = + str + .charCodeAt(c) + .toString(16) + .toUpperCase() + " "; + out += result; + } + return out; +} + +function normalizeStr(str) { + var out = ""; + for (var c = 0; c < str.length; c++) { + if ( + str.charAt(c) == "%" && + str.charAt(c + 1) != "%" && + str.charAt(c + 2) != "%" + ) { + out += String.fromCodePoint( + parseInt(str.charAt(c + 1) + str.charAt(c + 2), 16) + ); + c += 2; + } else out += str.charAt(c); + } + var result = ""; + for (var o = 0; o < out.length; o++) { + result += + "%" + + out + .charCodeAt(o) + .toString(16) + .toUpperCase(); + } + return result.replace(/%1B%28%42$/, ""); +} -- cgit v1.2.3