summaryrefslogtreecommitdiffstats
path: root/testing/web-platform/tests/encoding/legacy-mb-japanese/iso-2022-jp/iso2022jp-encoder.js
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 09:22:09 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 09:22:09 +0000
commit 43a97878ce14b72f0981164f87f2e35e14151312 (patch)
tree 620249daf56c0258faa40cbdcf9cfba06de2a846 /testing/web-platform/tests/encoding/legacy-mb-japanese/iso-2022-jp/iso2022jp-encoder.js
parent Initial commit. (diff)
download firefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz
firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip
Adding upstream version 110.0.1. upstream/110.0.1 upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'testing/web-platform/tests/encoding/legacy-mb-japanese/iso-2022-jp/iso2022jp-encoder.js')
-rw-r--r-- testing/web-platform/tests/encoding/legacy-mb-japanese/iso-2022-jp/iso2022jp-encoder.js 246
1 files changed, 246 insertions, 0 deletions
diff --git a/testing/web-platform/tests/encoding/legacy-mb-japanese/iso-2022-jp/iso2022jp-encoder.js b/testing/web-platform/tests/encoding/legacy-mb-japanese/iso-2022-jp/iso2022jp-encoder.js
new file mode 100644
index 0000000000..9f07d0b6f4
--- /dev/null
+++ b/testing/web-platform/tests/encoding/legacy-mb-japanese/iso-2022-jp/iso2022jp-encoder.js
@@ -0,0 +1,246 @@
+// Reverse lookup for the jis0208 index: a sparse array whose index is a
+// Unicode code point and whose value is that code point's pointer in jis0208.
+// iso2022jpEncoder reads it to find the pointer for a code point.
+var jis0208CPs = [];
+for (var p = 0; p < jis0208.length; p++) {
+  // Skip empty index slots, and keep the first pointer recorded for a code
+  // point when the same code point shows up again later in the index.
+  if (jis0208[p] == null || jis0208CPs[jis0208[p]] != null) {
+    continue;
+  }
+  jis0208CPs[jis0208[p]] = p;
+}
+
+// Half/full width katakana mapping table.
+// Entry [cp - 0xFF61] is the replacement Unicode code point (decimal) that
+// iso2022jpEncoder substitutes for a code point cp in U+FF61..U+FF9F.
+// The values are copy-pasted from the JSON version of the index belonging to
+// the Encoding spec; they are grouped eight per row purely for readability.
+var iso2022jpkatakana = [
+  // offsets 0-7 (U+FF61..U+FF68)
+  12290, 12300, 12301, 12289, 12539, 12530, 12449, 12451,
+  // offsets 8-15 (U+FF69..U+FF70)
+  12453, 12455, 12457, 12515, 12517, 12519, 12483, 12540,
+  // offsets 16-23 (U+FF71..U+FF78)
+  12450, 12452, 12454, 12456, 12458, 12459, 12461, 12463,
+  // offsets 24-31 (U+FF79..U+FF80)
+  12465, 12467, 12469, 12471, 12473, 12475, 12477, 12479,
+  // offsets 32-39 (U+FF81..U+FF88)
+  12481, 12484, 12486, 12488, 12490, 12491, 12492, 12493,
+  // offsets 40-47 (U+FF89..U+FF90)
+  12494, 12495, 12498, 12501, 12504, 12507, 12510, 12511,
+  // offsets 48-55 (U+FF91..U+FF98)
+  12512, 12513, 12514, 12516, 12518, 12520, 12521, 12522,
+  // offsets 56-62 (U+FF99..U+FF9F)
+  12523, 12524, 12525, 12527, 12531, 12443, 12444
+];
+
+function chars2cps(chars) {
+  // Converts a JavaScript string into an array of Unicode code points.
+  // This is needed because JavaScript strings are sequences of UTF-16 code
+  // units, so a supplementary character occupies two string positions.
+  //
+  // chars: a string of unicode characters
+  // returns an array of decimal code point values, one per character
+  //
+  // A well-formed surrogate pair yields a single value above 0xFFFF. An
+  // unpaired surrogate is kept as its own code unit value, so malformed input
+  // stays visible to the caller instead of being silently dropped (the
+  // previous implementation lost a trailing high surrogate and relied on the
+  // browser-only alert() to report a stray one).
+  var out = [];
+  for (var i = 0; i < chars.length; i++) {
+    var cp = chars.codePointAt(i);
+    out.push(cp);
+    if (cp > 0xffff) {
+      // codePointAt consumed a high/low surrogate pair: skip the low half.
+      i++;
+    }
+  }
+  return out;
+}
+
+function iso2022jpEncoder(stream) {
+  // Encodes a string as ISO-2022-JP and reports the bytes as text.
+  //
+  // stream: a string of unicode characters
+  // returns the encoded bytes as space-separated uppercase hex values
+  //   (not zero-padded), or null when a code point cannot be encoded:
+  //   either 0x0E / 0x0F / 0x1B while in the ascii or roman state, or a code
+  //   point that has no pointer in jis0208CPs.
+  //
+  // The encoder keeps one of three states ("ascii", "roman", "jis0208").
+  // Whenever it has to change state it emits the matching escape sequence
+  // and then looks at the same code point again under the new state.
+  var END_OF_STREAM = 2000000; // sentinel, larger than any code point
+  var TO_ASCII = " 1B 28 42";
+  var TO_ROMAN = " 1B 28 4A";
+  var TO_JIS0208 = " 1B 24 42";
+
+  var queue = chars2cps(stream);
+  var next = 0; // position of the next unread code point in queue
+  var pushedBack = null; // code point waiting to be processed again
+  var state = "ascii";
+  var bytes = "";
+
+  function hex(value) {
+    return " " + value.toString(16).toUpperCase();
+  }
+
+  for (;;) {
+    var cp;
+    if (pushedBack !== null) {
+      cp = pushedBack;
+      pushedBack = null;
+    } else if (next < queue.length) {
+      cp = queue[next++];
+    } else {
+      cp = END_OF_STREAM;
+    }
+
+    if (cp === END_OF_STREAM) {
+      if (state === "ascii") {
+        break;
+      }
+      // Always finish in the ascii state.
+      pushedBack = cp;
+      state = "ascii";
+      bytes += TO_ASCII;
+      continue;
+    }
+
+    var isAscii = cp >= 0x00 && cp <= 0x7f;
+
+    if (
+      (state === "ascii" || state === "roman") &&
+      (cp === 0x0e || cp === 0x0f || cp === 0x1b)
+    ) {
+      return null;
+    }
+
+    if (state === "ascii" && isAscii) {
+      bytes += hex(cp);
+      continue;
+    }
+
+    if (state === "roman") {
+      // In the roman state 0x5C and 0x7E are not written as themselves;
+      // U+00A5 and U+203E are written to those byte values instead.
+      if (isAscii && cp !== 0x5c && cp !== 0x7e) {
+        bytes += hex(cp);
+        continue;
+      }
+      if (cp === 0xa5) {
+        bytes += " 5C";
+        continue;
+      }
+      if (cp === 0x203e) {
+        bytes += " 7E";
+        continue;
+      }
+    }
+
+    if (state !== "ascii" && isAscii) {
+      pushedBack = cp;
+      state = "ascii";
+      bytes += TO_ASCII;
+      continue;
+    }
+
+    if ((cp === 0xa5 || cp === 0x203e) && state !== "roman") {
+      pushedBack = cp;
+      state = "roman";
+      bytes += TO_ROMAN;
+      continue;
+    }
+
+    // Substitutions applied before the jis0208 lookup.
+    if (cp === 0x2212) {
+      cp = 0xff0d;
+    }
+    if (cp >= 0xff61 && cp <= 0xff9f) {
+      cp = iso2022jpkatakana[cp - 0xff61];
+    }
+
+    var pointer = jis0208CPs[cp];
+    if (pointer == null) {
+      return null;
+    }
+
+    if (state !== "jis0208") {
+      // The substituted code point is what gets looked at again.
+      pushedBack = cp;
+      state = "jis0208";
+      bytes += TO_JIS0208;
+      continue;
+    }
+
+    // Two bytes per character: 94 trail values per lead byte, offset 0x21.
+    bytes += hex(Math.floor(pointer / 94) + 0x21) + hex((pointer % 94) + 0x21);
+  }
+
+  return bytes.trim();
+}
+
+function convertToHex(str) {
+  // Renders each UTF-16 code unit of str as uppercase hex (not zero-padded).
+  // Every value, including the last one, is followed by a single space.
+  var pieces = [];
+  for (var i = 0; i < str.length; i += 1) {
+    var unit = str.charCodeAt(i);
+    pieces.push(unit.toString(16).toUpperCase() + " ");
+  }
+  return pieces.join("");
+}
+
+function normalizeStr(str) {
+  // Brings a partially percent-encoded string into a fully percent-encoded
+  // form so that two encodings of the same bytes compare equal.
+  //
+  // str: a string mixing literal characters and %XX escapes
+  // returns a string in which every character is written as "%" followed by
+  //   its uppercase hex value (not zero-padded), with one trailing
+  //   "%1B%28%42" removed if present.
+  //
+  // A "%" is treated as an escape only when it is followed by exactly two
+  // hex digits; any other "%" (for example one at the very end of the input)
+  // is kept as a literal character. Previously such input reached
+  // String.fromCodePoint(NaN) and threw a RangeError.
+  var hexPair = /^[0-9A-Fa-f]{2}$/;
+  var out = "";
+  for (var c = 0; c < str.length; c++) {
+    var pair = str.slice(c + 1, c + 3);
+    if (str.charAt(c) == "%" && hexPair.test(pair)) {
+      out += String.fromCharCode(parseInt(pair, 16));
+      c += 2; // skip the two hex digits that were just decoded
+    } else {
+      out += str.charAt(c);
+    }
+  }
+  var result = "";
+  for (var o = 0; o < out.length; o++) {
+    result += "%" + out.charCodeAt(o).toString(16).toUpperCase();
+  }
+  return result.replace(/%1B%28%42$/, "");
+}