// NOTE: Requires testharness.js
// http://www.w3.org/2008/webapps/wiki/Harness

// Miscellaneous TextEncoder / TextDecoder tests. Each test(...) call below
// registers one independent test with the harness; assertions follow the
// harness convention of (actual, expected[, description]).

// Strings containing unpaired surrogates must round-trip through UTF-8 with
// each lone surrogate replaced by U+FFFD.
test(function () {
  var badStrings = [
    { input: "\ud800", expected: "\ufffd" }, // Surrogate half
    { input: "\udc00", expected: "\ufffd" }, // Surrogate half
    { input: "abc\ud800def", expected: "abc\ufffddef" }, // Surrogate half
    { input: "abc\udc00def", expected: "abc\ufffddef" }, // Surrogate half
    { input: "\udc00\ud800", expected: "\ufffd\ufffd" }, // Wrong order
  ];

  badStrings.forEach(function (t) {
    var encoded = new TextEncoder().encode(t.input);
    var decoded = new TextDecoder("utf-8").decode(encoded);
    // Actual value first, expected second, so failure output is labelled
    // correctly and matches every other assertion in this file.
    assert_equals(decoded, t.expected);
  });
}, "bad data");

// With { fatal: true }, decoding each malformed byte sequence must throw a
// TypeError.
test(function () {
  var bad = [
    { encoding: "utf-8", input: [0xc0] }, // ends early
    { encoding: "utf-8", input: [0xc0, 0x00] }, // invalid trail
    { encoding: "utf-8", input: [0xc0, 0xc0] }, // invalid trail
    { encoding: "utf-8", input: [0xe0] }, // ends early
    { encoding: "utf-8", input: [0xe0, 0x00] }, // invalid trail
    { encoding: "utf-8", input: [0xe0, 0xc0] }, // invalid trail
    { encoding: "utf-8", input: [0xe0, 0x80, 0x00] }, // invalid trail
    { encoding: "utf-8", input: [0xe0, 0x80, 0xc0] }, // invalid trail
    { encoding: "utf-8", input: [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80] }, // > 0x10FFFF
    { encoding: "utf-16le", input: [0x00] }, // truncated code unit
    { encoding: "utf-16le", input: [0x00, 0xd8] }, // surrogate half
    { encoding: "utf-16le", input: [0x00, 0xd8, 0x00, 0x00] }, // surrogate half
    { encoding: "utf-16le", input: [0x00, 0xdc, 0x00, 0x00] }, // trail surrogate
    { encoding: "utf-16le", input: [0x00, 0xdc, 0x00, 0xd8] }, // swapped surrogates
    // TODO: Single byte encoding cases
  ];

  bad.forEach(function (t) {
    assert_throws({ name: "TypeError" }, function () {
      new TextDecoder(t.encoding, { fatal: true }).decode(
        new Uint8Array(t.input)
      );
    });
  });
}, "fatal flag");

// A label must resolve to the same encoding name whether it is passed in
// lower case or upper case.
test(function () {
  var encodings = [
    { label: "utf-8", encoding: "utf-8" },
    { label: "utf-16", encoding: "utf-16le" },
    { label: "utf-16le", encoding: "utf-16le" },
    { label: "utf-16be", encoding: "utf-16be" },
    { label: "ascii", encoding: "windows-1252" },
    { label: "iso-8859-1", encoding: "windows-1252" },
  ];

  // The callback parameter is deliberately not called `test`, so it does not
  // shadow the harness's test() function.
  encodings.forEach(function (t) {
    assert_equals(
      new TextDecoder(t.label.toLowerCase()).encoding,
      t.encoding
    );
    assert_equals(
      new TextDecoder(t.label.toUpperCase()).encoding,
      t.encoding
    );
  });
}, "Encoding names are case insensitive");

// Byte-order-mark handling: a matching BOM is consumed (even when split
// across streamed chunks), while a mismatching BOM must change the output.
test(function () {
  var utf8_bom = [0xef, 0xbb, 0xbf];
  var utf8 = [
    0x7a, 0xc2, 0xa2, 0xe6, 0xb0, 0xb4, 0xf0, 0x9d, 0x84, 0x9e, 0xf4, 0x8f,
    0xbf, 0xbd,
  ];

  var utf16le_bom = [0xff, 0xfe];
  var utf16le = [
    0x7a, 0x00, 0xa2, 0x00, 0x34, 0x6c, 0x34, 0xd8, 0x1e, 0xdd, 0xff, 0xdb,
    0xfd, 0xdf,
  ];

  var utf16be_bom = [0xfe, 0xff];
  var utf16be = [
    0x00, 0x7a, 0x00, 0xa2, 0x6c, 0x34, 0xd8, 0x34, 0xdd, 0x1e, 0xdb, 0xff,
    0xdf, 0xfd,
  ];

  var string = "z\xA2\u6C34\uD834\uDD1E\uDBFF\uDFFD"; // z, cent, CJK water, G-Clef, Private-use character

  // missing BOMs
  assert_equals(new TextDecoder("utf-8").decode(new Uint8Array(utf8)), string);
  assert_equals(
    new TextDecoder("utf-16le").decode(new Uint8Array(utf16le)),
    string
  );
  assert_equals(
    new TextDecoder("utf-16be").decode(new Uint8Array(utf16be)),
    string
  );

  // matching BOMs
  assert_equals(
    new TextDecoder("utf-8").decode(new Uint8Array(utf8_bom.concat(utf8))),
    string
  );
  assert_equals(
    new TextDecoder("utf-16le").decode(
      new Uint8Array(utf16le_bom.concat(utf16le))
    ),
    string
  );
  assert_equals(
    new TextDecoder("utf-16be").decode(
      new Uint8Array(utf16be_bom.concat(utf16be))
    ),
    string
  );

  // matching BOMs split
  // The same decoder instance is reused: the non-streaming decode() call
  // finishes one pass before the next streamed BOM prefix is fed in.
  var decoder8 = new TextDecoder("utf-8");
  assert_equals(
    decoder8.decode(new Uint8Array(utf8_bom.slice(0, 1)), { stream: true }),
    ""
  );
  assert_equals(
    decoder8.decode(new Uint8Array(utf8_bom.slice(1).concat(utf8))),
    string
  );
  assert_equals(
    decoder8.decode(new Uint8Array(utf8_bom.slice(0, 2)), { stream: true }),
    ""
  );
  assert_equals(
    decoder8.decode(new Uint8Array(utf8_bom.slice(2).concat(utf8))),
    string
  );

  var decoder16le = new TextDecoder("utf-16le");
  assert_equals(
    decoder16le.decode(new Uint8Array(utf16le_bom.slice(0, 1)), {
      stream: true,
    }),
    ""
  );
  assert_equals(
    decoder16le.decode(new Uint8Array(utf16le_bom.slice(1).concat(utf16le))),
    string
  );

  var decoder16be = new TextDecoder("utf-16be");
  assert_equals(
    decoder16be.decode(new Uint8Array(utf16be_bom.slice(0, 1)), {
      stream: true,
    }),
    ""
  );
  assert_equals(
    decoder16be.decode(new Uint8Array(utf16be_bom.slice(1).concat(utf16be))),
    string
  );

  // mismatching BOMs
  assert_not_equals(
    new TextDecoder("utf-8").decode(new Uint8Array(utf16le_bom.concat(utf8))),
    string
  );
  assert_not_equals(
    new TextDecoder("utf-8").decode(new Uint8Array(utf16be_bom.concat(utf8))),
    string
  );
  assert_not_equals(
    new TextDecoder("utf-16le").decode(
      new Uint8Array(utf8_bom.concat(utf16le))
    ),
    string
  );
  assert_not_equals(
    new TextDecoder("utf-16le").decode(
      new Uint8Array(utf16be_bom.concat(utf16le))
    ),
    string
  );
  assert_not_equals(
    new TextDecoder("utf-16be").decode(
      new Uint8Array(utf8_bom.concat(utf16be))
    ),
    string
  );
  assert_not_equals(
    new TextDecoder("utf-16be").decode(
      new Uint8Array(utf16le_bom.concat(utf16be))
    ),
    string
  );
}, "Byte-order marks");

// The `encoding` attribute reports the canonical lower-case name for the
// label that was passed to the constructor.
test(function () {
  assert_equals(new TextDecoder("utf-8").encoding, "utf-8"); // canonical case
  assert_equals(new TextDecoder("UTF-16").encoding, "utf-16le"); // canonical case and name
  assert_equals(new TextDecoder("UTF-16BE").encoding, "utf-16be"); // canonical case and name
  assert_equals(new TextDecoder("iso8859-1").encoding, "windows-1252"); // canonical case and name
  assert_equals(new TextDecoder("iso-8859-1").encoding, "windows-1252"); // canonical case and name
}, "Encoding names");

// Feeding the encoded bytes to a streaming decoder in chunks of 1 to 5 bytes
// must reproduce the original string regardless of where the chunks split.
test(function () {
  // Fixtures are the same for every encoding, so they are built once.
  var string =
    "\x00123ABCabc\x80\xFF\u0100\u1000\uFFFD\uD800\uDC00\uDBFF\uDFFF";
  var octets = {
    "utf-16le": [
      0x00, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x41, 0x00, 0x42, 0x00,
      0x43, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x80, 0x00, 0xff, 0x00,
      0x00, 0x01, 0x00, 0x10, 0xfd, 0xff, 0x00, 0xd8, 0x00, 0xdc, 0xff, 0xdb,
      0xff, 0xdf,
    ],
    "utf-16be": [
      0x00, 0x00, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x41, 0x00, 0x42,
      0x00, 0x43, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x80, 0x00, 0xff,
      0x01, 0x00, 0x10, 0x00, 0xff, 0xfd, 0xd8, 0x00, 0xdc, 0x00, 0xdb, 0xff,
      0xdf, 0xff,
    ],
  };

  ["utf-8", "utf-16le", "utf-16be"].forEach(function (encoding) {
    // No hard-coded table exists for utf-8; its bytes come from TextEncoder.
    var encoded = octets[encoding] || new TextEncoder().encode(string);

    for (var len = 1; len <= 5; ++len) {
      var out = "";
      var decoder = new TextDecoder(encoding);
      for (var i = 0; i < encoded.length; i += len) {
        // Copy the next chunk of at most `len` bytes.
        var sub = [];
        for (var j = i; j < encoded.length && j < i + len; ++j) {
          sub.push(encoded[j]);
        }
        out += decoder.decode(new Uint8Array(sub), { stream: true });
      }
      // Final call without { stream: true } flushes the decoder.
      out += decoder.decode();
      assert_equals(out, string, "streaming decode " + encoding);
    }
  });
}, "Streaming Decode");

// Basic Shift_JIS decode of a three-character sample.
test(function () {
  var jis = [0x82, 0xc9, 0x82, 0xd9, 0x82, 0xf1];
  var expected = "\u306B\u307B\u3093"; // Nihon
  assert_equals(
    new TextDecoder("shift_jis").decode(new Uint8Array(jis)),
    expected
  );
}, "Shift_JIS Decode");

// Every listed encoding must decode the code units 0x00-0x7F (as encoded by
// TextEncoder) back to the identical string.
test(function () {
  var encodings = [
    "utf-8",
    "ibm866",
    "iso-8859-2",
    "iso-8859-3",
    "iso-8859-4",
    "iso-8859-5",
    "iso-8859-6",
    "iso-8859-7",
    "iso-8859-8",
    "iso-8859-8-i",
    "iso-8859-10",
    "iso-8859-13",
    "iso-8859-14",
    "iso-8859-15",
    "iso-8859-16",
    "koi8-r",
    "koi8-u",
    "macintosh",
    "windows-874",
    "windows-1250",
    "windows-1251",
    "windows-1252",
    "windows-1253",
    "windows-1254",
    "windows-1255",
    "windows-1256",
    "windows-1257",
    "windows-1258",
    "x-mac-cyrillic",
    "gbk",
    "gb18030",
    "big5",
    "euc-jp",
    "iso-2022-jp",
    "shift_jis",
    "euc-kr",
    "x-user-defined",
  ];

  encodings.forEach(function (encoding) {
    var string = "";
    for (var i = 0; i < 128; ++i) {
      // Encodings that have escape codes in 0x00-0x7F
      if (
        encoding === "iso-2022-jp" &&
        (i === 0x1b || i === 0xe || i === 0xf)
      ) {
        continue;
      }
      string += String.fromCharCode(i);
    }

    var ascii_encoded = new TextEncoder().encode(string);
    assert_equals(
      new TextDecoder(encoding).decode(ascii_encoded),
      string,
      encoding
    );
  });
}, "Supersets of ASCII decode ASCII correctly");

// Truncated input at end-of-stream: fatal decoders must throw a TypeError,
// and non-fatal decoders must return rather than loop forever.
test(function () {
  assert_throws({ name: "TypeError" }, function () {
    new TextDecoder("utf-8", { fatal: true }).decode(new Uint8Array([0xff]));
  });
  // This should not hang:
  new TextDecoder("utf-8").decode(new Uint8Array([0xff]));

  assert_throws({ name: "TypeError" }, function () {
    new TextDecoder("utf-16", { fatal: true }).decode(new Uint8Array([0x00]));
  });
  // This should not hang:
  new TextDecoder("utf-16").decode(new Uint8Array([0x00]));

  assert_throws({ name: "TypeError" }, function () {
    new TextDecoder("utf-16be", { fatal: true }).decode(new Uint8Array([0x00]));
  });
  // This should not hang:
  new TextDecoder("utf-16be").decode(new Uint8Array([0x00]));
}, "Non-fatal errors at EOF");

// TextDecoder reports each requested encoding by name, whereas TextEncoder
// always reports "utf-8" no matter what argument it is given.
test(function () {
  var encodings = [
    "utf-8",
    "ibm866",
    "iso-8859-2",
    "iso-8859-3",
    "iso-8859-4",
    "iso-8859-5",
    "iso-8859-6",
    "iso-8859-7",
    "iso-8859-8",
    "iso-8859-8-i",
    "iso-8859-10",
    "iso-8859-13",
    "iso-8859-14",
    "iso-8859-15",
    "iso-8859-16",
    "koi8-r",
    "koi8-u",
    "macintosh",
    "windows-874",
    "windows-1250",
    "windows-1251",
    "windows-1252",
    "windows-1253",
    "windows-1254",
    "windows-1255",
    "windows-1256",
    "windows-1257",
    "windows-1258",
    "x-mac-cyrillic",
    "gbk",
    "gb18030",
    "big5",
    "euc-jp",
    "iso-2022-jp",
    "shift_jis",
    "euc-kr",
    "x-user-defined",
    "utf-16le",
    "utf-16be",
  ];

  encodings.forEach(function (encoding) {
    assert_equals(new TextDecoder(encoding).encoding, encoding);
    assert_equals(new TextEncoder(encoding).encoding, "utf-8");
  });
}, "Non-UTF-8 encodings supported only for decode, not encode");