diff options
Diffstat (limited to '')
-rw-r--r-- | dom/encoding/test/test_BOMEncoding.js | 308 |
1 files changed, 308 insertions, 0 deletions
/*
 * test_TextDecoderBOMEncoding.js
 * (added in the tree as dom/encoding/test/test_BOMEncoding.js)
 * bug 764234 tests
 *
 * Exercises how TextDecoder treats a byte order mark (BOM) at the start of
 * the input, both when the BOM agrees with the requested encoding label and
 * when it does not.
 *
 * `test`, `assert_equals` and `assert_true` are not defined in this file;
 * they are expected to be provided by the harness that loads it.
 */

/**
 * Entry point: registers each BOM test function with the harness.
 *
 * The (misspelled) function name is kept as-is because it is the name
 * external callers use.
 */
function runTextDecoderBOMEnoding() {
  test(testDecodeValidBOMUTF16, "testDecodeValidBOMUTF16");
  test(testBOMEncodingUTF8, "testBOMEncodingUTF8");
  test(testMoreBOMEncoding, "testMoreBOMEncoding");
}

/**
 * Decodes a UTF-16BE byte stream that starts with the matching BOM (FE FF)
 * and expects the text back without the BOM.
 */
function testDecodeValidBOMUTF16() {
  const expectedString =
    '"\u0412\u0441\u0435 \u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u044B\u0435 \u0441\u0435\u043C\u044C\u0438 \u043F\u043E\u0445\u043E\u0436\u0438 \u0434\u0440\u0443\u0433 \u043D\u0430 \u0434\u0440\u0443\u0433\u0430, \u043A\u0430\u0436\u0434\u0430\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430\u044F \u0441\u0435\u043C\u044C\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430 \u043F\u043E-\u0441\u0432\u043E\u0435\u043C\u0443."';

  // Testing UTF-16BE
  const data = [
    0xfe, 0xff, 0x00, 0x22, 0x04, 0x12, 0x04, 0x41, 0x04, 0x35, 0x00, 0x20,
    0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3b,
    0x04, 0x38, 0x04, 0x32, 0x04, 0x4b, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41,
    0x04, 0x35, 0x04, 0x3c, 0x04, 0x4c, 0x04, 0x38, 0x00, 0x20, 0x04, 0x3f,
    0x04, 0x3e, 0x04, 0x45, 0x04, 0x3e, 0x04, 0x36, 0x04, 0x38, 0x00, 0x20,
    0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x00, 0x20, 0x04, 0x3d,
    0x04, 0x30, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33,
    0x04, 0x30, 0x00, 0x2c, 0x00, 0x20, 0x04, 0x3a, 0x04, 0x30, 0x04, 0x36,
    0x04, 0x34, 0x04, 0x30, 0x04, 0x4f, 0x00, 0x20, 0x04, 0x3d, 0x04, 0x35,
    0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3b,
    0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4f, 0x00, 0x20, 0x04, 0x41,
    0x04, 0x35, 0x04, 0x3c, 0x04, 0x4c, 0x04, 0x4f, 0x00, 0x20, 0x04, 0x3d,
    0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42,
    0x04, 0x3b, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x00, 0x20, 0x04, 0x3f,
    0x04, 0x3e, 0x00, 0x2d, 0x04, 0x41, 0x04, 0x32, 0x04, 0x3e, 0x04, 0x35,
    0x04, 0x3c, 0x04, 0x43, 0x00, 0x2e, 0x00, 0x22,
  ];
  testBOMCharset({
    encoding: "utf-16be",
    data,
    expected: expectedString,
    msg: "decoder valid UTF-16BE test.",
  });
}

/**
 * UTF-8 label cases: no BOM, a matching UTF-8 BOM, a non-UTF-8 BOM (FF FE)
 * in fatal and non-fatal mode, and an empty encoding label.
 */
function testBOMEncodingUTF8() {
  const asciiText = " !\"#$%&'";
  // The same payload prefixed with FF FE, which is not a UTF-8 BOM.
  const dataWithForeignBOM = [
    0xff, 0xfe, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
  ];

  // Basic utf-8 test with valid encoding and byte stream; no byte order mark
  // provided.
  testBOMCharset({
    encoding: "utf-8",
    data: [0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27],
    expected: asciiText,
    msg: "utf-8 encoding.",
  });

  // Valid encoding provided together with a valid (matching) BOM.
  testBOMCharset({
    encoding: "utf-8",
    data: [0xef, 0xbb, 0xbf, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27],
    expected: asciiText,
    msg: "valid utf-8 encoding provided with VALID utf-8 BOM test.",
  });

  // Valid encoding provided with an invalid BOM, fatal mode: the BOM bytes
  // are not valid UTF-8, so decoding is expected to throw.
  testBOMCharset({
    encoding: "utf-8",
    fatal: true,
    data: dataWithForeignBOM,
    error: "TypeError",
    msg: "valid utf-8 encoding provided with invalid utf-8 fatal BOM test.",
  });

  // Valid encoding provided with an invalid BOM, non-fatal mode: each BOM
  // byte is expected to decode to U+FFFD.
  testBOMCharset({
    encoding: "utf-8",
    data: dataWithForeignBOM,
    expected: "\ufffd\ufffd !\"#$%&'",
    msg: "valid utf-8 encoding provided with invalid utf-8 BOM test.",
  });

  // Empty encoding label provided with an invalid BOM: constructing the
  // decoder is expected to throw.
  testBOMCharset({
    encoding: "",
    data: dataWithForeignBOM,
    error: "RangeError",
    msg: "empty encoding provided with invalid utf-8 BOM test.",
  });
}

/**
 * Cases where the BOM in the data disagrees with (or, for the last utf-16
 * pair, agrees with) the encoding label supplied by the caller.
 */
function testMoreBOMEncoding() {
  const expectedString =
    '"\u0412\u0441\u0435 \u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u044B\u0435 \u0441\u0435\u043C\u044C\u0438 \u043F\u043E\u0445\u043E\u0436\u0438 \u0434\u0440\u0443\u0433 \u043D\u0430 \u0434\u0440\u0443\u0433\u0430, \u043A\u0430\u0436\u0434\u0430\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430\u044F \u0441\u0435\u043C\u044C\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430 \u043F\u043E-\u0441\u0432\u043E\u0435\u043C\u0443."';

  // Testing user provided encoding is UTF-16BE & bom encoding is utf-16le
  const dataUTF16BEWithLEBOM = [
    0xff, 0xfe, 0x00, 0x22, 0x04, 0x12, 0x04, 0x41, 0x04, 0x35, 0x00, 0x20,
    0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3b,
    0x04, 0x38, 0x04, 0x32, 0x04, 0x4b, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41,
    0x04, 0x35, 0x04, 0x3c, 0x04, 0x4c, 0x04, 0x38, 0x00, 0x20, 0x04, 0x3f,
    0x04, 0x3e, 0x04, 0x45, 0x04, 0x3e, 0x04, 0x36, 0x04, 0x38, 0x00, 0x20,
    0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x00, 0x20, 0x04, 0x3d,
    0x04, 0x30, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33,
    0x04, 0x30, 0x00, 0x2c, 0x00, 0x20, 0x04, 0x3a, 0x04, 0x30, 0x04, 0x36,
    0x04, 0x34, 0x04, 0x30, 0x04, 0x4f, 0x00, 0x20, 0x04, 0x3d, 0x04, 0x35,
    0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3b,
    0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4f, 0x00, 0x20, 0x04, 0x41,
    0x04, 0x35, 0x04, 0x3c, 0x04, 0x4c, 0x04, 0x4f, 0x00, 0x20, 0x04, 0x3d,
    0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42,
    0x04, 0x3b, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x00, 0x20, 0x04, 0x3f,
    0x04, 0x3e, 0x00, 0x2d, 0x04, 0x41, 0x04, 0x32, 0x04, 0x3e, 0x04, 0x35,
    0x04, 0x3c, 0x04, 0x43, 0x00, 0x2e, 0x00, 0x22,
  ];

  // A mismatched BOM is not stripped; read in the label's byte order it
  // becomes U+FFFE at the start of the output.
  testBOMCharset({
    encoding: "utf-16be",
    fatal: true,
    data: dataUTF16BEWithLEBOM,
    expected: "\ufffe" + expectedString,
    msg: "test decoder invalid BOM encoding for utf-16be fatal.",
  });

  testBOMCharset({
    encoding: "utf-16be",
    data: dataUTF16BEWithLEBOM,
    expected: "\ufffe" + expectedString,
    msg: "test decoder invalid BOM encoding for utf-16be.",
  });

  // Testing user provided encoding is UTF-16LE & bom encoding is utf-16be
  const dataUTF16LEWithBEBOM = [
    0xfe, 0xff, 0x22, 0x00, 0x12, 0x04, 0x41, 0x04, 0x35, 0x04, 0x20, 0x00,
    0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3b, 0x04,
    0x38, 0x04, 0x32, 0x04, 0x4b, 0x04, 0x35, 0x04, 0x20, 0x00, 0x41, 0x04,
    0x35, 0x04, 0x3c, 0x04, 0x4c, 0x04, 0x38, 0x04, 0x20, 0x00, 0x3f, 0x04,
    0x3e, 0x04, 0x45, 0x04, 0x3e, 0x04, 0x36, 0x04, 0x38, 0x04, 0x20, 0x00,
    0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x20, 0x00, 0x3d, 0x04,
    0x30, 0x04, 0x20, 0x00, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04,
    0x30, 0x04, 0x2c, 0x00, 0x20, 0x00, 0x3a, 0x04, 0x30, 0x04, 0x36, 0x04,
    0x34, 0x04, 0x30, 0x04, 0x4f, 0x04, 0x20, 0x00, 0x3d, 0x04, 0x35, 0x04,
    0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3b, 0x04,
    0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4f, 0x04, 0x20, 0x00, 0x41, 0x04,
    0x35, 0x04, 0x3c, 0x04, 0x4c, 0x04, 0x4f, 0x04, 0x20, 0x00, 0x3d, 0x04,
    0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04,
    0x3b, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x20, 0x00, 0x3f, 0x04,
    0x3e, 0x04, 0x2d, 0x00, 0x41, 0x04, 0x32, 0x04, 0x3e, 0x04, 0x35, 0x04,
    0x3c, 0x04, 0x43, 0x04, 0x2e, 0x00, 0x22, 0x00,
  ];
  testBOMCharset({
    encoding: "utf-16le",
    fatal: true,
    data: dataUTF16LEWithBEBOM,
    expected: "\ufffe" + expectedString,
    msg: "test decoder invalid BOM encoding for utf-16le fatal.",
  });

  testBOMCharset({
    encoding: "utf-16le",
    data: dataUTF16LEWithBEBOM,
    expected: "\ufffe" + expectedString,
    msg: "test decoder invalid BOM encoding for utf-16le.",
  });

  // Testing user provided encoding is UTF-16 & bom encoding is utf-16be
  testBOMCharset({
    encoding: "utf-16",
    fatal: true,
    data: dataUTF16LEWithBEBOM,
    expected: "\ufffe" + expectedString,
    msg: "test decoder invalid BOM encoding for utf-16 fatal.",
  });

  testBOMCharset({
    encoding: "utf-16",
    data: dataUTF16LEWithBEBOM,
    expected: "\ufffe" + expectedString,
    msg: "test decoder invalid BOM encoding for utf-16.",
  });

  // Testing user provided encoding is UTF-16 & bom encoding is utf-16le
  const dataUTF16LEWithLEBOM = [
    0xff, 0xfe, 0x22, 0x00, 0x12, 0x04, 0x41, 0x04, 0x35, 0x04, 0x20, 0x00,
    0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3b, 0x04,
    0x38, 0x04, 0x32, 0x04, 0x4b, 0x04, 0x35, 0x04, 0x20, 0x00, 0x41, 0x04,
    0x35, 0x04, 0x3c, 0x04, 0x4c, 0x04, 0x38, 0x04, 0x20, 0x00, 0x3f, 0x04,
    0x3e, 0x04, 0x45, 0x04, 0x3e, 0x04, 0x36, 0x04, 0x38, 0x04, 0x20, 0x00,
    0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x20, 0x00, 0x3d, 0x04,
    0x30, 0x04, 0x20, 0x00, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04,
    0x30, 0x04, 0x2c, 0x00, 0x20, 0x00, 0x3a, 0x04, 0x30, 0x04, 0x36, 0x04,
    0x34, 0x04, 0x30, 0x04, 0x4f, 0x04, 0x20, 0x00, 0x3d, 0x04, 0x35, 0x04,
    0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3b, 0x04,
    0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4f, 0x04, 0x20, 0x00, 0x41, 0x04,
    0x35, 0x04, 0x3c, 0x04, 0x4c, 0x04, 0x4f, 0x04, 0x20, 0x00, 0x3d, 0x04,
    0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04,
    0x3b, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x20, 0x00, 0x3f, 0x04,
    0x3e, 0x04, 0x2d, 0x00, 0x41, 0x04, 0x32, 0x04, 0x3e, 0x04, 0x35, 0x04,
    0x3c, 0x04, 0x43, 0x04, 0x2e, 0x00, 0x22, 0x00,
  ];
  testBOMCharset({
    encoding: "utf-16",
    fatal: true,
    data: dataUTF16LEWithLEBOM,
    expected: expectedString,
    msg: "test decoder BOM encoding for utf-16 fatal.",
  });

  testBOMCharset({
    encoding: "utf-16",
    data: dataUTF16LEWithLEBOM,
    expected: expectedString,
    msg: "test decoder BOM encoding for utf-16.",
  });

  // Testing user provided encoding is UTF-8 & bom encoding is utf-16be
  const dataUTF8WithUTF16BEBOM = [
    0xfe, 0xff, 0x22, 0xd0, 0x92, 0xd1, 0x81, 0xd0, 0xb5, 0x20, 0xd1, 0x81,
    0xd1, 0x87, 0xd0, 0xb0, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xbb, 0xd0, 0xb8,
    0xd0, 0xb2, 0xd1, 0x8b, 0xd0, 0xb5, 0x20, 0xd1, 0x81, 0xd0, 0xb5, 0xd0,
    0xbc, 0xd1, 0x8c, 0xd0, 0xb8, 0x20, 0xd0, 0xbf, 0xd0, 0xbe, 0xd1, 0x85,
    0xd0, 0xbe, 0xd0, 0xb6, 0xd0, 0xb8, 0x20, 0xd0, 0xb4, 0xd1, 0x80, 0xd1,
    0x83, 0xd0, 0xb3, 0x20, 0xd0, 0xbd, 0xd0, 0xb0, 0x20, 0xd0, 0xb4, 0xd1,
    0x80, 0xd1, 0x83, 0xd0, 0xb3, 0xd0, 0xb0, 0x2c, 0x20, 0xd0, 0xba, 0xd0,
    0xb0, 0xd0, 0xb6, 0xd0, 0xb4, 0xd0, 0xb0, 0xd1, 0x8f, 0x20, 0xd0, 0xbd,
    0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x87, 0xd0, 0xb0, 0xd1, 0x81, 0xd1, 0x82,
    0xd0, 0xbb, 0xd0, 0xb8, 0xd0, 0xb2, 0xd0, 0xb0, 0xd1, 0x8f, 0x20, 0xd1,
    0x81, 0xd0, 0xb5, 0xd0, 0xbc, 0xd1, 0x8c, 0xd1, 0x8f, 0x20, 0xd0, 0xbd,
    0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x87, 0xd0, 0xb0, 0xd1, 0x81, 0xd1, 0x82,
    0xd0, 0xbb, 0xd0, 0xb8, 0xd0, 0xb2, 0xd0, 0xb0, 0x20, 0xd0, 0xbf, 0xd0,
    0xbe, 0x2d, 0xd1, 0x81, 0xd0, 0xb2, 0xd0, 0xbe, 0xd0, 0xb5, 0xd0, 0xbc,
    0xd1, 0x83, 0x2e, 0x22,
  ];

  testBOMCharset({
    encoding: "utf-8",
    fatal: true,
    data: dataUTF8WithUTF16BEBOM,
    error: "TypeError",
    msg: "test decoder invalid BOM encoding for valid utf-8 fatal provided label.",
  });

  testBOMCharset({
    encoding: "utf-8",
    data: dataUTF8WithUTF16BEBOM,
    expected: "\ufffd\ufffd" + expectedString,
    msg: "test decoder invalid BOM encoding for valid utf-8 provided label.",
  });

  // Testing user provided encoding is non-UTF & bom encoding is utf-16be
  const dataGreekWithUTF16BEBOM = [
    0xfe, 0xff, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab,
    0xac, 0xad, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8,
    0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4,
    0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
    0xd1, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd,
    0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
    0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5,
    0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe,
  ];

  const expectedGreekString =
    "\u03CE\uFFFD\u2019\xA3\u20AC\u20AF\xA6\xA7\xA8\xA9\u037A\xAB\xAC\xAD\u2015" +
    "\xB0\xB1\xB2\xB3\u0384\u0385\u0386\xB7\u0388\u0389\u038A\xBB\u038C\xBD\u038E\u038F" +
    "\u0390\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039A\u039B\u039C\u039D\u039E\u039F" +
    "\u03A0\u03A1\u03A3\u03A4\u03A5\u03A6\u03A7\u03A8\u03A9\u03AA\u03AB\u03AC\u03AD\u03AE\u03AF" +
    "\u03B0\u03B1\u03B2\u03B3\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9\u03BA\u03BB\u03BC\u03BD\u03BE\u03BF" +
    "\u03C0\u03C1\u03C2\u03C3\u03C4\u03C5\u03C6\u03C7\u03C8\u03C9\u03CA\u03CB\u03CC\u03CD\u03CE";

  // The message carries "fatal" so a failure here can be told apart from the
  // non-fatal greek case below (the two used to share one message).
  testBOMCharset({
    encoding: "greek",
    fatal: true,
    data: dataGreekWithUTF16BEBOM,
    error: "TypeError",
    msg: "test decoder encoding provided with invalid BOM encoding for greek fatal.",
  });

  testBOMCharset({
    encoding: "greek",
    data: dataGreekWithUTF16BEBOM,
    expected: expectedGreekString,
    msg: "test decoder encoding provided with invalid BOM encoding for greek.",
  });
}

/**
 * Runs one decode case and reports the outcome through the harness asserts.
 *
 * @param {object} test - Description of the case.
 * @param {string} test.encoding - Label passed to the TextDecoder constructor.
 * @param {number[]} test.data - Bytes to decode (wrapped in a Uint8Array).
 * @param {boolean} [test.fatal] - When the key is present, it is forwarded as
 *   the decoder's `fatal` option; when absent, no options object is passed.
 * @param {string} [test.expected] - Expected decoded text.
 * @param {string} [test.error] - Expected `name` of the exception thrown by
 *   construction or decoding; omit when the case must succeed.
 * @param {string} test.msg - Message attached to every assertion.
 */
function testBOMCharset(test) {
  let outText;
  try {
    const decoder =
      "fatal" in test
        ? new TextDecoder(test.encoding, { fatal: test.fatal })
        : new TextDecoder(test.encoding);
    outText = decoder.decode(new Uint8Array(test.data));
  } catch (e) {
    // Any exception ends the case: it must match the expected error name
    // (an unexpected exception is compared against undefined).
    assert_equals(e.name, test.error, test.msg);
    return;
  }
  // Decoding succeeded, so the case must not have been expecting an error.
  assert_true(!test.error, test.msg);

  if (outText !== test.expected) {
    // Compare escaped forms so the assertion message shows the differing
    // code points rather than raw, possibly unprintable, characters.
    assert_equals(
      escape(outText),
      escape(test.expected),
      test.msg + " Code points do not match expected code points."
    );
  }
}