summary refs log tree commit diff stats
path: root/intl/uconv/tests/unit/test_bug317216.js
diff options
context:
space:
mode:
Diffstat (limited to 'intl/uconv/tests/unit/test_bug317216.js')
-rw-r--r-- intl/uconv/tests/unit/test_bug317216.js 165
1 files changed, 165 insertions, 0 deletions
diff --git a/intl/uconv/tests/unit/test_bug317216.js b/intl/uconv/tests/unit/test_bug317216.js
new file mode 100644
index 0000000000..cc10ef313e
--- /dev/null
+++ b/intl/uconv/tests/unit/test_bug317216.js
@@ -0,0 +1,165 @@
+/* Test case for bug 317216
+ *
+ * Uses nsIConverterInputStream to decode UTF-16 text with valid surrogate
+ * pairs and lone surrogate characters
+ *
+ * Sample text is: MATHEMATICAL BOLD SMALL G (U+1D420), i.e. the UTF-16
+ * surrogate pair D835 DC20
+ *
+ * The test uses buffers of 4 different lengths to test end of buffer in mid-
+ * UTF16 character and mid-surrogate pair
+ */
+
+const { NetUtil } = ChromeUtils.importESModule(
+ "resource://gre/modules/NetUtil.sys.mjs"
+);
+
+// Test fixtures. Each entry is a two-element array:
+// [0] the input as %-escaped UTF-16 bytes in big-endian order (run_test()
+// byte-swaps it with flip() for the UTF-16LE pass)
+// [1] the string the converter is expected to produce, with U+FFFD standing
+// in for each ill-formed sequence
+// %D8%35 is the high surrogate and %DC%20 the low surrogate of the sample
+// pair; %00%2D is "-". Every input except the last two ends in two "-".
+const test = [
+ // 0: Valid surrogate pair
+ [
+ "%D8%35%DC%20%00%2D%00%2D",
+ // expected: surrogate pair
+ "\uD835\uDC20--",
+ ],
+ // 1: Lone high surrogate
+ [
+ "%D8%35%00%2D%00%2D",
+ // expected: one replacement char
+ "\uFFFD--",
+ ],
+ // 2: Lone low surrogate
+ [
+ "%DC%20%00%2D%00%2D",
+ // expected: one replacement char
+ "\uFFFD--",
+ ],
+ // 3: Two high surrogates
+ [
+ "%D8%35%D8%35%00%2D%00%2D",
+ // expected: two replacement chars
+ "\uFFFD\uFFFD--",
+ ],
+ // 4: Two low surrogates
+ [
+ "%DC%20%DC%20%00%2D%00%2D",
+ // expected: two replacement chars
+ "\uFFFD\uFFFD--",
+ ],
+ // 5: Low surrogate followed by high surrogate
+ [
+ "%DC%20%D8%35%00%2D%00%2D",
+ // expected: two replacement chars
+ "\uFFFD\uFFFD--",
+ ],
+ // 6: Lone high surrogate followed by valid surrogate pair
+ [
+ "%D8%35%D8%35%DC%20%00%2D%00%2D",
+ // expected: replacement char followed by surrogate pair
+ "\uFFFD\uD835\uDC20--",
+ ],
+ // 7: Lone low surrogate followed by valid surrogate pair
+ [
+ "%DC%20%D8%35%DC%20%00%2D%00%2D",
+ // expected: replacement char followed by surrogate pair
+ "\uFFFD\uD835\uDC20--",
+ ],
+ // 8: Valid surrogate pair followed by lone high surrogate
+ [
+ "%D8%35%DC%20%D8%35%00%2D%00%2D",
+ // expected: surrogate pair followed by replacement char
+ "\uD835\uDC20\uFFFD--",
+ ],
+ // 9: Valid surrogate pair followed by lone low surrogate
+ [
+ "%D8%35%DC%20%DC%20%00%2D%00%2D",
+ // expected: surrogate pair followed by replacement char
+ "\uD835\uDC20\uFFFD--",
+ ],
+ // 10: Lone high surrogate at the end of the input
+ // NOTE(review): the input ends in a bare "%" with no hex digits after it;
+ // confirm that this is intentional and not a truncated escape
+ [
+ "%D8%35%",
+ // expected: one replacement char
+ "\uFFFD",
+ ],
+ // 11: Half code unit at the end of the input
+ [
+ "%D8",
+ // expected: one replacement char
+ "\uFFFD",
+ ],
+];
+
+// Constructor wrapper: `new ConverterInputStream(...)` creates an instance of
+// the converter-input-stream contract, exposes it as nsIConverterInputStream,
+// and forwards the constructor arguments to that instance's init() method.
+const ConverterInputStream = Components.Constructor(
+ "@mozilla.org/intl/converter-input-stream;1",
+ "nsIConverterInputStream",
+ "init"
+);
+
+function testCase(testText, expectedText, bufferLength, charset) {
+ var dataURI = "data:text/plain;charset=" + charset + "," + testText;
+ var channel = NetUtil.newChannel({
+ uri: dataURI,
+ loadUsingSystemPrincipal: true,
+ });
+ var testInputStream = channel.open();
+ var testConverter = new ConverterInputStream(
+ testInputStream,
+ charset,
+ bufferLength,
+ 0xfffd
+ );
+
+ if (!(testConverter instanceof Ci.nsIUnicharLineInputStream)) {
+ throw new Error("not line input stream");
+ }
+
+ var outStr = "";
+ var more;
+ do {
+ // read the line and check for eof
+ var line = {};
+ more = testConverter.readLine(line);
+ outStr += line.value;
+ } while (more);
+
+ // escape the strings before comparing for better readability
+ Assert.equal(escape(outStr), escape(expectedText));
+}
+
+// Number of dummy characters that padBytes() and padUnichars() prepend to the
+// test text to work around the minimum buffer size of an ns*Buffer. It is also
+// the smallest buffer length that run_test() passes to the converter.
+const MINIMUM_BUFFER_SIZE = 32;
+function padBytes(str) {
+ var padding = "";
+ for (var i = 0; i < MINIMUM_BUFFER_SIZE; ++i) {
+ padding += "%00%2D";
+ }
+ return padding + str;
+}
+
+function padUnichars(str) {
+ var padding = "";
+ for (var i = 0; i < MINIMUM_BUFFER_SIZE; ++i) {
+ padding += "-";
+ }
+ return padding + str;
+}
+
+// Byte-swap %-encoded utf-16
+function flip(str) {
+ return str.replace(/(%..)(%..)/g, "$2$1");
+}
+
+function run_test() {
+ for (var i = 0; i < 12; ++i) {
+ for (
+ var bufferLength = MINIMUM_BUFFER_SIZE;
+ bufferLength < MINIMUM_BUFFER_SIZE + 4;
+ ++bufferLength
+ ) {
+ var testText = padBytes(test[i][0]);
+ var expectedText = padUnichars(test[i][1]);
+ testCase(testText, expectedText, bufferLength, "UTF-16BE");
+ testCase(flip(testText), expectedText, bufferLength, "UTF-16LE");
+ }
+ }
+}