summary refs log tree commit diff stats
path: root/testing/web-platform/tests/encoding/streams/encode-utf8.any.js
diff options
context:
space:
mode:
Diffstat (limited to 'testing/web-platform/tests/encoding/streams/encode-utf8.any.js')
-rw-r--r-- testing/web-platform/tests/encoding/streams/encode-utf8.any.js 144
1 files changed, 144 insertions, 0 deletions
diff --git a/testing/web-platform/tests/encoding/streams/encode-utf8.any.js b/testing/web-platform/tests/encoding/streams/encode-utf8.any.js
new file mode 100644
index 0000000000..a5ba8f91ea
--- /dev/null
+++ b/testing/web-platform/tests/encoding/streams/encode-utf8.any.js
@@ -0,0 +1,144 @@
+// META: global=window,worker
+// META: script=resources/readable-stream-from-array.js
+// META: script=resources/readable-stream-to-array.js
+
+'use strict';
+const inputString = 'I \u{1F499} streams'; // ASCII text surrounding one astral character
+const expectedOutputBytes = [0x49, 0x20, 0xf0, 0x9f, 0x92, 0x99, 0x20, 0x73,
+ 0x74, 0x72, 0x65, 0x61, 0x6d, 0x73]; // UTF-8 bytes expected for inputString
+// This is a character that must be represented in two code units in a string,
+// i.e. it is not in the Basic Multilingual Plane.
+const astralCharacter = '\u{1F499}'; // BLUE HEART
+const astralCharacterEncoded = [0xf0, 0x9f, 0x92, 0x99]; // UTF-8 bytes of U+1F499
+const leading = astralCharacter[0]; // first code unit of the pair (leading surrogate)
+const trailing = astralCharacter[1]; // second code unit of the pair (trailing surrogate)
+const replacementEncoded = [0xef, 0xbf, 0xbd]; // UTF-8 bytes of U+FFFD REPLACEMENT CHARACTER
+
+// These tests assume that the implementation correctly classifies leading and
+// trailing surrogates and treats all the code units in each set equivalently.
+
+const testCases = [ // each case: input string chunks, expected output chunks (byte arrays), test name
+ {
+ input: [inputString],
+ output: [expectedOutputBytes],
+ description: 'encoding one string of UTF-8 should give one complete chunk'
+ },
+ {
+ input: [leading, trailing], // one surrogate pair split across two chunks
+ output: [astralCharacterEncoded],
+ description: 'a character split between chunks should be correctly encoded'
+ },
+ {
+ input: [leading, trailing + astralCharacter],
+ output: [astralCharacterEncoded.concat(astralCharacterEncoded)], // both characters land in a single output chunk
+ description: 'a character following one split between chunks should be ' +
+ 'correctly encoded'
+ },
+ {
+ input: [leading, trailing + leading, trailing],
+ output: [astralCharacterEncoded, astralCharacterEncoded],
+ description: 'two consecutive astral characters each split down the ' +
+ 'middle should be correctly reassembled'
+ },
+ {
+ input: [leading, trailing + leading + leading, trailing], // the first of the two adjacent leading surrogates is unpaired
+ output: [astralCharacterEncoded.concat(replacementEncoded), astralCharacterEncoded],
+ description: 'two consecutive astral characters each split down the ' +
+ 'middle with an invalid surrogate in the middle should be correctly ' +
+ 'encoded'
+ },
+ {
+ input: [leading],
+ output: [replacementEncoded],
+ description: 'a stream ending in a leading surrogate should emit a ' +
+ 'replacement character as a final chunk'
+ },
+ {
+ input: [leading, astralCharacter],
+ output: [replacementEncoded.concat(astralCharacterEncoded)],
+ description: 'an unmatched surrogate at the end of a chunk followed by ' +
+ 'an astral character in the next chunk should be replaced with ' +
+ 'the replacement character at the start of the next output chunk'
+ },
+ {
+ input: [leading, 'A'],
+ output: [replacementEncoded.concat([65])], // 65 is the byte for 'A'
+ description: 'an unmatched surrogate at the end of a chunk followed by ' +
+ 'an ascii character in the next chunk should be replaced with ' +
+ 'the replacement character at the start of the next output chunk'
+ },
+ {
+ input: [leading, leading, trailing],
+ output: [replacementEncoded, astralCharacterEncoded],
+ description: 'an unmatched surrogate at the end of a chunk followed by ' +
+ 'a plane 1 character split into two chunks should result in ' +
+ 'the encoded plane 1 character appearing in the last output chunk'
+ },
+ {
+ input: [leading, leading],
+ output: [replacementEncoded, replacementEncoded],
+ description: 'two leading chunks should result in two replacement ' +
+ 'characters'
+ },
+ {
+ input: [leading + leading, trailing], // the first leading surrogate is unpaired within its own chunk
+ output: [replacementEncoded, astralCharacterEncoded],
+ description: 'a non-terminal unpaired leading surrogate should ' +
+ 'immediately be replaced'
+ },
+ {
+ input: [trailing, astralCharacter], // a trailing surrogate with no leading surrogate before it
+ output: [replacementEncoded, astralCharacterEncoded],
+ description: 'a terminal unpaired trailing surrogate should ' +
+ 'immediately be replaced'
+ },
+ {
+ input: [leading, '', trailing], // an empty chunk sits between the two halves of the pair
+ output: [astralCharacterEncoded],
+ description: 'a leading surrogate chunk should be carried past empty chunks'
+ },
+ {
+ input: [leading, ''],
+ output: [replacementEncoded], // NOTE(review): the description says "error", but the expectation is a replacement character chunk
+ description: 'a leading surrogate chunk should error when it is clear ' +
+ 'it didn\'t form a pair'
+ },
+ {
+ input: [''],
+ output: [], // no chunks at all are expected
+ description: 'an empty string should result in no output chunk'
+ },
+ {
+ input: ['', inputString],
+ output: [expectedOutputBytes],
+ description: 'a leading empty chunk should be ignored'
+ },
+ {
+ input: [inputString, ''],
+ output: [expectedOutputBytes],
+ description: 'a trailing empty chunk should be ignored'
+ },
+ {
+ input: ['A'],
+ output: [[65]],
+ description: 'a plain ASCII chunk should be converted'
+ },
+ {
+ input: ['\xff'],
+ output: [[195, 191]], // 0xc3 0xbf, the two-byte UTF-8 form of U+00FF
+ description: 'characters in the ISO-8859-1 range should be encoded correctly'
+ },
+];
+
+for (const {input, output, description} of testCases) { // registers one promise_test per table entry
+ promise_test(async () => {
+ const inputStream = readableStreamFromArray(input); // helper defined outside this file; presumably from resources/readable-stream-from-array.js
+ const outputStream = inputStream.pipeThrough(new TextEncoderStream());
+ const chunkArray = await readableStreamToArray(outputStream); // helper defined outside this file; presumably from resources/readable-stream-to-array.js
+ assert_equals(chunkArray.length, output.length,
+ 'number of chunks should match'); // the number of output chunks is checked, not only their bytes
+ for (let i = 0; i < output.length; ++i) {
+ assert_array_equals(chunkArray[i], output[i], `chunk ${i} should match`);
+ }
+ }, description);
+}