'use strict';

// Replicates the escaping applied by
// /FileAPI/file/resources/echo-content-escaped.py so that expected
// strings can be compared against that script's response body.
//
// - string: an isomorphic-decoded string (one code unit per byte).
//
// Returns `string` with every backslash doubled and every code unit
// outside printable ASCII (0x20-0x7E) written as a lowercase `\xNN`
// escape, except that an escaped CR immediately followed by an escaped
// LF is turned back into a literal "\r\n".
function escapeString(string) {
  return string
    .replace(/\\/g, "\\\\")
    .replace(
      /[^\x20-\x7E]/g,
      (x) => `\\x${x.charCodeAt(0).toString(16).padStart(2, "0")}`,
    )
    // Backslashes were doubled above, so this can only match a CRLF pair
    // that the previous step has just escaped.
    .replace(/\\x0d\\x0a/g, "\r\n");
}

// Rationale for this particular test character sequence, which is
// used in filenames and also in file contents:
//
// - ABC~ ensures the string starts with something we can read to
//   ensure it is from the correct source; ~ is used because even
//   some 1-byte otherwise-ASCII-like parts of ISO-2022-JP
//   interpret it differently.
// - ‾¥ are inside a single-byte range of ISO-2022-JP and help
//   diagnose problems due to filesystem encoding or locale
// - ≈ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - ¤ is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale; it is also the "simplest" case
//   needing substitution in ISO-2022-JP
// - ･ (U+FF65) is inside a single-byte range of ISO-2022-JP in some
//   variants and helps diagnose problems due to filesystem encoding or
//   locale; on the web it is distinct when decoding but unified when
//   encoding
// - ・ (U+30FB) is inside a double-byte range of ISO-2022-JP and helps
//   diagnose problems due to filesystem encoding or locale
// - • is inside Windows-1252 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures these aren't
//   accidentally turned into e.g. control codes
// - ∙ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - · is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures HTML named
//   character references (e.g. &middot;) are not used
// - ☼ is inside IBM437 shadowing C0 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures these aren't
//   accidentally turned into e.g. control codes
// - ★ is inside ISO-2022-JP on a non-Kanji page and makes correct
//   output easier to spot
// - 星 is inside ISO-2022-JP on a Kanji page and makes correct
//   output easier to spot
// - 🌟 is outside the BMP and makes incorrect surrogate pair
//   substitution detectable and ensures substitutions work
//   correctly immediately after Kanji 2-byte ISO-2022-JP
// - 星 repeated here ensures the correct codec state is used
//   after a non-BMP substitution
// - ★ repeated here also makes correct output easier to spot
// - ☼ is inside IBM437 shadowing C0 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures these aren't
//   accidentally turned into e.g. control codes and also ensures
//   substitutions work correctly immediately after non-Kanji
//   2-byte ISO-2022-JP
// - · is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures HTML named
//   character references (e.g. &middot;) are not used
// - ∙ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - • is inside Windows-1252 and again helps diagnose problems
//   due to filesystem encoding or locale
// - ・ (U+30FB) is inside a double-byte range of ISO-2022-JP and helps
//   diagnose problems due to filesystem encoding or locale
// - ･ (U+FF65) is inside a single-byte range of ISO-2022-JP in some
//   variants and helps diagnose problems due to filesystem encoding or
//   locale; on the web it is distinct when decoding but unified when
//   encoding
// - ¤ is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale; again it is a "simple"
//   substitution case
// - ≈ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - ¥‾ are inside a single-byte range of ISO-2022-JP and help
//   diagnose problems due to filesystem encoding or locale
// - ~XYZ ensures earlier errors don't lead to misencoding of
//   simple ASCII
//
// Overall the near-symmetry makes common I18N mistakes like
// off-by-1-after-non-BMP easier to spot. All the characters
// are also allowed in Windows Unicode filenames.
//
// The halfwidth middle dot is deliberately spelled as the escape \uFF65
// so that width-normalizing tools cannot fold it into the visually
// similar U+30FB that sits right next to it.
const kTestChars = 'ABC~‾¥≈¤\uFF65・•∙·☼★星🌟星★☼·∙•・\uFF65¤≈¥‾~XYZ';

// The kTestFallback* strings represent the expected byte sequence from
// encoding kTestChars with the given encoding with "html" replacement
// mode, isomorphic-decoded. That means, characters that can't be
// encoded in that encoding get HTML-escaped, but no further
// `escapeString`-like escapes are needed.
const kTestFallbackUtf8 = (
  "ABC~\xE2\x80\xBE\xC2\xA5\xE2\x89\x88\xC2\xA4\xEF\xBD\xA5\xE3\x83\xBB\xE2" +
    "\x80\xA2\xE2\x88\x99\xC2\xB7\xE2\x98\xBC\xE2\x98\x85\xE6\x98\x9F\xF0\x9F" +
    "\x8C\x9F\xE6\x98\x9F\xE2\x98\x85\xE2\x98\xBC\xC2\xB7\xE2\x88\x99\xE2\x80" +
    "\xA2\xE3\x83\xBB\xEF\xBD\xA5\xC2\xA4\xE2\x89\x88\xC2\xA5\xE2\x80\xBE~XYZ"
);

// Both middle dots encode to the same double-byte sequence (!&) here, so
// this literal does not need to distinguish U+FF65 from U+30FB.
const kTestFallbackIso2022jp = (
  ("ABC~\x1B(J~\\≈¤\x1B$B!&!&\x1B(B•∙·☼\x1B$B!z@1\x1B(B🌟" +
   "\x1B$B@1!z\x1B(B☼·∙•\x1B$B!&!&\x1B(B¤≈\x1B(J\\~\x1B(B~XYZ")
    .replace(/[^\0-\x7F]/gu, (x) => `&#${x.codePointAt(0)};`)
);

// Characters that survive as \xNN here are the ones written as single
// bytes in the literal; everything above U+00FF becomes a numeric
// character reference.
const kTestFallbackWindows1252 = (
  "ABC~‾\xA5≈\xA4\uFF65・\x95∙\xB7☼★星🌟星★☼\xB7∙\x95・\uFF65\xA4≈\xA5‾~XYZ".replace(
    /[^\0-\xFF]/gu,
    (x) => `&#${x.codePointAt(0)};`,
  )
);

const kTestFallbackXUserDefined = kTestChars.replace(
  /[^\0-\x7F]/gu,
  (x) => `&#${x.codePointAt(0)};`,
);

// formPostFileUploadTest - verifies multipart upload structure and
// numeric character reference replacement for filenames, field names,
// and field values using form submission.
//
// Uses /FileAPI/file/resources/echo-content-escaped.py to echo the
// upload POST with controls and non-ASCII bytes escaped. This is done
// because navigations whose response body contains [\0\b\v] may get
// treated as a download, which is not what we want. Use the
// `escapeString` function to replicate that kind of escape (note that
// it takes an isomorphic-decoded string, not a byte sequence).
//
// Fields in the parameter object:
//
// - fileNameSource: explanatory only; hints at which character encoding
//   the non-7-bit-ASCII parts of fileBaseName were taken from, or Unicode
//   if no smaller-than-Unicode source contains all the characters. Used
//   in the test name.
// - fileBaseName: the basename (not necessarily just 7-bit ASCII) given
//   to the constructed test file. Used in the test name.
// - formEncoding: the acceptCharset of the form used to submit the test
//   file. Used in the test name.
// - expectedEncodedBaseName: the expected formEncoding-encoded version of
//   fileBaseName, isomorphic-decoded. That means, characters that can't
//   be encoded in that encoding get HTML-escaped, but no further
//   `escapeString`-like escapes are needed.
const formPostFileUploadTest = ({
  fileNameSource,
  fileBaseName,
  formEncoding,
  expectedEncodedBaseName,
}) => {
  const testName =
      `Upload ${fileBaseName} (${fileNameSource}) in ${formEncoding} form`;

  promise_test(async (testCase) => {
    // Wait for the page itself to finish loading before touching the DOM.
    if (document.readyState !== 'complete') {
      await new Promise((loaded) => addEventListener('load', loaded));
    }

    // Creates an element and copies the given properties onto it.
    const make = (tagName, properties) =>
        Object.assign(document.createElement(tagName), properties);

    // Appends the element to <body> and removes it again at test cleanup.
    const attachUntilCleanup = (element) => {
      document.body.append(element);
      testCase.add_cleanup(() => {
        document.body.removeChild(element);
      });
      return element;
    };

    // Removes c:\fakepath\ or other pseudofolder and returns just the
    // final component of filePath; allows both / and \ as segment
    // delimiters.
    const baseNameOfFilePath = (filePath) => filePath.split(/[\/\\]/).pop();

    // The form response is loaded into this frame so it can be read back.
    const formTargetFrame = attachUntilCleanup(make('iframe', {
      name: 'formtargetframe',
    }));

    const form = attachUntilCleanup(make('form', {
      acceptCharset: formEncoding,
      action: '/FileAPI/file/resources/echo-content-escaped.py',
      method: 'POST',
      enctype: 'multipart/form-data',
      target: formTargetFrame.name,
    }));

    const hiddenFields = [
      // Used to verify that the browser agrees with the test about
      // which form charset is used.
      {type: 'hidden', name: '_charset_'},
      // Used to verify that the browser agrees with the test about
      // field value replacement and encoding independently of file system
      // idiosyncrasies.
      {type: 'hidden', name: 'filename', value: fileBaseName},
      // Same, but with name and value reversed to ensure field names
      // get the same treatment.
      {type: 'hidden', name: fileBaseName, value: 'filename'},
    ];
    for (const properties of hiddenFields) {
      form.append(make('input', properties));
    }

    const fileInput = make('input', {type: 'file', name: 'file'});
    form.append(fileInput);

    // Populate the file input with a synthetic file whose contents are
    // kTestChars and whose name is the basename under test.
    const transfer = new DataTransfer();
    const testFile =
        new File([kTestChars], fileBaseName, {type: 'text/plain'});
    transfer.items.add(testFile);
    fileInput.files = transfer.files;

    // For historical reasons .value will be prefixed with
    // c:\fakepath\, but the basename should match the file name
    // exposed through the newer .files[0].name API. This check
    // verifies that assumption.
    assert_equals(
        baseNameOfFilePath(fileInput.files[0].name),
        baseNameOfFilePath(fileInput.value),
        `The basename of the field's value should match its files[0].name`);

    // Submit and wait for the echoed response to load in the target frame.
    await new Promise((responseLoaded) => {
      form.submit();
      formTargetFrame.onload = responseLoaded;
    });

    const formDataText = formTargetFrame.contentDocument.body.textContent;
    const formDataLines = formDataText.split('\n');
    // Ignore a single trailing empty line produced by a final newline.
    const lastIndex = formDataLines.length - 1;
    if (formDataLines.length > 0 && formDataLines[lastIndex] === '') {
      formDataLines.pop();
    }

    const quotedFormData = JSON.stringify(formDataText);
    assert_greater_than(
        formDataLines.length,
        2,
        `${fileBaseName}: multipart form data must have at least 3 lines: ${
            quotedFormData}`);

    const boundary = formDataLines[0];
    const closingBoundary = boundary + '--';
    assert_equals(
        formDataLines[formDataLines.length - 1],
        closingBoundary,
        `${fileBaseName}: multipart form data must end with ${boundary}--: ${
            quotedFormData}`);

    // Names and filenames have CR, LF and double quotes percent-encoded;
    // values only have their line breaks normalized to CRLF.
    const percentEncodeSpecials = (text) =>
        text.replace(/[\r\n"]/g, (ch) => encodeURIComponent(ch));
    const asValue = expectedEncodedBaseName.replace(/\r\n?|\n/g, "\r\n");
    const asName = percentEncodeSpecials(asValue);
    const asFilename = percentEncodeSpecials(expectedEncodedBaseName);

    // The response body from echo-content-escaped.py has controls and
    // non-ASCII bytes escaped, so any caller-provided field that might
    // contain such bytes must be passed to `escapeString`, after any other
    // expected transformations.
    const charsetPart = [
      boundary,
      'Content-Disposition: form-data; name="_charset_"',
      '',
      formEncoding,
    ];
    const valuePart = [
      boundary,
      'Content-Disposition: form-data; name="filename"',
      '',
      // Unlike for names and filenames, multipart/form-data values don't
      // escape \r\n linebreaks, and when they're read from an iframe they
      // become \n.
      escapeString(asValue).replace(/\r\n/g, "\n"),
    ];
    const namePart = [
      boundary,
      `Content-Disposition: form-data; name="${escapeString(asName)}"`,
      '',
      'filename',
    ];
    const filePart = [
      boundary,
      `Content-Disposition: form-data; name="file"; ` +
          `filename="${escapeString(asFilename)}"`,
      'Content-Type: text/plain',
      '',
      escapeString(kTestFallbackUtf8),
    ];
    const expectedText = [].concat(
        charsetPart,
        valuePart,
        namePart,
        filePart,
        [closingBoundary],
    ).join('\n');

    assert_true(
        formDataText.startsWith(expectedText),
        `Unexpected multipart-shaped form data received:\n${
            formDataText}\nExpected:\n${expectedText}`);
  }, testName);
};