// |reftest| skip-if(!this.hasOwnProperty('Reflect')||!Reflect.parse) -- uses Reflect.parse(..., { loc: true}) to trigger the column-computing API /* * Any copyright is dedicated to the Public Domain. * http://creativecommons.org/licenses/publicdomain/ */ //----------------------------------------------------------------------------- var BUGNUMBER = 1551916; var summary = "Optimize computing a column number as count of code points by caching " + "column numbers (and whether each chunk might contain anything multi-unit) " + "and counting forward from them"; print(BUGNUMBER + ": " + summary); /************** * BEGIN TEST * **************/ // Various testing of column-number computations, with respect to counting as // code points or units, for very long lines. // // This test should pass when column numbers are counts of code points (current // behavior) or code units (past behavior). It also *should* pass for any valid // |TokenStreamAnyChars::ColumnChunkLength| value (it must be at least 4 so that // the maximum-length code point in UTF-8/16 will fit in a single chunk), // because the value of that constant should be externally invisible save for // perf effects. (As a result, recompiling and running this test with a variety // of different values assigned to that constant is a good smoke-test of column // number computation, that doesn't require having to closely inspect any // column-related code.) // // However, this test is structured on the assumption that that constant has the // value 128, in order to exercise in targeted fashion various column number // computation edge cases. // // All this testing *could* be written to not be done with |Reflect.parse| -- // backwards column computations happen even when compiling normal code, in some // cases. But it's much more the exception than the rule. And |Reflect.parse| // has *very* predictable column-computation operations (basically start/end // coordinates are computed immediately when the end of an AST node is reached) // that make it easier to recognize what the exact pattern of computations for // which offsets will look like. // Helper function for checking node location tersely. function checkLoc(node, expectedStart, expectedEnd) { let start = node.loc.start; assertEq(start.line, expectedStart[0], "start line number must be as expected"); assertEq(start.column, expectedStart[1], "start column number must be as expected"); let end = node.loc.end; assertEq(end.line, expectedEnd[0], "end line number must be as expected"); assertEq(end.column, expectedEnd[1], "end column number must be as expected"); } function lengthInCodePoints(str) { return [...str].length; } // True if column numbers are counts of code points, false otherwise. This // constant can be used to short-circuit testing that isn't point/unit-agnostic. const columnsAreCodePoints = (function() { var columnTypes = []; function checkColumn(actual, expectedPoints, expectedUnits) { if (actual === expectedPoints) columnTypes.push("p"); else if (actual === expectedUnits) columnTypes.push("u"); else columnTypes.push("x"); } var script = Reflect.parse('"😱😱😱😱";', { loc: true }); assertEq(script.type, "Program"); assertEq(script.loc.start.line, 1); assertEq(script.loc.end.line, 1); assertEq(script.loc.start.column, 0); checkColumn(script.loc.end.column, 7, 11); var body = script.body; assertEq(body.length, 1); var stmt = body[0]; assertEq(stmt.type, "ExpressionStatement"); assertEq(stmt.loc.start.line, 1); assertEq(stmt.loc.end.line, 1); assertEq(stmt.loc.start.column, 0); checkColumn(stmt.loc.end.column, 7, 11); var expr = stmt.expression; assertEq(expr.type, "Literal"); assertEq(expr.value, "😱😱😱😱"); assertEq(expr.loc.start.line, 1); assertEq(expr.loc.end.line, 1); assertEq(expr.loc.start.column, 0); checkColumn(expr.loc.end.column, 6, 10); var checkResult = columnTypes.join(","); assertEq(checkResult === "p,p,p" || checkResult === "u,u,u", true, "columns must be wholly code points or units: " + checkResult); return checkResult === "p,p,p"; })(); // Start with some basic sanity-testing, without regard to exactly when, how, or // in what order (offset => column) computations are performed. function testSimple() { if (!columnsAreCodePoints) return; // Array elements within the full |simpleCode| string constructed below are // one-element arrays containing the string "😱😱#x" where "#" is the // character that, in C++, could be written as |'(' + i| where |i| is the // index of the array within the outer array. let simpleCodeArray = [ 'var Q = [[', // column 0, offset 0 // REPEAT '"😱😱(x"],["', // column 10, offset 10 '😱😱)x"],["😱', // column 20, offset 22 '😱*x"],["😱😱', // column 30, offset 35 '+x"],["😱😱,', // column 40, offset 48 'x"],["😱😱-x', // column 50, offset 60 '"],["😱😱.x"', // column 60, offset 72 '],["😱😱/x"]', // column 70, offset 84 ',["😱😱0x"],', // column 80, offset 96 '["😱😱1x"],[', // column 90, offset 108 // REPEAT '"😱😱2x"],["', // column 100, offset 120 -- chunk limit between "] '😱😱3x"],["😱', // column 110, offset 132 '😱4x"],["😱😱', // column 120, offset 145 '5x"],["😱😱6', // column 130, offset 158 'x"],["😱😱7x', // column 140, offset 170 '"],["😱😱8x"', // column 150, offset 182 '],["😱😱9x"]', // column 160, offset 194 ',["😱😱:x"],', // column 170, offset 206 '["😱😱;x"],[', // column 180, offset 218 // REPEAT '"😱😱x"],["😱😱', // column 210, offset 255 -- chunk limit splits first 😱 '?x"],["😱😱@', // column 220, offset 268 'x"],["😱😱Ax', // column 230, offset 280 '"],["😱😱Bx"', // column 240, offset 292 '],["😱😱Cx"]', // column 250, offset 304 ',["😱😱Dx"],', // column 260, offset 316 '["😱😱Ex"],[', // column 270, offset 328 // REPEAT '"😱😱Fx"],["', // column 280, offset 340 '😱😱Gx"],["😱', // column 290, offset 352 '😱Hx"],["😱😱', // column 300, offset 365 'Ix"],["😱😱J', // column 310, offset 378 -- chunk limit between [" 'x"],["😱😱Kx', // column 320, offset 390 '"],["😱😱Lx"', // column 330, offset 402 '],["😱😱Mx"]', // column 340, offset 414 ',["😱😱Nx"],', // column 350, offset 426 '["😱😱Ox"]];', // column 360 (10 long), offset 438 (+12 to end) ]; let simpleCode = simpleCodeArray.join(""); // |simpleCode| overall contains this many code points. (This number is // chosen to be several |TokenStreamAnyChars::ColumnChunkLength = 128| chunks // long so that long-line handling is exercised, and the relevant vector // increased in length, for more than one chunk [which would be too short to // trigger chunking] and for more than two chunks [so that vector extension // will eventually occur].) const CodePointLength = 370; assertEq(lengthInCodePoints(simpleCode), CodePointLength, "code point count should be correct"); // |simpleCodeArray| contains this many REPEAT-delimited cycles. const RepetitionNumber = 4; // Each cycle consists of this many elements. const ElementsPerCycle = 9; // Each element in a cycle has at least this many 😱. const MinFaceScreamingPerElementInCycle = 2; // Each cycle consists of many elements with three 😱. const ElementsInCycleWithThreeFaceScreaming = 2; // Compute the overall number of UTF-16 code units. (UTF-16 because this is a // JS string as input.) const OverallCodeUnitCount = CodePointLength + RepetitionNumber * (ElementsPerCycle * MinFaceScreamingPerElementInCycle + ElementsInCycleWithThreeFaceScreaming); // Code units != code points. assertEq(OverallCodeUnitCount > CodePointLength, true, "string contains code points outside BMP, so length in units " + "exceeds length in points"); // The overall computed number of code units has this exact numeric value. assertEq(OverallCodeUnitCount, 450, "code unit count computation produces this value"); // The overall computed number of code units matches the string length. assertEq(simpleCode.length, OverallCodeUnitCount, "string length must match"); // Evaluate the string. var Q; eval(simpleCode); // Verify characteristics of the resulting execution. assertEq(Array.isArray(Q), true); const NumArrayElements = 40; assertEq(Q.length, NumArrayElements); Q.forEach((v, i) => { assertEq(Array.isArray(v), true); assertEq(v.length, 1); assertEq(v[0], "😱😱" + String.fromCharCode('('.charCodeAt(0) + i) + "x"); }); let parseTree = Reflect.parse(simpleCode, { loc: true }); // Check the overall script. assertEq(parseTree.type, "Program"); checkLoc(parseTree, [1, 0], [1, 370]); assertEq(parseTree.body.length, 1); // Check the coordinates of the declaration. let varDecl = parseTree.body[0]; assertEq(varDecl.type, "VariableDeclaration"); checkLoc(varDecl, [1, 0], [1, 369]); // ...and its initializing expression. let varInit = varDecl.declarations[0].init; assertEq(varInit.type, "ArrayExpression"); checkLoc(varInit, [1, 8], [1, 369]); // ...and then every literal inside it. assertEq(varInit.elements.length, NumArrayElements, "array literal length"); const ItemLength = lengthInCodePoints('["😱😱#x"],'); assertEq(ItemLength, 9, "item length check"); for (let i = 0; i < NumArrayElements; i++) { let elem = varInit.elements[i]; assertEq(elem.type, "ArrayExpression"); let startCol = 9 + i * ItemLength; let endCol = startCol + ItemLength - 1; checkLoc(elem, [1, startCol], [1, endCol]); let arrayElems = elem.elements; assertEq(arrayElems.length, 1); let str = arrayElems[0]; assertEq(str.type, "Literal"); assertEq(str.value, "😱😱" + String.fromCharCode('('.charCodeAt(0) + i) + "x"); checkLoc(str, [1, startCol + 1], [1, endCol - 1]); } } testSimple(); // Test |ChunkInfo::unitsType() == UnitsType::GuaranteedSingleUnit| -- not that // it should be observable, precisely, but effects of mis-applying or // miscomputing it would in principle be observable if such were happening. // This test also is intended to to be useful for (manually, in a debugger) // verifying that the optimization is computed and kicks in correctly. function testGuaranteedSingleUnit() { if (!columnsAreCodePoints) return; // Begin a few array literals in a first chunk to test column computation in // that first chunk. // // End some of them in the first chunk to test columns *before* we know we // have a long line. // // End one array *outside* the first chunk to test a computation inside a // first chunk *after* we know we have a long line and have computed a first // chunk. let mixedChunksCode = "var Z = [ [ [],"; // column 0, offset 0 assertEq(mixedChunksCode.length, 15); assertEq(lengthInCodePoints(mixedChunksCode), 15); mixedChunksCode += " ".repeat(128 - mixedChunksCode.length); // column 15, offset 15 assertEq(mixedChunksCode.length, 128); assertEq(lengthInCodePoints(mixedChunksCode), 128); // Fill out a second chunk as also single-unit, with an outer array literal // that begins in this chunk but finishes in the next (to test column // computation in a prior, guaranteed-single-unit chunk). mixedChunksCode += "[" + "[],".repeat(42) + " "; // column 128, offset 128 assertEq(mixedChunksCode.length, 256); assertEq(lengthInCodePoints(mixedChunksCode), 256); // Add a third chunk with one last empty nested array literal (so that we // tack on another chunk, and conclude the second chunk is single-unit, before // closing the enclosing array literal). Then close the enclosing array // literal. Finally start a new string literal element containing // multi-unit code points. For good measure, make the chunk *end* in the // middle of such a code point, so that the relevant chunk limit must be // retracted one code unit. mixedChunksCode += "[] ], '" + "😱".repeat(61); // column 256, offset 256 assertEq(mixedChunksCode.length, 384 + 1); assertEq(lengthInCodePoints(mixedChunksCode), 324); // Wrap things up. Terminate the string, then terminate the nested array // literal to trigger a column computation within the first chunk that can // benefit from knowing the first chunk is all single-unit. Next add a *new* // element to the outermost array, a string literal that contains a line // terminator. The terminator invalidates the column computation cache, so // when the outermost array is closed, location info for it will not hit the // cache. Finally, tack on the terminating semicolon for good measure. mixedChunksCode += "' ], '\u2028' ];"; // column 324, offset 385 assertEq(mixedChunksCode.length, 396); assertEq(lengthInCodePoints(mixedChunksCode), 335); let parseTree = Reflect.parse(mixedChunksCode, { loc: true }); // Check the overall script. assertEq(parseTree.type, "Program"); checkLoc(parseTree, [1, 0], [2, 4]); assertEq(parseTree.body.length, 1); // Check the coordinates of the declaration. let varDecl = parseTree.body[0]; assertEq(varDecl.type, "VariableDeclaration"); checkLoc(varDecl, [1, 0], [2, 3]); // ...and its initializing expression. let varInit = varDecl.declarations[0].init; assertEq(varInit.type, "ArrayExpression"); checkLoc(varInit, [1, 8], [2, 3]); let outerArrayElements = varInit.elements; assertEq(outerArrayElements.length, 2); { // Next the first element, the array inside the initializing expression. let nestedArray = varInit.elements[0]; assertEq(nestedArray.type, "ArrayExpression"); checkLoc(nestedArray, [1, 10], [1, 327]); // Now inside that nested array. let nestedArrayElements = nestedArray.elements; assertEq(nestedArrayElements.length, 3); // First the [] in chunk #0 let emptyArray = nestedArrayElements[0]; assertEq(emptyArray.type, "ArrayExpression"); assertEq(emptyArray.elements.length, 0); checkLoc(emptyArray, [1, 12], [1, 14]); // Then the big array of empty arrays starting in chunk #1 and ending just // barely in chunk #2. let bigArrayOfEmpties = nestedArrayElements[1]; assertEq(bigArrayOfEmpties.type, "ArrayExpression"); assertEq(bigArrayOfEmpties.elements.length, 42 + 1); bigArrayOfEmpties.elements.forEach((elem, i) => { assertEq(elem.type, "ArrayExpression"); assertEq(elem.elements.length, 0); if (i !== 42) checkLoc(elem, [1, 129 + i * 3], [1, 131 + i * 3]); else checkLoc(elem, [1, 256], [1, 258]); // last element was hand-placed }); // Then the string literal of multi-unit code points beginning in chunk #2 // and ending just into chunk #3 on a second line. let multiUnitStringLiteral = nestedArrayElements[2]; assertEq(multiUnitStringLiteral.type, "Literal"); assertEq(multiUnitStringLiteral.value, "😱".repeat(61)); checkLoc(multiUnitStringLiteral, [1, 262], [1, 325]); } { // Finally, the string literal containing a line terminator as element in // the outermost array. let stringLiteralWithEmbeddedTerminator = outerArrayElements[1]; assertEq(stringLiteralWithEmbeddedTerminator.type, "Literal"); assertEq(stringLiteralWithEmbeddedTerminator.value, "\u2028"); checkLoc(stringLiteralWithEmbeddedTerminator, [1, 329], [2, 1]); } } testGuaranteedSingleUnit(); if (typeof reportCompare === "function") reportCompare(true, true); print("Testing completed");