1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
|
// |reftest| skip-if(!this.hasOwnProperty('Reflect')||!Reflect.parse) -- uses Reflect.parse(..., { loc: true}) to trigger the column-computing API
/*
* Any copyright is dedicated to the Public Domain.
* http://creativecommons.org/licenses/publicdomain/
*/
//-----------------------------------------------------------------------------
var BUGNUMBER = 1551916;
var summary =
"Optimize computing a column number as count of code points by caching " +
"column numbers (and whether each chunk might contain anything multi-unit) " +
"and counting forward from them";
print(BUGNUMBER + ": " + summary);
/**************
* BEGIN TEST *
**************/
// Various testing of column-number computations, with respect to counting as
// code points or units, for very long lines.
//
// This test should pass when column numbers are counts of code points (current
// behavior) or code units (past behavior). It also *should* pass for any valid
// |TokenStreamAnyChars::ColumnChunkLength| value (it must be at least 4 so that
// the maximum-length code point in UTF-8/16 will fit in a single chunk),
// because the value of that constant should be externally invisible save for
// perf effects. (As a result, recompiling and running this test with a variety
// of different values assigned to that constant is a good smoke-test of column
// number computation, that doesn't require having to closely inspect any
// column-related code.)
//
// However, this test is structured on the assumption that that constant has the
// value 128, in order to exercise in targeted fashion various column number
// computation edge cases.
//
// All this testing *could* be written to not be done with |Reflect.parse| --
// backwards column computations happen even when compiling normal code, in some
// cases. But it's much more the exception than the rule. And |Reflect.parse|
// has *very* predictable column-computation operations (basically start/end
// coordinates are computed immediately when the end of an AST node is reached)
// that make it easier to recognize what the exact pattern of computations for
// which offsets will look like.
// Helper function for checking node location tersely.
function checkLoc(node, expectedStart, expectedEnd)
{
let start = node.loc.start;
assertEq(start.line, expectedStart[0],
"start line number must be as expected");
assertEq(start.column, expectedStart[1],
"start column number must be as expected");
let end = node.loc.end;
assertEq(end.line, expectedEnd[0], "end line number must be as expected");
assertEq(end.column, expectedEnd[1],
"end column number must be as expected");
}
function lengthInCodePoints(str)
{
return [...str].length;
}
// True if column numbers are counts of code points, false otherwise. This
// constant can be used to short-circuit testing that isn't point/unit-agnostic.
const columnsAreCodePoints = (function()
{
var columnTypes = [];
function checkColumn(actual, expectedPoints, expectedUnits)
{
if (actual === expectedPoints)
columnTypes.push("p");
else if (actual === expectedUnits)
columnTypes.push("u");
else
columnTypes.push("x");
}
var script = Reflect.parse('"😱😱😱😱";', { loc: true });
assertEq(script.type, "Program");
assertEq(script.loc.start.line, 1);
assertEq(script.loc.end.line, 1);
assertEq(script.loc.start.column, 0);
checkColumn(script.loc.end.column, 7, 11);
var body = script.body;
assertEq(body.length, 1);
var stmt = body[0];
assertEq(stmt.type, "ExpressionStatement");
assertEq(stmt.loc.start.line, 1);
assertEq(stmt.loc.end.line, 1);
assertEq(stmt.loc.start.column, 0);
checkColumn(stmt.loc.end.column, 7, 11);
var expr = stmt.expression;
assertEq(expr.type, "Literal");
assertEq(expr.value, "😱😱😱😱");
assertEq(expr.loc.start.line, 1);
assertEq(expr.loc.end.line, 1);
assertEq(expr.loc.start.column, 0);
checkColumn(expr.loc.end.column, 6, 10);
var checkResult = columnTypes.join(",");
assertEq(checkResult === "p,p,p" || checkResult === "u,u,u", true,
"columns must be wholly code points or units: " + checkResult);
return checkResult === "p,p,p";
})();
// Start with some basic sanity-testing, without regard to exactly when, how, or
// in what order (offset => column) computations are performed.
function testSimple()
{
if (!columnsAreCodePoints)
return;
// Array elements within the full |simpleCode| string constructed below are
// one-element arrays containing the string "😱😱#x" where "#" is the
// character that, in C++, could be written as |'(' + i| where |i| is the
// index of the array within the outer array.
let simpleCodeArray =
[
'var Q = [[', // column 0, offset 0
// REPEAT
'"😱😱(x"],["', // column 10, offset 10
'😱😱)x"],["😱', // column 20, offset 22
'😱*x"],["😱😱', // column 30, offset 35
'+x"],["😱😱,', // column 40, offset 48
'x"],["😱😱-x', // column 50, offset 60
'"],["😱😱.x"', // column 60, offset 72
'],["😱😱/x"]', // column 70, offset 84
',["😱😱0x"],', // column 80, offset 96
'["😱😱1x"],[', // column 90, offset 108
// REPEAT
'"😱😱2x"],["', // column 100, offset 120 -- chunk limit between "]
'😱😱3x"],["😱', // column 110, offset 132
'😱4x"],["😱😱', // column 120, offset 145
'5x"],["😱😱6', // column 130, offset 158
'x"],["😱😱7x', // column 140, offset 170
'"],["😱😱8x"', // column 150, offset 182
'],["😱😱9x"]', // column 160, offset 194
',["😱😱:x"],', // column 170, offset 206
'["😱😱;x"],[', // column 180, offset 218
// REPEAT
'"😱😱<x"],["', // column 190, offset 230
'😱😱=x"],["😱', // column 200, offset 242
'😱>x"],["😱😱', // column 210, offset 255 -- chunk limit splits first 😱
'?x"],["😱😱@', // column 220, offset 268
'x"],["😱😱Ax', // column 230, offset 280
'"],["😱😱Bx"', // column 240, offset 292
'],["😱😱Cx"]', // column 250, offset 304
',["😱😱Dx"],', // column 260, offset 316
'["😱😱Ex"],[', // column 270, offset 328
// REPEAT
'"😱😱Fx"],["', // column 280, offset 340
'😱😱Gx"],["😱', // column 290, offset 352
'😱Hx"],["😱😱', // column 300, offset 365
'Ix"],["😱😱J', // column 310, offset 378 -- chunk limit between ["
'x"],["😱😱Kx', // column 320, offset 390
'"],["😱😱Lx"', // column 330, offset 402
'],["😱😱Mx"]', // column 340, offset 414
',["😱😱Nx"],', // column 350, offset 426
'["😱😱Ox"]];', // column 360 (10 long), offset 438 (+12 to end)
];
let simpleCode = simpleCodeArray.join("");
// |simpleCode| overall contains this many code points. (This number is
// chosen to be several |TokenStreamAnyChars::ColumnChunkLength = 128| chunks
// long so that long-line handling is exercised, and the relevant vector
// increased in length, for more than one chunk [which would be too short to
// trigger chunking] and for more than two chunks [so that vector extension
// will eventually occur].)
const CodePointLength = 370;
assertEq(lengthInCodePoints(simpleCode), CodePointLength,
"code point count should be correct");
// |simpleCodeArray| contains this many REPEAT-delimited cycles.
const RepetitionNumber = 4;
// Each cycle consists of this many elements.
const ElementsPerCycle = 9;
// Each element in a cycle has at least this many 😱.
const MinFaceScreamingPerElementInCycle = 2;
// Each cycle consists of many elements with three 😱.
const ElementsInCycleWithThreeFaceScreaming = 2;
// Compute the overall number of UTF-16 code units. (UTF-16 because this is a
// JS string as input.)
const OverallCodeUnitCount =
CodePointLength +
RepetitionNumber * (ElementsPerCycle * MinFaceScreamingPerElementInCycle +
ElementsInCycleWithThreeFaceScreaming);
// Code units != code points.
assertEq(OverallCodeUnitCount > CodePointLength, true,
"string contains code points outside BMP, so length in units " +
"exceeds length in points");
// The overall computed number of code units has this exact numeric value.
assertEq(OverallCodeUnitCount, 450,
"code unit count computation produces this value");
// The overall computed number of code units matches the string length.
assertEq(simpleCode.length, OverallCodeUnitCount, "string length must match");
// Evaluate the string.
var Q;
eval(simpleCode);
// Verify characteristics of the resulting execution.
assertEq(Array.isArray(Q), true);
const NumArrayElements = 40;
assertEq(Q.length, NumArrayElements);
Q.forEach((v, i) => {
assertEq(Array.isArray(v), true);
assertEq(v.length, 1);
assertEq(v[0], "😱😱" + String.fromCharCode('('.charCodeAt(0) + i) + "x");
});
let parseTree = Reflect.parse(simpleCode, { loc: true });
// Check the overall script.
assertEq(parseTree.type, "Program");
checkLoc(parseTree, [1, 0], [1, 370]);
assertEq(parseTree.body.length, 1);
// Check the coordinates of the declaration.
let varDecl = parseTree.body[0];
assertEq(varDecl.type, "VariableDeclaration");
checkLoc(varDecl, [1, 0], [1, 369]);
// ...and its initializing expression.
let varInit = varDecl.declarations[0].init;
assertEq(varInit.type, "ArrayExpression");
checkLoc(varInit, [1, 8], [1, 369]);
// ...and then every literal inside it.
assertEq(varInit.elements.length, NumArrayElements, "array literal length");
const ItemLength = lengthInCodePoints('["😱😱#x"],');
assertEq(ItemLength, 9, "item length check");
for (let i = 0; i < NumArrayElements; i++)
{
let elem = varInit.elements[i];
assertEq(elem.type, "ArrayExpression");
let startCol = 9 + i * ItemLength;
let endCol = startCol + ItemLength - 1;
checkLoc(elem, [1, startCol], [1, endCol]);
let arrayElems = elem.elements;
assertEq(arrayElems.length, 1);
let str = arrayElems[0];
assertEq(str.type, "Literal");
assertEq(str.value,
"😱😱" + String.fromCharCode('('.charCodeAt(0) + i) + "x");
checkLoc(str, [1, startCol + 1], [1, endCol - 1]);
}
}
testSimple();
// Test |ChunkInfo::unitsType() == UnitsType::GuaranteedSingleUnit| -- not that
// it should be observable, precisely, but effects of mis-applying or
// miscomputing it would in principle be observable if such were happening.
// This test also is intended to to be useful for (manually, in a debugger)
// verifying that the optimization is computed and kicks in correctly.
function testGuaranteedSingleUnit()
{
if (!columnsAreCodePoints)
return;
// Begin a few array literals in a first chunk to test column computation in
// that first chunk.
//
// End some of them in the first chunk to test columns *before* we know we
// have a long line.
//
// End one array *outside* the first chunk to test a computation inside a
// first chunk *after* we know we have a long line and have computed a first
// chunk.
let mixedChunksCode = "var Z = [ [ [],"; // column 0, offset 0
assertEq(mixedChunksCode.length, 15);
assertEq(lengthInCodePoints(mixedChunksCode), 15);
mixedChunksCode +=
" ".repeat(128 - mixedChunksCode.length); // column 15, offset 15
assertEq(mixedChunksCode.length, 128);
assertEq(lengthInCodePoints(mixedChunksCode), 128);
// Fill out a second chunk as also single-unit, with an outer array literal
// that begins in this chunk but finishes in the next (to test column
// computation in a prior, guaranteed-single-unit chunk).
mixedChunksCode += "[" + "[],".repeat(42) + " "; // column 128, offset 128
assertEq(mixedChunksCode.length, 256);
assertEq(lengthInCodePoints(mixedChunksCode), 256);
// Add a third chunk with one last empty nested array literal (so that we
// tack on another chunk, and conclude the second chunk is single-unit, before
// closing the enclosing array literal). Then close the enclosing array
// literal. Finally start a new string literal element containing
// multi-unit code points. For good measure, make the chunk *end* in the
// middle of such a code point, so that the relevant chunk limit must be
// retracted one code unit.
mixedChunksCode += "[] ], '" + "😱".repeat(61); // column 256, offset 256
assertEq(mixedChunksCode.length, 384 + 1);
assertEq(lengthInCodePoints(mixedChunksCode), 324);
// Wrap things up. Terminate the string, then terminate the nested array
// literal to trigger a column computation within the first chunk that can
// benefit from knowing the first chunk is all single-unit. Next add a *new*
// element to the outermost array, a string literal that contains a line
// terminator. The terminator invalidates the column computation cache, so
// when the outermost array is closed, location info for it will not hit the
// cache. Finally, tack on the terminating semicolon for good measure.
mixedChunksCode += "' ], '\u2028' ];"; // column 324, offset 385
assertEq(mixedChunksCode.length, 396);
assertEq(lengthInCodePoints(mixedChunksCode), 335);
let parseTree = Reflect.parse(mixedChunksCode, { loc: true });
// Check the overall script.
assertEq(parseTree.type, "Program");
checkLoc(parseTree, [1, 0], [2, 4]);
assertEq(parseTree.body.length, 1);
// Check the coordinates of the declaration.
let varDecl = parseTree.body[0];
assertEq(varDecl.type, "VariableDeclaration");
checkLoc(varDecl, [1, 0], [2, 3]);
// ...and its initializing expression.
let varInit = varDecl.declarations[0].init;
assertEq(varInit.type, "ArrayExpression");
checkLoc(varInit, [1, 8], [2, 3]);
let outerArrayElements = varInit.elements;
assertEq(outerArrayElements.length, 2);
{
// Next the first element, the array inside the initializing expression.
let nestedArray = varInit.elements[0];
assertEq(nestedArray.type, "ArrayExpression");
checkLoc(nestedArray, [1, 10], [1, 327]);
// Now inside that nested array.
let nestedArrayElements = nestedArray.elements;
assertEq(nestedArrayElements.length, 3);
// First the [] in chunk #0
let emptyArray = nestedArrayElements[0];
assertEq(emptyArray.type, "ArrayExpression");
assertEq(emptyArray.elements.length, 0);
checkLoc(emptyArray, [1, 12], [1, 14]);
// Then the big array of empty arrays starting in chunk #1 and ending just
// barely in chunk #2.
let bigArrayOfEmpties = nestedArrayElements[1];
assertEq(bigArrayOfEmpties.type, "ArrayExpression");
assertEq(bigArrayOfEmpties.elements.length, 42 + 1);
bigArrayOfEmpties.elements.forEach((elem, i) => {
assertEq(elem.type, "ArrayExpression");
assertEq(elem.elements.length, 0);
if (i !== 42)
checkLoc(elem, [1, 129 + i * 3], [1, 131 + i * 3]);
else
checkLoc(elem, [1, 256], [1, 258]); // last element was hand-placed
});
// Then the string literal of multi-unit code points beginning in chunk #2
// and ending just into chunk #3 on a second line.
let multiUnitStringLiteral = nestedArrayElements[2];
assertEq(multiUnitStringLiteral.type, "Literal");
assertEq(multiUnitStringLiteral.value, "😱".repeat(61));
checkLoc(multiUnitStringLiteral, [1, 262], [1, 325]);
}
{
// Finally, the string literal containing a line terminator as element in
// the outermost array.
let stringLiteralWithEmbeddedTerminator = outerArrayElements[1];
assertEq(stringLiteralWithEmbeddedTerminator.type, "Literal");
assertEq(stringLiteralWithEmbeddedTerminator.value, "\u2028");
checkLoc(stringLiteralWithEmbeddedTerminator, [1, 329], [2, 1]);
}
}
testGuaranteedSingleUnit();
if (typeof reportCompare === "function")
reportCompare(true, true);
print("Testing completed");
|