js/src/tests/non262/Unicode/uc-005.js


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239

/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 *
 * Date:    15 July 2002
 * SUMMARY: Testing identifiers with double-byte names
 * See http://bugzilla.mozilla.org/show_bug.cgi?id=58274
 *
 * Here is a sample of the problem:
 *
 *    js> function f\u02B1 () {}
 *
 *    js> f\u02B1.toSource();
 *    function f¦() {}
 *
 *    js> f\u02B1.toSource().toSource();
 *    (new String("function f\xB1() {}"))
 *
 *
 * See how the high-byte information (the 02) has been lost?
 * The same thing was happening with the toString() method:
 *
 *    js> f\u02B1.toString();
 *
 *    function f¦() {
 *    }
 *
 *    js> f\u02B1.toString().toSource();
 *    (new String("\nfunction f\xB1() {\n}\n"))
 *
 */
//-----------------------------------------------------------------------------
var UBound = 0;
var BUGNUMBER = 58274;
var summary = 'Testing identifiers with double-byte names';
var status = '';
var statusitems = [];
var actual = '';
var actualvalues = [];
var expect= '';
var expectedvalues = [];


/*
 * Define a function that uses double-byte identifiers in
 * "every possible way"
 *
 * Then recover each double-byte identifier via f.toString().
 * To make this easier, put a 'Z' token before every one.
 *
 * Our eval string will be:
 *
 * sEval = "function Z\u02b1(Z\u02b2, b) {
 *          try { Z\u02b3 : var Z\u02b4 = Z\u02b1; }
 *          catch (Z\u02b5) { for (var Z\u02b6 in Z\u02b5)
 *          {for (1; 1<0; Z\u02b7++) {new Array()[Z\u02b6] = 1;} };} }";
 *
 * It will be helpful to build this string in stages:
 */
var s0 =  'function Z';
var s1 =  '\u02b1(Z';
var s2 =  '\u02b2, b) {try { Z';
var s3 =  '\u02b3 : var Z';
var s4 =  '\u02b4 = Z';
var s5 =  '\u02b1; } catch (Z'
  var s6 =  '\u02b5) { for (var Z';
var s7 =  '\u02b6 in Z';
var s8 =  '\u02b5){for (1; 1<0; Z';
var s9 =  '\u02b7++) {new Array()[Z';
var s10 = '\u02b6] = 1;} };} }';


/*
 * Concatenate these and eval() to create the function Z\u02b1
 */
var sEval = s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10;
eval(sEval);


/*
 * Recover all the double-byte identifiers via Z\u02b1.toString().
 * We'll recover the 1st one as arrID[1], the 2nd one as arrID[2],
 * and so on ...
 */
var arrID = getIdentifiers(Z\u02b1);


/*
 * Now check that we got back what we put in -
 */
status = inSection(1);
actual = arrID[1];
expect = s1.charAt(0);
addThis();

status = inSection(2);
actual = arrID[2];
expect = s2.charAt(0);
addThis();

status = inSection(3);
actual = arrID[3];
expect = s3.charAt(0);
addThis();

status = inSection(4);
actual = arrID[4];
expect = s4.charAt(0);
addThis();

status = inSection(5);
actual = arrID[5];
expect = s5.charAt(0);
addThis();

status = inSection(6);
actual = arrID[6];
expect = s6.charAt(0);
addThis();

status = inSection(7);
actual = arrID[7];
expect = s7.charAt(0);
addThis();

status = inSection(8);
actual = arrID[8];
expect = s8.charAt(0);
addThis();

status = inSection(9);
actual = arrID[9];
expect = s9.charAt(0);
addThis();

status = inSection(10);
actual = arrID[10];
expect = s10.charAt(0);
addThis();


//-----------------------------------------------------------------------------
test();
//-----------------------------------------------------------------------------


/*
 * Goal: recover the double-byte identifiers from f.toString()
 * by getting the very next character after each 'Z' token.
 *
 * The return value will be an array |arr| indexed such that
 * |arr[1]| is the 1st identifier, |arr[2]| the 2nd, and so on.
 *
 * Note, however, f.toString() is implementation-independent.
 * For example, it may begin with '\nfunction' instead of 'function'.
 *
 * Rhino uses a Unicode representation for f.toString(); whereas
 * SpiderMonkey uses an ASCII representation, putting escape sequences
 * for non-ASCII characters. For example, if a function is called f\u02B1,
 * then in Rhino the toString() method will present a 2-character Unicode
 * string for its name, whereas SpiderMonkey will present a 7-character
 * ASCII string for its name: the string literal 'f\u02B1'.
 *
 * So we force the lexer to condense the string before we use it.
 * This will give uniform results in Rhino and SpiderMonkey.
 */
function getIdentifiers(f)
{
  var str = condenseStr(f.toString());
  var arr = str.split('Z');

  /*
   * The identifiers are the 1st char of each split substring
   * EXCEPT the first one, which is just ('\n' +) 'function '.
   *
   * Thus note the 1st identifier will be stored in |arr[1]|,
   * the 2nd one in |arr[2]|, etc., making the indexing easy -
   */
  for (i in arr)
    arr[i] = arr[i].charAt(0);
  return arr;
}


/*
 * This function is the opposite of a functions like escape(), which take
 * Unicode characters and return escape sequences for them. Here, we force
 * the lexer to turn escape sequences back into single characters.
 *
 * Note we can't simply do |eval(str)|, since in practice |str| will be an
 * identifier somewhere in the program (e.g. a function name); thus |eval(str)|
 * would return the object that the identifier represents: not what we want.
 *
 * So we surround |str| lexicographically with quotes to force the lexer to
 * evaluate it as a string. Have to strip out any linefeeds first, however -
 */
function condenseStr(str)
{
  /*
   * You won't be able to do the next step if |str| has
   * any carriage returns or linefeeds in it. For example:
   *
   *  js> eval("'" + '\nHello' + "'");
   *  1: SyntaxError: unterminated string literal:
   *  1: '
   *  1: ^
   *
   * So replace them with the empty string -
   */
  str = str.replace(/[\r\n]/g, '')
    return eval("'" + str + "'")
    }


function addThis()
{
  statusitems[UBound] = status;
  actualvalues[UBound] = actual;
  expectedvalues[UBound] = expect;
  UBound++;
}


function test()
{
  printBugNumber(BUGNUMBER);
  printStatus(summary);

  for (var i=0; i<UBound; i++)
  {
    reportCompare(expectedvalues[i], actualvalues[i], statusitems[i]);
  }
}