1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
// TODO: port to Python
'use strict';
var assert = require('assert');
var decode = require('../decode');
/**
 * Convert a string of binary digits into a percent-encoded byte sequence.
 *
 * All whitespace is removed first, then the digits are consumed from the
 * right in groups of eight; each group becomes `%xx` with two lowercase hex
 * digits. If the digit count is not a multiple of eight, the leftmost
 * (shorter) group is still emitted as one byte.
 *
 * @param {string} str - binary digits, optionally separated by whitespace
 * @returns {string} percent-encoded representation ('' for empty input)
 */
function encodeBinary(str) {
  var bits = str.replace(/\s+/g, '');
  var pieces = [];

  // Walk backwards so that any incomplete group ends up at the front.
  for (var end = bits.length; end > 0; end -= 8) {
    var octet = bits.slice(Math.max(end - 8, 0), end);
    var hex = parseInt(octet, 2).toString(16);

    // Left-pad to two hex digits.
    pieces.unshift('%' + ('0' + hex).slice(-2));
  }

  return pieces.join('');
}
var samples = {
'00000000': true,
'01010101': true,
'01111111': true,
// invalid as 1st byte
'10000000': true,
'10111111': true,
// invalid sequences, 2nd byte should be >= 0x80
'11000111 01010101': false,
'11100011 01010101': false,
'11110001 01010101': false,
// invalid sequences, 2nd byte should be < 0xc0
'11000111 11000000': false,
'11100011 11000000': false,
'11110001 11000000': false,
// invalid 3rd byte
'11100011 10010101 01010101': false,
'11110001 10010101 01010101': false,
// invalid 4th byte
'11110001 10010101 10010101 01010101': false,
// valid sequences
'11000111 10101010': true,
'11100011 10101010 10101010': true,
'11110001 10101010 10101010 10101010': true,
// minimal chars with given length
'11000010 10000000': true,
'11100000 10100000 10000000': true,
// impossible sequences
'11000001 10111111': false,
'11100000 10011111 10111111': false,
'11000001 10000000': false,
'11100000 10010000 10000000': false,
// maximum chars with given length
'11011111 10111111': true,
'11101111 10111111 10111111': true,
'11110000 10010000 10000000 10000000': true,
'11110000 10010000 10001111 10001111': true,
'11110100 10001111 10110000 10000000': true,
'11110100 10001111 10111111 10111111': true,
// too low
'11110000 10001111 10111111 10111111': false,
// too high
'11110100 10010000 10000000 10000000': false,
'11110100 10011111 10111111 10111111': false,
// surrogate range
'11101101 10011111 10111111': true,
'11101101 10100000 10000000': false,
'11101101 10111111 10111111': false,
'11101110 10000000 10000000': true
};
// Test suite for decode(). Strict assertions are used throughout so that a
// result which is only loosely equal to the expectation (for example a
// non-string value) is reported as a failure.
describe('decode', function() {
  it('should decode %xx', function() {
    // '%2520' is expected to become '%20', i.e. the output is not decoded again.
    assert.strictEqual(decode('x%20xx%20%2520'), 'x xx %20');
  });

  it('should not decode invalid sequences', function() {
    assert.strictEqual(decode('%2g%z1%%'), '%2g%z1%%');
  });

  it('should not decode reservedSet', function() {
    // Per the expectations below, characters listed in the second argument
    // stay percent-encoded while everything else is decoded.
    assert.strictEqual(decode('%20%25%20', '%'), ' %25 ');
    assert.strictEqual(decode('%20%25%20', ' '), '%20%%20');
    assert.strictEqual(decode('%20%25%20', ' %'), '%20%25%20');
  });

  describe('utf8', function() {
    // One test case per sample key; the native decodeURIComponent is the
    // reference implementation.
    Object.keys(samples).forEach(function(k) {
      it(k, function() {
        var str = encodeBinary(k);
        var er = null;
        var res1;
        var res2;

        try {
          res1 = decodeURIComponent(str);
        } catch (e) {
          // The native decoder rejected the sequence; remember that so the
          // assertions below expect replacement characters instead.
          er = e;
        }

        res2 = decode(str);

        if (er) {
          // Malformed input: decode() must not throw and its output must
          // contain at least one U+FFFD.
          assert.notStrictEqual(res2.indexOf('\ufffd'), -1);
        } else {
          // Well-formed input: output must match the native decoder and
          // contain no U+FFFD.
          assert.strictEqual(res1, res2);
          assert.strictEqual(res2.indexOf('\ufffd'), -1);
        }
      });
    });
  });
});
|