summaryrefslogtreecommitdiffstats
path: root/compiler/rustc_lexer/src/unescape/tests.rs
blob: fa61554afde6c5d16c87b462e0ca802b15a7e0a7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
use super::*;

/// Malformed char literals: every input must be rejected by `unescape_char` with the
/// listed `EscapeError`.
#[test]
fn test_unescape_char_bad() {
    // Unescapes `literal_text` with `unescape_char` and asserts that it fails with
    // `expected_error`. The offset returned alongside the error is discarded.
    fn check(literal_text: &str, expected_error: EscapeError) {
        let actual_result = unescape_char(literal_text).map_err(|(_offset, err)| err);
        // Many inputs share one expected error, so name the literal on failure.
        assert_eq!(actual_result, Err(expected_error), "literal: {:?}", literal_text);
    }

    // Empty input and a backslash with nothing after it.
    check("", EscapeError::ZeroChars);
    check(r"\", EscapeError::LoneSlash);

    // A literal newline, tab, single quote or carriage return is rejected.
    check("\n", EscapeError::EscapeOnlyChar);
    check("\t", EscapeError::EscapeOnlyChar);
    check("'", EscapeError::EscapeOnlyChar);
    check("\r", EscapeError::BareCarriageReturn);

    // A first character (plain or escaped) followed by anything else.
    check("spam", EscapeError::MoreThanOneChar);
    check(r"\x0ff", EscapeError::MoreThanOneChar);
    check(r#"\"a"#, EscapeError::MoreThanOneChar);
    check(r"\na", EscapeError::MoreThanOneChar);
    check(r"\ra", EscapeError::MoreThanOneChar);
    check(r"\ta", EscapeError::MoreThanOneChar);
    check(r"\\a", EscapeError::MoreThanOneChar);
    check(r"\'a", EscapeError::MoreThanOneChar);
    check(r"\0a", EscapeError::MoreThanOneChar);
    check(r"\u{0}x", EscapeError::MoreThanOneChar);
    check(r"\u{1F63b}}", EscapeError::MoreThanOneChar);

    // A backslash followed by a character that does not form an escape.
    check(r"\v", EscapeError::InvalidEscape);
    check(r"\💩", EscapeError::InvalidEscape);
    check(r"\●", EscapeError::InvalidEscape);
    check("\\\r", EscapeError::InvalidEscape);

    // `\x` escapes: too few digits, non-hex digits, and the values 0x80 / 0xff.
    check(r"\x", EscapeError::TooShortHexEscape);
    check(r"\x0", EscapeError::TooShortHexEscape);
    check(r"\xf", EscapeError::TooShortHexEscape);
    check(r"\xa", EscapeError::TooShortHexEscape);
    check(r"\xx", EscapeError::InvalidCharInHexEscape);
    check(r"\xы", EscapeError::InvalidCharInHexEscape);
    check(r"\x🦀", EscapeError::InvalidCharInHexEscape);
    check(r"\xtt", EscapeError::InvalidCharInHexEscape);
    check(r"\xff", EscapeError::OutOfRangeHexEscape);
    check(r"\xFF", EscapeError::OutOfRangeHexEscape);
    check(r"\x80", EscapeError::OutOfRangeHexEscape);

    // `\u` escapes: brace and digit syntax errors, then values that are too large.
    check(r"\u", EscapeError::NoBraceInUnicodeEscape);
    check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape);
    check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape);
    check(r"\u{", EscapeError::UnclosedUnicodeEscape);
    check(r"\u{0000", EscapeError::UnclosedUnicodeEscape);
    check(r"\u{}", EscapeError::EmptyUnicodeEscape);
    check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape);
    check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape);
    check(r"\u{FFFFFF}", EscapeError::OutOfRangeUnicodeEscape);
    check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape);

    // Code points in DC00..=DFFF.
    check(r"\u{DC00}", EscapeError::LoneSurrogateUnicodeEscape);
    check(r"\u{DDDD}", EscapeError::LoneSurrogateUnicodeEscape);
    check(r"\u{DFFF}", EscapeError::LoneSurrogateUnicodeEscape);

    // Code points in D800..=DBFF.
    check(r"\u{D800}", EscapeError::LoneSurrogateUnicodeEscape);
    check(r"\u{DAAA}", EscapeError::LoneSurrogateUnicodeEscape);
    check(r"\u{DBFF}", EscapeError::LoneSurrogateUnicodeEscape);
}

/// Well-formed char literals: every input must unescape to the listed `char`.
#[test]
fn test_unescape_char_good() {
    // Unescapes `literal_text` with `unescape_char` and asserts that it succeeds with
    // `expected_char`.
    fn check(literal_text: &str, expected_char: char) {
        let actual_result = unescape_char(literal_text);
        // Several inputs map to the same char, so name the literal on failure.
        assert_eq!(actual_result, Ok(expected_char), "literal: {:?}", literal_text);
    }

    // Plain characters of one, two and four UTF-8 bytes.
    check("a", 'a');
    check("ы", 'ы');
    check("🦀", '🦀');

    // Single-character escapes.
    check(r#"\""#, '"');
    check(r"\n", '\n');
    check(r"\r", '\r');
    check(r"\t", '\t');
    check(r"\\", '\\');
    check(r"\'", '\'');
    check(r"\0", '\0');

    // `\x` escapes, in both digit cases, up to 0x7f.
    check(r"\x00", '\0');
    check(r"\x5a", 'Z');
    check(r"\x5A", 'Z');
    check(r"\x7f", 127 as char);

    // `\u` escapes, including leading zeros and interior/trailing underscores.
    check(r"\u{0}", '\0');
    check(r"\u{000000}", '\0');
    check(r"\u{41}", 'A');
    check(r"\u{0041}", 'A');
    check(r"\u{00_41}", 'A');
    check(r"\u{4__1__}", 'A');
    check(r"\u{1F63b}", '😻');
}

/// Backslash-newline continuations in `Mode::Str`: checks the exact sequence of
/// `(range, result)` pairs handed to the callback, including the warning variants.
#[test]
fn test_unescape_str_warn() {
    // Runs `unescape_literal` over `literal` in `Mode::Str`, records every callback
    // invocation in order, and compares the full record against `expected`.
    fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
        let mut reported = Vec::with_capacity(literal.len());
        let mut record = |span: Range<usize>, outcome: Result<char, EscapeError>| {
            reported.push((span, outcome));
        };
        unescape_literal(literal, Mode::Str, &mut record);
        assert_eq!(reported, expected);
    }

    // Check we can handle escaped newlines at the end of a file.
    check("\\\n", &[]);
    check("\\\n ", &[]);

    // U+00A0 after the continuation: a warning covering 0..5 is reported, and the
    // character itself is still yielded at 3..5.
    check(
        "\\\n \u{a0} x",
        &[
            (0..5, Err(EscapeError::UnskippedWhitespaceWarning)),
            (3..5, Ok('\u{a0}')),
            (5..6, Ok(' ')),
            (6..7, Ok('x')),
        ],
    );
    // The skipped whitespace contains a second newline: one warning covering 0..7.
    check("\\\n  \n  x", &[(0..7, Err(EscapeError::MultipleSkippedLinesWarning)), (7..8, Ok('x'))]);
}

/// Well-formed string literal bodies in `Mode::Str`: each input must unescape to the
/// expected text without any error being reported.
#[test]
fn test_unescape_str_good() {
    // Unescapes `literal_text` in `Mode::Str`, gathering the produced chars into a
    // string. Only the first reported error is kept; nothing is appended after it.
    fn check(literal_text: &str, expected: &str) {
        let mut text = String::with_capacity(literal_text.len());
        let mut first_error = None;
        unescape_literal(literal_text, Mode::Str, &mut |span, unescaped| {
            if first_error.is_some() {
                return;
            }
            match unescaped {
                Ok(ch) => text.push(ch),
                Err(err) => first_error = Some((span, err)),
            }
        });
        let actual = match first_error {
            None => Ok(text.as_str()),
            Some(failure) => Err(failure),
        };
        assert_eq!(actual, Ok(expected));
    }

    // (literal body, expected unescaped text)
    let cases = [
        ("foo", "foo"),
        ("", ""),
        (" \t\n", " \t\n"),
        // Backslash-newline drops the newline and the whitespace that follows it.
        ("hello \\\n     world", "hello world"),
        ("thread's", "thread's"),
    ];
    for &(literal_text, expected) in &cases {
        check(literal_text, expected);
    }
}

/// Malformed byte literals: every input must be rejected by `unescape_byte` with the
/// listed `EscapeError`.
#[test]
fn test_unescape_byte_bad() {
    // Unescapes `literal_text` with `unescape_byte` and asserts that it fails with
    // `expected_error`. The offset returned alongside the error is discarded.
    fn check(literal_text: &str, expected_error: EscapeError) {
        let actual_result = unescape_byte(literal_text).map_err(|(_offset, err)| err);
        // Many inputs share one expected error, so name the literal on failure.
        assert_eq!(actual_result, Err(expected_error), "literal: {:?}", literal_text);
    }

    // Empty input and a backslash with nothing after it.
    check("", EscapeError::ZeroChars);
    check(r"\", EscapeError::LoneSlash);

    // A literal newline, tab, single quote or carriage return is rejected.
    check("\n", EscapeError::EscapeOnlyChar);
    check("\t", EscapeError::EscapeOnlyChar);
    check("'", EscapeError::EscapeOnlyChar);
    check("\r", EscapeError::BareCarriageReturn);

    // A first character (plain or escaped) followed by anything else.
    check("spam", EscapeError::MoreThanOneChar);
    check(r"\x0ff", EscapeError::MoreThanOneChar);
    check(r#"\"a"#, EscapeError::MoreThanOneChar);
    check(r"\na", EscapeError::MoreThanOneChar);
    check(r"\ra", EscapeError::MoreThanOneChar);
    check(r"\ta", EscapeError::MoreThanOneChar);
    check(r"\\a", EscapeError::MoreThanOneChar);
    check(r"\'a", EscapeError::MoreThanOneChar);
    check(r"\0a", EscapeError::MoreThanOneChar);

    // A backslash followed by a character that does not form an escape.
    check(r"\v", EscapeError::InvalidEscape);
    check(r"\💩", EscapeError::InvalidEscape);
    check(r"\●", EscapeError::InvalidEscape);

    // `\x` escapes: too few digits and non-hex digits.
    check(r"\x", EscapeError::TooShortHexEscape);
    check(r"\x0", EscapeError::TooShortHexEscape);
    check(r"\xa", EscapeError::TooShortHexEscape);
    check(r"\xf", EscapeError::TooShortHexEscape);
    check(r"\xx", EscapeError::InvalidCharInHexEscape);
    check(r"\xы", EscapeError::InvalidCharInHexEscape);
    check(r"\x🦀", EscapeError::InvalidCharInHexEscape);
    check(r"\xtt", EscapeError::InvalidCharInHexEscape);

    // Syntactically broken `\u` escapes report the syntax error.
    check(r"\u", EscapeError::NoBraceInUnicodeEscape);
    check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape);
    check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape);
    check(r"\u{", EscapeError::UnclosedUnicodeEscape);
    check(r"\u{0000", EscapeError::UnclosedUnicodeEscape);
    check(r"\u{}", EscapeError::EmptyUnicodeEscape);
    check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape);
    check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape);

    // Non-ASCII characters written directly.
    check("ы", EscapeError::NonAsciiCharInByte);
    check("🦀", EscapeError::NonAsciiCharInByte);

    // Syntactically complete `\u{..}` escapes are rejected as such, whatever their
    // value (in range, too large, surrogate) and whatever follows them.
    check(r"\u{0}", EscapeError::UnicodeEscapeInByte);
    check(r"\u{000000}", EscapeError::UnicodeEscapeInByte);
    check(r"\u{41}", EscapeError::UnicodeEscapeInByte);
    check(r"\u{0041}", EscapeError::UnicodeEscapeInByte);
    check(r"\u{00_41}", EscapeError::UnicodeEscapeInByte);
    check(r"\u{4__1__}", EscapeError::UnicodeEscapeInByte);
    check(r"\u{1F63b}", EscapeError::UnicodeEscapeInByte);
    check(r"\u{0}x", EscapeError::UnicodeEscapeInByte);
    check(r"\u{1F63b}}", EscapeError::UnicodeEscapeInByte);
    check(r"\u{FFFFFF}", EscapeError::UnicodeEscapeInByte);
    check(r"\u{ffffff}", EscapeError::UnicodeEscapeInByte);
    check(r"\u{DC00}", EscapeError::UnicodeEscapeInByte);
    check(r"\u{DDDD}", EscapeError::UnicodeEscapeInByte);
    check(r"\u{DFFF}", EscapeError::UnicodeEscapeInByte);
    check(r"\u{D800}", EscapeError::UnicodeEscapeInByte);
    check(r"\u{DAAA}", EscapeError::UnicodeEscapeInByte);
    check(r"\u{DBFF}", EscapeError::UnicodeEscapeInByte);
}

/// Well-formed byte literals: every input must unescape to the listed `u8`.
#[test]
fn test_unescape_byte_good() {
    // Unescapes `literal_text` with `unescape_byte` and asserts that it succeeds with
    // `expected_byte`.
    fn check(literal_text: &str, expected_byte: u8) {
        let actual_result = unescape_byte(literal_text);
        // Several inputs map to the same byte, so name the literal on failure.
        assert_eq!(actual_result, Ok(expected_byte), "literal: {:?}", literal_text);
    }

    // A plain ASCII character.
    check("a", b'a');

    // Single-character escapes.
    check(r#"\""#, b'"');
    check(r"\n", b'\n');
    check(r"\r", b'\r');
    check(r"\t", b'\t');
    check(r"\\", b'\\');
    check(r"\'", b'\'');
    check(r"\0", b'\0');

    // `\x` escapes, in both digit cases, up to 0xff.
    check(r"\x00", b'\0');
    check(r"\x5a", b'Z');
    check(r"\x5A", b'Z');
    check(r"\x7f", 127);
    check(r"\x80", 128);
    check(r"\xff", 255);
    check(r"\xFF", 255);
}

/// Well-formed byte string literal bodies in `Mode::ByteStr`: each input must unescape
/// to the expected bytes without any error being reported.
#[test]
fn test_unescape_byte_str_good() {
    // Unescapes `literal_text` in `Mode::ByteStr`, gathering the produced bytes into a
    // vector. Only the first reported error is kept; nothing is appended after it.
    fn check(literal_text: &str, expected: &[u8]) {
        let mut bytes = Vec::with_capacity(literal_text.len());
        let mut first_error = None;
        unescape_byte_literal(literal_text, Mode::ByteStr, &mut |span, unescaped| {
            if first_error.is_some() {
                return;
            }
            match unescaped {
                Ok(byte) => bytes.push(byte),
                Err(err) => first_error = Some((span, err)),
            }
        });
        let actual = match first_error {
            None => Ok(bytes.as_slice()),
            Some(failure) => Err(failure),
        };
        assert_eq!(actual, Ok(expected));
    }

    // Inputs without escapes come back unchanged.
    check("foo", b"foo");
    check("", b"");
    check(" \t\n", b" \t\n");

    // Backslash-newline drops the newline and the whitespace that follows it.
    check("hello \\\n     world", b"hello world");
    check("thread's", b"thread's");
}

/// Raw string bodies in `Mode::RawStr`: a carriage return is reported as an error and
/// the characters after it are still yielded.
#[test]
fn test_unescape_raw_str() {
    // Runs `unescape_literal` over `literal` in `Mode::RawStr`, records every callback
    // invocation in order, and compares the full record against `expected`.
    fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
        let mut reported = Vec::with_capacity(literal.len());
        let mut record = |span: Range<usize>, outcome: Result<char, EscapeError>| {
            reported.push((span, outcome));
        };
        unescape_literal(literal, Mode::RawStr, &mut record);
        assert_eq!(reported, expected);
    }

    check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]);
    check("\rx", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString)), (1..2, Ok('x'))]);
}

/// Raw byte string bodies in `Mode::RawByteStr`: carriage returns and non-ASCII
/// characters are reported as errors, and the bytes after them are still yielded.
#[test]
fn test_unescape_raw_byte_str() {
    // Runs `unescape_byte_literal` over `literal` in `Mode::RawByteStr`, records every
    // callback invocation in order, and compares the full record against `expected`.
    fn check(literal: &str, expected: &[(Range<usize>, Result<u8, EscapeError>)]) {
        let mut reported = Vec::with_capacity(literal.len());
        let mut record = |span: Range<usize>, outcome: Result<u8, EscapeError>| {
            reported.push((span, outcome));
        };
        unescape_byte_literal(literal, Mode::RawByteStr, &mut record);
        assert_eq!(reported, expected);
    }

    check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]);
    // The error range covers all four UTF-8 bytes of the crab emoji.
    check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByteString))]);
    check(
        "🦀a",
        &[(0..4, Err(EscapeError::NonAsciiCharInByteString)), (4..5, Ok(byte_from_char('a')))],
    );
}