summaryrefslogtreecommitdiffstats
path: root/tests/ui/lexer
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:19:13 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:19:13 +0000
commit 218caa410aa38c29984be31a5229b9fa717560ee (patch)
tree c54bd55eeb6e4c508940a30e94c0032fbd45d677 /tests/ui/lexer
parent Releasing progress-linux version 1.67.1+dfsg1-1~progress7.99u1. (diff)
download rustc-218caa410aa38c29984be31a5229b9fa717560ee.tar.xz
rustc-218caa410aa38c29984be31a5229b9fa717560ee.zip
Merging upstream version 1.68.2+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tests/ui/lexer')
-rw-r--r--tests/ui/lexer/error-stage.rs80
-rw-r--r--tests/ui/lexer/error-stage.stderr56
-rw-r--r--tests/ui/lexer/lex-bad-binary-literal.rs11
-rw-r--r--tests/ui/lexer/lex-bad-binary-literal.stderr56
-rw-r--r--tests/ui/lexer/lex-bad-char-literals-1.rs17
-rw-r--r--tests/ui/lexer/lex-bad-char-literals-1.stderr38
-rw-r--r--tests/ui/lexer/lex-bad-char-literals-2.rs6
-rw-r--r--tests/ui/lexer/lex-bad-char-literals-2.stderr13
-rw-r--r--tests/ui/lexer/lex-bad-char-literals-3.rs7
-rw-r--r--tests/ui/lexer/lex-bad-char-literals-3.stderr24
-rw-r--r--tests/ui/lexer/lex-bad-char-literals-4.rs5
-rw-r--r--tests/ui/lexer/lex-bad-char-literals-4.stderr9
-rw-r--r--tests/ui/lexer/lex-bad-char-literals-5.rs7
-rw-r--r--tests/ui/lexer/lex-bad-char-literals-5.stderr24
-rw-r--r--tests/ui/lexer/lex-bad-char-literals-6.rs15
-rw-r--r--tests/ui/lexer/lex-bad-char-literals-6.stderr44
-rw-r--r--tests/ui/lexer/lex-bad-char-literals-7.rs13
-rw-r--r--tests/ui/lexer/lex-bad-char-literals-7.stderr21
-rw-r--r--tests/ui/lexer/lex-bad-numeric-literals.rs35
-rw-r--r--tests/ui/lexer/lex-bad-numeric-literals.stderr169
-rw-r--r--tests/ui/lexer/lex-bad-octal-literal.rs4
-rw-r--r--tests/ui/lexer/lex-bad-octal-literal.stderr14
-rw-r--r--tests/ui/lexer/lex-bad-token.rs3
-rw-r--r--tests/ui/lexer/lex-bad-token.stderr8
-rw-r--r--tests/ui/lexer/lex-bare-cr-nondoc-comment.rs9
-rw-r--r--tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.rs26
-rw-r--r--tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr46
-rw-r--r--tests/ui/lexer/lex-emoji-identifiers.rs17
-rw-r--r--tests/ui/lexer/lex-emoji-identifiers.stderr52
-rw-r--r--tests/ui/lexer/lex-stray-backslash.rs3
-rw-r--r--tests/ui/lexer/lex-stray-backslash.stderr8
-rw-r--r--tests/ui/lexer/lexer-crlf-line-endings-string-literal-doc-comment.rs38
32 files changed, 878 insertions, 0 deletions
diff --git a/tests/ui/lexer/error-stage.rs b/tests/ui/lexer/error-stage.rs
new file mode 100644
index 000000000..c8d88f745
--- /dev/null
+++ b/tests/ui/lexer/error-stage.rs
@@ -0,0 +1,80 @@
+// This test is about the treatment of invalid literals. In particular, some
+// literals are only considered invalid if they survive to HIR lowering.
+//
+// Literals with bad suffixes
+// --------------------------
+// Literals consist of a primary part and an optional suffix.
+// https://doc.rust-lang.org/reference/tokens.html#suffixes says:
+//
+// Any kind of literal (string, integer, etc) with any suffix is valid as a
+// token, and can be passed to a macro without producing an error. The macro
+// itself will decide how to interpret such a token and whether to produce an
+// error or not.
+//
+// ```
+// macro_rules! blackhole { ($tt:tt) => () }
+// blackhole!("string"suffix); // OK
+// ```
+//
+// However, suffixes on literal tokens parsed as Rust code are restricted.
+// Any suffixes are rejected on non-numeric literal tokens, and numeric
+// literal tokens are accepted only with suffixes from the list below.
+//
+// Integer: u8, i8, u16, i16, u32, i32, u64, i64, u128, i128, usize, isize
+// Floating-point: f32, f64
+//
+// This means that something like `"string"any_suffix` is a token accepted by
+// the lexer, but rejected later for being an invalid combination of primary
+// part and suffix.
+//
+// `0b10f32` is a similar case. `0b10` is a valid primary part that is a valid
+// *integer* literal when no suffix is present. It only causes an error later
+// when combined with the `f32` float suffix.
+//
+// However, `0b10.0f32` is different. It is rejected by the lexer because
+// `0b10.0` is not a valid token even on its own.
+//
+// This difference is unfortunate, but it's baked into the language now.
+//
+// Too-large integer literals
+// --------------------------
+// https://doc.rust-lang.org/reference/tokens.html#integer-literals says that
+// literals like `128_i8` and `256_u8` "are too big for their type, but are
+// still valid tokens".
+
+macro_rules! sink {
+ ($($x:tt;)*) => {()}
+}
+
+// The invalid literals are ignored because the macro consumes them. Except for
+// `0b10.0f32` because it's a lexer error.
+const _: () = sink! {
+ "string"any_suffix; // OK
+ 10u123; // OK
+ 10.0f123; // OK
+ 0b10f32; // OK
+ 0b10.0f32; //~ ERROR binary float literal is not supported
+ 999340282366920938463463374607431768211455999; // OK
+};
+
+// The invalid literals used to cause errors, but this was changed by #102944.
+// Except for `0b10.0f32`, because it's a lexer error.
+#[cfg(FALSE)]
+fn configured_out() {
+ "string"any_suffix; // OK
+ 10u123; // OK
+ 10.0f123; // OK
+ 0b10f32; // OK
+ 0b10.0f32; //~ ERROR binary float literal is not supported
+ 999340282366920938463463374607431768211455999; // OK
+}
+
+// All the invalid literals cause errors.
+fn main() {
+ "string"any_suffix; //~ ERROR suffixes on string literals are invalid
+ 10u123; //~ ERROR invalid width `123` for integer literal
+ 10.0f123; //~ ERROR invalid width `123` for float literal
+ 0b10f32; //~ ERROR binary float literal is not supported
+ 0b10.0f32; //~ ERROR binary float literal is not supported
+ 999340282366920938463463374607431768211455999; //~ ERROR integer literal is too large
+}
diff --git a/tests/ui/lexer/error-stage.stderr b/tests/ui/lexer/error-stage.stderr
new file mode 100644
index 000000000..ecbdb14dc
--- /dev/null
+++ b/tests/ui/lexer/error-stage.stderr
@@ -0,0 +1,56 @@
+error: binary float literal is not supported
+ --> $DIR/error-stage.rs:56:5
+ |
+LL | 0b10.0f32;
+ | ^^^^^^
+
+error: binary float literal is not supported
+ --> $DIR/error-stage.rs:68:5
+ |
+LL | 0b10.0f32;
+ | ^^^^^^
+
+error: binary float literal is not supported
+ --> $DIR/error-stage.rs:78:5
+ |
+LL | 0b10.0f32;
+ | ^^^^^^
+
+error: suffixes on string literals are invalid
+ --> $DIR/error-stage.rs:74:5
+ |
+LL | "string"any_suffix;
+ | ^^^^^^^^^^^^^^^^^^ invalid suffix `any_suffix`
+
+error: invalid width `123` for integer literal
+ --> $DIR/error-stage.rs:75:5
+ |
+LL | 10u123;
+ | ^^^^^^
+ |
+ = help: valid widths are 8, 16, 32, 64 and 128
+
+error: invalid width `123` for float literal
+ --> $DIR/error-stage.rs:76:5
+ |
+LL | 10.0f123;
+ | ^^^^^^^^
+ |
+ = help: valid widths are 32 and 64
+
+error: binary float literal is not supported
+ --> $DIR/error-stage.rs:77:5
+ |
+LL | 0b10f32;
+ | ^^^^^^^ not supported
+
+error: integer literal is too large
+ --> $DIR/error-stage.rs:79:5
+ |
+LL | 999340282366920938463463374607431768211455999;
+ | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ |
+ = note: value exceeds limit of `340282366920938463463374607431768211455`
+
+error: aborting due to 8 previous errors
+
diff --git a/tests/ui/lexer/lex-bad-binary-literal.rs b/tests/ui/lexer/lex-bad-binary-literal.rs
new file mode 100644
index 000000000..7df98073e
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-binary-literal.rs
@@ -0,0 +1,11 @@
+fn main() {
+ 0b121; //~ ERROR invalid digit for a base 2 literal
+ 0b10_10301; //~ ERROR invalid digit for a base 2 literal
+ 0b30; //~ ERROR invalid digit for a base 2 literal
+ 0b41; //~ ERROR invalid digit for a base 2 literal
+ 0b5; //~ ERROR invalid digit for a base 2 literal
+ 0b6; //~ ERROR invalid digit for a base 2 literal
+ 0b7; //~ ERROR invalid digit for a base 2 literal
+ 0b8; //~ ERROR invalid digit for a base 2 literal
+ 0b9; //~ ERROR invalid digit for a base 2 literal
+}
diff --git a/tests/ui/lexer/lex-bad-binary-literal.stderr b/tests/ui/lexer/lex-bad-binary-literal.stderr
new file mode 100644
index 000000000..992b3d248
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-binary-literal.stderr
@@ -0,0 +1,56 @@
+error: invalid digit for a base 2 literal
+ --> $DIR/lex-bad-binary-literal.rs:2:8
+ |
+LL | 0b121;
+ | ^
+
+error: invalid digit for a base 2 literal
+ --> $DIR/lex-bad-binary-literal.rs:3:12
+ |
+LL | 0b10_10301;
+ | ^
+
+error: invalid digit for a base 2 literal
+ --> $DIR/lex-bad-binary-literal.rs:4:7
+ |
+LL | 0b30;
+ | ^
+
+error: invalid digit for a base 2 literal
+ --> $DIR/lex-bad-binary-literal.rs:5:7
+ |
+LL | 0b41;
+ | ^
+
+error: invalid digit for a base 2 literal
+ --> $DIR/lex-bad-binary-literal.rs:6:7
+ |
+LL | 0b5;
+ | ^
+
+error: invalid digit for a base 2 literal
+ --> $DIR/lex-bad-binary-literal.rs:7:7
+ |
+LL | 0b6;
+ | ^
+
+error: invalid digit for a base 2 literal
+ --> $DIR/lex-bad-binary-literal.rs:8:7
+ |
+LL | 0b7;
+ | ^
+
+error: invalid digit for a base 2 literal
+ --> $DIR/lex-bad-binary-literal.rs:9:7
+ |
+LL | 0b8;
+ | ^
+
+error: invalid digit for a base 2 literal
+ --> $DIR/lex-bad-binary-literal.rs:10:7
+ |
+LL | 0b9;
+ | ^
+
+error: aborting due to 9 previous errors
+
diff --git a/tests/ui/lexer/lex-bad-char-literals-1.rs b/tests/ui/lexer/lex-bad-char-literals-1.rs
new file mode 100644
index 000000000..e7951cfd2
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-1.rs
@@ -0,0 +1,17 @@
+static c3: char =
+ '\x1' //~ ERROR: numeric character escape is too short
+;
+
+static s3: &'static str =
+ "\x1" //~ ERROR: numeric character escape is too short
+;
+
+static c: char =
+ '\●' //~ ERROR: unknown character escape
+;
+
+static s: &'static str =
+ "\●" //~ ERROR: unknown character escape
+;
+
+fn main() {}
diff --git a/tests/ui/lexer/lex-bad-char-literals-1.stderr b/tests/ui/lexer/lex-bad-char-literals-1.stderr
new file mode 100644
index 000000000..e6ff1f662
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-1.stderr
@@ -0,0 +1,38 @@
+error: numeric character escape is too short
+ --> $DIR/lex-bad-char-literals-1.rs:2:6
+ |
+LL | '\x1'
+ | ^^^
+
+error: numeric character escape is too short
+ --> $DIR/lex-bad-char-literals-1.rs:6:6
+ |
+LL | "\x1"
+ | ^^^
+
+error: unknown character escape: `\u{25cf}`
+ --> $DIR/lex-bad-char-literals-1.rs:10:7
+ |
+LL | '\●'
+ | ^ unknown character escape
+ |
+ = help: for more information, visit <https://static.rust-lang.org/doc/master/reference.html#literals>
+help: if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal
+ |
+LL | r"\●"
+ | ~~~~~
+
+error: unknown character escape: `\u{25cf}`
+ --> $DIR/lex-bad-char-literals-1.rs:14:7
+ |
+LL | "\●"
+ | ^ unknown character escape
+ |
+ = help: for more information, visit <https://static.rust-lang.org/doc/master/reference.html#literals>
+help: if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal
+ |
+LL | r"\●"
+ | ~~~~~
+
+error: aborting due to 4 previous errors
+
diff --git a/tests/ui/lexer/lex-bad-char-literals-2.rs b/tests/ui/lexer/lex-bad-char-literals-2.rs
new file mode 100644
index 000000000..d35dafd9a
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-2.rs
@@ -0,0 +1,6 @@
+// This test needs to be the last one appearing in this file as it kills the parser
+static c: char =
+ 'nope' //~ ERROR: character literal may only contain one codepoint
+;
+
+fn main() {}
diff --git a/tests/ui/lexer/lex-bad-char-literals-2.stderr b/tests/ui/lexer/lex-bad-char-literals-2.stderr
new file mode 100644
index 000000000..c2b19a7ad
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-2.stderr
@@ -0,0 +1,13 @@
+error: character literal may only contain one codepoint
+ --> $DIR/lex-bad-char-literals-2.rs:3:5
+ |
+LL | 'nope'
+ | ^^^^^^
+ |
+help: if you meant to write a `str` literal, use double quotes
+ |
+LL | "nope"
+ | ~~~~~~
+
+error: aborting due to previous error
+
diff --git a/tests/ui/lexer/lex-bad-char-literals-3.rs b/tests/ui/lexer/lex-bad-char-literals-3.rs
new file mode 100644
index 000000000..5194ff4d9
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-3.rs
@@ -0,0 +1,7 @@
+static c: char = '●●';
+//~^ ERROR: character literal may only contain one codepoint
+
+fn main() {
+ let ch: &str = '●●';
+ //~^ ERROR: character literal may only contain one codepoint
+}
diff --git a/tests/ui/lexer/lex-bad-char-literals-3.stderr b/tests/ui/lexer/lex-bad-char-literals-3.stderr
new file mode 100644
index 000000000..62a5e424c
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-3.stderr
@@ -0,0 +1,24 @@
+error: character literal may only contain one codepoint
+ --> $DIR/lex-bad-char-literals-3.rs:1:18
+ |
+LL | static c: char = '●●';
+ | ^^^^
+ |
+help: if you meant to write a `str` literal, use double quotes
+ |
+LL | static c: char = "●●";
+ | ~~~~
+
+error: character literal may only contain one codepoint
+ --> $DIR/lex-bad-char-literals-3.rs:5:20
+ |
+LL | let ch: &str = '●●';
+ | ^^^^
+ |
+help: if you meant to write a `str` literal, use double quotes
+ |
+LL | let ch: &str = "●●";
+ | ~~~~
+
+error: aborting due to 2 previous errors
+
diff --git a/tests/ui/lexer/lex-bad-char-literals-4.rs b/tests/ui/lexer/lex-bad-char-literals-4.rs
new file mode 100644
index 000000000..de0a19df9
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-4.rs
@@ -0,0 +1,5 @@
+//
+// This test needs to be the last one appearing in this file as it kills the parser
+static c: char =
+ '● //~ ERROR: unterminated character literal
+;
diff --git a/tests/ui/lexer/lex-bad-char-literals-4.stderr b/tests/ui/lexer/lex-bad-char-literals-4.stderr
new file mode 100644
index 000000000..fec4421c4
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-4.stderr
@@ -0,0 +1,9 @@
+error[E0762]: unterminated character literal
+ --> $DIR/lex-bad-char-literals-4.rs:4:5
+ |
+LL | '●
+ | ^^^^
+
+error: aborting due to previous error
+
+For more information about this error, try `rustc --explain E0762`.
diff --git a/tests/ui/lexer/lex-bad-char-literals-5.rs b/tests/ui/lexer/lex-bad-char-literals-5.rs
new file mode 100644
index 000000000..0c4339edc
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-5.rs
@@ -0,0 +1,7 @@
+static c: char = '\x10\x10';
+//~^ ERROR: character literal may only contain one codepoint
+
+fn main() {
+ let ch: &str = '\x10\x10';
+ //~^ ERROR: character literal may only contain one codepoint
+}
diff --git a/tests/ui/lexer/lex-bad-char-literals-5.stderr b/tests/ui/lexer/lex-bad-char-literals-5.stderr
new file mode 100644
index 000000000..184817a65
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-5.stderr
@@ -0,0 +1,24 @@
+error: character literal may only contain one codepoint
+ --> $DIR/lex-bad-char-literals-5.rs:1:18
+ |
+LL | static c: char = '\x10\x10';
+ | ^^^^^^^^^^
+ |
+help: if you meant to write a `str` literal, use double quotes
+ |
+LL | static c: char = "\x10\x10";
+ | ~~~~~~~~~~
+
+error: character literal may only contain one codepoint
+ --> $DIR/lex-bad-char-literals-5.rs:5:20
+ |
+LL | let ch: &str = '\x10\x10';
+ | ^^^^^^^^^^
+ |
+help: if you meant to write a `str` literal, use double quotes
+ |
+LL | let ch: &str = "\x10\x10";
+ | ~~~~~~~~~~
+
+error: aborting due to 2 previous errors
+
diff --git a/tests/ui/lexer/lex-bad-char-literals-6.rs b/tests/ui/lexer/lex-bad-char-literals-6.rs
new file mode 100644
index 000000000..1b498c0fb
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-6.rs
@@ -0,0 +1,15 @@
+fn main() {
+ let x: &str = 'ab';
+ //~^ ERROR: character literal may only contain one codepoint
+ let y: char = 'cd';
+ //~^ ERROR: character literal may only contain one codepoint
+ let z = 'ef';
+ //~^ ERROR: character literal may only contain one codepoint
+
+ if x == y {}
+ if y == z {} // no error here
+ if x == z {}
+
+ let a: usize = "";
+ //~^ ERROR: mismatched types
+}
diff --git a/tests/ui/lexer/lex-bad-char-literals-6.stderr b/tests/ui/lexer/lex-bad-char-literals-6.stderr
new file mode 100644
index 000000000..2fe30304a
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-6.stderr
@@ -0,0 +1,44 @@
+error: character literal may only contain one codepoint
+ --> $DIR/lex-bad-char-literals-6.rs:2:19
+ |
+LL | let x: &str = 'ab';
+ | ^^^^
+ |
+help: if you meant to write a `str` literal, use double quotes
+ |
+LL | let x: &str = "ab";
+ | ~~~~
+
+error: character literal may only contain one codepoint
+ --> $DIR/lex-bad-char-literals-6.rs:4:19
+ |
+LL | let y: char = 'cd';
+ | ^^^^
+ |
+help: if you meant to write a `str` literal, use double quotes
+ |
+LL | let y: char = "cd";
+ | ~~~~
+
+error: character literal may only contain one codepoint
+ --> $DIR/lex-bad-char-literals-6.rs:6:13
+ |
+LL | let z = 'ef';
+ | ^^^^
+ |
+help: if you meant to write a `str` literal, use double quotes
+ |
+LL | let z = "ef";
+ | ~~~~
+
+error[E0308]: mismatched types
+ --> $DIR/lex-bad-char-literals-6.rs:13:20
+ |
+LL | let a: usize = "";
+ | ----- ^^ expected `usize`, found `&str`
+ | |
+ | expected due to this
+
+error: aborting due to 4 previous errors
+
+For more information about this error, try `rustc --explain E0308`.
diff --git a/tests/ui/lexer/lex-bad-char-literals-7.rs b/tests/ui/lexer/lex-bad-char-literals-7.rs
new file mode 100644
index 000000000..c675df2f3
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-7.rs
@@ -0,0 +1,13 @@
+fn main() {
+ let _: char = '';
+ //~^ ERROR: empty character literal
+ let _: char = '\u{}';
+ //~^ ERROR: empty unicode escape
+
+ // Next two are OK, but may befool error recovery
+ let _ = '/';
+ let _ = b'/';
+
+ let _ = ' hello // here's a comment
+ //~^ ERROR: unterminated character literal
+}
diff --git a/tests/ui/lexer/lex-bad-char-literals-7.stderr b/tests/ui/lexer/lex-bad-char-literals-7.stderr
new file mode 100644
index 000000000..255b9c689
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-7.stderr
@@ -0,0 +1,21 @@
+error: empty character literal
+ --> $DIR/lex-bad-char-literals-7.rs:2:20
+ |
+LL | let _: char = '';
+ | ^ empty character literal
+
+error: empty unicode escape
+ --> $DIR/lex-bad-char-literals-7.rs:4:20
+ |
+LL | let _: char = '\u{}';
+ | ^^^^ this escape must have at least 1 hex digit
+
+error[E0762]: unterminated character literal
+ --> $DIR/lex-bad-char-literals-7.rs:11:13
+ |
+LL | let _ = ' hello // here's a comment
+ | ^^^^^^^^
+
+error: aborting due to 3 previous errors
+
+For more information about this error, try `rustc --explain E0762`.
diff --git a/tests/ui/lexer/lex-bad-numeric-literals.rs b/tests/ui/lexer/lex-bad-numeric-literals.rs
new file mode 100644
index 000000000..56bdc50e4
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-numeric-literals.rs
@@ -0,0 +1,35 @@
+// ignore-tidy-linelength
+
+fn main() {
+ 0o1.0; //~ ERROR: octal float literal is not supported
+ 0o2f32; //~ ERROR: octal float literal is not supported
+ 0o3.0f32; //~ ERROR: octal float literal is not supported
+ 0o4e4; //~ ERROR: octal float literal is not supported
+ 0o5.0e5; //~ ERROR: octal float literal is not supported
+ 0o6e6f32; //~ ERROR: octal float literal is not supported
+ 0o7.0e7f64; //~ ERROR: octal float literal is not supported
+ 0x8.0e+9; //~ ERROR: hexadecimal float literal is not supported
+ 0x9.0e-9; //~ ERROR: hexadecimal float literal is not supported
+ 0o; //~ ERROR: no valid digits
+ 1e+; //~ ERROR: expected at least one digit in exponent
+ 0x539.0; //~ ERROR: hexadecimal float literal is not supported
+ 9900000000000000000000000000999999999999999999999999999999;
+ //~^ ERROR: integer literal is too large
+ 9900000000000000000000000000999999999999999999999999999999;
+ //~^ ERROR: integer literal is too large
+ 0b111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111110;
+ //~^ ERROR: integer literal is too large
+ 0o37777777777777777777777777777777777777777770;
+ //~^ ERROR: integer literal is too large
+ 0xffffffffffffffffffffffffffffffff0;
+ //~^ ERROR: integer literal is too large
+ 0x; //~ ERROR: no valid digits
+ 0xu32; //~ ERROR: no valid digits
+ 0ou32; //~ ERROR: no valid digits
+ 0bu32; //~ ERROR: no valid digits
+ 0b; //~ ERROR: no valid digits
+ 0o123f64; //~ ERROR: octal float literal is not supported
+ 0o123.456; //~ ERROR: octal float literal is not supported
+ 0b101f64; //~ ERROR: binary float literal is not supported
+ 0b111.101; //~ ERROR: binary float literal is not supported
+}
diff --git a/tests/ui/lexer/lex-bad-numeric-literals.stderr b/tests/ui/lexer/lex-bad-numeric-literals.stderr
new file mode 100644
index 000000000..145754197
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-numeric-literals.stderr
@@ -0,0 +1,169 @@
+error: octal float literal is not supported
+ --> $DIR/lex-bad-numeric-literals.rs:4:5
+ |
+LL | 0o1.0;
+ | ^^^^^
+
+error: octal float literal is not supported
+ --> $DIR/lex-bad-numeric-literals.rs:6:5
+ |
+LL | 0o3.0f32;
+ | ^^^^^
+
+error: octal float literal is not supported
+ --> $DIR/lex-bad-numeric-literals.rs:7:5
+ |
+LL | 0o4e4;
+ | ^^^^^
+
+error: octal float literal is not supported
+ --> $DIR/lex-bad-numeric-literals.rs:8:5
+ |
+LL | 0o5.0e5;
+ | ^^^^^^^
+
+error: octal float literal is not supported
+ --> $DIR/lex-bad-numeric-literals.rs:9:5
+ |
+LL | 0o6e6f32;
+ | ^^^^^
+
+error: octal float literal is not supported
+ --> $DIR/lex-bad-numeric-literals.rs:10:5
+ |
+LL | 0o7.0e7f64;
+ | ^^^^^^^
+
+error: hexadecimal float literal is not supported
+ --> $DIR/lex-bad-numeric-literals.rs:11:5
+ |
+LL | 0x8.0e+9;
+ | ^^^^^^^^
+
+error: hexadecimal float literal is not supported
+ --> $DIR/lex-bad-numeric-literals.rs:12:5
+ |
+LL | 0x9.0e-9;
+ | ^^^^^^^^
+
+error[E0768]: no valid digits found for number
+ --> $DIR/lex-bad-numeric-literals.rs:13:5
+ |
+LL | 0o;
+ | ^^
+
+error: expected at least one digit in exponent
+ --> $DIR/lex-bad-numeric-literals.rs:14:5
+ |
+LL | 1e+;
+ | ^^^
+
+error: hexadecimal float literal is not supported
+ --> $DIR/lex-bad-numeric-literals.rs:15:5
+ |
+LL | 0x539.0;
+ | ^^^^^^^
+
+error[E0768]: no valid digits found for number
+ --> $DIR/lex-bad-numeric-literals.rs:26:5
+ |
+LL | 0x;
+ | ^^
+
+error[E0768]: no valid digits found for number
+ --> $DIR/lex-bad-numeric-literals.rs:27:5
+ |
+LL | 0xu32;
+ | ^^
+
+error[E0768]: no valid digits found for number
+ --> $DIR/lex-bad-numeric-literals.rs:28:5
+ |
+LL | 0ou32;
+ | ^^
+
+error[E0768]: no valid digits found for number
+ --> $DIR/lex-bad-numeric-literals.rs:29:5
+ |
+LL | 0bu32;
+ | ^^
+
+error[E0768]: no valid digits found for number
+ --> $DIR/lex-bad-numeric-literals.rs:30:5
+ |
+LL | 0b;
+ | ^^
+
+error: octal float literal is not supported
+ --> $DIR/lex-bad-numeric-literals.rs:32:5
+ |
+LL | 0o123.456;
+ | ^^^^^^^^^
+
+error: binary float literal is not supported
+ --> $DIR/lex-bad-numeric-literals.rs:34:5
+ |
+LL | 0b111.101;
+ | ^^^^^^^^^
+
+error: octal float literal is not supported
+ --> $DIR/lex-bad-numeric-literals.rs:5:5
+ |
+LL | 0o2f32;
+ | ^^^^^^ not supported
+
+error: integer literal is too large
+ --> $DIR/lex-bad-numeric-literals.rs:16:5
+ |
+LL | 9900000000000000000000000000999999999999999999999999999999;
+ | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ |
+ = note: value exceeds limit of `340282366920938463463374607431768211455`
+
+error: integer literal is too large
+ --> $DIR/lex-bad-numeric-literals.rs:18:5
+ |
+LL | 9900000000000000000000000000999999999999999999999999999999;
+ | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ |
+ = note: value exceeds limit of `340282366920938463463374607431768211455`
+
+error: integer literal is too large
+ --> $DIR/lex-bad-numeric-literals.rs:20:5
+ |
+LL | 0b111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111110;
+ | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ |
+ = note: value exceeds limit of `0b11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111`
+
+error: integer literal is too large
+ --> $DIR/lex-bad-numeric-literals.rs:22:5
+ |
+LL | 0o37777777777777777777777777777777777777777770;
+ | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ |
+ = note: value exceeds limit of `0o3777777777777777777777777777777777777777777`
+
+error: integer literal is too large
+ --> $DIR/lex-bad-numeric-literals.rs:24:5
+ |
+LL | 0xffffffffffffffffffffffffffffffff0;
+ | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ |
+ = note: value exceeds limit of `0xffffffffffffffffffffffffffffffff`
+
+error: octal float literal is not supported
+ --> $DIR/lex-bad-numeric-literals.rs:31:5
+ |
+LL | 0o123f64;
+ | ^^^^^^^^ not supported
+
+error: binary float literal is not supported
+ --> $DIR/lex-bad-numeric-literals.rs:33:5
+ |
+LL | 0b101f64;
+ | ^^^^^^^^ not supported
+
+error: aborting due to 26 previous errors
+
+For more information about this error, try `rustc --explain E0768`.
diff --git a/tests/ui/lexer/lex-bad-octal-literal.rs b/tests/ui/lexer/lex-bad-octal-literal.rs
new file mode 100644
index 000000000..49631f16b
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-octal-literal.rs
@@ -0,0 +1,4 @@
+fn main() {
+ 0o18; //~ ERROR invalid digit for a base 8 literal
+ 0o1234_9_5670; //~ ERROR invalid digit for a base 8 literal
+}
diff --git a/tests/ui/lexer/lex-bad-octal-literal.stderr b/tests/ui/lexer/lex-bad-octal-literal.stderr
new file mode 100644
index 000000000..2cb8ca5de
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-octal-literal.stderr
@@ -0,0 +1,14 @@
+error: invalid digit for a base 8 literal
+ --> $DIR/lex-bad-octal-literal.rs:2:8
+ |
+LL | 0o18;
+ | ^
+
+error: invalid digit for a base 8 literal
+ --> $DIR/lex-bad-octal-literal.rs:3:12
+ |
+LL | 0o1234_9_5670;
+ | ^
+
+error: aborting due to 2 previous errors
+
diff --git a/tests/ui/lexer/lex-bad-token.rs b/tests/ui/lexer/lex-bad-token.rs
new file mode 100644
index 000000000..9e4824611
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-token.rs
@@ -0,0 +1,3 @@
+● //~ ERROR: unknown start of token
+
+fn main() {}
diff --git a/tests/ui/lexer/lex-bad-token.stderr b/tests/ui/lexer/lex-bad-token.stderr
new file mode 100644
index 000000000..43c43721b
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-token.stderr
@@ -0,0 +1,8 @@
+error: unknown start of token: \u{25cf}
+ --> $DIR/lex-bad-token.rs:1:1
+ |
+LL | ●
+ | ^
+
+error: aborting due to previous error
+
diff --git a/tests/ui/lexer/lex-bare-cr-nondoc-comment.rs b/tests/ui/lexer/lex-bare-cr-nondoc-comment.rs
new file mode 100644
index 000000000..5b528d6e1
--- /dev/null
+++ b/tests/ui/lexer/lex-bare-cr-nondoc-comment.rs
@@ -0,0 +1,9 @@
+// run-pass
+// ignore-tidy-cr
+
+// nondoc comment with bare CR: ' '
+//// nondoc comment with bare CR: ' '
+/* block nondoc comment with bare CR: ' ' */
+
+fn main() {
+}
diff --git a/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.rs b/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.rs
new file mode 100644
index 000000000..b7752e1f0
--- /dev/null
+++ b/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.rs
@@ -0,0 +1,26 @@
+// ignore-tidy-cr
+
+/// doc comment with bare CR: ' '
+pub fn foo() {}
+//~^^ ERROR: bare CR not allowed in doc-comment
+
+/** block doc comment with bare CR: ' ' */
+pub fn bar() {}
+//~^^ ERROR: bare CR not allowed in block doc-comment
+
+fn main() {
+ //! doc comment with bare CR: ' '
+ //~^ ERROR: bare CR not allowed in doc-comment
+
+ /*! block doc comment with bare CR: ' ' */
+ //~^ ERROR: bare CR not allowed in block doc-comment
+
+ // the following string literal has a bare CR in it
+ let _s = "foo bar"; //~ ERROR: bare CR not allowed in string
+
+ // the following string literal has a bare CR in it
+ let _s = r"bar foo"; //~ ERROR: bare CR not allowed in raw string
+
+ // the following string literal has a bare CR in it
+ let _s = "foo\ bar"; //~ ERROR: unknown character escape: `\r`
+}
diff --git a/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr b/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr
new file mode 100644
index 000000000..1a21fed63
--- /dev/null
+++ b/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr
@@ -0,0 +1,46 @@
+error: bare CR not allowed in doc-comment
+ --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:3:32
+ |
+LL | /// doc comment with bare CR: ' '
+ | ^
+
+error: bare CR not allowed in block doc-comment
+ --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:7:38
+ |
+LL | /** block doc comment with bare CR: ' ' */
+ | ^
+
+error: bare CR not allowed in doc-comment
+ --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:12:36
+ |
+LL | //! doc comment with bare CR: ' '
+ | ^
+
+error: bare CR not allowed in block doc-comment
+ --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:15:42
+ |
+LL | /*! block doc comment with bare CR: ' ' */
+ | ^
+
+error: bare CR not allowed in string, use `\r` instead
+ --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:19:18
+ |
+LL | let _s = "foo bar";
+ | ^ help: escape the character: `\r`
+
+error: bare CR not allowed in raw string
+ --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:22:19
+ |
+LL | let _s = r"bar foo";
+ | ^
+
+error: unknown character escape: `\r`
+ --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:25:19
+ |
+LL | let _s = "foo\ bar";
+ | ^ unknown character escape
+ |
+ = help: this is an isolated carriage return; consider checking your editor and version control settings
+
+error: aborting due to 7 previous errors
+
diff --git a/tests/ui/lexer/lex-emoji-identifiers.rs b/tests/ui/lexer/lex-emoji-identifiers.rs
new file mode 100644
index 000000000..91b5929c0
--- /dev/null
+++ b/tests/ui/lexer/lex-emoji-identifiers.rs
@@ -0,0 +1,17 @@
+fn invalid_emoji_usages() {
+ let arrow↔️ = "basic emoji"; //~ ERROR: identifiers cannot contain emoji
+ // FIXME
+ let planet🪐 = "basic emoji"; //~ ERROR: unknown start of token
+ // FIXME
+ let wireless🛜 = "basic emoji"; //~ ERROR: unknown start of token
+ // FIXME
+ let key1️⃣ = "keycap sequence"; //~ ERROR: unknown start of token
+ //~^ WARN: identifier contains uncommon Unicode codepoints
+ let flag🇺🇳 = "flag sequence"; //~ ERROR: identifiers cannot contain emoji
+ let wales🏴 = "tag sequence"; //~ ERROR: identifiers cannot contain emoji
+ let folded🙏🏿 = "modifier sequence"; //~ ERROR: identifiers cannot contain emoji
+}
+
+fn main() {
+ invalid_emoji_usages();
+}
diff --git a/tests/ui/lexer/lex-emoji-identifiers.stderr b/tests/ui/lexer/lex-emoji-identifiers.stderr
new file mode 100644
index 000000000..6237c5d02
--- /dev/null
+++ b/tests/ui/lexer/lex-emoji-identifiers.stderr
@@ -0,0 +1,52 @@
+error: unknown start of token: \u{1fa90}
+ --> $DIR/lex-emoji-identifiers.rs:4:15
+ |
+LL | let planet🪐 = "basic emoji";
+ | ^^
+
+error: unknown start of token: \u{1f6dc}
+ --> $DIR/lex-emoji-identifiers.rs:6:17
+ |
+LL | let wireless🛜 = "basic emoji";
+ | ^^
+
+error: unknown start of token: \u{20e3}
+ --> $DIR/lex-emoji-identifiers.rs:8:14
+ |
+LL | let key1️⃣ = "keycap sequence";
+ | ^
+
+error: identifiers cannot contain emoji: `arrow↔️`
+ --> $DIR/lex-emoji-identifiers.rs:2:9
+ |
+LL | let arrow↔️ = "basic emoji";
+ | ^^^^^^
+
+error: identifiers cannot contain emoji: `flag🇺🇳`
+ --> $DIR/lex-emoji-identifiers.rs:10:9
+ |
+LL | let flag🇺🇳 = "flag sequence";
+ | ^^^^^^
+
+error: identifiers cannot contain emoji: `wales🏴`
+ --> $DIR/lex-emoji-identifiers.rs:11:9
+ |
+LL | let wales🏴 = "tag sequence";
+ | ^^^^^^^
+
+error: identifiers cannot contain emoji: `folded🙏🏿`
+ --> $DIR/lex-emoji-identifiers.rs:12:9
+ |
+LL | let folded🙏🏿 = "modifier sequence";
+ | ^^^^^^^^^^
+
+warning: identifier contains uncommon Unicode codepoints
+ --> $DIR/lex-emoji-identifiers.rs:8:9
+ |
+LL | let key1️⃣ = "keycap sequence";
+ | ^^^^
+ |
+ = note: `#[warn(uncommon_codepoints)]` on by default
+
+error: aborting due to 7 previous errors; 1 warning emitted
+
diff --git a/tests/ui/lexer/lex-stray-backslash.rs b/tests/ui/lexer/lex-stray-backslash.rs
new file mode 100644
index 000000000..bb27f44c2
--- /dev/null
+++ b/tests/ui/lexer/lex-stray-backslash.rs
@@ -0,0 +1,3 @@
+\ //~ ERROR: unknown start of token: \
+
+fn main() {}
diff --git a/tests/ui/lexer/lex-stray-backslash.stderr b/tests/ui/lexer/lex-stray-backslash.stderr
new file mode 100644
index 000000000..06dc0f2b5
--- /dev/null
+++ b/tests/ui/lexer/lex-stray-backslash.stderr
@@ -0,0 +1,8 @@
+error: unknown start of token: \
+ --> $DIR/lex-stray-backslash.rs:1:1
+ |
+LL | \
+ | ^
+
+error: aborting due to previous error
+
diff --git a/tests/ui/lexer/lexer-crlf-line-endings-string-literal-doc-comment.rs b/tests/ui/lexer/lexer-crlf-line-endings-string-literal-doc-comment.rs
new file mode 100644
index 000000000..802be7f5a
--- /dev/null
+++ b/tests/ui/lexer/lexer-crlf-line-endings-string-literal-doc-comment.rs
@@ -0,0 +1,38 @@
+// run-pass
+// ignore-tidy-cr
+// ignore-tidy-cr (repeated again because of tidy bug)
+// license is ignored because tidy can't handle the CRLF here properly.
+
+// N.B., this file needs CRLF line endings. The .gitattributes file in
+// this directory should enforce it.
+
+// ignore-pretty issue #37195
+
+/// Doc comment that ends in CRLF
+pub fn foo() {}
+
+/** Block doc comment that
+ * contains CRLF characters
+ */
+pub fn bar() {}
+
+fn main() {
+ let s = "string
+literal";
+ assert_eq!(s, "string\nliteral");
+
+ let s = "literal with \
+ escaped newline";
+ assert_eq!(s, "literal with escaped newline");
+
+ let s = r"string
+literal";
+ assert_eq!(s, "string\nliteral");
+ let s = br"byte string
+literal";
+ assert_eq!(s, "byte string\nliteral".as_bytes());
+
+ // validate that our source file has CRLF endings
+ let source = include_str!("lexer-crlf-line-endings-string-literal-doc-comment.rs");
+ assert!(source.contains("string\r\nliteral"));
+}