32 files changed, 878 insertions, 0 deletions
diff --git a/tests/ui/lexer/error-stage.rs b/tests/ui/lexer/error-stage.rs
new file mode 100644
index 000000000..c8d88f745
--- /dev/null
+++ b/tests/ui/lexer/error-stage.rs
@@ -0,0 +1,80 @@
+// This test is about the treatment of invalid literals. In particular, some
+// literals are only considered invalid if they survive to HIR lowering.
+//
+// Literals with bad suffixes
+// --------------------------
+// Literals consist of a primary part and an optional suffix.
+// https://doc.rust-lang.org/reference/tokens.html#suffixes says:
+//
+//   Any kind of literal (string, integer, etc) with any suffix is valid as a
+//   token, and can be passed to a macro without producing an error. The macro
+//   itself will decide how to interpret such a token and whether to produce an
+//   error or not.
+//
+//   ```
+//   macro_rules! blackhole { ($tt:tt) => () }
+//   blackhole!("string"suffix); // OK
+//   ```
+//
+//   However, suffixes on literal tokens parsed as Rust code are restricted.
+//   Any suffixes are rejected on non-numeric literal tokens, and numeric
+//   literal tokens are accepted only with suffixes from the list below.
+//
+//   Integer: u8, i8, u16, i16, u32, i32, u64, i64, u128, i128, usize, isize
+//   Floating-point: f32, f64
+//
+// This means that something like `"string"any_suffix` is a token accepted by
+// the lexer, but rejected later for being an invalid combination of primary
+// part and suffix.
+//
+// `0b10f32` is a similar case. `0b10` is a valid primary part that is a valid
+// *integer* literal when no suffix is present. It only causes an error later
+// when combined with the `f32` float suffix.
+//
+// However, `0b10.0f32` is different. It is rejected by the lexer because
+// `0b10.0` is not a valid token even on its own.
+//
+// This difference is unfortunate, but it's baked into the language now.
+//
+// Too-large integer literals
+// --------------------------
+// https://doc.rust-lang.org/reference/tokens.html#integer-literals says that
+// literals like `128_i8` and `256_u8` "are too big for their type, but are
+// still valid tokens".
+
+macro_rules! sink {
+    ($($x:tt;)*) => {()}
+}
+
+// The invalid literals are ignored because the macro consumes them. Except for
+// `0b10.0f32` because it's a lexer error.
+const _: () = sink! {
+    "string"any_suffix; // OK
+    10u123; // OK
+    10.0f123; // OK
+    0b10f32; // OK
+    0b10.0f32; //~ ERROR binary float literal is not supported
+    999340282366920938463463374607431768211455999; // OK
+};
+
+// The invalid literals used to cause errors, but this was changed by #102944.
+// Except for `0b10.0f32`, because it's a lexer error.
+#[cfg(FALSE)]
+fn configured_out() {
+    "string"any_suffix; // OK
+    10u123; // OK
+    10.0f123; // OK
+    0b10f32; // OK
+    0b10.0f32; //~ ERROR binary float literal is not supported
+    999340282366920938463463374607431768211455999; // OK
+}
+
+// All the invalid literals cause errors.
+fn main() {
+    "string"any_suffix; //~ ERROR suffixes on string literals are invalid
+    10u123; //~ ERROR invalid width `123` for integer literal
+    10.0f123; //~ ERROR invalid width `123` for float literal
+    0b10f32; //~ ERROR binary float literal is not supported
+    0b10.0f32; //~ ERROR binary float literal is not supported
+    999340282366920938463463374607431768211455999; //~ ERROR integer literal is too large
+}
diff --git a/tests/ui/lexer/error-stage.stderr b/tests/ui/lexer/error-stage.stderr
new file mode 100644
index 000000000..ecbdb14dc
--- /dev/null
+++ b/tests/ui/lexer/error-stage.stderr
@@ -0,0 +1,56 @@
+error: binary float literal is not supported
+  --> $DIR/error-stage.rs:56:5
+   |
+LL |     0b10.0f32;
+   |     ^^^^^^
+
+error: binary float literal is not supported
+  --> $DIR/error-stage.rs:68:5
+   |
+LL |     0b10.0f32;
+   |     ^^^^^^
+
+error: binary float literal is not supported
+  --> $DIR/error-stage.rs:78:5
+   |
+LL |     0b10.0f32;
+   |     ^^^^^^
+
+error: suffixes on string literals are invalid
+  --> $DIR/error-stage.rs:74:5
+   |
+LL |     "string"any_suffix;
+   |     ^^^^^^^^^^^^^^^^^^ invalid suffix `any_suffix`
+
+error: invalid width `123` for integer literal
+  --> $DIR/error-stage.rs:75:5
+   |
+LL |     10u123;
+   |     ^^^^^^
+   |
+   = help: valid widths are 8, 16, 32, 64 and 128
+
+error: invalid width `123` for float literal
+  --> $DIR/error-stage.rs:76:5
+   |
+LL |     10.0f123;
+   |     ^^^^^^^^
+   |
+   = help: valid widths are 32 and 64
+
+error: binary float literal is not supported
+  --> $DIR/error-stage.rs:77:5
+   |
+LL |     0b10f32;
+   |     ^^^^^^^ not supported
+
+error: integer literal is too large
+  --> $DIR/error-stage.rs:79:5
+   |
+LL |     999340282366920938463463374607431768211455999;
+   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+   |
+   = note: value exceeds limit of `340282366920938463463374607431768211455`
+
+error: aborting due to 8 previous errors
+
diff --git a/tests/ui/lexer/lex-bad-binary-literal.rs b/tests/ui/lexer/lex-bad-binary-literal.rs
new file mode 100644
index 000000000..7df98073e
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-binary-literal.rs
@@ -0,0 +1,11 @@
+fn main() {
+    0b121; //~ ERROR invalid digit for a base 2 literal
+    0b10_10301; //~ ERROR invalid digit for a base 2 literal
+    0b30; //~ ERROR invalid digit for a base 2 literal
+    0b41; //~ ERROR invalid digit for a base 2 literal
+    0b5; //~ ERROR invalid digit for a base 2 literal
+    0b6; //~ ERROR invalid digit for a base 2 literal
+    0b7; //~ ERROR invalid digit for a base 2 literal
+    0b8; //~ ERROR invalid digit for a base 2 literal
+    0b9; //~ ERROR invalid digit for a base 2 literal
+}
diff --git a/tests/ui/lexer/lex-bad-binary-literal.stderr b/tests/ui/lexer/lex-bad-binary-literal.stderr
new file mode 100644
index 000000000..992b3d248
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-binary-literal.stderr
@@ -0,0 +1,56 @@
+error: invalid digit for a base 2 literal
+  --> $DIR/lex-bad-binary-literal.rs:2:8
+   |
+LL |     0b121;
+   |        ^
+
+error: invalid digit for a base 2 literal
+  --> $DIR/lex-bad-binary-literal.rs:3:12
+   |
+LL |     0b10_10301;
+   |            ^
+
+error: invalid digit for a base 2 literal
+  --> $DIR/lex-bad-binary-literal.rs:4:7
+   |
+LL |     0b30;
+   |       ^
+
+error: invalid digit for a base 2 literal
+  --> $DIR/lex-bad-binary-literal.rs:5:7
+   |
+LL |     0b41;
+   |       ^
+
+error: invalid digit for a base 2 literal
+  --> $DIR/lex-bad-binary-literal.rs:6:7
+   |
+LL |     0b5;
+   |       ^
+
+error: invalid digit for a base 2 literal
+  --> $DIR/lex-bad-binary-literal.rs:7:7
+   |
+LL |     0b6;
+   |       ^
+
+error: invalid digit for a base 2 literal
+  --> $DIR/lex-bad-binary-literal.rs:8:7
+   |
+LL |     0b7;
+   |       ^
+
+error: invalid digit for a base 2 literal
+  --> $DIR/lex-bad-binary-literal.rs:9:7
+   |
+LL |     0b8;
+   |       ^
+
+error: invalid digit for a base 2 literal
+  --> $DIR/lex-bad-binary-literal.rs:10:7
+   |
+LL |     0b9;
+   |       ^
+
+error: aborting due to 9 previous errors
+
diff --git a/tests/ui/lexer/lex-bad-char-literals-1.rs b/tests/ui/lexer/lex-bad-char-literals-1.rs
new file mode 100644
index 000000000..e7951cfd2
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-1.rs
@@ -0,0 +1,17 @@
+static c3: char =
+    '\x1' //~ ERROR: numeric character escape is too short
+;
+
+static s3: &'static str =
+    "\x1" //~ ERROR: numeric character escape is too short
+;
+
+static c: char =
+    '\●' //~ ERROR: unknown character escape
+;
+
+static s: &'static str =
+    "\●" //~ ERROR: unknown character escape
+;
+
+fn main() {}
diff --git a/tests/ui/lexer/lex-bad-char-literals-1.stderr b/tests/ui/lexer/lex-bad-char-literals-1.stderr
new file mode 100644
index 000000000..e6ff1f662
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-1.stderr
@@ -0,0 +1,38 @@
+error: numeric character escape is too short
+  --> $DIR/lex-bad-char-literals-1.rs:2:6
+   |
+LL |     '\x1'
+   |      ^^^
+
+error: numeric character escape is too short
+  --> $DIR/lex-bad-char-literals-1.rs:6:6
+   |
+LL |     "\x1"
+   |      ^^^
+
+error: unknown character escape: `\u{25cf}`
+  --> $DIR/lex-bad-char-literals-1.rs:10:7
+   |
+LL |     '\●'
+   |       ^ unknown character escape
+   |
+   = help: for more information, visit <https://static.rust-lang.org/doc/master/reference.html#literals>
+help: if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal
+   |
+LL |     r"\●"
+   |     ~~~~~
+
+error: unknown character escape: `\u{25cf}`
+  --> $DIR/lex-bad-char-literals-1.rs:14:7
+   |
+LL |     "\●"
+   |       ^ unknown character escape
+   |
+   = help: for more information, visit <https://static.rust-lang.org/doc/master/reference.html#literals>
+help: if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal
+   |
+LL |     r"\●"
+   |     ~~~~~
+
+error: aborting due to 4 previous errors
+
diff --git a/tests/ui/lexer/lex-bad-char-literals-2.rs b/tests/ui/lexer/lex-bad-char-literals-2.rs
new file mode 100644
index 000000000..d35dafd9a
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-2.rs
@@ -0,0 +1,6 @@
+// This test needs to be the last one appearing in this file as it kills the parser
+static c: char =
+    'nope' //~ ERROR: character literal may only contain one codepoint
+;
+
+fn main() {}
diff --git a/tests/ui/lexer/lex-bad-char-literals-2.stderr b/tests/ui/lexer/lex-bad-char-literals-2.stderr
new file mode 100644
index 000000000..c2b19a7ad
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-2.stderr
@@ -0,0 +1,13 @@
+error: character literal may only contain one codepoint
+  --> $DIR/lex-bad-char-literals-2.rs:3:5
+   |
+LL |     'nope'
+   |     ^^^^^^
+   |
+help: if you meant to write a `str` literal, use double quotes
+   |
+LL |     "nope"
+   |     ~~~~~~
+
+error: aborting due to previous error
+
diff --git a/tests/ui/lexer/lex-bad-char-literals-3.rs b/tests/ui/lexer/lex-bad-char-literals-3.rs
new file mode 100644
index 000000000..5194ff4d9
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-3.rs
@@ -0,0 +1,7 @@
+static c: char = '●●';
+//~^ ERROR: character literal may only contain one codepoint
+
+fn main() {
+    let ch: &str = '●●';
+    //~^ ERROR: character literal may only contain one codepoint
+}
diff --git a/tests/ui/lexer/lex-bad-char-literals-3.stderr b/tests/ui/lexer/lex-bad-char-literals-3.stderr
new file mode 100644
index 000000000..62a5e424c
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-3.stderr
@@ -0,0 +1,24 @@
+error: character literal may only contain one codepoint
+  --> $DIR/lex-bad-char-literals-3.rs:1:18
+   |
+LL | static c: char = '●●';
+   |                  ^^^^
+   |
+help: if you meant to write a `str` literal, use double quotes
+   |
+LL | static c: char = "●●";
+   |                  ~~~~
+
+error: character literal may only contain one codepoint
+  --> $DIR/lex-bad-char-literals-3.rs:5:20
+   |
+LL |     let ch: &str = '●●';
+   |                    ^^^^
+   |
+help: if you meant to write a `str` literal, use double quotes
+   |
+LL |     let ch: &str = "●●";
+   |                    ~~~~
+
+error: aborting due to 2 previous errors
+
diff --git a/tests/ui/lexer/lex-bad-char-literals-4.rs b/tests/ui/lexer/lex-bad-char-literals-4.rs
new file mode 100644
index 000000000..de0a19df9
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-4.rs
@@ -0,0 +1,5 @@
+//
+// This test needs to be the last one appearing in this file as it kills the parser
+static c: char =
+    '●  //~ ERROR: unterminated character literal
+;
diff --git a/tests/ui/lexer/lex-bad-char-literals-4.stderr b/tests/ui/lexer/lex-bad-char-literals-4.stderr
new file mode 100644
index 000000000..fec4421c4
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-4.stderr
@@ -0,0 +1,9 @@
+error[E0762]: unterminated character literal
+  --> $DIR/lex-bad-char-literals-4.rs:4:5
+   |
+LL |     '●
+   |     ^^^^
+
+error: aborting due to previous error
+
+For more information about this error, try `rustc --explain E0762`.
diff --git a/tests/ui/lexer/lex-bad-char-literals-5.rs b/tests/ui/lexer/lex-bad-char-literals-5.rs
new file mode 100644
index 000000000..0c4339edc
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-5.rs
@@ -0,0 +1,7 @@
+static c: char = '\x10\x10';
+//~^ ERROR: character literal may only contain one codepoint
+
+fn main() {
+    let ch: &str = '\x10\x10';
+    //~^ ERROR: character literal may only contain one codepoint
+}
diff --git a/tests/ui/lexer/lex-bad-char-literals-5.stderr b/tests/ui/lexer/lex-bad-char-literals-5.stderr
new file mode 100644
index 000000000..184817a65
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-5.stderr
@@ -0,0 +1,24 @@
+error: character literal may only contain one codepoint
+  --> $DIR/lex-bad-char-literals-5.rs:1:18
+   |
+LL | static c: char = '\x10\x10';
+   |                  ^^^^^^^^^^
+   |
+help: if you meant to write a `str` literal, use double quotes
+   |
+LL | static c: char = "\x10\x10";
+   |                  ~~~~~~~~~~
+
+error: character literal may only contain one codepoint
+  --> $DIR/lex-bad-char-literals-5.rs:5:20
+   |
+LL |     let ch: &str = '\x10\x10';
+   |                    ^^^^^^^^^^
+   |
+help: if you meant to write a `str` literal, use double quotes
+   |
+LL |     let ch: &str = "\x10\x10";
+   |                    ~~~~~~~~~~
+
+error: aborting due to 2 previous errors
+
diff --git a/tests/ui/lexer/lex-bad-char-literals-6.rs b/tests/ui/lexer/lex-bad-char-literals-6.rs
new file mode 100644
index 000000000..1b498c0fb
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-6.rs
@@ -0,0 +1,15 @@
+fn main() {
+    let x: &str = 'ab';
+    //~^ ERROR: character literal may only contain one codepoint
+    let y: char = 'cd';
+    //~^ ERROR: character literal may only contain one codepoint
+    let z = 'ef';
+    //~^ ERROR: character literal may only contain one codepoint
+
+    if x == y {}
+    if y == z {}  // no error here
+    if x == z {}
+
+    let a: usize = "";
+    //~^ ERROR: mismatched types
+}
diff --git a/tests/ui/lexer/lex-bad-char-literals-6.stderr b/tests/ui/lexer/lex-bad-char-literals-6.stderr
new file mode 100644
index 000000000..2fe30304a
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-6.stderr
@@ -0,0 +1,44 @@
+error: character literal may only contain one codepoint
+  --> $DIR/lex-bad-char-literals-6.rs:2:19
+   |
+LL |     let x: &str = 'ab';
+   |                   ^^^^
+   |
+help: if you meant to write a `str` literal, use double quotes
+   |
+LL |     let x: &str = "ab";
+   |                   ~~~~
+
+error: character literal may only contain one codepoint
+  --> $DIR/lex-bad-char-literals-6.rs:4:19
+   |
+LL |     let y: char = 'cd';
+   |                   ^^^^
+   |
+help: if you meant to write a `str` literal, use double quotes
+   |
+LL |     let y: char = "cd";
+   |                   ~~~~
+
+error: character literal may only contain one codepoint
+  --> $DIR/lex-bad-char-literals-6.rs:6:13
+   |
+LL |     let z = 'ef';
+   |             ^^^^
+   |
+help: if you meant to write a `str` literal, use double quotes
+   |
+LL |     let z = "ef";
+   |             ~~~~
+
+error[E0308]: mismatched types
+  --> $DIR/lex-bad-char-literals-6.rs:13:20
+   |
+LL |     let a: usize = "";
+   |            -----   ^^ expected `usize`, found `&str`
+   |            |
+   |            expected due to this
+
+error: aborting due to 4 previous errors
+
+For more information about this error, try `rustc --explain E0308`.
diff --git a/tests/ui/lexer/lex-bad-char-literals-7.rs b/tests/ui/lexer/lex-bad-char-literals-7.rs
new file mode 100644
index 000000000..c675df2f3
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-7.rs
@@ -0,0 +1,13 @@
+fn main() {
+    let _: char = '';
+    //~^ ERROR: empty character literal
+    let _: char = '\u{}';
+    //~^ ERROR: empty unicode escape
+
+    // Next two are OK, but may befool error recovery
+    let _ = '/';
+    let _ = b'/';
+
+    let _ = ' hello // here's a comment
+    //~^ ERROR: unterminated character literal
+}
diff --git a/tests/ui/lexer/lex-bad-char-literals-7.stderr b/tests/ui/lexer/lex-bad-char-literals-7.stderr
new file mode 100644
index 000000000..255b9c689
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-char-literals-7.stderr
@@ -0,0 +1,21 @@
+error: empty character literal
+  --> $DIR/lex-bad-char-literals-7.rs:2:20
+   |
+LL |     let _: char = '';
+   |                    ^ empty character literal
+
+error: empty unicode escape
+  --> $DIR/lex-bad-char-literals-7.rs:4:20
+   |
+LL |     let _: char = '\u{}';
+   |                    ^^^^ this escape must have at least 1 hex digit
+
+error[E0762]: unterminated character literal
+  --> $DIR/lex-bad-char-literals-7.rs:11:13
+   |
+LL |     let _ = ' hello // here's a comment
+   |             ^^^^^^^^
+
+error: aborting due to 3 previous errors
+
+For more information about this error, try `rustc --explain E0762`.
diff --git a/tests/ui/lexer/lex-bad-numeric-literals.rs b/tests/ui/lexer/lex-bad-numeric-literals.rs
new file mode 100644
index 000000000..56bdc50e4
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-numeric-literals.rs
@@ -0,0 +1,35 @@
+// ignore-tidy-linelength
+
+fn main() {
+    0o1.0; //~ ERROR: octal float literal is not supported
+    0o2f32; //~ ERROR: octal float literal is not supported
+    0o3.0f32; //~ ERROR: octal float literal is not supported
+    0o4e4; //~ ERROR: octal float literal is not supported
+    0o5.0e5; //~ ERROR: octal float literal is not supported
+    0o6e6f32; //~ ERROR: octal float literal is not supported
+    0o7.0e7f64; //~ ERROR: octal float literal is not supported
+    0x8.0e+9; //~ ERROR: hexadecimal float literal is not supported
+    0x9.0e-9; //~ ERROR: hexadecimal float literal is not supported
+    0o; //~ ERROR: no valid digits
+    1e+; //~ ERROR: expected at least one digit in exponent
+    0x539.0; //~ ERROR: hexadecimal float literal is not supported
+    9900000000000000000000000000999999999999999999999999999999;
+    //~^ ERROR: integer literal is too large
+    9900000000000000000000000000999999999999999999999999999999;
+    //~^ ERROR: integer literal is too large
+    0b111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111110;
+    //~^ ERROR: integer literal is too large
+    0o37777777777777777777777777777777777777777770;
+    //~^ ERROR: integer literal is too large
+    0xffffffffffffffffffffffffffffffff0;
+    //~^ ERROR: integer literal is too large
+    0x; //~ ERROR: no valid digits
+    0xu32; //~ ERROR: no valid digits
+    0ou32; //~ ERROR: no valid digits
+    0bu32; //~ ERROR: no valid digits
+    0b; //~ ERROR: no valid digits
+    0o123f64; //~ ERROR: octal float literal is not supported
+    0o123.456; //~ ERROR: octal float literal is not supported
+    0b101f64; //~ ERROR: binary float literal is not supported
+    0b111.101; //~ ERROR: binary float literal is not supported
+}
diff --git a/tests/ui/lexer/lex-bad-numeric-literals.stderr b/tests/ui/lexer/lex-bad-numeric-literals.stderr
new file mode 100644
index 000000000..145754197
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-numeric-literals.stderr
@@ -0,0 +1,169 @@
+error: octal float literal is not supported
+  --> $DIR/lex-bad-numeric-literals.rs:4:5
+   |
+LL |     0o1.0;
+   |     ^^^^^
+
+error: octal float literal is not supported
+  --> $DIR/lex-bad-numeric-literals.rs:6:5
+   |
+LL |     0o3.0f32;
+   |     ^^^^^
+
+error: octal float literal is not supported
+  --> $DIR/lex-bad-numeric-literals.rs:7:5
+   |
+LL |     0o4e4;
+   |     ^^^^^
+
+error: octal float literal is not supported
+  --> $DIR/lex-bad-numeric-literals.rs:8:5
+   |
+LL |     0o5.0e5;
+   |     ^^^^^^^
+
+error: octal float literal is not supported
+  --> $DIR/lex-bad-numeric-literals.rs:9:5
+   |
+LL |     0o6e6f32;
+   |     ^^^^^
+
+error: octal float literal is not supported
+  --> $DIR/lex-bad-numeric-literals.rs:10:5
+   |
+LL |     0o7.0e7f64;
+   |     ^^^^^^^
+
+error: hexadecimal float literal is not supported
+  --> $DIR/lex-bad-numeric-literals.rs:11:5
+   |
+LL |     0x8.0e+9;
+   |     ^^^^^^^^
+
+error: hexadecimal float literal is not supported
+  --> $DIR/lex-bad-numeric-literals.rs:12:5
+   |
+LL |     0x9.0e-9;
+   |     ^^^^^^^^
+
+error[E0768]: no valid digits found for number
+  --> $DIR/lex-bad-numeric-literals.rs:13:5
+   |
+LL |     0o;
+   |     ^^
+
+error: expected at least one digit in exponent
+  --> $DIR/lex-bad-numeric-literals.rs:14:5
+   |
+LL |     1e+;
+   |     ^^^
+
+error: hexadecimal float literal is not supported
+  --> $DIR/lex-bad-numeric-literals.rs:15:5
+   |
+LL |     0x539.0;
+   |     ^^^^^^^
+
+error[E0768]: no valid digits found for number
+  --> $DIR/lex-bad-numeric-literals.rs:26:5
+   |
+LL |     0x;
+   |     ^^
+
+error[E0768]: no valid digits found for number
+  --> $DIR/lex-bad-numeric-literals.rs:27:5
+   |
+LL |     0xu32;
+   |     ^^
+
+error[E0768]: no valid digits found for number
+  --> $DIR/lex-bad-numeric-literals.rs:28:5
+   |
+LL |     0ou32;
+   |     ^^
+
+error[E0768]: no valid digits found for number
+  --> $DIR/lex-bad-numeric-literals.rs:29:5
+   |
+LL |     0bu32;
+   |     ^^
+
+error[E0768]: no valid digits found for number
+  --> $DIR/lex-bad-numeric-literals.rs:30:5
+   |
+LL |     0b;
+   |     ^^
+
+error: octal float literal is not supported
+  --> $DIR/lex-bad-numeric-literals.rs:32:5
+   |
+LL |     0o123.456;
+   |     ^^^^^^^^^
+
+error: binary float literal is not supported
+  --> $DIR/lex-bad-numeric-literals.rs:34:5
+   |
+LL |     0b111.101;
+   |     ^^^^^^^^^
+
+error: octal float literal is not supported
+  --> $DIR/lex-bad-numeric-literals.rs:5:5
+   |
+LL |     0o2f32;
+   |     ^^^^^^ not supported
+
+error: integer literal is too large
+  --> $DIR/lex-bad-numeric-literals.rs:16:5
+   |
+LL |     9900000000000000000000000000999999999999999999999999999999;
+   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+   |
+   = note: value exceeds limit of `340282366920938463463374607431768211455`
+
+error: integer literal is too large
+  --> $DIR/lex-bad-numeric-literals.rs:18:5
+   |
+LL |     9900000000000000000000000000999999999999999999999999999999;
+   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+   |
+   = note: value exceeds limit of `340282366920938463463374607431768211455`
+
+error: integer literal is too large
+  --> $DIR/lex-bad-numeric-literals.rs:20:5
+   |
+LL |     0b111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111110;
+   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+   |
+   = note: value exceeds limit of `0b11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111`
+
+error: integer literal is too large
+  --> $DIR/lex-bad-numeric-literals.rs:22:5
+   |
+LL |     0o37777777777777777777777777777777777777777770;
+   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+   |
+   = note: value exceeds limit of `0o3777777777777777777777777777777777777777777`
+
+error: integer literal is too large
+  --> $DIR/lex-bad-numeric-literals.rs:24:5
+   |
+LL |     0xffffffffffffffffffffffffffffffff0;
+   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+   |
+   = note: value exceeds limit of `0xffffffffffffffffffffffffffffffff`
+
+error: octal float literal is not supported
+  --> $DIR/lex-bad-numeric-literals.rs:31:5
+   |
+LL |     0o123f64;
+   |     ^^^^^^^^ not supported
+
+error: binary float literal is not supported
+  --> $DIR/lex-bad-numeric-literals.rs:33:5
+   |
+LL |     0b101f64;
+   |     ^^^^^^^^ not supported
+
+error: aborting due to 26 previous errors
+
+For more information about this error, try `rustc --explain E0768`.
diff --git a/tests/ui/lexer/lex-bad-octal-literal.rs b/tests/ui/lexer/lex-bad-octal-literal.rs
new file mode 100644
index 000000000..49631f16b
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-octal-literal.rs
@@ -0,0 +1,4 @@
+fn main() {
+    0o18; //~ ERROR invalid digit for a base 8 literal
+    0o1234_9_5670;  //~ ERROR invalid digit for a base 8 literal
+}
diff --git a/tests/ui/lexer/lex-bad-octal-literal.stderr b/tests/ui/lexer/lex-bad-octal-literal.stderr
new file mode 100644
index 000000000..2cb8ca5de
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-octal-literal.stderr
@@ -0,0 +1,14 @@
+error: invalid digit for a base 8 literal
+  --> $DIR/lex-bad-octal-literal.rs:2:8
+   |
+LL |     0o18;
+   |        ^
+
+error: invalid digit for a base 8 literal
+  --> $DIR/lex-bad-octal-literal.rs:3:12
+   |
+LL |     0o1234_9_5670;
+   |            ^
+
+error: aborting due to 2 previous errors
+
diff --git a/tests/ui/lexer/lex-bad-token.rs b/tests/ui/lexer/lex-bad-token.rs
new file mode 100644
index 000000000..9e4824611
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-token.rs
@@ -0,0 +1,3 @@
+● //~ ERROR: unknown start of token
+
+fn main() {}
diff --git a/tests/ui/lexer/lex-bad-token.stderr b/tests/ui/lexer/lex-bad-token.stderr
new file mode 100644
index 000000000..43c43721b
--- /dev/null
+++ b/tests/ui/lexer/lex-bad-token.stderr
@@ -0,0 +1,8 @@
+error: unknown start of token: \u{25cf}
+  --> $DIR/lex-bad-token.rs:1:1
+   |
+LL | ●
+   | ^
+
+error: aborting due to previous error
+
diff --git a/tests/ui/lexer/lex-bare-cr-nondoc-comment.rs b/tests/ui/lexer/lex-bare-cr-nondoc-comment.rs
new file mode 100644
index 000000000..5b528d6e1
--- /dev/null
+++ b/tests/ui/lexer/lex-bare-cr-nondoc-comment.rs
@@ -0,0 +1,9 @@
+// run-pass
+// ignore-tidy-cr
+
+// nondoc comment with bare CR: '
+'
+//// nondoc comment with bare CR: '
+'
+/* block nondoc comment with bare CR: '
+' */
+
+fn main() {
+}
diff --git a/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.rs b/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.rs
new file mode 100644
index 000000000..b7752e1f0
--- /dev/null
+++ b/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.rs
@@ -0,0 +1,26 @@
+// ignore-tidy-cr
+
+/// doc comment with bare CR: '
+'
+pub fn foo() {}
+//~^^ ERROR: bare CR not allowed in doc-comment
+
+/** block doc comment with bare CR: '
+' */
+pub fn bar() {}
+//~^^ ERROR: bare CR not allowed in block doc-comment
+
+fn main() {
+    //! doc comment with bare CR: '
+'
+    //~^ ERROR: bare CR not allowed in doc-comment
+
+    /*! block doc comment with bare CR: '
+' */
+    //~^ ERROR: bare CR not allowed in block doc-comment
+
+    // the following string literal has a bare CR in it
+    let _s = "foo
+bar"; //~ ERROR: bare CR not allowed in string
+
+    // the following string literal has a bare CR in it
+    let _s = r"bar
+foo"; //~ ERROR: bare CR not allowed in raw string
+
+    // the following string literal has a bare CR in it
+    let _s = "foo\
+bar"; //~ ERROR: unknown character escape: `\r`
+}
diff --git a/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr b/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr
new file mode 100644
index 000000000..1a21fed63
--- /dev/null
+++ b/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr
@@ -0,0 +1,46 @@
+error: bare CR not allowed in doc-comment
+  --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:3:32
+   |
+LL | /// doc comment with bare CR: '
+'
+   |                                ^
+
+error: bare CR not allowed in block doc-comment
+  --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:7:38
+   |
+LL | /** block doc comment with bare CR: '
+' */
+   |                                      ^
+
+error: bare CR not allowed in doc-comment
+  --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:12:36
+   |
+LL |     //! doc comment with bare CR: '
+'
+   |                                    ^
+
+error: bare CR not allowed in block doc-comment
+  --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:15:42
+   |
+LL |     /*! block doc comment with bare CR: '
+' */
+   |                                          ^
+
+error: bare CR not allowed in string, use `\r` instead
+  --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:19:18
+   |
+LL |     let _s = "foo
+bar";
+   |                  ^ help: escape the character: `\r`
+
+error: bare CR not allowed in raw string
+  --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:22:19
+   |
+LL |     let _s = r"bar
+foo";
+   |                   ^
+
+error: unknown character escape: `\r`
+  --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:25:19
+   |
+LL |     let _s = "foo\
+bar";
+   |                   ^ unknown character escape
+   |
+   = help: this is an isolated carriage return; consider checking your editor and version control settings
+
+error: aborting due to 7 previous errors
+
diff --git a/tests/ui/lexer/lex-emoji-identifiers.rs b/tests/ui/lexer/lex-emoji-identifiers.rs
new file mode 100644
index 000000000..91b5929c0
--- /dev/null
+++ b/tests/ui/lexer/lex-emoji-identifiers.rs
@@ -0,0 +1,17 @@
+fn invalid_emoji_usages() {
+    let arrow↔️ = "basic emoji"; //~ ERROR: identifiers cannot contain emoji
+    // FIXME
+    let planet🪐 = "basic emoji"; //~ ERROR: unknown start of token
+    // FIXME
+    let wireless🛜 = "basic emoji"; //~ ERROR: unknown start of token
+    // FIXME
+    let key1️⃣ = "keycap sequence"; //~ ERROR: unknown start of token
+                                    //~^ WARN: identifier contains uncommon Unicode codepoints
+    let flag🇺🇳 = "flag sequence"; //~ ERROR: identifiers cannot contain emoji
+    let wales🏴 = "tag sequence"; //~ ERROR: identifiers cannot contain emoji
+    let folded🙏🏿 = "modifier sequence"; //~ ERROR: identifiers cannot contain emoji
+}
+
+fn main() {
+    invalid_emoji_usages();
+}
diff --git a/tests/ui/lexer/lex-emoji-identifiers.stderr b/tests/ui/lexer/lex-emoji-identifiers.stderr
new file mode 100644
index 000000000..6237c5d02
--- /dev/null
+++ b/tests/ui/lexer/lex-emoji-identifiers.stderr
@@ -0,0 +1,52 @@
+error: unknown start of token: \u{1fa90}
+  --> $DIR/lex-emoji-identifiers.rs:4:15
+   |
+LL |     let planet🪐 = "basic emoji";
+   |               ^^
+
+error: unknown start of token: \u{1f6dc}
+  --> $DIR/lex-emoji-identifiers.rs:6:17
+   |
+LL |     let wireless🛜 = "basic emoji";
+   |                 ^^
+
+error: unknown start of token: \u{20e3}
+  --> $DIR/lex-emoji-identifiers.rs:8:14
+   |
+LL |     let key1️⃣ = "keycap sequence";
+   |             ^
+
+error: identifiers cannot contain emoji: `arrow↔️`
+  --> $DIR/lex-emoji-identifiers.rs:2:9
+   |
+LL |     let arrow↔️ = "basic emoji";
+   |         ^^^^^^
+
+error: identifiers cannot contain emoji: `flag🇺🇳`
+  --> $DIR/lex-emoji-identifiers.rs:10:9
+   |
+LL |     let flag🇺🇳 = "flag sequence";
+   |         ^^^^^^
+
+error: identifiers cannot contain emoji: `wales🏴`
+  --> $DIR/lex-emoji-identifiers.rs:11:9
+   |
+LL |     let wales🏴 = "tag sequence";
+   |         ^^^^^^^
+
+error: identifiers cannot contain emoji: `folded🙏🏿`
+  --> $DIR/lex-emoji-identifiers.rs:12:9
+   |
+LL |     let folded🙏🏿 = "modifier sequence";
+   |         ^^^^^^^^^^
+
+warning: identifier contains uncommon Unicode codepoints
+  --> $DIR/lex-emoji-identifiers.rs:8:9
+   |
+LL |     let key1️⃣ = "keycap sequence";
+   |         ^^^^
+   |
+   = note: `#[warn(uncommon_codepoints)]` on by default
+
+error: aborting due to 7 previous errors; 1 warning emitted
+
diff --git a/tests/ui/lexer/lex-stray-backslash.rs b/tests/ui/lexer/lex-stray-backslash.rs
new file mode 100644
index 000000000..bb27f44c2
--- /dev/null
+++ b/tests/ui/lexer/lex-stray-backslash.rs
@@ -0,0 +1,3 @@
+\ //~ ERROR: unknown start of token: \
+
+fn main() {}
diff --git a/tests/ui/lexer/lex-stray-backslash.stderr b/tests/ui/lexer/lex-stray-backslash.stderr
new file mode 100644
index 000000000..06dc0f2b5
--- /dev/null
+++ b/tests/ui/lexer/lex-stray-backslash.stderr
@@ -0,0 +1,8 @@
+error: unknown start of token: \
+  --> $DIR/lex-stray-backslash.rs:1:1
+   |
+LL | \
+   | ^
+
+error: aborting due to previous error
+
diff --git a/tests/ui/lexer/lexer-crlf-line-endings-string-literal-doc-comment.rs b/tests/ui/lexer/lexer-crlf-line-endings-string-literal-doc-comment.rs
new file mode 100644
index 000000000..802be7f5a
--- /dev/null
+++ b/tests/ui/lexer/lexer-crlf-line-endings-string-literal-doc-comment.rs
@@ -0,0 +1,38 @@
+// run-pass
+// ignore-tidy-cr
+// ignore-tidy-cr (repeated again because of tidy bug)
+// license is ignored because tidy can't handle the CRLF here properly.
+
+// N.B., this file needs CRLF line endings. The .gitattributes file in
+// this directory should enforce it.
+
+// ignore-pretty issue #37195
+
+/// Doc comment that ends in CRLF
+pub fn foo() {}
+
+/** Block doc comment that
+ *  contains CRLF characters
+ */
+pub fn bar() {}
+
+fn main() {
+    let s = "string
+literal";
+    assert_eq!(s, "string\nliteral");
+
+    let s = "literal with \
+             escaped newline";
+    assert_eq!(s, "literal with escaped newline");
+
+    let s = r"string
+literal";
+    assert_eq!(s, "string\nliteral");
+    let s = br"byte string
+literal";
+    assert_eq!(s, "byte string\nliteral".as_bytes());
+
+    // validate that our source file has CRLF endings
+    let source = include_str!("lexer-crlf-line-endings-string-literal-doc-comment.rs");
+    assert!(source.contains("string\r\nliteral"));
+}