summaryrefslogtreecommitdiffstats
path: root/compiler/rustc_parse_format/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_parse_format/src/lib.rs')
-rw-r--r--compiler/rustc_parse_format/src/lib.rs162
1 files changed, 105 insertions, 57 deletions
diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs
index 0113eb4e3..7b016cada 100644
--- a/compiler/rustc_parse_format/src/lib.rs
+++ b/compiler/rustc_parse_format/src/lib.rs
@@ -38,6 +38,31 @@ impl InnerSpan {
}
}
+/// The location and before/after width of a character whose width has changed from its source code
+/// representation
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub struct InnerWidthMapping {
+ /// Index of the character in the source
+ pub position: usize,
+ /// The inner width in characters
+ pub before: usize,
+ /// The transformed width in characters
+ pub after: usize,
+}
+
+impl InnerWidthMapping {
+ pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping {
+ InnerWidthMapping { position, before, after }
+ }
+}
+
+/// Whether the input string is a literal. If yes, it contains the inner width mappings.
+#[derive(Clone, PartialEq, Eq)]
+enum InputStringKind {
+ NotALiteral,
+ Literal { width_mappings: Vec<InnerWidthMapping> },
+}
+
/// The type of format string that we are parsing.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ParseMode {
@@ -58,13 +83,13 @@ impl InnerOffset {
/// A piece is a portion of the format string which represents the next part
/// to emit. These are emitted as a stream by the `Parser` class.
-#[derive(Copy, Clone, Debug, PartialEq)]
+#[derive(Clone, Debug, PartialEq)]
pub enum Piece<'a> {
/// A literal string which should directly be emitted
String(&'a str),
/// This describes that formatting should process the next argument (as
/// specified inside) for emission.
- NextArgument(Argument<'a>),
+ NextArgument(Box<Argument<'a>>),
}
/// Representation of an argument specification.
@@ -200,8 +225,8 @@ pub struct Parser<'a> {
style: Option<usize>,
/// Start and end byte offset of every successfully parsed argument
pub arg_places: Vec<InnerSpan>,
- /// Characters that need to be shifted
- skips: Vec<usize>,
+ /// Characters whose length has been changed from their in-code representation
+ width_map: Vec<InnerWidthMapping>,
/// Span of the last opening brace seen, used for error reporting
last_opening_brace: Option<InnerSpan>,
/// Whether the source string is comes from `println!` as opposed to `format!` or `print!`
@@ -224,7 +249,7 @@ impl<'a> Iterator for Parser<'a> {
'{' => {
let curr_last_brace = self.last_opening_brace;
let byte_pos = self.to_span_index(pos);
- let lbrace_end = self.to_span_index(pos + 1);
+ let lbrace_end = InnerOffset(byte_pos.0 + self.to_span_width(pos));
self.last_opening_brace = Some(byte_pos.to(lbrace_end));
self.cur.next();
if self.consume('{') {
@@ -233,18 +258,21 @@ impl<'a> Iterator for Parser<'a> {
Some(String(self.string(pos + 1)))
} else {
let arg = self.argument(lbrace_end);
- if let Some(rbrace_byte_idx) = self.must_consume('}') {
- let lbrace_inner_offset = self.to_span_index(pos);
- let rbrace_inner_offset = self.to_span_index(rbrace_byte_idx);
+ if let Some(rbrace_pos) = self.must_consume('}') {
if self.is_literal {
+ let lbrace_byte_pos = self.to_span_index(pos);
+ let rbrace_byte_pos = self.to_span_index(rbrace_pos);
+
+ let width = self.to_span_width(rbrace_pos);
+
self.arg_places.push(
- lbrace_inner_offset.to(InnerOffset(rbrace_inner_offset.0 + 1)),
+ lbrace_byte_pos.to(InnerOffset(rbrace_byte_pos.0 + width)),
);
}
} else {
self.suggest_positional_arg_instead_of_captured_arg(arg);
}
- Some(NextArgument(arg))
+ Some(NextArgument(Box::new(arg)))
}
}
'}' => {
@@ -285,7 +313,12 @@ impl<'a> Parser<'a> {
append_newline: bool,
mode: ParseMode,
) -> Parser<'a> {
- let (skips, is_literal) = find_skips_from_snippet(snippet, style);
+ let input_string_kind = find_width_map_from_snippet(snippet, style);
+ let (width_map, is_literal) = match input_string_kind {
+ InputStringKind::Literal { width_mappings } => (width_mappings, true),
+ InputStringKind::NotALiteral => (Vec::new(), false),
+ };
+
Parser {
mode,
input: s,
@@ -294,7 +327,7 @@ impl<'a> Parser<'a> {
curarg: 0,
style,
arg_places: vec![],
- skips,
+ width_map,
last_opening_brace: None,
append_newline,
is_literal,
@@ -367,21 +400,34 @@ impl<'a> Parser<'a> {
None
}
+ fn remap_pos(&self, mut pos: usize) -> InnerOffset {
+ for width in &self.width_map {
+ if pos > width.position {
+ pos += width.before - width.after;
+ } else if pos == width.position && width.after == 0 {
+ pos += width.before;
+ } else {
+ break;
+ }
+ }
+
+ InnerOffset(pos)
+ }
+
fn to_span_index(&self, pos: usize) -> InnerOffset {
- let mut pos = pos;
// This handles the raw string case, the raw argument is the number of #
// in r###"..."### (we need to add one because of the `r`).
let raw = self.style.map_or(0, |raw| raw + 1);
- for skip in &self.skips {
- if pos > *skip {
- pos += 1;
- } else if pos == *skip && raw == 0 {
- pos += 1;
- } else {
- break;
- }
+ let pos = self.remap_pos(pos);
+ InnerOffset(raw + pos.0 + 1)
+ }
+
+ fn to_span_width(&self, pos: usize) -> usize {
+ let pos = self.remap_pos(pos);
+ match self.width_map.iter().find(|w| w.position == pos.0) {
+ Some(w) => w.before,
+ None => 1,
}
- InnerOffset(raw + pos + 1)
}
fn span(&self, start_pos: usize, end_pos: usize) -> InnerSpan {
@@ -401,7 +447,7 @@ impl<'a> Parser<'a> {
Some(pos)
} else {
let pos = self.to_span_index(pos);
- let description = format!("expected `'}}'`, found `{:?}`", maybe);
+ let description = format!("expected `'}}'`, found `{maybe:?}`");
let label = "expected `}`".to_owned();
let (note, secondary_label) = if c == '}' {
(
@@ -425,12 +471,12 @@ impl<'a> Parser<'a> {
None
}
} else {
- let description = format!("expected `{:?}` but string was terminated", c);
+ let description = format!("expected `{c:?}` but string was terminated");
// point at closing `"`
let pos = self.input.len() - if self.append_newline { 1 } else { 0 };
let pos = self.to_span_index(pos);
if c == '}' {
- let label = format!("expected `{:?}`", c);
+ let label = format!("expected `{c:?}`");
let (note, secondary_label) = if c == '}' {
(
Some(
@@ -451,7 +497,7 @@ impl<'a> Parser<'a> {
should_be_replaced_with_positional_argument: false,
});
} else {
- self.err(description, format!("expected `{:?}`", c), pos.to(pos));
+ self.err(description, format!("expected `{c:?}`"), pos.to(pos));
}
None
}
@@ -809,59 +855,55 @@ impl<'a> Parser<'a> {
/// Finds the indices of all characters that have been processed and differ between the actual
/// written code (code snippet) and the `InternedString` that gets processed in the `Parser`
/// in order to properly synthesise the intra-string `Span`s for error diagnostics.
-fn find_skips_from_snippet(
+fn find_width_map_from_snippet(
snippet: Option<string::String>,
str_style: Option<usize>,
-) -> (Vec<usize>, bool) {
+) -> InputStringKind {
let snippet = match snippet {
Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s,
- _ => return (vec![], false),
+ _ => return InputStringKind::NotALiteral,
};
if str_style.is_some() {
- return (vec![], true);
+ return InputStringKind::Literal { width_mappings: Vec::new() };
}
let snippet = &snippet[1..snippet.len() - 1];
let mut s = snippet.char_indices();
- let mut skips = vec![];
+ let mut width_mappings = vec![];
while let Some((pos, c)) = s.next() {
match (c, s.clone().next()) {
// skip whitespace and empty lines ending in '\\'
- ('\\', Some((next_pos, '\n'))) => {
- skips.push(pos);
- skips.push(next_pos);
+ ('\\', Some((_, '\n'))) => {
let _ = s.next();
+ let mut width = 2;
- while let Some((pos, c)) = s.clone().next() {
+ while let Some((_, c)) = s.clone().next() {
if matches!(c, ' ' | '\n' | '\t') {
- skips.push(pos);
+ width += 1;
let _ = s.next();
} else {
break;
}
}
+
+ width_mappings.push(InnerWidthMapping::new(pos, width, 0));
}
- ('\\', Some((next_pos, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
- skips.push(next_pos);
+ ('\\', Some((_, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
+ width_mappings.push(InnerWidthMapping::new(pos, 2, 1));
let _ = s.next();
}
('\\', Some((_, 'x'))) => {
- for _ in 0..3 {
- // consume `\xAB` literal
- if let Some((pos, _)) = s.next() {
- skips.push(pos);
- } else {
- break;
- }
- }
+ // consume `\xAB` literal
+ s.nth(2);
+ width_mappings.push(InnerWidthMapping::new(pos, 4, 1));
}
('\\', Some((_, 'u'))) => {
- if let Some((pos, _)) = s.next() {
- skips.push(pos);
- }
- if let Some((next_pos, next_c)) = s.next() {
+ let mut width = 2;
+ let _ = s.next();
+
+ if let Some((_, next_c)) = s.next() {
if next_c == '{' {
// consume up to 6 hexanumeric chars
let digits_len =
@@ -881,19 +923,18 @@ fn find_skips_from_snippet(
let required_skips = digits_len.saturating_sub(len_utf8.saturating_sub(1));
// skip '{' and '}' also
- for pos in (next_pos..).take(required_skips + 2) {
- skips.push(pos)
- }
+ width += required_skips + 2;
s.nth(digits_len);
} else if next_c.is_digit(16) {
- skips.push(next_pos);
+ width += 1;
+
// We suggest adding `{` and `}` when appropriate, accept it here as if
// it were correct
let mut i = 0; // consume up to 6 hexanumeric chars
- while let (Some((next_pos, c)), _) = (s.next(), i < 6) {
+ while let (Some((_, c)), _) = (s.next(), i < 6) {
if c.is_digit(16) {
- skips.push(next_pos);
+ width += 1;
} else {
break;
}
@@ -901,12 +942,19 @@ fn find_skips_from_snippet(
}
}
}
+
+ width_mappings.push(InnerWidthMapping::new(pos, width, 1));
}
_ => {}
}
}
- (skips, true)
+
+ InputStringKind::Literal { width_mappings }
}
+// Assert a reasonable size for `Piece`
+#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
+rustc_data_structures::static_assert_size!(Piece<'_>, 16);
+
#[cfg(test)]
mod tests;