From 218caa410aa38c29984be31a5229b9fa717560ee Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:19:13 +0200 Subject: Merging upstream version 1.68.2+dfsg1. Signed-off-by: Daniel Baumann --- compiler/rustc_parse_format/src/lib.rs | 162 +++++++++++++++++++++------------ 1 file changed, 105 insertions(+), 57 deletions(-) (limited to 'compiler/rustc_parse_format/src/lib.rs') diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs index 0113eb4e3..7b016cada 100644 --- a/compiler/rustc_parse_format/src/lib.rs +++ b/compiler/rustc_parse_format/src/lib.rs @@ -38,6 +38,31 @@ impl InnerSpan { } } +/// The location and before/after width of a character whose width has changed from its source code +/// representation +#[derive(Copy, Clone, PartialEq, Eq)] +pub struct InnerWidthMapping { + /// Index of the character in the source + pub position: usize, + /// The inner width in characters + pub before: usize, + /// The transformed width in characters + pub after: usize, +} + +impl InnerWidthMapping { + pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping { + InnerWidthMapping { position, before, after } + } +} + +/// Whether the input string is a literal. If yes, it contains the inner width mappings. +#[derive(Clone, PartialEq, Eq)] +enum InputStringKind { + NotALiteral, + Literal { width_mappings: Vec }, +} + /// The type of format string that we are parsing. #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum ParseMode { @@ -58,13 +83,13 @@ impl InnerOffset { /// A piece is a portion of the format string which represents the next part /// to emit. These are emitted as a stream by the `Parser` class. -#[derive(Copy, Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub enum Piece<'a> { /// A literal string which should directly be emitted String(&'a str), /// This describes that formatting should process the next argument (as /// specified inside) for emission. - NextArgument(Argument<'a>), + NextArgument(Box>), } /// Representation of an argument specification. @@ -200,8 +225,8 @@ pub struct Parser<'a> { style: Option, /// Start and end byte offset of every successfully parsed argument pub arg_places: Vec, - /// Characters that need to be shifted - skips: Vec, + /// Characters whose length has been changed from their in-code representation + width_map: Vec, /// Span of the last opening brace seen, used for error reporting last_opening_brace: Option, /// Whether the source string is comes from `println!` as opposed to `format!` or `print!` @@ -224,7 +249,7 @@ impl<'a> Iterator for Parser<'a> { '{' => { let curr_last_brace = self.last_opening_brace; let byte_pos = self.to_span_index(pos); - let lbrace_end = self.to_span_index(pos + 1); + let lbrace_end = InnerOffset(byte_pos.0 + self.to_span_width(pos)); self.last_opening_brace = Some(byte_pos.to(lbrace_end)); self.cur.next(); if self.consume('{') { @@ -233,18 +258,21 @@ impl<'a> Iterator for Parser<'a> { Some(String(self.string(pos + 1))) } else { let arg = self.argument(lbrace_end); - if let Some(rbrace_byte_idx) = self.must_consume('}') { - let lbrace_inner_offset = self.to_span_index(pos); - let rbrace_inner_offset = self.to_span_index(rbrace_byte_idx); + if let Some(rbrace_pos) = self.must_consume('}') { if self.is_literal { + let lbrace_byte_pos = self.to_span_index(pos); + let rbrace_byte_pos = self.to_span_index(rbrace_pos); + + let width = self.to_span_width(rbrace_pos); + self.arg_places.push( - lbrace_inner_offset.to(InnerOffset(rbrace_inner_offset.0 + 1)), + lbrace_byte_pos.to(InnerOffset(rbrace_byte_pos.0 + width)), ); } } else { self.suggest_positional_arg_instead_of_captured_arg(arg); } - Some(NextArgument(arg)) + Some(NextArgument(Box::new(arg))) } } '}' => { @@ -285,7 +313,12 @@ impl<'a> Parser<'a> { append_newline: bool, mode: ParseMode, ) -> Parser<'a> { - let (skips, is_literal) = find_skips_from_snippet(snippet, style); + let input_string_kind = find_width_map_from_snippet(snippet, style); + let (width_map, is_literal) = match input_string_kind { + InputStringKind::Literal { width_mappings } => (width_mappings, true), + InputStringKind::NotALiteral => (Vec::new(), false), + }; + Parser { mode, input: s, @@ -294,7 +327,7 @@ impl<'a> Parser<'a> { curarg: 0, style, arg_places: vec![], - skips, + width_map, last_opening_brace: None, append_newline, is_literal, @@ -367,21 +400,34 @@ impl<'a> Parser<'a> { None } + fn remap_pos(&self, mut pos: usize) -> InnerOffset { + for width in &self.width_map { + if pos > width.position { + pos += width.before - width.after; + } else if pos == width.position && width.after == 0 { + pos += width.before; + } else { + break; + } + } + + InnerOffset(pos) + } + fn to_span_index(&self, pos: usize) -> InnerOffset { - let mut pos = pos; // This handles the raw string case, the raw argument is the number of # // in r###"..."### (we need to add one because of the `r`). let raw = self.style.map_or(0, |raw| raw + 1); - for skip in &self.skips { - if pos > *skip { - pos += 1; - } else if pos == *skip && raw == 0 { - pos += 1; - } else { - break; - } + let pos = self.remap_pos(pos); + InnerOffset(raw + pos.0 + 1) + } + + fn to_span_width(&self, pos: usize) -> usize { + let pos = self.remap_pos(pos); + match self.width_map.iter().find(|w| w.position == pos.0) { + Some(w) => w.before, + None => 1, } - InnerOffset(raw + pos + 1) } fn span(&self, start_pos: usize, end_pos: usize) -> InnerSpan { @@ -401,7 +447,7 @@ impl<'a> Parser<'a> { Some(pos) } else { let pos = self.to_span_index(pos); - let description = format!("expected `'}}'`, found `{:?}`", maybe); + let description = format!("expected `'}}'`, found `{maybe:?}`"); let label = "expected `}`".to_owned(); let (note, secondary_label) = if c == '}' { ( @@ -425,12 +471,12 @@ impl<'a> Parser<'a> { None } } else { - let description = format!("expected `{:?}` but string was terminated", c); + let description = format!("expected `{c:?}` but string was terminated"); // point at closing `"` let pos = self.input.len() - if self.append_newline { 1 } else { 0 }; let pos = self.to_span_index(pos); if c == '}' { - let label = format!("expected `{:?}`", c); + let label = format!("expected `{c:?}`"); let (note, secondary_label) = if c == '}' { ( Some( @@ -451,7 +497,7 @@ impl<'a> Parser<'a> { should_be_replaced_with_positional_argument: false, }); } else { - self.err(description, format!("expected `{:?}`", c), pos.to(pos)); + self.err(description, format!("expected `{c:?}`"), pos.to(pos)); } None } @@ -809,59 +855,55 @@ impl<'a> Parser<'a> { /// Finds the indices of all characters that have been processed and differ between the actual /// written code (code snippet) and the `InternedString` that gets processed in the `Parser` /// in order to properly synthesise the intra-string `Span`s for error diagnostics. -fn find_skips_from_snippet( +fn find_width_map_from_snippet( snippet: Option, str_style: Option, -) -> (Vec, bool) { +) -> InputStringKind { let snippet = match snippet { Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s, - _ => return (vec![], false), + _ => return InputStringKind::NotALiteral, }; if str_style.is_some() { - return (vec![], true); + return InputStringKind::Literal { width_mappings: Vec::new() }; } let snippet = &snippet[1..snippet.len() - 1]; let mut s = snippet.char_indices(); - let mut skips = vec![]; + let mut width_mappings = vec![]; while let Some((pos, c)) = s.next() { match (c, s.clone().next()) { // skip whitespace and empty lines ending in '\\' - ('\\', Some((next_pos, '\n'))) => { - skips.push(pos); - skips.push(next_pos); + ('\\', Some((_, '\n'))) => { let _ = s.next(); + let mut width = 2; - while let Some((pos, c)) = s.clone().next() { + while let Some((_, c)) = s.clone().next() { if matches!(c, ' ' | '\n' | '\t') { - skips.push(pos); + width += 1; let _ = s.next(); } else { break; } } + + width_mappings.push(InnerWidthMapping::new(pos, width, 0)); } - ('\\', Some((next_pos, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => { - skips.push(next_pos); + ('\\', Some((_, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => { + width_mappings.push(InnerWidthMapping::new(pos, 2, 1)); let _ = s.next(); } ('\\', Some((_, 'x'))) => { - for _ in 0..3 { - // consume `\xAB` literal - if let Some((pos, _)) = s.next() { - skips.push(pos); - } else { - break; - } - } + // consume `\xAB` literal + s.nth(2); + width_mappings.push(InnerWidthMapping::new(pos, 4, 1)); } ('\\', Some((_, 'u'))) => { - if let Some((pos, _)) = s.next() { - skips.push(pos); - } - if let Some((next_pos, next_c)) = s.next() { + let mut width = 2; + let _ = s.next(); + + if let Some((_, next_c)) = s.next() { if next_c == '{' { // consume up to 6 hexanumeric chars let digits_len = @@ -881,19 +923,18 @@ fn find_skips_from_snippet( let required_skips = digits_len.saturating_sub(len_utf8.saturating_sub(1)); // skip '{' and '}' also - for pos in (next_pos..).take(required_skips + 2) { - skips.push(pos) - } + width += required_skips + 2; s.nth(digits_len); } else if next_c.is_digit(16) { - skips.push(next_pos); + width += 1; + // We suggest adding `{` and `}` when appropriate, accept it here as if // it were correct let mut i = 0; // consume up to 6 hexanumeric chars - while let (Some((next_pos, c)), _) = (s.next(), i < 6) { + while let (Some((_, c)), _) = (s.next(), i < 6) { if c.is_digit(16) { - skips.push(next_pos); + width += 1; } else { break; } @@ -901,12 +942,19 @@ fn find_skips_from_snippet( } } } + + width_mappings.push(InnerWidthMapping::new(pos, width, 1)); } _ => {} } } - (skips, true) + + InputStringKind::Literal { width_mappings } } +// Assert a reasonable size for `Piece` +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +rustc_data_structures::static_assert_size!(Piece<'_>, 16); + #[cfg(test)] mod tests; -- cgit v1.2.3