author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 03:59:35 +0000
---|---|---
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 03:59:35 +0000
commit | d1b2d29528b7794b41e66fc2136e395a02f8529b (patch) |
tree | a4a17504b260206dec3cf55b2dca82929a348ac2 | /src/tools/rust-analyzer/crates/mbe
parent | Releasing progress-linux version 1.72.1+dfsg1-1~progress7.99u1. (diff) |
download | rustc-d1b2d29528b7794b41e66fc2136e395a02f8529b.tar.xz, rustc-d1b2d29528b7794b41e66fc2136e395a02f8529b.zip |
Merging upstream version 1.73.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/tools/rust-analyzer/crates/mbe')
8 files changed, 278 insertions, 42 deletions
diff --git a/src/tools/rust-analyzer/crates/mbe/src/benchmark.rs b/src/tools/rust-analyzer/crates/mbe/src/benchmark.rs
index d28dd17de..9d43e1304 100644
--- a/src/tools/rust-analyzer/crates/mbe/src/benchmark.rs
+++ b/src/tools/rust-analyzer/crates/mbe/src/benchmark.rs
@@ -20,10 +20,7 @@ fn benchmark_parse_macro_rules() {
    let rules = macro_rules_fixtures_tt();
    let hash: usize = {
        let _pt = bench("mbe parse macro rules");
-        rules
-            .values()
-            .map(|it| DeclarativeMacro::parse_macro_rules(it, true).unwrap().rules.len())
-            .sum()
+        rules.values().map(|it| DeclarativeMacro::parse_macro_rules(it, true).rules.len()).sum()
    };
    assert_eq!(hash, 1144);
}
@@ -41,7 +38,7 @@ fn benchmark_expand_macro_rules() {
        invocations
            .into_iter()
            .map(|(id, tt)| {
-                let res = rules[&id].expand(&tt);
+                let res = rules[&id].expand(tt);
                assert!(res.err.is_none());
                res.value.token_trees.len()
            })
@@ -53,7 +50,7 @@ fn macro_rules_fixtures() -> FxHashMap<String, DeclarativeMacro> {
    macro_rules_fixtures_tt()
        .into_iter()
-        .map(|(id, tt)| (id, DeclarativeMacro::parse_macro_rules(&tt, true).unwrap()))
+        .map(|(id, tt)| (id, DeclarativeMacro::parse_macro_rules(&tt, true)))
        .collect()
}
@@ -105,7 +102,7 @@ fn invocation_fixtures(rules: &FxHashMap<String, DeclarativeMacro>) -> Vec<(Stri
        for op in rule.lhs.iter() {
            collect_from_op(op, &mut subtree, &mut seed);
        }
-        if it.expand(&subtree).err.is_none() {
+        if it.expand(subtree.clone()).err.is_none() {
            res.push((name.clone(), subtree));
            break;
        }
diff --git a/src/tools/rust-analyzer/crates/mbe/src/expander.rs b/src/tools/rust-analyzer/crates/mbe/src/expander.rs
index 8e2181e97..f2d89d3ef 100644
--- a/src/tools/rust-analyzer/crates/mbe/src/expander.rs
+++ b/src/tools/rust-analyzer/crates/mbe/src/expander.rs
@@ -123,4 +123,14 @@ enum Fragment {
    /// proc-macro delimiter=none. As we later discovered, "none" delimiters are
    /// tricky to handle in the parser, and rustc doesn't handle those either.
    Expr(tt::TokenTree),
+    /// There are roughly two types of paths: paths in expression context, where a
+    /// separator `::` between an identifier and its following generic argument list
+    /// is mandatory, and paths in type context, where `::` can be omitted.
+    ///
+    /// Unlike rustc, we need to transform the parsed fragments back into tokens
+    /// during transcription. When the matched path fragment is a type-context path
+    /// and is transcribed as an expression-context path, verbatim transcription
+    /// would cause a syntax error. We need to fix it up just before transcribing;
+    /// see `transcriber::fix_up_and_push_path_tt()`.
+    Path(tt::TokenTree),
}
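The new `Fragment::Path` variant is easiest to motivate with a concrete macro. The snippet below is illustrative and not part of this commit; it shows a type-context path captured by `$p:path` landing in expression position, where the emitted tokens need an extra `::` to stay parseable.

```rust
macro_rules! none_of {
    ($p:path) => { $p };
}

fn main() {
    // `Option<i32>::None` is written in type-path style: no `::` before the
    // generic arguments. The expansion drops it into expression position,
    // where the tokens must read `Option::<i32>::None`; emitting the matched
    // tokens verbatim would be a parse error. rustc avoids the problem by
    // reusing the parsed AST node, but a token-based expander such as mbe
    // must insert the `::` during transcription.
    let x = none_of!(Option<i32>::None);
    assert!(x.is_none());
}
```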
diff --git a/src/tools/rust-analyzer/crates/mbe/src/expander/matcher.rs b/src/tools/rust-analyzer/crates/mbe/src/expander/matcher.rs
index 474826079..1471af98b 100644
--- a/src/tools/rust-analyzer/crates/mbe/src/expander/matcher.rs
+++ b/src/tools/rust-analyzer/crates/mbe/src/expander/matcher.rs
@@ -742,7 +742,11 @@ fn match_meta_var(
    is_2021: bool,
) -> ExpandResult<Option<Fragment>> {
    let fragment = match kind {
-        MetaVarKind::Path => parser::PrefixEntryPoint::Path,
+        MetaVarKind::Path => {
+            return input
+                .expect_fragment(parser::PrefixEntryPoint::Path)
+                .map(|it| it.map(Fragment::Path));
+        }
        MetaVarKind::Ty => parser::PrefixEntryPoint::Ty,
        MetaVarKind::Pat if is_2021 => parser::PrefixEntryPoint::PatTop,
        MetaVarKind::Pat => parser::PrefixEntryPoint::Pat,
@@ -771,7 +775,7 @@ fn match_meta_var(
                .expect_fragment(parser::PrefixEntryPoint::Expr)
                .map(|tt| tt.map(Fragment::Expr));
        }
-        _ => {
+        MetaVarKind::Ident | MetaVarKind::Tt | MetaVarKind::Lifetime | MetaVarKind::Literal => {
            let tt_result = match kind {
                MetaVarKind::Ident => input
                    .expect_ident()
@@ -799,7 +803,7 @@
                        })
                        .map_err(|()| ExpandError::binding_error("expected literal"))
                }
-                _ => Err(ExpandError::UnexpectedToken),
+                _ => unreachable!(),
            };
            return tt_result.map(|it| Some(Fragment::Tokens(it))).into();
        }
@@ -884,7 +888,7 @@ impl<'a> Iterator for OpDelimitedIter<'a> {
    }
}

-impl<'a> TtIter<'a> {
+impl TtIter<'_> {
    fn expect_separator(&mut self, separator: &Separator) -> bool {
        let mut fork = self.clone();
        let ok = match separator {
diff --git a/src/tools/rust-analyzer/crates/mbe/src/expander/transcriber.rs b/src/tools/rust-analyzer/crates/mbe/src/expander/transcriber.rs
index 6161af185..cdac2f1e3 100644
--- a/src/tools/rust-analyzer/crates/mbe/src/expander/transcriber.rs
+++ b/src/tools/rust-analyzer/crates/mbe/src/expander/transcriber.rs
@@ -400,7 +400,8 @@ fn push_fragment(buf: &mut Vec<tt::TokenTree>, fragment: Fragment) {
            }
            buf.push(tt.into())
        }
-        Fragment::Tokens(tt) | Fragment::Expr(tt) => buf.push(tt),
+        Fragment::Path(tt::TokenTree::Subtree(tt)) => fix_up_and_push_path_tt(buf, tt),
+        Fragment::Tokens(tt) | Fragment::Expr(tt) | Fragment::Path(tt) => buf.push(tt),
    }
}
@@ -411,6 +412,45 @@ fn push_subtree(buf: &mut Vec<tt::TokenTree>, tt: tt::Subtree) {
    }
}

+/// Inserts the path separator `::` between an identifier and its following generic
+/// argument list, and then pushes into the buffer. See [`Fragment::Path`] for why
+/// we need this fixup.
+fn fix_up_and_push_path_tt(buf: &mut Vec<tt::TokenTree>, subtree: tt::Subtree) {
+    stdx::always!(matches!(subtree.delimiter.kind, tt::DelimiterKind::Invisible));
+    let mut prev_was_ident = false;
+    // Note that we only need to fix up the top-level `TokenTree`s because the
+    // context of the paths in the descendant `Subtree`s won't be changed by the
+    // mbe transcription.
+    for tt in subtree.token_trees {
+        if prev_was_ident {
+            // Pedantically, `(T) -> U` in `FnOnce(T) -> U` is treated as a generic
+            // argument list and thus needs `::` between it and `FnOnce`. However in
+            // today's Rust this type of path *semantically* cannot appear as a
+            // top-level expression-context path, so we can safely ignore it.
+            if let tt::TokenTree::Leaf(tt::Leaf::Punct(tt::Punct { char: '<', .. })) = tt {
+                buf.push(
+                    tt::Leaf::Punct(tt::Punct {
+                        char: ':',
+                        spacing: tt::Spacing::Joint,
+                        span: tt::Span::unspecified(),
+                    })
+                    .into(),
+                );
+                buf.push(
+                    tt::Leaf::Punct(tt::Punct {
+                        char: ':',
+                        spacing: tt::Spacing::Alone,
+                        span: tt::Span::unspecified(),
+                    })
+                    .into(),
+                );
+            }
+        }
+        prev_was_ident = matches!(tt, tt::TokenTree::Leaf(tt::Leaf::Ident(_)));
+        buf.push(tt);
+    }
+}
+
/// Handles `${count(t, depth)}`. `our_depth` is the recursion depth and `count_depth` is the depth
/// defined by the metavar expression.
fn count(
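As a minimal standalone sketch of what `fix_up_and_push_path_tt` does, here is the same idea over a stand-in token type instead of the internal `tt` crate (all names in this sketch are hypothetical): whenever a top-level identifier is immediately followed by `<`, splice in the two `:` punct tokens.

```rust
#[derive(Clone, Debug, PartialEq)]
enum Tok {
    Ident(String),
    Punct(char),
}

fn fix_up_path(tokens: &[Tok]) -> Vec<Tok> {
    let mut out = Vec::with_capacity(tokens.len() + 2);
    let mut prev_was_ident = false;
    for t in tokens {
        // Type-context `Ident<...>` becomes expression-context `Ident::<...>`.
        if prev_was_ident && *t == Tok::Punct('<') {
            out.push(Tok::Punct(':'));
            out.push(Tok::Punct(':'));
        }
        prev_was_ident = matches!(t, Tok::Ident(_));
        out.push(t.clone());
    }
    out
}

fn main() {
    // `Vec<u8>` is rewritten to `Vec::<u8>`.
    let input = vec![
        Tok::Ident("Vec".into()),
        Tok::Punct('<'),
        Tok::Ident("u8".into()),
        Tok::Punct('>'),
    ];
    assert_eq!(
        fix_up_path(&input),
        vec![
            Tok::Ident("Vec".into()),
            Tok::Punct(':'),
            Tok::Punct(':'),
            Tok::Punct('<'),
            Tok::Ident("u8".into()),
            Tok::Punct('>'),
        ],
    );
}
```

Like the real implementation, the sketch only touches the top level: paths nested in inner subtrees keep their context through transcription, so they need no fixup.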
diff --git a/src/tools/rust-analyzer/crates/mbe/src/lib.rs b/src/tools/rust-analyzer/crates/mbe/src/lib.rs
index 5ef20ff8a..9d886a1c9 100644
--- a/src/tools/rust-analyzer/crates/mbe/src/lib.rs
+++ b/src/tools/rust-analyzer/crates/mbe/src/lib.rs
@@ -28,13 +28,13 @@ use crate::{
    tt_iter::TtIter,
};

-// FIXME: we probably should re-think `token_tree_to_syntax_node` interfaces
pub use self::tt::{Delimiter, DelimiterKind, Punct};
pub use ::parser::TopEntryPoint;

pub use crate::{
    syntax_bridge::{
-        parse_exprs_with_sep, parse_to_token_tree, syntax_node_to_token_tree,
+        parse_exprs_with_sep, parse_to_token_tree, syntax_node_to_token_map,
+        syntax_node_to_token_map_with_modifications, syntax_node_to_token_tree,
        syntax_node_to_token_tree_with_modifications, token_tree_to_syntax_node,
        SyntheticToken, SyntheticTokenId,
    },
@@ -131,6 +131,7 @@ pub struct DeclarativeMacro {
    // This is used for correctly determining the behavior of the pat fragment
    // FIXME: This should be tracked by hygiene of the fragment identifier!
    is_2021: bool,
+    err: Option<Box<ParseError>>,
}

#[derive(Clone, Debug, PartialEq, Eq)]
@@ -206,81 +207,118 @@ impl Shift {
    }
}

-#[derive(Debug, Eq, PartialEq)]
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Origin {
    Def,
    Call,
}

impl DeclarativeMacro {
+    pub fn from_err(err: ParseError, is_2021: bool) -> DeclarativeMacro {
+        DeclarativeMacro {
+            rules: Box::default(),
+            shift: Shift(0),
+            is_2021,
+            err: Some(Box::new(err)),
+        }
+    }
+
    /// The old, `macro_rules! m {}` flavor.
-    pub fn parse_macro_rules(
-        tt: &tt::Subtree,
-        is_2021: bool,
-    ) -> Result<DeclarativeMacro, ParseError> {
+    pub fn parse_macro_rules(tt: &tt::Subtree, is_2021: bool) -> DeclarativeMacro {
        // Note: this parsing can be implemented using mbe machinery itself, by
        // matching against `$($lhs:tt => $rhs:tt);*` pattern, but implementing
        // manually seems easier.
        let mut src = TtIter::new(tt);
        let mut rules = Vec::new();
+        let mut err = None;
+
        while src.len() > 0 {
-            let rule = Rule::parse(&mut src, true)?;
+            let rule = match Rule::parse(&mut src, true) {
+                Ok(it) => it,
+                Err(e) => {
+                    err = Some(Box::new(e));
+                    break;
+                }
+            };
            rules.push(rule);
            if let Err(()) = src.expect_char(';') {
                if src.len() > 0 {
-                    return Err(ParseError::expected("expected `;`"));
+                    err = Some(Box::new(ParseError::expected("expected `;`")));
                }
                break;
            }
        }

        for Rule { lhs, .. } in &rules {
-            validate(lhs)?;
+            if let Err(e) = validate(lhs) {
+                err = Some(Box::new(e));
+                break;
+            }
        }

-        Ok(DeclarativeMacro { rules: rules.into_boxed_slice(), shift: Shift::new(tt), is_2021 })
+        DeclarativeMacro { rules: rules.into_boxed_slice(), shift: Shift::new(tt), is_2021, err }
    }

    /// The new, unstable `macro m {}` flavor.
-    pub fn parse_macro2(tt: &tt::Subtree, is_2021: bool) -> Result<DeclarativeMacro, ParseError> {
+    pub fn parse_macro2(tt: &tt::Subtree, is_2021: bool) -> DeclarativeMacro {
        let mut src = TtIter::new(tt);
        let mut rules = Vec::new();
+        let mut err = None;

        if tt::DelimiterKind::Brace == tt.delimiter.kind {
            cov_mark::hit!(parse_macro_def_rules);
            while src.len() > 0 {
-                let rule = Rule::parse(&mut src, true)?;
+                let rule = match Rule::parse(&mut src, true) {
+                    Ok(it) => it,
+                    Err(e) => {
+                        err = Some(Box::new(e));
+                        break;
+                    }
+                };
                rules.push(rule);
                if let Err(()) = src.expect_any_char(&[';', ',']) {
                    if src.len() > 0 {
-                        return Err(ParseError::expected("expected `;` or `,` to delimit rules"));
+                        err = Some(Box::new(ParseError::expected(
+                            "expected `;` or `,` to delimit rules",
+                        )));
                    }
                    break;
                }
            }
        } else {
            cov_mark::hit!(parse_macro_def_simple);
-            let rule = Rule::parse(&mut src, false)?;
-            if src.len() != 0 {
-                return Err(ParseError::expected("remaining tokens in macro def"));
+            match Rule::parse(&mut src, false) {
+                Ok(rule) => {
+                    if src.len() != 0 {
+                        err = Some(Box::new(ParseError::expected("remaining tokens in macro def")));
+                    }
+                    rules.push(rule);
+                }
+                Err(e) => {
+                    err = Some(Box::new(e));
+                }
            }
-            rules.push(rule);
        }

        for Rule { lhs, .. } in &rules {
-            validate(lhs)?;
+            if let Err(e) = validate(lhs) {
+                err = Some(Box::new(e));
+                break;
+            }
        }

-        Ok(DeclarativeMacro { rules: rules.into_boxed_slice(), shift: Shift::new(tt), is_2021 })
+        DeclarativeMacro { rules: rules.into_boxed_slice(), shift: Shift::new(tt), is_2021, err }
    }

-    pub fn expand(&self, tt: &tt::Subtree) -> ExpandResult<tt::Subtree> {
-        // apply shift
-        let mut tt = tt.clone();
+    pub fn expand(&self, mut tt: tt::Subtree) -> ExpandResult<tt::Subtree> {
        self.shift.shift_all(&mut tt);
        expander::expand_rules(&self.rules, &tt, self.is_2021)
    }

+    pub fn err(&self) -> Option<&ParseError> {
+        self.err.as_deref()
+    }
+
    pub fn map_id_down(&self, id: tt::TokenId) -> tt::TokenId {
        self.shift.shift(id)
    }
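The lib.rs change turns macro parsing from fallible into infallible-with-stored-error. A sketch of the resulting control flow, assuming `definition_tt` and `call_tt` come from the syntax bridge and that `ParseError`/`ExpandError` implement `Debug` as elsewhere in the crate (the helper name is hypothetical):

```rust
fn expand_best_effort(definition_tt: &tt::Subtree, call_tt: tt::Subtree) {
    // Parsing now always yields a `DeclarativeMacro`; a definition error is
    // stored inside it and queried separately, so a broken macro can still
    // be expanded best-effort from the rules that did parse.
    let mac = mbe::DeclarativeMacro::parse_macro_rules(definition_tt, /* is_2021 */ true);
    if let Some(err) = mac.err() {
        eprintln!("macro definition is broken: {err:?}");
    }
    // `expand` now takes the invocation subtree by value and shifts its
    // token ids in place instead of cloning.
    let result = mac.expand(call_tt);
    if let Some(err) = result.err {
        eprintln!("expansion failed: {err:?}");
    }
}
```

This is also why the benchmark hunks above drop `.unwrap()` and pass the invocation subtree by value: the caller decides how to surface definition errors, rather than parsing aborting outright.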
diff --git a/src/tools/rust-analyzer/crates/mbe/src/syntax_bridge.rs b/src/tools/rust-analyzer/crates/mbe/src/syntax_bridge.rs
index 8cbf0f8fc..62b2accf5 100644
--- a/src/tools/rust-analyzer/crates/mbe/src/syntax_bridge.rs
+++ b/src/tools/rust-analyzer/crates/mbe/src/syntax_bridge.rs
@@ -53,6 +53,37 @@ pub fn syntax_node_to_token_tree_with_modifications(
    (subtree, c.id_alloc.map, c.id_alloc.next_id)
}

+/// Convert the syntax node to a `TokenMap`, mapping each of its tokens to a
+/// text range.
+pub fn syntax_node_to_token_map(node: &SyntaxNode) -> TokenMap {
+    syntax_node_to_token_map_with_modifications(
+        node,
+        Default::default(),
+        0,
+        Default::default(),
+        Default::default(),
+    )
+    .0
+}
+
+/// Convert the syntax node to a `TokenMap`, applying the given synthetic-token
+/// replacements and appends.
+pub fn syntax_node_to_token_map_with_modifications(
+    node: &SyntaxNode,
+    existing_token_map: TokenMap,
+    next_id: u32,
+    replace: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
+    append: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
+) -> (TokenMap, u32) {
+    let global_offset = node.text_range().start();
+    let mut c = Converter::new(node, global_offset, existing_token_map, next_id, replace, append);
+    collect_tokens(&mut c);
+    c.id_alloc.map.shrink_to_fit();
+    always!(c.replace.is_empty(), "replace: {:?}", c.replace);
+    always!(c.append.is_empty(), "append: {:?}", c.append);
+    (c.id_alloc.map, c.id_alloc.next_id)
+}
+
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct SyntheticTokenId(pub u32);

@@ -327,6 +358,111 @@ fn convert_tokens<C: TokenConverter>(conv: &mut C) -> tt::Subtree {
    }
}

+fn collect_tokens<C: TokenConverter>(conv: &mut C) {
+    struct StackEntry {
+        idx: usize,
+        open_range: TextRange,
+        delimiter: tt::DelimiterKind,
+    }
+
+    let entry = StackEntry {
+        delimiter: tt::DelimiterKind::Invisible,
+        // never used (delimiter is `None`)
+        idx: !0,
+        open_range: TextRange::empty(TextSize::of('.')),
+    };
+    let mut stack = NonEmptyVec::new(entry);
+
+    loop {
+        let StackEntry { delimiter, .. } = stack.last_mut();
+        let (token, range) = match conv.bump() {
+            Some(it) => it,
+            None => break,
+        };
+        let synth_id = token.synthetic_id(conv);
+
+        let kind = token.kind(conv);
+        if kind == COMMENT {
+            // Since `convert_doc_comment` can fail, we need to peek the next id, so that we can
+            // figure out which token id to use for the doc comment, if it is converted successfully.
+            let next_id = conv.id_alloc().peek_next_id();
+            if let Some(_tokens) = conv.convert_doc_comment(&token, next_id) {
+                let id = conv.id_alloc().alloc(range, synth_id);
+                debug_assert_eq!(id, next_id);
+            }
+            continue;
+        }
+        if kind.is_punct() && kind != UNDERSCORE {
+            if synth_id.is_none() {
+                assert_eq!(range.len(), TextSize::of('.'));
+            }
+
+            let expected = match delimiter {
+                tt::DelimiterKind::Parenthesis => Some(T![')']),
+                tt::DelimiterKind::Brace => Some(T!['}']),
+                tt::DelimiterKind::Bracket => Some(T![']']),
+                tt::DelimiterKind::Invisible => None,
+            };
+
+            if let Some(expected) = expected {
+                if kind == expected {
+                    if let Some(entry) = stack.pop() {
+                        conv.id_alloc().close_delim(entry.idx, Some(range));
+                    }
+                    continue;
+                }
+            }
+
+            let delim = match kind {
+                T!['('] => Some(tt::DelimiterKind::Parenthesis),
+                T!['{'] => Some(tt::DelimiterKind::Brace),
+                T!['['] => Some(tt::DelimiterKind::Bracket),
+                _ => None,
+            };
+
+            if let Some(kind) = delim {
+                let (_id, idx) = conv.id_alloc().open_delim(range, synth_id);
+
+                stack.push(StackEntry { idx, open_range: range, delimiter: kind });
+                continue;
+            }
+
+            conv.id_alloc().alloc(range, synth_id);
+        } else {
+            macro_rules! make_leaf {
+                ($i:ident) => {{
+                    conv.id_alloc().alloc(range, synth_id);
+                }};
+            }
+            match kind {
+                T![true] | T![false] => make_leaf!(Ident),
+                IDENT => make_leaf!(Ident),
+                UNDERSCORE => make_leaf!(Ident),
+                k if k.is_keyword() => make_leaf!(Ident),
+                k if k.is_literal() => make_leaf!(Literal),
+                LIFETIME_IDENT => {
+                    let char_unit = TextSize::of('\'');
+                    let r = TextRange::at(range.start(), char_unit);
+                    conv.id_alloc().alloc(r, synth_id);
+
+                    let r = TextRange::at(range.start() + char_unit, range.len() - char_unit);
+                    conv.id_alloc().alloc(r, synth_id);
+                    continue;
+                }
+                _ => continue,
+            };
+        };
+    }
+
+    // If we get here, we've consumed all input tokens.
+    // We might have more than one subtree in the stack, if the delimiters are improperly balanced.
+    // Merge them so we're left with one.
+    while let Some(entry) = stack.pop() {
+        conv.id_alloc().close_delim(entry.idx, None);
+        conv.id_alloc().alloc(entry.open_range, None);
+    }
+}
+
fn is_single_token_op(kind: SyntaxKind) -> bool {
    matches!(
        kind,
@@ -509,12 +645,12 @@ trait TokenConverter: Sized {
    fn id_alloc(&mut self) -> &mut TokenIdAlloc;
}

-impl<'a> SrcToken<RawConverter<'a>> for usize {
-    fn kind(&self, ctx: &RawConverter<'a>) -> SyntaxKind {
+impl SrcToken<RawConverter<'_>> for usize {
+    fn kind(&self, ctx: &RawConverter<'_>) -> SyntaxKind {
        ctx.lexed.kind(*self)
    }

-    fn to_char(&self, ctx: &RawConverter<'a>) -> Option<char> {
+    fn to_char(&self, ctx: &RawConverter<'_>) -> Option<char> {
        ctx.lexed.text(*self).chars().next()
    }

@@ -522,12 +658,12 @@ impl<'a> SrcToken<RawConverter<'a>> for usize {
        ctx.lexed.text(*self).into()
    }

-    fn synthetic_id(&self, _ctx: &RawConverter<'a>) -> Option<SyntheticTokenId> {
+    fn synthetic_id(&self, _ctx: &RawConverter<'_>) -> Option<SyntheticTokenId> {
        None
    }
}

-impl<'a> TokenConverter for RawConverter<'a> {
+impl TokenConverter for RawConverter<'_> {
    type Token = usize;

    fn convert_doc_comment(&self, &token: &usize, span: tt::TokenId) -> Option<Vec<tt::TokenTree>> {
@@ -800,7 +936,7 @@ fn delim_to_str(d: tt::DelimiterKind, closing: bool) -> Option<&'static str> {
    Some(&texts[idx..texts.len() - (1 - idx)])
}

-impl<'a> TtTreeSink<'a> {
+impl TtTreeSink<'_> {
    /// Parses a float literal as if it were one to two name ref nodes with a dot in between.
    /// This occurs when a float literal is used as a field access.
    fn float_split(&mut self, has_pseudo_dot: bool) {
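`syntax_node_to_token_map` is a cheaper sibling of `syntax_node_to_token_tree` for callers that only need the id-to-range mapping, not the token tree itself. A sketch, assuming the `syntax` crate's usual `SourceFile` entry point (the helper name is hypothetical):

```rust
fn token_map_for(source: &str) -> mbe::TokenMap {
    // Parse the source text into a syntax tree, then build only the
    // `TokenMap` (token id -> text range), skipping `tt::Subtree`
    // construction entirely. `collect_tokens` mirrors `convert_tokens`
    // but allocates ids without materializing leaves or subtrees.
    let parse = syntax::SourceFile::parse(source);
    mbe::syntax_node_to_token_map(&parse.syntax_node())
}
```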
diff --git a/src/tools/rust-analyzer/crates/mbe/src/token_map.rs b/src/tools/rust-analyzer/crates/mbe/src/token_map.rs
index c923e7a69..73a27df5d 100644
--- a/src/tools/rust-analyzer/crates/mbe/src/token_map.rs
+++ b/src/tools/rust-analyzer/crates/mbe/src/token_map.rs
@@ -110,4 +110,15 @@ impl TokenMap {
        // FIXME: This could be accidentally quadratic
        self.entries.remove(idx);
    }
+
+    pub fn entries(&self) -> impl Iterator<Item = (tt::TokenId, TextRange)> + '_ {
+        self.entries.iter().filter_map(|&(tid, tr)| match tr {
+            TokenTextRange::Token(range) => Some((tid, range)),
+            TokenTextRange::Delimiter(_) => None,
+        })
+    }
+
+    pub fn filter(&mut self, id: impl Fn(tt::TokenId) -> bool) {
+        self.entries.retain(|&(tid, _)| id(tid));
+    }
}
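The two new `TokenMap` methods compose naturally: iterate the real-token entries (delimiter entries are filtered out), then retain only the ids a caller cares about. The `prune` helper below is hypothetical, and it assumes `tt::TokenId` is hashable as its derives suggest:

```rust
use std::collections::HashSet;

fn prune(map: &mut mbe::TokenMap, keep: &HashSet<tt::TokenId>) {
    // `entries()` yields only `TokenTextRange::Token` entries, hiding the
    // delimiter bookkeeping from callers.
    for (id, range) in map.entries() {
        eprintln!("token {id:?} covers {range:?}");
    }
    // `filter` retains exactly the entries whose id satisfies the predicate.
    map.filter(|id| keep.contains(&id));
}
```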
diff --git a/src/tools/rust-analyzer/crates/mbe/src/tt_iter.rs b/src/tools/rust-analyzer/crates/mbe/src/tt_iter.rs
index 59dbf1568..79ff8ca28 100644
--- a/src/tools/rust-analyzer/crates/mbe/src/tt_iter.rs
+++ b/src/tools/rust-analyzer/crates/mbe/src/tt_iter.rs
@@ -197,4 +197,4 @@ impl<'a> Iterator for TtIter<'a> {
    }
}

-impl<'a> std::iter::ExactSizeIterator for TtIter<'a> {}
+impl std::iter::ExactSizeIterator for TtIter<'_> {}