From d1b2d29528b7794b41e66fc2136e395a02f8529b Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 30 May 2024 05:59:35 +0200 Subject: Merging upstream version 1.73.0+dfsg1. Signed-off-by: Daniel Baumann --- src/tools/rust-analyzer/crates/syntax/Cargo.toml | 2 +- src/tools/rust-analyzer/crates/syntax/rust.ungram | 6 ++ .../crates/syntax/src/ast/edit_in_place.rs | 85 +++++++++++++++-- .../crates/syntax/src/ast/generated/nodes.rs | 30 ++++++ .../rust-analyzer/crates/syntax/src/ast/make.rs | 93 +++++++++++++++++++ .../crates/syntax/src/ast/token_ext.rs | 61 +++++++++--- src/tools/rust-analyzer/crates/syntax/src/lib.rs | 103 +++++++++++++++++++++ .../crates/syntax/src/tests/ast_src.rs | 1 + 8 files changed, 360 insertions(+), 21 deletions(-) (limited to 'src/tools/rust-analyzer/crates/syntax') diff --git a/src/tools/rust-analyzer/crates/syntax/Cargo.toml b/src/tools/rust-analyzer/crates/syntax/Cargo.toml index fb38d25ab..5ee0c4792 100644 --- a/src/tools/rust-analyzer/crates/syntax/Cargo.toml +++ b/src/tools/rust-analyzer/crates/syntax/Cargo.toml @@ -19,7 +19,7 @@ itertools = "0.10.5" rowan = "0.15.11" rustc-hash = "1.1.0" once_cell = "1.17.0" -indexmap = "1.9.1" +indexmap = "2.0.0" smol_str.workspace = true triomphe.workspace = true diff --git a/src/tools/rust-analyzer/crates/syntax/rust.ungram b/src/tools/rust-analyzer/crates/syntax/rust.ungram index b096c9974..138ddd208 100644 --- a/src/tools/rust-analyzer/crates/syntax/rust.ungram +++ b/src/tools/rust-analyzer/crates/syntax/rust.ungram @@ -72,6 +72,12 @@ TokenTree = MacroItems = Item* +MacroEagerInput = + '(' (Expr (',' Expr)* ','?)? ')' +| '{' (Expr (',' Expr)* ','?)? '}' +| '[' (Expr (',' Expr)* ','?)? ']' + + MacroStmts = statements:Stmt* Expr? diff --git a/src/tools/rust-analyzer/crates/syntax/src/ast/edit_in_place.rs b/src/tools/rust-analyzer/crates/syntax/src/ast/edit_in_place.rs index b3ea6ca8d..a150d9e6c 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/ast/edit_in_place.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/ast/edit_in_place.rs @@ -213,6 +213,28 @@ pub trait AttrsOwnerEdit: ast::HasAttrs { } } } + + fn add_attr(&self, attr: ast::Attr) { + add_attr(self.syntax(), attr); + + fn add_attr(node: &SyntaxNode, attr: ast::Attr) { + let indent = IndentLevel::from_node(node); + attr.reindent_to(indent); + + let after_attrs_and_comments = node + .children_with_tokens() + .find(|it| !matches!(it.kind(), WHITESPACE | COMMENT | ATTR)) + .map_or(Position::first_child_of(node), |it| Position::before(it)); + + ted::insert_all( + after_attrs_and_comments, + vec![ + attr.syntax().clone().into(), + make::tokens::whitespace(&format!("\n{indent}")).into(), + ], + ) + } + } } impl AttrsOwnerEdit for T {} @@ -358,6 +380,26 @@ impl Removable for ast::UseTree { } impl ast::UseTree { + /// Deletes the usetree node represented by the input. Recursively removes parents, including use nodes that become empty. + pub fn remove_recursive(self) { + let parent = self.syntax().parent(); + + self.remove(); + + if let Some(u) = parent.clone().and_then(ast::Use::cast) { + if u.use_tree().is_none() { + u.remove(); + } + } else if let Some(u) = parent.and_then(ast::UseTreeList::cast) { + if u.use_trees().next().is_none() { + let parent = u.syntax().parent().and_then(ast::UseTree::cast); + if let Some(u) = parent { + u.remove_recursive(); + } + } + } + } + pub fn get_or_create_use_tree_list(&self) -> ast::UseTreeList { match self.use_tree_list() { Some(it) => it, @@ -465,6 +507,22 @@ impl Removable for ast::Use { } } } + let prev_ws = self + .syntax() + .prev_sibling_or_token() + .and_then(|it| it.into_token()) + .and_then(ast::Whitespace::cast); + if let Some(prev_ws) = prev_ws { + let ws_text = prev_ws.syntax().text(); + let prev_newline = ws_text.rfind('\n').map(|x| x + 1).unwrap_or(0); + let rest = &ws_text[0..prev_newline]; + if rest.is_empty() { + ted::remove(prev_ws.syntax()); + } else { + ted::replace(prev_ws.syntax(), make::tokens::whitespace(rest)); + } + } + ted::remove(self.syntax()); } } @@ -676,12 +734,6 @@ fn get_or_insert_comma_after(syntax: &SyntaxNode) -> SyntaxToken { } } -impl ast::StmtList { - pub fn push_front(&self, statement: ast::Stmt) { - ted::insert(Position::after(self.l_curly_token().unwrap()), statement.syntax()); - } -} - impl ast::VariantList { pub fn add_variant(&self, variant: ast::Variant) { let (indent, position) = match self.variants().last() { @@ -732,6 +784,27 @@ fn normalize_ws_between_braces(node: &SyntaxNode) -> Option<()> { Some(()) } +pub trait HasVisibilityEdit: ast::HasVisibility { + fn set_visibility(&self, visbility: ast::Visibility) { + match self.visibility() { + Some(current_visibility) => { + ted::replace(current_visibility.syntax(), visbility.syntax()) + } + None => { + let vis_before = self + .syntax() + .children_with_tokens() + .find(|it| !matches!(it.kind(), WHITESPACE | COMMENT | ATTR)) + .unwrap_or_else(|| self.syntax().first_child_or_token().unwrap()); + + ted::insert(ted::Position::before(vis_before), visbility.syntax()); + } + } + } +} + +impl HasVisibilityEdit for T {} + pub trait Indent: AstNode + Clone + Sized { fn indent_level(&self) -> IndentLevel { IndentLevel::from_node(self.syntax()) diff --git a/src/tools/rust-analyzer/crates/syntax/src/ast/generated/nodes.rs b/src/tools/rust-analyzer/crates/syntax/src/ast/generated/nodes.rs index e520801ea..0b27faa53 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/ast/generated/nodes.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/ast/generated/nodes.rs @@ -197,6 +197,20 @@ pub struct MacroItems { impl ast::HasModuleItem for MacroItems {} impl MacroItems {} +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct MacroEagerInput { + pub(crate) syntax: SyntaxNode, +} +impl MacroEagerInput { + pub fn l_paren_token(&self) -> Option { support::token(&self.syntax, T!['(']) } + pub fn exprs(&self) -> AstChildren { support::children(&self.syntax) } + pub fn r_paren_token(&self) -> Option { support::token(&self.syntax, T![')']) } + pub fn l_curly_token(&self) -> Option { support::token(&self.syntax, T!['{']) } + pub fn r_curly_token(&self) -> Option { support::token(&self.syntax, T!['}']) } + pub fn l_brack_token(&self) -> Option { support::token(&self.syntax, T!['[']) } + pub fn r_brack_token(&self) -> Option { support::token(&self.syntax, T![']']) } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct MacroStmts { pub(crate) syntax: SyntaxNode, @@ -1922,6 +1936,17 @@ impl AstNode for MacroItems { } fn syntax(&self) -> &SyntaxNode { &self.syntax } } +impl AstNode for MacroEagerInput { + fn can_cast(kind: SyntaxKind) -> bool { kind == MACRO_EAGER_INPUT } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxNode { &self.syntax } +} impl AstNode for MacroStmts { fn can_cast(kind: SyntaxKind) -> bool { kind == MACRO_STMTS } fn cast(syntax: SyntaxNode) -> Option { @@ -4360,6 +4385,11 @@ impl std::fmt::Display for MacroItems { std::fmt::Display::fmt(self.syntax(), f) } } +impl std::fmt::Display for MacroEagerInput { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} impl std::fmt::Display for MacroStmts { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) diff --git a/src/tools/rust-analyzer/crates/syntax/src/ast/make.rs b/src/tools/rust-analyzer/crates/syntax/src/ast/make.rs index 3c2b7e56b..4c6db0ef0 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/ast/make.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/ast/make.rs @@ -10,6 +10,8 @@ //! `parse(format!())` we use internally is an implementation detail -- long //! term, it will be replaced with direct tree manipulation. use itertools::Itertools; +use parser::T; +use rowan::NodeOrToken; use stdx::{format_to, never}; use crate::{ast, utils::is_raw_identifier, AstNode, SourceFile, SyntaxKind, SyntaxToken}; @@ -447,6 +449,21 @@ pub fn block_expr( ast_from_text(&format!("fn f() {buf}")) } +pub fn async_move_block_expr( + stmts: impl IntoIterator, + tail_expr: Option, +) -> ast::BlockExpr { + let mut buf = "async move {\n".to_string(); + for stmt in stmts.into_iter() { + format_to!(buf, " {stmt}\n"); + } + if let Some(tail_expr) = tail_expr { + format_to!(buf, " {tail_expr}\n"); + } + buf += "}"; + ast_from_text(&format!("const _: () = {buf};")) +} + pub fn tail_only_block_expr(tail_expr: ast::Expr) -> ast::BlockExpr { ast_from_text(&format!("fn f() {{ {tail_expr} }}")) } @@ -848,6 +865,36 @@ pub fn param_list( ast_from_text(&list) } +pub fn trait_( + is_unsafe: bool, + ident: &str, + gen_params: Option, + where_clause: Option, + assoc_items: ast::AssocItemList, +) -> ast::Trait { + let mut text = String::new(); + + if is_unsafe { + format_to!(text, "unsafe "); + } + + format_to!(text, "trait {ident}"); + + if let Some(gen_params) = gen_params { + format_to!(text, "{} ", gen_params.to_string()); + } else { + text.push(' '); + } + + if let Some(where_clause) = where_clause { + format_to!(text, "{} ", where_clause.to_string()); + } + + format_to!(text, "{}", assoc_items.to_string()); + + ast_from_text(&text) +} + pub fn type_bound(bound: &str) -> ast::TypeBound { ast_from_text(&format!("fn f() {{ }}")) } @@ -985,6 +1032,41 @@ pub fn struct_( ast_from_text(&format!("{visibility}struct {strukt_name}{type_params}{field_list}{semicolon}",)) } +pub fn attr_outer(meta: ast::Meta) -> ast::Attr { + ast_from_text(&format!("#[{meta}]")) +} + +pub fn attr_inner(meta: ast::Meta) -> ast::Attr { + ast_from_text(&format!("#![{meta}]")) +} + +pub fn meta_expr(path: ast::Path, expr: ast::Expr) -> ast::Meta { + ast_from_text(&format!("#[{path} = {expr}]")) +} + +pub fn meta_token_tree(path: ast::Path, tt: ast::TokenTree) -> ast::Meta { + ast_from_text(&format!("#[{path}{tt}]")) +} + +pub fn meta_path(path: ast::Path) -> ast::Meta { + ast_from_text(&format!("#[{path}]")) +} + +pub fn token_tree( + delimiter: SyntaxKind, + tt: Vec>, +) -> ast::TokenTree { + let (l_delimiter, r_delimiter) = match delimiter { + T!['('] => ('(', ')'), + T!['['] => ('[', ']'), + T!['{'] => ('{', '}'), + _ => panic!("invalid delimiter `{delimiter:?}`"), + }; + let tt = tt.into_iter().join(""); + + ast_from_text(&format!("tt!{l_delimiter}{tt}{r_delimiter}")) +} + #[track_caller] fn ast_from_text(text: &str) -> N { let parse = SourceFile::parse(text); @@ -1022,6 +1104,17 @@ pub mod tokens { ) }); + pub fn semicolon() -> SyntaxToken { + SOURCE_FILE + .tree() + .syntax() + .clone_for_update() + .descendants_with_tokens() + .filter_map(|it| it.into_token()) + .find(|it| it.kind() == SEMICOLON) + .unwrap() + } + pub fn single_space() -> SyntaxToken { SOURCE_FILE .tree() diff --git a/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs b/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs index 090eb89f4..87fd51d70 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs @@ -2,7 +2,9 @@ use std::borrow::Cow; -use rustc_lexer::unescape::{unescape_byte, unescape_char, unescape_literal, Mode}; +use rustc_lexer::unescape::{ + unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit, Mode, +}; use crate::{ ast::{self, AstToken}, @@ -146,6 +148,7 @@ impl QuoteOffsets { pub trait IsString: AstToken { const RAW_PREFIX: &'static str; + const MODE: Mode; fn is_raw(&self) -> bool { self.text().starts_with(Self::RAW_PREFIX) } @@ -181,7 +184,7 @@ pub trait IsString: AstToken { let text = &self.text()[text_range_no_quotes - start]; let offset = text_range_no_quotes.start() - start; - unescape_literal(text, Mode::Str, &mut |range, unescaped_char| { + unescape_literal(text, Self::MODE, &mut |range, unescaped_char| { let text_range = TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap()); cb(text_range + offset, unescaped_char); @@ -196,6 +199,7 @@ pub trait IsString: AstToken { impl IsString for ast::String { const RAW_PREFIX: &'static str = "r"; + const MODE: Mode = Mode::Str; } impl ast::String { @@ -213,7 +217,7 @@ impl ast::String { let mut buf = String::new(); let mut prev_end = 0; let mut has_error = false; - unescape_literal(text, Mode::Str, &mut |char_range, unescaped_char| match ( + unescape_literal(text, Self::MODE, &mut |char_range, unescaped_char| match ( unescaped_char, buf.capacity() == 0, ) { @@ -239,6 +243,7 @@ impl ast::String { impl IsString for ast::ByteString { const RAW_PREFIX: &'static str = "br"; + const MODE: Mode = Mode::ByteStr; } impl ast::ByteString { @@ -256,7 +261,7 @@ impl ast::ByteString { let mut buf: Vec = Vec::new(); let mut prev_end = 0; let mut has_error = false; - unescape_literal(text, Mode::ByteStr, &mut |char_range, unescaped_char| match ( + unescape_literal(text, Self::MODE, &mut |char_range, unescaped_char| match ( unescaped_char, buf.capacity() == 0, ) { @@ -282,42 +287,70 @@ impl ast::ByteString { impl IsString for ast::CString { const RAW_PREFIX: &'static str = "cr"; + const MODE: Mode = Mode::CStr; + + fn escaped_char_ranges( + &self, + cb: &mut dyn FnMut(TextRange, Result), + ) { + let text_range_no_quotes = match self.text_range_between_quotes() { + Some(it) => it, + None => return, + }; + + let start = self.syntax().text_range().start(); + let text = &self.text()[text_range_no_quotes - start]; + let offset = text_range_no_quotes.start() - start; + + unescape_c_string(text, Self::MODE, &mut |range, unescaped_char| { + let text_range = + TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap()); + // XXX: This method should only be used for highlighting ranges. The unescaped + // char/byte is not used. For simplicity, we return an arbitrary placeholder char. + cb(text_range + offset, unescaped_char.map(|_| ' ')); + }); + } } impl ast::CString { - pub fn value(&self) -> Option> { + pub fn value(&self) -> Option> { if self.is_raw() { let text = self.text(); let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; - return Some(Cow::Borrowed(text)); + return Some(Cow::Borrowed(text.as_bytes())); } let text = self.text(); let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; - let mut buf = String::new(); + let mut buf = Vec::new(); let mut prev_end = 0; let mut has_error = false; - unescape_literal(text, Mode::Str, &mut |char_range, unescaped_char| match ( - unescaped_char, + let mut char_buf = [0u8; 4]; + let mut extend_unit = |buf: &mut Vec, unit: CStrUnit| match unit { + CStrUnit::Byte(b) => buf.push(b), + CStrUnit::Char(c) => buf.extend(c.encode_utf8(&mut char_buf).as_bytes()), + }; + unescape_c_string(text, Self::MODE, &mut |char_range, unescaped| match ( + unescaped, buf.capacity() == 0, ) { - (Ok(c), false) => buf.push(c), + (Ok(u), false) => extend_unit(&mut buf, u), (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => { prev_end = char_range.end } - (Ok(c), true) => { + (Ok(u), true) => { buf.reserve_exact(text.len()); - buf.push_str(&text[..prev_end]); - buf.push(c); + buf.extend(text[..prev_end].as_bytes()); + extend_unit(&mut buf, u); } (Err(_), _) => has_error = true, }); match (has_error, buf.capacity() == 0) { (true, _) => None, - (false, true) => Some(Cow::Borrowed(text)), + (false, true) => Some(Cow::Borrowed(text.as_bytes())), (false, false) => Some(Cow::Owned(buf)), } } diff --git a/src/tools/rust-analyzer/crates/syntax/src/lib.rs b/src/tools/rust-analyzer/crates/syntax/src/lib.rs index efbf87966..4cd668a0c 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/lib.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/lib.rs @@ -171,6 +171,109 @@ impl SourceFile { } } +impl ast::TokenTree { + pub fn reparse_as_comma_separated_expr(self) -> Parse { + let tokens = self.syntax().descendants_with_tokens().filter_map(NodeOrToken::into_token); + + let mut parser_input = parser::Input::default(); + let mut was_joint = false; + for t in tokens { + let kind = t.kind(); + if kind.is_trivia() { + was_joint = false + } else { + if kind == SyntaxKind::IDENT { + let token_text = t.text(); + let contextual_kw = SyntaxKind::from_contextual_keyword(token_text) + .unwrap_or(SyntaxKind::IDENT); + parser_input.push_ident(contextual_kw); + } else { + if was_joint { + parser_input.was_joint(); + } + parser_input.push(kind); + // Tag the token as joint if it is float with a fractional part + // we use this jointness to inform the parser about what token split + // event to emit when we encounter a float literal in a field access + if kind == SyntaxKind::FLOAT_NUMBER && !t.text().ends_with('.') { + parser_input.was_joint(); + } + } + was_joint = true; + } + } + + let parser_output = parser::TopEntryPoint::MacroEagerInput.parse(&parser_input); + + let mut tokens = + self.syntax().descendants_with_tokens().filter_map(NodeOrToken::into_token); + let mut text = String::new(); + let mut pos = TextSize::from(0); + let mut builder = SyntaxTreeBuilder::default(); + for event in parser_output.iter() { + match event { + parser::Step::Token { kind, n_input_tokens } => { + let mut token = tokens.next().unwrap(); + while token.kind().is_trivia() { + let text = token.text(); + pos += TextSize::from(text.len() as u32); + builder.token(token.kind(), text); + + token = tokens.next().unwrap(); + } + text.push_str(token.text()); + for _ in 1..n_input_tokens { + let token = tokens.next().unwrap(); + text.push_str(token.text()); + } + + pos += TextSize::from(text.len() as u32); + builder.token(kind, &text); + text.clear(); + } + parser::Step::FloatSplit { ends_in_dot: has_pseudo_dot } => { + let token = tokens.next().unwrap(); + let text = token.text(); + + match text.split_once('.') { + Some((left, right)) => { + assert!(!left.is_empty()); + builder.start_node(SyntaxKind::NAME_REF); + builder.token(SyntaxKind::INT_NUMBER, left); + builder.finish_node(); + + // here we move the exit up, the original exit has been deleted in process + builder.finish_node(); + + builder.token(SyntaxKind::DOT, "."); + + if has_pseudo_dot { + assert!(right.is_empty(), "{left}.{right}"); + } else { + builder.start_node(SyntaxKind::NAME_REF); + builder.token(SyntaxKind::INT_NUMBER, right); + builder.finish_node(); + + // the parser creates an unbalanced start node, we are required to close it here + builder.finish_node(); + } + } + None => unreachable!(), + } + pos += TextSize::from(text.len() as u32); + } + parser::Step::Enter { kind } => builder.start_node(kind), + parser::Step::Exit => builder.finish_node(), + parser::Step::Error { msg } => builder.error(msg.to_owned(), pos), + } + } + + let (green, errors) = builder.finish_raw(); + + Parse { green, errors: Arc::new(errors), _ty: PhantomData } + } +} + /// Matches a `SyntaxNode` against an `ast` type. /// /// # Example: diff --git a/src/tools/rust-analyzer/crates/syntax/src/tests/ast_src.rs b/src/tools/rust-analyzer/crates/syntax/src/tests/ast_src.rs index c5783b91a..e4db33f1c 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/tests/ast_src.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/tests/ast_src.rs @@ -216,6 +216,7 @@ pub(crate) const KINDS_SRC: KindsSrc<'_> = KindsSrc { // macro related "MACRO_ITEMS", "MACRO_STMTS", + "MACRO_EAGER_INPUT", ], }; -- cgit v1.2.3