summaryrefslogtreecommitdiffstats
path: root/src/tools/rust-analyzer/crates/syntax/src/parsing/reparsing.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/tools/rust-analyzer/crates/syntax/src/parsing/reparsing.rs')
-rw-r--r--src/tools/rust-analyzer/crates/syntax/src/parsing/reparsing.rs441
1 files changed, 441 insertions, 0 deletions
diff --git a/src/tools/rust-analyzer/crates/syntax/src/parsing/reparsing.rs b/src/tools/rust-analyzer/crates/syntax/src/parsing/reparsing.rs
new file mode 100644
index 000000000..701e6232d
--- /dev/null
+++ b/src/tools/rust-analyzer/crates/syntax/src/parsing/reparsing.rs
@@ -0,0 +1,441 @@
+//! Implementation of incremental re-parsing.
+//!
+//! We use two simple strategies for this:
+//! - if the edit modifies only a single token (like changing an identifier's
+//! letter), we replace only this token.
+//! - otherwise, we search for the nearest `{}` block which contains the edit
+//! and try to parse only this block.
+
+use parser::Reparser;
+use text_edit::Indel;
+
+use crate::{
+ parsing::build_tree,
+ syntax_node::{GreenNode, GreenToken, NodeOrToken, SyntaxElement, SyntaxNode},
+ SyntaxError,
+ SyntaxKind::*,
+ TextRange, TextSize, T,
+};
+
+pub(crate) fn incremental_reparse(
+ node: &SyntaxNode,
+ edit: &Indel,
+ errors: Vec<SyntaxError>,
+) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
+ if let Some((green, new_errors, old_range)) = reparse_token(node, edit) {
+ return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
+ }
+
+ if let Some((green, new_errors, old_range)) = reparse_block(node, edit) {
+ return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
+ }
+ None
+}
+
+fn reparse_token(
+ root: &SyntaxNode,
+ edit: &Indel,
+) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
+ let prev_token = root.covering_element(edit.delete).as_token()?.clone();
+ let prev_token_kind = prev_token.kind();
+ match prev_token_kind {
+ WHITESPACE | COMMENT | IDENT | STRING => {
+ if prev_token_kind == WHITESPACE || prev_token_kind == COMMENT {
+ // removing a new line may extends previous token
+ let deleted_range = edit.delete - prev_token.text_range().start();
+ if prev_token.text()[deleted_range].contains('\n') {
+ return None;
+ }
+ }
+
+ let mut new_text = get_text_after_edit(prev_token.clone().into(), edit);
+ let (new_token_kind, new_err) = parser::LexedStr::single_token(&new_text)?;
+
+ if new_token_kind != prev_token_kind
+ || (new_token_kind == IDENT && is_contextual_kw(&new_text))
+ {
+ return None;
+ }
+
+ // Check that edited token is not a part of the bigger token.
+ // E.g. if for source code `bruh"str"` the user removed `ruh`, then
+ // `b` no longer remains an identifier, but becomes a part of byte string literal
+ if let Some(next_char) = root.text().char_at(prev_token.text_range().end()) {
+ new_text.push(next_char);
+ let token_with_next_char = parser::LexedStr::single_token(&new_text);
+ if let Some((_kind, _error)) = token_with_next_char {
+ return None;
+ }
+ new_text.pop();
+ }
+
+ let new_token = GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), &new_text);
+ let range = TextRange::up_to(TextSize::of(&new_text));
+ Some((
+ prev_token.replace_with(new_token),
+ new_err.into_iter().map(|msg| SyntaxError::new(msg, range)).collect(),
+ prev_token.text_range(),
+ ))
+ }
+ _ => None,
+ }
+}
+
+fn reparse_block(
+ root: &SyntaxNode,
+ edit: &Indel,
+) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
+ let (node, reparser) = find_reparsable_node(root, edit.delete)?;
+ let text = get_text_after_edit(node.clone().into(), edit);
+
+ let lexed = parser::LexedStr::new(text.as_str());
+ let parser_input = lexed.to_input();
+ if !is_balanced(&lexed) {
+ return None;
+ }
+
+ let tree_traversal = reparser.parse(&parser_input);
+
+ let (green, new_parser_errors, _eof) = build_tree(lexed, tree_traversal);
+
+ Some((node.replace_with(green), new_parser_errors, node.text_range()))
+}
+
+fn get_text_after_edit(element: SyntaxElement, edit: &Indel) -> String {
+ let edit = Indel::replace(edit.delete - element.text_range().start(), edit.insert.clone());
+
+ let mut text = match element {
+ NodeOrToken::Token(token) => token.text().to_string(),
+ NodeOrToken::Node(node) => node.text().to_string(),
+ };
+ edit.apply(&mut text);
+ text
+}
+
+fn is_contextual_kw(text: &str) -> bool {
+ matches!(text, "auto" | "default" | "union")
+}
+
+fn find_reparsable_node(node: &SyntaxNode, range: TextRange) -> Option<(SyntaxNode, Reparser)> {
+ let node = node.covering_element(range);
+
+ node.ancestors().find_map(|node| {
+ let first_child = node.first_child_or_token().map(|it| it.kind());
+ let parent = node.parent().map(|it| it.kind());
+ Reparser::for_node(node.kind(), first_child, parent).map(|r| (node, r))
+ })
+}
+
+fn is_balanced(lexed: &parser::LexedStr<'_>) -> bool {
+ if lexed.is_empty() || lexed.kind(0) != T!['{'] || lexed.kind(lexed.len() - 1) != T!['}'] {
+ return false;
+ }
+ let mut balance = 0usize;
+ for i in 1..lexed.len() - 1 {
+ match lexed.kind(i) {
+ T!['{'] => balance += 1,
+ T!['}'] => {
+ balance = match balance.checked_sub(1) {
+ Some(b) => b,
+ None => return false,
+ }
+ }
+ _ => (),
+ }
+ }
+ balance == 0
+}
+
+fn merge_errors(
+ old_errors: Vec<SyntaxError>,
+ new_errors: Vec<SyntaxError>,
+ range_before_reparse: TextRange,
+ edit: &Indel,
+) -> Vec<SyntaxError> {
+ let mut res = Vec::new();
+
+ for old_err in old_errors {
+ let old_err_range = old_err.range();
+ if old_err_range.end() <= range_before_reparse.start() {
+ res.push(old_err);
+ } else if old_err_range.start() >= range_before_reparse.end() {
+ let inserted_len = TextSize::of(&edit.insert);
+ res.push(old_err.with_range((old_err_range + inserted_len) - edit.delete.len()));
+ // Note: extra parens are intentional to prevent uint underflow, HWAB (here was a bug)
+ }
+ }
+ res.extend(new_errors.into_iter().map(|new_err| {
+ // fighting borrow checker with a variable ;)
+ let offseted_range = new_err.range() + range_before_reparse.start();
+ new_err.with_range(offseted_range)
+ }));
+ res
+}
+
+#[cfg(test)]
+mod tests {
+ use test_utils::{assert_eq_text, extract_range};
+
+ use super::*;
+ use crate::{AstNode, Parse, SourceFile};
+
+ fn do_check(before: &str, replace_with: &str, reparsed_len: u32) {
+ let (range, before) = extract_range(before);
+ let edit = Indel::replace(range, replace_with.to_owned());
+ let after = {
+ let mut after = before.clone();
+ edit.apply(&mut after);
+ after
+ };
+
+ let fully_reparsed = SourceFile::parse(&after);
+ let incrementally_reparsed: Parse<SourceFile> = {
+ let before = SourceFile::parse(&before);
+ let (green, new_errors, range) =
+ incremental_reparse(before.tree().syntax(), &edit, before.errors.to_vec()).unwrap();
+ assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length");
+ Parse::new(green, new_errors)
+ };
+
+ assert_eq_text!(
+ &format!("{:#?}", fully_reparsed.tree().syntax()),
+ &format!("{:#?}", incrementally_reparsed.tree().syntax()),
+ );
+ assert_eq!(fully_reparsed.errors(), incrementally_reparsed.errors());
+ }
+
+ #[test] // FIXME: some test here actually test token reparsing
+ fn reparse_block_tests() {
+ do_check(
+ r"
+fn foo() {
+ let x = foo + $0bar$0
+}
+",
+ "baz",
+ 3,
+ );
+ do_check(
+ r"
+fn foo() {
+ let x = foo$0 + bar$0
+}
+",
+ "baz",
+ 25,
+ );
+ do_check(
+ r"
+struct Foo {
+ f: foo$0$0
+}
+",
+ ",\n g: (),",
+ 14,
+ );
+ do_check(
+ r"
+fn foo {
+ let;
+ 1 + 1;
+ $092$0;
+}
+",
+ "62",
+ 31, // FIXME: reparse only int literal here
+ );
+ do_check(
+ r"
+mod foo {
+ fn $0$0
+}
+",
+ "bar",
+ 11,
+ );
+
+ do_check(
+ r"
+trait Foo {
+ type $0Foo$0;
+}
+",
+ "Output",
+ 3,
+ );
+ do_check(
+ r"
+impl IntoIterator<Item=i32> for Foo {
+ f$0$0
+}
+",
+ "n next(",
+ 9,
+ );
+ do_check(r"use a::b::{foo,$0,bar$0};", "baz", 10);
+ do_check(
+ r"
+pub enum A {
+ Foo$0$0
+}
+",
+ "\nBar;\n",
+ 11,
+ );
+ do_check(
+ r"
+foo!{a, b$0$0 d}
+",
+ ", c[3]",
+ 8,
+ );
+ do_check(
+ r"
+fn foo() {
+ vec![$0$0]
+}
+",
+ "123",
+ 14,
+ );
+ do_check(
+ r"
+extern {
+ fn$0;$0
+}
+",
+ " exit(code: c_int)",
+ 11,
+ );
+ }
+
+ #[test]
+ fn reparse_token_tests() {
+ do_check(
+ r"$0$0
+fn foo() -> i32 { 1 }
+",
+ "\n\n\n \n",
+ 1,
+ );
+ do_check(
+ r"
+fn foo() -> $0$0 {}
+",
+ " \n",
+ 2,
+ );
+ do_check(
+ r"
+fn $0foo$0() -> i32 { 1 }
+",
+ "bar",
+ 3,
+ );
+ do_check(
+ r"
+fn foo$0$0foo() { }
+",
+ "bar",
+ 6,
+ );
+ do_check(
+ r"
+fn foo /* $0$0 */ () {}
+",
+ "some comment",
+ 6,
+ );
+ do_check(
+ r"
+fn baz $0$0 () {}
+",
+ " \t\t\n\n",
+ 2,
+ );
+ do_check(
+ r"
+fn baz $0$0 () {}
+",
+ " \t\t\n\n",
+ 2,
+ );
+ do_check(
+ r"
+/// foo $0$0omment
+mod { }
+",
+ "c",
+ 14,
+ );
+ do_check(
+ r#"
+fn -> &str { "Hello$0$0" }
+"#,
+ ", world",
+ 7,
+ );
+ do_check(
+ r#"
+fn -> &str { // "Hello$0$0"
+"#,
+ ", world",
+ 10,
+ );
+ do_check(
+ r##"
+fn -> &str { r#"Hello$0$0"#
+"##,
+ ", world",
+ 10,
+ );
+ do_check(
+ r"
+#[derive($0Copy$0)]
+enum Foo {
+
+}
+",
+ "Clone",
+ 4,
+ );
+ }
+
+ #[test]
+ fn reparse_str_token_with_error_unchanged() {
+ do_check(r#""$0Unclosed$0 string literal"#, "Still unclosed", 24);
+ }
+
+ #[test]
+ fn reparse_str_token_with_error_fixed() {
+ do_check(r#""unterinated$0$0"#, "\"", 12);
+ }
+
+ #[test]
+ fn reparse_block_with_error_in_middle_unchanged() {
+ do_check(
+ r#"fn main() {
+ if {}
+ 32 + 4$0$0
+ return
+ if {}
+ }"#,
+ "23",
+ 105,
+ )
+ }
+
+ #[test]
+ fn reparse_block_with_error_in_middle_fixed() {
+ do_check(
+ r#"fn main() {
+ if {}
+ 32 + 4$0$0
+ return
+ if {}
+ }"#,
+ ";",
+ 105,
+ )
+ }
+}