 src/tools/rust-analyzer/crates/ide-ssr/src/parsing.rs | 406 ++++++++++++++++++
 1 file changed, 406 insertions(+)
diff --git a/src/tools/rust-analyzer/crates/ide-ssr/src/parsing.rs b/src/tools/rust-analyzer/crates/ide-ssr/src/parsing.rs
new file mode 100644
index 000000000..f6220b928
--- /dev/null
+++ b/src/tools/rust-analyzer/crates/ide-ssr/src/parsing.rs
@@ -0,0 +1,406 @@
+//! This file contains code for parsing SSR rules, which look something like `foo($a) ==>> bar($b)`.
+//! We first split the rule into the parts before and after the separator `==>>`. Next, both the
+//! search pattern and the replacement template get tokenized by the Rust tokenizer. The tokens are
+//! then searched for placeholders, which start with `$`. For replacement templates, this is the
+//! final form. For search patterns, we go further and parse the pattern as each kind of thing that
+//! we can match, e.g. expressions, type references, etc.
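+//!
+//! For example, given the rule `foo($a) ==>> bar($a)`: the text is split at `==>>`, both sides
+//! are tokenized, `$a` becomes a placeholder, and the search pattern is then parsed as each
+//! supported fragment kind (expression, type, item, pattern, statement) to see which apply.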
+use ide_db::{FxHashMap, FxHashSet};
+use std::{fmt::Display, str::FromStr};
+use syntax::{SmolStr, SyntaxKind, SyntaxNode, T};
+
+use crate::errors::bail;
+use crate::{fragments, SsrError, SsrPattern, SsrRule};
+
+#[derive(Debug)]
+pub(crate) struct ParsedRule {
+    pub(crate) placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
+    pub(crate) pattern: SyntaxNode,
+    pub(crate) template: Option<SyntaxNode>,
+}
+
+#[derive(Debug)]
+pub(crate) struct RawPattern {
+    tokens: Vec<PatternElement>,
+}
+
+/// Part of a search or replace pattern.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) enum PatternElement {
+    Token(Token),
+    Placeholder(Placeholder),
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) struct Placeholder {
+    /// The name of this placeholder, e.g. for `$a` this would be `a`.
+    pub(crate) ident: Var,
+    /// A unique name used in place of this placeholder when we parse the pattern as Rust code.
+    stand_in_name: String,
+    pub(crate) constraints: Vec<Constraint>,
+}
+
+/// Represents a `$var` in an SSR query.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct Var(pub(crate) String);
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) enum Constraint {
+    Kind(NodeKind),
+    Not(Box<Constraint>),
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) enum NodeKind {
+    Literal,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub(crate) struct Token {
+    kind: SyntaxKind,
+    pub(crate) text: SmolStr,
+}
+
+impl ParsedRule {
+    fn new(
+        pattern: &RawPattern,
+        template: Option<&RawPattern>,
+    ) -> Result<Vec<ParsedRule>, SsrError> {
+        let raw_pattern = pattern.as_rust_code();
+        let raw_template = template.map(|t| t.as_rust_code());
+        let raw_template = raw_template.as_deref();
+        let mut builder = RuleBuilder {
+            placeholders_by_stand_in: pattern.placeholders_by_stand_in(),
+            rules: Vec::new(),
+        };
+
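+        // If the template parses as an expression, pair the expression form of the pattern
+        // with it; otherwise fall back to pairing it with the template parsed as a statement.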
+        let raw_template_stmt = raw_template.map(fragments::stmt);
+        if let raw_template_expr @ Some(Ok(_)) = raw_template.map(fragments::expr) {
+            builder.try_add(fragments::expr(&raw_pattern), raw_template_expr);
+        } else {
+            builder.try_add(fragments::expr(&raw_pattern), raw_template_stmt.clone());
+        }
+        builder.try_add(fragments::ty(&raw_pattern), raw_template.map(fragments::ty));
+        builder.try_add(fragments::item(&raw_pattern), raw_template.map(fragments::item));
+        builder.try_add(fragments::pat(&raw_pattern), raw_template.map(fragments::pat));
+        builder.try_add(fragments::stmt(&raw_pattern), raw_template_stmt);
+        builder.build()
+    }
+}
+
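+/// Collects one `ParsedRule` per fragment kind that the pattern (and template, if any)
+/// successfully parse as.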
+struct RuleBuilder {
+    placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
+    rules: Vec<ParsedRule>,
+}
+
+impl RuleBuilder {
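+    /// Adds a rule, but only if the pattern parsed successfully and, when a template is
+    /// present, the template did too. Failed fragment parses are skipped silently: the
+    /// pattern only needs to parse as at least one fragment kind (see `build`).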
+    fn try_add(
+        &mut self,
+        pattern: Result<SyntaxNode, ()>,
+        template: Option<Result<SyntaxNode, ()>>,
+    ) {
+        match (pattern, template) {
+            (Ok(pattern), Some(Ok(template))) => self.rules.push(ParsedRule {
+                placeholders_by_stand_in: self.placeholders_by_stand_in.clone(),
+                pattern,
+                template: Some(template),
+            }),
+            (Ok(pattern), None) => self.rules.push(ParsedRule {
+                placeholders_by_stand_in: self.placeholders_by_stand_in.clone(),
+                pattern,
+                template: None,
+            }),
+            _ => {}
+        }
+    }
+
+    fn build(mut self) -> Result<Vec<ParsedRule>, SsrError> {
+        if self.rules.is_empty() {
+            bail!("Not a valid Rust expression, type, item, path or pattern");
+        }
+        // If any rules contain paths, then we reject any rules that don't contain paths. Allowing a
+        // mix leads to strange semantics, since the path-based rules only match things where the
+        // path refers to semantically the same thing, whereas the non-path-based rules could match
+        // anything. Specifically, if we have a rule like `foo ==>> bar` we only want to match the
+        // `foo` that is in the current scope, not any `foo`. However "foo" can be parsed as a
+        // pattern (IDENT_PAT -> NAME -> IDENT). Allowing such a rule through would result in
+        // renaming everything called `foo` to `bar`. It'd also be slow, since without a path, we'd
+        // have to use the slow-scan search mechanism.
+        if self.rules.iter().any(|rule| contains_path(&rule.pattern)) {
+            let old_len = self.rules.len();
+            self.rules.retain(|rule| contains_path(&rule.pattern));
+            if self.rules.len() < old_len {
+                cov_mark::hit!(pattern_is_a_single_segment_path);
+            }
+        }
+        Ok(self.rules)
+    }
+}
+
+/// Returns whether there are any paths in `node`.
+fn contains_path(node: &SyntaxNode) -> bool {
+    node.kind() == SyntaxKind::PATH
+        || node.descendants().any(|node| node.kind() == SyntaxKind::PATH)
+}
+
+impl FromStr for SsrRule {
+    type Err = SsrError;
+
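+    /// Parses a rule of the form `search_pattern ==>> replacement_template`, e.g.
+    /// `foo($a) ==>> bar($b)`. Exactly one `==>>` separator must be present.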
+    fn from_str(query: &str) -> Result<SsrRule, SsrError> {
+        let mut it = query.split("==>>");
+        let pattern = it.next().expect("at least empty string").trim();
+        let template = it
+            .next()
+            .ok_or_else(|| SsrError("Cannot find delimiter `==>>`".into()))?
+            .trim()
+            .to_string();
+        if it.next().is_some() {
+            return Err(SsrError("More than one delimiter found".into()));
+        }
+        let raw_pattern = pattern.parse()?;
+        let raw_template = template.parse()?;
+        let parsed_rules = ParsedRule::new(&raw_pattern, Some(&raw_template))?;
+        let rule = SsrRule { pattern: raw_pattern, template: raw_template, parsed_rules };
+        validate_rule(&rule)?;
+        Ok(rule)
+    }
+}
+
+impl FromStr for RawPattern {
+    type Err = SsrError;
+
+    fn from_str(pattern_str: &str) -> Result<RawPattern, SsrError> {
+        Ok(RawPattern { tokens: parse_pattern(pattern_str)? })
+    }
+}
+
+impl RawPattern {
+    /// Returns this search pattern as Rust source code that we can feed to the Rust parser.
+    fn as_rust_code(&self) -> String {
+        let mut res = String::new();
+        for t in &self.tokens {
+            res.push_str(match t {
+                PatternElement::Token(token) => token.text.as_str(),
+                PatternElement::Placeholder(placeholder) => placeholder.stand_in_name.as_str(),
+            });
+        }
+        res
+    }
+
+    pub(crate) fn placeholders_by_stand_in(&self) -> FxHashMap<SmolStr, Placeholder> {
+        let mut res = FxHashMap::default();
+        for t in &self.tokens {
+            if let PatternElement::Placeholder(placeholder) = t {
+                res.insert(SmolStr::new(placeholder.stand_in_name.clone()), placeholder.clone());
+            }
+        }
+        res
+    }
+}
+
+impl FromStr for SsrPattern {
+    type Err = SsrError;
+
+    fn from_str(pattern_str: &str) -> Result<SsrPattern, SsrError> {
+        let raw_pattern = pattern_str.parse()?;
+        let parsed_rules = ParsedRule::new(&raw_pattern, None)?;
+        Ok(SsrPattern { parsed_rules })
+    }
+}
+
+/// Returns `pattern_str`, parsed as a search or replace pattern.
+fn parse_pattern(pattern_str: &str) -> Result<Vec<PatternElement>, SsrError> {
+    let mut res = Vec::new();
+    let mut placeholder_names = FxHashSet::default();
+    let mut tokens = tokenize(pattern_str)?.into_iter();
+    while let Some(token) = tokens.next() {
+        if token.kind == T![$] {
+            let placeholder = parse_placeholder(&mut tokens)?;
+            if !placeholder_names.insert(placeholder.ident.clone()) {
+                bail!("Placeholder `{}` occurs more than once", placeholder.ident);
+            }
+            res.push(PatternElement::Placeholder(placeholder));
+        } else {
+            res.push(PatternElement::Token(token));
+        }
+    }
+    Ok(res)
+}
+
+/// Checks for errors in a rule, e.g. the replace pattern referencing placeholders that the search
+/// pattern didn't define.
+fn validate_rule(rule: &SsrRule) -> Result<(), SsrError> {
+    let mut defined_placeholders = FxHashSet::default();
+    for p in &rule.pattern.tokens {
+        if let PatternElement::Placeholder(placeholder) = p {
+            defined_placeholders.insert(&placeholder.ident);
+        }
+    }
+    let mut undefined = Vec::new();
+    for p in &rule.template.tokens {
+        if let PatternElement::Placeholder(placeholder) = p {
+            if !defined_placeholders.contains(&placeholder.ident) {
+                undefined.push(placeholder.ident.to_string());
+            }
+            if !placeholder.constraints.is_empty() {
+                bail!("Replacement placeholders cannot have constraints");
+            }
+        }
+    }
+    if !undefined.is_empty() {
+        bail!("Replacement contains undefined placeholders: {}", undefined.join(", "));
+    }
+    Ok(())
+}
+
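+/// Lexes `source` with the Rust lexer, bailing out on the first lexer error.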
+fn tokenize(source: &str) -> Result<Vec<Token>, SsrError> {
+    let lexed = parser::LexedStr::new(source);
+    if let Some((_, first_error)) = lexed.errors().next() {
+        bail!("Failed to parse pattern: {}", first_error);
+    }
+    let mut tokens: Vec<Token> = Vec::new();
+    for i in 0..lexed.len() {
+        tokens.push(Token { kind: lexed.kind(i), text: lexed.text(i).into() });
+    }
+    Ok(tokens)
+}
+
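+/// Parses a placeholder of the form `$name` or `${name:constraint1:constraint2}`. The leading
+/// `$` token has already been consumed by the caller.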
+fn parse_placeholder(tokens: &mut std::vec::IntoIter<Token>) -> Result<Placeholder, SsrError> {
+    let mut name = None;
+    let mut constraints = Vec::new();
+    if let Some(token) = tokens.next() {
+        match token.kind {
+            SyntaxKind::IDENT => {
+                name = Some(token.text);
+            }
+            T!['{'] => {
+                let token =
+                    tokens.next().ok_or_else(|| SsrError::new("Unexpected end of placeholder"))?;
+                if token.kind == SyntaxKind::IDENT {
+                    name = Some(token.text);
+                }
+                loop {
+                    let token = tokens
+                        .next()
+                        .ok_or_else(|| SsrError::new("Placeholder is missing closing brace '}'"))?;
+                    match token.kind {
+                        T![:] => {
+                            constraints.push(parse_constraint(tokens)?);
+                        }
+                        T!['}'] => break,
+                        _ => bail!("Unexpected token while parsing placeholder: '{}'", token.text),
+                    }
+                }
+            }
+            _ => {
+                bail!("Placeholders should either be $name or ${{name:constraints}}");
+            }
+        }
+    }
+    let name = name.ok_or_else(|| SsrError::new("Placeholder ($) with no name"))?;
+    Ok(Placeholder::new(name, constraints))
+}
+
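+/// Parses a single placeholder constraint, e.g. `kind(literal)` or `not(kind(literal))`.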
+fn parse_constraint(tokens: &mut std::vec::IntoIter<Token>) -> Result<Constraint, SsrError> {
+    let constraint_type = tokens
+        .next()
+        .ok_or_else(|| SsrError::new("Found end of placeholder while looking for a constraint"))?
+        .text
+        .to_string();
+    match constraint_type.as_str() {
+        "kind" => {
+            expect_token(tokens, "(")?;
+            let t = tokens.next().ok_or_else(|| {
+                SsrError::new("Unexpected end of constraint while looking for kind")
+            })?;
+            if t.kind != SyntaxKind::IDENT {
+                bail!("Expected ident, found {:?} while parsing kind constraint", t.kind);
+            }
+            expect_token(tokens, ")")?;
+            Ok(Constraint::Kind(NodeKind::from(&t.text)?))
+        }
+        "not" => {
+            expect_token(tokens, "(")?;
+            let sub = parse_constraint(tokens)?;
+            expect_token(tokens, ")")?;
+            Ok(Constraint::Not(Box::new(sub)))
+        }
+        x => bail!("Unsupported constraint type '{}'", x),
+    }
+}
+
+fn expect_token(tokens: &mut std::vec::IntoIter<Token>, expected: &str) -> Result<(), SsrError> {
+    if let Some(t) = tokens.next() {
+        if t.text == expected {
+            return Ok(());
+        }
+        bail!("Expected {}, found {}", expected, t.text);
+    }
+    bail!("Expected {}, found end of stream", expected);
+}
+
+impl NodeKind {
+    fn from(name: &SmolStr) -> Result<NodeKind, SsrError> {
+        Ok(match name.as_str() {
+            "literal" => NodeKind::Literal,
+            _ => bail!("Unknown node kind '{}'", name),
+        })
+    }
+}
+
+impl Placeholder {
+    fn new(name: SmolStr, constraints: Vec<Constraint>) -> Self {
+        Self {
+            stand_in_name: format!("__placeholder_{}", name),
+            constraints,
+            ident: Var(name.to_string()),
+        }
+    }
+}
+
+impl Display for Var {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "${}", self.0)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parser_happy_case() {
+        fn token(kind: SyntaxKind, text: &str) -> PatternElement {
+            PatternElement::Token(Token { kind, text: SmolStr::new(text) })
+        }
+        fn placeholder(name: &str) -> PatternElement {
+            PatternElement::Placeholder(Placeholder::new(SmolStr::new(name), Vec::new()))
+        }
+        let result: SsrRule = "foo($a, $b) ==>> bar($b, $a)".parse().unwrap();
+        assert_eq!(
+            result.pattern.tokens,
+            vec![
+                token(SyntaxKind::IDENT, "foo"),
+                token(T!['('], "("),
+                placeholder("a"),
+                token(T![,], ","),
+                token(SyntaxKind::WHITESPACE, " "),
+                placeholder("b"),
+                token(T![')'], ")"),
+            ]
+        );
+        assert_eq!(
+            result.template.tokens,
+            vec![
+                token(SyntaxKind::IDENT, "bar"),
+                token(T!['('], "("),
+                placeholder("b"),
+                token(T![,], ","),
+                token(SyntaxKind::WHITESPACE, " "),
+                placeholder("a"),
+                token(T![')'], ")"),
+            ]
+        );
+    }
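+
+    // Sketches of two error cases handled above, asserting only that parsing fails (not a
+    // specific message): a placeholder defined twice in the search pattern, and a template
+    // placeholder that the search pattern never defines.
+    #[test]
+    fn parser_rejects_duplicate_placeholder() {
+        assert!("foo($a, $a) ==>> foo($a)".parse::<SsrRule>().is_err());
+    }
+
+    #[test]
+    fn parser_rejects_undefined_placeholder_in_template() {
+        assert!("foo($a) ==>> bar($b)".parse::<SsrRule>().is_err());
+    }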
+}