diff options
Diffstat (limited to 'src/tools/rust-analyzer/crates/parser/src')
29 files changed, 6575 insertions, 0 deletions
diff --git a/src/tools/rust-analyzer/crates/parser/src/event.rs b/src/tools/rust-analyzer/crates/parser/src/event.rs new file mode 100644 index 000000000..b0e70e794 --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/event.rs @@ -0,0 +1,133 @@ +//! This module provides a way to construct a `File`. +//! It is intended to be completely decoupled from the +//! parser, so as to allow to evolve the tree representation +//! and the parser algorithm independently. +//! +//! The `TreeSink` trait is the bridge between the parser and the +//! tree builder: the parser produces a stream of events like +//! `start node`, `finish node`, and `FileBuilder` converts +//! this stream to a real tree. +use std::mem; + +use crate::{ + output::Output, + SyntaxKind::{self, *}, +}; + +/// `Parser` produces a flat list of `Event`s. +/// They are converted to a tree-structure in +/// a separate pass, via `TreeBuilder`. +#[derive(Debug)] +pub(crate) enum Event { + /// This event signifies the start of the node. + /// It should be either abandoned (in which case the + /// `kind` is `TOMBSTONE`, and the event is ignored), + /// or completed via a `Finish` event. + /// + /// All tokens between a `Start` and a `Finish` would + /// become the children of the respective node. + /// + /// For left-recursive syntactic constructs, the parser produces + /// a child node before it sees a parent. `forward_parent` + /// saves the position of current event's parent. + /// + /// Consider this path + /// + /// foo::bar + /// + /// The events for it would look like this: + /// + /// ```text + /// START(PATH) IDENT('foo') FINISH START(PATH) T![::] IDENT('bar') FINISH + /// | /\ + /// | | + /// +------forward-parent------+ + /// ``` + /// + /// And the tree would look like this + /// + /// ```text + /// +--PATH---------+ + /// | | | + /// | | | + /// | '::' 'bar' + /// | + /// PATH + /// | + /// 'foo' + /// ``` + /// + /// See also `CompletedMarker::precede`. 
+ Start { + kind: SyntaxKind, + forward_parent: Option<u32>, + }, + + /// Complete the previous `Start` event + Finish, + + /// Produce a single leaf-element. + /// `n_raw_tokens` is used to glue complex contextual tokens. + /// For example, lexer tokenizes `>>` as `>`, `>`, and + /// `n_raw_tokens = 2` is used to produced a single `>>`. + Token { + kind: SyntaxKind, + n_raw_tokens: u8, + }, + + Error { + msg: String, + }, +} + +impl Event { + pub(crate) fn tombstone() -> Self { + Event::Start { kind: TOMBSTONE, forward_parent: None } + } +} + +/// Generate the syntax tree with the control of events. +pub(super) fn process(mut events: Vec<Event>) -> Output { + let mut res = Output::default(); + let mut forward_parents = Vec::new(); + + for i in 0..events.len() { + match mem::replace(&mut events[i], Event::tombstone()) { + Event::Start { kind, forward_parent } => { + // For events[A, B, C], B is A's forward_parent, C is B's forward_parent, + // in the normal control flow, the parent-child relation: `A -> B -> C`, + // while with the magic forward_parent, it writes: `C <- B <- A`. + + // append `A` into parents. + forward_parents.push(kind); + let mut idx = i; + let mut fp = forward_parent; + while let Some(fwd) = fp { + idx += fwd as usize; + // append `A`'s forward_parent `B` + fp = match mem::replace(&mut events[idx], Event::tombstone()) { + Event::Start { kind, forward_parent } => { + forward_parents.push(kind); + forward_parent + } + _ => unreachable!(), + }; + // append `B`'s forward_parent `C` in the next stage. 
+ } + + for kind in forward_parents.drain(..).rev() { + if kind != TOMBSTONE { + res.enter_node(kind); + } + } + } + Event::Finish => res.leave_node(), + Event::Token { kind, n_raw_tokens } => { + res.token(kind, n_raw_tokens); + } + Event::Error { msg } => res.error(msg), + } + } + + res +} diff --git a/src/tools/rust-analyzer/crates/parser/src/grammar.rs b/src/tools/rust-analyzer/crates/parser/src/grammar.rs new file mode 100644 index 000000000..b74683296 --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/grammar.rs @@ -0,0 +1,342 @@ +//! This is the actual "grammar" of the Rust language. +//! +//! Each function in this module and its children corresponds +//! to a production of the formal grammar. Submodules roughly +//! correspond to different *areas* of the grammar. By convention, +//! each submodule starts with `use super::*` import and exports +//! "public" productions via `pub(super)`. +//! +//! See docs for [`Parser`](super::parser::Parser) to learn about API, +//! available to the grammar, and see docs for [`Event`](super::event::Event) +//! to learn how this actually manages to produce parse trees. +//! +//! Code in this module also contains inline tests, which start with +//! `// test name-of-the-test` comment and look like this: +//! +//! ``` +//! // test function_with_zero_parameters +//! // fn foo() {} +//! ``` +//! +//! After adding a new inline-test, run `cargo test -p xtask` to +//! extract it as a standalone text-fixture into +//! `crates/syntax/test_data/parser/`, and run `cargo test` once to +//! create the "gold" value. +//! +//! Coding convention: rules like `where_clause` always produce either a +//! node or an error, rules like `opt_where_clause` may produce nothing. +//! Non-opt rules typically start with `assert!(p.at(FIRST_TOKEN))`, the +//! caller is responsible for branching on the first token. 
+ +mod attributes; +mod expressions; +mod items; +mod params; +mod paths; +mod patterns; +mod generic_args; +mod generic_params; +mod types; + +use crate::{ + parser::{CompletedMarker, Marker, Parser}, + SyntaxKind::{self, *}, + TokenSet, T, +}; + +pub(crate) mod entry { + use super::*; + + pub(crate) mod prefix { + use super::*; + + pub(crate) fn vis(p: &mut Parser<'_>) { + let _ = opt_visibility(p, false); + } + + pub(crate) fn block(p: &mut Parser<'_>) { + expressions::block_expr(p); + } + + pub(crate) fn stmt(p: &mut Parser<'_>) { + expressions::stmt(p, expressions::Semicolon::Forbidden); + } + + pub(crate) fn pat(p: &mut Parser<'_>) { + patterns::pattern_single(p); + } + + pub(crate) fn ty(p: &mut Parser<'_>) { + types::type_(p); + } + pub(crate) fn expr(p: &mut Parser<'_>) { + let _ = expressions::expr(p); + } + pub(crate) fn path(p: &mut Parser<'_>) { + let _ = paths::type_path(p); + } + pub(crate) fn item(p: &mut Parser<'_>) { + items::item_or_macro(p, true); + } + // Parse a meta item , which excluded [], e.g : #[ MetaItem ] + pub(crate) fn meta_item(p: &mut Parser<'_>) { + attributes::meta(p); + } + } + + pub(crate) mod top { + use super::*; + + pub(crate) fn source_file(p: &mut Parser<'_>) { + let m = p.start(); + p.eat(SHEBANG); + items::mod_contents(p, false); + m.complete(p, SOURCE_FILE); + } + + pub(crate) fn macro_stmts(p: &mut Parser<'_>) { + let m = p.start(); + + while !p.at(EOF) { + expressions::stmt(p, expressions::Semicolon::Optional); + } + + m.complete(p, MACRO_STMTS); + } + + pub(crate) fn macro_items(p: &mut Parser<'_>) { + let m = p.start(); + items::mod_contents(p, false); + m.complete(p, MACRO_ITEMS); + } + + pub(crate) fn pattern(p: &mut Parser<'_>) { + let m = p.start(); + patterns::pattern_top(p); + if p.at(EOF) { + m.abandon(p); + return; + } + while !p.at(EOF) { + p.bump_any(); + } + m.complete(p, ERROR); + } + + pub(crate) fn type_(p: &mut Parser<'_>) { + let m = p.start(); + types::type_(p); + if p.at(EOF) { + m.abandon(p); + 
return; + } + while !p.at(EOF) { + p.bump_any(); + } + m.complete(p, ERROR); + } + + pub(crate) fn expr(p: &mut Parser<'_>) { + let m = p.start(); + expressions::expr(p); + if p.at(EOF) { + m.abandon(p); + return; + } + while !p.at(EOF) { + p.bump_any(); + } + m.complete(p, ERROR); + } + + pub(crate) fn meta_item(p: &mut Parser<'_>) { + let m = p.start(); + attributes::meta(p); + if p.at(EOF) { + m.abandon(p); + return; + } + while !p.at(EOF) { + p.bump_any(); + } + m.complete(p, ERROR); + } + } +} + +pub(crate) fn reparser( + node: SyntaxKind, + first_child: Option<SyntaxKind>, + parent: Option<SyntaxKind>, +) -> Option<fn(&mut Parser<'_>)> { + let res = match node { + BLOCK_EXPR => expressions::block_expr, + RECORD_FIELD_LIST => items::record_field_list, + RECORD_EXPR_FIELD_LIST => items::record_expr_field_list, + VARIANT_LIST => items::variant_list, + MATCH_ARM_LIST => items::match_arm_list, + USE_TREE_LIST => items::use_tree_list, + EXTERN_ITEM_LIST => items::extern_item_list, + TOKEN_TREE if first_child? == T!['{'] => items::token_tree, + ASSOC_ITEM_LIST => match parent? { + IMPL | TRAIT => items::assoc_item_list, + _ => return None, + }, + ITEM_LIST => items::item_list, + _ => return None, + }; + Some(res) +} + +#[derive(Clone, Copy, PartialEq, Eq)] +enum BlockLike { + Block, + NotBlock, +} + +impl BlockLike { + fn is_block(self) -> bool { + self == BlockLike::Block + } +} + +fn opt_visibility(p: &mut Parser<'_>, in_tuple_field: bool) -> bool { + match p.current() { + T![pub] => { + let m = p.start(); + p.bump(T![pub]); + if p.at(T!['(']) { + match p.nth(1) { + // test crate_visibility + // pub(crate) struct S; + // pub(self) struct S; + // pub(super) struct S; + + // test pub_parens_typepath + // struct B(pub (super::A)); + // struct B(pub (crate::A,)); + T![crate] | T![self] | T![super] | T![ident] if p.nth(2) != T![:] => { + // If we are in a tuple struct, then the parens following `pub` + // might be an tuple field, not part of the visibility. 
So in that + // case we don't want to consume an identifier. + + // test pub_tuple_field + // struct MyStruct(pub (u32, u32)); + if !(in_tuple_field && matches!(p.nth(1), T![ident])) { + p.bump(T!['(']); + paths::use_path(p); + p.expect(T![')']); + } + } + // test crate_visibility_in + // pub(in super::A) struct S; + // pub(in crate) struct S; + T![in] => { + p.bump(T!['(']); + p.bump(T![in]); + paths::use_path(p); + p.expect(T![')']); + } + _ => (), + } + } + m.complete(p, VISIBILITY); + true + } + // test crate_keyword_vis + // crate fn main() { } + // struct S { crate field: u32 } + // struct T(crate u32); + T![crate] => { + if p.nth_at(1, T![::]) { + // test crate_keyword_path + // fn foo() { crate::foo(); } + return false; + } + let m = p.start(); + p.bump(T![crate]); + m.complete(p, VISIBILITY); + true + } + _ => false, + } +} + +fn opt_rename(p: &mut Parser<'_>) { + if p.at(T![as]) { + let m = p.start(); + p.bump(T![as]); + if !p.eat(T![_]) { + name(p); + } + m.complete(p, RENAME); + } +} + +fn abi(p: &mut Parser<'_>) { + assert!(p.at(T![extern])); + let abi = p.start(); + p.bump(T![extern]); + p.eat(STRING); + abi.complete(p, ABI); +} + +fn opt_ret_type(p: &mut Parser<'_>) -> bool { + if p.at(T![->]) { + let m = p.start(); + p.bump(T![->]); + types::type_no_bounds(p); + m.complete(p, RET_TYPE); + true + } else { + false + } +} + +fn name_r(p: &mut Parser<'_>, recovery: TokenSet) { + if p.at(IDENT) { + let m = p.start(); + p.bump(IDENT); + m.complete(p, NAME); + } else { + p.err_recover("expected a name", recovery); + } +} + +fn name(p: &mut Parser<'_>) { + name_r(p, TokenSet::EMPTY); +} + +fn name_ref(p: &mut Parser<'_>) { + if p.at(IDENT) { + let m = p.start(); + p.bump(IDENT); + m.complete(p, NAME_REF); + } else { + p.err_and_bump("expected identifier"); + } +} + +fn name_ref_or_index(p: &mut Parser<'_>) { + assert!(p.at(IDENT) || p.at(INT_NUMBER)); + let m = p.start(); + p.bump_any(); + m.complete(p, NAME_REF); +} + +fn lifetime(p: &mut Parser<'_>) { + 
assert!(p.at(LIFETIME_IDENT)); + let m = p.start(); + p.bump(LIFETIME_IDENT); + m.complete(p, LIFETIME); +} + +fn error_block(p: &mut Parser<'_>, message: &str) { + assert!(p.at(T!['{'])); + let m = p.start(); + p.error(message); + p.bump(T!['{']); + expressions::expr_block_contents(p); + p.eat(T!['}']); + m.complete(p, ERROR); +} diff --git a/src/tools/rust-analyzer/crates/parser/src/grammar/attributes.rs b/src/tools/rust-analyzer/crates/parser/src/grammar/attributes.rs new file mode 100644 index 000000000..0cf6a16f8 --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/grammar/attributes.rs @@ -0,0 +1,53 @@ +use super::*; + +pub(super) fn inner_attrs(p: &mut Parser<'_>) { + while p.at(T![#]) && p.nth(1) == T![!] { + attr(p, true); + } +} + +pub(super) fn outer_attrs(p: &mut Parser<'_>) { + while p.at(T![#]) { + attr(p, false); + } +} + +fn attr(p: &mut Parser<'_>, inner: bool) { + assert!(p.at(T![#])); + + let attr = p.start(); + p.bump(T![#]); + + if inner { + p.bump(T![!]); + } + + if p.eat(T!['[']) { + meta(p); + + if !p.eat(T![']']) { + p.error("expected `]`"); + } + } else { + p.error("expected `[`"); + } + attr.complete(p, ATTR); +} + +pub(super) fn meta(p: &mut Parser<'_>) { + let meta = p.start(); + paths::use_path(p); + + match p.current() { + T![=] => { + p.bump(T![=]); + if !expressions::expr(p) { + p.error("expected expression"); + } + } + T!['('] | T!['['] | T!['{'] => items::token_tree(p), + _ => {} + } + + meta.complete(p, META); +} diff --git a/src/tools/rust-analyzer/crates/parser/src/grammar/expressions.rs b/src/tools/rust-analyzer/crates/parser/src/grammar/expressions.rs new file mode 100644 index 000000000..e7402104e --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/grammar/expressions.rs @@ -0,0 +1,625 @@ +mod atom; + +use super::*; + +pub(crate) use self::atom::{block_expr, match_arm_list}; +pub(super) use self::atom::{literal, LITERAL_FIRST}; + +#[derive(PartialEq, Eq)] +pub(super) enum Semicolon { + Required, + 
Optional, + Forbidden, +} + +const EXPR_FIRST: TokenSet = LHS_FIRST; + +pub(super) fn expr(p: &mut Parser<'_>) -> bool { + let r = Restrictions { forbid_structs: false, prefer_stmt: false }; + expr_bp(p, None, r, 1).is_some() +} + +pub(super) fn expr_stmt( + p: &mut Parser<'_>, + m: Option<Marker>, +) -> Option<(CompletedMarker, BlockLike)> { + let r = Restrictions { forbid_structs: false, prefer_stmt: true }; + expr_bp(p, m, r, 1) +} + +fn expr_no_struct(p: &mut Parser<'_>) { + let r = Restrictions { forbid_structs: true, prefer_stmt: false }; + expr_bp(p, None, r, 1); +} + +/// Parses the expression in `let pattern = expression`. +/// It needs to be parsed with lower precedence than `&&`, so that +/// `if let true = true && false` is parsed as `if (let true = true) && (true)` +/// and not `if let true = (true && true)`. +fn expr_let(p: &mut Parser<'_>) { + let r = Restrictions { forbid_structs: true, prefer_stmt: false }; + expr_bp(p, None, r, 5); +} + +pub(super) fn stmt(p: &mut Parser<'_>, semicolon: Semicolon) { + if p.eat(T![;]) { + return; + } + + let m = p.start(); + // test attr_on_expr_stmt + // fn foo() { + // #[A] foo(); + // #[B] bar!{} + // #[C] #[D] {} + // #[D] return (); + // } + attributes::outer_attrs(p); + + if p.at(T![let]) { + let_stmt(p, m, semicolon); + return; + } + + // test block_items + // fn a() { fn b() {} } + let m = match items::opt_item(p, m) { + Ok(()) => return, + Err(m) => m, + }; + + if let Some((cm, blocklike)) = expr_stmt(p, Some(m)) { + if !(p.at(T!['}']) || (semicolon != Semicolon::Required && p.at(EOF))) { + // test no_semi_after_block + // fn foo() { + // if true {} + // loop {} + // match () {} + // while true {} + // for _ in () {} + // {} + // {} + // macro_rules! 
test { + // () => {} + // } + // test!{} + // } + let m = cm.precede(p); + match semicolon { + Semicolon::Required => { + if blocklike.is_block() { + p.eat(T![;]); + } else { + p.expect(T![;]); + } + } + Semicolon::Optional => { + p.eat(T![;]); + } + Semicolon::Forbidden => (), + } + m.complete(p, EXPR_STMT); + } + } + + // test let_stmt + // fn f() { let x: i32 = 92; } + fn let_stmt(p: &mut Parser<'_>, m: Marker, with_semi: Semicolon) { + p.bump(T![let]); + patterns::pattern(p); + if p.at(T![:]) { + // test let_stmt_ascription + // fn f() { let x: i32; } + types::ascription(p); + } + if p.eat(T![=]) { + // test let_stmt_init + // fn f() { let x = 92; } + expressions::expr(p); + } + + if p.at(T![else]) { + // test let_else + // fn f() { let Some(x) = opt else { return }; } + + let m = p.start(); + p.bump(T![else]); + block_expr(p); + m.complete(p, LET_ELSE); + } + + match with_semi { + Semicolon::Forbidden => (), + Semicolon::Optional => { + p.eat(T![;]); + } + Semicolon::Required => { + p.expect(T![;]); + } + } + m.complete(p, LET_STMT); + } +} + +pub(super) fn expr_block_contents(p: &mut Parser<'_>) { + attributes::inner_attrs(p); + + while !p.at(EOF) && !p.at(T!['}']) { + // test nocontentexpr + // fn foo(){ + // ;;;some_expr();;;;{;;;};;;;Ok(()) + // } + + // test nocontentexpr_after_item + // fn simple_function() { + // enum LocalEnum { + // One, + // Two, + // }; + // fn f() {}; + // struct S {}; + // } + stmt(p, Semicolon::Required); + } +} + +#[derive(Clone, Copy)] +struct Restrictions { + forbid_structs: bool, + prefer_stmt: bool, +} + +/// Binding powers of operators for a Pratt parser. 
+/// +/// See <https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html> +#[rustfmt::skip] +fn current_op(p: &Parser<'_>) -> (u8, SyntaxKind) { + const NOT_AN_OP: (u8, SyntaxKind) = (0, T![@]); + match p.current() { + T![|] if p.at(T![||]) => (3, T![||]), + T![|] if p.at(T![|=]) => (1, T![|=]), + T![|] => (6, T![|]), + T![>] if p.at(T![>>=]) => (1, T![>>=]), + T![>] if p.at(T![>>]) => (9, T![>>]), + T![>] if p.at(T![>=]) => (5, T![>=]), + T![>] => (5, T![>]), + T![=] if p.at(T![=>]) => NOT_AN_OP, + T![=] if p.at(T![==]) => (5, T![==]), + T![=] => (1, T![=]), + T![<] if p.at(T![<=]) => (5, T![<=]), + T![<] if p.at(T![<<=]) => (1, T![<<=]), + T![<] if p.at(T![<<]) => (9, T![<<]), + T![<] => (5, T![<]), + T![+] if p.at(T![+=]) => (1, T![+=]), + T![+] => (10, T![+]), + T![^] if p.at(T![^=]) => (1, T![^=]), + T![^] => (7, T![^]), + T![%] if p.at(T![%=]) => (1, T![%=]), + T![%] => (11, T![%]), + T![&] if p.at(T![&=]) => (1, T![&=]), + // If you update this, remember to update `expr_let()` too. + T![&] if p.at(T![&&]) => (4, T![&&]), + T![&] => (8, T![&]), + T![/] if p.at(T![/=]) => (1, T![/=]), + T![/] => (11, T![/]), + T![*] if p.at(T![*=]) => (1, T![*=]), + T![*] => (11, T![*]), + T![.] if p.at(T![..=]) => (2, T![..=]), + T![.] if p.at(T![..]) => (2, T![..]), + T![!] if p.at(T![!=]) => (5, T![!=]), + T![-] if p.at(T![-=]) => (1, T![-=]), + T![-] => (10, T![-]), + T![as] => (12, T![as]), + + _ => NOT_AN_OP + } +} + +// Parses expression with binding power of at least bp. 
+fn expr_bp( + p: &mut Parser<'_>, + m: Option<Marker>, + mut r: Restrictions, + bp: u8, +) -> Option<(CompletedMarker, BlockLike)> { + let m = m.unwrap_or_else(|| { + let m = p.start(); + attributes::outer_attrs(p); + m + }); + let mut lhs = match lhs(p, r) { + Some((lhs, blocklike)) => { + let lhs = lhs.extend_to(p, m); + if r.prefer_stmt && blocklike.is_block() { + // test stmt_bin_expr_ambiguity + // fn f() { + // let _ = {1} & 2; + // {1} &2; + // } + return Some((lhs, BlockLike::Block)); + } + lhs + } + None => { + m.abandon(p); + return None; + } + }; + + loop { + let is_range = p.at(T![..]) || p.at(T![..=]); + let (op_bp, op) = current_op(p); + if op_bp < bp { + break; + } + // test as_precedence + // fn f() { let _ = &1 as *const i32; } + if p.at(T![as]) { + lhs = cast_expr(p, lhs); + continue; + } + let m = lhs.precede(p); + p.bump(op); + + // test binop_resets_statementness + // fn f() { v = {1}&2; } + r = Restrictions { prefer_stmt: false, ..r }; + + if is_range { + // test postfix_range + // fn foo() { + // let x = 1..; + // match 1.. 
{ _ => () }; + // match a.b()..S { _ => () }; + // } + let has_trailing_expression = + p.at_ts(EXPR_FIRST) && !(r.forbid_structs && p.at(T!['{'])); + if !has_trailing_expression { + // no RHS + lhs = m.complete(p, RANGE_EXPR); + break; + } + } + + expr_bp(p, None, Restrictions { prefer_stmt: false, ..r }, op_bp + 1); + lhs = m.complete(p, if is_range { RANGE_EXPR } else { BIN_EXPR }); + } + Some((lhs, BlockLike::NotBlock)) +} + +const LHS_FIRST: TokenSet = + atom::ATOM_EXPR_FIRST.union(TokenSet::new(&[T![&], T![*], T![!], T![.], T![-]])); + +fn lhs(p: &mut Parser<'_>, r: Restrictions) -> Option<(CompletedMarker, BlockLike)> { + let m; + let kind = match p.current() { + // test ref_expr + // fn foo() { + // // reference operator + // let _ = &1; + // let _ = &mut &f(); + // let _ = &raw; + // let _ = &raw.0; + // // raw reference operator + // let _ = &raw mut foo; + // let _ = &raw const foo; + // } + T![&] => { + m = p.start(); + p.bump(T![&]); + if p.at_contextual_kw(T![raw]) && (p.nth_at(1, T![mut]) || p.nth_at(1, T![const])) { + p.bump_remap(T![raw]); + p.bump_any(); + } else { + p.eat(T![mut]); + } + REF_EXPR + } + // test unary_expr + // fn foo() { + // **&1; + // !!true; + // --1; + // } + T![*] | T![!] 
| T![-] => { + m = p.start(); + p.bump_any(); + PREFIX_EXPR + } + _ => { + // test full_range_expr + // fn foo() { xs[..]; } + for op in [T![..=], T![..]] { + if p.at(op) { + m = p.start(); + p.bump(op); + if p.at_ts(EXPR_FIRST) && !(r.forbid_structs && p.at(T!['{'])) { + expr_bp(p, None, r, 2); + } + let cm = m.complete(p, RANGE_EXPR); + return Some((cm, BlockLike::NotBlock)); + } + } + + // test expression_after_block + // fn foo() { + // let mut p = F{x: 5}; + // {p}.x = 10; + // } + let (lhs, blocklike) = atom::atom_expr(p, r)?; + let (cm, block_like) = + postfix_expr(p, lhs, blocklike, !(r.prefer_stmt && blocklike.is_block())); + return Some((cm, block_like)); + } + }; + // parse the interior of the unary expression + expr_bp(p, None, r, 255); + let cm = m.complete(p, kind); + Some((cm, BlockLike::NotBlock)) +} + +fn postfix_expr( + p: &mut Parser<'_>, + mut lhs: CompletedMarker, + // Calls are disallowed if the type is a block and we prefer statements because the call cannot be disambiguated from a tuple + // E.g. `while true {break}();` is parsed as + // `while true {break}; ();` + mut block_like: BlockLike, + mut allow_calls: bool, +) -> (CompletedMarker, BlockLike) { + loop { + lhs = match p.current() { + // test stmt_postfix_expr_ambiguity + // fn foo() { + // match () { + // _ => {} + // () => {} + // [] => {} + // } + // } + T!['('] if allow_calls => call_expr(p, lhs), + T!['['] if allow_calls => index_expr(p, lhs), + T![.] => match postfix_dot_expr(p, lhs) { + Ok(it) => it, + Err(it) => { + lhs = it; + break; + } + }, + T![?] 
=> try_expr(p, lhs), + _ => break, + }; + allow_calls = true; + block_like = BlockLike::NotBlock; + } + return (lhs, block_like); + + fn postfix_dot_expr( + p: &mut Parser<'_>, + lhs: CompletedMarker, + ) -> Result<CompletedMarker, CompletedMarker> { + assert!(p.at(T![.])); + if p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth_at(2, T![::])) { + return Ok(method_call_expr(p, lhs)); + } + + // test await_expr + // fn foo() { + // x.await; + // x.0.await; + // x.0().await?.hello(); + // } + if p.nth(1) == T![await] { + let m = lhs.precede(p); + p.bump(T![.]); + p.bump(T![await]); + return Ok(m.complete(p, AWAIT_EXPR)); + } + + if p.at(T![..=]) || p.at(T![..]) { + return Err(lhs); + } + + Ok(field_expr(p, lhs)) + } +} + +// test call_expr +// fn foo() { +// let _ = f(); +// let _ = f()(1)(1, 2,); +// let _ = f(<Foo>::func()); +// f(<Foo as Trait>::func()); +// } +fn call_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker { + assert!(p.at(T!['('])); + let m = lhs.precede(p); + arg_list(p); + m.complete(p, CALL_EXPR) +} + +// test index_expr +// fn foo() { +// x[1][2]; +// } +fn index_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker { + assert!(p.at(T!['['])); + let m = lhs.precede(p); + p.bump(T!['[']); + expr(p); + p.expect(T![']']); + m.complete(p, INDEX_EXPR) +} + +// test method_call_expr +// fn foo() { +// x.foo(); +// y.bar::<T>(1, 2,); +// } +fn method_call_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker { + assert!(p.at(T![.]) && p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth_at(2, T![::]))); + let m = lhs.precede(p); + p.bump_any(); + name_ref(p); + generic_args::opt_generic_arg_list(p, true); + if p.at(T!['(']) { + arg_list(p); + } + m.complete(p, METHOD_CALL_EXPR) +} + +// test field_expr +// fn foo() { +// x.foo; +// x.0.bar; +// x.0(); +// } +fn field_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker { + assert!(p.at(T![.])); + let m = lhs.precede(p); + p.bump(T![.]); + if p.at(IDENT) 
|| p.at(INT_NUMBER) { + name_ref_or_index(p); + } else if p.at(FLOAT_NUMBER) { + // FIXME: How to recover and instead parse INT + T![.]? + p.bump_any(); + } else { + p.error("expected field name or number"); + } + m.complete(p, FIELD_EXPR) +} + +// test try_expr +// fn foo() { +// x?; +// } +fn try_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker { + assert!(p.at(T![?])); + let m = lhs.precede(p); + p.bump(T![?]); + m.complete(p, TRY_EXPR) +} + +// test cast_expr +// fn foo() { +// 82 as i32; +// 81 as i8 + 1; +// 79 as i16 - 1; +// 0x36 as u8 <= 0x37; +// } +fn cast_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker { + assert!(p.at(T![as])); + let m = lhs.precede(p); + p.bump(T![as]); + // Use type_no_bounds(), because cast expressions are not + // allowed to have bounds. + types::type_no_bounds(p); + m.complete(p, CAST_EXPR) +} + +fn arg_list(p: &mut Parser<'_>) { + assert!(p.at(T!['('])); + let m = p.start(); + p.bump(T!['(']); + while !p.at(T![')']) && !p.at(EOF) { + // test arg_with_attr + // fn main() { + // foo(#[attr] 92) + // } + if !expr(p) { + break; + } + if !p.at(T![')']) && !p.expect(T![,]) { + break; + } + } + p.eat(T![')']); + m.complete(p, ARG_LIST); +} + +// test path_expr +// fn foo() { +// let _ = a; +// let _ = a::b; +// let _ = ::a::<b>; +// let _ = format!(); +// } +fn path_expr(p: &mut Parser<'_>, r: Restrictions) -> (CompletedMarker, BlockLike) { + assert!(paths::is_path_start(p)); + let m = p.start(); + paths::expr_path(p); + match p.current() { + T!['{'] if !r.forbid_structs => { + record_expr_field_list(p); + (m.complete(p, RECORD_EXPR), BlockLike::NotBlock) + } + T![!] 
if !p.at(T![!=]) => { + let block_like = items::macro_call_after_excl(p); + (m.complete(p, MACRO_CALL).precede(p).complete(p, MACRO_EXPR), block_like) + } + _ => (m.complete(p, PATH_EXPR), BlockLike::NotBlock), + } +} + +// test record_lit +// fn foo() { +// S {}; +// S { x, y: 32, }; +// S { x, y: 32, ..Default::default() }; +// TupleStruct { 0: 1 }; +// } +pub(crate) fn record_expr_field_list(p: &mut Parser<'_>) { + assert!(p.at(T!['{'])); + let m = p.start(); + p.bump(T!['{']); + while !p.at(EOF) && !p.at(T!['}']) { + let m = p.start(); + // test record_literal_field_with_attr + // fn main() { + // S { #[cfg(test)] field: 1 } + // } + attributes::outer_attrs(p); + + match p.current() { + IDENT | INT_NUMBER => { + // test_err record_literal_before_ellipsis_recovery + // fn main() { + // S { field ..S::default() } + // } + if p.nth_at(1, T![:]) || p.nth_at(1, T![..]) { + name_ref_or_index(p); + p.expect(T![:]); + } + expr(p); + m.complete(p, RECORD_EXPR_FIELD); + } + T![.] if p.at(T![..]) => { + m.abandon(p); + p.bump(T![..]); + + // test destructuring_assignment_struct_rest_pattern + // fn foo() { + // S { .. } = S {}; + // } + + // We permit `.. }` on the left-hand side of a destructuring assignment. 
+ if !p.at(T!['}']) { + expr(p); + } + } + T!['{'] => { + error_block(p, "expected a field"); + m.abandon(p); + } + _ => { + p.err_and_bump("expected identifier"); + m.abandon(p); + } + } + if !p.at(T!['}']) { + p.expect(T![,]); + } + } + p.expect(T!['}']); + m.complete(p, RECORD_EXPR_FIELD_LIST); +} diff --git a/src/tools/rust-analyzer/crates/parser/src/grammar/expressions/atom.rs b/src/tools/rust-analyzer/crates/parser/src/grammar/expressions/atom.rs new file mode 100644 index 000000000..99f42a266 --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/grammar/expressions/atom.rs @@ -0,0 +1,643 @@ +use super::*; + +// test expr_literals +// fn foo() { +// let _ = true; +// let _ = false; +// let _ = 1; +// let _ = 2.0; +// let _ = b'a'; +// let _ = 'b'; +// let _ = "c"; +// let _ = r"d"; +// let _ = b"e"; +// let _ = br"f"; +// } +pub(crate) const LITERAL_FIRST: TokenSet = TokenSet::new(&[ + T![true], + T![false], + INT_NUMBER, + FLOAT_NUMBER, + BYTE, + CHAR, + STRING, + BYTE_STRING, +]); + +pub(crate) fn literal(p: &mut Parser<'_>) -> Option<CompletedMarker> { + if !p.at_ts(LITERAL_FIRST) { + return None; + } + let m = p.start(); + p.bump_any(); + Some(m.complete(p, LITERAL)) +} + +// E.g. 
for after the break in `if break {}`, this should not match +pub(super) const ATOM_EXPR_FIRST: TokenSet = + LITERAL_FIRST.union(paths::PATH_FIRST).union(TokenSet::new(&[ + T!['('], + T!['{'], + T!['['], + T![|], + T![move], + T![box], + T![if], + T![while], + T![match], + T![unsafe], + T![return], + T![yield], + T![break], + T![continue], + T![async], + T![try], + T![const], + T![loop], + T![for], + LIFETIME_IDENT, + ])); + +const EXPR_RECOVERY_SET: TokenSet = TokenSet::new(&[T![let]]); + +pub(super) fn atom_expr( + p: &mut Parser<'_>, + r: Restrictions, +) -> Option<(CompletedMarker, BlockLike)> { + if let Some(m) = literal(p) { + return Some((m, BlockLike::NotBlock)); + } + if paths::is_path_start(p) { + return Some(path_expr(p, r)); + } + let la = p.nth(1); + let done = match p.current() { + T!['('] => tuple_expr(p), + T!['['] => array_expr(p), + T![if] => if_expr(p), + T![let] => let_expr(p), + T![_] => { + // test destructuring_assignment_wildcard_pat + // fn foo() { + // _ = 1; + // Some(_) = None; + // } + let m = p.start(); + p.bump(T![_]); + m.complete(p, UNDERSCORE_EXPR) + } + T![loop] => loop_expr(p, None), + T![box] => box_expr(p, None), + T![while] => while_expr(p, None), + T![try] => try_block_expr(p, None), + T![match] => match_expr(p), + T![return] => return_expr(p), + T![yield] => yield_expr(p), + T![continue] => continue_expr(p), + T![break] => break_expr(p, r), + + LIFETIME_IDENT if la == T![:] => { + let m = p.start(); + label(p); + match p.current() { + T![loop] => loop_expr(p, Some(m)), + T![for] => for_expr(p, Some(m)), + T![while] => while_expr(p, Some(m)), + // test labeled_block + // fn f() { 'label: {}; } + T!['{'] => { + stmt_list(p); + m.complete(p, BLOCK_EXPR) + } + _ => { + // test_err misplaced_label_err + // fn main() { + // 'loop: impl + // } + p.error("expected a loop"); + m.complete(p, ERROR); + return None; + } + } + } + // test effect_blocks + // fn f() { unsafe { } } + // fn f() { const { } } + // fn f() { async { } } + // fn 
f() { async move { } } + T![const] | T![unsafe] | T![async] if la == T!['{'] => { + let m = p.start(); + p.bump_any(); + stmt_list(p); + m.complete(p, BLOCK_EXPR) + } + T![async] if la == T![move] && p.nth(2) == T!['{'] => { + let m = p.start(); + p.bump(T![async]); + p.eat(T![move]); + stmt_list(p); + m.complete(p, BLOCK_EXPR) + } + T!['{'] => { + // test for_range_from + // fn foo() { + // for x in 0 .. { + // break; + // } + // } + let m = p.start(); + stmt_list(p); + m.complete(p, BLOCK_EXPR) + } + + T![static] | T![async] | T![move] | T![|] => closure_expr(p), + T![for] if la == T![<] => closure_expr(p), + T![for] => for_expr(p, None), + + _ => { + p.err_recover("expected expression", EXPR_RECOVERY_SET); + return None; + } + }; + let blocklike = match done.kind() { + IF_EXPR | WHILE_EXPR | FOR_EXPR | LOOP_EXPR | MATCH_EXPR | BLOCK_EXPR => BlockLike::Block, + _ => BlockLike::NotBlock, + }; + Some((done, blocklike)) +} + +// test tuple_expr +// fn foo() { +// (); +// (1); +// (1,); +// } +fn tuple_expr(p: &mut Parser<'_>) -> CompletedMarker { + assert!(p.at(T!['('])); + let m = p.start(); + p.expect(T!['(']); + + let mut saw_comma = false; + let mut saw_expr = false; + while !p.at(EOF) && !p.at(T![')']) { + saw_expr = true; + + // test tuple_attrs + // const A: (i64, i64) = (1, #[cfg(test)] 2); + if !expr(p) { + break; + } + + if !p.at(T![')']) { + saw_comma = true; + p.expect(T![,]); + } + } + p.expect(T![')']); + m.complete(p, if saw_expr && !saw_comma { PAREN_EXPR } else { TUPLE_EXPR }) +} + +// test array_expr +// fn foo() { +// []; +// [1]; +// [1, 2,]; +// [1; 2]; +// } +fn array_expr(p: &mut Parser<'_>) -> CompletedMarker { + assert!(p.at(T!['['])); + let m = p.start(); + + let mut n_exprs = 0u32; + let mut has_semi = false; + + p.bump(T!['[']); + while !p.at(EOF) && !p.at(T![']']) { + n_exprs += 1; + + // test array_attrs + // const A: &[i64] = &[1, #[cfg(test)] 2]; + if !expr(p) { + break; + } + + if n_exprs == 1 && p.eat(T![;]) { + has_semi = true; + 
continue; + } + + if has_semi || !p.at(T![']']) && !p.expect(T![,]) { + break; + } + } + p.expect(T![']']); + + m.complete(p, ARRAY_EXPR) +} + +// test lambda_expr +// fn foo() { +// || (); +// || -> i32 { 92 }; +// |x| x; +// move |x: i32,| x; +// async || {}; +// move || {}; +// async move || {}; +// static || {}; +// static move || {}; +// static async || {}; +// static async move || {}; +// for<'a> || {}; +// for<'a> move || {}; +// } +fn closure_expr(p: &mut Parser<'_>) -> CompletedMarker { + assert!(match p.current() { + T![static] | T![async] | T![move] | T![|] => true, + T![for] => p.nth(1) == T![<], + _ => false, + }); + + let m = p.start(); + + if p.at(T![for]) { + types::for_binder(p); + } + + p.eat(T![static]); + p.eat(T![async]); + p.eat(T![move]); + + if !p.at(T![|]) { + p.error("expected `|`"); + return m.complete(p, CLOSURE_EXPR); + } + params::param_list_closure(p); + if opt_ret_type(p) { + // test lambda_ret_block + // fn main() { || -> i32 { 92 }(); } + block_expr(p); + } else if p.at_ts(EXPR_FIRST) { + expr(p); + } else { + p.error("expected expression"); + } + m.complete(p, CLOSURE_EXPR) +} + +// test if_expr +// fn foo() { +// if true {}; +// if true {} else {}; +// if true {} else if false {} else {}; +// if S {}; +// if { true } { } else { }; +// } +fn if_expr(p: &mut Parser<'_>) -> CompletedMarker { + assert!(p.at(T![if])); + let m = p.start(); + p.bump(T![if]); + expr_no_struct(p); + block_expr(p); + if p.at(T![else]) { + p.bump(T![else]); + if p.at(T![if]) { + if_expr(p); + } else { + block_expr(p); + } + } + m.complete(p, IF_EXPR) +} + +// test label +// fn foo() { +// 'a: loop {} +// 'b: while true {} +// 'c: for x in () {} +// } +fn label(p: &mut Parser<'_>) { + assert!(p.at(LIFETIME_IDENT) && p.nth(1) == T![:]); + let m = p.start(); + lifetime(p); + p.bump_any(); + m.complete(p, LABEL); +} + +// test loop_expr +// fn foo() { +// loop {}; +// } +fn loop_expr(p: &mut Parser<'_>, m: Option<Marker>) -> CompletedMarker { + 
assert!(p.at(T![loop])); + let m = m.unwrap_or_else(|| p.start()); + p.bump(T![loop]); + block_expr(p); + m.complete(p, LOOP_EXPR) +} + +// test while_expr +// fn foo() { +// while true {}; +// while let Some(x) = it.next() {}; +// while { true } {}; +// } +fn while_expr(p: &mut Parser<'_>, m: Option<Marker>) -> CompletedMarker { + assert!(p.at(T![while])); + let m = m.unwrap_or_else(|| p.start()); + p.bump(T![while]); + expr_no_struct(p); + block_expr(p); + m.complete(p, WHILE_EXPR) +} + +// test for_expr +// fn foo() { +// for x in [] {}; +// } +fn for_expr(p: &mut Parser<'_>, m: Option<Marker>) -> CompletedMarker { + assert!(p.at(T![for])); + let m = m.unwrap_or_else(|| p.start()); + p.bump(T![for]); + patterns::pattern(p); + p.expect(T![in]); + expr_no_struct(p); + block_expr(p); + m.complete(p, FOR_EXPR) +} + +// test let_expr +// fn foo() { +// if let Some(_) = None && true {} +// while 1 == 5 && (let None = None) {} +// } +fn let_expr(p: &mut Parser<'_>) -> CompletedMarker { + let m = p.start(); + p.bump(T![let]); + patterns::pattern_top(p); + p.expect(T![=]); + expr_let(p); + m.complete(p, LET_EXPR) +} + +// test match_expr +// fn foo() { +// match () { }; +// match S {}; +// match { } { _ => () }; +// match { S {} } {}; +// } +fn match_expr(p: &mut Parser<'_>) -> CompletedMarker { + assert!(p.at(T![match])); + let m = p.start(); + p.bump(T![match]); + expr_no_struct(p); + if p.at(T!['{']) { + match_arm_list(p); + } else { + p.error("expected `{`"); + } + m.complete(p, MATCH_EXPR) +} + +pub(crate) fn match_arm_list(p: &mut Parser<'_>) { + assert!(p.at(T!['{'])); + let m = p.start(); + p.eat(T!['{']); + + // test match_arms_inner_attribute + // fn foo() { + // match () { + // #![doc("Inner attribute")] + // #![doc("Can be")] + // #![doc("Stacked")] + // _ => (), + // } + // } + attributes::inner_attrs(p); + + while !p.at(EOF) && !p.at(T!['}']) { + if p.at(T!['{']) { + error_block(p, "expected match arm"); + continue; + } + match_arm(p); + } + 
p.expect(T!['}']); + m.complete(p, MATCH_ARM_LIST); +} + +// test match_arm +// fn foo() { +// match () { +// _ => (), +// _ if Test > Test{field: 0} => (), +// X | Y if Z => (), +// | X | Y if Z => (), +// | X => (), +// }; +// } +fn match_arm(p: &mut Parser<'_>) { + let m = p.start(); + // test match_arms_outer_attributes + // fn foo() { + // match () { + // #[cfg(feature = "some")] + // _ => (), + // #[cfg(feature = "other")] + // _ => (), + // #[cfg(feature = "many")] + // #[cfg(feature = "attributes")] + // #[cfg(feature = "before")] + // _ => (), + // } + // } + attributes::outer_attrs(p); + + patterns::pattern_top_r(p, TokenSet::EMPTY); + if p.at(T![if]) { + match_guard(p); + } + p.expect(T![=>]); + let blocklike = match expr_stmt(p, None) { + Some((_, blocklike)) => blocklike, + None => BlockLike::NotBlock, + }; + + // test match_arms_commas + // fn foo() { + // match () { + // _ => (), + // _ => {} + // _ => () + // } + // } + if !p.eat(T![,]) && !blocklike.is_block() && !p.at(T!['}']) { + p.error("expected `,`"); + } + m.complete(p, MATCH_ARM); +} + +// test match_guard +// fn foo() { +// match () { +// _ if foo => (), +// _ if let foo = bar => (), +// } +// } +fn match_guard(p: &mut Parser<'_>) -> CompletedMarker { + assert!(p.at(T![if])); + let m = p.start(); + p.bump(T![if]); + expr(p); + m.complete(p, MATCH_GUARD) +} + +// test block +// fn a() {} +// fn b() { let _ = 1; } +// fn c() { 1; 2; } +// fn d() { 1; 2 } +pub(crate) fn block_expr(p: &mut Parser<'_>) { + if !p.at(T!['{']) { + p.error("expected a block"); + return; + } + let m = p.start(); + stmt_list(p); + m.complete(p, BLOCK_EXPR); +} + +fn stmt_list(p: &mut Parser<'_>) -> CompletedMarker { + assert!(p.at(T!['{'])); + let m = p.start(); + p.bump(T!['{']); + expr_block_contents(p); + p.expect(T!['}']); + m.complete(p, STMT_LIST) +} + +// test return_expr +// fn foo() { +// return; +// return 92; +// } +fn return_expr(p: &mut Parser<'_>) -> CompletedMarker { + assert!(p.at(T![return])); + let m 
= p.start(); + p.bump(T![return]); + if p.at_ts(EXPR_FIRST) { + expr(p); + } + m.complete(p, RETURN_EXPR) +} +// test yield_expr +// fn foo() { +// yield; +// yield 1; +// } +fn yield_expr(p: &mut Parser<'_>) -> CompletedMarker { + assert!(p.at(T![yield])); + let m = p.start(); + p.bump(T![yield]); + if p.at_ts(EXPR_FIRST) { + expr(p); + } + m.complete(p, YIELD_EXPR) +} + +// test continue_expr +// fn foo() { +// loop { +// continue; +// continue 'l; +// } +// } +fn continue_expr(p: &mut Parser<'_>) -> CompletedMarker { + assert!(p.at(T![continue])); + let m = p.start(); + p.bump(T![continue]); + if p.at(LIFETIME_IDENT) { + lifetime(p); + } + m.complete(p, CONTINUE_EXPR) +} + +// test break_expr +// fn foo() { +// loop { +// break; +// break 'l; +// break 92; +// break 'l 92; +// } +// } +fn break_expr(p: &mut Parser<'_>, r: Restrictions) -> CompletedMarker { + assert!(p.at(T![break])); + let m = p.start(); + p.bump(T![break]); + if p.at(LIFETIME_IDENT) { + lifetime(p); + } + // test break_ambiguity + // fn foo(){ + // if break {} + // while break {} + // for i in break {} + // match break {} + // } + if p.at_ts(EXPR_FIRST) && !(r.forbid_structs && p.at(T!['{'])) { + expr(p); + } + m.complete(p, BREAK_EXPR) +} + +// test try_block_expr +// fn foo() { +// let _ = try {}; +// } +fn try_block_expr(p: &mut Parser<'_>, m: Option<Marker>) -> CompletedMarker { + assert!(p.at(T![try])); + let m = m.unwrap_or_else(|| p.start()); + // Special-case `try!` as macro. 
+ // This is a hack until we do proper edition support + if p.nth_at(1, T![!]) { + // test try_macro_fallback + // fn foo() { try!(Ok(())); } + let macro_call = p.start(); + let path = p.start(); + let path_segment = p.start(); + let name_ref = p.start(); + p.bump_remap(IDENT); + name_ref.complete(p, NAME_REF); + path_segment.complete(p, PATH_SEGMENT); + path.complete(p, PATH); + let _block_like = items::macro_call_after_excl(p); + macro_call.complete(p, MACRO_CALL); + return m.complete(p, MACRO_EXPR); + } + + p.bump(T![try]); + if p.at(T!['{']) { + stmt_list(p); + } else { + p.error("expected a block"); + } + m.complete(p, BLOCK_EXPR) +} + +// test box_expr +// fn foo() { +// let x = box 1i32; +// let y = (box 1i32, box 2i32); +// let z = Foo(box 1i32, box 2i32); +// } +fn box_expr(p: &mut Parser<'_>, m: Option<Marker>) -> CompletedMarker { + assert!(p.at(T![box])); + let m = m.unwrap_or_else(|| p.start()); + p.bump(T![box]); + if p.at_ts(EXPR_FIRST) { + expr(p); + } + m.complete(p, BOX_EXPR) +} diff --git a/src/tools/rust-analyzer/crates/parser/src/grammar/generic_args.rs b/src/tools/rust-analyzer/crates/parser/src/grammar/generic_args.rs new file mode 100644 index 000000000..c438943a0 --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/grammar/generic_args.rs @@ -0,0 +1,131 @@ +use super::*; + +pub(super) fn opt_generic_arg_list(p: &mut Parser<'_>, colon_colon_required: bool) { + let m; + if p.at(T![::]) && p.nth(2) == T![<] { + m = p.start(); + p.bump(T![::]); + p.bump(T![<]); + } else if !colon_colon_required && p.at(T![<]) && p.nth(1) != T![=] { + m = p.start(); + p.bump(T![<]); + } else { + return; + } + + while !p.at(EOF) && !p.at(T![>]) { + generic_arg(p); + if !p.at(T![>]) && !p.expect(T![,]) { + break; + } + } + p.expect(T![>]); + m.complete(p, GENERIC_ARG_LIST); +} + +// test generic_arg +// type T = S<i32>; +fn generic_arg(p: &mut Parser<'_>) { + match p.current() { + LIFETIME_IDENT => lifetime_arg(p), + T!['{'] | T![true] | T![false] | T![-] 
=> const_arg(p), + k if k.is_literal() => const_arg(p), + // test associated_type_bounds + // fn print_all<T: Iterator<Item, Item::Item, Item::<true>, Item: Display, Item<'a> = Item>>(printables: T) {} + + // test macro_inside_generic_arg + // type A = Foo<syn::Token![_]>; + IDENT if [T![<], T![=], T![:]].contains(&p.nth(1)) && !p.nth_at(1, T![::]) => { + let m = p.start(); + name_ref(p); + opt_generic_arg_list(p, false); + match p.current() { + T![=] => { + p.bump_any(); + if types::TYPE_FIRST.contains(p.current()) { + // test assoc_type_eq + // type T = StreamingIterator<Item<'a> = &'a T>; + types::type_(p); + } else { + // test assoc_const_eq + // fn foo<F: Foo<N=3>>() {} + // const TEST: usize = 3; + // fn bar<F: Foo<N={TEST}>>() {} + const_arg(p); + } + m.complete(p, ASSOC_TYPE_ARG); + } + // test assoc_type_bound + // type T = StreamingIterator<Item<'a>: Clone>; + T![:] if !p.at(T![::]) => { + generic_params::bounds(p); + m.complete(p, ASSOC_TYPE_ARG); + } + _ => { + let m = m.complete(p, PATH_SEGMENT).precede(p).complete(p, PATH); + let m = paths::type_path_for_qualifier(p, m); + m.precede(p).complete(p, PATH_TYPE).precede(p).complete(p, TYPE_ARG); + } + } + } + _ => type_arg(p), + } +} + +// test lifetime_arg +// type T = S<'static>; +fn lifetime_arg(p: &mut Parser<'_>) { + let m = p.start(); + lifetime(p); + m.complete(p, LIFETIME_ARG); +} + +pub(super) fn const_arg_expr(p: &mut Parser<'_>) { + // The tests in here are really for `const_arg`, which wraps the content + // CONST_ARG. 
+ match p.current() { + // test const_arg_block + // type T = S<{90 + 2}>; + T!['{'] => { + expressions::block_expr(p); + } + // test const_arg_literal + // type T = S<"hello", 0xdeadbeef>; + k if k.is_literal() => { + expressions::literal(p); + } + // test const_arg_bool_literal + // type T = S<true>; + T![true] | T![false] => { + expressions::literal(p); + } + // test const_arg_negative_number + // type T = S<-92>; + T![-] => { + let lm = p.start(); + p.bump(T![-]); + expressions::literal(p); + lm.complete(p, PREFIX_EXPR); + } + _ => { + // This shouldn't be hit by `const_arg` + let lm = p.start(); + paths::use_path(p); + lm.complete(p, PATH_EXPR); + } + } +} + +// test const_arg +// type T = S<92>; +pub(super) fn const_arg(p: &mut Parser<'_>) { + let m = p.start(); + const_arg_expr(p); + m.complete(p, CONST_ARG); +} + +fn type_arg(p: &mut Parser<'_>) { + let m = p.start(); + types::type_(p); + m.complete(p, TYPE_ARG); +} diff --git a/src/tools/rust-analyzer/crates/parser/src/grammar/generic_params.rs b/src/tools/rust-analyzer/crates/parser/src/grammar/generic_params.rs new file mode 100644 index 000000000..6db28ef13 --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/grammar/generic_params.rs @@ -0,0 +1,242 @@ +use super::*; + +pub(super) fn opt_generic_param_list(p: &mut Parser<'_>) { + if p.at(T![<]) { + generic_param_list(p); + } +} + +// test generic_param_list +// fn f<T: Clone>() {} +fn generic_param_list(p: &mut Parser<'_>) { + assert!(p.at(T![<])); + let m = p.start(); + p.bump(T![<]); + + while !p.at(EOF) && !p.at(T![>]) { + generic_param(p); + if !p.at(T![>]) && !p.expect(T![,]) { + break; + } + } + p.expect(T![>]); + m.complete(p, GENERIC_PARAM_LIST); +} + +fn generic_param(p: &mut Parser<'_>) { + let m = p.start(); + // test generic_param_attribute + // fn foo<#[lt_attr] 'a, #[t_attr] T>() {} + attributes::outer_attrs(p); + match p.current() { + LIFETIME_IDENT => lifetime_param(p, m), + IDENT => type_param(p, m), + T![const] => 
const_param(p, m), + _ => { + m.abandon(p); + p.err_and_bump("expected type parameter"); + } + } +} + +// test lifetime_param +// fn f<'a: 'b>() {} +fn lifetime_param(p: &mut Parser<'_>, m: Marker) { + assert!(p.at(LIFETIME_IDENT)); + lifetime(p); + if p.at(T![:]) { + lifetime_bounds(p); + } + m.complete(p, LIFETIME_PARAM); +} + +// test type_param +// fn f<T: Clone>() {} +fn type_param(p: &mut Parser<'_>, m: Marker) { + assert!(p.at(IDENT)); + name(p); + if p.at(T![:]) { + bounds(p); + } + if p.at(T![=]) { + // test type_param_default + // struct S<T = i32>; + p.bump(T![=]); + types::type_(p); + } + m.complete(p, TYPE_PARAM); +} + +// test const_param +// struct S<const N: u32>; +fn const_param(p: &mut Parser<'_>, m: Marker) { + p.bump(T![const]); + name(p); + if p.at(T![:]) { + types::ascription(p); + } else { + p.error("missing type for const parameter"); + } + + if p.at(T![=]) { + // test const_param_default_literal + // struct A<const N: i32 = -1>; + p.bump(T![=]); + + // test const_param_default_expression + // struct A<const N: i32 = { 1 }>; + + // test const_param_default_path + // struct A<const N: i32 = i32::MAX>; + generic_args::const_arg_expr(p); + } + + m.complete(p, CONST_PARAM); +} + +fn lifetime_bounds(p: &mut Parser<'_>) { + assert!(p.at(T![:])); + p.bump(T![:]); + while p.at(LIFETIME_IDENT) { + lifetime(p); + if !p.eat(T![+]) { + break; + } + } +} + +// test type_param_bounds +// struct S<T: 'a + ?Sized + (Copy) + ~const Drop>; +pub(super) fn bounds(p: &mut Parser<'_>) { + assert!(p.at(T![:])); + p.bump(T![:]); + bounds_without_colon(p); +} + +pub(super) fn bounds_without_colon(p: &mut Parser<'_>) { + let m = p.start(); + bounds_without_colon_m(p, m); +} + +pub(super) fn bounds_without_colon_m(p: &mut Parser<'_>, marker: Marker) -> CompletedMarker { + while type_bound(p) { + if !p.eat(T![+]) { + break; + } + } + marker.complete(p, TYPE_BOUND_LIST) +} + +fn type_bound(p: &mut Parser<'_>) -> bool { + let m = p.start(); + let has_paren = 
p.eat(T!['(']); + match p.current() { + LIFETIME_IDENT => lifetime(p), + T![for] => types::for_type(p, false), + T![?] if p.nth_at(1, T![for]) => { + // test question_for_type_trait_bound + // fn f<T>() where T: ?for<> Sized {} + p.bump_any(); + types::for_type(p, false) + } + current => { + match current { + T![?] => p.bump_any(), + T![~] => { + p.bump_any(); + p.expect(T![const]); + } + _ => (), + } + if paths::is_use_path_start(p) { + types::path_type_(p, false); + } else { + m.abandon(p); + return false; + } + } + } + if has_paren { + p.expect(T![')']); + } + m.complete(p, TYPE_BOUND); + + true +} + +// test where_clause +// fn foo() +// where +// 'a: 'b + 'c, +// T: Clone + Copy + 'static, +// Iterator::Item: 'a, +// <T as Iterator>::Item: 'a +// {} +pub(super) fn opt_where_clause(p: &mut Parser<'_>) { + if !p.at(T![where]) { + return; + } + let m = p.start(); + p.bump(T![where]); + + while is_where_predicate(p) { + where_predicate(p); + + let comma = p.eat(T![,]); + + match p.current() { + T!['{'] | T![;] | T![=] => break, + _ => (), + } + + if !comma { + p.error("expected comma"); + } + } + + m.complete(p, WHERE_CLAUSE); + + fn is_where_predicate(p: &mut Parser<'_>) -> bool { + match p.current() { + LIFETIME_IDENT => true, + T![impl] => false, + token => types::TYPE_FIRST.contains(token), + } + } +} + +fn where_predicate(p: &mut Parser<'_>) { + let m = p.start(); + match p.current() { + LIFETIME_IDENT => { + lifetime(p); + if p.at(T![:]) { + bounds(p); + } else { + p.error("expected colon"); + } + } + T![impl] => { + p.error("expected lifetime or type"); + } + _ => { + if p.at(T![for]) { + // test where_pred_for + // fn for_trait<F>() + // where + // for<'a> F: Fn(&'a str) + // { } + types::for_binder(p); + } + + types::type_(p); + + if p.at(T![:]) { + bounds(p); + } else { + p.error("expected colon"); + } + } + } + m.complete(p, WHERE_PRED); +} diff --git a/src/tools/rust-analyzer/crates/parser/src/grammar/items.rs 
b/src/tools/rust-analyzer/crates/parser/src/grammar/items.rs new file mode 100644 index 000000000..5e0951bf8 --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/grammar/items.rs @@ -0,0 +1,465 @@ +mod consts; +mod adt; +mod traits; +mod use_item; + +pub(crate) use self::{ + adt::{record_field_list, variant_list}, + expressions::{match_arm_list, record_expr_field_list}, + traits::assoc_item_list, + use_item::use_tree_list, +}; +use super::*; + +// test mod_contents +// fn foo() {} +// macro_rules! foo {} +// foo::bar!(); +// super::baz! {} +// struct S; +pub(super) fn mod_contents(p: &mut Parser<'_>, stop_on_r_curly: bool) { + attributes::inner_attrs(p); + while !p.at(EOF) && !(p.at(T!['}']) && stop_on_r_curly) { + item_or_macro(p, stop_on_r_curly); + } +} + +pub(super) const ITEM_RECOVERY_SET: TokenSet = TokenSet::new(&[ + T![fn], + T![struct], + T![enum], + T![impl], + T![trait], + T![const], + T![static], + T![let], + T![mod], + T![pub], + T![crate], + T![use], + T![macro], + T![;], +]); + +pub(super) fn item_or_macro(p: &mut Parser<'_>, stop_on_r_curly: bool) { + let m = p.start(); + attributes::outer_attrs(p); + + let m = match opt_item(p, m) { + Ok(()) => { + if p.at(T![;]) { + p.err_and_bump( + "expected item, found `;`\n\ + consider removing this semicolon", + ); + } + return; + } + Err(m) => m, + }; + + if paths::is_use_path_start(p) { + match macro_call(p) { + BlockLike::Block => (), + BlockLike::NotBlock => { + p.expect(T![;]); + } + } + m.complete(p, MACRO_CALL); + return; + } + + m.abandon(p); + match p.current() { + T!['{'] => error_block(p, "expected an item"), + T!['}'] if !stop_on_r_curly => { + let e = p.start(); + p.error("unmatched `}`"); + p.bump(T!['}']); + e.complete(p, ERROR); + } + EOF | T!['}'] => p.error("expected an item"), + _ => p.err_and_bump("expected an item"), + } +} + +/// Try to parse an item, completing `m` in case of success. 
+pub(super) fn opt_item(p: &mut Parser<'_>, m: Marker) -> Result<(), Marker> { + // test_err pub_expr + // fn foo() { pub 92; } + let has_visibility = opt_visibility(p, false); + + let m = match opt_item_without_modifiers(p, m) { + Ok(()) => return Ok(()), + Err(m) => m, + }; + + let mut has_mods = false; + let mut has_extern = false; + + // modifiers + if p.at(T![const]) && p.nth(1) != T!['{'] { + p.eat(T![const]); + has_mods = true; + } + + // test_err async_without_semicolon + // fn foo() { let _ = async {} } + if p.at(T![async]) && !matches!(p.nth(1), T!['{'] | T![move] | T![|]) { + p.eat(T![async]); + has_mods = true; + } + + // test_err unsafe_block_in_mod + // fn foo(){} unsafe { } fn bar(){} + if p.at(T![unsafe]) && p.nth(1) != T!['{'] { + p.eat(T![unsafe]); + has_mods = true; + } + + if p.at(T![extern]) { + has_extern = true; + has_mods = true; + abi(p); + } + if p.at_contextual_kw(T![auto]) && p.nth(1) == T![trait] { + p.bump_remap(T![auto]); + has_mods = true; + } + + // test default_item + // default impl T for Foo {} + if p.at_contextual_kw(T![default]) { + match p.nth(1) { + T![fn] | T![type] | T![const] | T![impl] => { + p.bump_remap(T![default]); + has_mods = true; + } + // test default_unsafe_item + // default unsafe impl T for Foo { + // default unsafe fn foo() {} + // } + T![unsafe] if matches!(p.nth(2), T![impl] | T![fn]) => { + p.bump_remap(T![default]); + p.bump(T![unsafe]); + has_mods = true; + } + // test default_async_fn + // impl T for Foo { + // default async fn foo() {} + // } + T![async] => { + let mut maybe_fn = p.nth(2); + let is_unsafe = if matches!(maybe_fn, T![unsafe]) { + // test default_async_unsafe_fn + // impl T for Foo { + // default async unsafe fn foo() {} + // } + maybe_fn = p.nth(3); + true + } else { + false + }; + + if matches!(maybe_fn, T![fn]) { + p.bump_remap(T![default]); + p.bump(T![async]); + if is_unsafe { + p.bump(T![unsafe]); + } + has_mods = true; + } + } + _ => (), + } + } + + // test existential_type + // 
existential type Foo: Fn() -> usize; + if p.at_contextual_kw(T![existential]) && p.nth(1) == T![type] { + p.bump_remap(T![existential]); + has_mods = true; + } + + // items + match p.current() { + T![fn] => fn_(p, m), + + T![const] if p.nth(1) != T!['{'] => consts::konst(p, m), + + T![trait] => traits::trait_(p, m), + T![impl] => traits::impl_(p, m), + + T![type] => type_alias(p, m), + + // test extern_block + // unsafe extern "C" {} + // extern {} + T!['{'] if has_extern => { + extern_item_list(p); + m.complete(p, EXTERN_BLOCK); + } + + _ if has_visibility || has_mods => { + if has_mods { + p.error("expected existential, fn, trait or impl"); + } else { + p.error("expected an item"); + } + m.complete(p, ERROR); + } + + _ => return Err(m), + } + Ok(()) +} + +fn opt_item_without_modifiers(p: &mut Parser<'_>, m: Marker) -> Result<(), Marker> { + let la = p.nth(1); + match p.current() { + T![extern] if la == T![crate] => extern_crate(p, m), + T![use] => use_item::use_(p, m), + T![mod] => mod_item(p, m), + + T![type] => type_alias(p, m), + T![struct] => adt::strukt(p, m), + T![enum] => adt::enum_(p, m), + IDENT if p.at_contextual_kw(T![union]) && p.nth(1) == IDENT => adt::union(p, m), + + T![macro] => macro_def(p, m), + IDENT if p.at_contextual_kw(T![macro_rules]) && p.nth(1) == BANG => macro_rules(p, m), + + T![const] if (la == IDENT || la == T![_] || la == T![mut]) => consts::konst(p, m), + T![static] if (la == IDENT || la == T![_] || la == T![mut]) => consts::static_(p, m), + + _ => return Err(m), + }; + Ok(()) +} + +// test extern_crate +// extern crate foo; +fn extern_crate(p: &mut Parser<'_>, m: Marker) { + p.bump(T![extern]); + p.bump(T![crate]); + + if p.at(T![self]) { + // test extern_crate_self + // extern crate self; + let m = p.start(); + p.bump(T![self]); + m.complete(p, NAME_REF); + } else { + name_ref(p); + } + + // test extern_crate_rename + // extern crate foo as bar; + opt_rename(p); + p.expect(T![;]); + m.complete(p, EXTERN_CRATE); +} + +// test 
mod_item +// mod a; +pub(crate) fn mod_item(p: &mut Parser<'_>, m: Marker) { + p.bump(T![mod]); + name(p); + if p.at(T!['{']) { + // test mod_item_curly + // mod b { } + item_list(p); + } else if !p.eat(T![;]) { + p.error("expected `;` or `{`"); + } + m.complete(p, MODULE); +} + +// test type_alias +// type Foo = Bar; +fn type_alias(p: &mut Parser<'_>, m: Marker) { + p.bump(T![type]); + + name(p); + + // test type_item_type_params + // type Result<T> = (); + generic_params::opt_generic_param_list(p); + + if p.at(T![:]) { + generic_params::bounds(p); + } + + // test type_item_where_clause_deprecated + // type Foo where Foo: Copy = (); + generic_params::opt_where_clause(p); + if p.eat(T![=]) { + types::type_(p); + } + + // test type_item_where_clause + // type Foo = () where Foo: Copy; + generic_params::opt_where_clause(p); + + p.expect(T![;]); + m.complete(p, TYPE_ALIAS); +} + +pub(crate) fn item_list(p: &mut Parser<'_>) { + assert!(p.at(T!['{'])); + let m = p.start(); + p.bump(T!['{']); + mod_contents(p, true); + p.expect(T!['}']); + m.complete(p, ITEM_LIST); +} + +pub(crate) fn extern_item_list(p: &mut Parser<'_>) { + assert!(p.at(T!['{'])); + let m = p.start(); + p.bump(T!['{']); + mod_contents(p, true); + p.expect(T!['}']); + m.complete(p, EXTERN_ITEM_LIST); +} + +fn macro_rules(p: &mut Parser<'_>, m: Marker) { + assert!(p.at_contextual_kw(T![macro_rules])); + p.bump_remap(T![macro_rules]); + p.expect(T![!]); + + if p.at(IDENT) { + name(p); + } + // Special-case `macro_rules! try`. + // This is a hack until we do proper edition support + + // test try_macro_rules + // macro_rules! try { () => {} } + if p.at(T![try]) { + let m = p.start(); + p.bump_remap(IDENT); + m.complete(p, NAME); + } + + match p.current() { + // test macro_rules_non_brace + // macro_rules! m ( ($i:ident) => {} ); + // macro_rules! 
m [ ($i:ident) => {} ]; + T!['['] | T!['('] => { + token_tree(p); + p.expect(T![;]); + } + T!['{'] => token_tree(p), + _ => p.error("expected `{`, `[`, `(`"), + } + m.complete(p, MACRO_RULES); +} + +// test macro_def +// macro m($i:ident) {} +fn macro_def(p: &mut Parser<'_>, m: Marker) { + p.expect(T![macro]); + name_r(p, ITEM_RECOVERY_SET); + if p.at(T!['{']) { + // test macro_def_curly + // macro m { ($i:ident) => {} } + token_tree(p); + } else if p.at(T!['(']) { + let m = p.start(); + token_tree(p); + match p.current() { + T!['{'] | T!['['] | T!['('] => token_tree(p), + _ => p.error("expected `{`, `[`, `(`"), + } + m.complete(p, TOKEN_TREE); + } else { + p.error("unmatched `(`"); + } + + m.complete(p, MACRO_DEF); +} + +// test fn +// fn foo() {} +fn fn_(p: &mut Parser<'_>, m: Marker) { + p.bump(T![fn]); + + name_r(p, ITEM_RECOVERY_SET); + // test function_type_params + // fn foo<T: Clone + Copy>(){} + generic_params::opt_generic_param_list(p); + + if p.at(T!['(']) { + params::param_list_fn_def(p); + } else { + p.error("expected function arguments"); + } + // test function_ret_type + // fn foo() {} + // fn bar() -> () {} + opt_ret_type(p); + + // test function_where_clause + // fn foo<T>() where T: Copy {} + generic_params::opt_where_clause(p); + + if p.at(T![;]) { + // test fn_decl + // trait T { fn foo(); } + p.bump(T![;]); + } else { + expressions::block_expr(p); + } + m.complete(p, FN); +} + +fn macro_call(p: &mut Parser<'_>) -> BlockLike { + assert!(paths::is_use_path_start(p)); + paths::use_path(p); + macro_call_after_excl(p) +} + +pub(super) fn macro_call_after_excl(p: &mut Parser<'_>) -> BlockLike { + p.expect(T![!]); + + match p.current() { + T!['{'] => { + token_tree(p); + BlockLike::Block + } + T!['('] | T!['['] => { + token_tree(p); + BlockLike::NotBlock + } + _ => { + p.error("expected `{`, `[`, `(`"); + BlockLike::NotBlock + } + } +} + +pub(crate) fn token_tree(p: &mut Parser<'_>) { + let closing_paren_kind = match p.current() { + T!['{'] => 
T!['}'], + T!['('] => T![')'], + T!['['] => T![']'], + _ => unreachable!(), + }; + let m = p.start(); + p.bump_any(); + while !p.at(EOF) && !p.at(closing_paren_kind) { + match p.current() { + T!['{'] | T!['('] | T!['['] => token_tree(p), + T!['}'] => { + p.error("unmatched `}`"); + m.complete(p, TOKEN_TREE); + return; + } + T![')'] | T![']'] => p.err_and_bump("unmatched brace"), + _ => p.bump_any(), + } + } + p.expect(closing_paren_kind); + m.complete(p, TOKEN_TREE); +} diff --git a/src/tools/rust-analyzer/crates/parser/src/grammar/items/adt.rs b/src/tools/rust-analyzer/crates/parser/src/grammar/items/adt.rs new file mode 100644 index 000000000..e7d30516b --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/grammar/items/adt.rs @@ -0,0 +1,168 @@ +use super::*; + +// test struct_item +// struct S {} +pub(super) fn strukt(p: &mut Parser<'_>, m: Marker) { + p.bump(T![struct]); + struct_or_union(p, m, true); +} + +// test union_item +// struct U { i: i32, f: f32 } +pub(super) fn union(p: &mut Parser<'_>, m: Marker) { + assert!(p.at_contextual_kw(T![union])); + p.bump_remap(T![union]); + struct_or_union(p, m, false); +} + +fn struct_or_union(p: &mut Parser<'_>, m: Marker, is_struct: bool) { + name_r(p, ITEM_RECOVERY_SET); + generic_params::opt_generic_param_list(p); + match p.current() { + T![where] => { + generic_params::opt_where_clause(p); + match p.current() { + T![;] => p.bump(T![;]), + T!['{'] => record_field_list(p), + _ => { + //FIXME: special case `(` error message + p.error("expected `;` or `{`"); + } + } + } + T!['{'] => record_field_list(p), + // test unit_struct + // struct S; + T![;] if is_struct => { + p.bump(T![;]); + } + // test tuple_struct + // struct S(String, usize); + T!['('] if is_struct => { + tuple_field_list(p); + // test tuple_struct_where + // struct S<T>(T) where T: Clone; + generic_params::opt_where_clause(p); + p.expect(T![;]); + } + _ => p.error(if is_struct { "expected `;`, `{`, or `(`" } else { "expected `{`" }), + } + 
m.complete(p, if is_struct { STRUCT } else { UNION }); +} + +pub(super) fn enum_(p: &mut Parser<'_>, m: Marker) { + p.bump(T![enum]); + name_r(p, ITEM_RECOVERY_SET); + generic_params::opt_generic_param_list(p); + generic_params::opt_where_clause(p); + if p.at(T!['{']) { + variant_list(p); + } else { + p.error("expected `{`"); + } + m.complete(p, ENUM); +} + +pub(crate) fn variant_list(p: &mut Parser<'_>) { + assert!(p.at(T!['{'])); + let m = p.start(); + p.bump(T!['{']); + while !p.at(EOF) && !p.at(T!['}']) { + if p.at(T!['{']) { + error_block(p, "expected enum variant"); + continue; + } + variant(p); + if !p.at(T!['}']) { + p.expect(T![,]); + } + } + p.expect(T!['}']); + m.complete(p, VARIANT_LIST); + + fn variant(p: &mut Parser<'_>) { + let m = p.start(); + attributes::outer_attrs(p); + if p.at(IDENT) { + name(p); + match p.current() { + T!['{'] => record_field_list(p), + T!['('] => tuple_field_list(p), + _ => (), + } + + // test variant_discriminant + // enum E { X(i32) = 10 } + if p.eat(T![=]) { + expressions::expr(p); + } + m.complete(p, VARIANT); + } else { + m.abandon(p); + p.err_and_bump("expected enum variant"); + } + } +} + +// test record_field_list +// struct S { a: i32, b: f32 } +pub(crate) fn record_field_list(p: &mut Parser<'_>) { + assert!(p.at(T!['{'])); + let m = p.start(); + p.bump(T!['{']); + while !p.at(T!['}']) && !p.at(EOF) { + if p.at(T!['{']) { + error_block(p, "expected field"); + continue; + } + record_field(p); + if !p.at(T!['}']) { + p.expect(T![,]); + } + } + p.expect(T!['}']); + m.complete(p, RECORD_FIELD_LIST); + + fn record_field(p: &mut Parser<'_>) { + let m = p.start(); + // test record_field_attrs + // struct S { #[attr] f: f32 } + attributes::outer_attrs(p); + opt_visibility(p, false); + if p.at(IDENT) { + name(p); + p.expect(T![:]); + types::type_(p); + m.complete(p, RECORD_FIELD); + } else { + m.abandon(p); + p.err_and_bump("expected field declaration"); + } + } +} + +fn tuple_field_list(p: &mut Parser<'_>) { + 
assert!(p.at(T!['('])); + let m = p.start(); + p.bump(T!['(']); + while !p.at(T![')']) && !p.at(EOF) { + let m = p.start(); + // test tuple_field_attrs + // struct S (#[attr] f32); + attributes::outer_attrs(p); + opt_visibility(p, true); + if !p.at_ts(types::TYPE_FIRST) { + p.error("expected a type"); + m.complete(p, ERROR); + break; + } + types::type_(p); + m.complete(p, TUPLE_FIELD); + + if !p.at(T![')']) { + p.expect(T![,]); + } + } + p.expect(T![')']); + m.complete(p, TUPLE_FIELD_LIST); +} diff --git a/src/tools/rust-analyzer/crates/parser/src/grammar/items/consts.rs b/src/tools/rust-analyzer/crates/parser/src/grammar/items/consts.rs new file mode 100644 index 000000000..9549ec9b4 --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/grammar/items/consts.rs @@ -0,0 +1,37 @@ +use super::*; + +// test const_item +// const C: u32 = 92; +pub(super) fn konst(p: &mut Parser<'_>, m: Marker) { + p.bump(T![const]); + const_or_static(p, m, true); +} + +pub(super) fn static_(p: &mut Parser<'_>, m: Marker) { + p.bump(T![static]); + const_or_static(p, m, false); +} + +fn const_or_static(p: &mut Parser<'_>, m: Marker, is_const: bool) { + p.eat(T![mut]); + + if is_const && p.eat(T![_]) { + // test anonymous_const + // const _: u32 = 0; + } else { + // test_err anonymous_static + // static _: i32 = 5; + name(p); + } + + if p.at(T![:]) { + types::ascription(p); + } else { + p.error("missing type for `const` or `static`"); + } + if p.eat(T![=]) { + expressions::expr(p); + } + p.expect(T![;]); + m.complete(p, if is_const { CONST } else { STATIC }); +} diff --git a/src/tools/rust-analyzer/crates/parser/src/grammar/items/traits.rs b/src/tools/rust-analyzer/crates/parser/src/grammar/items/traits.rs new file mode 100644 index 000000000..c982e2d56 --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/grammar/items/traits.rs @@ -0,0 +1,140 @@ +use super::*; + +// test trait_item +// trait T { fn new() -> Self; } +pub(super) fn trait_(p: &mut Parser<'_>, m: Marker) { + 
p.bump(T![trait]); + name_r(p, ITEM_RECOVERY_SET); + + // test trait_item_generic_params + // trait X<U: Debug + Display> {} + generic_params::opt_generic_param_list(p); + + if p.eat(T![=]) { + // test trait_alias + // trait Z<U> = T<U>; + generic_params::bounds_without_colon(p); + + // test trait_alias_where_clause + // trait Z<U> = T<U> where U: Copy; + // trait Z<U> = where Self: T<U>; + generic_params::opt_where_clause(p); + p.expect(T![;]); + m.complete(p, TRAIT); + return; + } + + if p.at(T![:]) { + // test trait_item_bounds + // trait T: Hash + Clone {} + generic_params::bounds(p); + } + + // test trait_item_where_clause + // trait T where Self: Copy {} + generic_params::opt_where_clause(p); + + if p.at(T!['{']) { + assoc_item_list(p); + } else { + p.error("expected `{`"); + } + m.complete(p, TRAIT); +} + +// test impl_item +// impl S {} +pub(super) fn impl_(p: &mut Parser<'_>, m: Marker) { + p.bump(T![impl]); + if p.at(T![<]) && not_a_qualified_path(p) { + generic_params::opt_generic_param_list(p); + } + + // test impl_item_const + // impl const Send for S {} + p.eat(T![const]); + + // FIXME: never type + // impl ! 
{} + + // test impl_item_neg + // impl !Send for S {} + p.eat(T![!]); + impl_type(p); + if p.eat(T![for]) { + impl_type(p); + } + generic_params::opt_where_clause(p); + if p.at(T!['{']) { + assoc_item_list(p); + } else { + p.error("expected `{`"); + } + m.complete(p, IMPL); +} + +// test assoc_item_list +// impl F { +// type A = i32; +// const B: i32 = 92; +// fn foo() {} +// fn bar(&self) {} +// } +pub(crate) fn assoc_item_list(p: &mut Parser<'_>) { + assert!(p.at(T!['{'])); + + let m = p.start(); + p.bump(T!['{']); + // test assoc_item_list_inner_attrs + // impl S { #![attr] } + attributes::inner_attrs(p); + + while !p.at(EOF) && !p.at(T!['}']) { + if p.at(T!['{']) { + error_block(p, "expected an item"); + continue; + } + item_or_macro(p, true); + } + p.expect(T!['}']); + m.complete(p, ASSOC_ITEM_LIST); +} + +// test impl_type_params +// impl<const N: u32> Bar<N> {} +fn not_a_qualified_path(p: &Parser<'_>) -> bool { + // There's an ambiguity between generic parameters and qualified paths in impls. + // If we see `<` it may start both, so we have to inspect some following tokens. + // The following combinations can only start generics, + // but not qualified paths (with one exception): + // `<` `>` - empty generic parameters + // `<` `#` - generic parameters with attributes + // `<` `const` - const generic parameters + // `<` (LIFETIME_IDENT|IDENT) `>` - single generic parameter + // `<` (LIFETIME_IDENT|IDENT) `,` - first generic parameter in a list + // `<` (LIFETIME_IDENT|IDENT) `:` - generic parameter with bounds + // `<` (LIFETIME_IDENT|IDENT) `=` - generic parameter with a default + // The only truly ambiguous case is + // `<` IDENT `>` `::` IDENT ... + // we disambiguate it in favor of generics (`impl<T> ::absolute::Path<T> { ... }`) + // because this is what almost always expected in practice, qualified paths in impls + // (`impl <Type>::AssocTy { ... }`) aren't even allowed by type checker at the moment. 
+ if p.nth(1) == T![#] || p.nth(1) == T![>] || p.nth(1) == T![const] { + return true; + } + (p.nth(1) == LIFETIME_IDENT || p.nth(1) == IDENT) + && (p.nth(2) == T![>] || p.nth(2) == T![,] || p.nth(2) == T![:] || p.nth(2) == T![=]) +} + +// test_err impl_type +// impl Type {} +// impl Trait1 for T {} +// impl impl NotType {} +// impl Trait2 for impl NotType {} +pub(crate) fn impl_type(p: &mut Parser<'_>) { + if p.at(T![impl]) { + p.error("expected trait or type"); + return; + } + types::type_(p); +} diff --git a/src/tools/rust-analyzer/crates/parser/src/grammar/items/use_item.rs b/src/tools/rust-analyzer/crates/parser/src/grammar/items/use_item.rs new file mode 100644 index 000000000..69880b794 --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/grammar/items/use_item.rs @@ -0,0 +1,93 @@ +use super::*; + +// test use_item +// use std::collections; +pub(super) fn use_(p: &mut Parser<'_>, m: Marker) { + p.bump(T![use]); + use_tree(p, true); + p.expect(T![;]); + m.complete(p, USE); +} + +// test use_tree +// use outer::tree::{inner::tree}; +fn use_tree(p: &mut Parser<'_>, top_level: bool) { + let m = p.start(); + match p.current() { + // test use_tree_star + // use *; + // use std::{*}; + T![*] => p.bump(T![*]), + // test use_tree_abs_star + // use ::*; + // use std::{::*}; + T![:] if p.at(T![::]) && p.nth(2) == T![*] => { + p.bump(T![::]); + p.bump(T![*]); + } + T!['{'] => use_tree_list(p), + T![:] if p.at(T![::]) && p.nth(2) == T!['{'] => { + p.bump(T![::]); + use_tree_list(p); + } + + // test use_tree_path + // use ::std; + // use std::collections; + // + // use self::m; + // use super::m; + // use crate::m; + _ if paths::is_use_path_start(p) => { + paths::use_path(p); + match p.current() { + // test use_tree_alias + // use std as stdlib; + // use Trait as _; + T![as] => opt_rename(p), + T![:] if p.at(T![::]) => { + p.bump(T![::]); + match p.current() { + // test use_tree_path_star + // use std::*; + T![*] => p.bump(T![*]), + // test use_tree_path_use_tree 
+ // use std::{collections};
+ T!['{'] => use_tree_list(p),
+ _ => p.error("expected `{` or `*`"),
+ }
+ }
+ _ => (),
+ }
+ }
+ _ => {
+ m.abandon(p);
+ let msg = "expected one of `*`, `::`, `{`, `self`, `super` or an identifier";
+ if top_level {
+ p.err_recover(msg, ITEM_RECOVERY_SET);
+ } else {
+ // if we are parsing a nested tree, we have to eat a token to
+ // maintain balanced `{}`
+ p.err_and_bump(msg);
+ }
+ return;
+ }
+ }
+ m.complete(p, USE_TREE);
+}
+
+// test use_tree_list
+// use {a, b, c};
+pub(crate) fn use_tree_list(p: &mut Parser<'_>) {
+ assert!(p.at(T!['{']));
+ let m = p.start();
+ p.bump(T!['{']);
+ while !p.at(EOF) && !p.at(T!['}']) {
+ use_tree(p, false);
+ if !p.at(T!['}']) {
+ p.expect(T![,]);
+ }
+ }
+ p.expect(T!['}']);
+ m.complete(p, USE_TREE_LIST);
+} diff --git a/src/tools/rust-analyzer/crates/parser/src/grammar/params.rs b/src/tools/rust-analyzer/crates/parser/src/grammar/params.rs new file mode 100644 index 000000000..20e8e95f0 --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/grammar/params.rs @@ -0,0 +1,209 @@ +use super::*;
+
+// test param_list
+// fn a() {}
+// fn b(x: i32) {}
+// fn c(x: i32, ) {}
+// fn d(x: i32, y: ()) {}
+pub(super) fn param_list_fn_def(p: &mut Parser<'_>) {
+ list_(p, Flavor::FnDef);
+}
+
+// test param_list_opt_patterns
+// fn foo<F: FnMut(&mut Foo<'a>)>(){}
+pub(super) fn param_list_fn_trait(p: &mut Parser<'_>) {
+ list_(p, Flavor::FnTrait);
+}
+
+pub(super) fn param_list_fn_ptr(p: &mut Parser<'_>) {
+ list_(p, Flavor::FnPointer);
+}
+
+pub(super) fn param_list_closure(p: &mut Parser<'_>) {
+ list_(p, Flavor::Closure);
+}
+
+#[derive(Debug, Clone, Copy)]
+enum Flavor {
+ FnDef, // Includes trait fn params; omitted param idents are not supported
+ FnTrait, // Params for `Fn(...)`/`FnMut(...)`/`FnOnce(...)` annotations
+ FnPointer,
+ Closure,
+}
+
+fn list_(p: &mut Parser<'_>, flavor: Flavor) {
+ use Flavor::*;
+
+ let (bra, ket) = match flavor {
+ Closure => (T![|], T![|]),
+ FnDef | FnTrait | 
FnPointer => (T!['('], T![')']), + }; + + let list_marker = p.start(); + p.bump(bra); + + let mut param_marker = None; + if let FnDef = flavor { + // test self_param_outer_attr + // fn f(#[must_use] self) {} + let m = p.start(); + attributes::outer_attrs(p); + match opt_self_param(p, m) { + Ok(()) => {} + Err(m) => param_marker = Some(m), + } + } + + while !p.at(EOF) && !p.at(ket) { + // test param_outer_arg + // fn f(#[attr1] pat: Type) {} + let m = match param_marker.take() { + Some(m) => m, + None => { + let m = p.start(); + attributes::outer_attrs(p); + m + } + }; + + if !p.at_ts(PARAM_FIRST) { + p.error("expected value parameter"); + m.abandon(p); + break; + } + param(p, m, flavor); + if !p.at(ket) { + p.expect(T![,]); + } + } + + if let Some(m) = param_marker { + m.abandon(p); + } + + p.expect(ket); + list_marker.complete(p, PARAM_LIST); +} + +const PARAM_FIRST: TokenSet = patterns::PATTERN_FIRST.union(types::TYPE_FIRST); + +fn param(p: &mut Parser<'_>, m: Marker, flavor: Flavor) { + match flavor { + // test param_list_vararg + // extern "C" { fn printf(format: *const i8, ..., _: u8) -> i32; } + Flavor::FnDef | Flavor::FnPointer if p.eat(T![...]) => {} + + // test fn_def_param + // fn foo(..., (x, y): (i32, i32)) {} + Flavor::FnDef => { + patterns::pattern(p); + if !variadic_param(p) { + if p.at(T![:]) { + types::ascription(p); + } else { + // test_err missing_fn_param_type + // fn f(x y: i32, z, t: i32) {} + p.error("missing type for function parameter"); + } + } + } + // test value_parameters_no_patterns + // type F = Box<Fn(i32, &i32, &i32, ())>; + Flavor::FnTrait => { + types::type_(p); + } + // test fn_pointer_param_ident_path + // type Foo = fn(Bar::Baz); + // type Qux = fn(baz: Bar::Baz); + + // test fn_pointer_unnamed_arg + // type Foo = fn(_: bar); + Flavor::FnPointer => { + if (p.at(IDENT) || p.at(UNDERSCORE)) && p.nth(1) == T![:] && !p.nth_at(1, T![::]) { + patterns::pattern_single(p); + if !variadic_param(p) { + if p.at(T![:]) { + 
types::ascription(p); + } else { + p.error("missing type for function parameter"); + } + } + } else { + types::type_(p); + } + } + // test closure_params + // fn main() { + // let foo = |bar, baz: Baz, qux: Qux::Quux| (); + // } + Flavor::Closure => { + patterns::pattern_single(p); + if p.at(T![:]) && !p.at(T![::]) { + types::ascription(p); + } + } + } + m.complete(p, PARAM); +} + +fn variadic_param(p: &mut Parser<'_>) -> bool { + if p.at(T![:]) && p.nth_at(1, T![...]) { + p.bump(T![:]); + p.bump(T![...]); + true + } else { + false + } +} + +// test self_param +// impl S { +// fn a(self) {} +// fn b(&self,) {} +// fn c(&'a self,) {} +// fn d(&'a mut self, x: i32) {} +// fn e(mut self) {} +// } +fn opt_self_param(p: &mut Parser<'_>, m: Marker) -> Result<(), Marker> { + if p.at(T![self]) || p.at(T![mut]) && p.nth(1) == T![self] { + p.eat(T![mut]); + self_as_name(p); + // test arb_self_types + // impl S { + // fn a(self: &Self) {} + // fn b(mut self: Box<Self>) {} + // } + if p.at(T![:]) { + types::ascription(p); + } + } else { + let la1 = p.nth(1); + let la2 = p.nth(2); + let la3 = p.nth(3); + if !matches!( + (p.current(), la1, la2, la3), + (T![&], T![self], _, _) + | (T![&], T![mut] | LIFETIME_IDENT, T![self], _) + | (T![&], LIFETIME_IDENT, T![mut], T![self]) + ) { + return Err(m); + } + p.bump(T![&]); + if p.at(LIFETIME_IDENT) { + lifetime(p); + } + p.eat(T![mut]); + self_as_name(p); + } + m.complete(p, SELF_PARAM); + if !p.at(T![')']) { + p.expect(T![,]); + } + Ok(()) +} + +fn self_as_name(p: &mut Parser<'_>) { + let m = p.start(); + p.bump(T![self]); + m.complete(p, NAME); +} diff --git a/src/tools/rust-analyzer/crates/parser/src/grammar/paths.rs b/src/tools/rust-analyzer/crates/parser/src/grammar/paths.rs new file mode 100644 index 000000000..8de5d33a1 --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/grammar/paths.rs @@ -0,0 +1,132 @@ +use super::*; + +pub(super) const PATH_FIRST: TokenSet = + TokenSet::new(&[IDENT, T![self], T![super], T![crate], 
T![Self], T![:], T![<]]); + +pub(super) fn is_path_start(p: &Parser<'_>) -> bool { + is_use_path_start(p) || p.at(T![<]) || p.at(T![Self]) +} + +pub(super) fn is_use_path_start(p: &Parser<'_>) -> bool { + match p.current() { + IDENT | T![self] | T![super] | T![crate] => true, + T![:] if p.at(T![::]) => true, + _ => false, + } +} + +pub(super) fn use_path(p: &mut Parser<'_>) { + path(p, Mode::Use); +} + +pub(crate) fn type_path(p: &mut Parser<'_>) { + path(p, Mode::Type); +} + +pub(super) fn expr_path(p: &mut Parser<'_>) { + path(p, Mode::Expr); +} + +pub(crate) fn type_path_for_qualifier( + p: &mut Parser<'_>, + qual: CompletedMarker, +) -> CompletedMarker { + path_for_qualifier(p, Mode::Type, qual) +} + +#[derive(Clone, Copy, Eq, PartialEq)] +enum Mode { + Use, + Type, + Expr, +} + +fn path(p: &mut Parser<'_>, mode: Mode) { + let path = p.start(); + path_segment(p, mode, true); + let qual = path.complete(p, PATH); + path_for_qualifier(p, mode, qual); +} + +fn path_for_qualifier( + p: &mut Parser<'_>, + mode: Mode, + mut qual: CompletedMarker, +) -> CompletedMarker { + loop { + let use_tree = mode == Mode::Use && matches!(p.nth(2), T![*] | T!['{']); + if p.at(T![::]) && !use_tree { + let path = qual.precede(p); + p.bump(T![::]); + path_segment(p, mode, false); + let path = path.complete(p, PATH); + qual = path; + } else { + return qual; + } + } +} + +fn path_segment(p: &mut Parser<'_>, mode: Mode, first: bool) { + let m = p.start(); + // test qual_paths + // type X = <A as B>::Output; + // fn foo() { <usize as Default>::default(); } + if first && p.eat(T![<]) { + types::type_(p); + if p.eat(T![as]) { + if is_use_path_start(p) { + types::path_type(p); + } else { + p.error("expected a trait"); + } + } + p.expect(T![>]); + } else { + let mut empty = true; + if first { + p.eat(T![::]); + empty = false; + } + match p.current() { + IDENT => { + name_ref(p); + opt_path_type_args(p, mode); + } + // test crate_path + // use crate::foo; + T![self] | T![super] | T![crate] | 
T![Self] => { + let m = p.start(); + p.bump_any(); + m.complete(p, NAME_REF); + } + _ => { + p.err_recover("expected identifier", items::ITEM_RECOVERY_SET); + if empty { + // test_err empty_segment + // use crate::; + m.abandon(p); + return; + } + } + }; + } + m.complete(p, PATH_SEGMENT); +} + +fn opt_path_type_args(p: &mut Parser<'_>, mode: Mode) { + match mode { + Mode::Use => {} + Mode::Type => { + // test path_fn_trait_args + // type F = Box<Fn(i32) -> ()>; + if p.at(T!['(']) { + params::param_list_fn_trait(p); + opt_ret_type(p); + } else { + generic_args::opt_generic_arg_list(p, false); + } + } + Mode::Expr => generic_args::opt_generic_arg_list(p, true), + } +} diff --git a/src/tools/rust-analyzer/crates/parser/src/grammar/patterns.rs b/src/tools/rust-analyzer/crates/parser/src/grammar/patterns.rs new file mode 100644 index 000000000..4cbf10306 --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/grammar/patterns.rs @@ -0,0 +1,440 @@ +use super::*; + +pub(super) const PATTERN_FIRST: TokenSet = + expressions::LITERAL_FIRST.union(paths::PATH_FIRST).union(TokenSet::new(&[ + T![box], + T![ref], + T![mut], + T!['('], + T!['['], + T![&], + T![_], + T![-], + T![.], + ])); + +pub(crate) fn pattern(p: &mut Parser<'_>) { + pattern_r(p, PAT_RECOVERY_SET); +} + +/// Parses a pattern list separated by pipes `|`. +pub(super) fn pattern_top(p: &mut Parser<'_>) { + pattern_top_r(p, PAT_RECOVERY_SET); +} + +pub(crate) fn pattern_single(p: &mut Parser<'_>) { + pattern_single_r(p, PAT_RECOVERY_SET); +} + +/// Parses a pattern list separated by pipes `|` +/// using the given `recovery_set`. +pub(super) fn pattern_top_r(p: &mut Parser<'_>, recovery_set: TokenSet) { + p.eat(T![|]); + pattern_r(p, recovery_set); +} + +/// Parses a pattern list separated by pipes `|`, with no leading `|`,using the +/// given `recovery_set`. 
+ +// test or_pattern +// fn main() { +// match () { +// (_ | _) => (), +// &(_ | _) => (), +// (_ | _,) => (), +// [_ | _,] => (), +// } +// } +fn pattern_r(p: &mut Parser<'_>, recovery_set: TokenSet) { + let m = p.start(); + pattern_single_r(p, recovery_set); + + if !p.at(T![|]) { + m.abandon(p); + return; + } + while p.eat(T![|]) { + pattern_single_r(p, recovery_set); + } + m.complete(p, OR_PAT); +} + +fn pattern_single_r(p: &mut Parser<'_>, recovery_set: TokenSet) { + if let Some(lhs) = atom_pat(p, recovery_set) { + // test range_pat + // fn main() { + // match 92 { + // 0 ... 100 => (), + // 101 ..= 200 => (), + // 200 .. 301 => (), + // 302 .. => (), + // } + // + // match Some(10 as u8) { + // Some(0) | None => (), + // Some(1..) => () + // } + // + // match (10 as u8, 5 as u8) { + // (0, _) => (), + // (1.., _) => () + // } + // } + + // FIXME: support half_open_range_patterns (`..=2`), + // exclusive_range_pattern (`..5`) with missing lhs + for range_op in [T![...], T![..=], T![..]] { + if p.at(range_op) { + let m = lhs.precede(p); + p.bump(range_op); + + // `0 .. =>` or `let 0 .. =` or `Some(0 .. )` + // ^ ^ ^ + if p.at(T![=]) | p.at(T![')']) | p.at(T![,]) { + // test half_open_range_pat + // fn f() { let 0 .. = 1u32; } + } else { + atom_pat(p, recovery_set); + } + m.complete(p, RANGE_PAT); + return; + } + } + } +} + +const PAT_RECOVERY_SET: TokenSet = + TokenSet::new(&[T![let], T![if], T![while], T![loop], T![match], T![')'], T![,], T![=]]); + +fn atom_pat(p: &mut Parser<'_>, recovery_set: TokenSet) -> Option<CompletedMarker> { + let m = match p.current() { + T![box] => box_pat(p), + T![ref] | T![mut] => ident_pat(p, true), + T![const] => const_block_pat(p), + IDENT => match p.nth(1) { + // Checks the token after an IDENT to see if a pattern is a path (Struct { .. }) or macro + // (T![x]). + T!['('] | T!['{'] | T![!] 
=> path_or_macro_pat(p), + T![:] if p.nth_at(1, T![::]) => path_or_macro_pat(p), + _ => ident_pat(p, true), + }, + + // test type_path_in_pattern + // fn main() { let <_>::Foo = (); } + _ if paths::is_path_start(p) => path_or_macro_pat(p), + _ if is_literal_pat_start(p) => literal_pat(p), + + T![.] if p.at(T![..]) => rest_pat(p), + T![_] => wildcard_pat(p), + T![&] => ref_pat(p), + T!['('] => tuple_pat(p), + T!['['] => slice_pat(p), + + _ => { + p.err_recover("expected pattern", recovery_set); + return None; + } + }; + + Some(m) +} + +fn is_literal_pat_start(p: &Parser<'_>) -> bool { + p.at(T![-]) && (p.nth(1) == INT_NUMBER || p.nth(1) == FLOAT_NUMBER) + || p.at_ts(expressions::LITERAL_FIRST) +} + +// test literal_pattern +// fn main() { +// match () { +// -1 => (), +// 92 => (), +// 'c' => (), +// "hello" => (), +// } +// } +fn literal_pat(p: &mut Parser<'_>) -> CompletedMarker { + assert!(is_literal_pat_start(p)); + let m = p.start(); + if p.at(T![-]) { + p.bump(T![-]); + } + expressions::literal(p); + m.complete(p, LITERAL_PAT) +} + +// test path_part +// fn foo() { +// let foo::Bar = (); +// let ::Bar = (); +// let Bar { .. } = (); +// let Bar(..) = (); +// } +fn path_or_macro_pat(p: &mut Parser<'_>) -> CompletedMarker { + assert!(paths::is_path_start(p)); + let m = p.start(); + paths::expr_path(p); + let kind = match p.current() { + T!['('] => { + tuple_pat_fields(p); + TUPLE_STRUCT_PAT + } + T!['{'] => { + record_pat_field_list(p); + RECORD_PAT + } + // test marco_pat + // fn main() { + // let m!(x) = 0; + // } + T![!] => { + items::macro_call_after_excl(p); + return m.complete(p, MACRO_CALL).precede(p).complete(p, MACRO_PAT); + } + _ => PATH_PAT, + }; + m.complete(p, kind) +} + +// test tuple_pat_fields +// fn foo() { +// let S() = (); +// let S(_) = (); +// let S(_,) = (); +// let S(_, .. 
, x) = (); +// } +fn tuple_pat_fields(p: &mut Parser<'_>) { + assert!(p.at(T!['('])); + p.bump(T!['(']); + pat_list(p, T![')']); + p.expect(T![')']); +} + +// test record_pat_field +// fn foo() { +// let S { 0: 1 } = (); +// let S { x: 1 } = (); +// let S { #[cfg(any())] x: 1 } = (); +// } +fn record_pat_field(p: &mut Parser<'_>) { + match p.current() { + IDENT | INT_NUMBER if p.nth(1) == T![:] => { + name_ref_or_index(p); + p.bump(T![:]); + pattern(p); + } + T![box] => { + // FIXME: not all box patterns should be allowed + box_pat(p); + } + T![ref] | T![mut] | IDENT => { + ident_pat(p, false); + } + _ => { + p.err_and_bump("expected identifier"); + } + } +} + +// test record_pat_field_list +// fn foo() { +// let S {} = (); +// let S { f, ref mut g } = (); +// let S { h: _, ..} = (); +// let S { h: _, } = (); +// let S { #[cfg(any())] .. } = (); +// } +fn record_pat_field_list(p: &mut Parser<'_>) { + assert!(p.at(T!['{'])); + let m = p.start(); + p.bump(T!['{']); + while !p.at(EOF) && !p.at(T!['}']) { + let m = p.start(); + attributes::outer_attrs(p); + + match p.current() { + // A trailing `..` is *not* treated as a REST_PAT. + T![.] if p.at(T![..]) => { + p.bump(T![..]); + m.complete(p, REST_PAT); + } + T!['{'] => { + error_block(p, "expected ident"); + m.abandon(p); + } + _ => { + record_pat_field(p); + m.complete(p, RECORD_PAT_FIELD); + } + } + if !p.at(T!['}']) { + p.expect(T![,]); + } + } + p.expect(T!['}']); + m.complete(p, RECORD_PAT_FIELD_LIST); +} + +// test placeholder_pat +// fn main() { let _ = (); } +fn wildcard_pat(p: &mut Parser<'_>) -> CompletedMarker { + assert!(p.at(T![_])); + let m = p.start(); + p.bump(T![_]); + m.complete(p, WILDCARD_PAT) +} + +// test dot_dot_pat +// fn main() { +// let .. = (); +// // +// // Tuples +// // +// let (a, ..) = (); +// let (a, ..,) = (); +// let Tuple(a, ..) = (); +// let Tuple(a, ..,) = (); +// let (.., ..) = (); +// let Tuple(.., ..) = (); +// let (.., a, ..) = (); +// let Tuple(.., a, ..) 
= (); +// // +// // Slices +// // +// let [..] = (); +// let [head, ..] = (); +// let [head, tail @ ..] = (); +// let [head, .., cons] = (); +// let [head, mid @ .., cons] = (); +// let [head, .., .., cons] = (); +// let [head, .., mid, tail @ ..] = (); +// let [head, .., mid, .., cons] = (); +// } +fn rest_pat(p: &mut Parser<'_>) -> CompletedMarker { + assert!(p.at(T![..])); + let m = p.start(); + p.bump(T![..]); + m.complete(p, REST_PAT) +} + +// test ref_pat +// fn main() { +// let &a = (); +// let &mut b = (); +// } +fn ref_pat(p: &mut Parser<'_>) -> CompletedMarker { + assert!(p.at(T![&])); + let m = p.start(); + p.bump(T![&]); + p.eat(T![mut]); + pattern_single(p); + m.complete(p, REF_PAT) +} + +// test tuple_pat +// fn main() { +// let (a, b, ..) = (); +// let (a,) = (); +// let (..) = (); +// let () = (); +// } +fn tuple_pat(p: &mut Parser<'_>) -> CompletedMarker { + assert!(p.at(T!['('])); + let m = p.start(); + p.bump(T!['(']); + let mut has_comma = false; + let mut has_pat = false; + let mut has_rest = false; + while !p.at(EOF) && !p.at(T![')']) { + has_pat = true; + if !p.at_ts(PATTERN_FIRST) { + p.error("expected a pattern"); + break; + } + has_rest |= p.at(T![..]); + + pattern(p); + if !p.at(T![')']) { + has_comma = true; + p.expect(T![,]); + } + } + p.expect(T![')']); + + m.complete(p, if !has_comma && !has_rest && has_pat { PAREN_PAT } else { TUPLE_PAT }) +} + +// test slice_pat +// fn main() { +// let [a, b, ..] 
= []; +// } +fn slice_pat(p: &mut Parser<'_>) -> CompletedMarker { + assert!(p.at(T!['['])); + let m = p.start(); + p.bump(T!['[']); + pat_list(p, T![']']); + p.expect(T![']']); + m.complete(p, SLICE_PAT) +} + +fn pat_list(p: &mut Parser<'_>, ket: SyntaxKind) { + while !p.at(EOF) && !p.at(ket) { + if !p.at_ts(PATTERN_FIRST) { + p.error("expected a pattern"); + break; + } + + pattern(p); + if !p.at(ket) { + p.expect(T![,]); + } + } +} + +// test bind_pat +// fn main() { +// let a = (); +// let mut b = (); +// let ref c = (); +// let ref mut d = (); +// let e @ _ = (); +// let ref mut f @ g @ _ = (); +// } +fn ident_pat(p: &mut Parser<'_>, with_at: bool) -> CompletedMarker { + assert!(matches!(p.current(), T![ref] | T![mut] | IDENT)); + let m = p.start(); + p.eat(T![ref]); + p.eat(T![mut]); + name_r(p, PAT_RECOVERY_SET); + if with_at && p.eat(T![@]) { + pattern_single(p); + } + m.complete(p, IDENT_PAT) +} + +// test box_pat +// fn main() { +// let box i = (); +// let box Outer { box i, j: box Inner(box &x) } = (); +// let box ref mut i = (); +// } +fn box_pat(p: &mut Parser<'_>) -> CompletedMarker { + assert!(p.at(T![box])); + let m = p.start(); + p.bump(T![box]); + pattern_single(p); + m.complete(p, BOX_PAT) +} + +// test const_block_pat +// fn main() { +// let const { 15 } = (); +// let const { foo(); bar() } = (); +// } +fn const_block_pat(p: &mut Parser<'_>) -> CompletedMarker { + assert!(p.at(T![const])); + let m = p.start(); + p.bump(T![const]); + expressions::block_expr(p); + m.complete(p, CONST_BLOCK_PAT) +} diff --git a/src/tools/rust-analyzer/crates/parser/src/grammar/types.rs b/src/tools/rust-analyzer/crates/parser/src/grammar/types.rs new file mode 100644 index 000000000..5c6e18fee --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/grammar/types.rs @@ -0,0 +1,352 @@ +use super::*; + +pub(super) const TYPE_FIRST: TokenSet = paths::PATH_FIRST.union(TokenSet::new(&[ + T!['('], + T!['['], + T![<], + T![!], + T![*], + T![&], + T![_], + T![fn], + 
T![unsafe], + T![extern], + T![for], + T![impl], + T![dyn], + T![Self], +])); + +const TYPE_RECOVERY_SET: TokenSet = TokenSet::new(&[ + T![')'], + T![,], + // test_err struct_field_recover + // struct S { f pub g: () } + T![pub], +]); + +pub(crate) fn type_(p: &mut Parser<'_>) { + type_with_bounds_cond(p, true); +} + +pub(super) fn type_no_bounds(p: &mut Parser<'_>) { + type_with_bounds_cond(p, false); +} + +fn type_with_bounds_cond(p: &mut Parser<'_>, allow_bounds: bool) { + match p.current() { + T!['('] => paren_or_tuple_type(p), + T![!] => never_type(p), + T![*] => ptr_type(p), + T!['['] => array_or_slice_type(p), + T![&] => ref_type(p), + T![_] => infer_type(p), + T![fn] | T![unsafe] | T![extern] => fn_ptr_type(p), + T![for] => for_type(p, allow_bounds), + T![impl] => impl_trait_type(p), + T![dyn] => dyn_trait_type(p), + // Some path types are not allowed to have bounds (no plus) + T![<] => path_type_(p, allow_bounds), + _ if paths::is_path_start(p) => path_or_macro_type_(p, allow_bounds), + _ => { + p.err_recover("expected type", TYPE_RECOVERY_SET); + } + } +} + +pub(super) fn ascription(p: &mut Parser<'_>) { + assert!(p.at(T![:])); + p.bump(T![:]); + if p.at(T![=]) { + // recover from `let x: = expr;`, `const X: = expr;` and similars + // hopefully no type starts with `=` + p.error("missing type"); + return; + } + type_(p); +} + +fn paren_or_tuple_type(p: &mut Parser<'_>) { + assert!(p.at(T!['('])); + let m = p.start(); + p.bump(T!['(']); + let mut n_types: u32 = 0; + let mut trailing_comma: bool = false; + while !p.at(EOF) && !p.at(T![')']) { + n_types += 1; + type_(p); + if p.eat(T![,]) { + trailing_comma = true; + } else { + trailing_comma = false; + break; + } + } + p.expect(T![')']); + + let kind = if n_types == 1 && !trailing_comma { + // test paren_type + // type T = (i32); + PAREN_TYPE + } else { + // test unit_type + // type T = (); + + // test singleton_tuple_type + // type T = (i32,); + TUPLE_TYPE + }; + m.complete(p, kind); +} + +// test 
never_type
+// type Never = !;
+fn never_type(p: &mut Parser<'_>) {
+ assert!(p.at(T![!]));
+ let m = p.start();
+ p.bump(T![!]);
+ m.complete(p, NEVER_TYPE);
+}
+
+fn ptr_type(p: &mut Parser<'_>) {
+ assert!(p.at(T![*]));
+ let m = p.start();
+ p.bump(T![*]);
+
+ match p.current() {
+ // test pointer_type_mut
+ // type M = *mut ();
+ // type C = *mut ();
+ T![mut] | T![const] => p.bump_any(),
+ _ => {
+ // test_err pointer_type_no_mutability
+ // type T = *();
+ p.error(
+ "expected mut or const in raw pointer type \
+ (use `*mut T` or `*const T` as appropriate)",
+ );
+ }
+ };
+
+ type_no_bounds(p);
+ m.complete(p, PTR_TYPE);
+}
+
+fn array_or_slice_type(p: &mut Parser<'_>) {
+ assert!(p.at(T!['[']));
+ let m = p.start();
+ p.bump(T!['[']);
+
+ type_(p);
+ let kind = match p.current() {
+ // test slice_type
+ // type T = [()];
+ T![']'] => {
+ p.bump(T![']']);
+ SLICE_TYPE
+ }
+
+ // test array_type
+ // type T = [(); 92];
+ T![;] => {
+ p.bump(T![;]);
+ expressions::expr(p);
+ p.expect(T![']']);
+ ARRAY_TYPE
+ }
+ // test_err array_type_missing_semi
+ // type T = [() 92];
+ _ => {
+ p.error("expected `;` or `]`");
+ SLICE_TYPE
+ }
+ };
+ m.complete(p, kind);
+}
+
+// test reference_type
+// type A = &();
+// type B = &'static ();
+// type C = &mut ();
+fn ref_type(p: &mut Parser<'_>) {
+ assert!(p.at(T![&]));
+ let m = p.start();
+ p.bump(T![&]);
+ if p.at(LIFETIME_IDENT) {
+ lifetime(p);
+ }
+ p.eat(T![mut]);
+ type_no_bounds(p);
+ m.complete(p, REF_TYPE);
+}
+
+// test placeholder_type
+// type Placeholder = _;
+fn infer_type(p: &mut Parser<'_>) {
+ assert!(p.at(T![_]));
+ let m = p.start();
+ p.bump(T![_]);
+ m.complete(p, INFER_TYPE);
+}
+
+// test fn_pointer_type
+// type A = fn();
+// type B = unsafe fn();
+// type C = unsafe extern "C" fn();
+// type D = extern "C" fn ( u8 , ... 
) -> u8; +fn fn_ptr_type(p: &mut Parser<'_>) { + let m = p.start(); + p.eat(T![unsafe]); + if p.at(T![extern]) { + abi(p); + } + // test_err fn_pointer_type_missing_fn + // type F = unsafe (); + if !p.eat(T![fn]) { + m.abandon(p); + p.error("expected `fn`"); + return; + } + if p.at(T!['(']) { + params::param_list_fn_ptr(p); + } else { + p.error("expected parameters"); + } + // test fn_pointer_type_with_ret + // type F = fn() -> (); + opt_ret_type(p); + m.complete(p, FN_PTR_TYPE); +} + +pub(super) fn for_binder(p: &mut Parser<'_>) { + assert!(p.at(T![for])); + p.bump(T![for]); + if p.at(T![<]) { + generic_params::opt_generic_param_list(p); + } else { + p.error("expected `<`"); + } +} + +// test for_type +// type A = for<'a> fn() -> (); +// type B = for<'a> unsafe extern "C" fn(&'a ()) -> (); +// type Obj = for<'a> PartialEq<&'a i32>; +pub(super) fn for_type(p: &mut Parser<'_>, allow_bounds: bool) { + assert!(p.at(T![for])); + let m = p.start(); + for_binder(p); + match p.current() { + T![fn] | T![unsafe] | T![extern] => {} + // OK: legacy trait object format + _ if paths::is_use_path_start(p) => {} + _ => { + p.error("expected a function pointer or path"); + } + } + type_no_bounds(p); + let completed = m.complete(p, FOR_TYPE); + + // test no_dyn_trait_leading_for + // type A = for<'a> Test<'a> + Send; + if allow_bounds { + opt_type_bounds_as_dyn_trait_type(p, completed); + } +} + +// test impl_trait_type +// type A = impl Iterator<Item=Foo<'a>> + 'a; +fn impl_trait_type(p: &mut Parser<'_>) { + assert!(p.at(T![impl])); + let m = p.start(); + p.bump(T![impl]); + generic_params::bounds_without_colon(p); + m.complete(p, IMPL_TRAIT_TYPE); +} + +// test dyn_trait_type +// type A = dyn Iterator<Item=Foo<'a>> + 'a; +fn dyn_trait_type(p: &mut Parser<'_>) { + assert!(p.at(T![dyn])); + let m = p.start(); + p.bump(T![dyn]); + generic_params::bounds_without_colon(p); + m.complete(p, DYN_TRAIT_TYPE); +} + +// test path_type +// type A = Foo; +// type B = ::Foo; +// type C = 
self::Foo; +// type D = super::Foo; +pub(super) fn path_type(p: &mut Parser<'_>) { + path_type_(p, true); +} + +// test macro_call_type +// type A = foo!(); +// type B = crate::foo!(); +fn path_or_macro_type_(p: &mut Parser<'_>, allow_bounds: bool) { + assert!(paths::is_path_start(p)); + let r = p.start(); + let m = p.start(); + + paths::type_path(p); + + let kind = if p.at(T![!]) && !p.at(T![!=]) { + items::macro_call_after_excl(p); + m.complete(p, MACRO_CALL); + MACRO_TYPE + } else { + m.abandon(p); + PATH_TYPE + }; + + let path = r.complete(p, kind); + + if allow_bounds { + opt_type_bounds_as_dyn_trait_type(p, path); + } +} + +pub(super) fn path_type_(p: &mut Parser<'_>, allow_bounds: bool) { + assert!(paths::is_path_start(p)); + let m = p.start(); + paths::type_path(p); + + // test path_type_with_bounds + // fn foo() -> Box<T + 'f> {} + // fn foo() -> Box<dyn T + 'f> {} + let path = m.complete(p, PATH_TYPE); + if allow_bounds { + opt_type_bounds_as_dyn_trait_type(p, path); + } +} + +/// This turns a parsed PATH_TYPE or FOR_TYPE optionally into a DYN_TRAIT_TYPE +/// with a TYPE_BOUND_LIST +fn opt_type_bounds_as_dyn_trait_type(p: &mut Parser<'_>, type_marker: CompletedMarker) { + assert!(matches!( + type_marker.kind(), + SyntaxKind::PATH_TYPE | SyntaxKind::FOR_TYPE | SyntaxKind::MACRO_TYPE + )); + if !p.at(T![+]) { + return; + } + + // First create a TYPE_BOUND from the completed PATH_TYPE + let m = type_marker.precede(p).complete(p, TYPE_BOUND); + + // Next setup a marker for the TYPE_BOUND_LIST + let m = m.precede(p); + + // This gets consumed here so it gets properly set + // in the TYPE_BOUND_LIST + p.eat(T![+]); + + // Parse rest of the bounds into the TYPE_BOUND_LIST + let m = generic_params::bounds_without_colon_m(p, m); + + // Finally precede everything with DYN_TRAIT_TYPE + m.precede(p).complete(p, DYN_TRAIT_TYPE); +} diff --git a/src/tools/rust-analyzer/crates/parser/src/input.rs b/src/tools/rust-analyzer/crates/parser/src/input.rs new file mode 100644 
index 000000000..9504bd4d9 --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/input.rs @@ -0,0 +1,88 @@ +//! See [`Input`]. + +use crate::SyntaxKind; + +#[allow(non_camel_case_types)] +type bits = u64; + +/// Input for the parser -- a sequence of tokens. +/// +/// As of now, parser doesn't have access to the *text* of the tokens, and makes +/// decisions based solely on their classification. Unlike `LexerToken`, the +/// `Tokens` doesn't include whitespace and comments. Main input to the parser. +/// +/// Struct of arrays internally, but this shouldn't really matter. +#[derive(Default)] +pub struct Input { + kind: Vec<SyntaxKind>, + joint: Vec<bits>, + contextual_kind: Vec<SyntaxKind>, +} + +/// `pub` impl used by callers to create `Tokens`. +impl Input { + #[inline] + pub fn push(&mut self, kind: SyntaxKind) { + self.push_impl(kind, SyntaxKind::EOF) + } + #[inline] + pub fn push_ident(&mut self, contextual_kind: SyntaxKind) { + self.push_impl(SyntaxKind::IDENT, contextual_kind) + } + /// Sets jointness for the last token we've pushed. + /// + /// This is a separate API rather than an argument to the `push` to make it + /// convenient both for textual and mbe tokens. With text, you know whether + /// the *previous* token was joint, with mbe, you know whether the *current* + /// one is joint. 
This API allows for styles of usage: + /// + /// ``` + /// // In text: + /// tokens.was_joint(prev_joint); + /// tokens.push(curr); + /// + /// // In MBE: + /// token.push(curr); + /// tokens.push(curr_joint) + /// ``` + #[inline] + pub fn was_joint(&mut self) { + let n = self.len() - 1; + let (idx, b_idx) = self.bit_index(n); + self.joint[idx] |= 1 << b_idx; + } + #[inline] + fn push_impl(&mut self, kind: SyntaxKind, contextual_kind: SyntaxKind) { + let idx = self.len(); + if idx % (bits::BITS as usize) == 0 { + self.joint.push(0); + } + self.kind.push(kind); + self.contextual_kind.push(contextual_kind); + } +} + +/// pub(crate) impl used by the parser to consume `Tokens`. +impl Input { + pub(crate) fn kind(&self, idx: usize) -> SyntaxKind { + self.kind.get(idx).copied().unwrap_or(SyntaxKind::EOF) + } + pub(crate) fn contextual_kind(&self, idx: usize) -> SyntaxKind { + self.contextual_kind.get(idx).copied().unwrap_or(SyntaxKind::EOF) + } + pub(crate) fn is_joint(&self, n: usize) -> bool { + let (idx, b_idx) = self.bit_index(n); + self.joint[idx] & 1 << b_idx != 0 + } +} + +impl Input { + fn bit_index(&self, n: usize) -> (usize, usize) { + let idx = n / (bits::BITS as usize); + let b_idx = n % (bits::BITS as usize); + (idx, b_idx) + } + fn len(&self) -> usize { + self.kind.len() + } +} diff --git a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs new file mode 100644 index 000000000..f4b9988ea --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs @@ -0,0 +1,300 @@ +//! Lexing `&str` into a sequence of Rust tokens. +//! +//! Note that strictly speaking the parser in this crate is not required to work +//! on tokens which originated from text. Macros, eg, can synthesize tokens out +//! of thin air. So, ideally, lexer should be an orthogonal crate. It is however +//! convenient to include a text-based lexer here! +//! +//! 
Note that these tokens, unlike the tokens we feed into the parser, do +//! include info about comments and whitespace. + +use std::ops; + +use crate::{ + SyntaxKind::{self, *}, + T, +}; + +pub struct LexedStr<'a> { + text: &'a str, + kind: Vec<SyntaxKind>, + start: Vec<u32>, + error: Vec<LexError>, +} + +struct LexError { + msg: String, + token: u32, +} + +impl<'a> LexedStr<'a> { + pub fn new(text: &'a str) -> LexedStr<'a> { + let mut conv = Converter::new(text); + if let Some(shebang_len) = rustc_lexer::strip_shebang(text) { + conv.res.push(SHEBANG, conv.offset); + conv.offset = shebang_len; + }; + + for token in rustc_lexer::tokenize(&text[conv.offset..]) { + let token_text = &text[conv.offset..][..token.len]; + + conv.extend_token(&token.kind, token_text); + } + + conv.finalize_with_eof() + } + + pub fn single_token(text: &'a str) -> Option<(SyntaxKind, Option<String>)> { + if text.is_empty() { + return None; + } + + let token = rustc_lexer::first_token(text); + if token.len != text.len() { + return None; + } + + let mut conv = Converter::new(text); + conv.extend_token(&token.kind, text); + match &*conv.res.kind { + [kind] => Some((*kind, conv.res.error.pop().map(|it| it.msg.clone()))), + _ => None, + } + } + + pub fn as_str(&self) -> &str { + self.text + } + + pub fn len(&self) -> usize { + self.kind.len() - 1 + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn kind(&self, i: usize) -> SyntaxKind { + assert!(i < self.len()); + self.kind[i] + } + + pub fn text(&self, i: usize) -> &str { + self.range_text(i..i + 1) + } + pub fn range_text(&self, r: ops::Range<usize>) -> &str { + assert!(r.start < r.end && r.end <= self.len()); + let lo = self.start[r.start] as usize; + let hi = self.start[r.end] as usize; + &self.text[lo..hi] + } + + // Naming is hard. 
+ pub fn text_range(&self, i: usize) -> ops::Range<usize> { + assert!(i < self.len()); + let lo = self.start[i] as usize; + let hi = self.start[i + 1] as usize; + lo..hi + } + pub fn text_start(&self, i: usize) -> usize { + assert!(i <= self.len()); + self.start[i] as usize + } + pub fn text_len(&self, i: usize) -> usize { + assert!(i < self.len()); + let r = self.text_range(i); + r.end - r.start + } + + pub fn error(&self, i: usize) -> Option<&str> { + assert!(i < self.len()); + let err = self.error.binary_search_by_key(&(i as u32), |i| i.token).ok()?; + Some(self.error[err].msg.as_str()) + } + + pub fn errors(&self) -> impl Iterator<Item = (usize, &str)> + '_ { + self.error.iter().map(|it| (it.token as usize, it.msg.as_str())) + } + + fn push(&mut self, kind: SyntaxKind, offset: usize) { + self.kind.push(kind); + self.start.push(offset as u32); + } +} + +struct Converter<'a> { + res: LexedStr<'a>, + offset: usize, +} + +impl<'a> Converter<'a> { + fn new(text: &'a str) -> Self { + Self { + res: LexedStr { text, kind: Vec::new(), start: Vec::new(), error: Vec::new() }, + offset: 0, + } + } + + fn finalize_with_eof(mut self) -> LexedStr<'a> { + self.res.push(EOF, self.offset); + self.res + } + + fn push(&mut self, kind: SyntaxKind, len: usize, err: Option<&str>) { + self.res.push(kind, self.offset); + self.offset += len; + + if let Some(err) = err { + let token = self.res.len() as u32; + let msg = err.to_string(); + self.res.error.push(LexError { msg, token }); + } + } + + fn extend_token(&mut self, kind: &rustc_lexer::TokenKind, token_text: &str) { + // A note on an intended tradeoff: + // We drop some useful information here (see patterns with double dots `..`) + // Storing that info in `SyntaxKind` is not possible due to its layout requirements of + // being `u16` that come from `rowan::SyntaxKind`. 
+ let mut err = ""; + + let syntax_kind = { + match kind { + rustc_lexer::TokenKind::LineComment { doc_style: _ } => COMMENT, + rustc_lexer::TokenKind::BlockComment { doc_style: _, terminated } => { + if !terminated { + err = "Missing trailing `*/` symbols to terminate the block comment"; + } + COMMENT + } + + rustc_lexer::TokenKind::Whitespace => WHITESPACE, + + rustc_lexer::TokenKind::Ident if token_text == "_" => UNDERSCORE, + rustc_lexer::TokenKind::Ident => { + SyntaxKind::from_keyword(token_text).unwrap_or(IDENT) + } + + rustc_lexer::TokenKind::RawIdent => IDENT, + rustc_lexer::TokenKind::Literal { kind, .. } => { + self.extend_literal(token_text.len(), kind); + return; + } + + rustc_lexer::TokenKind::Lifetime { starts_with_number } => { + if *starts_with_number { + err = "Lifetime name cannot start with a number"; + } + LIFETIME_IDENT + } + + rustc_lexer::TokenKind::Semi => T![;], + rustc_lexer::TokenKind::Comma => T![,], + rustc_lexer::TokenKind::Dot => T![.], + rustc_lexer::TokenKind::OpenParen => T!['('], + rustc_lexer::TokenKind::CloseParen => T![')'], + rustc_lexer::TokenKind::OpenBrace => T!['{'], + rustc_lexer::TokenKind::CloseBrace => T!['}'], + rustc_lexer::TokenKind::OpenBracket => T!['['], + rustc_lexer::TokenKind::CloseBracket => T![']'], + rustc_lexer::TokenKind::At => T![@], + rustc_lexer::TokenKind::Pound => T![#], + rustc_lexer::TokenKind::Tilde => T![~], + rustc_lexer::TokenKind::Question => T![?], + rustc_lexer::TokenKind::Colon => T![:], + rustc_lexer::TokenKind::Dollar => T![$], + rustc_lexer::TokenKind::Eq => T![=], + rustc_lexer::TokenKind::Bang => T![!], + rustc_lexer::TokenKind::Lt => T![<], + rustc_lexer::TokenKind::Gt => T![>], + rustc_lexer::TokenKind::Minus => T![-], + rustc_lexer::TokenKind::And => T![&], + rustc_lexer::TokenKind::Or => T![|], + rustc_lexer::TokenKind::Plus => T![+], + rustc_lexer::TokenKind::Star => T![*], + rustc_lexer::TokenKind::Slash => T![/], + rustc_lexer::TokenKind::Caret => T![^], + 
rustc_lexer::TokenKind::Percent => T![%], + rustc_lexer::TokenKind::Unknown => ERROR, + } + }; + + let err = if err.is_empty() { None } else { Some(err) }; + self.push(syntax_kind, token_text.len(), err); + } + + fn extend_literal(&mut self, len: usize, kind: &rustc_lexer::LiteralKind) { + let mut err = ""; + + let syntax_kind = match *kind { + rustc_lexer::LiteralKind::Int { empty_int, base: _ } => { + if empty_int { + err = "Missing digits after the integer base prefix"; + } + INT_NUMBER + } + rustc_lexer::LiteralKind::Float { empty_exponent, base: _ } => { + if empty_exponent { + err = "Missing digits after the exponent symbol"; + } + FLOAT_NUMBER + } + rustc_lexer::LiteralKind::Char { terminated } => { + if !terminated { + err = "Missing trailing `'` symbol to terminate the character literal"; + } + CHAR + } + rustc_lexer::LiteralKind::Byte { terminated } => { + if !terminated { + err = "Missing trailing `'` symbol to terminate the byte literal"; + } + BYTE + } + rustc_lexer::LiteralKind::Str { terminated } => { + if !terminated { + err = "Missing trailing `\"` symbol to terminate the string literal"; + } + STRING + } + rustc_lexer::LiteralKind::ByteStr { terminated } => { + if !terminated { + err = "Missing trailing `\"` symbol to terminate the byte string literal"; + } + BYTE_STRING + } + rustc_lexer::LiteralKind::RawStr { err: raw_str_err, .. } => { + if let Some(raw_str_err) = raw_str_err { + err = match raw_str_err { + rustc_lexer::RawStrError::InvalidStarter { .. } => "Missing `\"` symbol after `#` symbols to begin the raw string literal", + rustc_lexer::RawStrError::NoTerminator { expected, found, .. } => if expected == found { + "Missing trailing `\"` to terminate the raw string literal" + } else { + "Missing trailing `\"` with `#` symbols to terminate the raw string literal" + }, + rustc_lexer::RawStrError::TooManyDelimiters { .. 
} => "Too many `#` symbols: raw strings may be delimited by up to 65535 `#` symbols", + }; + }; + STRING + } + rustc_lexer::LiteralKind::RawByteStr { err: raw_str_err, .. } => { + if let Some(raw_str_err) = raw_str_err { + err = match raw_str_err { + rustc_lexer::RawStrError::InvalidStarter { .. } => "Missing `\"` symbol after `#` symbols to begin the raw byte string literal", + rustc_lexer::RawStrError::NoTerminator { expected, found, .. } => if expected == found { + "Missing trailing `\"` to terminate the raw byte string literal" + } else { + "Missing trailing `\"` with `#` symbols to terminate the raw byte string literal" + }, + rustc_lexer::RawStrError::TooManyDelimiters { .. } => "Too many `#` symbols: raw byte strings may be delimited by up to 65535 `#` symbols", + }; + }; + + BYTE_STRING + } + }; + + let err = if err.is_empty() { None } else { Some(err) }; + self.push(syntax_kind, len, err); + } +} diff --git a/src/tools/rust-analyzer/crates/parser/src/lib.rs b/src/tools/rust-analyzer/crates/parser/src/lib.rs new file mode 100644 index 000000000..87be47927 --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/lib.rs @@ -0,0 +1,181 @@ +//! The Rust parser. +//! +//! NOTE: The crate is undergoing refactors, don't believe everything the docs +//! say :-) +//! +//! The parser doesn't know about concrete representation of tokens and syntax +//! trees. Abstract [`TokenSource`] and [`TreeSink`] traits are used instead. As +//! a consequence, this crate does not contain a lexer. +//! +//! The [`Parser`] struct from the [`parser`] module is a cursor into the +//! sequence of tokens. Parsing routines use [`Parser`] to inspect current +//! state and advance the parsing. +//! +//! The actual parsing happens in the [`grammar`] module. +//! +//! Tests for this crate live in the `syntax` crate. +//! +//! 
[`Parser`]: crate::parser::Parser + +#![warn(rust_2018_idioms, unused_lifetimes, semicolon_in_expressions_from_macros)] +#![allow(rustdoc::private_intra_doc_links)] + +mod lexed_str; +mod token_set; +mod syntax_kind; +mod event; +mod parser; +mod grammar; +mod input; +mod output; +mod shortcuts; + +#[cfg(test)] +mod tests; + +pub(crate) use token_set::TokenSet; + +pub use crate::{ + input::Input, + lexed_str::LexedStr, + output::{Output, Step}, + shortcuts::StrStep, + syntax_kind::SyntaxKind, +}; + +/// Parse the whole of the input as a given syntactic construct. +/// +/// This covers two main use-cases: +/// +/// * Parsing a Rust file. +/// * Parsing a result of macro expansion. +/// +/// That is, for something like +/// +/// ``` +/// quick_check! { +/// fn prop() {} +/// } +/// ``` +/// +/// the input to the macro will be parsed with [`PrefixEntryPoint::Item`], and +/// the result will be [`TopEntryPoint::MacroItems`]. +/// +/// [`TopEntryPoint::parse`] makes a guarantee that +/// * all input is consumed +/// * the result is a valid tree (there's one root node) +#[derive(Debug)] +pub enum TopEntryPoint { + SourceFile, + MacroStmts, + MacroItems, + Pattern, + Type, + Expr, + /// Edge case -- macros generally don't expand to attributes, with the + /// exception of `cfg_attr` which does! 
+ MetaItem, +} + +impl TopEntryPoint { + pub fn parse(&self, input: &Input) -> Output { + let entry_point: fn(&'_ mut parser::Parser<'_>) = match self { + TopEntryPoint::SourceFile => grammar::entry::top::source_file, + TopEntryPoint::MacroStmts => grammar::entry::top::macro_stmts, + TopEntryPoint::MacroItems => grammar::entry::top::macro_items, + TopEntryPoint::Pattern => grammar::entry::top::pattern, + TopEntryPoint::Type => grammar::entry::top::type_, + TopEntryPoint::Expr => grammar::entry::top::expr, + TopEntryPoint::MetaItem => grammar::entry::top::meta_item, + }; + let mut p = parser::Parser::new(input); + entry_point(&mut p); + let events = p.finish(); + let res = event::process(events); + + if cfg!(debug_assertions) { + let mut depth = 0; + let mut first = true; + for step in res.iter() { + assert!(depth > 0 || first); + first = false; + match step { + Step::Enter { .. } => depth += 1, + Step::Exit => depth -= 1, + Step::Token { .. } | Step::Error { .. } => (), + } + } + assert!(!first, "no tree at all"); + } + + res + } +} + +/// Parse a prefix of the input as a given syntactic construct. +/// +/// This is used by macro-by-example parser to implement things like `$i:item` +/// and the naming of variants follows the naming of macro fragments. +/// +/// Note that this is generally non-optional -- the result is intentionally not +/// `Option<Output>`. The way MBE work, by the time we *try* to parse `$e:expr` +/// we already commit to expression. In other words, this API by design can't be +/// used to implement "rollback and try another alternative" logic. 
+#[derive(Debug)] +pub enum PrefixEntryPoint { + Vis, + Block, + Stmt, + Pat, + Ty, + Expr, + Path, + Item, + MetaItem, +} + +impl PrefixEntryPoint { + pub fn parse(&self, input: &Input) -> Output { + let entry_point: fn(&'_ mut parser::Parser<'_>) = match self { + PrefixEntryPoint::Vis => grammar::entry::prefix::vis, + PrefixEntryPoint::Block => grammar::entry::prefix::block, + PrefixEntryPoint::Stmt => grammar::entry::prefix::stmt, + PrefixEntryPoint::Pat => grammar::entry::prefix::pat, + PrefixEntryPoint::Ty => grammar::entry::prefix::ty, + PrefixEntryPoint::Expr => grammar::entry::prefix::expr, + PrefixEntryPoint::Path => grammar::entry::prefix::path, + PrefixEntryPoint::Item => grammar::entry::prefix::item, + PrefixEntryPoint::MetaItem => grammar::entry::prefix::meta_item, + }; + let mut p = parser::Parser::new(input); + entry_point(&mut p); + let events = p.finish(); + event::process(events) + } +} + +/// A parsing function for a specific braced-block. +pub struct Reparser(fn(&mut parser::Parser<'_>)); + +impl Reparser { + /// If the node is a braced block, return the corresponding `Reparser`. + pub fn for_node( + node: SyntaxKind, + first_child: Option<SyntaxKind>, + parent: Option<SyntaxKind>, + ) -> Option<Reparser> { + grammar::reparser(node, first_child, parent).map(Reparser) + } + + /// Re-parse given tokens using this `Reparser`. + /// + /// Tokens must start with `{`, end with `}` and form a valid brace + /// sequence. + pub fn parse(self, tokens: &Input) -> Output { + let Reparser(r) = self; + let mut p = parser::Parser::new(tokens); + r(&mut p); + let events = p.finish(); + event::process(events) + } +} diff --git a/src/tools/rust-analyzer/crates/parser/src/output.rs b/src/tools/rust-analyzer/crates/parser/src/output.rs new file mode 100644 index 000000000..e9ec9822d --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/output.rs @@ -0,0 +1,77 @@ +//! 
See [`Output`] + +use crate::SyntaxKind; + +/// Output of the parser -- a DFS traversal of a concrete syntax tree. +/// +/// Use the [`Output::iter`] method to iterate over traversal steps and consume +/// a syntax tree. +/// +/// In a sense, this is just a sequence of [`SyntaxKind`]-colored parenthesis +/// interspersed into the original [`crate::Input`]. The output is fundamentally +/// coordinated with the input and `n_input_tokens` refers to the number of +/// times [`crate::Input::push`] was called. +#[derive(Default)] +pub struct Output { + /// 32-bit encoding of events. If LSB is zero, then that's an index into the + /// error vector. Otherwise, it's one of the thee other variants, with data encoded as + /// + /// |16 bit kind|8 bit n_input_tokens|4 bit tag|4 bit leftover| + /// + event: Vec<u32>, + error: Vec<String>, +} + +#[derive(Debug)] +pub enum Step<'a> { + Token { kind: SyntaxKind, n_input_tokens: u8 }, + Enter { kind: SyntaxKind }, + Exit, + Error { msg: &'a str }, +} + +impl Output { + pub fn iter(&self) -> impl Iterator<Item = Step<'_>> { + self.event.iter().map(|&event| { + if event & 0b1 == 0 { + return Step::Error { msg: self.error[(event as usize) >> 1].as_str() }; + } + let tag = ((event & 0x0000_00F0) >> 4) as u8; + match tag { + 0 => { + let kind: SyntaxKind = (((event & 0xFFFF_0000) >> 16) as u16).into(); + let n_input_tokens = ((event & 0x0000_FF00) >> 8) as u8; + Step::Token { kind, n_input_tokens } + } + 1 => { + let kind: SyntaxKind = (((event & 0xFFFF_0000) >> 16) as u16).into(); + Step::Enter { kind } + } + 2 => Step::Exit, + _ => unreachable!(), + } + }) + } + + pub(crate) fn token(&mut self, kind: SyntaxKind, n_tokens: u8) { + let e = ((kind as u16 as u32) << 16) | ((n_tokens as u32) << 8) | (0 << 4) | 1; + self.event.push(e) + } + + pub(crate) fn enter_node(&mut self, kind: SyntaxKind) { + let e = ((kind as u16 as u32) << 16) | (1 << 4) | 1; + self.event.push(e) + } + + pub(crate) fn leave_node(&mut self) { + let e = 2 << 4 | 1; + 
self.event.push(e) + } + + pub(crate) fn error(&mut self, error: String) { + let idx = self.error.len(); + self.error.push(error); + let e = (idx as u32) << 1; + self.event.push(e); + } +} diff --git a/src/tools/rust-analyzer/crates/parser/src/parser.rs b/src/tools/rust-analyzer/crates/parser/src/parser.rs new file mode 100644 index 000000000..48d8350e0 --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/parser.rs @@ -0,0 +1,340 @@ +//! See [`Parser`]. + +use std::cell::Cell; + +use drop_bomb::DropBomb; +use limit::Limit; + +use crate::{ + event::Event, + input::Input, + SyntaxKind::{self, EOF, ERROR, TOMBSTONE}, + TokenSet, T, +}; + +/// `Parser` struct provides the low-level API for +/// navigating through the stream of tokens and +/// constructing the parse tree. The actual parsing +/// happens in the [`grammar`](super::grammar) module. +/// +/// However, the result of this `Parser` is not a real +/// tree, but rather a flat stream of events of the form +/// "start expression, consume number literal, +/// finish expression". See `Event` docs for more. +pub(crate) struct Parser<'t> { + inp: &'t Input, + pos: usize, + events: Vec<Event>, + steps: Cell<u32>, +} + +static PARSER_STEP_LIMIT: Limit = Limit::new(15_000_000); + +impl<'t> Parser<'t> { + pub(super) fn new(inp: &'t Input) -> Parser<'t> { + Parser { inp, pos: 0, events: Vec::new(), steps: Cell::new(0) } + } + + pub(crate) fn finish(self) -> Vec<Event> { + self.events + } + + /// Returns the kind of the current token. + /// If parser has already reached the end of input, + /// the special `EOF` kind is returned. + pub(crate) fn current(&self) -> SyntaxKind { + self.nth(0) + } + + /// Lookahead operation: returns the kind of the next nth + /// token. 
+ pub(crate) fn nth(&self, n: usize) -> SyntaxKind { + assert!(n <= 3); + + let steps = self.steps.get(); + assert!(PARSER_STEP_LIMIT.check(steps as usize).is_ok(), "the parser seems stuck"); + self.steps.set(steps + 1); + + self.inp.kind(self.pos + n) + } + + /// Checks if the current token is `kind`. + pub(crate) fn at(&self, kind: SyntaxKind) -> bool { + self.nth_at(0, kind) + } + + pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool { + match kind { + T![-=] => self.at_composite2(n, T![-], T![=]), + T![->] => self.at_composite2(n, T![-], T![>]), + T![::] => self.at_composite2(n, T![:], T![:]), + T![!=] => self.at_composite2(n, T![!], T![=]), + T![..] => self.at_composite2(n, T![.], T![.]), + T![*=] => self.at_composite2(n, T![*], T![=]), + T![/=] => self.at_composite2(n, T![/], T![=]), + T![&&] => self.at_composite2(n, T![&], T![&]), + T![&=] => self.at_composite2(n, T![&], T![=]), + T![%=] => self.at_composite2(n, T![%], T![=]), + T![^=] => self.at_composite2(n, T![^], T![=]), + T![+=] => self.at_composite2(n, T![+], T![=]), + T![<<] => self.at_composite2(n, T![<], T![<]), + T![<=] => self.at_composite2(n, T![<], T![=]), + T![==] => self.at_composite2(n, T![=], T![=]), + T![=>] => self.at_composite2(n, T![=], T![>]), + T![>=] => self.at_composite2(n, T![>], T![=]), + T![>>] => self.at_composite2(n, T![>], T![>]), + T![|=] => self.at_composite2(n, T![|], T![=]), + T![||] => self.at_composite2(n, T![|], T![|]), + + T![...] => self.at_composite3(n, T![.], T![.], T![.]), + T![..=] => self.at_composite3(n, T![.], T![.], T![=]), + T![<<=] => self.at_composite3(n, T![<], T![<], T![=]), + T![>>=] => self.at_composite3(n, T![>], T![>], T![=]), + + _ => self.inp.kind(self.pos + n) == kind, + } + } + + /// Consume the next token if `kind` matches. + pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { + if !self.at(kind) { + return false; + } + let n_raw_tokens = match kind { + T![-=] + | T![->] + | T![::] + | T![!=] + | T![..] 
+ | T![*=] + | T![/=] + | T![&&] + | T![&=] + | T![%=] + | T![^=] + | T![+=] + | T![<<] + | T![<=] + | T![==] + | T![=>] + | T![>=] + | T![>>] + | T![|=] + | T![||] => 2, + + T![...] | T![..=] | T![<<=] | T![>>=] => 3, + _ => 1, + }; + self.do_bump(kind, n_raw_tokens); + true + } + + fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool { + self.inp.kind(self.pos + n) == k1 + && self.inp.kind(self.pos + n + 1) == k2 + && self.inp.is_joint(self.pos + n) + } + + fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool { + self.inp.kind(self.pos + n) == k1 + && self.inp.kind(self.pos + n + 1) == k2 + && self.inp.kind(self.pos + n + 2) == k3 + && self.inp.is_joint(self.pos + n) + && self.inp.is_joint(self.pos + n + 1) + } + + /// Checks if the current token is in `kinds`. + pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool { + kinds.contains(self.current()) + } + + /// Checks if the current token is contextual keyword with text `t`. + pub(crate) fn at_contextual_kw(&self, kw: SyntaxKind) -> bool { + self.inp.contextual_kind(self.pos) == kw + } + + /// Starts a new node in the syntax tree. All nodes and tokens + /// consumed between the `start` and the corresponding `Marker::complete` + /// belong to the same node. + pub(crate) fn start(&mut self) -> Marker { + let pos = self.events.len() as u32; + self.push_event(Event::tombstone()); + Marker::new(pos) + } + + /// Consume the next token if `kind` matches. + pub(crate) fn bump(&mut self, kind: SyntaxKind) { + assert!(self.eat(kind)); + } + + /// Advances the parser by one token + pub(crate) fn bump_any(&mut self) { + let kind = self.nth(0); + if kind == EOF { + return; + } + self.do_bump(kind, 1); + } + + /// Advances the parser by one token, remapping its kind. + /// This is useful to create contextual keywords from + /// identifiers. 
For example, the lexer creates a `union` + /// *identifier* token, but the parser remaps it to the + /// `union` keyword, and keyword is what ends up in the + /// final tree. + pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { + if self.nth(0) == EOF { + // FIXME: panic!? + return; + } + self.do_bump(kind, 1); + } + + /// Emit error with the `message` + /// FIXME: this should be much more fancy and support + /// structured errors with spans and notes, like rustc + /// does. + pub(crate) fn error<T: Into<String>>(&mut self, message: T) { + let msg = message.into(); + self.push_event(Event::Error { msg }); + } + + /// Consume the next token if it is `kind` or emit an error + /// otherwise. + pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { + if self.eat(kind) { + return true; + } + self.error(format!("expected {:?}", kind)); + false + } + + /// Create an error node and consume the next token. + pub(crate) fn err_and_bump(&mut self, message: &str) { + self.err_recover(message, TokenSet::EMPTY); + } + + /// Create an error node and consume the next token. + pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) { + match self.current() { + T!['{'] | T!['}'] => { + self.error(message); + return; + } + _ => (), + } + + if self.at_ts(recovery) { + self.error(message); + return; + } + + let m = self.start(); + self.error(message); + self.bump_any(); + m.complete(self, ERROR); + } + + fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { + self.pos += n_raw_tokens as usize; + self.push_event(Event::Token { kind, n_raw_tokens }); + } + + fn push_event(&mut self, event: Event) { + self.events.push(event); + } +} + +/// See [`Parser::start`]. 
+pub(crate) struct Marker { + pos: u32, + bomb: DropBomb, +} + +impl Marker { + fn new(pos: u32) -> Marker { + Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") } + } + + /// Finishes the syntax tree node and assigns `kind` to it, + /// and mark the create a `CompletedMarker` for possible future + /// operation like `.precede()` to deal with forward_parent. + pub(crate) fn complete(mut self, p: &mut Parser<'_>, kind: SyntaxKind) -> CompletedMarker { + self.bomb.defuse(); + let idx = self.pos as usize; + match &mut p.events[idx] { + Event::Start { kind: slot, .. } => { + *slot = kind; + } + _ => unreachable!(), + } + p.push_event(Event::Finish); + CompletedMarker::new(self.pos, kind) + } + + /// Abandons the syntax tree node. All its children + /// are attached to its parent instead. + pub(crate) fn abandon(mut self, p: &mut Parser<'_>) { + self.bomb.defuse(); + let idx = self.pos as usize; + if idx == p.events.len() - 1 { + match p.events.pop() { + Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (), + _ => unreachable!(), + } + } + } +} + +pub(crate) struct CompletedMarker { + pos: u32, + kind: SyntaxKind, +} + +impl CompletedMarker { + fn new(pos: u32, kind: SyntaxKind) -> Self { + CompletedMarker { pos, kind } + } + + /// This method allows to create a new node which starts + /// *before* the current one. That is, parser could start + /// node `A`, then complete it, and then after parsing the + /// whole `A`, decide that it should have started some node + /// `B` before starting `A`. `precede` allows to do exactly + /// that. See also docs about + /// [`Event::Start::forward_parent`](crate::event::Event::Start::forward_parent). + /// + /// Given completed events `[START, FINISH]` and its corresponding + /// `CompletedMarker(pos: 0, _)`. 
+ /// Append a new `START` events as `[START, FINISH, NEWSTART]`, + /// then mark `NEWSTART` as `START`'s parent with saving its relative + /// distance to `NEWSTART` into forward_parent(=2 in this case); + pub(crate) fn precede(self, p: &mut Parser<'_>) -> Marker { + let new_pos = p.start(); + let idx = self.pos as usize; + match &mut p.events[idx] { + Event::Start { forward_parent, .. } => { + *forward_parent = Some(new_pos.pos - self.pos); + } + _ => unreachable!(), + } + new_pos + } + + /// Extends this completed marker *to the left* up to `m`. + pub(crate) fn extend_to(self, p: &mut Parser<'_>, mut m: Marker) -> CompletedMarker { + m.bomb.defuse(); + let idx = m.pos as usize; + match &mut p.events[idx] { + Event::Start { forward_parent, .. } => { + *forward_parent = Some(self.pos - m.pos); + } + _ => unreachable!(), + } + self + } + + pub(crate) fn kind(&self) -> SyntaxKind { + self.kind + } +} diff --git a/src/tools/rust-analyzer/crates/parser/src/shortcuts.rs b/src/tools/rust-analyzer/crates/parser/src/shortcuts.rs new file mode 100644 index 000000000..4b805fadd --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/shortcuts.rs @@ -0,0 +1,215 @@ +//! Shortcuts that span lexer/parser abstraction. +//! +//! The way Rust works, parser doesn't necessary parse text, and you might +//! tokenize text without parsing it further. So, it makes sense to keep +//! abstract token parsing, and string tokenization as completely separate +//! layers. +//! +//! However, often you do pares text into syntax trees and the glue code for +//! that needs to live somewhere. Rather than putting it to lexer or parser, we +//! use a separate shortcuts module for that. 
+ +use std::mem; + +use crate::{ + LexedStr, Step, + SyntaxKind::{self, *}, +}; + +#[derive(Debug)] +pub enum StrStep<'a> { + Token { kind: SyntaxKind, text: &'a str }, + Enter { kind: SyntaxKind }, + Exit, + Error { msg: &'a str, pos: usize }, +} + +impl<'a> LexedStr<'a> { + pub fn to_input(&self) -> crate::Input { + let mut res = crate::Input::default(); + let mut was_joint = false; + for i in 0..self.len() { + let kind = self.kind(i); + if kind.is_trivia() { + was_joint = false + } else { + if kind == SyntaxKind::IDENT { + let token_text = self.text(i); + let contextual_kw = SyntaxKind::from_contextual_keyword(token_text) + .unwrap_or(SyntaxKind::IDENT); + res.push_ident(contextual_kw); + } else { + if was_joint { + res.was_joint(); + } + res.push(kind); + } + was_joint = true; + } + } + res + } + + /// NB: only valid to call with Output from Reparser/TopLevelEntry. + pub fn intersperse_trivia( + &self, + output: &crate::Output, + sink: &mut dyn FnMut(StrStep<'_>), + ) -> bool { + let mut builder = Builder { lexed: self, pos: 0, state: State::PendingEnter, sink }; + + for event in output.iter() { + match event { + Step::Token { kind, n_input_tokens: n_raw_tokens } => { + builder.token(kind, n_raw_tokens) + } + Step::Enter { kind } => builder.enter(kind), + Step::Exit => builder.exit(), + Step::Error { msg } => { + let text_pos = builder.lexed.text_start(builder.pos); + (builder.sink)(StrStep::Error { msg, pos: text_pos }); + } + } + } + + match mem::replace(&mut builder.state, State::Normal) { + State::PendingExit => { + builder.eat_trivias(); + (builder.sink)(StrStep::Exit); + } + State::PendingEnter | State::Normal => unreachable!(), + } + + let is_eof = builder.pos == builder.lexed.len(); + is_eof + } +} + +struct Builder<'a, 'b> { + lexed: &'a LexedStr<'a>, + pos: usize, + state: State, + sink: &'b mut dyn FnMut(StrStep<'_>), +} + +enum State { + PendingEnter, + Normal, + PendingExit, +} + +impl Builder<'_, '_> { + fn token(&mut self, kind: SyntaxKind, 
n_tokens: u8) { + match mem::replace(&mut self.state, State::Normal) { + State::PendingEnter => unreachable!(), + State::PendingExit => (self.sink)(StrStep::Exit), + State::Normal => (), + } + self.eat_trivias(); + self.do_token(kind, n_tokens as usize); + } + + fn enter(&mut self, kind: SyntaxKind) { + match mem::replace(&mut self.state, State::Normal) { + State::PendingEnter => { + (self.sink)(StrStep::Enter { kind }); + // No need to attach trivias to previous node: there is no + // previous node. + return; + } + State::PendingExit => (self.sink)(StrStep::Exit), + State::Normal => (), + } + + let n_trivias = + (self.pos..self.lexed.len()).take_while(|&it| self.lexed.kind(it).is_trivia()).count(); + let leading_trivias = self.pos..self.pos + n_trivias; + let n_attached_trivias = n_attached_trivias( + kind, + leading_trivias.rev().map(|it| (self.lexed.kind(it), self.lexed.text(it))), + ); + self.eat_n_trivias(n_trivias - n_attached_trivias); + (self.sink)(StrStep::Enter { kind }); + self.eat_n_trivias(n_attached_trivias); + } + + fn exit(&mut self) { + match mem::replace(&mut self.state, State::PendingExit) { + State::PendingEnter => unreachable!(), + State::PendingExit => (self.sink)(StrStep::Exit), + State::Normal => (), + } + } + + fn eat_trivias(&mut self) { + while self.pos < self.lexed.len() { + let kind = self.lexed.kind(self.pos); + if !kind.is_trivia() { + break; + } + self.do_token(kind, 1); + } + } + + fn eat_n_trivias(&mut self, n: usize) { + for _ in 0..n { + let kind = self.lexed.kind(self.pos); + assert!(kind.is_trivia()); + self.do_token(kind, 1); + } + } + + fn do_token(&mut self, kind: SyntaxKind, n_tokens: usize) { + let text = &self.lexed.range_text(self.pos..self.pos + n_tokens); + self.pos += n_tokens; + (self.sink)(StrStep::Token { kind, text }); + } +} + +fn n_attached_trivias<'a>( + kind: SyntaxKind, + trivias: impl Iterator<Item = (SyntaxKind, &'a str)>, +) -> usize { + match kind { + CONST | ENUM | FN | IMPL | MACRO_CALL | MACRO_DEF | 
MACRO_RULES | MODULE | RECORD_FIELD + | STATIC | STRUCT | TRAIT | TUPLE_FIELD | TYPE_ALIAS | UNION | USE | VARIANT => { + let mut res = 0; + let mut trivias = trivias.enumerate().peekable(); + + while let Some((i, (kind, text))) = trivias.next() { + match kind { + WHITESPACE if text.contains("\n\n") => { + // we check whether the next token is a doc-comment + // and skip the whitespace in this case + if let Some((COMMENT, peek_text)) = trivias.peek().map(|(_, pair)| pair) { + if is_outer(peek_text) { + continue; + } + } + break; + } + COMMENT => { + if is_inner(text) { + break; + } + res = i + 1; + } + _ => (), + } + } + res + } + _ => 0, + } +} + +fn is_outer(text: &str) -> bool { + if text.starts_with("////") || text.starts_with("/***") { + return false; + } + text.starts_with("///") || text.starts_with("/**") +} + +fn is_inner(text: &str) -> bool { + text.starts_with("//!") || text.starts_with("/*!") +} diff --git a/src/tools/rust-analyzer/crates/parser/src/syntax_kind.rs b/src/tools/rust-analyzer/crates/parser/src/syntax_kind.rs new file mode 100644 index 000000000..0483adc77 --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/syntax_kind.rs @@ -0,0 +1,29 @@ +//! Defines [`SyntaxKind`] -- a fieldless enum of all possible syntactic +//! constructs of the Rust language. 
+ +mod generated; + +#[allow(unreachable_pub)] +pub use self::generated::{SyntaxKind, T}; + +impl From<u16> for SyntaxKind { + #[inline] + fn from(d: u16) -> SyntaxKind { + assert!(d <= (SyntaxKind::__LAST as u16)); + unsafe { std::mem::transmute::<u16, SyntaxKind>(d) } + } +} + +impl From<SyntaxKind> for u16 { + #[inline] + fn from(k: SyntaxKind) -> u16 { + k as u16 + } +} + +impl SyntaxKind { + #[inline] + pub fn is_trivia(self) -> bool { + matches!(self, SyntaxKind::WHITESPACE | SyntaxKind::COMMENT) + } +} diff --git a/src/tools/rust-analyzer/crates/parser/src/syntax_kind/generated.rs b/src/tools/rust-analyzer/crates/parser/src/syntax_kind/generated.rs new file mode 100644 index 000000000..628fa745e --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/syntax_kind/generated.rs @@ -0,0 +1,390 @@ +//! Generated by `sourcegen_ast`, do not edit by hand. + +#![allow(bad_style, missing_docs, unreachable_pub)] +#[doc = r" The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`."] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] +#[repr(u16)] +pub enum SyntaxKind { + #[doc(hidden)] + TOMBSTONE, + #[doc(hidden)] + EOF, + SEMICOLON, + COMMA, + L_PAREN, + R_PAREN, + L_CURLY, + R_CURLY, + L_BRACK, + R_BRACK, + L_ANGLE, + R_ANGLE, + AT, + POUND, + TILDE, + QUESTION, + DOLLAR, + AMP, + PIPE, + PLUS, + STAR, + SLASH, + CARET, + PERCENT, + UNDERSCORE, + DOT, + DOT2, + DOT3, + DOT2EQ, + COLON, + COLON2, + EQ, + EQ2, + FAT_ARROW, + BANG, + NEQ, + MINUS, + THIN_ARROW, + LTEQ, + GTEQ, + PLUSEQ, + MINUSEQ, + PIPEEQ, + AMPEQ, + CARETEQ, + SLASHEQ, + STAREQ, + PERCENTEQ, + AMP2, + PIPE2, + SHL, + SHR, + SHLEQ, + SHREQ, + AS_KW, + ASYNC_KW, + AWAIT_KW, + BOX_KW, + BREAK_KW, + CONST_KW, + CONTINUE_KW, + CRATE_KW, + DYN_KW, + ELSE_KW, + ENUM_KW, + EXTERN_KW, + FALSE_KW, + FN_KW, + FOR_KW, + IF_KW, + IMPL_KW, + IN_KW, + LET_KW, + LOOP_KW, + MACRO_KW, + MATCH_KW, + MOD_KW, + MOVE_KW, + MUT_KW, + PUB_KW, + REF_KW, + RETURN_KW, + SELF_KW, + SELF_TYPE_KW, + 
STATIC_KW, + STRUCT_KW, + SUPER_KW, + TRAIT_KW, + TRUE_KW, + TRY_KW, + TYPE_KW, + UNSAFE_KW, + USE_KW, + WHERE_KW, + WHILE_KW, + YIELD_KW, + AUTO_KW, + DEFAULT_KW, + EXISTENTIAL_KW, + UNION_KW, + RAW_KW, + MACRO_RULES_KW, + INT_NUMBER, + FLOAT_NUMBER, + CHAR, + BYTE, + STRING, + BYTE_STRING, + ERROR, + IDENT, + WHITESPACE, + LIFETIME_IDENT, + COMMENT, + SHEBANG, + SOURCE_FILE, + STRUCT, + UNION, + ENUM, + FN, + RET_TYPE, + EXTERN_CRATE, + MODULE, + USE, + STATIC, + CONST, + TRAIT, + IMPL, + TYPE_ALIAS, + MACRO_CALL, + MACRO_RULES, + MACRO_ARM, + TOKEN_TREE, + MACRO_DEF, + PAREN_TYPE, + TUPLE_TYPE, + MACRO_TYPE, + NEVER_TYPE, + PATH_TYPE, + PTR_TYPE, + ARRAY_TYPE, + SLICE_TYPE, + REF_TYPE, + INFER_TYPE, + FN_PTR_TYPE, + FOR_TYPE, + IMPL_TRAIT_TYPE, + DYN_TRAIT_TYPE, + OR_PAT, + PAREN_PAT, + REF_PAT, + BOX_PAT, + IDENT_PAT, + WILDCARD_PAT, + REST_PAT, + PATH_PAT, + RECORD_PAT, + RECORD_PAT_FIELD_LIST, + RECORD_PAT_FIELD, + TUPLE_STRUCT_PAT, + TUPLE_PAT, + SLICE_PAT, + RANGE_PAT, + LITERAL_PAT, + MACRO_PAT, + CONST_BLOCK_PAT, + TUPLE_EXPR, + ARRAY_EXPR, + PAREN_EXPR, + PATH_EXPR, + CLOSURE_EXPR, + IF_EXPR, + WHILE_EXPR, + LOOP_EXPR, + FOR_EXPR, + CONTINUE_EXPR, + BREAK_EXPR, + LABEL, + BLOCK_EXPR, + STMT_LIST, + RETURN_EXPR, + YIELD_EXPR, + LET_EXPR, + UNDERSCORE_EXPR, + MACRO_EXPR, + MATCH_EXPR, + MATCH_ARM_LIST, + MATCH_ARM, + MATCH_GUARD, + RECORD_EXPR, + RECORD_EXPR_FIELD_LIST, + RECORD_EXPR_FIELD, + BOX_EXPR, + CALL_EXPR, + INDEX_EXPR, + METHOD_CALL_EXPR, + FIELD_EXPR, + AWAIT_EXPR, + TRY_EXPR, + CAST_EXPR, + REF_EXPR, + PREFIX_EXPR, + RANGE_EXPR, + BIN_EXPR, + EXTERN_BLOCK, + EXTERN_ITEM_LIST, + VARIANT, + RECORD_FIELD_LIST, + RECORD_FIELD, + TUPLE_FIELD_LIST, + TUPLE_FIELD, + VARIANT_LIST, + ITEM_LIST, + ASSOC_ITEM_LIST, + ATTR, + META, + USE_TREE, + USE_TREE_LIST, + PATH, + PATH_SEGMENT, + LITERAL, + RENAME, + VISIBILITY, + WHERE_CLAUSE, + WHERE_PRED, + ABI, + NAME, + NAME_REF, + LET_STMT, + LET_ELSE, + EXPR_STMT, + GENERIC_PARAM_LIST, + GENERIC_PARAM, + 
LIFETIME_PARAM, + TYPE_PARAM, + CONST_PARAM, + GENERIC_ARG_LIST, + LIFETIME, + LIFETIME_ARG, + TYPE_ARG, + ASSOC_TYPE_ARG, + CONST_ARG, + PARAM_LIST, + PARAM, + SELF_PARAM, + ARG_LIST, + TYPE_BOUND, + TYPE_BOUND_LIST, + MACRO_ITEMS, + MACRO_STMTS, + #[doc(hidden)] + __LAST, +} +use self::SyntaxKind::*; +impl SyntaxKind { + pub fn is_keyword(self) -> bool { + match self { + AS_KW | ASYNC_KW | AWAIT_KW | BOX_KW | BREAK_KW | CONST_KW | CONTINUE_KW | CRATE_KW + | DYN_KW | ELSE_KW | ENUM_KW | EXTERN_KW | FALSE_KW | FN_KW | FOR_KW | IF_KW + | IMPL_KW | IN_KW | LET_KW | LOOP_KW | MACRO_KW | MATCH_KW | MOD_KW | MOVE_KW + | MUT_KW | PUB_KW | REF_KW | RETURN_KW | SELF_KW | SELF_TYPE_KW | STATIC_KW + | STRUCT_KW | SUPER_KW | TRAIT_KW | TRUE_KW | TRY_KW | TYPE_KW | UNSAFE_KW | USE_KW + | WHERE_KW | WHILE_KW | YIELD_KW | AUTO_KW | DEFAULT_KW | EXISTENTIAL_KW | UNION_KW + | RAW_KW | MACRO_RULES_KW => true, + _ => false, + } + } + pub fn is_punct(self) -> bool { + match self { + SEMICOLON | COMMA | L_PAREN | R_PAREN | L_CURLY | R_CURLY | L_BRACK | R_BRACK + | L_ANGLE | R_ANGLE | AT | POUND | TILDE | QUESTION | DOLLAR | AMP | PIPE | PLUS + | STAR | SLASH | CARET | PERCENT | UNDERSCORE | DOT | DOT2 | DOT3 | DOT2EQ | COLON + | COLON2 | EQ | EQ2 | FAT_ARROW | BANG | NEQ | MINUS | THIN_ARROW | LTEQ | GTEQ + | PLUSEQ | MINUSEQ | PIPEEQ | AMPEQ | CARETEQ | SLASHEQ | STAREQ | PERCENTEQ | AMP2 + | PIPE2 | SHL | SHR | SHLEQ | SHREQ => true, + _ => false, + } + } + pub fn is_literal(self) -> bool { + match self { + INT_NUMBER | FLOAT_NUMBER | CHAR | BYTE | STRING | BYTE_STRING => true, + _ => false, + } + } + pub fn from_keyword(ident: &str) -> Option<SyntaxKind> { + let kw = match ident { + "as" => AS_KW, + "async" => ASYNC_KW, + "await" => AWAIT_KW, + "box" => BOX_KW, + "break" => BREAK_KW, + "const" => CONST_KW, + "continue" => CONTINUE_KW, + "crate" => CRATE_KW, + "dyn" => DYN_KW, + "else" => ELSE_KW, + "enum" => ENUM_KW, + "extern" => EXTERN_KW, + "false" => FALSE_KW, + "fn" => FN_KW, 
+ "for" => FOR_KW, + "if" => IF_KW, + "impl" => IMPL_KW, + "in" => IN_KW, + "let" => LET_KW, + "loop" => LOOP_KW, + "macro" => MACRO_KW, + "match" => MATCH_KW, + "mod" => MOD_KW, + "move" => MOVE_KW, + "mut" => MUT_KW, + "pub" => PUB_KW, + "ref" => REF_KW, + "return" => RETURN_KW, + "self" => SELF_KW, + "Self" => SELF_TYPE_KW, + "static" => STATIC_KW, + "struct" => STRUCT_KW, + "super" => SUPER_KW, + "trait" => TRAIT_KW, + "true" => TRUE_KW, + "try" => TRY_KW, + "type" => TYPE_KW, + "unsafe" => UNSAFE_KW, + "use" => USE_KW, + "where" => WHERE_KW, + "while" => WHILE_KW, + "yield" => YIELD_KW, + _ => return None, + }; + Some(kw) + } + pub fn from_contextual_keyword(ident: &str) -> Option<SyntaxKind> { + let kw = match ident { + "auto" => AUTO_KW, + "default" => DEFAULT_KW, + "existential" => EXISTENTIAL_KW, + "union" => UNION_KW, + "raw" => RAW_KW, + "macro_rules" => MACRO_RULES_KW, + _ => return None, + }; + Some(kw) + } + pub fn from_char(c: char) -> Option<SyntaxKind> { + let tok = match c { + ';' => SEMICOLON, + ',' => COMMA, + '(' => L_PAREN, + ')' => R_PAREN, + '{' => L_CURLY, + '}' => R_CURLY, + '[' => L_BRACK, + ']' => R_BRACK, + '<' => L_ANGLE, + '>' => R_ANGLE, + '@' => AT, + '#' => POUND, + '~' => TILDE, + '?' => QUESTION, + '$' => DOLLAR, + '&' => AMP, + '|' => PIPE, + '+' => PLUS, + '*' => STAR, + '/' => SLASH, + '^' => CARET, + '%' => PERCENT, + '_' => UNDERSCORE, + '.' => DOT, + ':' => COLON, + '=' => EQ, + '!' => BANG, + '-' => MINUS, + _ => return None, + }; + Some(tok) + } +} +#[macro_export] +macro_rules ! 
T { [;] => { $ crate :: SyntaxKind :: SEMICOLON } ; [,] => { $ crate :: SyntaxKind :: COMMA } ; ['('] => { $ crate :: SyntaxKind :: L_PAREN } ; [')'] => { $ crate :: SyntaxKind :: R_PAREN } ; ['{'] => { $ crate :: SyntaxKind :: L_CURLY } ; ['}'] => { $ crate :: SyntaxKind :: R_CURLY } ; ['['] => { $ crate :: SyntaxKind :: L_BRACK } ; [']'] => { $ crate :: SyntaxKind :: R_BRACK } ; [<] => { $ crate :: SyntaxKind :: L_ANGLE } ; [>] => { $ crate :: SyntaxKind :: R_ANGLE } ; [@] => { $ crate :: SyntaxKind :: AT } ; [#] => { $ crate :: SyntaxKind :: POUND } ; [~] => { $ crate :: SyntaxKind :: TILDE } ; [?] => { $ crate :: SyntaxKind :: QUESTION } ; [$] => { $ crate :: SyntaxKind :: DOLLAR } ; [&] => { $ crate :: SyntaxKind :: AMP } ; [|] => { $ crate :: SyntaxKind :: PIPE } ; [+] => { $ crate :: SyntaxKind :: PLUS } ; [*] => { $ crate :: SyntaxKind :: STAR } ; [/] => { $ crate :: SyntaxKind :: SLASH } ; [^] => { $ crate :: SyntaxKind :: CARET } ; [%] => { $ crate :: SyntaxKind :: PERCENT } ; [_] => { $ crate :: SyntaxKind :: UNDERSCORE } ; [.] => { $ crate :: SyntaxKind :: DOT } ; [..] => { $ crate :: SyntaxKind :: DOT2 } ; [...] => { $ crate :: SyntaxKind :: DOT3 } ; [..=] => { $ crate :: SyntaxKind :: DOT2EQ } ; [:] => { $ crate :: SyntaxKind :: COLON } ; [::] => { $ crate :: SyntaxKind :: COLON2 } ; [=] => { $ crate :: SyntaxKind :: EQ } ; [==] => { $ crate :: SyntaxKind :: EQ2 } ; [=>] => { $ crate :: SyntaxKind :: FAT_ARROW } ; [!] 
=> { $ crate :: SyntaxKind :: BANG } ; [!=] => { $ crate :: SyntaxKind :: NEQ } ; [-] => { $ crate :: SyntaxKind :: MINUS } ; [->] => { $ crate :: SyntaxKind :: THIN_ARROW } ; [<=] => { $ crate :: SyntaxKind :: LTEQ } ; [>=] => { $ crate :: SyntaxKind :: GTEQ } ; [+=] => { $ crate :: SyntaxKind :: PLUSEQ } ; [-=] => { $ crate :: SyntaxKind :: MINUSEQ } ; [|=] => { $ crate :: SyntaxKind :: PIPEEQ } ; [&=] => { $ crate :: SyntaxKind :: AMPEQ } ; [^=] => { $ crate :: SyntaxKind :: CARETEQ } ; [/=] => { $ crate :: SyntaxKind :: SLASHEQ } ; [*=] => { $ crate :: SyntaxKind :: STAREQ } ; [%=] => { $ crate :: SyntaxKind :: PERCENTEQ } ; [&&] => { $ crate :: SyntaxKind :: AMP2 } ; [||] => { $ crate :: SyntaxKind :: PIPE2 } ; [<<] => { $ crate :: SyntaxKind :: SHL } ; [>>] => { $ crate :: SyntaxKind :: SHR } ; [<<=] => { $ crate :: SyntaxKind :: SHLEQ } ; [>>=] => { $ crate :: SyntaxKind :: SHREQ } ; [as] => { $ crate :: SyntaxKind :: AS_KW } ; [async] => { $ crate :: SyntaxKind :: ASYNC_KW } ; [await] => { $ crate :: SyntaxKind :: AWAIT_KW } ; [box] => { $ crate :: SyntaxKind :: BOX_KW } ; [break] => { $ crate :: SyntaxKind :: BREAK_KW } ; [const] => { $ crate :: SyntaxKind :: CONST_KW } ; [continue] => { $ crate :: SyntaxKind :: CONTINUE_KW } ; [crate] => { $ crate :: SyntaxKind :: CRATE_KW } ; [dyn] => { $ crate :: SyntaxKind :: DYN_KW } ; [else] => { $ crate :: SyntaxKind :: ELSE_KW } ; [enum] => { $ crate :: SyntaxKind :: ENUM_KW } ; [extern] => { $ crate :: SyntaxKind :: EXTERN_KW } ; [false] => { $ crate :: SyntaxKind :: FALSE_KW } ; [fn] => { $ crate :: SyntaxKind :: FN_KW } ; [for] => { $ crate :: SyntaxKind :: FOR_KW } ; [if] => { $ crate :: SyntaxKind :: IF_KW } ; [impl] => { $ crate :: SyntaxKind :: IMPL_KW } ; [in] => { $ crate :: SyntaxKind :: IN_KW } ; [let] => { $ crate :: SyntaxKind :: LET_KW } ; [loop] => { $ crate :: SyntaxKind :: LOOP_KW } ; [macro] => { $ crate :: SyntaxKind :: MACRO_KW } ; [match] => { $ crate :: SyntaxKind :: MATCH_KW } ; [mod] => { $ 
crate :: SyntaxKind :: MOD_KW } ; [move] => { $ crate :: SyntaxKind :: MOVE_KW } ; [mut] => { $ crate :: SyntaxKind :: MUT_KW } ; [pub] => { $ crate :: SyntaxKind :: PUB_KW } ; [ref] => { $ crate :: SyntaxKind :: REF_KW } ; [return] => { $ crate :: SyntaxKind :: RETURN_KW } ; [self] => { $ crate :: SyntaxKind :: SELF_KW } ; [Self] => { $ crate :: SyntaxKind :: SELF_TYPE_KW } ; [static] => { $ crate :: SyntaxKind :: STATIC_KW } ; [struct] => { $ crate :: SyntaxKind :: STRUCT_KW } ; [super] => { $ crate :: SyntaxKind :: SUPER_KW } ; [trait] => { $ crate :: SyntaxKind :: TRAIT_KW } ; [true] => { $ crate :: SyntaxKind :: TRUE_KW } ; [try] => { $ crate :: SyntaxKind :: TRY_KW } ; [type] => { $ crate :: SyntaxKind :: TYPE_KW } ; [unsafe] => { $ crate :: SyntaxKind :: UNSAFE_KW } ; [use] => { $ crate :: SyntaxKind :: USE_KW } ; [where] => { $ crate :: SyntaxKind :: WHERE_KW } ; [while] => { $ crate :: SyntaxKind :: WHILE_KW } ; [yield] => { $ crate :: SyntaxKind :: YIELD_KW } ; [auto] => { $ crate :: SyntaxKind :: AUTO_KW } ; [default] => { $ crate :: SyntaxKind :: DEFAULT_KW } ; [existential] => { $ crate :: SyntaxKind :: EXISTENTIAL_KW } ; [union] => { $ crate :: SyntaxKind :: UNION_KW } ; [raw] => { $ crate :: SyntaxKind :: RAW_KW } ; [macro_rules] => { $ crate :: SyntaxKind :: MACRO_RULES_KW } ; [lifetime_ident] => { $ crate :: SyntaxKind :: LIFETIME_IDENT } ; [ident] => { $ crate :: SyntaxKind :: IDENT } ; [shebang] => { $ crate :: SyntaxKind :: SHEBANG } ; } +pub use T; diff --git a/src/tools/rust-analyzer/crates/parser/src/tests.rs b/src/tools/rust-analyzer/crates/parser/src/tests.rs new file mode 100644 index 000000000..735c0b3e4 --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/tests.rs @@ -0,0 +1,166 @@ +mod sourcegen_inline_tests; +mod top_entries; +mod prefix_entries; + +use std::{ + fmt::Write, + fs, + path::{Path, PathBuf}, +}; + +use expect_test::expect_file; + +use crate::{LexedStr, TopEntryPoint}; + +#[test] +fn lex_ok() { + for case in 
TestCase::list("lexer/ok") { + let actual = lex(&case.text); + expect_file![case.rast].assert_eq(&actual) + } +} + +#[test] +fn lex_err() { + for case in TestCase::list("lexer/err") { + let actual = lex(&case.text); + expect_file![case.rast].assert_eq(&actual) + } +} + +fn lex(text: &str) -> String { + let lexed = LexedStr::new(text); + + let mut res = String::new(); + for i in 0..lexed.len() { + let kind = lexed.kind(i); + let text = lexed.text(i); + let error = lexed.error(i); + + let error = error.map(|err| format!(" error: {}", err)).unwrap_or_default(); + writeln!(res, "{:?} {:?}{}", kind, text, error).unwrap(); + } + res +} + +#[test] +fn parse_ok() { + for case in TestCase::list("parser/ok") { + let (actual, errors) = parse(TopEntryPoint::SourceFile, &case.text); + assert!(!errors, "errors in an OK file {}:\n{}", case.rs.display(), actual); + expect_file![case.rast].assert_eq(&actual); + } +} + +#[test] +fn parse_inline_ok() { + for case in TestCase::list("parser/inline/ok") { + let (actual, errors) = parse(TopEntryPoint::SourceFile, &case.text); + assert!(!errors, "errors in an OK file {}:\n{}", case.rs.display(), actual); + expect_file![case.rast].assert_eq(&actual); + } +} + +#[test] +fn parse_err() { + for case in TestCase::list("parser/err") { + let (actual, errors) = parse(TopEntryPoint::SourceFile, &case.text); + assert!(errors, "no errors in an ERR file {}:\n{}", case.rs.display(), actual); + expect_file![case.rast].assert_eq(&actual) + } +} + +#[test] +fn parse_inline_err() { + for case in TestCase::list("parser/inline/err") { + let (actual, errors) = parse(TopEntryPoint::SourceFile, &case.text); + assert!(errors, "no errors in an ERR file {}:\n{}", case.rs.display(), actual); + expect_file![case.rast].assert_eq(&actual) + } +} + +fn parse(entry: TopEntryPoint, text: &str) -> (String, bool) { + let lexed = LexedStr::new(text); + let input = lexed.to_input(); + let output = entry.parse(&input); + + let mut buf = String::new(); + let mut errors = 
Vec::new(); + let mut indent = String::new(); + let mut depth = 0; + let mut len = 0; + lexed.intersperse_trivia(&output, &mut |step| match step { + crate::StrStep::Token { kind, text } => { + assert!(depth > 0); + len += text.len(); + write!(buf, "{}", indent).unwrap(); + write!(buf, "{:?} {:?}\n", kind, text).unwrap(); + } + crate::StrStep::Enter { kind } => { + assert!(depth > 0 || len == 0); + depth += 1; + write!(buf, "{}", indent).unwrap(); + write!(buf, "{:?}\n", kind).unwrap(); + indent.push_str(" "); + } + crate::StrStep::Exit => { + assert!(depth > 0); + depth -= 1; + indent.pop(); + indent.pop(); + } + crate::StrStep::Error { msg, pos } => { + assert!(depth > 0); + errors.push(format!("error {}: {}\n", pos, msg)) + } + }); + assert_eq!( + len, + text.len(), + "didn't parse all text.\nParsed:\n{}\n\nAll:\n{}\n", + &text[..len], + text + ); + + for (token, msg) in lexed.errors() { + let pos = lexed.text_start(token); + errors.push(format!("error {}: {}\n", pos, msg)); + } + + let has_errors = !errors.is_empty(); + for e in errors { + buf.push_str(&e); + } + (buf, has_errors) +} + +#[derive(PartialEq, Eq, PartialOrd, Ord)] +struct TestCase { + rs: PathBuf, + rast: PathBuf, + text: String, +} + +impl TestCase { + fn list(path: &'static str) -> Vec<TestCase> { + let crate_root_dir = Path::new(env!("CARGO_MANIFEST_DIR")); + let test_data_dir = crate_root_dir.join("test_data"); + let dir = test_data_dir.join(path); + + let mut res = Vec::new(); + let read_dir = fs::read_dir(&dir) + .unwrap_or_else(|err| panic!("can't `read_dir` {}: {}", dir.display(), err)); + for file in read_dir { + let file = file.unwrap(); + let path = file.path(); + if path.extension().unwrap_or_default() == "rs" { + let rs = path; + let rast = rs.with_extension("rast"); + let text = fs::read_to_string(&rs).unwrap(); + res.push(TestCase { rs, rast, text }); + } + } + res.sort(); + res + } +} diff --git a/src/tools/rust-analyzer/crates/parser/src/tests/prefix_entries.rs 
b/src/tools/rust-analyzer/crates/parser/src/tests/prefix_entries.rs new file mode 100644 index 000000000..e626b4f27 --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/tests/prefix_entries.rs @@ -0,0 +1,107 @@ +use crate::{LexedStr, PrefixEntryPoint, Step}; + +#[test] +fn vis() { + check(PrefixEntryPoint::Vis, "pub(crate) fn foo() {}", "pub(crate)"); + check(PrefixEntryPoint::Vis, "fn foo() {}", ""); + check(PrefixEntryPoint::Vis, "pub(fn foo() {}", "pub"); + check(PrefixEntryPoint::Vis, "pub(crate fn foo() {}", "pub(crate"); + check(PrefixEntryPoint::Vis, "crate fn foo() {}", "crate"); +} + +#[test] +fn block() { + check(PrefixEntryPoint::Block, "{}, 92", "{}"); + check(PrefixEntryPoint::Block, "{, 92)", "{, 92)"); + check(PrefixEntryPoint::Block, "()", ""); +} + +#[test] +fn stmt() { + check(PrefixEntryPoint::Stmt, "92; fn", "92"); + check(PrefixEntryPoint::Stmt, "let _ = 92; 1", "let _ = 92"); + check(PrefixEntryPoint::Stmt, "pub fn f() {} = 92", "pub fn f() {}"); + check(PrefixEntryPoint::Stmt, "struct S;;", "struct S;"); + check(PrefixEntryPoint::Stmt, "fn f() {};", "fn f() {}"); + check(PrefixEntryPoint::Stmt, ";;;", ";"); + check(PrefixEntryPoint::Stmt, "+", "+"); + check(PrefixEntryPoint::Stmt, "@", "@"); + check(PrefixEntryPoint::Stmt, "loop {} - 1", "loop {}"); +} + +#[test] +fn pat() { + check(PrefixEntryPoint::Pat, "x y", "x"); + check(PrefixEntryPoint::Pat, "fn f() {}", "fn"); + // FIXME: This one is wrong, we should consume only one pattern. + check(PrefixEntryPoint::Pat, ".. ..", ".. 
.."); +} + +#[test] +fn ty() { + check(PrefixEntryPoint::Ty, "fn() foo", "fn()"); + check(PrefixEntryPoint::Ty, "Clone + Copy + fn", "Clone + Copy +"); + check(PrefixEntryPoint::Ty, "struct f", "struct"); +} + +#[test] +fn expr() { + check(PrefixEntryPoint::Expr, "92 92", "92"); + check(PrefixEntryPoint::Expr, "+1", "+"); + check(PrefixEntryPoint::Expr, "-1", "-1"); + check(PrefixEntryPoint::Expr, "fn foo() {}", "fn"); + check(PrefixEntryPoint::Expr, "#[attr] ()", "#[attr] ()"); +} + +#[test] +fn path() { + check(PrefixEntryPoint::Path, "foo::bar baz", "foo::bar"); + check(PrefixEntryPoint::Path, "foo::<> baz", "foo::<>"); + check(PrefixEntryPoint::Path, "foo<> baz", "foo<>"); + check(PrefixEntryPoint::Path, "Fn() -> i32?", "Fn() -> i32"); + // FIXME: This shouldn't be accepted as path actually. + check(PrefixEntryPoint::Path, "<_>::foo", "<_>::foo"); +} + +#[test] +fn item() { + // FIXME: This shouldn't consume the semicolon. + check(PrefixEntryPoint::Item, "fn foo() {};", "fn foo() {};"); + check(PrefixEntryPoint::Item, "#[attr] pub struct S {} 92", "#[attr] pub struct S {}"); + check(PrefixEntryPoint::Item, "item!{}?", "item!{}"); + check(PrefixEntryPoint::Item, "????", "?"); +} + +#[test] +fn meta_item() { + check(PrefixEntryPoint::MetaItem, "attr, ", "attr"); + check(PrefixEntryPoint::MetaItem, "attr(some token {stream});", "attr(some token {stream})"); + check(PrefixEntryPoint::MetaItem, "path::attr = 2 * 2!", "path::attr = 2 * 2"); +} + +#[track_caller] +fn check(entry: PrefixEntryPoint, input: &str, prefix: &str) { + let lexed = LexedStr::new(input); + let input = lexed.to_input(); + + let mut n_tokens = 0; + for step in entry.parse(&input).iter() { + match step { + Step::Token { n_input_tokens, .. } => n_tokens += n_input_tokens as usize, + Step::Enter { .. } | Step::Exit | Step::Error { .. 
} => (), + } + } + + let mut i = 0; + loop { + if n_tokens == 0 { + break; + } + if !lexed.kind(i).is_trivia() { + n_tokens -= 1; + } + i += 1; + } + let buf = &lexed.as_str()[..lexed.text_start(i)]; + assert_eq!(buf, prefix); +} diff --git a/src/tools/rust-analyzer/crates/parser/src/tests/sourcegen_inline_tests.rs b/src/tools/rust-analyzer/crates/parser/src/tests/sourcegen_inline_tests.rs new file mode 100644 index 000000000..7b2b703de --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/tests/sourcegen_inline_tests.rs @@ -0,0 +1,123 @@ +//! This module greps parser's code for specially formatted comments and turns +//! them into tests. + +use std::{ + collections::HashMap, + fs, iter, + path::{Path, PathBuf}, +}; + +#[test] +fn sourcegen_parser_tests() { + let grammar_dir = sourcegen::project_root().join(Path::new("crates/parser/src/grammar")); + let tests = tests_from_dir(&grammar_dir); + + install_tests(&tests.ok, "crates/parser/test_data/parser/inline/ok"); + install_tests(&tests.err, "crates/parser/test_data/parser/inline/err"); + + fn install_tests(tests: &HashMap<String, Test>, into: &str) { + let tests_dir = sourcegen::project_root().join(into); + if !tests_dir.is_dir() { + fs::create_dir_all(&tests_dir).unwrap(); + } + // ok is never actually read, but it needs to be specified to create a Test in existing_tests + let existing = existing_tests(&tests_dir, true); + for t in existing.keys().filter(|&t| !tests.contains_key(t)) { + panic!("Test is deleted: {}", t); + } + + let mut new_idx = existing.len() + 1; + for (name, test) in tests { + let path = match existing.get(name) { + Some((path, _test)) => path.clone(), + None => { + let file_name = format!("{:04}_{}.rs", new_idx, name); + new_idx += 1; + tests_dir.join(file_name) + } + }; + sourcegen::ensure_file_contents(&path, &test.text); + } + } +} + +#[derive(Debug)] +struct Test { + name: String, + text: String, + ok: bool, +} + +#[derive(Default, Debug)] +struct Tests { + ok: HashMap<String, 
Test>, + err: HashMap<String, Test>, +} + +fn collect_tests(s: &str) -> Vec<Test> { + let mut res = Vec::new(); + for comment_block in sourcegen::CommentBlock::extract_untagged(s) { + let first_line = &comment_block.contents[0]; + let (name, ok) = if let Some(name) = first_line.strip_prefix("test ") { + (name.to_string(), true) + } else if let Some(name) = first_line.strip_prefix("test_err ") { + (name.to_string(), false) + } else { + continue; + }; + let text: String = comment_block.contents[1..] + .iter() + .cloned() + .chain(iter::once(String::new())) + .collect::<Vec<_>>() + .join("\n"); + assert!(!text.trim().is_empty() && text.ends_with('\n')); + res.push(Test { name, text, ok }) + } + res +} + +fn tests_from_dir(dir: &Path) -> Tests { + let mut res = Tests::default(); + for entry in sourcegen::list_rust_files(dir) { + process_file(&mut res, entry.as_path()); + } + let grammar_rs = dir.parent().unwrap().join("grammar.rs"); + process_file(&mut res, &grammar_rs); + return res; + + fn process_file(res: &mut Tests, path: &Path) { + let text = fs::read_to_string(path).unwrap(); + + for test in collect_tests(&text) { + if test.ok { + if let Some(old_test) = res.ok.insert(test.name.clone(), test) { + panic!("Duplicate test: {}", old_test.name); + } + } else if let Some(old_test) = res.err.insert(test.name.clone(), test) { + panic!("Duplicate test: {}", old_test.name); + } + } + } +} + +fn existing_tests(dir: &Path, ok: bool) -> HashMap<String, (PathBuf, Test)> { + let mut res = HashMap::default(); + for file in fs::read_dir(dir).unwrap() { + let file = file.unwrap(); + let path = file.path(); + if path.extension().unwrap_or_default() != "rs" { + continue; + } + let name = { + let file_name = path.file_name().unwrap().to_str().unwrap(); + file_name[5..file_name.len() - 3].to_string() + }; + let text = fs::read_to_string(&path).unwrap(); + let test = Test { name: name.clone(), text, ok }; + if let Some(old) = res.insert(name, (path, test)) { + println!("Duplicate 
test: {:?}", old); + } + } + res +} diff --git a/src/tools/rust-analyzer/crates/parser/src/tests/top_entries.rs b/src/tools/rust-analyzer/crates/parser/src/tests/top_entries.rs new file mode 100644 index 000000000..eb640dc7f --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/tests/top_entries.rs @@ -0,0 +1,312 @@ +use expect_test::expect; + +use crate::TopEntryPoint; + +#[test] +fn source_file() { + check( + TopEntryPoint::SourceFile, + "", + expect![[r#" + SOURCE_FILE + "#]], + ); + + check( + TopEntryPoint::SourceFile, + "struct S;", + expect![[r#" + SOURCE_FILE + STRUCT + STRUCT_KW "struct" + WHITESPACE " " + NAME + IDENT "S" + SEMICOLON ";" + "#]], + ); + + check( + TopEntryPoint::SourceFile, + "@error@", + expect![[r#" + SOURCE_FILE + ERROR + AT "@" + MACRO_CALL + PATH + PATH_SEGMENT + NAME_REF + IDENT "error" + ERROR + AT "@" + error 0: expected an item + error 6: expected BANG + error 6: expected `{`, `[`, `(` + error 6: expected SEMICOLON + error 6: expected an item + "#]], + ); +} + +#[test] +fn macro_stmt() { + check( + TopEntryPoint::MacroStmts, + "", + expect![[r#" + MACRO_STMTS + "#]], + ); + check( + TopEntryPoint::MacroStmts, + "#!/usr/bin/rust", + expect![[r##" + MACRO_STMTS + ERROR + SHEBANG "#!/usr/bin/rust" + error 0: expected expression + "##]], + ); + check( + TopEntryPoint::MacroStmts, + "let x = 1 2 struct S;", + expect![[r#" + MACRO_STMTS + LET_STMT + LET_KW "let" + WHITESPACE " " + IDENT_PAT + NAME + IDENT "x" + WHITESPACE " " + EQ "=" + WHITESPACE " " + LITERAL + INT_NUMBER "1" + WHITESPACE " " + EXPR_STMT + LITERAL + INT_NUMBER "2" + WHITESPACE " " + STRUCT + STRUCT_KW "struct" + WHITESPACE " " + NAME + IDENT "S" + SEMICOLON ";" + "#]], + ); +} + +#[test] +fn macro_items() { + check( + TopEntryPoint::MacroItems, + "", + expect![[r#" + MACRO_ITEMS + "#]], + ); + check( + TopEntryPoint::MacroItems, + "#!/usr/bin/rust", + expect![[r##" + MACRO_ITEMS + ERROR + SHEBANG "#!/usr/bin/rust" + error 0: expected an item + "##]], + ); + 
check( + TopEntryPoint::MacroItems, + "struct S; foo!{}", + expect![[r#" + MACRO_ITEMS + STRUCT + STRUCT_KW "struct" + WHITESPACE " " + NAME + IDENT "S" + SEMICOLON ";" + WHITESPACE " " + MACRO_CALL + PATH + PATH_SEGMENT + NAME_REF + IDENT "foo" + BANG "!" + TOKEN_TREE + L_CURLY "{" + R_CURLY "}" + "#]], + ); +} + +#[test] +fn macro_pattern() { + check( + TopEntryPoint::Pattern, + "", + expect![[r#" + ERROR + error 0: expected pattern + "#]], + ); + check( + TopEntryPoint::Pattern, + "Some(_)", + expect![[r#" + TUPLE_STRUCT_PAT + PATH + PATH_SEGMENT + NAME_REF + IDENT "Some" + L_PAREN "(" + WILDCARD_PAT + UNDERSCORE "_" + R_PAREN ")" + "#]], + ); + + check( + TopEntryPoint::Pattern, + "None leftover tokens", + expect![[r#" + ERROR + IDENT_PAT + NAME + IDENT "None" + WHITESPACE " " + IDENT "leftover" + WHITESPACE " " + IDENT "tokens" + "#]], + ); + + check( + TopEntryPoint::Pattern, + "@err", + expect![[r#" + ERROR + ERROR + AT "@" + IDENT "err" + error 0: expected pattern + "#]], + ); +} + +#[test] +fn type_() { + check( + TopEntryPoint::Type, + "", + expect![[r#" + ERROR + error 0: expected type + "#]], + ); + + check( + TopEntryPoint::Type, + "Option<!>", + expect![[r#" + PATH_TYPE + PATH + PATH_SEGMENT + NAME_REF + IDENT "Option" + GENERIC_ARG_LIST + L_ANGLE "<" + TYPE_ARG + NEVER_TYPE + BANG "!" 
+ R_ANGLE ">" + "#]], + ); + check( + TopEntryPoint::Type, + "() () ()", + expect![[r#" + ERROR + TUPLE_TYPE + L_PAREN "(" + R_PAREN ")" + WHITESPACE " " + L_PAREN "(" + R_PAREN ")" + WHITESPACE " " + L_PAREN "(" + R_PAREN ")" + "#]], + ); + check( + TopEntryPoint::Type, + "$$$", + expect![[r#" + ERROR + ERROR + DOLLAR "$" + DOLLAR "$" + DOLLAR "$" + error 0: expected type + "#]], + ); +} + +#[test] +fn expr() { + check( + TopEntryPoint::Expr, + "", + expect![[r#" + ERROR + error 0: expected expression + "#]], + ); + check( + TopEntryPoint::Expr, + "2 + 2 == 5", + expect![[r#" + BIN_EXPR + BIN_EXPR + LITERAL + INT_NUMBER "2" + WHITESPACE " " + PLUS "+" + WHITESPACE " " + LITERAL + INT_NUMBER "2" + WHITESPACE " " + EQ2 "==" + WHITESPACE " " + LITERAL + INT_NUMBER "5" + "#]], + ); + check( + TopEntryPoint::Expr, + "let _ = 0;", + expect![[r#" + ERROR + LET_EXPR + LET_KW "let" + WHITESPACE " " + WILDCARD_PAT + UNDERSCORE "_" + WHITESPACE " " + EQ "=" + WHITESPACE " " + LITERAL + INT_NUMBER "0" + SEMICOLON ";" + "#]], + ); +} + +#[track_caller] +fn check(entry: TopEntryPoint, input: &str, expect: expect_test::Expect) { + let (parsed, _errors) = super::parse(entry, input); + expect.assert_eq(&parsed) +} diff --git a/src/tools/rust-analyzer/crates/parser/src/token_set.rs b/src/tools/rust-analyzer/crates/parser/src/token_set.rs new file mode 100644 index 000000000..cd4894c1e --- /dev/null +++ b/src/tools/rust-analyzer/crates/parser/src/token_set.rs @@ -0,0 +1,42 @@ +//! A bit-set of `SyntaxKind`s. 
+ +use crate::SyntaxKind; + +/// A bit-set of `SyntaxKind`s +#[derive(Clone, Copy)] +pub(crate) struct TokenSet(u128); + +impl TokenSet { + pub(crate) const EMPTY: TokenSet = TokenSet(0); + + pub(crate) const fn new(kinds: &[SyntaxKind]) -> TokenSet { + let mut res = 0u128; + let mut i = 0; + while i < kinds.len() { + res |= mask(kinds[i]); + i += 1; + } + TokenSet(res) + } + + pub(crate) const fn union(self, other: TokenSet) -> TokenSet { + TokenSet(self.0 | other.0) + } + + pub(crate) const fn contains(&self, kind: SyntaxKind) -> bool { + self.0 & mask(kind) != 0 + } +} + +const fn mask(kind: SyntaxKind) -> u128 { + 1u128 << (kind as usize) +} + +#[test] +fn token_set_works_for_tokens() { + use crate::SyntaxKind::*; + let ts = TokenSet::new(&[EOF, SHEBANG]); + assert!(ts.contains(EOF)); + assert!(ts.contains(SHEBANG)); + assert!(!ts.contains(PLUS)); +} |