summaryrefslogtreecommitdiffstats
path: root/src/tools/rust-analyzer/crates/parser/src/parser.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/tools/rust-analyzer/crates/parser/src/parser.rs')
-rw-r--r--src/tools/rust-analyzer/crates/parser/src/parser.rs340
1 files changed, 340 insertions, 0 deletions
diff --git a/src/tools/rust-analyzer/crates/parser/src/parser.rs b/src/tools/rust-analyzer/crates/parser/src/parser.rs
new file mode 100644
index 000000000..48d8350e0
--- /dev/null
+++ b/src/tools/rust-analyzer/crates/parser/src/parser.rs
@@ -0,0 +1,340 @@
+//! See [`Parser`].
+
+use std::cell::Cell;
+
+use drop_bomb::DropBomb;
+use limit::Limit;
+
+use crate::{
+ event::Event,
+ input::Input,
+ SyntaxKind::{self, EOF, ERROR, TOMBSTONE},
+ TokenSet, T,
+};
+
+/// `Parser` struct provides the low-level API for
+/// navigating through the stream of tokens and
+/// constructing the parse tree. The actual parsing
+/// happens in the [`grammar`](super::grammar) module.
+///
+/// However, the result of this `Parser` is not a real
+/// tree, but rather a flat stream of events of the form
+/// "start expression, consume number literal,
+/// finish expression". See `Event` docs for more.
+pub(crate) struct Parser<'t> {
+ inp: &'t Input,
+ pos: usize,
+ events: Vec<Event>,
+ steps: Cell<u32>,
+}
+
+static PARSER_STEP_LIMIT: Limit = Limit::new(15_000_000);
+
+impl<'t> Parser<'t> {
+ pub(super) fn new(inp: &'t Input) -> Parser<'t> {
+ Parser { inp, pos: 0, events: Vec::new(), steps: Cell::new(0) }
+ }
+
+ pub(crate) fn finish(self) -> Vec<Event> {
+ self.events
+ }
+
+ /// Returns the kind of the current token.
+ /// If parser has already reached the end of input,
+ /// the special `EOF` kind is returned.
+ pub(crate) fn current(&self) -> SyntaxKind {
+ self.nth(0)
+ }
+
+ /// Lookahead operation: returns the kind of the next nth
+ /// token.
+ pub(crate) fn nth(&self, n: usize) -> SyntaxKind {
+ assert!(n <= 3);
+
+ let steps = self.steps.get();
+ assert!(PARSER_STEP_LIMIT.check(steps as usize).is_ok(), "the parser seems stuck");
+ self.steps.set(steps + 1);
+
+ self.inp.kind(self.pos + n)
+ }
+
+ /// Checks if the current token is `kind`.
+ pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
+ self.nth_at(0, kind)
+ }
+
+ pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {
+ match kind {
+ T![-=] => self.at_composite2(n, T![-], T![=]),
+ T![->] => self.at_composite2(n, T![-], T![>]),
+ T![::] => self.at_composite2(n, T![:], T![:]),
+ T![!=] => self.at_composite2(n, T![!], T![=]),
+ T![..] => self.at_composite2(n, T![.], T![.]),
+ T![*=] => self.at_composite2(n, T![*], T![=]),
+ T![/=] => self.at_composite2(n, T![/], T![=]),
+ T![&&] => self.at_composite2(n, T![&], T![&]),
+ T![&=] => self.at_composite2(n, T![&], T![=]),
+ T![%=] => self.at_composite2(n, T![%], T![=]),
+ T![^=] => self.at_composite2(n, T![^], T![=]),
+ T![+=] => self.at_composite2(n, T![+], T![=]),
+ T![<<] => self.at_composite2(n, T![<], T![<]),
+ T![<=] => self.at_composite2(n, T![<], T![=]),
+ T![==] => self.at_composite2(n, T![=], T![=]),
+ T![=>] => self.at_composite2(n, T![=], T![>]),
+ T![>=] => self.at_composite2(n, T![>], T![=]),
+ T![>>] => self.at_composite2(n, T![>], T![>]),
+ T![|=] => self.at_composite2(n, T![|], T![=]),
+ T![||] => self.at_composite2(n, T![|], T![|]),
+
+ T![...] => self.at_composite3(n, T![.], T![.], T![.]),
+ T![..=] => self.at_composite3(n, T![.], T![.], T![=]),
+ T![<<=] => self.at_composite3(n, T![<], T![<], T![=]),
+ T![>>=] => self.at_composite3(n, T![>], T![>], T![=]),
+
+ _ => self.inp.kind(self.pos + n) == kind,
+ }
+ }
+
+ /// Consume the next token if `kind` matches.
+ pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
+ if !self.at(kind) {
+ return false;
+ }
+ let n_raw_tokens = match kind {
+ T![-=]
+ | T![->]
+ | T![::]
+ | T![!=]
+ | T![..]
+ | T![*=]
+ | T![/=]
+ | T![&&]
+ | T![&=]
+ | T![%=]
+ | T![^=]
+ | T![+=]
+ | T![<<]
+ | T![<=]
+ | T![==]
+ | T![=>]
+ | T![>=]
+ | T![>>]
+ | T![|=]
+ | T![||] => 2,
+
+ T![...] | T![..=] | T![<<=] | T![>>=] => 3,
+ _ => 1,
+ };
+ self.do_bump(kind, n_raw_tokens);
+ true
+ }
+
+ fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool {
+ self.inp.kind(self.pos + n) == k1
+ && self.inp.kind(self.pos + n + 1) == k2
+ && self.inp.is_joint(self.pos + n)
+ }
+
+ fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool {
+ self.inp.kind(self.pos + n) == k1
+ && self.inp.kind(self.pos + n + 1) == k2
+ && self.inp.kind(self.pos + n + 2) == k3
+ && self.inp.is_joint(self.pos + n)
+ && self.inp.is_joint(self.pos + n + 1)
+ }
+
+ /// Checks if the current token is in `kinds`.
+ pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool {
+ kinds.contains(self.current())
+ }
+
+ /// Checks if the current token is contextual keyword with text `t`.
+ pub(crate) fn at_contextual_kw(&self, kw: SyntaxKind) -> bool {
+ self.inp.contextual_kind(self.pos) == kw
+ }
+
+ /// Starts a new node in the syntax tree. All nodes and tokens
+ /// consumed between the `start` and the corresponding `Marker::complete`
+ /// belong to the same node.
+ pub(crate) fn start(&mut self) -> Marker {
+ let pos = self.events.len() as u32;
+ self.push_event(Event::tombstone());
+ Marker::new(pos)
+ }
+
+ /// Consume the next token if `kind` matches.
+ pub(crate) fn bump(&mut self, kind: SyntaxKind) {
+ assert!(self.eat(kind));
+ }
+
+ /// Advances the parser by one token
+ pub(crate) fn bump_any(&mut self) {
+ let kind = self.nth(0);
+ if kind == EOF {
+ return;
+ }
+ self.do_bump(kind, 1);
+ }
+
+ /// Advances the parser by one token, remapping its kind.
+ /// This is useful to create contextual keywords from
+ /// identifiers. For example, the lexer creates a `union`
+ /// *identifier* token, but the parser remaps it to the
+ /// `union` keyword, and keyword is what ends up in the
+ /// final tree.
+ pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) {
+ if self.nth(0) == EOF {
+ // FIXME: panic!?
+ return;
+ }
+ self.do_bump(kind, 1);
+ }
+
+ /// Emit error with the `message`
+ /// FIXME: this should be much more fancy and support
+ /// structured errors with spans and notes, like rustc
+ /// does.
+ pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
+ let msg = message.into();
+ self.push_event(Event::Error { msg });
+ }
+
+ /// Consume the next token if it is `kind` or emit an error
+ /// otherwise.
+ pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool {
+ if self.eat(kind) {
+ return true;
+ }
+ self.error(format!("expected {:?}", kind));
+ false
+ }
+
+ /// Create an error node and consume the next token.
+ pub(crate) fn err_and_bump(&mut self, message: &str) {
+ self.err_recover(message, TokenSet::EMPTY);
+ }
+
+ /// Create an error node and consume the next token.
+ pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) {
+ match self.current() {
+ T!['{'] | T!['}'] => {
+ self.error(message);
+ return;
+ }
+ _ => (),
+ }
+
+ if self.at_ts(recovery) {
+ self.error(message);
+ return;
+ }
+
+ let m = self.start();
+ self.error(message);
+ self.bump_any();
+ m.complete(self, ERROR);
+ }
+
+ fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
+ self.pos += n_raw_tokens as usize;
+ self.push_event(Event::Token { kind, n_raw_tokens });
+ }
+
+ fn push_event(&mut self, event: Event) {
+ self.events.push(event);
+ }
+}
+
+/// See [`Parser::start`].
+pub(crate) struct Marker {
+ pos: u32,
+ bomb: DropBomb,
+}
+
+impl Marker {
+ fn new(pos: u32) -> Marker {
+ Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") }
+ }
+
+ /// Finishes the syntax tree node and assigns `kind` to it,
+ /// and mark the create a `CompletedMarker` for possible future
+ /// operation like `.precede()` to deal with forward_parent.
+ pub(crate) fn complete(mut self, p: &mut Parser<'_>, kind: SyntaxKind) -> CompletedMarker {
+ self.bomb.defuse();
+ let idx = self.pos as usize;
+ match &mut p.events[idx] {
+ Event::Start { kind: slot, .. } => {
+ *slot = kind;
+ }
+ _ => unreachable!(),
+ }
+ p.push_event(Event::Finish);
+ CompletedMarker::new(self.pos, kind)
+ }
+
+ /// Abandons the syntax tree node. All its children
+ /// are attached to its parent instead.
+ pub(crate) fn abandon(mut self, p: &mut Parser<'_>) {
+ self.bomb.defuse();
+ let idx = self.pos as usize;
+ if idx == p.events.len() - 1 {
+ match p.events.pop() {
+ Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (),
+ _ => unreachable!(),
+ }
+ }
+ }
+}
+
+pub(crate) struct CompletedMarker {
+ pos: u32,
+ kind: SyntaxKind,
+}
+
+impl CompletedMarker {
+ fn new(pos: u32, kind: SyntaxKind) -> Self {
+ CompletedMarker { pos, kind }
+ }
+
+ /// This method allows to create a new node which starts
+ /// *before* the current one. That is, parser could start
+ /// node `A`, then complete it, and then after parsing the
+ /// whole `A`, decide that it should have started some node
+ /// `B` before starting `A`. `precede` allows to do exactly
+ /// that. See also docs about
+ /// [`Event::Start::forward_parent`](crate::event::Event::Start::forward_parent).
+ ///
+ /// Given completed events `[START, FINISH]` and its corresponding
+ /// `CompletedMarker(pos: 0, _)`.
+ /// Append a new `START` events as `[START, FINISH, NEWSTART]`,
+ /// then mark `NEWSTART` as `START`'s parent with saving its relative
+ /// distance to `NEWSTART` into forward_parent(=2 in this case);
+ pub(crate) fn precede(self, p: &mut Parser<'_>) -> Marker {
+ let new_pos = p.start();
+ let idx = self.pos as usize;
+ match &mut p.events[idx] {
+ Event::Start { forward_parent, .. } => {
+ *forward_parent = Some(new_pos.pos - self.pos);
+ }
+ _ => unreachable!(),
+ }
+ new_pos
+ }
+
+ /// Extends this completed marker *to the left* up to `m`.
+ pub(crate) fn extend_to(self, p: &mut Parser<'_>, mut m: Marker) -> CompletedMarker {
+ m.bomb.defuse();
+ let idx = m.pos as usize;
+ match &mut p.events[idx] {
+ Event::Start { forward_parent, .. } => {
+ *forward_parent = Some(self.pos - m.pos);
+ }
+ _ => unreachable!(),
+ }
+ self
+ }
+
+ pub(crate) fn kind(&self) -> SyntaxKind {
+ self.kind
+ }
+}