diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /third_party/rust/jsparagus-parser/src/parser.rs | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/jsparagus-parser/src/parser.rs')
-rw-r--r-- | third_party/rust/jsparagus-parser/src/parser.rs | 262 |
1 files changed, 262 insertions, 0 deletions
diff --git a/third_party/rust/jsparagus-parser/src/parser.rs b/third_party/rust/jsparagus-parser/src/parser.rs new file mode 100644 index 0000000000..c9f12a56e7 --- /dev/null +++ b/third_party/rust/jsparagus-parser/src/parser.rs @@ -0,0 +1,262 @@ +use crate::queue_stack::QueueStack; +use crate::simulator::Simulator; +use ast::arena; +use ast::SourceLocation; +use generated_parser::{ + full_actions, AstBuilder, AstBuilderDelegate, ErrorCode, ParseError, ParserTrait, Result, + StackValue, TermValue, TerminalId, Token, TABLES, +}; +use json_log::json_trace; + +pub struct Parser<'alloc> { + /// Vector of states visited in the LR parse table. + state_stack: Vec<usize>, + /// Stack and Queue of terms and their associated values. The Queue + /// corresponds to terms which are added as lookahead as well as terms which + /// are replayed, and the stack matches the state_stack. + node_stack: QueueStack<TermValue<StackValue<'alloc>>>, + /// Build the AST stored in the TermValue vectors. + handler: AstBuilder<'alloc>, +} + +impl<'alloc> AstBuilderDelegate<'alloc> for Parser<'alloc> { + fn ast_builder_refmut(&mut self) -> &mut AstBuilder<'alloc> { + &mut self.handler + } +} + +impl<'alloc> ParserTrait<'alloc, StackValue<'alloc>> for Parser<'alloc> { + fn shift(&mut self, tv: TermValue<StackValue<'alloc>>) -> Result<'alloc, bool> { + // The shift function should exit either by accepting the input or + // emptying its queue of lookahead. + debug_assert!(self.node_stack.queue_empty()); + self.node_stack.enqueue(tv); + // Shift the new terminal/nonterminal and its associated value. + json_trace!({ "enter": "shift" }); + let mut state = self.state(); + debug_assert!(state < TABLES.shift_count); + while !self.node_stack.queue_empty() { + let term_index: usize = self.node_stack.next().unwrap().term.into(); + debug_assert!(term_index < TABLES.shift_width); + let index = state * TABLES.shift_width + term_index; + let goto = TABLES.shift_table[index]; + json_trace!({ + "from": state, + "to": goto, + "term": format!("{:?}", { let s: &'static str = tv.term.into(); s }), + }); + if goto < 0 { + self.node_stack.shift(); + let tv = self.node_stack.pop().unwrap(); + // Error handling is in charge of shifting an ErrorSymbol from the + // current state. + self.try_error_handling(tv)?; + continue; + } + state = goto as usize; + self.shift_replayed(state); + // Execute any actions, such as reduce actions ast builder actions. + if state >= TABLES.shift_count { + assert!(state < TABLES.action_count + TABLES.shift_count); + json_trace!({ "action": state }); + if full_actions(self, state)? { + return Ok(true); + } + state = self.state(); + } + debug_assert!(state < TABLES.shift_count); + } + Ok(false) + } + #[inline(always)] + fn shift_replayed(&mut self, state: usize) { + // let term_index: usize = self.node_stack.next().unwrap().term.into(); + // assert!(term_index < TABLES.shift_width); + // let from_state = self.state(); + // let index = from_state * TABLES.shift_width + term_index; + // let goto = TABLES.shift_table[index]; + // assert!((goto as usize) == state); + self.state_stack.push(state); + self.node_stack.shift(); + } + fn unshift(&mut self) { + self.state_stack.pop().unwrap(); + self.node_stack.unshift() + } + fn pop(&mut self) -> TermValue<StackValue<'alloc>> { + self.state_stack.pop().unwrap(); + self.node_stack.pop().unwrap() + } + fn replay(&mut self, tv: TermValue<StackValue<'alloc>>) { + self.node_stack.push_next(tv) + } + fn epsilon(&mut self, state: usize) { + *self.state_stack.last_mut().unwrap() = state; + } + fn top_state(&self) -> usize { + self.state() + } + fn check_not_on_new_line(&mut self, peek: usize) -> Result<'alloc, bool> { + let sv = { + let stack = self.node_stack.stack_slice(); + &stack[stack.len() - peek].value + }; + if let StackValue::Token(ref token) = sv { + if !token.is_on_new_line { + return Ok(true); + } + self.rewind(peek - 1); + let tv = self.pop(); + self.try_error_handling(tv)?; + return Ok(false); + } + Err(ParseError::NoLineTerminatorHereExpectedToken.into()) + } +} + +impl<'alloc> Parser<'alloc> { + pub fn new(handler: AstBuilder<'alloc>, entry_state: usize) -> Self { + TABLES.check(); + assert!(entry_state < TABLES.shift_count); + let mut state_stack = Vec::with_capacity(128); + state_stack.push(entry_state); + + Self { + state_stack, + node_stack: QueueStack::with_capacity(128), + handler, + } + } + + fn state(&self) -> usize { + *self.state_stack.last().unwrap() + } + + pub fn write_token(&mut self, token: arena::Box<'alloc, Token>) -> Result<'alloc, ()> { + json_trace!({ + "method": "write_token", + "is_on_new_line": token.is_on_new_line, + "start": token.loc.start, + "end": token.loc.end, + }); + // Shift the token with the associated StackValue. + let term = token.terminal_id.into(); + let accept = self.shift(TermValue { + term, + value: StackValue::Token(token), + })?; + // JavaScript grammar accepts empty inputs, therefore we can never + // accept any program before receiving a TerminalId::End. + assert!(!accept); + Ok(()) + } + + pub fn close(&mut self, position: usize) -> Result<'alloc, StackValue<'alloc>> { + // Shift the End terminal with the associated StackValue. + json_trace!({ + "method": "close", + "position": position, + }); + let loc = SourceLocation::new(position, position); + let token = Token::basic_token(TerminalId::End, loc); + let accept = self.shift(TermValue { + term: TerminalId::End.into(), + value: StackValue::Token(self.handler.alloc(token)), + })?; + // Adding a TerminalId::End would either lead to a parse error, or to + // accepting the current input. In which case we return matching node + // value. + assert!(accept); + + // We can either reduce a Script/Module, or a Script/Module followed by + // an <End> terminal. + assert!(self.node_stack.stack_len() >= 1); + assert!(self.node_stack.stack_len() <= 2); + if self.node_stack.stack_len() > 1 { + self.node_stack.pop(); + } + Ok(self.node_stack.pop().unwrap().value) + } + + pub(crate) fn parse_error(t: &Token) -> ParseError<'alloc> { + if t.terminal_id == TerminalId::End { + ParseError::UnexpectedEnd + } else { + ParseError::SyntaxError(t.clone()) + } + } + + fn try_error_handling(&mut self, t: TermValue<StackValue<'alloc>>) -> Result<'alloc, bool> { + json_trace!({ + "try_error_handling_term": format!("{}", { + let s: &'static str = t.term.into(); + s + }), + }); + if let StackValue::Token(ref token) = t.value { + // Error tokens might them-self cause more errors to be reported. + // This happens due to the fact that the ErrorToken can be replayed, + // and while the ErrorToken might be in the lookahead rules, it + // might not be in the shifted terms coming after the reduced + // nonterminal. + if t.term == TerminalId::ErrorToken.into() { + return Err(Self::parse_error(token).into()); + } + + // Otherwise, check if the current rule accept an Automatic + // Semi-Colon insertion (ASI). + let state = self.state(); + assert!(state < TABLES.shift_count); + let error_code = TABLES.error_codes[state]; + if let Some(error_code) = error_code { + let err_token = (*token).clone(); + Self::recover(token, error_code)?; + self.replay(t); + let err_token = self.handler.alloc(err_token); + self.replay(TermValue { + term: TerminalId::ErrorToken.into(), + value: StackValue::Token(err_token), + }); + return Ok(false); + } + // On error, don't attempt error handling again. + return Err(Self::parse_error(token).into()); + } + Err(ParseError::ParserCannotUnpackToken.into()) + } + + pub(crate) fn recover(t: &Token, error_code: ErrorCode) -> Result<'alloc, ()> { + match error_code { + ErrorCode::Asi => { + if t.is_on_new_line + || t.terminal_id == TerminalId::End + || t.terminal_id == TerminalId::CloseBrace + { + Ok(()) + } else { + Err(Self::parse_error(t).into()) + } + } + ErrorCode::DoWhileAsi => Ok(()), + } + } + + fn simulator<'a>(&'a self) -> Simulator<'alloc, 'a> { + assert_eq!(self.node_stack.queue_len(), 0); + Simulator::new(&self.state_stack, self.node_stack.stack_slice()) + } + + pub fn can_accept_terminal(&self, t: TerminalId) -> bool { + let result = self.simulator().write_token(t).is_ok(); + json_trace!({ + "can_accept": result, + "terminal": format!("{:?}", t), + }); + result + } + + /// Return true if self.close() would succeed. + pub fn can_close(&self) -> bool { + self.simulator().close(0).is_ok() + } +} |