diff options
Diffstat (limited to 'vendor/jsonpath_lib/src/parser/tokenizer.rs')
-rw-r--r-- | vendor/jsonpath_lib/src/parser/tokenizer.rs | 370 |
1 file changed, 370 insertions, 0 deletions
diff --git a/vendor/jsonpath_lib/src/parser/tokenizer.rs b/vendor/jsonpath_lib/src/parser/tokenizer.rs new file mode 100644 index 000000000..3e079b9d8 --- /dev/null +++ b/vendor/jsonpath_lib/src/parser/tokenizer.rs @@ -0,0 +1,370 @@ +use std::result::Result; + +use super::path_reader::{PathReader, ReaderError}; + +const CH_DOLLA: char = '$'; +const CH_DOT: char = '.'; +const CH_ASTERISK: char = '*'; +const CH_LARRAY: char = '['; +const CH_RARRAY: char = ']'; +const CH_LPAREN: char = '('; +const CH_RPAREN: char = ')'; +const CH_AT: char = '@'; +const CH_QUESTION: char = '?'; +const CH_COMMA: char = ','; +const CH_SEMICOLON: char = ':'; +const CH_EQUAL: char = '='; +const CH_AMPERSAND: char = '&'; +const CH_PIPE: char = '|'; +const CH_LITTLE: char = '<'; +const CH_GREATER: char = '>'; +const CH_EXCLAMATION: char = '!'; +const CH_SINGLE_QUOTE: char = '\''; +const CH_DOUBLE_QUOTE: char = '"'; + +#[derive(Debug, Clone, PartialEq)] +pub enum TokenError { + Eof, + Position(usize), +} + +fn to_token_error(read_err: ReaderError) -> TokenError { + match read_err { + ReaderError::Eof => TokenError::Eof, + } +} + +#[derive(Debug, PartialEq)] +pub enum Token { + Absolute(usize), + Dot(usize), + At(usize), + OpenArray(usize), + CloseArray(usize), + Asterisk(usize), + Question(usize), + Comma(usize), + Split(usize), + OpenParenthesis(usize), + CloseParenthesis(usize), + Key(usize, String), + DoubleQuoted(usize, String), + SingleQuoted(usize, String), + Equal(usize), + GreaterOrEqual(usize), + Greater(usize), + Little(usize), + LittleOrEqual(usize), + NotEqual(usize), + And(usize), + Or(usize), + Whitespace(usize, usize), +} + +impl Token { + pub fn is_match_token_type(&self, other: Token) -> bool { + match self { + Token::Absolute(_) => matches!(other, Token::Absolute(_)), + Token::Dot(_) => matches!(other, Token::Dot(_)), + Token::At(_) => matches!(other, Token::At(_)), + Token::OpenArray(_) => matches!(other, Token::OpenArray(_)), + Token::CloseArray(_) => matches!(other, 
Token::CloseArray(_)), + Token::Asterisk(_) => matches!(other, Token::Asterisk(_)), + Token::Question(_) => matches!(other, Token::Question(_)), + Token::Comma(_) => matches!(other, Token::Comma(_)), + Token::Split(_) => matches!(other, Token::Split(_)), + Token::OpenParenthesis(_) => matches!(other, Token::OpenParenthesis(_)), + Token::CloseParenthesis(_) => matches!(other, Token::CloseParenthesis(_)), + Token::Key(_, _) => matches!(other, Token::Key(_, _)), + Token::DoubleQuoted(_, _) => matches!(other, Token::DoubleQuoted(_, _)), + Token::SingleQuoted(_, _) => matches!(other, Token::SingleQuoted(_, _)), + Token::Equal(_) => matches!(other, Token::Equal(_)), + Token::GreaterOrEqual(_) => matches!(other, Token::GreaterOrEqual(_)), + Token::Greater(_) => matches!(other, Token::Greater(_)), + Token::Little(_) => matches!(other, Token::Little(_)), + Token::LittleOrEqual(_) => matches!(other, Token::LittleOrEqual(_)), + Token::NotEqual(_) => matches!(other, Token::NotEqual(_)), + Token::And(_) => matches!(other, Token::And(_)), + Token::Or(_) => matches!(other, Token::Or(_)), + Token::Whitespace(_, _) => matches!(other, Token::Whitespace(_, _)), + } + } +} + +pub struct Tokenizer<'a> { + input: PathReader<'a>, +} + +impl<'a> Tokenizer<'a> { + pub fn new(input: &'a str) -> Self { + trace!("input: {}", input); + Tokenizer { + input: PathReader::new(input), + } + } + + fn dolla(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> { + let fun = |c: &char| match c { + &CH_DOT + | &CH_ASTERISK + | &CH_LARRAY + | &CH_RARRAY + | &CH_LPAREN + | &CH_RPAREN + | &CH_AT + | &CH_QUESTION + | &CH_COMMA + | &CH_SEMICOLON + | &CH_LITTLE + | &CH_GREATER + | &CH_EQUAL + | &CH_AMPERSAND + | &CH_PIPE + | &CH_EXCLAMATION + => false, + _ => !c.is_whitespace(), + }; + let (_, mut vec) = self.input.take_while(fun).map_err(to_token_error)?; + vec.insert(0, ch); + + if vec.len() == 1 { + Ok(Token::Absolute(pos)) + } else { + Ok(Token::Key(pos, vec)) + } + } + + fn quote(&mut self, ch: 
char) -> Result<String, TokenError> { + let (_, mut val) = self + .input + .take_while(|c| *c != ch) + .map_err(to_token_error)?; + + if let Some('\\') = val.chars().last() { + self.input.next_char().map_err(to_token_error)?; + let _ = val.pop(); + let (_, val_remain) = self + .input + .take_while(|c| *c != ch) + .map_err(to_token_error)?; + self.input.next_char().map_err(to_token_error)?; + val.push(ch); + val.push_str(val_remain.as_str()); + } else { + self.input.next_char().map_err(to_token_error)?; + } + + Ok(val) + } + + fn single_quote(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> { + let val = self.quote(ch)?; + Ok(Token::SingleQuoted(pos, val)) + } + + fn double_quote(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> { + let val = self.quote(ch)?; + Ok(Token::DoubleQuoted(pos, val)) + } + + fn equal(&mut self, pos: usize, _: char) -> Result<Token, TokenError> { + let (_, ch) = self.input.peek_char().map_err(to_token_error)?; + match ch { + CH_EQUAL => { + self.input.next_char().map_err(to_token_error)?; + Ok(Token::Equal(pos)) + } + _ => Err(TokenError::Position(pos)), + } + } + + fn not_equal(&mut self, pos: usize, _: char) -> Result<Token, TokenError> { + let (_, ch) = self.input.peek_char().map_err(to_token_error)?; + match ch { + CH_EQUAL => { + self.input.next_char().map_err(to_token_error)?; + Ok(Token::NotEqual(pos)) + } + _ => Err(TokenError::Position(pos)), + } + } + + fn little(&mut self, pos: usize, _: char) -> Result<Token, TokenError> { + let (_, ch) = self.input.peek_char().map_err(to_token_error)?; + match ch { + CH_EQUAL => { + self.input.next_char().map_err(to_token_error)?; + Ok(Token::LittleOrEqual(pos)) + } + _ => Ok(Token::Little(pos)), + } + } + + fn greater(&mut self, pos: usize, _: char) -> Result<Token, TokenError> { + let (_, ch) = self.input.peek_char().map_err(to_token_error)?; + match ch { + CH_EQUAL => { + self.input.next_char().map_err(to_token_error)?; + Ok(Token::GreaterOrEqual(pos)) + } + _ => 
Ok(Token::Greater(pos)), + } + } + + fn and(&mut self, pos: usize, _: char) -> Result<Token, TokenError> { + let (_, ch) = self.input.peek_char().map_err(to_token_error)?; + match ch { + CH_AMPERSAND => { + let _ = self.input.next_char().map_err(to_token_error); + Ok(Token::And(pos)) + } + _ => Err(TokenError::Position(pos)), + } + } + + fn or(&mut self, pos: usize, _: char) -> Result<Token, TokenError> { + let (_, ch) = self.input.peek_char().map_err(to_token_error)?; + match ch { + CH_PIPE => { + self.input.next_char().map_err(to_token_error)?; + Ok(Token::Or(pos)) + } + _ => Err(TokenError::Position(pos)), + } + } + + fn whitespace(&mut self, pos: usize, _: char) -> Result<Token, TokenError> { + let (_, vec) = self + .input + .take_while(|c| c.is_whitespace()) + .map_err(to_token_error)?; + Ok(Token::Whitespace(pos, vec.len())) + } + + fn other(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> { + let fun = |c: &char| match c { + &CH_DOLLA + | &CH_DOT + | &CH_ASTERISK + | &CH_LARRAY + | &CH_RARRAY + | &CH_LPAREN + | &CH_RPAREN + | &CH_AT + | &CH_QUESTION + | &CH_COMMA + | &CH_SEMICOLON + | &CH_LITTLE + | &CH_GREATER + | &CH_EQUAL + | &CH_AMPERSAND + | &CH_PIPE + | &CH_EXCLAMATION + => false, + _ => !c.is_whitespace(), + }; + let (_, mut vec) = self.input.take_while(fun).map_err(to_token_error)?; + vec.insert(0, ch); + Ok(Token::Key(pos, vec)) + } + + pub fn next_token(&mut self) -> Result<Token, TokenError> { + let (pos, ch) = self.input.next_char().map_err(to_token_error)?; + match ch { + CH_DOLLA => self.dolla(pos, ch), + CH_DOT => Ok(Token::Dot(pos)), + CH_ASTERISK => Ok(Token::Asterisk(pos)), + CH_LARRAY => Ok(Token::OpenArray(pos)), + CH_RARRAY => Ok(Token::CloseArray(pos)), + CH_LPAREN => Ok(Token::OpenParenthesis(pos)), + CH_RPAREN => Ok(Token::CloseParenthesis(pos)), + CH_AT => Ok(Token::At(pos)), + CH_QUESTION => Ok(Token::Question(pos)), + CH_COMMA => Ok(Token::Comma(pos)), + CH_SEMICOLON => Ok(Token::Split(pos)), + CH_SINGLE_QUOTE => 
self.single_quote(pos, ch), + CH_DOUBLE_QUOTE => self.double_quote(pos, ch), + CH_EQUAL => self.equal(pos, ch), + CH_GREATER => self.greater(pos, ch), + CH_LITTLE => self.little(pos, ch), + CH_AMPERSAND => self.and(pos, ch), + CH_PIPE => self.or(pos, ch), + CH_EXCLAMATION => self.not_equal(pos, ch), + _ if ch.is_whitespace() => self.whitespace(pos, ch), + _ => self.other(pos, ch), + } + } + + fn current_pos(&self) -> usize { + self.input.current_pos() + } +} + +pub struct TokenReader<'a> { + origin_input: &'a str, + err: TokenError, + err_pos: usize, + tokens: Vec<(usize, Token)>, + curr_pos: Option<usize>, +} + +impl<'a> TokenReader<'a> { + pub fn new(input: &'a str) -> Self { + let mut tokenizer = Tokenizer::new(input); + let mut tokens = vec![]; + loop { + match tokenizer.next_token() { + Ok(t) => { + tokens.insert(0, (tokenizer.current_pos(), t)); + } + Err(e) => { + return TokenReader { + origin_input: input, + err: e, + err_pos: tokenizer.current_pos(), + tokens, + curr_pos: None, + }; + } + } + } + } + + pub fn peek_token(&self) -> Result<&Token, TokenError> { + match self.tokens.last() { + Some((_, t)) => { + trace!("%{:?}", t); + Ok(t) + } + _ => { + trace!("%{:?}", self.err); + Err(self.err.clone()) + } + } + } + + pub fn next_token(&mut self) -> Result<Token, TokenError> { + match self.tokens.pop() { + Some((pos, t)) => { + self.curr_pos = Some(pos); + trace!("@{:?}", t); + Ok(t) + } + _ => { + trace!("@{:?}", self.err); + Err(self.err.clone()) + } + } + } + + pub fn err_msg_with_pos(&self, pos: usize) -> String { + format!("{}\n{}", self.origin_input, "^".repeat(pos)) + } + + pub fn err_msg(&self) -> String { + match self.curr_pos { + Some(pos) => self.err_msg_with_pos(pos), + _ => self.err_msg_with_pos(self.err_pos), + } + } +} |