summaryrefslogtreecommitdiffstats
path: root/vendor/jsonpath_lib/src/parser/tokenizer.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/jsonpath_lib/src/parser/tokenizer.rs')
-rw-r--r--vendor/jsonpath_lib/src/parser/tokenizer.rs370
1 files changed, 370 insertions, 0 deletions
diff --git a/vendor/jsonpath_lib/src/parser/tokenizer.rs b/vendor/jsonpath_lib/src/parser/tokenizer.rs
new file mode 100644
index 000000000..3e079b9d8
--- /dev/null
+++ b/vendor/jsonpath_lib/src/parser/tokenizer.rs
@@ -0,0 +1,370 @@
+use std::result::Result;
+
+use super::path_reader::{PathReader, ReaderError};
+
+const CH_DOLLA: char = '$';
+const CH_DOT: char = '.';
+const CH_ASTERISK: char = '*';
+const CH_LARRAY: char = '[';
+const CH_RARRAY: char = ']';
+const CH_LPAREN: char = '(';
+const CH_RPAREN: char = ')';
+const CH_AT: char = '@';
+const CH_QUESTION: char = '?';
+const CH_COMMA: char = ',';
+const CH_SEMICOLON: char = ':';
+const CH_EQUAL: char = '=';
+const CH_AMPERSAND: char = '&';
+const CH_PIPE: char = '|';
+const CH_LITTLE: char = '<';
+const CH_GREATER: char = '>';
+const CH_EXCLAMATION: char = '!';
+const CH_SINGLE_QUOTE: char = '\'';
+const CH_DOUBLE_QUOTE: char = '"';
+
+#[derive(Debug, Clone, PartialEq)]
+pub enum TokenError {
+ Eof,
+ Position(usize),
+}
+
+fn to_token_error(read_err: ReaderError) -> TokenError {
+ match read_err {
+ ReaderError::Eof => TokenError::Eof,
+ }
+}
+
+#[derive(Debug, PartialEq)]
+pub enum Token {
+ Absolute(usize),
+ Dot(usize),
+ At(usize),
+ OpenArray(usize),
+ CloseArray(usize),
+ Asterisk(usize),
+ Question(usize),
+ Comma(usize),
+ Split(usize),
+ OpenParenthesis(usize),
+ CloseParenthesis(usize),
+ Key(usize, String),
+ DoubleQuoted(usize, String),
+ SingleQuoted(usize, String),
+ Equal(usize),
+ GreaterOrEqual(usize),
+ Greater(usize),
+ Little(usize),
+ LittleOrEqual(usize),
+ NotEqual(usize),
+ And(usize),
+ Or(usize),
+ Whitespace(usize, usize),
+}
+
+impl Token {
+ pub fn is_match_token_type(&self, other: Token) -> bool {
+ match self {
+ Token::Absolute(_) => matches!(other, Token::Absolute(_)),
+ Token::Dot(_) => matches!(other, Token::Dot(_)),
+ Token::At(_) => matches!(other, Token::At(_)),
+ Token::OpenArray(_) => matches!(other, Token::OpenArray(_)),
+ Token::CloseArray(_) => matches!(other, Token::CloseArray(_)),
+ Token::Asterisk(_) => matches!(other, Token::Asterisk(_)),
+ Token::Question(_) => matches!(other, Token::Question(_)),
+ Token::Comma(_) => matches!(other, Token::Comma(_)),
+ Token::Split(_) => matches!(other, Token::Split(_)),
+ Token::OpenParenthesis(_) => matches!(other, Token::OpenParenthesis(_)),
+ Token::CloseParenthesis(_) => matches!(other, Token::CloseParenthesis(_)),
+ Token::Key(_, _) => matches!(other, Token::Key(_, _)),
+ Token::DoubleQuoted(_, _) => matches!(other, Token::DoubleQuoted(_, _)),
+ Token::SingleQuoted(_, _) => matches!(other, Token::SingleQuoted(_, _)),
+ Token::Equal(_) => matches!(other, Token::Equal(_)),
+ Token::GreaterOrEqual(_) => matches!(other, Token::GreaterOrEqual(_)),
+ Token::Greater(_) => matches!(other, Token::Greater(_)),
+ Token::Little(_) => matches!(other, Token::Little(_)),
+ Token::LittleOrEqual(_) => matches!(other, Token::LittleOrEqual(_)),
+ Token::NotEqual(_) => matches!(other, Token::NotEqual(_)),
+ Token::And(_) => matches!(other, Token::And(_)),
+ Token::Or(_) => matches!(other, Token::Or(_)),
+ Token::Whitespace(_, _) => matches!(other, Token::Whitespace(_, _)),
+ }
+ }
+}
+
+pub struct Tokenizer<'a> {
+ input: PathReader<'a>,
+}
+
+impl<'a> Tokenizer<'a> {
+ pub fn new(input: &'a str) -> Self {
+ trace!("input: {}", input);
+ Tokenizer {
+ input: PathReader::new(input),
+ }
+ }
+
+ fn dolla(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> {
+ let fun = |c: &char| match c {
+ &CH_DOT
+ | &CH_ASTERISK
+ | &CH_LARRAY
+ | &CH_RARRAY
+ | &CH_LPAREN
+ | &CH_RPAREN
+ | &CH_AT
+ | &CH_QUESTION
+ | &CH_COMMA
+ | &CH_SEMICOLON
+ | &CH_LITTLE
+ | &CH_GREATER
+ | &CH_EQUAL
+ | &CH_AMPERSAND
+ | &CH_PIPE
+ | &CH_EXCLAMATION
+ => false,
+ _ => !c.is_whitespace(),
+ };
+ let (_, mut vec) = self.input.take_while(fun).map_err(to_token_error)?;
+ vec.insert(0, ch);
+
+ if vec.len() == 1 {
+ Ok(Token::Absolute(pos))
+ } else {
+ Ok(Token::Key(pos, vec))
+ }
+ }
+
+ fn quote(&mut self, ch: char) -> Result<String, TokenError> {
+ let (_, mut val) = self
+ .input
+ .take_while(|c| *c != ch)
+ .map_err(to_token_error)?;
+
+ if let Some('\\') = val.chars().last() {
+ self.input.next_char().map_err(to_token_error)?;
+ let _ = val.pop();
+ let (_, val_remain) = self
+ .input
+ .take_while(|c| *c != ch)
+ .map_err(to_token_error)?;
+ self.input.next_char().map_err(to_token_error)?;
+ val.push(ch);
+ val.push_str(val_remain.as_str());
+ } else {
+ self.input.next_char().map_err(to_token_error)?;
+ }
+
+ Ok(val)
+ }
+
+ fn single_quote(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> {
+ let val = self.quote(ch)?;
+ Ok(Token::SingleQuoted(pos, val))
+ }
+
+ fn double_quote(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> {
+ let val = self.quote(ch)?;
+ Ok(Token::DoubleQuoted(pos, val))
+ }
+
+ fn equal(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
+ let (_, ch) = self.input.peek_char().map_err(to_token_error)?;
+ match ch {
+ CH_EQUAL => {
+ self.input.next_char().map_err(to_token_error)?;
+ Ok(Token::Equal(pos))
+ }
+ _ => Err(TokenError::Position(pos)),
+ }
+ }
+
+ fn not_equal(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
+ let (_, ch) = self.input.peek_char().map_err(to_token_error)?;
+ match ch {
+ CH_EQUAL => {
+ self.input.next_char().map_err(to_token_error)?;
+ Ok(Token::NotEqual(pos))
+ }
+ _ => Err(TokenError::Position(pos)),
+ }
+ }
+
+ fn little(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
+ let (_, ch) = self.input.peek_char().map_err(to_token_error)?;
+ match ch {
+ CH_EQUAL => {
+ self.input.next_char().map_err(to_token_error)?;
+ Ok(Token::LittleOrEqual(pos))
+ }
+ _ => Ok(Token::Little(pos)),
+ }
+ }
+
+ fn greater(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
+ let (_, ch) = self.input.peek_char().map_err(to_token_error)?;
+ match ch {
+ CH_EQUAL => {
+ self.input.next_char().map_err(to_token_error)?;
+ Ok(Token::GreaterOrEqual(pos))
+ }
+ _ => Ok(Token::Greater(pos)),
+ }
+ }
+
+ fn and(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
+ let (_, ch) = self.input.peek_char().map_err(to_token_error)?;
+ match ch {
+ CH_AMPERSAND => {
+ let _ = self.input.next_char().map_err(to_token_error);
+ Ok(Token::And(pos))
+ }
+ _ => Err(TokenError::Position(pos)),
+ }
+ }
+
+ fn or(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
+ let (_, ch) = self.input.peek_char().map_err(to_token_error)?;
+ match ch {
+ CH_PIPE => {
+ self.input.next_char().map_err(to_token_error)?;
+ Ok(Token::Or(pos))
+ }
+ _ => Err(TokenError::Position(pos)),
+ }
+ }
+
+ fn whitespace(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
+ let (_, vec) = self
+ .input
+ .take_while(|c| c.is_whitespace())
+ .map_err(to_token_error)?;
+ Ok(Token::Whitespace(pos, vec.len()))
+ }
+
+ fn other(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> {
+ let fun = |c: &char| match c {
+ &CH_DOLLA
+ | &CH_DOT
+ | &CH_ASTERISK
+ | &CH_LARRAY
+ | &CH_RARRAY
+ | &CH_LPAREN
+ | &CH_RPAREN
+ | &CH_AT
+ | &CH_QUESTION
+ | &CH_COMMA
+ | &CH_SEMICOLON
+ | &CH_LITTLE
+ | &CH_GREATER
+ | &CH_EQUAL
+ | &CH_AMPERSAND
+ | &CH_PIPE
+ | &CH_EXCLAMATION
+ => false,
+ _ => !c.is_whitespace(),
+ };
+ let (_, mut vec) = self.input.take_while(fun).map_err(to_token_error)?;
+ vec.insert(0, ch);
+ Ok(Token::Key(pos, vec))
+ }
+
+ pub fn next_token(&mut self) -> Result<Token, TokenError> {
+ let (pos, ch) = self.input.next_char().map_err(to_token_error)?;
+ match ch {
+ CH_DOLLA => self.dolla(pos, ch),
+ CH_DOT => Ok(Token::Dot(pos)),
+ CH_ASTERISK => Ok(Token::Asterisk(pos)),
+ CH_LARRAY => Ok(Token::OpenArray(pos)),
+ CH_RARRAY => Ok(Token::CloseArray(pos)),
+ CH_LPAREN => Ok(Token::OpenParenthesis(pos)),
+ CH_RPAREN => Ok(Token::CloseParenthesis(pos)),
+ CH_AT => Ok(Token::At(pos)),
+ CH_QUESTION => Ok(Token::Question(pos)),
+ CH_COMMA => Ok(Token::Comma(pos)),
+ CH_SEMICOLON => Ok(Token::Split(pos)),
+ CH_SINGLE_QUOTE => self.single_quote(pos, ch),
+ CH_DOUBLE_QUOTE => self.double_quote(pos, ch),
+ CH_EQUAL => self.equal(pos, ch),
+ CH_GREATER => self.greater(pos, ch),
+ CH_LITTLE => self.little(pos, ch),
+ CH_AMPERSAND => self.and(pos, ch),
+ CH_PIPE => self.or(pos, ch),
+ CH_EXCLAMATION => self.not_equal(pos, ch),
+ _ if ch.is_whitespace() => self.whitespace(pos, ch),
+ _ => self.other(pos, ch),
+ }
+ }
+
+ fn current_pos(&self) -> usize {
+ self.input.current_pos()
+ }
+}
+
+pub struct TokenReader<'a> {
+ origin_input: &'a str,
+ err: TokenError,
+ err_pos: usize,
+ tokens: Vec<(usize, Token)>,
+ curr_pos: Option<usize>,
+}
+
+impl<'a> TokenReader<'a> {
+ pub fn new(input: &'a str) -> Self {
+ let mut tokenizer = Tokenizer::new(input);
+ let mut tokens = vec![];
+ loop {
+ match tokenizer.next_token() {
+ Ok(t) => {
+ tokens.insert(0, (tokenizer.current_pos(), t));
+ }
+ Err(e) => {
+ return TokenReader {
+ origin_input: input,
+ err: e,
+ err_pos: tokenizer.current_pos(),
+ tokens,
+ curr_pos: None,
+ };
+ }
+ }
+ }
+ }
+
+ pub fn peek_token(&self) -> Result<&Token, TokenError> {
+ match self.tokens.last() {
+ Some((_, t)) => {
+ trace!("%{:?}", t);
+ Ok(t)
+ }
+ _ => {
+ trace!("%{:?}", self.err);
+ Err(self.err.clone())
+ }
+ }
+ }
+
+ pub fn next_token(&mut self) -> Result<Token, TokenError> {
+ match self.tokens.pop() {
+ Some((pos, t)) => {
+ self.curr_pos = Some(pos);
+ trace!("@{:?}", t);
+ Ok(t)
+ }
+ _ => {
+ trace!("@{:?}", self.err);
+ Err(self.err.clone())
+ }
+ }
+ }
+
+ pub fn err_msg_with_pos(&self, pos: usize) -> String {
+ format!("{}\n{}", self.origin_input, "^".repeat(pos))
+ }
+
+ pub fn err_msg(&self) -> String {
+ match self.curr_pos {
+ Some(pos) => self.err_msg_with_pos(pos),
+ _ => self.err_msg_with_pos(self.err_pos),
+ }
+ }
+}