From 2d4657dbba42ff38ad3db64e494a9cf89df98c07 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 12 Feb 2024 07:15:45 +0100 Subject: Merging upstream version 21.0.2. Signed-off-by: Daniel Baumann --- sqlglotrs/src/settings.rs | 6 ++++++ sqlglotrs/src/tokenizer.rs | 19 ++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'sqlglotrs/src') diff --git a/sqlglotrs/src/settings.rs b/sqlglotrs/src/settings.rs index 32575c6..c6e76a7 100644 --- a/sqlglotrs/src/settings.rs +++ b/sqlglotrs/src/settings.rs @@ -17,6 +17,7 @@ pub struct TokenTypeSettings { pub semicolon: TokenType, pub string: TokenType, pub var: TokenType, + pub heredoc_string_alternative: TokenType, } #[pymethods] @@ -34,6 +35,7 @@ impl TokenTypeSettings { semicolon: TokenType, string: TokenType, var: TokenType, + heredoc_string_alternative: TokenType, ) -> Self { TokenTypeSettings { bit_string, @@ -47,6 +49,7 @@ impl TokenTypeSettings { semicolon, string, var, + heredoc_string_alternative, } } } @@ -69,6 +72,7 @@ pub struct TokenizerSettings { pub var_single_tokens: HashSet, pub commands: HashSet, pub command_prefix_tokens: HashSet, + pub heredoc_tag_is_identifier: bool, } #[pymethods] @@ -90,6 +94,7 @@ impl TokenizerSettings { var_single_tokens: HashSet, commands: HashSet, command_prefix_tokens: HashSet, + heredoc_tag_is_identifier: bool, ) -> Self { let to_char = |v: &String| { if v.len() == 1 { @@ -138,6 +143,7 @@ impl TokenizerSettings { var_single_tokens: var_single_tokens_native, commands, command_prefix_tokens, + heredoc_tag_is_identifier, } } } diff --git a/sqlglotrs/src/tokenizer.rs b/sqlglotrs/src/tokenizer.rs index 920a5b5..94a8b08 100644 --- a/sqlglotrs/src/tokenizer.rs +++ b/sqlglotrs/src/tokenizer.rs @@ -399,6 +399,19 @@ impl<'a> TokenizerState<'a> { } else if *token_type == self.token_types.bit_string { (Some(2), *token_type, end.clone()) } else if *token_type == self.token_types.heredoc_string { + if self.settings.heredoc_tag_is_identifier + && !self.is_identifier(self.peek_char) + && self.peek_char.to_string() != *end + { + if self.token_types.heredoc_string_alternative != self.token_types.var { + self.add(self.token_types.heredoc_string_alternative, None)? + } else { + self.scan_var()? + }; + + return Ok(true) + }; + self.advance(1)?; let tag = if self.current_char.to_string() == *end { String::from("") @@ -469,7 +482,7 @@ impl<'a> TokenizerState<'a> { } else if self.peek_char.to_ascii_uppercase() == 'E' && scientific == 0 { scientific += 1; self.advance(1)?; - } else if self.peek_char.is_alphabetic() || self.peek_char == '_' { + } else if self.is_identifier(self.peek_char) { let number_text = self.text(); let mut literal = String::from(""); @@ -643,6 +656,10 @@ impl<'a> TokenizerState<'a> { Ok(text) } + fn is_identifier(&mut self, name: char) -> bool { + name.is_alphabetic() || name == '_' + } + fn extract_value(&mut self) -> Result { loop { if !self.peek_char.is_whitespace() -- cgit v1.2.3