diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-02-12 06:15:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-02-12 06:15:14 +0000 |
commit | 8fd7374bf370b99577a40d4de1716ad990d5a34b (patch) | |
tree | 061ecaf38b8a390a8a70348eea1fd11233f9e19c /sqlglotrs/src | |
parent | Adding upstream version 21.0.1. (diff) | |
download | sqlglot-8fd7374bf370b99577a40d4de1716ad990d5a34b.tar.xz sqlglot-8fd7374bf370b99577a40d4de1716ad990d5a34b.zip |
Adding upstream version 21.0.2. (tag: upstream/21.0.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglotrs/src')
-rw-r--r-- | sqlglotrs/src/settings.rs | 6 | ||||
-rw-r--r-- | sqlglotrs/src/tokenizer.rs | 19 |
2 files changed, 24 insertions, 1 deletion
diff --git a/sqlglotrs/src/settings.rs b/sqlglotrs/src/settings.rs index 32575c6..c6e76a7 100644 --- a/sqlglotrs/src/settings.rs +++ b/sqlglotrs/src/settings.rs @@ -17,6 +17,7 @@ pub struct TokenTypeSettings { pub semicolon: TokenType, pub string: TokenType, pub var: TokenType, + pub heredoc_string_alternative: TokenType, } #[pymethods] @@ -34,6 +35,7 @@ impl TokenTypeSettings { semicolon: TokenType, string: TokenType, var: TokenType, + heredoc_string_alternative: TokenType, ) -> Self { TokenTypeSettings { bit_string, @@ -47,6 +49,7 @@ impl TokenTypeSettings { semicolon, string, var, + heredoc_string_alternative, } } } @@ -69,6 +72,7 @@ pub struct TokenizerSettings { pub var_single_tokens: HashSet<char>, pub commands: HashSet<TokenType>, pub command_prefix_tokens: HashSet<TokenType>, + pub heredoc_tag_is_identifier: bool, } #[pymethods] @@ -90,6 +94,7 @@ impl TokenizerSettings { var_single_tokens: HashSet<String>, commands: HashSet<TokenType>, command_prefix_tokens: HashSet<TokenType>, + heredoc_tag_is_identifier: bool, ) -> Self { let to_char = |v: &String| { if v.len() == 1 { @@ -138,6 +143,7 @@ impl TokenizerSettings { var_single_tokens: var_single_tokens_native, commands, command_prefix_tokens, + heredoc_tag_is_identifier, } } } diff --git a/sqlglotrs/src/tokenizer.rs b/sqlglotrs/src/tokenizer.rs index 920a5b5..94a8b08 100644 --- a/sqlglotrs/src/tokenizer.rs +++ b/sqlglotrs/src/tokenizer.rs @@ -399,6 +399,19 @@ impl<'a> TokenizerState<'a> { } else if *token_type == self.token_types.bit_string { (Some(2), *token_type, end.clone()) } else if *token_type == self.token_types.heredoc_string { + if self.settings.heredoc_tag_is_identifier + && !self.is_identifier(self.peek_char) + && self.peek_char.to_string() != *end + { + if self.token_types.heredoc_string_alternative != self.token_types.var { + self.add(self.token_types.heredoc_string_alternative, None)? + } else { + self.scan_var()? 
+ }; + + return Ok(true) + }; + self.advance(1)?; let tag = if self.current_char.to_string() == *end { String::from("") @@ -469,7 +482,7 @@ impl<'a> TokenizerState<'a> { } else if self.peek_char.to_ascii_uppercase() == 'E' && scientific == 0 { scientific += 1; self.advance(1)?; - } else if self.peek_char.is_alphabetic() || self.peek_char == '_' { + } else if self.is_identifier(self.peek_char) { let number_text = self.text(); let mut literal = String::from(""); @@ -643,6 +656,10 @@ impl<'a> TokenizerState<'a> { Ok(text) } + fn is_identifier(&mut self, name: char) -> bool { + name.is_alphabetic() || name == '_' + } + fn extract_value(&mut self) -> Result<String, TokenizerError> { loop { if !self.peek_char.is_whitespace() |