From 2d4657dbba42ff38ad3db64e494a9cf89df98c07 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 12 Feb 2024 07:15:45 +0100 Subject: Merging upstream version 21.0.2. Signed-off-by: Daniel Baumann --- sqlglotrs/Cargo.lock | 2 +- sqlglotrs/Cargo.toml | 2 +- sqlglotrs/src/settings.rs | 6 ++++++ sqlglotrs/src/tokenizer.rs | 19 ++++++++++++++++++- 4 files changed, 26 insertions(+), 3 deletions(-) (limited to 'sqlglotrs') diff --git a/sqlglotrs/Cargo.lock b/sqlglotrs/Cargo.lock index cd9a9ef..920ef8b 100644 --- a/sqlglotrs/Cargo.lock +++ b/sqlglotrs/Cargo.lock @@ -188,7 +188,7 @@ checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970" [[package]] name = "sqlglotrs" -version = "0.1.0" +version = "0.1.1" dependencies = [ "pyo3", ] diff --git a/sqlglotrs/Cargo.toml b/sqlglotrs/Cargo.toml index ece4a88..0c367d9 100644 --- a/sqlglotrs/Cargo.toml +++ b/sqlglotrs/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sqlglotrs" -version = "0.1.0" +version = "0.1.1" edition = "2021" [lib] diff --git a/sqlglotrs/src/settings.rs b/sqlglotrs/src/settings.rs index 32575c6..c6e76a7 100644 --- a/sqlglotrs/src/settings.rs +++ b/sqlglotrs/src/settings.rs @@ -17,6 +17,7 @@ pub struct TokenTypeSettings { pub semicolon: TokenType, pub string: TokenType, pub var: TokenType, + pub heredoc_string_alternative: TokenType, } #[pymethods] @@ -34,6 +35,7 @@ impl TokenTypeSettings { semicolon: TokenType, string: TokenType, var: TokenType, + heredoc_string_alternative: TokenType, ) -> Self { TokenTypeSettings { bit_string, @@ -47,6 +49,7 @@ impl TokenTypeSettings { semicolon, string, var, + heredoc_string_alternative, } } } @@ -69,6 +72,7 @@ pub struct TokenizerSettings { pub var_single_tokens: HashSet, pub commands: HashSet, pub command_prefix_tokens: HashSet, + pub heredoc_tag_is_identifier: bool, } #[pymethods] @@ -90,6 +94,7 @@ impl TokenizerSettings { var_single_tokens: HashSet, commands: HashSet, command_prefix_tokens: HashSet, + heredoc_tag_is_identifier: bool, ) -> Self { let to_char = |v: &String| { if v.len() == 1 { @@ -138,6 +143,7 @@ impl TokenizerSettings { var_single_tokens: var_single_tokens_native, commands, command_prefix_tokens, + heredoc_tag_is_identifier, } } } diff --git a/sqlglotrs/src/tokenizer.rs b/sqlglotrs/src/tokenizer.rs index 920a5b5..94a8b08 100644 --- a/sqlglotrs/src/tokenizer.rs +++ b/sqlglotrs/src/tokenizer.rs @@ -399,6 +399,19 @@ impl<'a> TokenizerState<'a> { } else if *token_type == self.token_types.bit_string { (Some(2), *token_type, end.clone()) } else if *token_type == self.token_types.heredoc_string { + if self.settings.heredoc_tag_is_identifier + && !self.is_identifier(self.peek_char) + && self.peek_char.to_string() != *end + { + if self.token_types.heredoc_string_alternative != self.token_types.var { + self.add(self.token_types.heredoc_string_alternative, None)? + } else { + self.scan_var()? + }; + + return Ok(true) + }; + self.advance(1)?; let tag = if self.current_char.to_string() == *end { String::from("") @@ -469,7 +482,7 @@ impl<'a> TokenizerState<'a> { } else if self.peek_char.to_ascii_uppercase() == 'E' && scientific == 0 { scientific += 1; self.advance(1)?; - } else if self.peek_char.is_alphabetic() || self.peek_char == '_' { + } else if self.is_identifier(self.peek_char) { let number_text = self.text(); let mut literal = String::from(""); @@ -643,6 +656,10 @@ impl<'a> TokenizerState<'a> { Ok(text) } + fn is_identifier(&mut self, name: char) -> bool { + name.is_alphabetic() || name == '_' + } + fn extract_value(&mut self) -> Result { loop { if !self.peek_char.is_whitespace() -- cgit v1.2.3