diff options
Diffstat (limited to 'sqlglotrs')
-rw-r--r-- | sqlglotrs/Cargo.lock | 2
-rw-r--r-- | sqlglotrs/Cargo.toml | 2
-rw-r--r-- | sqlglotrs/src/tokenizer.rs | 34
3 files changed, 20 insertions, 18 deletions
diff --git a/sqlglotrs/Cargo.lock b/sqlglotrs/Cargo.lock index e9255b7..f630155 100644 --- a/sqlglotrs/Cargo.lock +++ b/sqlglotrs/Cargo.lock @@ -188,7 +188,7 @@ checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970" [[package]] name = "sqlglotrs" -version = "0.2.5" +version = "0.2.6" dependencies = [ "pyo3", ] diff --git a/sqlglotrs/Cargo.toml b/sqlglotrs/Cargo.toml index 4c566ee..663ccda 100644 --- a/sqlglotrs/Cargo.toml +++ b/sqlglotrs/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sqlglotrs" -version = "0.2.5" +version = "0.2.6" edition = "2021" [lib] diff --git a/sqlglotrs/src/tokenizer.rs b/sqlglotrs/src/tokenizer.rs index e79d0e9..45bbe77 100644 --- a/sqlglotrs/src/tokenizer.rs +++ b/sqlglotrs/src/tokenizer.rs @@ -405,19 +405,6 @@ impl<'a> TokenizerState<'a> { } else if *token_type == self.token_types.bit_string { (Some(2), *token_type, end.clone()) } else if *token_type == self.token_types.heredoc_string { - if self.settings.heredoc_tag_is_identifier - && !self.is_identifier(self.peek_char) - && self.peek_char.to_string() != *end - { - if self.token_types.heredoc_string_alternative != self.token_types.var { - self.add(self.token_types.heredoc_string_alternative, None)? - } else { - self.scan_var()? - }; - - return Ok(true) - }; - self.advance(1)?; let tag = if self.current_char.to_string() == *end { @@ -426,7 +413,14 @@ impl<'a> TokenizerState<'a> { self.extract_string(end, false, false, !self.settings.heredoc_tag_is_identifier)? 
}; - if self.is_end && !tag.is_empty() && self.settings.heredoc_tag_is_identifier { + if !tag.is_empty() + && self.settings.heredoc_tag_is_identifier + && (self.is_end || !self.is_identifier(&tag)) + { + if !self.is_end { + self.advance(-1)?; + } + self.advance(-(tag.len() as isize))?; self.add(self.token_types.heredoc_string_alternative, None)?; return Ok(true) @@ -494,7 +488,7 @@ impl<'a> TokenizerState<'a> { } else if self.peek_char.to_ascii_uppercase() == 'E' && scientific == 0 { scientific += 1; self.advance(1)?; - } else if self.is_identifier(self.peek_char) { + } else if self.is_alphabetic_or_underscore(self.peek_char) { let number_text = self.text(); let mut literal = String::from(""); @@ -676,10 +670,18 @@ impl<'a> TokenizerState<'a> { Ok(text) } - fn is_identifier(&mut self, name: char) -> bool { + fn is_alphabetic_or_underscore(&mut self, name: char) -> bool { name.is_alphabetic() || name == '_' } + fn is_identifier(&mut self, s: &str) -> bool { + s.chars().enumerate().all( + |(i, c)| + if i == 0 { self.is_alphabetic_or_underscore(c) } + else { self.is_alphabetic_or_underscore(c) || c.is_digit(10) } + ) + } + fn extract_value(&mut self) -> Result<String, TokenizerError> { loop { if !self.peek_char.is_whitespace()