summary | refs | log | tree | commit | diff | stats
path: root/sqlglotrs/src
diff options
context:
space:
mode:
Diffstat (limited to 'sqlglotrs/src')
-rw-r--r-- sqlglotrs/src/tokenizer.rs 34
1 files changed, 18 insertions, 16 deletions
diff --git a/sqlglotrs/src/tokenizer.rs b/sqlglotrs/src/tokenizer.rs
index e79d0e9..45bbe77 100644
--- a/sqlglotrs/src/tokenizer.rs
+++ b/sqlglotrs/src/tokenizer.rs
@@ -405,19 +405,6 @@ impl<'a> TokenizerState<'a> {
} else if *token_type == self.token_types.bit_string {
(Some(2), *token_type, end.clone())
} else if *token_type == self.token_types.heredoc_string {
- if self.settings.heredoc_tag_is_identifier
- && !self.is_identifier(self.peek_char)
- && self.peek_char.to_string() != *end
- {
- if self.token_types.heredoc_string_alternative != self.token_types.var {
- self.add(self.token_types.heredoc_string_alternative, None)?
- } else {
- self.scan_var()?
- };
-
- return Ok(true)
- };
-
self.advance(1)?;
let tag = if self.current_char.to_string() == *end {
@@ -426,7 +413,14 @@ impl<'a> TokenizerState<'a> {
self.extract_string(end, false, false, !self.settings.heredoc_tag_is_identifier)?
};
- if self.is_end && !tag.is_empty() && self.settings.heredoc_tag_is_identifier {
+ if !tag.is_empty()
+ && self.settings.heredoc_tag_is_identifier
+ && (self.is_end || !self.is_identifier(&tag))
+ {
+ if !self.is_end {
+ self.advance(-1)?;
+ }
+
self.advance(-(tag.len() as isize))?;
self.add(self.token_types.heredoc_string_alternative, None)?;
return Ok(true)
@@ -494,7 +488,7 @@ impl<'a> TokenizerState<'a> {
} else if self.peek_char.to_ascii_uppercase() == 'E' && scientific == 0 {
scientific += 1;
self.advance(1)?;
- } else if self.is_identifier(self.peek_char) {
+ } else if self.is_alphabetic_or_underscore(self.peek_char) {
let number_text = self.text();
let mut literal = String::from("");
@@ -676,10 +670,18 @@ impl<'a> TokenizerState<'a> {
Ok(text)
}
- fn is_identifier(&mut self, name: char) -> bool {
+ fn is_alphabetic_or_underscore(&mut self, name: char) -> bool {
name.is_alphabetic() || name == '_'
}
+ fn is_identifier(&mut self, s: &str) -> bool {
+ s.chars().enumerate().all(
+ |(i, c)|
+ if i == 0 { self.is_alphabetic_or_underscore(c) }
+ else { self.is_alphabetic_or_underscore(c) || c.is_digit(10) }
+ )
+ }
+
fn extract_value(&mut self) -> Result<String, TokenizerError> {
loop {
if !self.peek_char.is_whitespace()