4 files changed, 26 insertions, 3 deletions
diff --git a/sqlglotrs/Cargo.lock b/sqlglotrs/Cargo.lock
index cd9a9ef..920ef8b 100644
--- a/sqlglotrs/Cargo.lock
+++ b/sqlglotrs/Cargo.lock
@@ -188,7 +188,7 @@ checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970"
 
 [[package]]
 name = "sqlglotrs"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "pyo3",
 ]
diff --git a/sqlglotrs/Cargo.toml b/sqlglotrs/Cargo.toml
index ece4a88..0c367d9 100644
--- a/sqlglotrs/Cargo.toml
+++ b/sqlglotrs/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "sqlglotrs"
-version = "0.1.0"
+version = "0.1.1"
 edition = "2021"
 
 [lib]
diff --git a/sqlglotrs/src/settings.rs b/sqlglotrs/src/settings.rs
index 32575c6..c6e76a7 100644
--- a/sqlglotrs/src/settings.rs
+++ b/sqlglotrs/src/settings.rs
@@ -17,6 +17,7 @@ pub struct TokenTypeSettings {
     pub semicolon: TokenType,
     pub string: TokenType,
     pub var: TokenType,
+    pub heredoc_string_alternative: TokenType,
 }
 
 #[pymethods]
@@ -34,6 +35,7 @@ impl TokenTypeSettings {
         semicolon: TokenType,
         string: TokenType,
         var: TokenType,
+        heredoc_string_alternative: TokenType,
     ) -> Self {
         TokenTypeSettings {
             bit_string,
@@ -47,6 +49,7 @@ impl TokenTypeSettings {
             semicolon,
             string,
             var,
+            heredoc_string_alternative,
         }
     }
 }
@@ -69,6 +72,7 @@ pub struct TokenizerSettings {
     pub var_single_tokens: HashSet<char>,
     pub commands: HashSet<TokenType>,
     pub command_prefix_tokens: HashSet<TokenType>,
+    pub heredoc_tag_is_identifier: bool,
 }
 
 #[pymethods]
@@ -90,6 +94,7 @@ impl TokenizerSettings {
         var_single_tokens: HashSet<String>,
         commands: HashSet<TokenType>,
         command_prefix_tokens: HashSet<TokenType>,
+        heredoc_tag_is_identifier: bool,
     ) -> Self {
         let to_char = |v: &String| {
             if v.len() == 1 {
@@ -138,6 +143,7 @@ impl TokenizerSettings {
             var_single_tokens: var_single_tokens_native,
             commands,
             command_prefix_tokens,
+            heredoc_tag_is_identifier,
         }
     }
 }
diff --git a/sqlglotrs/src/tokenizer.rs b/sqlglotrs/src/tokenizer.rs
index 920a5b5..94a8b08 100644
--- a/sqlglotrs/src/tokenizer.rs
+++ b/sqlglotrs/src/tokenizer.rs
@@ -399,6 +399,19 @@ impl<'a> TokenizerState<'a> {
             } else if *token_type == self.token_types.bit_string {
                 (Some(2), *token_type, end.clone())
             } else if *token_type == self.token_types.heredoc_string {
+                if self.settings.heredoc_tag_is_identifier
+                    && !self.is_identifier(self.peek_char)
+                    && self.peek_char.to_string() != *end
+                {
+                    if self.token_types.heredoc_string_alternative != self.token_types.var {
+                        self.add(self.token_types.heredoc_string_alternative, None)?
+                    } else {
+                        self.scan_var()?
+                    };
+
+                    return Ok(true)
+                };
+
                 self.advance(1)?;
                 let tag = if self.current_char.to_string() == *end {
                     String::from("")
@@ -469,7 +482,7 @@ impl<'a> TokenizerState<'a> {
             } else if self.peek_char.to_ascii_uppercase() == 'E' && scientific == 0 {
                 scientific += 1;
                 self.advance(1)?;
-            } else if self.peek_char.is_alphabetic() || self.peek_char == '_' {
+            } else if self.is_identifier(self.peek_char) {
                 let number_text = self.text();
                 let mut literal = String::from("");
 
@@ -643,6 +656,10 @@ impl<'a> TokenizerState<'a> {
         Ok(text)
     }
 
+    fn is_identifier(&self, name: char) -> bool {
+        name.is_alphabetic() || name == '_' // alphabetic char or underscore; pure check, no state touched
+    }
+
     fn extract_value(&mut self) -> Result<String, TokenizerError> {
         loop {
             if !self.peek_char.is_whitespace()