summary | refs | log | tree | commit | diff | stats
path: root/sqlglotrs/src
diff options
context:
space:
mode:
Diffstat (limited to 'sqlglotrs/src')
-rw-r--r-- sqlglotrs/src/settings.rs 6
-rw-r--r-- sqlglotrs/src/tokenizer.rs 19
2 files changed, 24 insertions, 1 deletions
diff --git a/sqlglotrs/src/settings.rs b/sqlglotrs/src/settings.rs
index 32575c6..c6e76a7 100644
--- a/sqlglotrs/src/settings.rs
+++ b/sqlglotrs/src/settings.rs
@@ -17,6 +17,7 @@ pub struct TokenTypeSettings {
pub semicolon: TokenType,
pub string: TokenType,
pub var: TokenType,
+ pub heredoc_string_alternative: TokenType,
}
#[pymethods]
@@ -34,6 +35,7 @@ impl TokenTypeSettings {
semicolon: TokenType,
string: TokenType,
var: TokenType,
+ heredoc_string_alternative: TokenType,
) -> Self {
TokenTypeSettings {
bit_string,
@@ -47,6 +49,7 @@ impl TokenTypeSettings {
semicolon,
string,
var,
+ heredoc_string_alternative,
}
}
}
@@ -69,6 +72,7 @@ pub struct TokenizerSettings {
pub var_single_tokens: HashSet<char>,
pub commands: HashSet<TokenType>,
pub command_prefix_tokens: HashSet<TokenType>,
+ pub heredoc_tag_is_identifier: bool,
}
#[pymethods]
@@ -90,6 +94,7 @@ impl TokenizerSettings {
var_single_tokens: HashSet<String>,
commands: HashSet<TokenType>,
command_prefix_tokens: HashSet<TokenType>,
+ heredoc_tag_is_identifier: bool,
) -> Self {
let to_char = |v: &String| {
if v.len() == 1 {
@@ -138,6 +143,7 @@ impl TokenizerSettings {
var_single_tokens: var_single_tokens_native,
commands,
command_prefix_tokens,
+ heredoc_tag_is_identifier,
}
}
}
diff --git a/sqlglotrs/src/tokenizer.rs b/sqlglotrs/src/tokenizer.rs
index 920a5b5..94a8b08 100644
--- a/sqlglotrs/src/tokenizer.rs
+++ b/sqlglotrs/src/tokenizer.rs
@@ -399,6 +399,19 @@ impl<'a> TokenizerState<'a> {
} else if *token_type == self.token_types.bit_string {
(Some(2), *token_type, end.clone())
} else if *token_type == self.token_types.heredoc_string {
+ if self.settings.heredoc_tag_is_identifier
+ && !self.is_identifier(self.peek_char)
+ && self.peek_char.to_string() != *end
+ {
+ if self.token_types.heredoc_string_alternative != self.token_types.var {
+ self.add(self.token_types.heredoc_string_alternative, None)?
+ } else {
+ self.scan_var()?
+ };
+
+ return Ok(true)
+ };
+
self.advance(1)?;
let tag = if self.current_char.to_string() == *end {
String::from("")
@@ -469,7 +482,7 @@ impl<'a> TokenizerState<'a> {
} else if self.peek_char.to_ascii_uppercase() == 'E' && scientific == 0 {
scientific += 1;
self.advance(1)?;
- } else if self.peek_char.is_alphabetic() || self.peek_char == '_' {
+ } else if self.is_identifier(self.peek_char) {
let number_text = self.text();
let mut literal = String::from("");
@@ -643,6 +656,10 @@ impl<'a> TokenizerState<'a> {
Ok(text)
}
+ /// Returns `true` when `name` is an alphabetic character or an underscore.
+ ///
+ /// Takes `&self` because the check neither mutates nor reads tokenizer state;
+ /// callers holding `&mut self` can still call it unchanged.
+ fn is_identifier(&self, name: char) -> bool {
+ name == '_' || name.is_alphabetic()
+ }
+
fn extract_value(&mut self) -> Result<String, TokenizerError> {
loop {
if !self.peek_char.is_whitespace()