summaryrefslogtreecommitdiffstats
path: root/sqlglotrs/src
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2025-01-27 16:31:23 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2025-01-27 16:31:23 +0000
commit886e522786b50d8cb126baacbd1eaf92861832ec (patch)
tree6d9736731779fe0f981e8a3692c9eeb37e40fbf3 /sqlglotrs/src
parentReleasing debian version 26.2.1-1. (diff)
downloadsqlglot-886e522786b50d8cb126baacbd1eaf92861832ec.tar.xz
sqlglot-886e522786b50d8cb126baacbd1eaf92861832ec.zip
Merging upstream version 26.3.8.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglotrs/src')
-rw-r--r--sqlglotrs/src/tokenizer.rs119
1 files changed, 62 insertions, 57 deletions
diff --git a/sqlglotrs/src/tokenizer.rs b/sqlglotrs/src/tokenizer.rs
index 3092c4d..41d6c14 100644
--- a/sqlglotrs/src/tokenizer.rs
+++ b/sqlglotrs/src/tokenizer.rs
@@ -197,7 +197,7 @@ impl<'a> TokenizerState<'a> {
if end <= self.size {
self.sql[start..end].iter().collect()
} else {
- String::from("")
+ String::new()
}
}
@@ -433,7 +433,7 @@ impl<'a> TokenizerState<'a> {
self.advance(1)?;
let tag = if self.current_char.to_string() == *end {
- String::from("")
+ String::new()
} else {
self.extract_string(end, false, true, !self.settings.heredoc_tag_is_identifier)?
};
@@ -516,7 +516,7 @@ impl<'a> TokenizerState<'a> {
self.advance(1)?;
} else if self.is_alphabetic_or_underscore(self.peek_char) {
let number_text = self.text();
- let mut literal = String::from("");
+ let mut literal = String::new();
while !self.peek_char.is_whitespace()
&& !self.is_end
@@ -533,8 +533,9 @@ impl<'a> TokenizerState<'a> {
self.settings
.numeric_literals
.get(&literal.to_uppercase())
- .unwrap_or(&String::from("")),
- ).copied();
+ .unwrap_or(&String::new()),
+ )
+ .copied();
let replaced = literal.replace("_", "");
@@ -603,7 +604,8 @@ impl<'a> TokenizerState<'a> {
} else {
self.settings
.keywords
- .get(&self.text().to_uppercase()).copied()
+ .get(&self.text().to_uppercase())
+ .copied()
.unwrap_or(self.token_types.var)
};
self.add(token_type, None)
@@ -622,19 +624,19 @@ impl<'a> TokenizerState<'a> {
raw_string: bool,
raise_unmatched: bool,
) -> Result<String, TokenizerError> {
- let mut text = String::from("");
+ let mut text = String::new();
+ let mut combined_identifier_escapes = None;
+ if use_identifier_escapes {
+ let mut tmp = self.settings.identifier_escapes.clone();
+ tmp.extend(delimiter.chars());
+ combined_identifier_escapes = Some(tmp);
+ }
+ let escapes = match combined_identifier_escapes {
+ Some(ref v) => v,
+ None => &self.settings.string_escapes,
+ };
loop {
- let mut new_identifier_escapes;
- let escapes = if use_identifier_escapes {
- new_identifier_escapes = self.settings.identifier_escapes.clone();
- new_identifier_escapes.extend(delimiter.chars());
- &new_identifier_escapes
- } else {
- &self.settings.string_escapes
- };
- let peek_char_str = self.peek_char.to_string();
-
if !raw_string
&& !self.dialect_settings.unescaped_sequences.is_empty()
&& !self.peek_char.is_whitespace()
@@ -652,63 +654,66 @@ impl<'a> TokenizerState<'a> {
if (self.settings.string_escapes_allowed_in_raw_strings || !raw_string)
&& escapes.contains(&self.current_char)
- && (peek_char_str == delimiter || escapes.contains(&self.peek_char))
&& (self.current_char == self.peek_char
|| !self
.settings
.quotes
.contains_key(&self.current_char.to_string()))
{
- if peek_char_str == delimiter {
- text.push(self.peek_char);
- } else {
- text.push(self.current_char);
- text.push(self.peek_char);
- }
- if self.current + 1 < self.size {
- self.advance(2)?;
- } else {
- return self.error_result(format!(
- "Missing {} from {}:{}",
- delimiter, self.line, self.current
- ));
- }
- } else {
- if self.chars(delimiter.len()) == delimiter {
- if delimiter.len() > 1 {
- self.advance((delimiter.len() - 1) as isize)?;
- }
- break;
- }
- if self.is_end {
- if !raise_unmatched {
+ let peek_char_str = self.peek_char.to_string();
+ let equal_delimiter = delimiter == peek_char_str;
+ if equal_delimiter || escapes.contains(&self.peek_char) {
+ if equal_delimiter {
+ text.push(self.peek_char);
+ } else {
text.push(self.current_char);
- return Ok(text);
+ text.push(self.peek_char);
}
-
- return self.error_result(format!(
- "Missing {} from {}:{}",
- delimiter, self.line, self.current
- ));
+ if self.current + 1 < self.size {
+ self.advance(2)?;
+ } else {
+ return self.error_result(format!(
+ "Missing {} from {}:{}",
+ delimiter, self.line, self.current
+ ));
+ }
+ continue;
+ }
+ }
+ if self.chars(delimiter.len()) == delimiter {
+ if delimiter.len() > 1 {
+ self.advance((delimiter.len() - 1) as isize)?;
+ }
+ break;
+ }
+ if self.is_end {
+ if !raise_unmatched {
+ text.push(self.current_char);
+ return Ok(text);
}
- let current = self.current - 1;
- self.advance(1)?;
- text.push_str(
- &self.sql[current..self.current - 1]
- .iter()
- .collect::<String>(),
- );
+ return self.error_result(format!(
+ "Missing {} from {}:{}",
+ delimiter, self.line, self.current
+ ));
}
+
+ let current = self.current - 1;
+ self.advance(1)?;
+ text.push_str(
+ &self.sql[current..self.current - 1]
+ .iter()
+ .collect::<String>(),
+ );
}
Ok(text)
}
- fn is_alphabetic_or_underscore(&mut self, name: char) -> bool {
+ fn is_alphabetic_or_underscore(&self, name: char) -> bool {
name.is_alphabetic() || name == '_'
}
- fn is_identifier(&mut self, s: &str) -> bool {
+ fn is_identifier(&self, s: &str) -> bool {
s.chars().enumerate().all(|(i, c)| {
if i == 0 {
self.is_alphabetic_or_underscore(c)
@@ -718,7 +723,7 @@ impl<'a> TokenizerState<'a> {
})
}
- fn is_numeric(&mut self, s: &str) -> bool {
+ fn is_numeric(&self, s: &str) -> bool {
s.chars().all(|c| c.is_ascii_digit())
}