path: root/sqlglotrs/src
author    Daniel Baumann <daniel.baumann@progress-linux.org>  2025-01-18 06:24:36 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2025-01-18 06:24:36 +0000
commit    3ce88d0e00da70b1addf38b4287068e4a89cff13 (patch)
tree      8e7a1cffef630f437c82d2798d5723c001307393 /sqlglotrs/src
parent    Releasing debian version 26.1.3-1. (diff)
Merging upstream version 26.2.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglotrs/src')
-rw-r--r--  sqlglotrs/src/settings.rs    4
-rw-r--r--  sqlglotrs/src/token.rs       4
-rw-r--r--  sqlglotrs/src/tokenizer.rs  33
-rw-r--r--  sqlglotrs/src/trie.rs       17

4 files changed, 21 insertions, 37 deletions
diff --git a/sqlglotrs/src/settings.rs b/sqlglotrs/src/settings.rs
index b0c951c..c7e3b41 100644
--- a/sqlglotrs/src/settings.rs
+++ b/sqlglotrs/src/settings.rs
@@ -1,6 +1,6 @@
-use std::collections::{HashMap, HashSet};
-
use pyo3::prelude::*;
+use rustc_hash::FxHashMap as HashMap;
+use rustc_hash::FxHashSet as HashSet;
pub type TokenType = u16;
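
Note: the settings.rs change above swaps the std collections for rustc-hash's
Fx-hashed equivalents via type aliases, so the rest of the file compiles
unchanged. A minimal sketch of the pattern, assuming rustc-hash is declared as
a dependency in Cargo.toml (the manifest change is not part of this diff):

    use rustc_hash::FxHashMap as HashMap;
    use rustc_hash::FxHashSet as HashSet;

    fn main() {
        // Fx-hashed containers carry a non-default hasher type parameter,
        // so they are constructed with `default()` rather than `new()`.
        let mut keywords: HashMap<String, u16> = HashMap::default();
        keywords.insert("SELECT".to_string(), 1);

        let mut reserved: HashSet<String> = HashSet::default();
        reserved.insert("FROM".to_string());

        assert_eq!(keywords.get("SELECT"), Some(&1));
        assert!(reserved.contains("FROM"));
    }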
diff --git a/sqlglotrs/src/token.rs b/sqlglotrs/src/token.rs
index 3352469..48698fc 100644
--- a/sqlglotrs/src/token.rs
+++ b/sqlglotrs/src/token.rs
@@ -49,13 +49,11 @@ impl Token {
pub fn append_comments(&self, comments: &mut Vec<String>) {
Python::with_gil(|py| {
let pylist = self.comments.bind(py);
- for comment in comments.iter() {
+ for comment in comments.drain(..) {
if let Err(_) = pylist.append(comment) {
panic!("Failed to append comments to the Python list");
}
}
});
- // Simulate `Vec::append`.
- let _ = std::mem::replace(comments, Vec::new());
}
}
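
Note: Vec::drain(..) yields owned elements and leaves the vector empty, so the
manual `std::mem::replace(comments, Vec::new())` that used to simulate
`Vec::append` is now redundant. A standalone sketch of the idiom, with a plain
Vec standing in for the PyO3 list (this `append_comments` is a hypothetical
stand-in, not the Token method):

    fn append_comments(dest: &mut Vec<String>, comments: &mut Vec<String>) {
        // drain(..) moves every String out of `comments` and leaves it empty.
        for comment in comments.drain(..) {
            dest.push(comment);
        }
    }

    fn main() {
        let mut dest = vec!["first".to_string()];
        let mut src = vec!["second".to_string(), "third".to_string()];
        append_comments(&mut dest, &mut src);
        assert_eq!(dest, ["first", "second", "third"]);
        assert!(src.is_empty());
    }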
diff --git a/sqlglotrs/src/tokenizer.rs b/sqlglotrs/src/tokenizer.rs
index 2ffe45f..3092c4d 100644
--- a/sqlglotrs/src/tokenizer.rs
+++ b/sqlglotrs/src/tokenizer.rs
@@ -23,14 +23,11 @@ pub struct Tokenizer {
impl Tokenizer {
#[new]
pub fn new(settings: TokenizerSettings, token_types: TokenTypeSettings) -> Tokenizer {
- let mut keyword_trie = Trie::new();
- let single_token_strs: Vec<String> = settings
- .single_tokens
- .keys()
- .map(|s| s.to_string())
- .collect();
- let trie_filter =
- |key: &&String| key.contains(" ") || single_token_strs.iter().any(|t| key.contains(t));
+ let mut keyword_trie = Trie::default();
+
+ let trie_filter = |key: &&String| {
+ key.contains(" ") || settings.single_tokens.keys().any(|&t| key.contains(t))
+ };
keyword_trie.add(settings.keywords.keys().filter(trie_filter));
keyword_trie.add(settings.comments.keys().filter(trie_filter));
@@ -114,7 +111,7 @@ impl<'a> TokenizerState<'a> {
fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
self.scan(None)?;
- Ok(std::mem::replace(&mut self.tokens, Vec::new()))
+ Ok(std::mem::take(&mut self.tokens))
}
fn scan(&mut self, until_peek_char: Option<char>) -> Result<(), TokenizerError> {
@@ -146,7 +143,7 @@ impl<'a> TokenizerState<'a> {
}
if !self.settings.white_space.contains_key(&self.current_char) {
- if self.current_char.is_digit(10) {
+ if self.current_char.is_ascii_digit() {
self.scan_number()?;
} else if let Some(identifier_end) =
self.settings.identifiers.get(&self.current_char)
@@ -205,7 +202,7 @@ impl<'a> TokenizerState<'a> {
}
fn char_at(&self, index: usize) -> Result<char, TokenizerError> {
- self.sql.get(index).map(|c| *c).ok_or_else(|| {
+ self.sql.get(index).copied().ok_or_else(|| {
self.error(format!(
"Index {} is out of bound (size {})",
index, self.size
@@ -237,7 +234,7 @@ impl<'a> TokenizerState<'a> {
self.column,
self.start,
self.current - 1,
- std::mem::replace(&mut self.comments, Vec::new()),
+ std::mem::take(&mut self.comments),
));
// If we have either a semicolon or a begin token before the command's token, we'll parse
@@ -503,7 +500,7 @@ impl<'a> TokenizerState<'a> {
let mut scientific = 0;
loop {
- if self.peek_char.is_digit(10) {
+ if self.peek_char.is_ascii_digit() {
self.advance(1)?;
} else if self.peek_char == '.' && !decimal {
if self.tokens.last().map(|t| t.token_type) == Some(self.token_types.parameter) {
@@ -537,8 +534,7 @@ impl<'a> TokenizerState<'a> {
.numeric_literals
.get(&literal.to_uppercase())
.unwrap_or(&String::from("")),
- )
- .map(|x| *x);
+ ).copied();
let replaced = literal.replace("_", "");
@@ -607,8 +603,7 @@ impl<'a> TokenizerState<'a> {
} else {
self.settings
.keywords
- .get(&self.text().to_uppercase())
- .map(|x| *x)
+ .get(&self.text().to_uppercase()).copied()
.unwrap_or(self.token_types.var)
};
self.add(token_type, None)
@@ -718,13 +713,13 @@ impl<'a> TokenizerState<'a> {
if i == 0 {
self.is_alphabetic_or_underscore(c)
} else {
- self.is_alphabetic_or_underscore(c) || c.is_digit(10)
+ self.is_alphabetic_or_underscore(c) || c.is_ascii_digit()
}
})
}
fn is_numeric(&mut self, s: &str) -> bool {
- s.chars().all(|c| c.is_digit(10))
+ s.chars().all(|c| c.is_ascii_digit())
}
fn extract_value(&mut self) -> Result<String, TokenizerError> {
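
Note: the tokenizer.rs hunks apply three recurring cleanups: std::mem::take in
place of std::mem::replace(_, Vec::new()), char::is_ascii_digit in place of
is_digit(10), and Option::copied in place of .map(|x| *x). A standalone sketch
of all three idioms (the values here are illustrative only):

    fn main() {
        // mem::take moves the value out and leaves Default::default() behind,
        // which is what tokenize() and add() above want for their Vec fields.
        let mut tokens = vec!["SELECT".to_string(), "1".to_string()];
        let moved = std::mem::take(&mut tokens);
        assert_eq!(moved.len(), 2);
        assert!(tokens.is_empty());

        // is_ascii_digit() is the idiomatic spelling of is_digit(10) when the
        // radix is always 10.
        assert!('7'.is_ascii_digit());
        assert!(!'x'.is_ascii_digit());

        // Option::<&T>::copied() replaces .map(|x| *x) for T: Copy, e.g. the
        // Option<&TokenType> returned by HashMap::get.
        let token_type: u16 = 42;
        let looked_up: Option<&u16> = Some(&token_type);
        assert_eq!(looked_up.copied(), Some(42));
    }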
diff --git a/sqlglotrs/src/trie.rs b/sqlglotrs/src/trie.rs
index 8e6f20c..25f9fb5 100644
--- a/sqlglotrs/src/trie.rs
+++ b/sqlglotrs/src/trie.rs
@@ -1,6 +1,6 @@
-use std::collections::HashMap;
+use rustc_hash::FxHashMap as HashMap;
-#[derive(Debug)]
+#[derive(Debug, Default)]
pub struct TrieNode {
is_word: bool,
children: HashMap<char, TrieNode>,
@@ -35,21 +35,12 @@ impl TrieNode {
}
}
-#[derive(Debug)]
+#[derive(Debug, Default)]
pub struct Trie {
pub root: TrieNode,
}
impl Trie {
- pub fn new() -> Self {
- Trie {
- root: TrieNode {
- is_word: false,
- children: HashMap::new(),
- },
- }
- }
-
pub fn add<'a, I>(&mut self, keys: I)
where
I: Iterator<Item = &'a String>,
@@ -59,7 +50,7 @@ impl Trie {
for c in key.chars() {
current = current.children.entry(c).or_insert(TrieNode {
is_word: false,
- children: HashMap::new(),
+ children: HashMap::default(),
});
}
current.is_word = true;
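
Note: with the hash map aliased in, both TrieNode and Trie are plain products
of Default-implementing fields, so derive(Default) replaces the handwritten
Trie::new() removed above. A minimal sketch, using std's HashMap as a stand-in
for FxHashMap:

    use std::collections::HashMap;

    #[derive(Debug, Default)]
    struct TrieNode {
        is_word: bool,
        children: HashMap<char, TrieNode>,
    }

    #[derive(Debug, Default)]
    struct Trie {
        root: TrieNode,
    }

    fn main() {
        // Trie::default() builds what the removed Trie::new() built by hand.
        let mut trie = Trie::default();

        // Entry::or_default() is an equivalent shorthand for the
        // or_insert(TrieNode { .. }) construction kept in the hunk above.
        let node = trie.root.children.entry('a').or_default();
        node.is_word = true;

        assert!(trie.root.children.contains_key(&'a'));
    }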