author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2025-01-18 06:24:36 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2025-01-18 06:24:36 +0000
commit    | 3ce88d0e00da70b1addf38b4287068e4a89cff13 (patch)
tree      | 8e7a1cffef630f437c82d2798d5723c001307393 /sqlglotrs/src
parent    | Releasing debian version 26.1.3-1. (diff)
download  | sqlglot-3ce88d0e00da70b1addf38b4287068e4a89cff13.tar.xz
          | sqlglot-3ce88d0e00da70b1addf38b4287068e4a89cff13.zip
Merging upstream version 26.2.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglotrs/src')
-rw-r--r-- | sqlglotrs/src/settings.rs  |  4
-rw-r--r-- | sqlglotrs/src/token.rs     |  4
-rw-r--r-- | sqlglotrs/src/tokenizer.rs | 33
-rw-r--r-- | sqlglotrs/src/trie.rs      | 17
4 files changed, 21 insertions, 37 deletions
diff --git a/sqlglotrs/src/settings.rs b/sqlglotrs/src/settings.rs
index b0c951c..c7e3b41 100644
--- a/sqlglotrs/src/settings.rs
+++ b/sqlglotrs/src/settings.rs
@@ -1,6 +1,6 @@
-use std::collections::{HashMap, HashSet};
-
 use pyo3::prelude::*;
+use rustc_hash::FxHashMap as HashMap;
+use rustc_hash::FxHashSet as HashSet;
 
 pub type TokenType = u16;
diff --git a/sqlglotrs/src/token.rs b/sqlglotrs/src/token.rs
index 3352469..48698fc 100644
--- a/sqlglotrs/src/token.rs
+++ b/sqlglotrs/src/token.rs
@@ -49,13 +49,11 @@ impl Token {
     pub fn append_comments(&self, comments: &mut Vec<String>) {
         Python::with_gil(|py| {
             let pylist = self.comments.bind(py);
-            for comment in comments.iter() {
+            for comment in comments.drain(..) {
                 if let Err(_) = pylist.append(comment) {
                     panic!("Failed to append comments to the Python list");
                 }
             }
         });
-        // Simulate `Vec::append`.
-        let _ = std::mem::replace(comments, Vec::new());
     }
 }
diff --git a/sqlglotrs/src/tokenizer.rs b/sqlglotrs/src/tokenizer.rs
index 2ffe45f..3092c4d 100644
--- a/sqlglotrs/src/tokenizer.rs
+++ b/sqlglotrs/src/tokenizer.rs
@@ -23,14 +23,11 @@ pub struct Tokenizer {
 impl Tokenizer {
     #[new]
     pub fn new(settings: TokenizerSettings, token_types: TokenTypeSettings) -> Tokenizer {
-        let mut keyword_trie = Trie::new();
-        let single_token_strs: Vec<String> = settings
-            .single_tokens
-            .keys()
-            .map(|s| s.to_string())
-            .collect();
-        let trie_filter =
-            |key: &&String| key.contains(" ") || single_token_strs.iter().any(|t| key.contains(t));
+        let mut keyword_trie = Trie::default();
+
+        let trie_filter = |key: &&String| {
+            key.contains(" ") || settings.single_tokens.keys().any(|&t| key.contains(t))
+        };
 
         keyword_trie.add(settings.keywords.keys().filter(trie_filter));
         keyword_trie.add(settings.comments.keys().filter(trie_filter));
@@ -114,7 +111,7 @@ impl<'a> TokenizerState<'a> {
 
     fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
         self.scan(None)?;
-        Ok(std::mem::replace(&mut self.tokens, Vec::new()))
+        Ok(std::mem::take(&mut self.tokens))
     }
 
     fn scan(&mut self, until_peek_char: Option<char>) -> Result<(), TokenizerError> {
@@ -146,7 +143,7 @@ impl<'a> TokenizerState<'a> {
            }
 
            if !self.settings.white_space.contains_key(&self.current_char) {
-                if self.current_char.is_digit(10) {
+                if self.current_char.is_ascii_digit() {
                    self.scan_number()?;
                } else if let Some(identifier_end) =
                    self.settings.identifiers.get(&self.current_char)
@@ -205,7 +202,7 @@ impl<'a> TokenizerState<'a> {
    }
 
    fn char_at(&self, index: usize) -> Result<char, TokenizerError> {
-        self.sql.get(index).map(|c| *c).ok_or_else(|| {
+        self.sql.get(index).copied().ok_or_else(|| {
            self.error(format!(
                "Index {} is out of bound (size {})",
                index, self.size
@@ -237,7 +234,7 @@ impl<'a> TokenizerState<'a> {
            self.column,
            self.start,
            self.current - 1,
-            std::mem::replace(&mut self.comments, Vec::new()),
+            std::mem::take(&mut self.comments),
        ));
 
        // If we have either a semicolon or a begin token before the command's token, we'll parse
@@ -503,7 +500,7 @@ impl<'a> TokenizerState<'a> {
        let mut scientific = 0;
 
        loop {
-            if self.peek_char.is_digit(10) {
+            if self.peek_char.is_ascii_digit() {
                self.advance(1)?;
            } else if self.peek_char == '.' && !decimal {
                if self.tokens.last().map(|t| t.token_type) == Some(self.token_types.parameter) {
@@ -537,8 +534,7 @@ impl<'a> TokenizerState<'a> {
                    .numeric_literals
                    .get(&literal.to_uppercase())
                    .unwrap_or(&String::from("")),
-            )
-            .map(|x| *x);
+            ).copied();
 
        let replaced = literal.replace("_", "");
@@ -607,8 +603,7 @@ impl<'a> TokenizerState<'a> {
        } else {
            self.settings
                .keywords
-                .get(&self.text().to_uppercase())
-                .map(|x| *x)
+                .get(&self.text().to_uppercase()).copied()
                .unwrap_or(self.token_types.var)
        };
        self.add(token_type, None)
@@ -718,13 +713,13 @@ impl<'a> TokenizerState<'a> {
            if i == 0 {
                self.is_alphabetic_or_underscore(c)
            } else {
-                self.is_alphabetic_or_underscore(c) || c.is_digit(10)
+                self.is_alphabetic_or_underscore(c) || c.is_ascii_digit()
            }
        })
    }
 
    fn is_numeric(&mut self, s: &str) -> bool {
-        s.chars().all(|c| c.is_digit(10))
+        s.chars().all(|c| c.is_ascii_digit())
    }
 
    fn extract_value(&mut self) -> Result<String, TokenizerError> {
diff --git a/sqlglotrs/src/trie.rs b/sqlglotrs/src/trie.rs
index 8e6f20c..25f9fb5 100644
--- a/sqlglotrs/src/trie.rs
+++ b/sqlglotrs/src/trie.rs
@@ -1,6 +1,6 @@
-use std::collections::HashMap;
+use rustc_hash::FxHashMap as HashMap;
 
-#[derive(Debug)]
+#[derive(Debug, Default)]
 pub struct TrieNode {
     is_word: bool,
     children: HashMap<char, TrieNode>,
@@ -35,21 +35,12 @@ impl TrieNode {
     }
 }
 
-#[derive(Debug)]
+#[derive(Debug, Default)]
 pub struct Trie {
     pub root: TrieNode,
 }
 
 impl Trie {
-    pub fn new() -> Self {
-        Trie {
-            root: TrieNode {
-                is_word: false,
-                children: HashMap::new(),
-            },
-        }
-    }
-
     pub fn add<'a, I>(&mut self, keys: I)
     where
         I: Iterator<Item = &'a String>,
@@ -59,7 +50,7 @@ impl Trie {
            for c in key.chars() {
                current = current.children.entry(c).or_insert(TrieNode {
                    is_word: false,
-                    children: HashMap::new(),
+                    children: HashMap::default(),
                });
            }
            current.is_word = true;
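The upstream change above leans on a few standard Rust idioms: rustc_hash::FxHashMap/FxHashSet in place of the std collections, #[derive(Default)] instead of the hand-written Trie::new(), std::mem::take instead of mem::replace(_, Vec::new()), Vec::drain to move comments out, and Option::copied() instead of .map(|x| *x). Below is a minimal, self-contained sketch of those idioms, not sqlglotrs source: the names (main, the example keywords) are illustrative only, and it assumes the rustc-hash crate is declared as a dependency.

// Minimal sketch (not sqlglotrs code) of the idioms adopted in this diff.
// Assumes `rustc-hash` is listed as a dependency in Cargo.toml.
use rustc_hash::FxHashMap as HashMap;

#[derive(Debug, Default)]
struct TrieNode {
    is_word: bool,
    children: HashMap<char, TrieNode>,
}

#[derive(Debug, Default)]
struct Trie {
    root: TrieNode,
}

impl Trie {
    fn add<'a, I: Iterator<Item = &'a String>>(&mut self, keys: I) {
        for key in keys {
            let mut current = &mut self.root;
            for c in key.chars() {
                // `or_default()` inserts an empty node on demand, much like the
                // diff's `or_insert(TrieNode { .. })` with `HashMap::default()` children.
                current = current.children.entry(c).or_default();
            }
            current.is_word = true;
        }
    }
}

fn main() {
    // `Trie::default()` stands in for the removed hand-written `Trie::new()`.
    let mut trie = Trie::default();
    let keywords = vec!["SELECT".to_string(), "SET".to_string()];
    trie.add(keywords.iter());

    // `std::mem::take` moves the Vec out and leaves an empty one behind,
    // as in the tokenizer's `take(&mut self.tokens)` / `take(&mut self.comments)`.
    let mut comments = vec!["c1".to_string(), "c2".to_string()];
    let drained: Vec<String> = std::mem::take(&mut comments);
    assert!(comments.is_empty());

    // `Option::copied()` replaces `.map(|x| *x)` on an `Option<&u16>`.
    let keyword_ids: HashMap<String, u16> =
        [("SELECT".to_string(), 1u16)].into_iter().collect();
    let token_type: Option<u16> = keyword_ids.get("SELECT").copied();

    println!("{:?} {:?} {}", token_type, drained, trie.root.is_word);
}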