summaryrefslogtreecommitdiffstats
path: root/sqlglotrs/src/settings.rs
diff options
context:
space:
mode:
Diffstat (limited to 'sqlglotrs/src/settings.rs')
-rw-r--r--sqlglotrs/src/settings.rs164
1 files changed, 164 insertions, 0 deletions
diff --git a/sqlglotrs/src/settings.rs b/sqlglotrs/src/settings.rs
new file mode 100644
index 0000000..32575c6
--- /dev/null
+++ b/sqlglotrs/src/settings.rs
@@ -0,0 +1,164 @@
+use pyo3::prelude::*;
+use std::collections::{HashMap, HashSet};
+
+pub type TokenType = u16;
+
+#[derive(Clone, Debug)]
+#[pyclass]
+pub struct TokenTypeSettings {
+ pub bit_string: TokenType,
+ pub break_: TokenType,
+ pub dcolon: TokenType,
+ pub heredoc_string: TokenType,
+ pub hex_string: TokenType,
+ pub identifier: TokenType,
+ pub number: TokenType,
+ pub parameter: TokenType,
+ pub semicolon: TokenType,
+ pub string: TokenType,
+ pub var: TokenType,
+}
+
+#[pymethods]
+impl TokenTypeSettings {
+ #[new]
+ pub fn new(
+ bit_string: TokenType,
+ break_: TokenType,
+ dcolon: TokenType,
+ heredoc_string: TokenType,
+ hex_string: TokenType,
+ identifier: TokenType,
+ number: TokenType,
+ parameter: TokenType,
+ semicolon: TokenType,
+ string: TokenType,
+ var: TokenType,
+ ) -> Self {
+ TokenTypeSettings {
+ bit_string,
+ break_,
+ dcolon,
+ heredoc_string,
+ hex_string,
+ identifier,
+ number,
+ parameter,
+ semicolon,
+ string,
+ var,
+ }
+ }
+}
+
+#[derive(Clone, Debug)]
+#[pyclass]
+pub struct TokenizerSettings {
+ pub white_space: HashMap<char, TokenType>,
+ pub single_tokens: HashMap<char, TokenType>,
+ pub keywords: HashMap<String, TokenType>,
+ pub numeric_literals: HashMap<String, String>,
+ pub identifiers: HashMap<char, char>,
+ pub identifier_escapes: HashSet<char>,
+ pub string_escapes: HashSet<char>,
+ pub quotes: HashMap<String, String>,
+ pub format_strings: HashMap<String, (String, TokenType)>,
+ pub has_bit_strings: bool,
+ pub has_hex_strings: bool,
+ pub comments: HashMap<String, Option<String>>,
+ pub var_single_tokens: HashSet<char>,
+ pub commands: HashSet<TokenType>,
+ pub command_prefix_tokens: HashSet<TokenType>,
+}
+
+#[pymethods]
+impl TokenizerSettings {
+ #[new]
+ pub fn new(
+ white_space: HashMap<String, TokenType>,
+ single_tokens: HashMap<String, TokenType>,
+ keywords: HashMap<String, TokenType>,
+ numeric_literals: HashMap<String, String>,
+ identifiers: HashMap<String, String>,
+ identifier_escapes: HashSet<String>,
+ string_escapes: HashSet<String>,
+ quotes: HashMap<String, String>,
+ format_strings: HashMap<String, (String, TokenType)>,
+ has_bit_strings: bool,
+ has_hex_strings: bool,
+ comments: HashMap<String, Option<String>>,
+ var_single_tokens: HashSet<String>,
+ commands: HashSet<TokenType>,
+ command_prefix_tokens: HashSet<TokenType>,
+ ) -> Self {
+ let to_char = |v: &String| {
+ if v.len() == 1 {
+ v.chars().next().unwrap()
+ } else {
+ panic!("Invalid char: {}", v)
+ }
+ };
+
+ let white_space_native: HashMap<char, TokenType> = white_space
+ .into_iter()
+ .map(|(k, v)| (to_char(&k), v))
+ .collect();
+
+ let single_tokens_native: HashMap<char, TokenType> = single_tokens
+ .into_iter()
+ .map(|(k, v)| (to_char(&k), v))
+ .collect();
+
+ let identifiers_native: HashMap<char, char> = identifiers
+ .iter()
+ .map(|(k, v)| (to_char(k), to_char(v)))
+ .collect();
+
+ let identifier_escapes_native: HashSet<char> =
+ identifier_escapes.iter().map(&to_char).collect();
+
+ let string_escapes_native: HashSet<char> = string_escapes.iter().map(&to_char).collect();
+
+ let var_single_tokens_native: HashSet<char> =
+ var_single_tokens.iter().map(&to_char).collect();
+
+ TokenizerSettings {
+ white_space: white_space_native,
+ single_tokens: single_tokens_native,
+ keywords,
+ numeric_literals,
+ identifiers: identifiers_native,
+ identifier_escapes: identifier_escapes_native,
+ string_escapes: string_escapes_native,
+ quotes,
+ format_strings,
+ has_bit_strings,
+ has_hex_strings,
+ comments,
+ var_single_tokens: var_single_tokens_native,
+ commands,
+ command_prefix_tokens,
+ }
+ }
+}
+
+#[derive(Clone, Debug)]
+#[pyclass]
+pub struct TokenizerDialectSettings {
+ pub escape_sequences: HashMap<String, String>,
+ pub identifiers_can_start_with_digit: bool,
+}
+
+#[pymethods]
+impl TokenizerDialectSettings {
+ #[new]
+ pub fn new(
+ escape_sequences: HashMap<String, String>,
+ identifiers_can_start_with_digit: bool,
+ ) -> Self {
+ TokenizerDialectSettings {
+ escape_sequences,
+ identifiers_can_start_with_digit,
+ }
+ }
+}