use pyo3::prelude::*; use std::collections::{HashMap, HashSet}; pub type TokenType = u16; #[derive(Clone, Debug)] #[pyclass] pub struct TokenTypeSettings { pub bit_string: TokenType, pub break_: TokenType, pub dcolon: TokenType, pub heredoc_string: TokenType, pub raw_string: TokenType, pub hex_string: TokenType, pub identifier: TokenType, pub number: TokenType, pub parameter: TokenType, pub semicolon: TokenType, pub string: TokenType, pub var: TokenType, pub heredoc_string_alternative: TokenType, } #[pymethods] impl TokenTypeSettings { #[new] pub fn new( bit_string: TokenType, break_: TokenType, dcolon: TokenType, heredoc_string: TokenType, raw_string: TokenType, hex_string: TokenType, identifier: TokenType, number: TokenType, parameter: TokenType, semicolon: TokenType, string: TokenType, var: TokenType, heredoc_string_alternative: TokenType, ) -> Self { TokenTypeSettings { bit_string, break_, dcolon, heredoc_string, raw_string, hex_string, identifier, number, parameter, semicolon, string, var, heredoc_string_alternative, } } } #[derive(Clone, Debug)] #[pyclass] pub struct TokenizerSettings { pub white_space: HashMap, pub single_tokens: HashMap, pub keywords: HashMap, pub numeric_literals: HashMap, pub identifiers: HashMap, pub identifier_escapes: HashSet, pub string_escapes: HashSet, pub quotes: HashMap, pub format_strings: HashMap, pub has_bit_strings: bool, pub has_hex_strings: bool, pub comments: HashMap>, pub var_single_tokens: HashSet, pub commands: HashSet, pub command_prefix_tokens: HashSet, pub heredoc_tag_is_identifier: bool, pub string_escapes_allowed_in_raw_strings: bool, } #[pymethods] impl TokenizerSettings { #[new] pub fn new( white_space: HashMap, single_tokens: HashMap, keywords: HashMap, numeric_literals: HashMap, identifiers: HashMap, identifier_escapes: HashSet, string_escapes: HashSet, quotes: HashMap, format_strings: HashMap, has_bit_strings: bool, has_hex_strings: bool, comments: HashMap>, var_single_tokens: HashSet, commands: HashSet, command_prefix_tokens: HashSet, heredoc_tag_is_identifier: bool, string_escapes_allowed_in_raw_strings: bool, ) -> Self { let to_char = |v: &String| { if v.len() == 1 { v.chars().next().unwrap() } else { panic!("Invalid char: {}", v) } }; let white_space_native: HashMap = white_space .into_iter() .map(|(k, v)| (to_char(&k), v)) .collect(); let single_tokens_native: HashMap = single_tokens .into_iter() .map(|(k, v)| (to_char(&k), v)) .collect(); let identifiers_native: HashMap = identifiers .iter() .map(|(k, v)| (to_char(k), to_char(v))) .collect(); let identifier_escapes_native: HashSet = identifier_escapes.iter().map(&to_char).collect(); let string_escapes_native: HashSet = string_escapes.iter().map(&to_char).collect(); let var_single_tokens_native: HashSet = var_single_tokens.iter().map(&to_char).collect(); TokenizerSettings { white_space: white_space_native, single_tokens: single_tokens_native, keywords, numeric_literals, identifiers: identifiers_native, identifier_escapes: identifier_escapes_native, string_escapes: string_escapes_native, quotes, format_strings, has_bit_strings, has_hex_strings, comments, var_single_tokens: var_single_tokens_native, commands, command_prefix_tokens, heredoc_tag_is_identifier, string_escapes_allowed_in_raw_strings, } } } #[derive(Clone, Debug)] #[pyclass] pub struct TokenizerDialectSettings { pub unescaped_sequences: HashMap, pub identifiers_can_start_with_digit: bool, } #[pymethods] impl TokenizerDialectSettings { #[new] pub fn new( unescaped_sequences: HashMap, identifiers_can_start_with_digit: bool, ) -> Self { TokenizerDialectSettings { unescaped_sequences, identifiers_can_start_with_digit, } } }