1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
use pyo3::prelude::*;
use pyo3::types::{PyList, PyNone, PyString};
mod settings;
mod tokenizer;
mod trie;
pub use self::settings::{
TokenType, TokenTypeSettings, TokenizerDialectSettings, TokenizerSettings,
};
pub use self::tokenizer::Tokenizer;
// A single token, exposed to Python as the `Token` class via `#[pyclass]`.
//
// Plain `//` comments are used here on purpose: `///` doc comments on a
// `#[pyclass]` and its `#[pyo3(get)]` fields are exported as Python docstrings.
#[derive(Debug)]
#[pyclass]
pub struct Token {
// Rust-side token type. Readable from Python under the name
// `token_type_index` (read-only: no `set`).
#[pyo3(get, name = "token_type_index")]
pub token_type: TokenType,
// Arbitrary Python object readable AND writable from Python as `token_type`.
// `Token::new` initialises it to Python `None`.
// NOTE(review): presumably the Python side later replaces it with its own
// token-type object looked up from `token_type_index` — confirm with callers.
#[pyo3(get, set, name = "token_type")]
pub token_type_py: PyObject,
// Token text, held as a Python `str` (read-only attribute).
#[pyo3(get)]
pub text: Py<PyString>,
// Position fields, all read-only from Python.
// NOTE(review): whether `line`/`col` are 0- or 1-based and whether
// `start`/`end` are byte or character offsets (and inclusive or exclusive)
// is decided by the tokenizer, not visible here — confirm there.
#[pyo3(get)]
pub line: usize,
#[pyo3(get)]
pub col: usize,
#[pyo3(get)]
pub start: usize,
#[pyo3(get)]
pub end: usize,
// Comments attached to this token, held as a Python `list`. The attribute
// itself has no setter, but the list object is shared with Python and is
// appended to in place by `Token::append_comments`.
#[pyo3(get)]
pub comments: Py<PyList>,
}
impl Token {
    /// Builds a [`Token`], converting `text` into a Python `str` and `comments`
    /// into a Python `list` of `str`.
    ///
    /// The Python-facing `token_type` attribute (`token_type_py`) is initialised
    /// to Python `None`; `token_type` itself is stored unchanged.
    ///
    /// The GIL is acquired for the duration of the conversion.
    pub fn new(
        token_type: TokenType,
        text: String,
        line: usize,
        col: usize,
        start: usize,
        end: usize,
        comments: Vec<String>,
    ) -> Token {
        Python::with_gil(|py| Token {
            token_type,
            token_type_py: PyNone::get_bound(py).into_py(py),
            text: PyString::new_bound(py, &text).into_py(py),
            line,
            col,
            start,
            end,
            comments: PyList::new_bound(py, &comments).into(),
        })
    }

    /// Appends every string in `comments` to this token's Python comment list
    /// and leaves `comments` empty, mirroring the contract of `Vec::append`.
    ///
    /// The caller's vector keeps its allocation (as with `Vec::append`), so it
    /// can be refilled without reallocating.
    ///
    /// # Panics
    ///
    /// Panics if appending to the underlying Python list fails; the Python
    /// error is included in the panic message.
    pub fn append_comments(&self, comments: &mut Vec<String>) {
        // Nothing to transfer: skip acquiring the GIL altogether.
        if comments.is_empty() {
            return;
        }

        Python::with_gil(|py| {
            let pylist = self.comments.bind(py);
            for comment in comments.iter() {
                if let Err(err) = pylist.append(comment) {
                    // Surface the underlying Python error instead of discarding it.
                    panic!("Failed to append comments to the Python list: {}", err);
                }
            }
        });

        // Simulate `Vec::append`: the source vector ends up empty.
        comments.clear();
    }
}
#[pymethods]
impl Token {
    // Implements Python's `repr(token)`.
    //
    // The Python-side fields (`token_type`, `text`, `comments`) are rendered
    // with their own Python `repr()`; the positional fields are printed as
    // plain integers. Any error raised by one of those `repr()` calls is
    // propagated to the caller.
    #[pyo3(name = "__repr__")]
    fn python_repr(&self) -> PyResult<String> {
        Python::with_gil(|py| {
            // Evaluate the fallible reprs up front, in the order they appear
            // in the output.
            let token_type_repr = self.token_type_py.bind(py).repr()?;
            let text_repr = self.text.bind(py).repr()?;
            let comments_repr = self.comments.bind(py).repr()?;

            let rendered = format!(
                "<Token token_type: {}, text: {}, line: {}, col: {}, start: {}, end: {}, comments: {}>",
                token_type_repr,
                text_repr,
                self.line,
                self.col,
                self.start,
                self.end,
                comments_repr,
            );
            Ok(rendered)
        })
    }
}
// Entry point of the `sqlglotrs` Python extension module: registers every
// class this crate exposes to Python. The first registration that fails
// aborts module initialisation with that error.
#[pymodule]
fn sqlglotrs(module: &Bound<'_, PyModule>) -> PyResult<()> {
    // Token produced by the tokenizer.
    module.add_class::<Token>()?;

    // Settings types re-exported from the `settings` module.
    module.add_class::<TokenTypeSettings>()?;
    module.add_class::<TokenizerSettings>()?;
    module.add_class::<TokenizerDialectSettings>()?;

    // The tokenizer itself.
    module.add_class::<Tokenizer>()?;

    Ok(())
}
|