diff options
Diffstat (limited to 'sqlglot/trie.py')
-rw-r--r-- | sqlglot/trie.py | 48 |
1 files changed, 45 insertions, 3 deletions
diff --git a/sqlglot/trie.py b/sqlglot/trie.py index a234107..fa2aaf1 100644 --- a/sqlglot/trie.py +++ b/sqlglot/trie.py @@ -1,5 +1,26 @@ -def new_trie(keywords): - trie = {} +import typing as t + +key = t.Sequence[t.Hashable] + + +def new_trie(keywords: t.Iterable[key]) -> t.Dict: + """ + Creates a new trie out of a collection of keywords. + + The trie is represented as a sequence of nested dictionaries keyed by either single character + strings, or by 0, which is used to designate that a keyword is in the trie. + + Example: + >>> new_trie(["bla", "foo", "blab"]) + {'b': {'l': {'a': {0: True, 'b': {0: True}}}}, 'f': {'o': {'o': {0: True}}}} + + Args: + keywords: the keywords to create the trie from. + + Returns: + The trie corresponding to `keywords`. + """ + trie: t.Dict = {} for key in keywords: current = trie @@ -11,7 +32,28 @@ def new_trie(keywords): return trie -def in_trie(trie, key): +def in_trie(trie: t.Dict, key: key) -> t.Tuple[int, t.Dict]: + """ + Checks whether a key is in a trie. + + Examples: + >>> in_trie(new_trie(["cat"]), "bob") + (0, {'c': {'a': {'t': {0: True}}}}) + + >>> in_trie(new_trie(["cat"]), "ca") + (1, {'t': {0: True}}) + + >>> in_trie(new_trie(["cat"]), "cat") + (2, {0: True}) + + Args: + trie: the trie to be searched. + key: the target key. + + Returns: + A pair `(value, subtrie)`, where `subtrie` is the sub-trie we get at the point where the search stops, and `value` + is either 0 (search was unsuccessfull), 1 (`value` is a prefix of a keyword in `trie`) or 2 (`key is in `trie`). + """ if not key: return (0, trie) |