summaryrefslogtreecommitdiffstats
path: root/sqlglot/trie.py
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2023-06-22 18:53:34 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2023-06-22 18:53:34 +0000
commit8f88a01462641cbf930b3c43b780565d0fb7d37e (patch)
treee211588c29e6ce6d16fbbfd33d8cda63237c2e6e /sqlglot/trie.py
parentReleasing debian version 16.2.1-1. (diff)
downloadsqlglot-8f88a01462641cbf930b3c43b780565d0fb7d37e.tar.xz
sqlglot-8f88a01462641cbf930b3c43b780565d0fb7d37e.zip
Merging upstream version 16.4.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/trie.py')
-rw-r--r--sqlglot/trie.py43
1 files changed, 27 insertions, 16 deletions
diff --git a/sqlglot/trie.py b/sqlglot/trie.py
index eba91b9..59601ee 100644
--- a/sqlglot/trie.py
+++ b/sqlglot/trie.py
@@ -1,14 +1,21 @@
import typing as t
+from enum import Enum, auto
key = t.Sequence[t.Hashable]
+class TrieResult(Enum):
+ FAILED = auto()
+ PREFIX = auto()
+ EXISTS = auto()
+
+
def new_trie(keywords: t.Iterable[key], trie: t.Optional[t.Dict] = None) -> t.Dict:
"""
Creates a new trie out of a collection of keywords.
- The trie is represented as a sequence of nested dictionaries keyed by either single character
- strings, or by 0, which is used to designate that a keyword is in the trie.
+ The trie is represented as a sequence of nested dictionaries keyed by either single
+ character strings, or by 0, which is used to designate that a keyword is in the trie.
Example:
>>> new_trie(["bla", "foo", "blab"])
@@ -25,46 +32,50 @@ def new_trie(keywords: t.Iterable[key], trie: t.Optional[t.Dict] = None) -> t.Di
for key in keywords:
current = trie
-
for char in key:
current = current.setdefault(char, {})
+
current[0] = True
return trie
-def in_trie(trie: t.Dict, key: key) -> t.Tuple[int, t.Dict]:
+def in_trie(trie: t.Dict, key: key) -> t.Tuple[TrieResult, t.Dict]:
"""
Checks whether a key is in a trie.
Examples:
>>> in_trie(new_trie(["cat"]), "bob")
- (0, {'c': {'a': {'t': {0: True}}}})
+ (<TrieResult.FAILED: 1>, {'c': {'a': {'t': {0: True}}}})
>>> in_trie(new_trie(["cat"]), "ca")
- (1, {'t': {0: True}})
+ (<TrieResult.PREFIX: 2>, {'t': {0: True}})
>>> in_trie(new_trie(["cat"]), "cat")
- (2, {0: True})
+ (<TrieResult.EXISTS: 3>, {0: True})
Args:
- trie: the trie to be searched.
- key: the target key.
+ trie: The trie to be searched.
+ key: The target key.
Returns:
- A pair `(value, subtrie)`, where `subtrie` is the sub-trie we get at the point where the search stops, and `value`
- is either 0 (search was unsuccessful), 1 (`value` is a prefix of a keyword in `trie`) or 2 (`key is in `trie`).
+ A pair `(value, subtrie)`, where `subtrie` is the sub-trie we get at the point
+ where the search stops, and `value` is a TrieResult value that can be one of:
+
+ - TrieResult.FAILED: the search was unsuccessful
+ - TrieResult.PREFIX: `value` is a prefix of a keyword in `trie`
+ - TrieResult.EXISTS: `key` exists in `trie`
"""
if not key:
- return (0, trie)
+ return (TrieResult.FAILED, trie)
current = trie
-
for char in key:
if char not in current:
- return (0, current)
+ return (TrieResult.FAILED, current)
current = current[char]
if 0 in current:
- return (2, current)
- return (1, current)
+ return (TrieResult.EXISTS, current)
+
+ return (TrieResult.PREFIX, current)