From 8f88a01462641cbf930b3c43b780565d0fb7d37e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 22 Jun 2023 20:53:34 +0200 Subject: Merging upstream version 16.4.0. Signed-off-by: Daniel Baumann --- sqlglot/trie.py | 43 +++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) (limited to 'sqlglot/trie.py') diff --git a/sqlglot/trie.py b/sqlglot/trie.py index eba91b9..59601ee 100644 --- a/sqlglot/trie.py +++ b/sqlglot/trie.py @@ -1,14 +1,21 @@ import typing as t +from enum import Enum, auto key = t.Sequence[t.Hashable] +class TrieResult(Enum): + FAILED = auto() + PREFIX = auto() + EXISTS = auto() + + def new_trie(keywords: t.Iterable[key], trie: t.Optional[t.Dict] = None) -> t.Dict: """ Creates a new trie out of a collection of keywords. - The trie is represented as a sequence of nested dictionaries keyed by either single character - strings, or by 0, which is used to designate that a keyword is in the trie. + The trie is represented as a sequence of nested dictionaries keyed by either single + character strings, or by 0, which is used to designate that a keyword is in the trie. Example: >>> new_trie(["bla", "foo", "blab"]) @@ -25,46 +32,50 @@ def new_trie(keywords: t.Iterable[key], trie: t.Optional[t.Dict] = None) -> t.Di for key in keywords: current = trie - for char in key: current = current.setdefault(char, {}) + current[0] = True return trie -def in_trie(trie: t.Dict, key: key) -> t.Tuple[int, t.Dict]: +def in_trie(trie: t.Dict, key: key) -> t.Tuple[TrieResult, t.Dict]: """ Checks whether a key is in a trie. Examples: >>> in_trie(new_trie(["cat"]), "bob") - (0, {'c': {'a': {'t': {0: True}}}}) + (, {'c': {'a': {'t': {0: True}}}}) >>> in_trie(new_trie(["cat"]), "ca") - (1, {'t': {0: True}}) + (, {'t': {0: True}}) >>> in_trie(new_trie(["cat"]), "cat") - (2, {0: True}) + (, {0: True}) Args: - trie: the trie to be searched. - key: the target key. + trie: The trie to be searched. + key: The target key. Returns: - A pair `(value, subtrie)`, where `subtrie` is the sub-trie we get at the point where the search stops, and `value` - is either 0 (search was unsuccessful), 1 (`value` is a prefix of a keyword in `trie`) or 2 (`key is in `trie`). + A pair `(value, subtrie)`, where `subtrie` is the sub-trie we get at the point + where the search stops, and `value` is a TrieResult value that can be one of: + + - TrieResult.FAILED: the search was unsuccessful + - TrieResult.PREFIX: `value` is a prefix of a keyword in `trie` + - TrieResult.EXISTS: `key` exists in `trie` """ if not key: - return (0, trie) + return (TrieResult.FAILED, trie) current = trie - for char in key: if char not in current: - return (0, current) + return (TrieResult.FAILED, current) current = current[char] if 0 in current: - return (2, current) - return (1, current) + return (TrieResult.EXISTS, current) + + return (TrieResult.PREFIX, current) -- cgit v1.2.3