Adding upstream version 3.1.0.upstream/3.1.0

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2021-02-08 10:31:05 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2021-02-08 10:31:05 +0000
commit: 6884720fae8a2622b14e93d9e35ca5fcc2283b40 (patch)
tree: df6f736bb623cdd7932bbe2256101a6ac4ef7f35 /pgcli/packages/prioritization.py
parent: Initial commit. (diff)
download: pgcli-6884720fae8a2622b14e93d9e35ca5fcc2283b40.tar.xz
pgcli-6884720fae8a2622b14e93d9e35ca5fcc2283b40.zip
1 files changed, 51 insertions, 0 deletions
diff --git a/pgcli/packages/prioritization.py b/pgcli/packages/prioritization.py
new file mode 100644
index 0000000..e92dcbb
--- /dev/null
+++ b/pgcli/packages/prioritization.py
@@ -0,0 +1,51 @@
+import re
+import sqlparse
+from sqlparse.tokens import Name
+from collections import defaultdict
+from .pgliterals.main import get_literals
+
+
+white_space_regex = re.compile("\\s+", re.MULTILINE)
+
+
+def _compile_regex(keyword):
+    # Surround the keyword with word boundaries and replace interior whitespace
+    # with whitespace wildcards
+    pattern = "\\b" + white_space_regex.sub(r"\\s+", keyword) + "\\b"
+    return re.compile(pattern, re.MULTILINE | re.IGNORECASE)
+
+
+keywords = get_literals("keywords")
+keyword_regexs = dict((kw, _compile_regex(kw)) for kw in keywords)
+
+
+class PrevalenceCounter(object):
+    def __init__(self):
+        self.keyword_counts = defaultdict(int)
+        self.name_counts = defaultdict(int)
+
+    def update(self, text):
+        self.update_keywords(text)
+        self.update_names(text)
+
+    def update_names(self, text):
+        for parsed in sqlparse.parse(text):
+            for token in parsed.flatten():
+                if token.ttype in Name:
+                    self.name_counts[token.value] += 1
+
+    def clear_names(self):
+        self.name_counts = defaultdict(int)
+
+    def update_keywords(self, text):
+        # Count keywords. Can't rely for sqlparse for this, because it's
+        # database agnostic
+        for keyword, regex in keyword_regexs.items():
+            for _ in regex.finditer(text):
+                self.keyword_counts[keyword] += 1
+
+    def keyword_count(self, keyword):
+        return self.keyword_counts[keyword]
+
+    def name_count(self, name):
+        return self.name_counts[name]
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2021-02-08 10:31:05 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2021-02-08 10:31:05 +0000
commit	6884720fae8a2622b14e93d9e35ca5fcc2283b40 (patch)
tree	df6f736bb623cdd7932bbe2256101a6ac4ef7f35 /pgcli/packages/prioritization.py
parent	Initial commit. (diff)
download	pgcli-6884720fae8a2622b14e93d9e35ca5fcc2283b40.tar.xz pgcli-6884720fae8a2622b14e93d9e35ca5fcc2283b40.zip