diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 12:08:03 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 12:08:18 +0000 |
commit | 5da14042f70711ea5cf66e034699730335462f66 (patch) | |
tree | 0f6354ccac934ed87a2d555f45be4c831cf92f4a /src/fluent-bit/lib/onigmo/tool/update-doc.py | |
parent | Releasing debian version 1.44.3-2. (diff) | |
download | netdata-5da14042f70711ea5cf66e034699730335462f66.tar.xz netdata-5da14042f70711ea5cf66e034699730335462f66.zip |
Merging upstream version 1.45.3+dfsg.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/fluent-bit/lib/onigmo/tool/update-doc.py')
-rwxr-xr-x | src/fluent-bit/lib/onigmo/tool/update-doc.py | 145 |
1 files changed, 145 insertions, 0 deletions
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Usage:
#   $ python update-doc.py UCD_DIR > ../doc/UnicodeProps.txt

from __future__ import print_function
import sys
import os
import re
import datetime

onig_ver = "6.2.2"
ucddir = "."


def _ucd_path(name):
    """Return the path of the data file *name* inside the ``ucddir`` directory."""
    return os.path.join(ucddir, name)


def print_list(arr, title):
    """Print a blank line, a ``* title`` heading, then every item of *arr*.

    Each item is written on its own line, prefixed with a single space.
    """
    print()
    print("*", title)
    for item in arr:
        print(" " + item)


def output_header():
    """Print the document header plus the POSIX bracket and special names.

    The header line carries ``onig_ver`` and today's date.

    Returns:
        The set of all POSIX bracket names and special names printed.
    """
    today = datetime.date.today()
    print("Onigmo (Oniguruma-mod) Unicode Properties Version %s %04d/%02d/%02d"
          % (onig_ver, today.year, today.month, today.day))

    posix_brackets = [
        "Alpha", "Blank", "Cntrl", "Digit", "Graph", "Lower", "Print",
        "Punct", "Space", "Upper", "XDigit", "Word", "Alnum", "ASCII",
        "XPosixPunct"
    ]
    specials = ["Any", "Assigned"]

    print_list(posix_brackets, "POSIX brackets")
    print_list(specials, "Special")
    return set(posix_brackets) | set(specials)


def output_categories():
    """Print the category names found in ``UnicodeData.txt``.

    The third ``;``-separated field of each line (two characters) is taken as
    a category; its first character is recorded as well as the major
    category. "LC" and "Cn" are always included.

    Returns:
        The set of category names printed.
    """
    categories = {"LC", "Cn"}
    pattern = re.compile(r'^.*?;.*?;(..);')
    with open(_ucd_path('UnicodeData.txt'), 'r') as f:
        for line in f:
            res = pattern.match(line)
            if not res:
                continue
            categories.add(res.group(1))
            categories.add(res.group(1)[0])     # Major category
    print_list(sorted(categories), "Major and General Categories")
    return categories


def output_scripts(filename, title, add=()):
    """Print the property names listed in a ``range ; Name # ...`` data file.

    Args:
        filename: Path of the file to scan.
        title: Heading passed to ``print_list``.
        add: Extra names to include in addition to those found in the file.
            The default is an immutable empty tuple, so no state is shared
            between calls.

    Returns:
        The set of names printed.
    """
    scripts = set(add)
    pattern = re.compile(r'^[0-9a-fA-F]+(?:\.\.[0-9a-fA-F]+)? *; (\w+) +# ')
    with open(filename, 'r') as f:
        for line in f:
            res = pattern.match(line)
            if not res:
                continue
            scripts.add(res.group(1))
    print_list(sorted(scripts), title)
    return scripts


def output_aliases(scripts):
    """Print the aliases from ``PropertyAliases.txt`` for names in *scripts*.

    A line ``alias ; name`` contributes ``alias`` when ``name`` is in
    *scripts* and ``alias`` itself is not.

    Returns:
        The set of aliases printed.
    """
    aliases = set()
    pattern = re.compile(r'^(\w+) *; (\w+)')
    with open(_ucd_path('PropertyAliases.txt'), 'r') as f:
        for line in f:
            res = pattern.match(line)
            if not res:
                continue
            if (res.group(2) in scripts) and (res.group(1) not in scripts):
                aliases.add(res.group(1))
    print_list(sorted(aliases), "PropertyAliases")
    return aliases


def output_valuealiases(scripts):
    """Print the ``gc`` and ``sc`` aliases from ``PropertyValueAliases.txt``.

    For ``gc`` lines the second field is looked up in the known names and the
    third/fourth fields are the candidate aliases; for ``sc`` lines the third
    field is looked up and the second/fourth fields are the candidates.
    Candidates that are already known names are skipped, as are "cntrl",
    "digit" and "punct". Aliases are printed in file order.

    Args:
        scripts: Set of already known names. It is not modified.

    Returns:
        The set of all aliases printed.
    """
    # Work on a copy so the caller's set is left untouched.
    known = set(scripts) | {"cntrl", "digit", "punct"}     # exclude them
    aliases = []
    aliases_sc = []
    pattern = re.compile(r'^(gc|sc) ; (\w+) *; (\w+)(?: *; (\w+))?')
    with open(_ucd_path('PropertyValueAliases.txt'), 'r') as f:
        for line in f:
            res = pattern.match(line)
            if not res:
                continue
            if res.group(1) == "gc":
                if res.group(2) in known:
                    if res.group(3) not in known:
                        aliases.append(res.group(3))
                    if res.group(4) and (res.group(4) not in known):
                        aliases.append(res.group(4))
            else:
                if res.group(3) in known:
                    if res.group(2) not in known:
                        aliases_sc.append(res.group(2))
                    if res.group(4) and (res.group(4) not in known):
                        aliases_sc.append(res.group(4))

    print_list(aliases, "PropertyValueAliases (General_Category)")
    print_list(aliases_sc, "PropertyValueAliases (Script)")
    return set(aliases) | set(aliases_sc)


def output_ages():
    """Print an ``Age=<version>`` entry for each version in ``DerivedAge.txt``.

    Returns:
        The set of ``Age=...`` names printed.
    """
    ages = set()
    pattern = re.compile(r'^[\dA-F.]+ *; ([\d.]+)')
    with open(_ucd_path('DerivedAge.txt'), 'r') as f:
        for line in f:
            res = pattern.match(line)
            if not res:
                continue
            ages.add("Age=" + res.group(1))
    print_list(sorted(ages), "DerivedAges")
    return ages


def output_blocks():
    """Print an ``In_<block>`` entry for each block in ``Blocks.txt``.

    Non-word characters in a block name are replaced by ``_``. Entries keep
    the file order, and ``In_No_Block`` is appended at the end.

    Returns:
        The set of block names printed.
    """
    blocks = []
    pattern = re.compile(r'^[\dA-F.]+ *; ([-\w ]+)')
    with open(_ucd_path('Blocks.txt'), 'r') as f:
        for line in f:
            res = pattern.match(line)
            if not res:
                continue
            blocks.append("In_" + re.sub(r'\W', '_', res.group(1)))
    blocks.append("In_No_Block")
    print_list(blocks, "Blocks")
    return set(blocks)


def main():
    """Write the whole property list to stdout.

    The first command-line argument, when given, replaces the default data
    directory (the current directory).
    """
    global ucddir
    if len(sys.argv) > 1:
        ucddir = sys.argv[1]
    scripts = set()
    scripts |= output_header()
    scripts |= output_categories()
    scripts |= output_scripts(_ucd_path('Scripts.txt'), 'Scripts', ["Unknown"])
    scripts |= output_scripts(_ucd_path('DerivedCoreProperties.txt'), 'DerivedCoreProperties')
    scripts |= output_scripts(_ucd_path('PropList.txt'), 'PropList')
    scripts |= output_scripts(_ucd_path('emoji-data.txt'), 'Emoji')
    output_aliases(scripts)
    output_valuealiases(scripts)
    output_ages()
    output_blocks()


if __name__ == '__main__':
    main()