diff options
Diffstat (limited to 'fluent-bit/lib/onigmo/tool/update-doc.py')
-rwxr-xr-x | fluent-bit/lib/onigmo/tool/update-doc.py | 145 |
1 files changed, 0 insertions, 145 deletions
diff --git a/fluent-bit/lib/onigmo/tool/update-doc.py b/fluent-bit/lib/onigmo/tool/update-doc.py deleted file mode 100755 index 4126adff4..000000000 --- a/fluent-bit/lib/onigmo/tool/update-doc.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -# Usage: -# $ python update-doc.py UCD_DIR > ../doc/UnicodeProps.txt - -from __future__ import print_function -import sys -import os -import re -import datetime - -onig_ver = "6.2.2" -ucddir = "." - -def print_list(arr, title): - print() - print("*", title) - for i in arr: - print(" " + i) - -def output_header(): - d = datetime.date.today() - print("Onigmo (Oniguruma-mod) Unicode Properties Version %s %04d/%02d/%02d" - % (onig_ver, d.year, d.month, d.day)) - - posix_brackets = [ - "Alpha", "Blank", "Cntrl", "Digit", "Graph", "Lower", "Print", - "Punct", "Space", "Upper", "XDigit", "Word", "Alnum", "ASCII", - "XPosixPunct" - ] - specials = ["Any", "Assigned"] - - print_list(posix_brackets, "POSIX brackets") - print_list(specials, "Special") - return set(posix_brackets) | set(specials) - -def output_categories(): - categories = set(["LC", "Cn"]) - pattern = re.compile('^.*?;.*?;(..);') - with open(ucddir + os.sep + 'UnicodeData.txt', 'r') as f: - for line in f: - res = pattern.match(line) - if not res: - continue - categories.add(res.group(1)) - categories.add(res.group(1)[0]) # Major category - print_list(sorted(categories), "Major and General Categories") - return categories - -def output_scripts(filename, title, add=[]): - scripts = set(add) - pattern = re.compile('^[0-9a-fA-F]+(?:\.\.[0-9a-fA-F]+)? *; (\w+) +# ') - with open(filename, 'r') as f: - for line in f: - res = pattern.match(line) - if not res: - continue - scripts.add(res.group(1)) - print_list(sorted(scripts), title) - return scripts - -def output_aliases(scripts): - aliases = set() - pattern = re.compile('^(\w+) *; (\w+)') - with open(ucddir + os.sep + 'PropertyAliases.txt', 'r') as f: - for line in f: - res = pattern.match(line) - if not res: - continue - if (res.group(2) in scripts) and (res.group(1) not in scripts): - aliases.add(res.group(1)) - print_list(sorted(aliases), "PropertyAliases") - return aliases - -def output_valuealiases(scripts): - scripts |= set(["cntrl", "digit", "punct"]) # exclude them - aliases = list() - aliases_sc = list() - pattern = re.compile('^(gc|sc) ; (\w+) *; (\w+)(?: *; (\w+))?') - with open(ucddir + os.sep + 'PropertyValueAliases.txt', 'r') as f: - for line in f: - res = pattern.match(line) - if not res: - continue - if (res.group(1) == "gc"): - if res.group(2) in scripts: - if res.group(3) not in scripts: - aliases.append(res.group(3)) - if res.group(4) and (res.group(4) not in scripts): - aliases.append(res.group(4)) - else: - if res.group(3) in scripts: - if res.group(2) not in scripts: - aliases_sc.append(res.group(2)) - if res.group(4) and (res.group(4) not in scripts): - aliases_sc.append(res.group(4)) - - print_list(aliases, "PropertyValueAliases (General_Category)") - print_list(aliases_sc, "PropertyValueAliases (Script)") - return set(aliases) | set(aliases_sc) - -def output_ages(): - ages = set() - pattern = re.compile('^[\dA-F.]+ *; ([\d.]+)') - with open(ucddir + os.sep + 'DerivedAge.txt', 'r') as f: - for line in f: - res = pattern.match(line) - if not res: - continue - ages.add("Age=" + res.group(1)) - print_list(sorted(ages), "DerivedAges") - return ages - -def output_blocks(): - blocks = list() - pattern = re.compile('^[\dA-F.]+ *; ([-\w ]+)') - with open(ucddir + os.sep + 'Blocks.txt', 'r') as f: - for line in f: - res = pattern.match(line) - if not res: - continue - blocks.append("In_" + re.sub('\W', '_', res.group(1))) - blocks.append("In_No_Block") - print_list(blocks, "Blocks") - return set(blocks) - -def main(): - global ucddir - if len(sys.argv) > 1: - ucddir = sys.argv[1] - scripts = set() - scripts |= output_header() - scripts |= output_categories() - scripts |= output_scripts(ucddir + os.sep + 'Scripts.txt', 'Scripts', ["Unknown"]) - scripts |= output_scripts(ucddir + os.sep + 'DerivedCoreProperties.txt', 'DerivedCoreProperties') - scripts |= output_scripts(ucddir + os.sep + 'PropList.txt', 'PropList') - scripts |= output_scripts(ucddir + os.sep + 'emoji-data.txt', 'Emoji') - output_aliases(scripts) - output_valuealiases(scripts) - output_ages() - output_blocks() - -if __name__ == '__main__': - main() |