diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /intl/icu/source/python/icutools/databuilder | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'intl/icu/source/python/icutools/databuilder')
16 files changed, 2599 insertions, 0 deletions
diff --git a/intl/icu/source/python/icutools/databuilder/__init__.py b/intl/icu/source/python/icutools/databuilder/__init__.py new file mode 100644 index 0000000000..be936166e7 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/__init__.py @@ -0,0 +1,16 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +from collections import namedtuple + +LocalFile = namedtuple("LocalFile", ["dirname", "filename"]) +SrcFile = namedtuple("SrcFile", ["filename"]) +InFile = namedtuple("InFile", ["filename"]) +TmpFile = namedtuple("TmpFile", ["filename"]) +OutFile = namedtuple("OutFile", ["filename"]) +PkgFile = namedtuple("PkgFile", ["filename"]) + +IcuTool = namedtuple("IcuTool", ["name"]) +SystemTool = namedtuple("SystemTool", ["name"]) + +DepTarget = namedtuple("DepTarget", ["name"]) diff --git a/intl/icu/source/python/icutools/databuilder/__main__.py b/intl/icu/source/python/icutools/databuilder/__main__.py new file mode 100644 index 0000000000..a6a387d9ad --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/__main__.py @@ -0,0 +1,360 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +# Python 2/3 Compatibility (ICU-20299) +# TODO(ICU-20301): Remove this. +from __future__ import print_function + +import argparse +import glob as pyglob +import io as pyio +import json +import os +import sys + +from . import * +from .comment_stripper import CommentStripper +from .request_types import CopyRequest +from .renderers import makefile, common_exec +from . import filtration, utils + +flag_parser = argparse.ArgumentParser( + description = """Generates rules for building ICU binary data files from text +and other input files in source control. + +Use the --mode option to declare how to execute those rules, either exporting +the rules to a Makefile or spawning child processes to run them immediately: + + --mode=gnumake prints a Makefile to standard out. + --mode=unix-exec spawns child processes in a Unix-like environment. + --mode=windows-exec spawns child processes in a Windows-like environment. + +Tips for --mode=unix-exec +========================= + +Create two empty directories for out_dir and tmp_dir. They will get filled +with a lot of intermediate files. + +Set LD_LIBRARY_PATH to include the lib directory. e.g., from icu4c/source: + + $ LD_LIBRARY_PATH=lib PYTHONPATH=python python3 -m icutools.databuilder ... + +Once icutools.databuilder finishes, you have compiled the data, but you have +not packaged it into a .dat or .so file. This is done by the separate pkgdata +tool in bin. Read the docs of pkgdata: + + $ LD_LIBRARY_PATH=lib ./bin/pkgdata --help + +Example command line to call pkgdata: + + $ LD_LIBRARY_PATH=lib ./bin/pkgdata -m common -p icudt63l -c \\ + -O data/icupkg.inc -s $OUTDIR -d $TMPDIR $TMPDIR/icudata.lst + +where $OUTDIR and $TMPDIR are your out and tmp directories, respectively. +The above command will create icudt63l.dat in the tmpdir. + +Command-Line Arguments +====================== +""", + formatter_class = argparse.RawDescriptionHelpFormatter +) + +arg_group_required = flag_parser.add_argument_group("required arguments") +arg_group_required.add_argument( + "--mode", + help = "What to do with the generated rules.", + choices = ["gnumake", "unix-exec", "windows-exec", "bazel-exec"], + required = True +) + +flag_parser.add_argument( + "--src_dir", + help = "Path to data source folder (icu4c/source/data).", + default = "." +) +flag_parser.add_argument( + "--filter_file", + metavar = "PATH", + help = "Path to an ICU data filter JSON file.", + default = None +) +flag_parser.add_argument( + "--include_uni_core_data", + help = "Include the full Unicode core data in the dat file.", + default = False, + action = "store_true" +) +flag_parser.add_argument( + "--seqmode", + help = "Whether to optimize rules to be run sequentially (fewer threads) or in parallel (many threads). Defaults to 'sequential', which is better for unix-exec and windows-exec modes. 'parallel' is often better for massively parallel build systems.", + choices = ["sequential", "parallel"], + default = "sequential" +) +flag_parser.add_argument( + "--verbose", + help = "Print more verbose output (default false).", + default = False, + action = "store_true" +) + +arg_group_exec = flag_parser.add_argument_group("arguments for unix-exec and windows-exec modes") +arg_group_exec.add_argument( + "--out_dir", + help = "Path to where to save output data files.", + default = "icudata" +) +arg_group_exec.add_argument( + "--tmp_dir", + help = "Path to where to save temporary files.", + default = "icutmp" +) +arg_group_exec.add_argument( + "--tool_dir", + help = "Path to where to find binary tools (genrb, etc).", + default = "../bin" +) +arg_group_exec.add_argument( + "--tool_cfg", + help = "The build configuration of the tools. Used in 'windows-exec' mode only.", + default = "x86/Debug" +) + + +class Config(object): + + def __init__(self, args): + # Process arguments + self.max_parallel = (args.seqmode == "parallel") + + # Boolean: Whether to include core Unicode data files in the .dat file + self.include_uni_core_data = args.include_uni_core_data + + # Default fields before processing filter file + self.filters_json_data = {} + self.filter_dir = "ERROR_NO_FILTER_FILE" + + # Process filter file + if args.filter_file: + try: + with open(args.filter_file, "r") as f: + print("Note: Applying filters from %s." % args.filter_file, file=sys.stderr) + self._parse_filter_file(f) + except IOError: + print("Error: Could not read filter file %s." % args.filter_file, file=sys.stderr) + exit(1) + self.filter_dir = os.path.abspath(os.path.dirname(args.filter_file)) + + # Either "unihan" or "implicithan" + self.coll_han_type = "unihan" + if "collationUCAData" in self.filters_json_data: + self.coll_han_type = self.filters_json_data["collationUCAData"] + + # Either "additive" or "subtractive" + self.strategy = "subtractive" + if "strategy" in self.filters_json_data: + self.strategy = self.filters_json_data["strategy"] + + # True or False (could be extended later to support enum/list) + self.use_pool_bundle = True + if "usePoolBundle" in self.filters_json_data: + self.use_pool_bundle = self.filters_json_data["usePoolBundle"] + + def _parse_filter_file(self, f): + # Use the Hjson parser if it is available; otherwise, use vanilla JSON. + try: + import hjson + self.filters_json_data = hjson.load(f) + except ImportError: + self.filters_json_data = json.load(CommentStripper(f)) + + # Optionally pre-validate the JSON schema before further processing. + # Some schema errors will be caught later, but this step ensures + # maximal validity. + try: + import jsonschema + schema_path = os.path.join(os.path.dirname(__file__), "filtration_schema.json") + with open(schema_path) as schema_f: + schema = json.load(CommentStripper(schema_f)) + validator = jsonschema.Draft4Validator(schema) + for error in validator.iter_errors(self.filters_json_data, schema): + print("WARNING: ICU data filter JSON file:", error.message, + "at", "".join( + "[%d]" % part if isinstance(part, int) else ".%s" % part + for part in error.absolute_path + ), + file=sys.stderr) + except ImportError: + print("Tip: to validate your filter file, install the Pip package 'jsonschema'", file=sys.stderr) + pass + + +def add_copy_input_requests(requests, config, common_vars): + files_to_copy = set() + for request in requests: + request_files = request.all_input_files() + # Also add known dependency txt files as possible inputs. + # This is required for translit rule files. + if hasattr(request, "dep_targets"): + request_files += [ + f for f in request.dep_targets if isinstance(f, InFile) + ] + for f in request_files: + if isinstance(f, InFile): + files_to_copy.add(f) + + result = [] + id = 0 + + json_data = config.filters_json_data["fileReplacements"] + dirname = json_data["directory"] + for directive in json_data["replacements"]: + if type(directive) == str: + input_file = LocalFile(dirname, directive) + output_file = InFile(directive) + else: + input_file = LocalFile(dirname, directive["src"]) + output_file = InFile(directive["dest"]) + result += [ + CopyRequest( + name = "input_copy_%d" % id, + input_file = input_file, + output_file = output_file + ) + ] + files_to_copy.remove(output_file) + id += 1 + + for f in files_to_copy: + result += [ + CopyRequest( + name = "input_copy_%d" % id, + input_file = SrcFile(f.filename), + output_file = f + ) + ] + id += 1 + + result += requests + return result + + +class IO(object): + """I/O operations required when computing the build actions""" + + def __init__(self, src_dir): + self.src_dir = src_dir + + def glob(self, pattern): + absolute_paths = pyglob.glob(os.path.join(self.src_dir, pattern)) + # Strip off the absolute path suffix so we are left with a relative path. + relative_paths = [v[len(self.src_dir)+1:] for v in sorted(absolute_paths)] + # For the purposes of icutools.databuilder, force Unix-style directory separators. + # Within the Python code, including BUILDRULES.py and user-provided config files, + # directory separators are normalized to '/', including on Windows platforms. + return [v.replace("\\", "/") for v in relative_paths] + + def read_locale_deps(self, tree): + return self._read_json("%s/LOCALE_DEPS.json" % tree) + + def _read_json(self, filename): + with pyio.open(os.path.join(self.src_dir, filename), "r", encoding="utf-8-sig") as f: + return json.load(CommentStripper(f)) + + +def main(argv): + args = flag_parser.parse_args(argv) + config = Config(args) + + if args.mode == "gnumake": + makefile_vars = { + "SRC_DIR": "$(srcdir)", + "IN_DIR": "$(srcdir)", + "INDEX_NAME": "res_index" + } + makefile_env = ["ICUDATA_CHAR", "OUT_DIR", "TMP_DIR"] + common = { + key: "$(%s)" % key + for key in list(makefile_vars.keys()) + makefile_env + } + common["FILTERS_DIR"] = config.filter_dir + common["CWD_DIR"] = os.getcwd() + else: + makefile_vars = None + common = { + "SRC_DIR": args.src_dir, + "IN_DIR": args.src_dir, + "OUT_DIR": args.out_dir, + "TMP_DIR": args.tmp_dir, + "FILTERS_DIR": config.filter_dir, + "CWD_DIR": os.getcwd(), + "INDEX_NAME": "res_index", + # TODO: Pull this from configure script: + "ICUDATA_CHAR": "l" + } + + # Automatically load BUILDRULES from the src_dir + sys.path.append(args.src_dir) + try: + import BUILDRULES + except ImportError: + print("Cannot find BUILDRULES! Did you set your --src_dir?", file=sys.stderr) + sys.exit(1) + + io = IO(args.src_dir) + requests = BUILDRULES.generate(config, io, common) + + if "fileReplacements" in config.filters_json_data: + tmp_in_dir = "{TMP_DIR}/in".format(**common) + if makefile_vars: + makefile_vars["IN_DIR"] = tmp_in_dir + else: + common["IN_DIR"] = tmp_in_dir + requests = add_copy_input_requests(requests, config, common) + + requests = filtration.apply_filters(requests, config, io) + requests = utils.flatten_requests(requests, config, common) + + build_dirs = utils.compute_directories(requests) + + if args.mode == "gnumake": + print(makefile.get_gnumake_rules( + build_dirs, + requests, + makefile_vars, + common_vars = common + )) + elif args.mode == "windows-exec": + return common_exec.run( + platform = "windows", + build_dirs = build_dirs, + requests = requests, + common_vars = common, + tool_dir = args.tool_dir, + tool_cfg = args.tool_cfg, + verbose = args.verbose, + ) + elif args.mode == "unix-exec": + return common_exec.run( + platform = "unix", + build_dirs = build_dirs, + requests = requests, + common_vars = common, + tool_dir = args.tool_dir, + verbose = args.verbose, + ) + elif args.mode == "bazel-exec": + return common_exec.run( + platform = "bazel", + build_dirs = build_dirs, + requests = requests, + common_vars = common, + tool_dir = args.tool_dir, + verbose = args.verbose, + ) + else: + print("Mode not supported: %s" % args.mode) + return 1 + return 0 + +if __name__ == "__main__": + exit(main(sys.argv[1:])) diff --git a/intl/icu/source/python/icutools/databuilder/comment_stripper.py b/intl/icu/source/python/icutools/databuilder/comment_stripper.py new file mode 100644 index 0000000000..4001f2f675 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/comment_stripper.py @@ -0,0 +1,51 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +import io + +class CommentStripper(object): + """Removes lines starting with "//" from a file stream.""" + + def __init__(self, f): + self.f = f + self.state = 0 + + def read(self, size=-1): + bytes = self.f.read(size) + # TODO: Do we need to read more bytes if comments were stripped + # in order to obey the size request? + return "".join(self._strip_comments(bytes)) + + def _strip_comments(self, bytes): + for byte in bytes: + if self.state == 0: + # state 0: start of a line + if byte == "/": + self.state = 1 + elif byte == "\n": + self.state = 0 + yield byte + else: + self.state = 2 + yield byte + elif self.state == 1: + # state 1: read a single '/' + if byte == "/": + self.state = 3 + elif byte == "\n": + self.state = 0 + yield "/" # the one that was skipped + yield "\n" + else: + self.state = 2 + yield "/" # the one that was skipped + yield byte + elif self.state == 2: + # state 2: middle of a line, no comment + if byte == "\n": + self.state = 0 + yield byte + elif self.state == 3: + # state 3: inside a comment + if byte == "\n": + self.state = 0 diff --git a/intl/icu/source/python/icutools/databuilder/filtration.py b/intl/icu/source/python/icutools/databuilder/filtration.py new file mode 100644 index 0000000000..554013ac98 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/filtration.py @@ -0,0 +1,412 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +# Python 2/3 Compatibility (ICU-20299) +# TODO(ICU-20301): Remove this. +from __future__ import print_function + +from abc import abstractmethod +from collections import defaultdict +import re +import sys + +from . import * +from . import utils +from .request_types import * + + +# Note: for this to be a proper abstract class, it should extend abc.ABC. +# There is no nice way to do this that works in both Python 2 and 3. +# TODO(ICU-20301): Make this inherit from abc.ABC. +class Filter(object): + @staticmethod + def create_from_json(json_data, io): + assert io != None + if "filterType" in json_data: + filter_type = json_data["filterType"] + else: + filter_type = "file-stem" + + if filter_type == "file-stem": + return FileStemFilter(json_data) + elif filter_type == "language": + return LanguageFilter(json_data) + elif filter_type == "regex": + return RegexFilter(json_data) + elif filter_type == "exclude": + return ExclusionFilter() + elif filter_type == "union": + return UnionFilter(json_data, io) + elif filter_type == "locale": + return LocaleFilter(json_data, io) + else: + print("Error: Unknown filterType option: %s" % filter_type, file=sys.stderr) + return None + + def filter(self, request): + if not request.apply_file_filter(self): + return [] + for file in request.all_input_files(): + assert self.match(file) + return [request] + + @staticmethod + def _file_to_file_stem(file): + start = file.filename.rfind("/") + limit = file.filename.rfind(".") + return file.filename[start+1:limit] + + @staticmethod + def _file_to_subdir(file): + limit = file.filename.rfind("/") + if limit == -1: + return None + return file.filename[:limit] + + @abstractmethod + def match(self, file): + pass + + +class InclusionFilter(Filter): + def match(self, file): + return True + + +class ExclusionFilter(Filter): + def match(self, file): + return False + + +class WhitelistBlacklistFilter(Filter): + def __init__(self, json_data): + if "whitelist" in json_data: + self.is_whitelist = True + self.whitelist = json_data["whitelist"] + else: + assert "blacklist" in json_data, "Need either whitelist or blacklist: %s" % str(json_data) + self.is_whitelist = False + self.blacklist = json_data["blacklist"] + + def match(self, file): + file_stem = self._file_to_file_stem(file) + return self._should_include(file_stem) + + @abstractmethod + def _should_include(self, file_stem): + pass + + +class FileStemFilter(WhitelistBlacklistFilter): + def _should_include(self, file_stem): + if self.is_whitelist: + return file_stem in self.whitelist + else: + return file_stem not in self.blacklist + + +class LanguageFilter(WhitelistBlacklistFilter): + def _should_include(self, file_stem): + language = file_stem.split("_")[0] + if language == "root": + # Always include root.txt + return True + if self.is_whitelist: + return language in self.whitelist + else: + return language not in self.blacklist + + +class RegexFilter(WhitelistBlacklistFilter): + def __init__(self, *args): + # TODO(ICU-20301): Change this to: super().__init__(*args) + super(RegexFilter, self).__init__(*args) + if self.is_whitelist: + self.whitelist = [re.compile(pat) for pat in self.whitelist] + else: + self.blacklist = [re.compile(pat) for pat in self.blacklist] + + def _should_include(self, file_stem): + if self.is_whitelist: + for pattern in self.whitelist: + if pattern.match(file_stem): + return True + return False + else: + for pattern in self.blacklist: + if pattern.match(file_stem): + return False + return True + + +class UnionFilter(Filter): + def __init__(self, json_data, io): + # Collect the sub-filters. + self.sub_filters = [] + for filter_json in json_data["unionOf"]: + self.sub_filters.append(Filter.create_from_json(filter_json, io)) + + def match(self, file): + """Match iff any of the sub-filters match.""" + for filter in self.sub_filters: + if filter.match(file): + return True + return False + + +LANGUAGE_SCRIPT_REGEX = re.compile(r"^([a-z]{2,3})_[A-Z][a-z]{3}$") +LANGUAGE_ONLY_REGEX = re.compile(r"^[a-z]{2,3}$") + +class LocaleFilter(Filter): + def __init__(self, json_data, io): + self.locales_requested = list(json_data["whitelist"]) + self.include_children = json_data.get("includeChildren", True) + self.include_scripts = json_data.get("includeScripts", False) + + # Load the dependency graph from disk + self.dependency_data_by_tree = { + tree: io.read_locale_deps(tree) + for tree in utils.ALL_TREES + } + + def match(self, file): + tree = self._file_to_subdir(file) + assert tree is not None + locale = self._file_to_file_stem(file) + + # A locale is *required* if it is *requested* or an ancestor of a + # *requested* locale. + if locale in self._locales_required(tree): + return True + + # Resolve include_scripts and include_children. + return self._match_recursive(locale, tree) + + def _match_recursive(self, locale, tree): + # Base case: return True if we reached a *requested* locale, + # or False if we ascend out of the locale tree. + if locale is None: + return False + if locale in self.locales_requested: + return True + + # Check for alternative scripts. + # This causes sr_Latn to check sr instead of going directly to root. + if self.include_scripts: + match = LANGUAGE_SCRIPT_REGEX.match(locale) + if match and self._match_recursive(match.group(1), tree): + return True + + # Check if we are a descendant of a *requested* locale. + if self.include_children: + parent = self._get_parent_locale(locale, tree) + if self._match_recursive(parent, tree): + return True + + # No matches. + return False + + def _get_parent_locale(self, locale, tree): + """Gets the parent locale in the given tree, according to dependency data.""" + dependency_data = self.dependency_data_by_tree[tree] + if "parents" in dependency_data and locale in dependency_data["parents"]: + return dependency_data["parents"][locale] + if "aliases" in dependency_data and locale in dependency_data["aliases"]: + return dependency_data["aliases"][locale] + if LANGUAGE_ONLY_REGEX.match(locale): + return "root" + i = locale.rfind("_") + if i < 0: + assert locale == "root", "Invalid locale: %s/%s" % (tree, locale) + return None + return locale[:i] + + def _locales_required(self, tree): + """Returns a generator of all required locales in the given tree.""" + for locale in self.locales_requested: + while locale is not None: + yield locale + locale = self._get_parent_locale(locale, tree) + + +def apply_filters(requests, config, io): + """Runs the filters and returns a new list of requests.""" + requests = _apply_file_filters(requests, config, io) + requests = _apply_resource_filters(requests, config, io) + return requests + + +def _apply_file_filters(old_requests, config, io): + """Filters out entire files.""" + filters = _preprocess_file_filters(old_requests, config, io) + new_requests = [] + for request in old_requests: + category = request.category + if category in filters: + new_requests += filters[category].filter(request) + else: + new_requests.append(request) + return new_requests + + +def _preprocess_file_filters(requests, config, io): + all_categories = set( + request.category + for request in requests + ) + all_categories.remove(None) + all_categories = list(sorted(all_categories)) + json_data = config.filters_json_data + filters = {} + default_filter_json = "exclude" if config.strategy == "additive" else "include" + for category in all_categories: + filter_json = default_filter_json + # Figure out the correct filter to create + if "featureFilters" in json_data and category in json_data["featureFilters"]: + filter_json = json_data["featureFilters"][category] + if filter_json == "include" and "localeFilter" in json_data and category.endswith("_tree"): + filter_json = json_data["localeFilter"] + # Resolve the filter JSON into a filter object + if filter_json == "exclude": + filters[category] = ExclusionFilter() + elif filter_json == "include": + pass # no-op + else: + filters[category] = Filter.create_from_json(filter_json, io) + if "featureFilters" in json_data: + for category in json_data["featureFilters"]: + if category not in all_categories: + print("Warning: category %s is not known" % category, file=sys.stderr) + return filters + + +class ResourceFilterInfo(object): + def __init__(self, category, strategy): + self.category = category + self.strategy = strategy + self.filter_tmp_dir = "filters/%s" % category + self.input_files = None + self.filter_files = None + self.rules_by_file = None + + def apply_to_requests(self, all_requests): + # Call this method only once per list of requests. + assert self.input_files is None + for request in all_requests: + if request.category != self.category: + continue + if not isinstance(request, AbstractExecutionRequest): + continue + if request.tool != IcuTool("genrb"): + continue + if not request.input_files: + continue + self._set_files(request.input_files) + request.dep_targets += [self.filter_files[:]] + arg_str = "--filterDir {TMP_DIR}/%s" % self.filter_tmp_dir + request.args = "%s %s" % (arg_str, request.args) + + # Make sure we found the target request + if self.input_files is None: + print("WARNING: Category not found: %s" % self.category, file=sys.stderr) + self.input_files = [] + self.filter_files = [] + self.rules_by_file = [] + + def _set_files(self, files): + # Note: The input files to genrb for a certain category should always + # be the same. For example, there are often two genrb calls: one for + # --writePoolBundle, and the other for --usePoolBundle. They are both + # expected to have the same list of input files. + if self.input_files is not None: + assert self.input_files == files + return + self.input_files = list(files) + self.filter_files = [ + TmpFile("%s/%s" % (self.filter_tmp_dir, basename)) + for basename in ( + file.filename[file.filename.rfind("/")+1:] + for file in files + ) + ] + if self.strategy == "additive": + self.rules_by_file = [ + [r"-/", r"+/%%ALIAS", r"+/%%Parent"] + for _ in range(len(files)) + ] + else: + self.rules_by_file = [ + [r"+/"] + for _ in range(len(files)) + ] + + def add_rules(self, file_filter, rules): + for file, rule_list in zip(self.input_files, self.rules_by_file): + if file_filter.match(file): + rule_list += rules + + def make_requests(self): + # Map from rule list to filter files with that rule list + unique_rules = defaultdict(list) + for filter_file, rules in zip(self.filter_files, self.rules_by_file): + unique_rules[tuple(rules)].append(filter_file) + + new_requests = [] + i = 0 + for rules, filter_files in unique_rules.items(): + base_filter_file = filter_files[0] + new_requests += [ + PrintFileRequest( + name = "%s_print_%d" % (self.category, i), + output_file = base_filter_file, + content = self._generate_resource_filter_txt(rules) + ) + ] + i += 1 + for filter_file in filter_files[1:]: + new_requests += [ + CopyRequest( + name = "%s_copy_%d" % (self.category, i), + input_file = base_filter_file, + output_file = filter_file + ) + ] + i += 1 + return new_requests + + @staticmethod + def _generate_resource_filter_txt(rules): + result = "# Caution: This file is automatically generated\n\n" + result += "\n".join(rules) + return result + + +def _apply_resource_filters(all_requests, config, io): + """Creates filters for looking within resource bundle files.""" + json_data = config.filters_json_data + if "resourceFilters" not in json_data: + return all_requests + + collected = {} + for entry in json_data["resourceFilters"]: + if "files" in entry: + file_filter = Filter.create_from_json(entry["files"], io) + else: + file_filter = InclusionFilter() + for category in entry["categories"]: + # not defaultdict because we need to pass arguments to the constructor + if category not in collected: + filter_info = ResourceFilterInfo(category, config.strategy) + filter_info.apply_to_requests(all_requests) + collected[category] = filter_info + else: + filter_info = collected[category] + filter_info.add_rules(file_filter, entry["rules"]) + + # Add the filter generation requests to the beginning so that by default + # they are made before genrb gets run (order is required by windirect) + new_requests = [] + for filter_info in collected.values(): + new_requests += filter_info.make_requests() + new_requests += all_requests + return new_requests diff --git a/intl/icu/source/python/icutools/databuilder/filtration_schema.json b/intl/icu/source/python/icutools/databuilder/filtration_schema.json new file mode 100644 index 0000000000..2b7ff99899 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/filtration_schema.json @@ -0,0 +1,169 @@ +// Copyright (C) 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +{ + "$id": "http://unicode.org/icu-filter-schema", + "$schema": "http://json-schema.org/draft-04/schema#", + "description": "JSON Schema for an ICU data filter file", + "type": "object", + "properties": { + "strategy": { + "type": "string", + "enum": ["additive", "subtractive"] + }, + "localeFilter": { "$ref": "#/definitions/filter" }, + "featureFilters": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { "$ref": "#/definitions/filter" }, + { + "type": "string", + "enum": ["include", "exclude"] + } + ] + } + }, + "resourceFilters": { + "type": "array", + "items": { + "type": "object", + "properties": { + "categories": { + "type": "array", + "items": { "type": "string" } + }, + "files": { "$ref": "#/definitions/filter" }, + "rules": { + "type": "array", + "items": { + "type": "string", + "pattern": "^[+-]/[\\S]*$" + } + } + }, + "required": ["categories", "rules"], + "additionalProperties": false + } + }, + "fileReplacements": { + "type": "object", + "properties": { + "directory": { + "type": "string", + "pattern": "^(\\$SRC|\\$FILTERS|\\$CWD|/$|/[^/]+)(/[^/]+)*$" + }, + "replacements": { + "type": "array", + "items": { + "oneOf": [ + { "type": "string" }, + { + "type": "object", + "properties": { + "src": { "type": "string" }, + "dest": { "type": "string" } + }, + "additionalProperties": false, + "required": ["src", "dest"] + } + ] + } + } + }, + "additionalProperties": false, + "required": ["directory", "replacements"] + }, + "collationUCAData": { + "type": "string", + "enum": ["unihan", "implicithan"] + }, + "usePoolBundle": { + "type": "boolean" + } + }, + "additionalProperties": false, + "definitions": { + "filter": { + "type": "object", + "oneOf": [ + { + "properties": { + "filterType": { + "$ref": "#/definitions/blacklistWhitelistFilterTypes" + }, + "whitelist": { "$ref": "#/definitions/stringList" } + }, + "required": ["whitelist"], + "additionalProperties": false + }, + { + "properties": { + "filterType": { + "$ref": "#/definitions/blacklistWhitelistFilterTypes" + }, + "blacklist": { "$ref": "#/definitions/stringList" } + }, + "required": ["blacklist"], + "additionalProperties": false + }, + { + "properties": { + "filterType": { + "type": "string", + "enum": ["exclude"] + } + }, + "required": ["filterType"], + "additionalProperties": false + }, + { + "properties": { + "filterType": { + "type": "string", + "enum": ["locale"] + }, + "includeChildren": { + "type": "boolean" + }, + "includeScripts": { + "type": "boolean" + }, + "whitelist": { "$ref": "#/definitions/stringList" } + }, + "required": ["filterType", "whitelist"], + "additionalProperties": false + }, + { + "properties": { + "filterType": { + "type": "string", + "enum": ["union"] + }, + "unionOf": { + "type": "array", + "items": { "$ref": "#/definitions/filter" } + } + }, + "required": ["filterType", "unionOf"], + "additionalProperties": false + } + ] + }, + "blacklistWhitelistFilterTypes": { + "type": "string", + "enum": [ + "language", + "regex" + ] + }, + "stringList": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "uniqueItems": true + } + } +} diff --git a/intl/icu/source/python/icutools/databuilder/renderers/__init__.py b/intl/icu/source/python/icutools/databuilder/renderers/__init__.py new file mode 100644 index 0000000000..7c402c2b78 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/renderers/__init__.py @@ -0,0 +1,10 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +from collections import namedtuple + +MakeRule = namedtuple("MakeRule", ["name", "dep_literals", "dep_files", "output_file", "cmds"]) + +MakeFilesVar = namedtuple("MakeFilesVar", ["name", "files"]) + +MakeStringVar = namedtuple("MakeStringVar", ["name", "content"]) diff --git a/intl/icu/source/python/icutools/databuilder/renderers/common_exec.py b/intl/icu/source/python/icutools/databuilder/renderers/common_exec.py new file mode 100644 index 0000000000..3623441555 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/renderers/common_exec.py @@ -0,0 +1,149 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +from . import * +from .. import * +from .. import utils +from ..request_types import * + +import os +import shutil +import subprocess +import sys + +def run(build_dirs, requests, common_vars, verbose=True, **kwargs): + for bd in build_dirs: + makedirs(bd.format(**common_vars)) + for request in requests: + status = run_helper(request, common_vars, verbose=verbose, **kwargs) + if status != 0: + print("!!! ERROR executing above command line: exit code %d" % status) + return 1 + if verbose: + print("All data build commands executed") + return 0 + +def makedirs(dirs): + """makedirs compatible between Python 2 and 3""" + try: + # Python 3 version + os.makedirs(dirs, exist_ok=True) + except TypeError as e: + # Python 2 version + try: + os.makedirs(dirs) + except OSError as e: + if e.errno != errno.EEXIST: + raise e + +def run_helper(request, common_vars, platform, tool_dir, verbose, tool_cfg=None, **kwargs): + if isinstance(request, PrintFileRequest): + output_path = "{DIRNAME}/{FILENAME}".format( + DIRNAME = utils.dir_for(request.output_file).format(**common_vars), + FILENAME = request.output_file.filename, + ) + if verbose: + print("Printing to file: %s" % output_path) + with open(output_path, "w") as f: + f.write(request.content) + return 0 + if isinstance(request, CopyRequest): + input_path = "{DIRNAME}/{FILENAME}".format( + DIRNAME = utils.dir_for(request.input_file).format(**common_vars), + FILENAME = request.input_file.filename, + ) + output_path = "{DIRNAME}/{FILENAME}".format( + DIRNAME = utils.dir_for(request.output_file).format(**common_vars), + FILENAME = request.output_file.filename, + ) + if verbose: + print("Copying file to: %s" % output_path) + shutil.copyfile(input_path, output_path) + return 0 + if isinstance(request, VariableRequest): + # No-op + return 0 + + assert isinstance(request.tool, IcuTool) + if platform == "windows": + cmd_template = "{TOOL_DIR}/{TOOL}/{TOOL_CFG}/{TOOL}.exe {{ARGS}}".format( + TOOL_DIR = tool_dir, + TOOL_CFG = tool_cfg, + TOOL = request.tool.name, + **common_vars + ) + elif platform == "unix": + cmd_template = "{TOOL_DIR}/{TOOL} {{ARGS}}".format( + TOOL_DIR = tool_dir, + TOOL = request.tool.name, + **common_vars + ) + elif platform == "bazel": + cmd_template = "{TOOL_DIR}/{TOOL}/{TOOL} {{ARGS}}".format( + TOOL_DIR = tool_dir, + TOOL = request.tool.name, + **common_vars + ) + else: + raise ValueError("Unknown platform: %s" % platform) + + if isinstance(request, RepeatedExecutionRequest): + for loop_vars in utils.repeated_execution_request_looper(request): + command_line = utils.format_repeated_request_command( + request, + cmd_template, + loop_vars, + common_vars + ) + if platform == "windows": + # Note: this / to \ substitution may be too aggressive? + command_line = command_line.replace("/", "\\") + returncode = run_shell_command(command_line, platform, verbose) + if returncode != 0: + return returncode + return 0 + if isinstance(request, SingleExecutionRequest): + command_line = utils.format_single_request_command( + request, + cmd_template, + common_vars + ) + if platform == "windows": + # Note: this / to \ substitution may be too aggressive? + command_line = command_line.replace("/", "\\") + returncode = run_shell_command(command_line, platform, verbose) + return returncode + assert False + +def run_shell_command(command_line, platform, verbose): + changed_windows_comspec = False + # If the command line length on Windows exceeds the absolute maximum that CMD supports (8191), then + # we temporarily switch over to use PowerShell for the command, and then switch back to CMD. + # We don't want to use PowerShell for everything though, as it tends to be slower. + if (platform == "windows"): + previous_comspec = os.environ["COMSPEC"] + # Add 7 to the length for the argument /c with quotes. + # For example: C:\WINDOWS\system32\cmd.exe /c "<command_line>" + if ((len(previous_comspec) + len(command_line) + 7) > 8190): + if verbose: + print("Command length exceeds the max length for CMD on Windows, using PowerShell instead.") + os.environ["COMSPEC"] = 'powershell' + changed_windows_comspec = True + if verbose: + print("Running: %s" % command_line) + returncode = subprocess.call( + command_line, + shell = True + ) + else: + # Pipe output to /dev/null in quiet mode + with open(os.devnull, "w") as devnull: + returncode = subprocess.call( + command_line, + shell = True, + stdout = devnull, + stderr = devnull + ) + if changed_windows_comspec: + os.environ["COMSPEC"] = previous_comspec + return returncode diff --git a/intl/icu/source/python/icutools/databuilder/renderers/makefile.py b/intl/icu/source/python/icutools/databuilder/renderers/makefile.py new file mode 100644 index 0000000000..9b2005b07d --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/renderers/makefile.py @@ -0,0 +1,245 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +# Python 2/3 Compatibility (ICU-20299) +# TODO(ICU-20301): Remove this. +from __future__ import print_function + +from . import * +from .. import * +from .. import utils +from ..request_types import * + +def get_gnumake_rules(build_dirs, requests, makefile_vars, **kwargs): + makefile_string = "" + + # Common Variables + common_vars = kwargs["common_vars"] + for key, value in sorted(makefile_vars.items()): + makefile_string += "{KEY} = {VALUE}\n".format( + KEY = key, + VALUE = value + ) + makefile_string += "\n" + + # Directories + dirs_timestamp_file = "{TMP_DIR}/dirs.timestamp".format(**common_vars) + makefile_string += "DIRS = {TIMESTAMP_FILE}\n\n".format( + TIMESTAMP_FILE = dirs_timestamp_file + ) + makefile_string += "{TIMESTAMP_FILE}:\n\t$(MKINSTALLDIRS) {ALL_DIRS}\n\techo timestamp > {TIMESTAMP_FILE}\n\n".format( + TIMESTAMP_FILE = dirs_timestamp_file, + ALL_DIRS = " ".join(build_dirs).format(**common_vars) + ) + + # Generate Rules + make_rules = [] + for request in requests: + make_rules += get_gnumake_rules_helper(request, **kwargs) + + # Main Commands + for rule in make_rules: + if isinstance(rule, MakeFilesVar): + makefile_string += "{NAME} = {FILE_LIST}\n\n".format( + NAME = rule.name, + FILE_LIST = files_to_makefile(rule.files, wrap = True, **kwargs), + ) + continue + + if isinstance(rule, MakeStringVar): + makefile_string += "define {NAME}\n{CONTENT}\nendef\nexport {NAME}\n\n".format( + NAME = rule.name, + CONTENT = rule.content + ) + continue + + assert isinstance(rule, MakeRule) + header_line = "{OUT_FILE}: {DEP_FILES} {DEP_LITERALS} | $(DIRS)".format( + OUT_FILE = files_to_makefile([rule.output_file], **kwargs), + DEP_FILES = files_to_makefile(rule.dep_files, wrap = True, **kwargs), + DEP_LITERALS = " ".join(rule.dep_literals) + ) + + if len(rule.cmds) == 0: + makefile_string += "%s\n\n" % header_line + continue + + makefile_string += "{HEADER_LINE}\n{RULE_LINES}\n\n".format( + HEADER_LINE = header_line, + RULE_LINES = "\n".join("\t%s" % cmd for cmd in rule.cmds) + ) + + return makefile_string + +def files_to_makefile(files, common_vars, wrap = False, **kwargs): + if len(files) == 0: + return "" + dirnames = [utils.dir_for(file).format(**common_vars) for file in files] + join_str = " \\\n\t\t" if wrap and len(files) > 2 else " " + if len(files) == 1: + return "%s/%s" % (dirnames[0], files[0].filename) + elif len(set(dirnames)) == 1: + return "$(addprefix %s/,%s)" % (dirnames[0], join_str.join(file.filename for file in files)) + else: + return join_str.join("%s/%s" % (d, f.filename) for d,f in zip(dirnames, files)) + +def get_gnumake_rules_helper(request, common_vars, **kwargs): + + if isinstance(request, PrintFileRequest): + var_name = "%s_CONTENT" % request.name.upper() + return [ + MakeStringVar( + name = var_name, + content = request.content + ), + MakeRule( + name = request.name, + dep_literals = [], + dep_files = [], + output_file = request.output_file, + cmds = [ + "echo \"$${VAR_NAME}\" > {MAKEFILENAME}".format( + VAR_NAME = var_name, + MAKEFILENAME = files_to_makefile([request.output_file], common_vars), + **common_vars + ) + ] + ) + ] + + + if isinstance(request, CopyRequest): + return [ + MakeRule( + name = request.name, + dep_literals = [], + dep_files = [request.input_file], + output_file = request.output_file, + cmds = ["cp %s %s" % ( + files_to_makefile([request.input_file], common_vars), + files_to_makefile([request.output_file], common_vars)) + ] + ) + ] + + if isinstance(request, VariableRequest): + return [ + MakeFilesVar( + name = request.name.upper(), + files = request.input_files + ) + ] + + if request.tool.name == "make": + cmd_template = "$(MAKE) {ARGS}" + elif request.tool.name == "gentest": + cmd_template = "$(INVOKE) $(GENTEST) {ARGS}" + else: + assert isinstance(request.tool, IcuTool) + cmd_template = "$(INVOKE) $(TOOLBINDIR)/{TOOL} {{ARGS}}".format( + TOOL = request.tool.name + ) + + if isinstance(request, SingleExecutionRequest): + cmd = utils.format_single_request_command(request, cmd_template, common_vars) + dep_files = request.all_input_files() + + if len(request.output_files) > 1: + # Special case for multiple output files: Makefile rules should have only one + # output file apiece. More information: + # https://www.gnu.org/software/automake/manual/html_node/Multiple-Outputs.html + timestamp_var_name = "%s_ALL" % request.name.upper() + timestamp_file = TmpFile("%s.timestamp" % request.name) + rules = [ + MakeFilesVar( + name = timestamp_var_name, + files = [timestamp_file] + ), + MakeRule( + name = "%s_all" % request.name, + dep_literals = [], + dep_files = dep_files, + output_file = timestamp_file, + cmds = [ + cmd, + "echo timestamp > {MAKEFILENAME}".format( + MAKEFILENAME = files_to_makefile([timestamp_file], common_vars) + ) + ] + ) + ] + for i, file in enumerate(request.output_files): + rules += [ + MakeRule( + name = "%s_%d" % (request.name, i), + dep_literals = ["$(%s)" % timestamp_var_name], + dep_files = [], + output_file = file, + cmds = [] + ) + ] + return rules + + elif len(dep_files) > 5: + # For nicer printing, for long input lists, use a helper variable. + dep_var_name = "%s_DEPS" % request.name.upper() + return [ + MakeFilesVar( + name = dep_var_name, + files = dep_files + ), + MakeRule( + name = request.name, + dep_literals = ["$(%s)" % dep_var_name], + dep_files = [], + output_file = request.output_files[0], + cmds = [cmd] + ) + ] + + else: + return [ + MakeRule( + name = request.name, + dep_literals = [], + dep_files = dep_files, + output_file = request.output_files[0], + cmds = [cmd] + ) + ] + + if isinstance(request, RepeatedExecutionRequest): + rules = [] + dep_literals = [] + # To keep from repeating the same dep files many times, make a variable. + if len(request.common_dep_files) > 0: + dep_var_name = "%s_DEPS" % request.name.upper() + dep_literals = ["$(%s)" % dep_var_name] + rules += [ + MakeFilesVar( + name = dep_var_name, + files = request.common_dep_files + ) + ] + # Add a rule for each individual file. + for loop_vars in utils.repeated_execution_request_looper(request): + (_, specific_dep_files, input_file, output_file) = loop_vars + name_suffix = input_file[input_file.filename.rfind("/")+1:input_file.filename.rfind(".")] + cmd = utils.format_repeated_request_command( + request, + cmd_template, + loop_vars, + common_vars + ) + rules += [ + MakeRule( + name = "%s_%s" % (request.name, name_suffix), + dep_literals = dep_literals, + dep_files = specific_dep_files + [input_file], + output_file = output_file, + cmds = [cmd] + ) + ] + return rules + + assert False diff --git a/intl/icu/source/python/icutools/databuilder/request_types.py b/intl/icu/source/python/icutools/databuilder/request_types.py new file mode 100644 index 0000000000..aa70f8d918 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/request_types.py @@ -0,0 +1,364 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +# Python 2/3 Compatibility (ICU-20299) +# TODO(ICU-20301): Remove this. +from __future__ import print_function + +from abc import abstractmethod +import copy +import sys + +from . import * +from . import utils + + +# TODO(ICU-20301): Remove arguments from all instances of super() in this file + +# Note: for this to be a proper abstract class, it should extend abc.ABC. +# There is no nice way to do this that works in both Python 2 and 3. +# TODO(ICU-20301): Make this inherit from abc.ABC. +class AbstractRequest(object): + def __init__(self, **kwargs): + + # Used for identification purposes + self.name = None + + # The filter category that applies to this request + self.category = None + + self._set_fields(kwargs) + + def _set_fields(self, kwargs): + for key, value in list(kwargs.items()): + if hasattr(self, key): + if isinstance(value, list): + value = copy.copy(value) + elif isinstance(value, dict): + value = copy.deepcopy(value) + setattr(self, key, value) + else: + raise ValueError("Unknown argument: %s" % key) + + def apply_file_filter(self, filter): + """ + Returns True if this request still has input files after filtering, + or False if the request is "empty" after filtering. + """ + return True + + def flatten(self, config, all_requests, common_vars): + return [self] + + def all_input_files(self): + return [] + + def all_output_files(self): + return [] + + +class AbstractExecutionRequest(AbstractRequest): + def __init__(self, **kwargs): + + # Names of targets (requests) or files that this request depends on. + # The entries of dep_targets may be any of the following types: + # + # 1. DepTarget, for the output of an execution request. + # 2. InFile, TmpFile, etc., for a specific file. + # 3. A list of InFile, TmpFile, etc., where the list is the same + # length as self.input_files and self.output_files. + # + # In cases 1 and 2, the dependency is added to all rules that the + # request generates. In case 3, the dependency is added only to the + # rule that generates the output file at the same array index. + self.dep_targets = [] + + # Computed during self.flatten(); don't edit directly. + self.common_dep_files = [] + + # Primary input files + self.input_files = [] + + # Output files; for some subclasses, this must be the same length + # as input_files + self.output_files = [] + + # What tool to execute + self.tool = None + + # Argument string to pass to the tool with optional placeholders + self.args = "" + + # Placeholders to substitute into the argument string; if any of these + # have a list type, the list must be equal in length to input_files + self.format_with = {} + + super(AbstractExecutionRequest, self).__init__(**kwargs) + + def apply_file_filter(self, filter): + i = 0 + while i < len(self.input_files): + if filter.match(self.input_files[i]): + i += 1 + else: + self._del_at(i) + return i > 0 + + def _del_at(self, i): + del self.input_files[i] + for _, v in self.format_with.items(): + if isinstance(v, list): + assert len(v) == len(self.input_files) + 1 + del v[i] + for v in self.dep_targets: + if isinstance(v, list): + assert len(v) == len(self.input_files) + 1 + del v[i] + + def flatten(self, config, all_requests, common_vars): + self._dep_targets_to_files(all_requests) + return super(AbstractExecutionRequest, self).flatten(config, all_requests, common_vars) + + def _dep_targets_to_files(self, all_requests): + if not self.dep_targets: + return + for dep_target in self.dep_targets: + if isinstance(dep_target, list): + if hasattr(self, "specific_dep_files"): + assert len(dep_target) == len(self.specific_dep_files) + for file, out_list in zip(dep_target, self.specific_dep_files): + assert hasattr(file, "filename") + out_list.append(file) + else: + self.common_dep_files += dep_target + continue + if not isinstance(dep_target, DepTarget): + # Copy file entries directly to dep_files. + assert hasattr(dep_target, "filename") + self.common_dep_files.append(dep_target) + continue + # For DepTarget entries, search for the target. + for request in all_requests: + if request.name == dep_target.name: + self.common_dep_files += request.all_output_files() + break + else: + print("Warning: Unable to find target %s, a dependency of %s" % ( + dep_target.name, + self.name + ), file=sys.stderr) + self.dep_targets = [] + + def all_input_files(self): + return self.common_dep_files + self.input_files + + def all_output_files(self): + return self.output_files + + +class SingleExecutionRequest(AbstractExecutionRequest): + def __init__(self, **kwargs): + super(SingleExecutionRequest, self).__init__(**kwargs) + + +class RepeatedExecutionRequest(AbstractExecutionRequest): + def __init__(self, **kwargs): + + # Placeholders to substitute into the argument string unique to each + # iteration; all values must be lists equal in length to input_files + self.repeat_with = {} + + # Lists for dep files that are specific to individual resource bundle files + self.specific_dep_files = [[] for _ in range(len(kwargs["input_files"]))] + + super(RepeatedExecutionRequest, self).__init__(**kwargs) + + def _del_at(self, i): + super(RepeatedExecutionRequest, self)._del_at(i) + del self.output_files[i] + del self.specific_dep_files[i] + for _, v in self.repeat_with.items(): + if isinstance(v, list): + del v[i] + + def all_input_files(self): + files = super(RepeatedExecutionRequest, self).all_input_files() + for specific_file_list in self.specific_dep_files: + files += specific_file_list + return files + + +class RepeatedOrSingleExecutionRequest(AbstractExecutionRequest): + def __init__(self, **kwargs): + self.repeat_with = {} + super(RepeatedOrSingleExecutionRequest, self).__init__(**kwargs) + + def flatten(self, config, all_requests, common_vars): + if config.max_parallel: + new_request = RepeatedExecutionRequest( + name = self.name, + category = self.category, + dep_targets = self.dep_targets, + input_files = self.input_files, + output_files = self.output_files, + tool = self.tool, + args = self.args, + format_with = self.format_with, + repeat_with = self.repeat_with + ) + else: + new_request = SingleExecutionRequest( + name = self.name, + category = self.category, + dep_targets = self.dep_targets, + input_files = self.input_files, + output_files = self.output_files, + tool = self.tool, + args = self.args, + format_with = utils.concat_dicts(self.format_with, self.repeat_with) + ) + return new_request.flatten(config, all_requests, common_vars) + + def _del_at(self, i): + super(RepeatedOrSingleExecutionRequest, self)._del_at(i) + del self.output_files[i] + for _, v in self.repeat_with.items(): + if isinstance(v, list): + del v[i] + + +class PrintFileRequest(AbstractRequest): + def __init__(self, **kwargs): + self.output_file = None + self.content = None + super(PrintFileRequest, self).__init__(**kwargs) + + def all_output_files(self): + return [self.output_file] + + +class CopyRequest(AbstractRequest): + def __init__(self, **kwargs): + self.input_file = None + self.output_file = None + super(CopyRequest, self).__init__(**kwargs) + + def all_input_files(self): + return [self.input_file] + + def all_output_files(self): + return [self.output_file] + + +class VariableRequest(AbstractRequest): + def __init__(self, **kwargs): + self.input_files = [] + super(VariableRequest, self).__init__(**kwargs) + + def all_input_files(self): + return self.input_files + + +class ListRequest(AbstractRequest): + def __init__(self, **kwargs): + self.variable_name = None + self.output_file = None + self.include_tmp = None + super(ListRequest, self).__init__(**kwargs) + + def flatten(self, config, all_requests, common_vars): + list_files = list(sorted(utils.get_all_output_files(all_requests))) + if self.include_tmp: + variable_files = list(sorted(utils.get_all_output_files(all_requests, include_tmp=True))) + else: + # Always include the list file itself + variable_files = list_files + [self.output_file] + return PrintFileRequest( + name = self.name, + output_file = self.output_file, + content = "\n".join(file.filename for file in list_files) + ).flatten(config, all_requests, common_vars) + VariableRequest( + name = self.variable_name, + input_files = variable_files + ).flatten(config, all_requests, common_vars) + + def all_output_files(self): + return [self.output_file] + + +class IndexRequest(AbstractRequest): + def __init__(self, **kwargs): + self.installed_files = [] + self.alias_files = [] + self.txt_file = None + self.output_file = None + self.cldr_version = "" + self.args = "" + self.format_with = {} + super(IndexRequest, self).__init__(**kwargs) + + def apply_file_filter(self, filter): + i = 0 + while i < len(self.installed_files): + if filter.match(self.installed_files[i]): + i += 1 + else: + del self.installed_files[i] + j = 0 + while j < len(self.alias_files): + if filter.match(self.alias_files[j]): + j += 1 + else: + del self.alias_files[j] + return i + j > 0 + + def flatten(self, config, all_requests, common_vars): + return ( + PrintFileRequest( + name = self.name, + output_file = self.txt_file, + content = self._generate_index_file(common_vars) + ).flatten(config, all_requests, common_vars) + + SingleExecutionRequest( + name = "%s_res" % self.name, + category = self.category, + input_files = [self.txt_file], + output_files = [self.output_file], + tool = IcuTool("genrb"), + args = self.args, + format_with = self.format_with + ).flatten(config, all_requests, common_vars) + ) + + def _generate_index_file(self, common_vars): + installed_locales = [IndexRequest.locale_file_stem(f) for f in self.installed_files] + alias_locales = [IndexRequest.locale_file_stem(f) for f in self.alias_files] + formatted_version = " CLDRVersion { \"%s\" }\n" % self.cldr_version if self.cldr_version else "" + formatted_installed_locales = "\n".join([" %s {\"\"}" % v for v in installed_locales]) + formatted_alias_locales = "\n".join([" %s {\"\"}" % v for v in alias_locales]) + # TODO: CLDRVersion is required only in the base file + return ("// Warning this file is automatically generated\n" + "{INDEX_NAME}:table(nofallback) {{\n" + "{FORMATTED_VERSION}" + " InstalledLocales:table {{\n" + "{FORMATTED_INSTALLED_LOCALES}\n" + " }}\n" + " AliasLocales:table {{\n" + "{FORMATTED_ALIAS_LOCALES}\n" + " }}\n" + "}}").format( + FORMATTED_VERSION = formatted_version, + FORMATTED_INSTALLED_LOCALES = formatted_installed_locales, + FORMATTED_ALIAS_LOCALES = formatted_alias_locales, + **common_vars + ) + + def all_input_files(self): + return self.installed_files + self.alias_files + + def all_output_files(self): + return [self.output_file] + + @staticmethod + def locale_file_stem(f): + return f.filename[f.filename.rfind("/")+1:-4] diff --git a/intl/icu/source/python/icutools/databuilder/test/__init__.py b/intl/icu/source/python/icutools/databuilder/test/__init__.py new file mode 100644 index 0000000000..dd12bfa16e --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/test/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html diff --git a/intl/icu/source/python/icutools/databuilder/test/__main__.py b/intl/icu/source/python/icutools/databuilder/test/__main__.py new file mode 100644 index 0000000000..6ae2c0f7c9 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/test/__main__.py @@ -0,0 +1,14 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +import unittest + +from . import filtration_test + +def load_tests(loader, tests, pattern): + suite = unittest.TestSuite() + suite.addTest(filtration_test.suite) + return suite + +if __name__ == '__main__': + unittest.main() diff --git a/intl/icu/source/python/icutools/databuilder/test/filtration_test.py b/intl/icu/source/python/icutools/databuilder/test/filtration_test.py new file mode 100644 index 0000000000..416223bd7e --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/test/filtration_test.py @@ -0,0 +1,421 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +import io as pyio +import json +import os +import unittest + +from .. import InFile +from ..comment_stripper import CommentStripper +from ..filtration import Filter + +EXAMPLE_FILE_STEMS = [ + "af_NA", + "af_VARIANT", + "af_ZA_VARIANT", + "af_ZA", + "af", + "ar", + "ar_SA", + "ars", + "bs_BA", + "bs_Cyrl_BA", + "bs_Cyrl", + "bs_Latn_BA", + "bs_Latn", + "bs", + "en_001", + "en_150", + "en_DE", + "en_GB", + "en_US", + "root", + "sr_BA", + "sr_CS", + "sr_Cyrl_BA", + "sr_Cyrl_CS", + "sr_Cyrl_ME", + "sr_Cyrl", + "sr_Latn_BA", + "sr_Latn_CS", + "sr_Latn_ME_VARIANT", + "sr_Latn_ME", + "sr_Latn", + "sr_ME", + "sr", + "vai_Latn_LR", + "vai_Latn", + "vai_LR", + "vai_Vaii_LR", + "vai_Vaii", + "vai", + "yue", + "zh_CN", + "zh_Hans_CN", + "zh_Hans_HK", + "zh_Hans_MO", + "zh_Hans_SG", + "zh_Hans", + "zh_Hant_HK", + "zh_Hant_MO", + "zh_Hant_TW", + "zh_Hant", + "zh_HK", + "zh_MO", + "zh_SG", + "zh_TW", + "zh" +] + + +class TestIO(object): + def __init__(self): + pass + + def read_locale_deps(self, tree): + if tree not in ("brkitr", "locales", "rbnf"): + return None + with pyio.open(os.path.join( + os.path.dirname(__file__), + "sample_data", + tree, + "LOCALE_DEPS.json" + ), "r", encoding="utf-8-sig") as f: + return json.load(CommentStripper(f)) + + +class FiltrationTest(unittest.TestCase): + + def test_exclude(self): + self._check_filter(Filter.create_from_json({ + "filterType": "exclude" + }, TestIO()), [ + ]) + + def test_default_whitelist(self): + self._check_filter(Filter.create_from_json({ + "whitelist": [ + "ars", + "zh_Hans" + ] + }, TestIO()), [ + "ars", + "zh_Hans" + ]) + + def test_default_blacklist(self): + expected_matches = set(EXAMPLE_FILE_STEMS) + expected_matches.remove("ars") + expected_matches.remove("zh_Hans") + self._check_filter(Filter.create_from_json({ + "blacklist": [ + "ars", + "zh_Hans" + ] + }, TestIO()), expected_matches) + + def test_language_whitelist(self): + self._check_filter(Filter.create_from_json({ + "filterType": "language", + "whitelist": [ + "af", + "bs" + ] + }, TestIO()), [ + "root", + "af_NA", + "af_VARIANT", + "af_ZA_VARIANT", + "af_ZA", + "af", + "bs_BA", + "bs_Cyrl_BA", + "bs_Cyrl", + "bs_Latn_BA", + "bs_Latn", + "bs" + ]) + + def test_language_blacklist(self): + expected_matches = set(EXAMPLE_FILE_STEMS) + expected_matches.remove("af_NA") + expected_matches.remove("af_VARIANT") + expected_matches.remove("af_ZA_VARIANT") + expected_matches.remove("af_ZA") + expected_matches.remove("af") + self._check_filter(Filter.create_from_json({ + "filterType": "language", + "blacklist": [ + "af" + ] + }, TestIO()), expected_matches) + + def test_regex_whitelist(self): + self._check_filter(Filter.create_from_json({ + "filterType": "regex", + "whitelist": [ + r"^ar.*$", + r"^zh$" + ] + }, TestIO()), [ + "ar", + "ar_SA", + "ars", + "zh" + ]) + + def test_regex_blacklist(self): + expected_matches = set(EXAMPLE_FILE_STEMS) + expected_matches.remove("ar") + expected_matches.remove("ar_SA") + expected_matches.remove("ars") + expected_matches.remove("zh") + self._check_filter(Filter.create_from_json({ + "filterType": "regex", + "blacklist": [ + r"^ar.*$", + r"^zh$" + ] + }, TestIO()), expected_matches) + + def test_locale_basic(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "whitelist": [ + # Default scripts: + # sr => Cyrl + # vai => Vaii + # zh => Hans + "bs_BA", # is an alias to bs_Latn_BA + "en_DE", + "sr", # Language with no script + "vai_Latn", # Language with non-default script + "zh_Hans" # Language with default script + ] + }, TestIO()), [ + "root", + # bs: should include the full dependency tree of bs_BA + "bs_BA", + "bs_Latn_BA", + "bs_Latn", + "bs", + # en: should include the full dependency tree of en_DE + "en", + "en_DE", + "en_150", + "en_001", + # sr: include Cyrl, the default, but not Latn. + "sr", + "sr_BA", + "sr_CS", + "sr_Cyrl", + "sr_Cyrl_BA", + "sr_Cyrl_CS", + "sr_Cyrl_ME", + # vai: include Latn but NOT Vaii. + "vai_Latn", + "vai_Latn_LR", + # zh: include Hans but NOT Hant. + "zh", + "zh_CN", + "zh_SG", + "zh_Hans", + "zh_Hans_CN", + "zh_Hans_HK", + "zh_Hans_MO", + "zh_Hans_SG" + ]) + + def test_locale_no_children(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "includeChildren": False, + "whitelist": [ + # See comments in test_locale_basic. + "bs_BA", + "en_DE", + "sr", + "vai_Latn", + "zh_Hans" + ] + }, TestIO()), [ + "root", + "bs_BA", + "bs_Latn_BA", + "bs_Latn", + "bs", + "en", + "en_DE", + "en_150", + "en_001", + "sr", + "vai_Latn", + "zh", + "zh_Hans", + ]) + + def test_locale_include_scripts(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "includeScripts": True, + "whitelist": [ + # See comments in test_locale_basic. + "bs_BA", + "en_DE", + "sr", + "vai_Latn", + "zh_Hans" + ] + }, TestIO()), [ + "root", + # bs: includeScripts only works for language-only (without region) + "bs_BA", + "bs_Latn_BA", + "bs_Latn", + "bs", + # en: should include the full dependency tree of en_DE + "en", + "en_DE", + "en_150", + "en_001", + # sr: include Latn, since no particular script was requested. + "sr_BA", + "sr_CS", + "sr_Cyrl_BA", + "sr_Cyrl_CS", + "sr_Cyrl_ME", + "sr_Cyrl", + "sr_Latn_BA", + "sr_Latn_CS", + "sr_Latn_ME_VARIANT", + "sr_Latn_ME", + "sr_Latn", + "sr_ME", + "sr", + # vai: do NOT include Vaii; the script was explicitly requested. + "vai_Latn_LR", + "vai_Latn", + # zh: do NOT include Hant; the script was explicitly requested. + "zh_CN", + "zh_SG", + "zh_Hans_CN", + "zh_Hans_HK", + "zh_Hans_MO", + "zh_Hans_SG", + "zh_Hans", + "zh" + ]) + + def test_locale_no_children_include_scripts(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "includeChildren": False, + "includeScripts": True, + "whitelist": [ + # See comments in test_locale_basic. + "bs_BA", + "en_DE", + "sr", + "vai_Latn", + "zh_Hans" + ] + }, TestIO()), [ + "root", + # bs: includeScripts only works for language-only (without region) + "bs_BA", + "bs_Latn_BA", + "bs_Latn", + "bs", + # en: should include the full dependency tree of en_DE + "en", + "en_DE", + "en_150", + "en_001", + # sr: include Cyrl and Latn but no other children + "sr", + "sr_Cyrl", + "sr_Latn", + # vai: include only the requested script + "vai_Latn", + # zh: include only the requested script + "zh", + "zh_Hans", + ]) + + def test_union(self): + self._check_filter(Filter.create_from_json({ + "filterType": "union", + "unionOf": [ + { + "whitelist": [ + "ars", + "zh_Hans" + ] + }, + { + "filterType": "regex", + "whitelist": [ + r"^bs.*$", + r"^zh$" + ] + } + ] + }, TestIO()), [ + "ars", + "zh_Hans", + "bs_BA", + "bs_Cyrl_BA", + "bs_Cyrl", + "bs_Latn_BA", + "bs_Latn", + "bs", + "zh" + ]) + + def test_hk_deps_normal(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "whitelist": [ + "zh_HK" + ] + }, TestIO()), [ + "root", + "zh_Hant", + "zh_Hant_HK", + "zh_HK", + ]) + + def test_hk_deps_rbnf(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "whitelist": [ + "zh_HK" + ] + }, TestIO()), [ + "root", + "yue", + "zh_Hant_HK", + "zh_HK", + ], "rbnf") + + def test_no_alias_parent_structure(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "whitelist": [ + "zh_HK" + ] + }, TestIO()), [ + "root", + "zh_HK", + "zh", + ], "brkitr") + + def _check_filter(self, filter, expected_matches, tree="locales"): + for file_stem in EXAMPLE_FILE_STEMS: + is_match = filter.match(InFile("%s/%s.txt" % (tree, file_stem))) + expected_match = file_stem in expected_matches + self.assertEqual(is_match, expected_match, file_stem) + +# Export the test for the runner +suite = unittest.makeSuite(FiltrationTest) diff --git a/intl/icu/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json b/intl/icu/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json new file mode 100644 index 0000000000..89329e87ee --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json @@ -0,0 +1,10 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +////////////////////////////////////////////////////////////// +// This is a sample LOCALE_DEPS.json file for testing only. // +////////////////////////////////////////////////////////////// + +{ + "cldrVersion": "36.1" +} diff --git a/intl/icu/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json b/intl/icu/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json new file mode 100644 index 0000000000..fd28a741ef --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json @@ -0,0 +1,197 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +////////////////////////////////////////////////////////////// +// This is a sample LOCALE_DEPS.json file for testing only. // +////////////////////////////////////////////////////////////// + +{ + "cldrVersion": "36.1", + "aliases": { + "ars": "ar_SA", + "az_AZ": "az_Latn_AZ", + "bs_BA": "bs_Latn_BA", + "en_NH": "en_VU", + "en_RH": "en_ZW", + "ff_CM": "ff_Latn_CM", + "ff_GN": "ff_Latn_GN", + "ff_MR": "ff_Latn_MR", + "ff_SN": "ff_Latn_SN", + "in": "id", + "in_ID": "id_ID", + "iw": "he", + "iw_IL": "he_IL", + "mo": "ro", + "no": "nb", + "no_NO": "nb_NO", + "no_NO_NY": "nn_NO", + "pa_IN": "pa_Guru_IN", + "pa_PK": "pa_Arab_PK", + "sh": "sr_Latn", + "sh_BA": "sr_Latn_BA", + "sh_CS": "sr_Latn_RS", + "sh_YU": "sr_Latn_RS", + "shi_MA": "shi_Tfng_MA", + "sr_BA": "sr_Cyrl_BA", + "sr_CS": "sr_Cyrl_RS", + "sr_Cyrl_CS": "sr_Cyrl_RS", + "sr_Cyrl_YU": "sr_Cyrl_RS", + "sr_Latn_CS": "sr_Latn_RS", + "sr_Latn_YU": "sr_Latn_RS", + "sr_ME": "sr_Latn_ME", + "sr_RS": "sr_Cyrl_RS", + "sr_XK": "sr_Cyrl_XK", + "sr_YU": "sr_Cyrl_RS", + "tl": "fil", + "tl_PH": "fil_PH", + "uz_AF": "uz_Arab_AF", + "uz_UZ": "uz_Latn_UZ", + "vai_LR": "vai_Vaii_LR", + "yue_CN": "yue_Hans_CN", + "yue_HK": "yue_Hant_HK", + "zh_CN": "zh_Hans_CN", + "zh_HK": "zh_Hant_HK", + "zh_MO": "zh_Hant_MO", + "zh_SG": "zh_Hans_SG", + "zh_TW": "zh_Hant_TW" + }, + "parents": { + "az_Cyrl": "root", + "bs_Cyrl": "root", + "en_150": "en_001", + "en_AG": "en_001", + "en_AI": "en_001", + "en_AT": "en_150", + "en_AU": "en_001", + "en_BB": "en_001", + "en_BE": "en_150", + "en_BM": "en_001", + "en_BS": "en_001", + "en_BW": "en_001", + "en_BZ": "en_001", + "en_CA": "en_001", + "en_CC": "en_001", + "en_CH": "en_150", + "en_CK": "en_001", + "en_CM": "en_001", + "en_CX": "en_001", + "en_CY": "en_001", + "en_DE": "en_150", + "en_DG": "en_001", + "en_DK": "en_150", + "en_DM": "en_001", + "en_ER": "en_001", + "en_FI": "en_150", + "en_FJ": "en_001", + "en_FK": "en_001", + "en_FM": "en_001", + "en_GB": "en_001", + "en_GD": "en_001", + "en_GG": "en_001", + "en_GH": "en_001", + "en_GI": "en_001", + "en_GM": "en_001", + "en_GY": "en_001", + "en_HK": "en_001", + "en_IE": "en_001", + "en_IL": "en_001", + "en_IM": "en_001", + "en_IN": "en_001", + "en_IO": "en_001", + "en_JE": "en_001", + "en_JM": "en_001", + "en_KE": "en_001", + "en_KI": "en_001", + "en_KN": "en_001", + "en_KY": "en_001", + "en_LC": "en_001", + "en_LR": "en_001", + "en_LS": "en_001", + "en_MG": "en_001", + "en_MO": "en_001", + "en_MS": "en_001", + "en_MT": "en_001", + "en_MU": "en_001", + "en_MW": "en_001", + "en_MY": "en_001", + "en_NA": "en_001", + "en_NF": "en_001", + "en_NG": "en_001", + "en_NL": "en_150", + "en_NR": "en_001", + "en_NU": "en_001", + "en_NZ": "en_001", + "en_PG": "en_001", + "en_PH": "en_001", + "en_PK": "en_001", + "en_PN": "en_001", + "en_PW": "en_001", + "en_RW": "en_001", + "en_SB": "en_001", + "en_SC": "en_001", + "en_SD": "en_001", + "en_SE": "en_150", + "en_SG": "en_001", + "en_SH": "en_001", + "en_SI": "en_150", + "en_SL": "en_001", + "en_SS": "en_001", + "en_SX": "en_001", + "en_SZ": "en_001", + "en_TC": "en_001", + "en_TK": "en_001", + "en_TO": "en_001", + "en_TT": "en_001", + "en_TV": "en_001", + "en_TZ": "en_001", + "en_UG": "en_001", + "en_VC": "en_001", + "en_VG": "en_001", + "en_VU": "en_001", + "en_WS": "en_001", + "en_ZA": "en_001", + "en_ZM": "en_001", + "en_ZW": "en_001", + "es_AR": "es_419", + "es_BO": "es_419", + "es_BR": "es_419", + "es_BZ": "es_419", + "es_CL": "es_419", + "es_CO": "es_419", + "es_CR": "es_419", + "es_CU": "es_419", + "es_DO": "es_419", + "es_EC": "es_419", + "es_GT": "es_419", + "es_HN": "es_419", + "es_MX": "es_419", + "es_NI": "es_419", + "es_PA": "es_419", + "es_PE": "es_419", + "es_PR": "es_419", + "es_PY": "es_419", + "es_SV": "es_419", + "es_US": "es_419", + "es_UY": "es_419", + "es_VE": "es_419", + "pa_Arab": "root", + "pt_AO": "pt_PT", + "pt_CH": "pt_PT", + "pt_CV": "pt_PT", + "pt_GQ": "pt_PT", + "pt_GW": "pt_PT", + "pt_LU": "pt_PT", + "pt_MO": "pt_PT", + "pt_MZ": "pt_PT", + "pt_ST": "pt_PT", + "pt_TL": "pt_PT", + "shi_Latn": "root", + "sr_Latn": "root", + "uz_Arab": "root", + "uz_Cyrl": "root", + "vai_Latn": "root", + "yue_Hans": "root", + "zh_Hant": "root", + "zh_Hant_MO": "zh_Hant_HK" + } +} diff --git a/intl/icu/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json b/intl/icu/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json new file mode 100644 index 0000000000..f079619a36 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json @@ -0,0 +1,36 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +////////////////////////////////////////////////////////////// +// This is a sample LOCALE_DEPS.json file for testing only. // +////////////////////////////////////////////////////////////// + +{ + "cldrVersion": "36.1", + "aliases": { + "ars": "ar_SA", + "in": "id", + "iw": "he", + "no": "nb", + "sh": "sr_Latn", + "zh_HK": "zh_Hant_HK", + "zh_Hant_HK": "yue", + "zh_MO": "zh_Hant_MO", + "zh_TW": "zh_Hant_TW" + }, + "parents": { + "en_IN": "en_001", + "es_DO": "es_419", + "es_GT": "es_419", + "es_HN": "es_419", + "es_MX": "es_419", + "es_NI": "es_419", + "es_PA": "es_419", + "es_PR": "es_419", + "es_SV": "es_419", + "es_US": "es_419", + "sr_Latn": "root", + "yue_Hans": "root", + "zh_Hant": "root" + } +} diff --git a/intl/icu/source/python/icutools/databuilder/utils.py b/intl/icu/source/python/icutools/databuilder/utils.py new file mode 100644 index 0000000000..3d53d18fae --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/utils.py @@ -0,0 +1,143 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +# Python 2/3 Compatibility (ICU-20299) +# TODO(ICU-20301): Remove this. +from __future__ import print_function + +import sys + +from . import * + + +def dir_for(file): + if isinstance(file, LocalFile): + return get_local_dirname(file.dirname) + if isinstance(file, SrcFile): + return "{SRC_DIR}" + if isinstance(file, InFile): + return "{IN_DIR}" + if isinstance(file, TmpFile): + return "{TMP_DIR}" + if isinstance(file, OutFile): + return "{OUT_DIR}" + if isinstance(file, PkgFile): + return "{PKG_DIR}" + assert False + + +LOCAL_DIRNAME_SUBSTITUTIONS = { + "SRC": "{SRC_DIR}", + "FILTERS": "{FILTERS_DIR}", + "CWD": "{CWD_DIR}" +} + + +def get_local_dirname(dirname): + if dirname.startswith("/"): + return dirname + elif dirname.startswith("$"): + # Note: directory separator substitution happens later + sep_idx = dirname.find("/") + if sep_idx == -1: + sep_idx = len(dirname) + variable = dirname[1:sep_idx] + if variable in LOCAL_DIRNAME_SUBSTITUTIONS: + return LOCAL_DIRNAME_SUBSTITUTIONS[variable] + dirname[sep_idx:] + print( + "Error: Local directory must be absolute, or relative to one of: " + + (", ".join("$%s" % v for v in LOCAL_DIRNAME_SUBSTITUTIONS.keys())), + file=sys.stderr + ) + exit(1) + + +ALL_TREES = [ + "locales", + "curr", + "lang", + "region", + "zone", + "unit", + "coll", + "brkitr", + "rbnf", +] + + +def concat_dicts(*dicts): + # There is not a super great way to do this in Python: + new_dict = {} + for dict in dicts: + new_dict.update(dict) + return new_dict + + +def repeated_execution_request_looper(request): + # dictionary of lists to list of dictionaries: + ld = [ + dict(zip(request.repeat_with, t)) + for t in zip(*request.repeat_with.values()) + ] + if not ld: + # No special options given in repeat_with + ld = [{} for _ in range(len(request.input_files))] + return zip(ld, request.specific_dep_files, request.input_files, request.output_files) + + +def format_single_request_command(request, cmd_template, common_vars): + return cmd_template.format( + ARGS = request.args.format( + INPUT_FILES = [file.filename for file in request.input_files], + OUTPUT_FILES = [file.filename for file in request.output_files], + **concat_dicts(common_vars, request.format_with) + ) + ) + + +def format_repeated_request_command(request, cmd_template, loop_vars, common_vars): + (iter_vars, _, input_file, output_file) = loop_vars + return cmd_template.format( + ARGS = request.args.format( + INPUT_FILE = input_file.filename, + OUTPUT_FILE = output_file.filename, + **concat_dicts(common_vars, request.format_with, iter_vars) + ) + ) + + +def flatten_requests(requests, config, common_vars): + result = [] + for request in requests: + result += request.flatten(config, requests, common_vars) + return result + + +def get_all_output_files(requests, include_tmp=False): + files = [] + for request in requests: + files += request.all_output_files() + + # Filter out all files but those in OUT_DIR if necessary. + # It is also easy to filter for uniqueness; do it right now and return. + if not include_tmp: + files = (file for file in files if isinstance(file, OutFile)) + return list(set(files)) + + # Filter for unique values. NOTE: Cannot use set() because we need to accept same filename as + # OutFile and TmpFile as different, and by default they evaluate as equal. + return [f for _, f in set((type(f), f) for f in files)] + + +def compute_directories(requests): + dirs = set() + for file in get_all_output_files(requests, include_tmp=True): + path = "%s/%s" % (dir_for(file), file.filename) + dirs.add(path[:path.rfind("/")]) + return list(sorted(dirs)) + + +class SpaceSeparatedList(list): + """A list that joins itself with spaces when converted to a string.""" + def __str__(self): + return " ".join(self) |