summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/python/icutools/databuilder
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /intl/icu/source/python/icutools/databuilder
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'intl/icu/source/python/icutools/databuilder')
-rw-r--r--intl/icu/source/python/icutools/databuilder/__init__.py16
-rw-r--r--intl/icu/source/python/icutools/databuilder/__main__.py377
-rw-r--r--intl/icu/source/python/icutools/databuilder/comment_stripper.py51
-rw-r--r--intl/icu/source/python/icutools/databuilder/filtration.py427
-rw-r--r--intl/icu/source/python/icutools/databuilder/filtration_schema.json206
-rw-r--r--intl/icu/source/python/icutools/databuilder/renderers/__init__.py10
-rw-r--r--intl/icu/source/python/icutools/databuilder/renderers/common_exec.py155
-rw-r--r--intl/icu/source/python/icutools/databuilder/renderers/makefile.py245
-rw-r--r--intl/icu/source/python/icutools/databuilder/request_types.py364
-rw-r--r--intl/icu/source/python/icutools/databuilder/test/__init__.py2
-rw-r--r--intl/icu/source/python/icutools/databuilder/test/__main__.py14
-rw-r--r--intl/icu/source/python/icutools/databuilder/test/filtration_test.py421
-rw-r--r--intl/icu/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json10
-rw-r--r--intl/icu/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json197
-rw-r--r--intl/icu/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json36
-rw-r--r--intl/icu/source/python/icutools/databuilder/utils.py143
16 files changed, 2674 insertions, 0 deletions
diff --git a/intl/icu/source/python/icutools/databuilder/__init__.py b/intl/icu/source/python/icutools/databuilder/__init__.py
new file mode 100644
index 0000000000..be936166e7
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/__init__.py
@@ -0,0 +1,16 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+from collections import namedtuple
+
# Lightweight value types that tag a file name with the kind of location it
# refers to, so that code can dispatch on the wrapper type with isinstance().
# LocalFile additionally carries the directory that the file name is relative to.
LocalFile = namedtuple("LocalFile", ["dirname", "filename"])
SrcFile = namedtuple("SrcFile", ["filename"])
InFile = namedtuple("InFile", ["filename"])
TmpFile = namedtuple("TmpFile", ["filename"])
OutFile = namedtuple("OutFile", ["filename"])
PkgFile = namedtuple("PkgFile", ["filename"])

# Wrappers for tool names: an ICU tool (e.g. IcuTool("genrb")) versus a tool
# that is presumably provided by the host system -- TODO confirm in the renderers.
IcuTool = namedtuple("IcuTool", ["name"])
SystemTool = namedtuple("SystemTool", ["name"])

# Names a dependency target rather than a concrete file.
DepTarget = namedtuple("DepTarget", ["name"])
diff --git a/intl/icu/source/python/icutools/databuilder/__main__.py b/intl/icu/source/python/icutools/databuilder/__main__.py
new file mode 100644
index 0000000000..b72fa76629
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/__main__.py
@@ -0,0 +1,377 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+# Python 2/3 Compatibility (ICU-20299)
+# TODO(ICU-20301): Remove this.
+from __future__ import print_function
+
+import argparse
+import glob as pyglob
+import io as pyio
+import json
+import os
+import sys
+
+from . import *
+from .comment_stripper import CommentStripper
+from .request_types import CopyRequest
+from .renderers import makefile, common_exec
+from . import filtration, utils
+
# Command-line parser for the data builder. RawDescriptionHelpFormatter keeps
# the hand-formatted description below verbatim in the --help output.
flag_parser = argparse.ArgumentParser(
    description = """Generates rules for building ICU binary data files from text
and other input files in source control.

Use the --mode option to declare how to execute those rules, either exporting
the rules to a Makefile or spawning child processes to run them immediately:

 --mode=gnumake prints a Makefile to standard out.
 --mode=unix-exec spawns child processes in a Unix-like environment.
 --mode=windows-exec spawns child processes in a Windows-like environment.

Tips for --mode=unix-exec
=========================

Create two empty directories for out_dir and tmp_dir. They will get filled
with a lot of intermediate files.

Set LD_LIBRARY_PATH to include the lib directory. e.g., from icu4c/source:

 $ LD_LIBRARY_PATH=lib PYTHONPATH=python python3 -m icutools.databuilder ...

Once icutools.databuilder finishes, you have compiled the data, but you have
not packaged it into a .dat or .so file. This is done by the separate pkgdata
tool in bin. Read the docs of pkgdata:

 $ LD_LIBRARY_PATH=lib ./bin/pkgdata --help

Example command line to call pkgdata:

 $ LD_LIBRARY_PATH=lib ./bin/pkgdata -m common -p icudt63l -c \\
 -O data/icupkg.inc -s $OUTDIR -d $TMPDIR $TMPDIR/icudata.lst

where $OUTDIR and $TMPDIR are your out and tmp directories, respectively.
The above command will create icudt63l.dat in the tmpdir.

Command-Line Arguments
======================
""",
    formatter_class = argparse.RawDescriptionHelpFormatter
)

# --mode is the only mandatory flag; it is placed in its own group so that
# --help lists it under "required arguments".
arg_group_required = flag_parser.add_argument_group("required arguments")
arg_group_required.add_argument(
    "--mode",
    help = "What to do with the generated rules.",
    choices = ["gnumake", "unix-exec", "windows-exec", "bazel-exec"],
    required = True
)

# Optional flags that apply to every mode.
flag_parser.add_argument(
    "--src_dir",
    help = "Path to data source folder (icu4c/source/data).",
    default = "."
)
flag_parser.add_argument(
    "--filter_file",
    metavar = "PATH",
    help = "Path to an ICU data filter JSON file.",
    default = None
)
flag_parser.add_argument(
    "--include_uni_core_data",
    help = "Include the full Unicode core data in the dat file.",
    default = False,
    action = "store_true"
)
flag_parser.add_argument(
    "--seqmode",
    help = "Whether to optimize rules to be run sequentially (fewer threads) or in parallel (many threads). Defaults to 'sequential', which is better for unix-exec and windows-exec modes. 'parallel' is often better for massively parallel build systems.",
    choices = ["sequential", "parallel"],
    default = "sequential"
)
flag_parser.add_argument(
    "--verbose",
    help = "Print more verbose output (default false).",
    default = False,
    action = "store_true"
)

# Flags consumed by the modes that execute the rules directly. main() reads
# out_dir/tmp_dir for every non-gnumake mode and passes tool_dir to all
# *-exec modes (including bazel-exec); tool_cfg is passed for windows-exec only.
arg_group_exec = flag_parser.add_argument_group("arguments for unix-exec and windows-exec modes")
arg_group_exec.add_argument(
    "--out_dir",
    help = "Path to where to save output data files.",
    default = "icudata"
)
arg_group_exec.add_argument(
    "--tmp_dir",
    help = "Path to where to save temporary files.",
    default = "icutmp"
)
arg_group_exec.add_argument(
    "--tool_dir",
    help = "Path to where to find binary tools (genrb, etc).",
    default = "../bin"
)
arg_group_exec.add_argument(
    "--tool_cfg",
    help = "The build configuration of the tools. Used in 'windows-exec' mode only.",
    default = "x86/Debug"
)
+
+
class Config(object):
    """Build configuration derived from the command line and the filter file.

    Attributes set by the constructor:
      max_parallel: True when --seqmode is "parallel".
      include_uni_core_data: value of --include_uni_core_data.
      filters_json_data: parsed filter file, or {} when none was given.
      filter_dir: absolute directory of the filter file, or the placeholder
        "ERROR_NO_FILTER_FILE" when none was given.
      coll_han_type: "unihan" (default) or "implicithan".
      strategy: "subtractive" (default) or "additive".
      use_pool_bundle: True (default) or False.
    """

    def __init__(self, args):
        """Builds the configuration from parsed command-line arguments.

        Exits the process with status 1 if the filter file cannot be read.
        """
        # Process arguments
        self.max_parallel = (args.seqmode == "parallel")

        # Boolean: Whether to include core Unicode data files in the .dat file
        self.include_uni_core_data = args.include_uni_core_data

        # Default fields before processing filter file
        self.filters_json_data = {}
        self.filter_dir = "ERROR_NO_FILTER_FILE"

        # Process filter file
        if args.filter_file:
            try:
                with open(args.filter_file, "r") as f:
                    print("Note: Applying filters from %s." % args.filter_file, file=sys.stderr)
                    self._parse_filter_file(f)
            except IOError:
                print("Error: Could not read filter file %s." % args.filter_file, file=sys.stderr)
                # sys.exit is always available; the bare exit() helper is only
                # injected by the site module and may be missing (e.g. python -S).
                sys.exit(1)
            self.filter_dir = os.path.abspath(os.path.dirname(args.filter_file))

        # Either "unihan" or "implicithan"
        self.coll_han_type = self.filters_json_data.get("collationUCAData", "unihan")

        # Either "additive" or "subtractive"
        self.strategy = self.filters_json_data.get("strategy", "subtractive")

        # True or False (could be extended later to support enum/list)
        self.use_pool_bundle = self.filters_json_data.get("usePoolBundle", True)

        # By default, exclude collation data that mimics the order of some large legacy charsets.
        # We do this in "subtractive" strategy by inserting a resourceFilter.
        # Later rules from an explicit filter file may override this default behavior.
        # (In "additive" strategy this is unnecessary.)
        if self.strategy == "subtractive":
            filters = self.filters_json_data.setdefault("resourceFilters", [])
            omit_charset_collations = {
                "categories": [
                    "coll_tree"
                ],
                "rules": [
                    "-/collations/big5han",
                    "-/collations/gb2312han"
                ]
            }
            # Insert first so that user-provided rules are applied afterwards.
            filters.insert(0, omit_charset_collations)

    def _parse_filter_file(self, f):
        """Parses the open filter file `f` into self.filters_json_data.

        Schema violations are reported as warnings on stderr; they do not
        abort processing.
        """
        # Use the Hjson parser if it is available; otherwise, use vanilla JSON.
        try:
            import hjson
            self.filters_json_data = hjson.load(f)
        except ImportError:
            self.filters_json_data = json.load(CommentStripper(f))

        # Optionally pre-validate the JSON schema before further processing.
        # Some schema errors will be caught later, but this step ensures
        # maximal validity.
        try:
            import jsonschema
            schema_path = os.path.join(os.path.dirname(__file__), "filtration_schema.json")
            with open(schema_path) as schema_f:
                schema = json.load(CommentStripper(schema_f))
            validator = jsonschema.Draft4Validator(schema)
            for error in validator.iter_errors(self.filters_json_data, schema):
                # Render the failing location as e.g. ".resourceFilters[0].rules".
                print("WARNING: ICU data filter JSON file:", error.message,
                    "at", "".join(
                        "[%d]" % part if isinstance(part, int) else ".%s" % part
                        for part in error.absolute_path
                    ),
                    file=sys.stderr)
        except ImportError:
            print("Tip: to validate your filter file, install the Pip package 'jsonschema'", file=sys.stderr)
+
+
def add_copy_input_requests(requests, config, common_vars):
    """Prepends copy requests that stage every input file.

    Each InFile used by `requests` gets a CopyRequest. Files named in the
    "fileReplacements" section of the filter file are copied from the
    replacement directory; all remaining ones are copied from the source tree.

    Args:
      requests: list of build requests.
      config: object whose filters_json_data contains "fileReplacements".
      common_vars: unused here; kept for interface compatibility.

    Returns:
      A new list: the copy requests followed by the original requests.

    Raises:
      KeyError: if a replacement targets a file that no request uses as input.
    """
    files_to_copy = set()
    for request in requests:
        # Copy the list so that extending it below can never modify a list
        # owned by the request object.
        request_files = list(request.all_input_files())
        # Also add known dependency txt files as possible inputs.
        # This is required for translit rule files.
        if hasattr(request, "dep_targets"):
            request_files += [
                f for f in request.dep_targets if isinstance(f, InFile)
            ]
        for f in request_files:
            if isinstance(f, InFile):
                files_to_copy.add(f)

    result = []
    copy_index = 0

    json_data = config.filters_json_data["fileReplacements"]
    dirname = json_data["directory"]
    for directive in json_data["replacements"]:
        if isinstance(directive, str):
            # Short form: same relative name in the replacement directory.
            input_file = LocalFile(dirname, directive)
            output_file = InFile(directive)
        else:
            # Long form: explicit source and destination names.
            input_file = LocalFile(dirname, directive["src"])
            output_file = InFile(directive["dest"])
        result.append(
            CopyRequest(
                name = "input_copy_%d" % copy_index,
                input_file = input_file,
                output_file = output_file
            )
        )
        # The replacement supersedes the regular copy from the source tree.
        files_to_copy.remove(output_file)
        copy_index += 1

    # Sort so that request names and order are reproducible between runs.
    for f in sorted(files_to_copy):
        result.append(
            CopyRequest(
                name = "input_copy_%d" % copy_index,
                input_file = SrcFile(f.filename),
                output_file = f
            )
        )
        copy_index += 1

    result += requests
    return result
+
+
class IO(object):
    """I/O operations required when computing the build actions"""

    def __init__(self, src_dir):
        # Directory that all patterns and file names are resolved against.
        self.src_dir = src_dir

    def glob(self, pattern):
        """Returns the sorted paths under src_dir that match `pattern`.

        The returned paths are relative to src_dir and always use '/' as the
        directory separator.
        """
        matched_paths = sorted(pyglob.glob(os.path.join(self.src_dir, pattern)))
        # Strip off the src_dir prefix so we are left with a relative path.
        # relpath is used rather than slicing by len(src_dir)+1, which would
        # cut one character too many when src_dir ends with a separator.
        relative_paths = [os.path.relpath(v, self.src_dir) for v in matched_paths]
        # For the purposes of icutools.databuilder, force Unix-style directory separators.
        # Within the Python code, including BUILDRULES.py and user-provided config files,
        # directory separators are normalized to '/', including on Windows platforms.
        return [v.replace("\\", "/") for v in relative_paths]

    def read_locale_deps(self, tree):
        """Returns the parsed <tree>/LOCALE_DEPS.json from src_dir."""
        return self._read_json("%s/LOCALE_DEPS.json" % tree)

    def _read_json(self, filename):
        """Parses a JSON file (with '//' comment lines) relative to src_dir."""
        # "utf-8-sig" transparently skips a leading byte-order mark.
        with pyio.open(os.path.join(self.src_dir, filename), "r", encoding="utf-8-sig") as f:
            return json.load(CommentStripper(f))
+
+
def main(argv):
    """Entry point: generates the build rules and renders or executes them.

    Args:
      argv: command-line arguments without the program name.

    Returns:
      Process exit status: 0 on success, non-zero on failure.
    """
    args = flag_parser.parse_args(argv)
    config = Config(args)

    if args.mode == "gnumake":
        # In Makefile mode the directories stay symbolic ("$(NAME)") and are
        # resolved by make itself.
        makefile_vars = {
            "SRC_DIR": "$(srcdir)",
            "IN_DIR": "$(srcdir)",
            "INDEX_NAME": "res_index"
        }
        makefile_env = ["ICUDATA_CHAR", "OUT_DIR", "TMP_DIR"]
        common = {
            key: "$(%s)" % key
            for key in list(makefile_vars.keys()) + makefile_env
        }
        common["FILTERS_DIR"] = config.filter_dir
        common["CWD_DIR"] = os.getcwd()
    else:
        makefile_vars = None
        common = {
            "SRC_DIR": args.src_dir,
            "IN_DIR": args.src_dir,
            "OUT_DIR": args.out_dir,
            "TMP_DIR": args.tmp_dir,
            "FILTERS_DIR": config.filter_dir,
            "CWD_DIR": os.getcwd(),
            "INDEX_NAME": "res_index",
            # TODO: Pull this from configure script:
            "ICUDATA_CHAR": "l"
        }

    # Automatically load BUILDRULES from the src_dir
    sys.path.append(args.src_dir)
    try:
        import BUILDRULES
    except ImportError:
        print("Cannot find BUILDRULES! Did you set your --src_dir?", file=sys.stderr)
        sys.exit(1)

    io = IO(args.src_dir)
    requests = BUILDRULES.generate(config, io, common)

    if "fileReplacements" in config.filters_json_data:
        # Inputs are staged into a temporary directory so that replacement
        # files can be mixed with files from the source tree.
        tmp_in_dir = "{TMP_DIR}/in".format(**common)
        if makefile_vars:
            makefile_vars["IN_DIR"] = tmp_in_dir
        else:
            common["IN_DIR"] = tmp_in_dir
        requests = add_copy_input_requests(requests, config, common)

    requests = filtration.apply_filters(requests, config, io)
    requests = utils.flatten_requests(requests, config, common)

    build_dirs = utils.compute_directories(requests)

    if args.mode == "gnumake":
        print(makefile.get_gnumake_rules(
            build_dirs,
            requests,
            makefile_vars,
            common_vars = common
        ))
        return 0

    # Map each executing mode to the platform name expected by common_exec.
    exec_platforms = {
        "windows-exec": "windows",
        "unix-exec": "unix",
        "bazel-exec": "bazel",
    }
    platform = exec_platforms.get(args.mode)
    if platform is None:
        # Errors belong on stderr; stdout may be captured (e.g. as a Makefile).
        print("Mode not supported: %s" % args.mode, file=sys.stderr)
        return 1

    exec_kwargs = {
        "platform": platform,
        "build_dirs": build_dirs,
        "requests": requests,
        "common_vars": common,
        "tool_dir": args.tool_dir,
        "verbose": args.verbose,
    }
    if platform == "windows":
        # Only the Windows runner needs the tool build configuration.
        exec_kwargs["tool_cfg"] = args.tool_cfg
    return common_exec.run(**exec_kwargs)

if __name__ == "__main__":
    # sys.exit, unlike the site-provided exit(), is always available.
    sys.exit(main(sys.argv[1:]))
diff --git a/intl/icu/source/python/icutools/databuilder/comment_stripper.py b/intl/icu/source/python/icutools/databuilder/comment_stripper.py
new file mode 100644
index 0000000000..4001f2f675
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/comment_stripper.py
@@ -0,0 +1,51 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+import io
+
class CommentStripper(object):
    """Removes lines starting with "//" from a file stream.

    Only lines whose very first two characters are "//" are removed (together
    with their line terminator); "//" anywhere else in a line is kept.
    The wrapped object must provide read(size) returning text.
    """

    def __init__(self, f):
        self.f = f
        # Parser state, carried across read() calls:
        #   0: start of a line
        #   1: read a single '/' at the start of a line (not yet emitted)
        #   2: middle of a line, no comment
        #   3: inside a comment
        self.state = 0

    def read(self, size=-1):
        """Reads from the wrapped stream and returns the text without comments.

        Like a regular file object, an empty string is returned only at end of
        input: if a sized read yields nothing but comment text, further chunks
        are read until some output is available or the input is exhausted.
        """
        while True:
            chunk = self.f.read(size)
            # An unsized read consumes everything, so it always reaches EOF.
            at_eof = (not chunk) or size is None or size < 0
            pieces = list(self._strip_comments(chunk))
            if at_eof and self.state == 1:
                # A lone '/' at the start of the final line was held back
                # waiting for a possible second '/'; emit it now.
                pieces.append("/")
                self.state = 2
            text = "".join(pieces)
            if text or at_eof:
                return text

    def _strip_comments(self, text):
        """Yields the characters of `text` that are not part of a comment."""
        for char in text:
            if self.state == 0:
                # state 0: start of a line
                if char == "/":
                    self.state = 1
                elif char == "\n":
                    self.state = 0
                    yield char
                else:
                    self.state = 2
                    yield char
            elif self.state == 1:
                # state 1: read a single '/'
                if char == "/":
                    self.state = 3
                elif char == "\n":
                    self.state = 0
                    yield "/" # the one that was skipped
                    yield "\n"
                else:
                    self.state = 2
                    yield "/" # the one that was skipped
                    yield char
            elif self.state == 2:
                # state 2: middle of a line, no comment
                if char == "\n":
                    self.state = 0
                yield char
            elif self.state == 3:
                # state 3: inside a comment; the terminating newline is dropped too
                if char == "\n":
                    self.state = 0
diff --git a/intl/icu/source/python/icutools/databuilder/filtration.py b/intl/icu/source/python/icutools/databuilder/filtration.py
new file mode 100644
index 0000000000..e9339a0895
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/filtration.py
@@ -0,0 +1,427 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+# Python 2/3 Compatibility (ICU-20299)
+# TODO(ICU-20301): Remove this.
+from __future__ import print_function
+
+from abc import abstractmethod
+from collections import defaultdict
+import re
+import sys
+
+from . import *
+from . import utils
+from .request_types import *
+
+
+# Note: for this to be a proper abstract class, it should extend abc.ABC.
+# There is no nice way to do this that works in both Python 2 and 3.
+# TODO(ICU-20301): Make this inherit from abc.ABC.
class Filter(object):
    """Base class of file filters; subclasses implement match()."""

    @staticmethod
    def create_from_json(json_data, io):
        """Creates the filter described by a JSON object.

        Args:
          json_data: dict from the filter file; "filterType" selects the
            implementation and defaults to "file-stem".
          io: I/O helper handed to the filters that need to read files.

        Returns:
          A Filter instance, or None (after printing an error to stderr) if
          the filter type is unknown.
        """
        assert io is not None
        filter_type = json_data.get("filterType", "file-stem")

        if filter_type == "file-stem":
            return FileStemFilter(json_data)
        elif filter_type == "language":
            return LanguageFilter(json_data)
        elif filter_type == "regex":
            return RegexFilter(json_data)
        elif filter_type == "exclude":
            return ExclusionFilter()
        elif filter_type == "union":
            return UnionFilter(json_data, io)
        elif filter_type == "locale":
            return LocaleFilter(json_data, io)
        else:
            print("Error: Unknown filterType option: %s" % filter_type, file=sys.stderr)
            return None

    def filter(self, request):
        """Applies this filter to a request.

        Returns:
          [request] if the request is kept, or [] if request.apply_file_filter
          reports that it should be dropped.
        """
        if not request.apply_file_filter(self):
            return []
        # Sanity check: every input file left on the request must match.
        for file in request.all_input_files():
            assert self.match(file)
        return [request]

    @staticmethod
    def _file_to_file_stem(file):
        """Returns the base name of file.filename without its last extension.

        For example "locales/en_US.txt" gives "en_US". A name without any
        extension is returned unchanged.
        """
        basename = file.filename[file.filename.rfind("/") + 1:]
        dot = basename.rfind(".")
        # Without this check, rfind's -1 would silently drop the last character.
        if dot == -1:
            return basename
        return basename[:dot]

    @staticmethod
    def _file_to_subdir(file):
        """Returns the directory part of file.filename, or None if it has none."""
        limit = file.filename.rfind("/")
        if limit == -1:
            return None
        return file.filename[:limit]

    @abstractmethod
    def match(self, file):
        """Returns whether `file` passes this filter."""
        pass
+
+
class InclusionFilter(Filter):
    """Filter that accepts every file."""

    def match(self, file):
        # Nothing is ever rejected.
        return True
+
+
class ExclusionFilter(Filter):
    """Filter that rejects every file."""

    def match(self, file):
        # Nothing is ever accepted.
        return False
+
+
class IncludeExcludeFilter(Filter):
    """Base for filters configured with either an include or an exclude list.

    The list is read from the first key present among "whitelist",
    "includelist", "blacklist" and "excludelist" (checked in that order).
    """

    def __init__(self, json_data):
        # (key, whether the key denotes an include list), in priority order.
        recognized_keys = (
            ("whitelist", True),
            ("includelist", True),
            ("blacklist", False),
            ("excludelist", False),
        )
        for key, is_include in recognized_keys:
            if key not in json_data:
                continue
            self.is_includelist = is_include
            if is_include:
                self.includelist = json_data[key]
            else:
                self.excludelist = json_data[key]
            break
        else:
            raise AssertionError("Need either includelist or excludelist: %s" % str(json_data))

    def match(self, file):
        """Matches on the file stem only; the decision is left to subclasses."""
        return self._should_include(self._file_to_file_stem(file))

    @abstractmethod
    def _should_include(self, file_stem):
        """Returns whether a file with the given stem passes the filter."""
        pass
+
+
class FileStemFilter(IncludeExcludeFilter):
    """Filters on exact file stems."""

    def _should_include(self, file_stem):
        # Exclude-list mode keeps everything that is not listed.
        if not self.is_includelist:
            return file_stem not in self.excludelist
        return file_stem in self.includelist
+
+
class LanguageFilter(IncludeExcludeFilter):
    """Filters on the language subtag, i.e. the stem up to the first "_"."""

    def _should_include(self, file_stem):
        language = file_stem.partition("_")[0]
        # Always include root.txt
        if language == "root":
            return True
        if not self.is_includelist:
            return language not in self.excludelist
        return language in self.includelist
+
+
class RegexFilter(IncludeExcludeFilter):
    """Filters file stems with regular expressions.

    Patterns are applied with re.match, i.e. anchored at the start of the stem.
    """

    def __init__(self, *args):
        # TODO(ICU-20301): Change this to: super().__init__(*args)
        super(RegexFilter, self).__init__(*args)
        # Compile whichever list the base class populated, in place.
        list_attr = "includelist" if self.is_includelist else "excludelist"
        compiled = [re.compile(pat) for pat in getattr(self, list_attr)]
        setattr(self, list_attr, compiled)

    def _should_include(self, file_stem):
        if self.is_includelist:
            # Keep the stem if at least one include pattern matches.
            return any(pattern.match(file_stem) for pattern in self.includelist)
        # Keep the stem unless some exclude pattern matches.
        return not any(pattern.match(file_stem) for pattern in self.excludelist)
+
+
class UnionFilter(Filter):
    """Combines the filters listed under "unionOf" with a logical OR."""

    def __init__(self, json_data, io):
        # Collect the sub-filters.
        self.sub_filters = [
            Filter.create_from_json(sub_json, io)
            for sub_json in json_data["unionOf"]
        ]

    def match(self, file):
        """Match iff any of the sub-filters match."""
        return any(sub_filter.match(file) for sub_filter in self.sub_filters)
+
+
# Matches a 2-3 letter lowercase language code followed by "_" and a
# four-letter title-case script code, e.g. "sr_Latn"; group 1 is the language.
LANGUAGE_SCRIPT_REGEX = re.compile(r"^([a-z]{2,3})_[A-Z][a-z]{3}$")
# Matches a bare 2-3 letter lowercase language code, e.g. "sr".
LANGUAGE_ONLY_REGEX = re.compile(r"^[a-z]{2,3}$")
+
class LocaleFilter(Filter):
    """Filters locale files using the locale inheritance data of each tree.

    A file matches if its locale is requested, is an ancestor of a requested
    locale, or -- depending on "includeChildren" / "includeScripts" -- is a
    descendant or script variant of a requested locale.
    """

    def __init__(self, json_data, io):
        if "whitelist" in json_data:
            requested = json_data["whitelist"]
        elif "includelist" in json_data:
            requested = json_data["includelist"]
        else:
            raise AssertionError("You must have an includelist in a locale filter")
        self.locales_requested = list(requested)
        self.include_children = json_data.get("includeChildren", True)
        self.include_scripts = json_data.get("includeScripts", False)

        # Load the dependency graph from disk
        self.dependency_data_by_tree = {}
        for tree in utils.ALL_TREES:
            self.dependency_data_by_tree[tree] = io.read_locale_deps(tree)

    def match(self, file):
        """Returns whether the locale file should be kept."""
        tree = self._file_to_subdir(file)
        assert tree is not None
        locale = self._file_to_file_stem(file)

        # A locale is *required* if it is *requested* or an ancestor of a
        # *requested* locale.
        if locale in self._locales_required(tree):
            return True

        # Resolve include_scripts and include_children.
        return self._match_recursive(locale, tree)

    def _match_recursive(self, locale, tree):
        """Walks up from `locale` looking for a *requested* locale."""
        # Base case: we ascended out of the locale tree.
        if locale is None:
            return False
        # Base case: we reached a *requested* locale.
        if locale in self.locales_requested:
            return True

        # Check for alternative scripts.
        # This causes sr_Latn to check sr instead of going directly to root.
        if self.include_scripts:
            script_match = LANGUAGE_SCRIPT_REGEX.match(locale)
            if script_match and self._match_recursive(script_match.group(1), tree):
                return True

        # Without include_children there is nothing left to try.
        if not self.include_children:
            return False

        # Check if we are a descendant of a *requested* locale.
        parent = self._get_parent_locale(locale, tree)
        return self._match_recursive(parent, tree)

    def _get_parent_locale(self, locale, tree):
        """Gets the parent locale in the given tree, according to dependency data."""
        dependency_data = self.dependency_data_by_tree[tree]
        # Explicit parents take precedence over aliases.
        for section in ("parents", "aliases"):
            if section in dependency_data and locale in dependency_data[section]:
                return dependency_data[section][locale]
        if LANGUAGE_ONLY_REGEX.match(locale):
            return "root"
        # Otherwise truncate at the last underscore.
        prefix, underscore, _ = locale.rpartition("_")
        if not underscore:
            assert locale == "root", "Invalid locale: %s/%s" % (tree, locale)
            return None
        return prefix

    def _locales_required(self, tree):
        """Returns a generator of all required locales in the given tree."""
        for requested in self.locales_requested:
            # Emit the requested locale followed by each of its ancestors.
            current = requested
            while current is not None:
                yield current
                current = self._get_parent_locale(current, tree)
+
+
def apply_filters(requests, config, io):
    """Runs the filters and returns a new list of requests."""
    # File filters run first; resource filters then see only surviving requests.
    file_filtered = _apply_file_filters(requests, config, io)
    return _apply_resource_filters(file_filtered, config, io)
+
+
def _apply_file_filters(old_requests, config, io):
    """Filters out entire files."""
    filters_by_category = _preprocess_file_filters(old_requests, config, io)
    kept_requests = []
    for request in old_requests:
        if request.category not in filters_by_category:
            # No filter for this category: keep the request untouched.
            kept_requests.append(request)
            continue
        kept_requests += filters_by_category[request.category].filter(request)
    return kept_requests
+
+
+def _preprocess_file_filters(requests, config, io):
+ all_categories = set(
+ request.category
+ for request in requests
+ )
+ all_categories.remove(None)
+ all_categories = list(sorted(all_categories))
+ json_data = config.filters_json_data
+ filters = {}
+ default_filter_json = "exclude" if config.strategy == "additive" else "include"
+ for category in all_categories:
+ filter_json = default_filter_json
+ # Special default for category "brkitr_lstm" and "brkitr_adaboost" as "exclude" for now.
+ if "brkitr_lstm" == category or "brkitr_adaboost" == category:
+ filter_json = "exclude"
+ # Figure out the correct filter to create for now.
+ if "featureFilters" in json_data and category in json_data["featureFilters"]:
+ filter_json = json_data["featureFilters"][category]
+ if filter_json == "include" and "localeFilter" in json_data and category.endswith("_tree"):
+ filter_json = json_data["localeFilter"]
+ # Resolve the filter JSON into a filter object
+ if filter_json == "exclude":
+ filters[category] = ExclusionFilter()
+ elif filter_json == "include":
+ pass # no-op
+ else:
+ filters[category] = Filter.create_from_json(filter_json, io)
+ if "featureFilters" in json_data:
+ for category in json_data["featureFilters"]:
+ if category not in all_categories:
+ print("Warning: category %s is not known" % category, file=sys.stderr)
+ return filters
+
+
class ResourceFilterInfo(object):
    """Collects the resource filter rules for all genrb inputs of one category.

    Usage, as done by _apply_resource_filters: construct, call
    apply_to_requests() once, call add_rules() for every filter entry, then
    call make_requests() to obtain the requests that write the filter files.
    """

    def __init__(self, category, strategy):
        self.category = category
        self.strategy = strategy
        # Directory of the generated filter files, relative to {TMP_DIR}.
        self.filter_tmp_dir = "filters/%s" % category
        # The following three lists are parallel (same length and order);
        # they stay None until apply_to_requests() has run.
        self.input_files = None
        self.filter_files = None
        self.rules_by_file = None

    def apply_to_requests(self, all_requests):
        """Hooks the filter files into every matching genrb request.

        Matching requests are modified in place: the filter files become
        dependencies and "--filterDir" is prepended to the tool arguments.
        """
        # Call this method only once per list of requests.
        assert self.input_files is None
        for request in all_requests:
            if request.category != self.category:
                continue
            if not isinstance(request, AbstractExecutionRequest):
                continue
            if request.tool != IcuTool("genrb"):
                continue
            if not request.input_files:
                continue
            self._set_files(request.input_files)
            # Note: a copy of the file list is appended as ONE nested element;
            # presumably nested lists are expanded when requests are flattened
            # -- TODO confirm in request_types.
            request.dep_targets += [self.filter_files[:]]
            arg_str = "--filterDir {TMP_DIR}/%s" % self.filter_tmp_dir
            request.args = "%s %s" % (arg_str, request.args)

        # Make sure we found the target request
        if self.input_files is None:
            print("WARNING: Category not found: %s" % self.category, file=sys.stderr)
            # Fall back to empty lists so that the later steps become no-ops.
            self.input_files = []
            self.filter_files = []
            self.rules_by_file = []

    def _set_files(self, files):
        """Initializes the parallel lists from the genrb input files."""
        # Note: The input files to genrb for a certain category should always
        # be the same. For example, there are often two genrb calls: one for
        # --writePoolBundle, and the other for --usePoolBundle. They are both
        # expected to have the same list of input files.
        if self.input_files is not None:
            assert self.input_files == files
            return
        self.input_files = list(files)
        # One filter file per input file, named after the input's base name.
        self.filter_files = [
            TmpFile("%s/%s" % (self.filter_tmp_dir, basename))
            for basename in (
                file.filename[file.filename.rfind("/")+1:]
                for file in files
            )
        ]
        if self.strategy == "additive":
            # Start from "exclude everything" but keep the %%ALIAS and
            # %%Parent resources.
            self.rules_by_file = [
                [r"-/", r"+/%%ALIAS", r"+/%%Parent"]
                for _ in range(len(files))
            ]
        else:
            # Start from "include everything".
            self.rules_by_file = [
                [r"+/"]
                for _ in range(len(files))
            ]

    def add_rules(self, file_filter, rules):
        """Appends `rules` to every input file accepted by `file_filter`."""
        for file, rule_list in zip(self.input_files, self.rules_by_file):
            if file_filter.match(file):
                rule_list += rules

    def make_requests(self):
        """Returns the requests that create the filter files.

        Each distinct rule list is printed to one file; the other files that
        share the same rules are produced by copying that file.
        """
        # Map from rule list to filter files with that rule list
        unique_rules = defaultdict(list)
        for filter_file, rules in zip(self.filter_files, self.rules_by_file):
            unique_rules[tuple(rules)].append(filter_file)

        new_requests = []
        # Running counter shared by print and copy requests to keep names unique.
        i = 0
        for rules, filter_files in unique_rules.items():
            base_filter_file = filter_files[0]
            new_requests += [
                PrintFileRequest(
                    name = "%s_print_%d" % (self.category, i),
                    output_file = base_filter_file,
                    content = self._generate_resource_filter_txt(rules)
                )
            ]
            i += 1
            for filter_file in filter_files[1:]:
                new_requests += [
                    CopyRequest(
                        name = "%s_copy_%d" % (self.category, i),
                        input_file = base_filter_file,
                        output_file = filter_file
                    )
                ]
                i += 1
        return new_requests

    @staticmethod
    def _generate_resource_filter_txt(rules):
        """Returns the filter file content: a header, then one rule per line."""
        result = "# Caution: This file is automatically generated\n\n"
        result += "\n".join(rules)
        return result
+
+
+def _apply_resource_filters(all_requests, config, io):
+ """Creates filters for looking within resource bundle files."""
+ json_data = config.filters_json_data
+ if "resourceFilters" not in json_data:
+ return all_requests
+
+ collected = {}
+ for entry in json_data["resourceFilters"]:
+ if "files" in entry:
+ file_filter = Filter.create_from_json(entry["files"], io)
+ else:
+ file_filter = InclusionFilter()
+ for category in entry["categories"]:
+ # not defaultdict because we need to pass arguments to the constructor
+ if category not in collected:
+ filter_info = ResourceFilterInfo(category, config.strategy)
+ filter_info.apply_to_requests(all_requests)
+ collected[category] = filter_info
+ else:
+ filter_info = collected[category]
+ filter_info.add_rules(file_filter, entry["rules"])
+
+ # Add the filter generation requests to the beginning so that by default
+ # they are made before genrb gets run (order is required by windirect)
+ new_requests = []
+ for filter_info in collected.values():
+ new_requests += filter_info.make_requests()
+ new_requests += all_requests
+ return new_requests
diff --git a/intl/icu/source/python/icutools/databuilder/filtration_schema.json b/intl/icu/source/python/icutools/databuilder/filtration_schema.json
new file mode 100644
index 0000000000..3aed41a334
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/filtration_schema.json
@@ -0,0 +1,206 @@
+// Copyright (C) 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+{
+ "$id": "http://unicode.org/icu-filter-schema",
+ "$schema": "http://json-schema.org/draft-04/schema#",
+ "description": "JSON Schema for an ICU data filter file",
+ "type": "object",
+ "properties": {
+ "strategy": {
+ "type": "string",
+ "enum": ["additive", "subtractive"]
+ },
+ "localeFilter": { "$ref": "#/definitions/filter" },
+ "featureFilters": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ { "$ref": "#/definitions/filter" },
+ {
+ "type": "string",
+ "enum": ["include", "exclude"]
+ }
+ ]
+ }
+ },
+ "resourceFilters": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "categories": {
+ "type": "array",
+ "items": { "type": "string" }
+ },
+ "files": { "$ref": "#/definitions/filter" },
+ "rules": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "pattern": "^[+-]/[\\S]*$"
+ }
+ }
+ },
+ "required": ["categories", "rules"],
+ "additionalProperties": false
+ }
+ },
+ "fileReplacements": {
+ "type": "object",
+ "properties": {
+ "directory": {
+ "type": "string",
+ "pattern": "^(\\$SRC|\\$FILTERS|\\$CWD|/$|/[^/]+)(/[^/]+)*$"
+ },
+ "replacements": {
+ "type": "array",
+ "items": {
+ "oneOf": [
+ { "type": "string" },
+ {
+ "type": "object",
+ "properties": {
+ "src": { "type": "string" },
+ "dest": { "type": "string" }
+ },
+ "additionalProperties": false,
+ "required": ["src", "dest"]
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": ["directory", "replacements"]
+ },
+ "collationUCAData": {
+ "type": "string",
+ "enum": ["unihan", "implicithan"]
+ },
+ "usePoolBundle": {
+ "type": "boolean"
+ }
+ },
+ "additionalProperties": false,
+ "definitions": {
+ "filter": {
+ "type": "object",
+ "oneOf": [
+ {
+ "properties": {
+ "filterType": {
+ "$ref": "#/definitions/includeExcludeFilterTypes"
+ },
+ "whitelist": { "$ref": "#/definitions/stringList" }
+ },
+ "required": ["whitelist"],
+ "additionalProperties": false
+ },
+ {
+ "properties": {
+ "filterType": {
+ "$ref": "#/definitions/includeExcludeFilterTypes"
+ },
+ "blacklist": { "$ref": "#/definitions/stringList" }
+ },
+ "required": ["blacklist"],
+ "additionalProperties": false
+ },
+ {
+ "properties": {
+ "filterType": {
+ "$ref": "#/definitions/includeExcludeFilterTypes"
+ },
+ "includelist": { "$ref": "#/definitions/stringList" }
+ },
+ "required": ["includelist"],
+ "additionalProperties": false
+ },
+ {
+ "properties": {
+ "filterType": {
+ "$ref": "#/definitions/includeExcludeFilterTypes"
+ },
+ "excludelist": { "$ref": "#/definitions/stringList" }
+ },
+ "required": ["excludelist"],
+ "additionalProperties": false
+ },
+ {
+ "properties": {
+ "filterType": {
+ "type": "string",
+ "enum": ["exclude"]
+ }
+ },
+ "required": ["filterType"],
+ "additionalProperties": false
+ },
+ {
+ "properties": {
+ "filterType": {
+ "type": "string",
+ "enum": ["locale"]
+ },
+ "includeChildren": {
+ "type": "boolean"
+ },
+ "includeScripts": {
+ "type": "boolean"
+ },
+ "whitelist": { "$ref": "#/definitions/stringList" }
+ },
+ "required": ["filterType", "whitelist"],
+ "additionalProperties": false
+ },
+ {
+ "properties": {
+ "filterType": {
+ "type": "string",
+ "enum": ["locale"]
+ },
+ "includeChildren": {
+ "type": "boolean"
+ },
+ "includeScripts": {
+ "type": "boolean"
+ },
+ "includelist": { "$ref": "#/definitions/stringList" }
+ },
+ "required": ["filterType", "includelist"],
+ "additionalProperties": false
+ },
+ {
+ "properties": {
+ "filterType": {
+ "type": "string",
+ "enum": ["union"]
+ },
+ "unionOf": {
+ "type": "array",
+ "items": { "$ref": "#/definitions/filter" }
+ }
+ },
+ "required": ["filterType", "unionOf"],
+ "additionalProperties": false
+ }
+ ]
+ },
+ "includeExcludeFilterTypes": {
+ "type": "string",
+ "enum": [
+ "language",
+ "regex"
+ ]
+ },
+ "stringList": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "minItems": 1,
+ "uniqueItems": true
+ }
+ }
+}
diff --git a/intl/icu/source/python/icutools/databuilder/renderers/__init__.py b/intl/icu/source/python/icutools/databuilder/renderers/__init__.py
new file mode 100644
index 0000000000..7c402c2b78
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/renderers/__init__.py
@@ -0,0 +1,10 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+from collections import namedtuple
+
+# One Makefile rule. makefile.py renders it as
+# "<output_file>: <dep_files> <dep_literals> | $(DIRS)" followed by one
+# tab-indented line per entry of cmds.
+MakeRule = namedtuple("MakeRule", ["name", "dep_literals", "dep_files", "output_file", "cmds"])
+
+# A Makefile variable holding a list of files; rendered as "<name> = <files>".
+MakeFilesVar = namedtuple("MakeFilesVar", ["name", "files"])
+
+# A multi-line Makefile variable; rendered as a define/endef block that is exported.
+MakeStringVar = namedtuple("MakeStringVar", ["name", "content"])
diff --git a/intl/icu/source/python/icutools/databuilder/renderers/common_exec.py b/intl/icu/source/python/icutools/databuilder/renderers/common_exec.py
new file mode 100644
index 0000000000..91c12fdcf6
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/renderers/common_exec.py
@@ -0,0 +1,155 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+# Python 2/3 Compatibility (ICU-20299)
+# TODO(ICU-20301): Remove this.
+from __future__ import print_function
+
+from . import *
+from .. import *
+from .. import utils
+from ..request_types import *
+
+import os
+import shutil
+import subprocess
+import sys
+
+def run(build_dirs, requests, common_vars, verbose=True, **kwargs):
+    """Create the build directories, then execute every request in order.
+
+    Each entry of build_dirs is a format template expanded with common_vars.
+    Execution stops at the first request whose status is non-zero, in which
+    case 1 is returned; 0 is returned once every request has run. Extra
+    keyword arguments are forwarded unchanged to run_helper().
+    """
+    for dir_template in build_dirs:
+        makedirs(dir_template.format(**common_vars))
+
+    for request in requests:
+        exit_code = run_helper(request, common_vars, verbose=verbose, **kwargs)
+        if exit_code == 0:
+            continue
+        print("!!! ERROR executing above command line: exit code %d" % exit_code)
+        return 1
+
+    if verbose:
+        print("All data build commands executed")
+    return 0
+
+def makedirs(dirs):
+    """Create directory dirs and any missing parents; compatible between Python 2 and 3.
+
+    An already-existing directory is not an error. Any other OSError from
+    os.makedirs() propagates to the caller.
+    """
+    try:
+        # Python 3 version
+        os.makedirs(dirs, exist_ok=True)
+    except TypeError:
+        # Python 2 version: os.makedirs() rejects the exist_ok keyword there.
+        # errno is imported locally because this module's explicit imports
+        # (os, shutil, subprocess, sys) do not include it; without it the
+        # check below would fail with NameError instead of tolerating EEXIST.
+        import errno
+        try:
+            os.makedirs(dirs)
+        except OSError as e:
+            if e.errno != errno.EEXIST:
+                # Bare raise keeps the original traceback.
+                raise
+
+def run_helper(request, common_vars, platform, tool_dir, verbose, tool_cfg=None, **kwargs):
+    """Execute a single request and return its status (0 means success).
+
+    PrintFileRequest and CopyRequest are performed directly in Python and
+    VariableRequest is a no-op. Any other request must use an IcuTool; its
+    command line is built from a platform-specific template ("windows",
+    "unix" or "bazel") and passed to run_shell_command(). For a
+    RepeatedExecutionRequest the first non-zero exit code ends the loop.
+
+    Raises ValueError for an unknown platform.
+    """
+    def resolve(build_file):
+        # Expand the placeholders in the file's directory, then append its name.
+        return "{DIRNAME}/{FILENAME}".format(
+            DIRNAME = utils.dir_for(build_file).format(**common_vars),
+            FILENAME = build_file.filename,
+        )
+
+    def execute(command_line):
+        if platform == "windows":
+            # Note: this / to \ substitution may be too aggressive?
+            command_line = command_line.replace("/", "\\")
+        return run_shell_command(command_line, platform, verbose)
+
+    if isinstance(request, PrintFileRequest):
+        output_path = resolve(request.output_file)
+        if verbose:
+            print("Printing to file: %s" % output_path)
+        with open(output_path, "w") as f:
+            f.write(request.content)
+        return 0
+
+    if isinstance(request, CopyRequest):
+        input_path = resolve(request.input_file)
+        output_path = resolve(request.output_file)
+        if verbose:
+            print("Copying file to: %s" % output_path)
+        shutil.copyfile(input_path, output_path)
+        return 0
+
+    if isinstance(request, VariableRequest):
+        # No-op
+        return 0
+
+    assert isinstance(request.tool, IcuTool)
+    tool_name = request.tool.name
+    # {{ARGS}} survives this first format() as {ARGS}, to be filled in later.
+    if platform == "windows":
+        cmd_template = "{TOOL_DIR}/{TOOL}/{TOOL_CFG}/{TOOL}.exe {{ARGS}}".format(
+            TOOL_DIR = tool_dir,
+            TOOL_CFG = tool_cfg,
+            TOOL = tool_name,
+            **common_vars
+        )
+    elif platform == "unix":
+        cmd_template = "{TOOL_DIR}/{TOOL} {{ARGS}}".format(
+            TOOL_DIR = tool_dir,
+            TOOL = tool_name,
+            **common_vars
+        )
+    elif platform == "bazel":
+        cmd_template = "{TOOL_DIR}/{TOOL}/{TOOL} {{ARGS}}".format(
+            TOOL_DIR = tool_dir,
+            TOOL = tool_name,
+            **common_vars
+        )
+    else:
+        raise ValueError("Unknown platform: %s" % platform)
+
+    if isinstance(request, RepeatedExecutionRequest):
+        for loop_vars in utils.repeated_execution_request_looper(request):
+            returncode = execute(utils.format_repeated_request_command(
+                request,
+                cmd_template,
+                loop_vars,
+                common_vars
+            ))
+            if returncode != 0:
+                return returncode
+        return 0
+
+    if isinstance(request, SingleExecutionRequest):
+        return execute(utils.format_single_request_command(
+            request,
+            cmd_template,
+            common_vars
+        ))
+
+    assert False
+
+def run_shell_command(command_line, platform, verbose):
+    """Run command_line through the shell and return its exit code.
+
+    In verbose mode the command is echoed and its output is left attached to
+    the console; otherwise stdout and stderr are sent to os.devnull. A
+    non-zero exit code is reported on stderr.
+
+    On Windows, if the command would exceed the maximum length that CMD
+    supports (8191), COMSPEC is temporarily pointed at PowerShell for this
+    one command. PowerShell is not used for everything because it tends to
+    be slower. The previous COMSPEC is restored even if launching the
+    command raises.
+    """
+    previous_comspec = None
+    if platform == "windows":
+        comspec = os.environ["COMSPEC"]
+        # Add 7 to the length for the argument /c with quotes.
+        # For example: C:\WINDOWS\system32\cmd.exe /c "<command_line>"
+        if len(comspec) + len(command_line) + 7 > 8190:
+            if verbose:
+                print("Command length exceeds the max length for CMD on Windows, using PowerShell instead.")
+            os.environ["COMSPEC"] = 'powershell'
+            previous_comspec = comspec
+    try:
+        if verbose:
+            print("Running: %s" % command_line)
+            returncode = subprocess.call(
+                command_line,
+                shell = True
+            )
+        else:
+            # Pipe output to /dev/null in quiet mode
+            with open(os.devnull, "w") as devnull:
+                returncode = subprocess.call(
+                    command_line,
+                    shell = True,
+                    stdout = devnull,
+                    stderr = devnull
+                )
+    finally:
+        # Never leave the process environment pointing at PowerShell.
+        if previous_comspec is not None:
+            os.environ["COMSPEC"] = previous_comspec
+    if returncode != 0:
+        print("Command failed: %s" % command_line, file=sys.stderr)
+    return returncode
diff --git a/intl/icu/source/python/icutools/databuilder/renderers/makefile.py b/intl/icu/source/python/icutools/databuilder/renderers/makefile.py
new file mode 100644
index 0000000000..9b2005b07d
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/renderers/makefile.py
@@ -0,0 +1,245 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+# Python 2/3 Compatibility (ICU-20299)
+# TODO(ICU-20301): Remove this.
+from __future__ import print_function
+
+from . import *
+from .. import *
+from .. import utils
+from ..request_types import *
+
+def get_gnumake_rules(build_dirs, requests, makefile_vars, **kwargs):
+    """Render the GNU Makefile text for the given directories and requests.
+
+    The output consists of, in order: one "KEY = VALUE" line per entry of
+    makefile_vars (sorted by key), the DIRS variable with the rule that
+    creates all build directories, and then one variable or rule for every
+    item produced by get_gnumake_rules_helper(). kwargs must contain
+    "common_vars" and is forwarded to the helper functions.
+    """
+    common_vars = kwargs["common_vars"]
+    chunks = []
+
+    # Common Variables
+    for key, value in sorted(makefile_vars.items()):
+        chunks.append("{KEY} = {VALUE}\n".format(
+            KEY = key,
+            VALUE = value
+        ))
+    chunks.append("\n")
+
+    # Directories
+    dirs_timestamp_file = "{TMP_DIR}/dirs.timestamp".format(**common_vars)
+    chunks.append("DIRS = {TIMESTAMP_FILE}\n\n".format(
+        TIMESTAMP_FILE = dirs_timestamp_file
+    ))
+    chunks.append("{TIMESTAMP_FILE}:\n\t$(MKINSTALLDIRS) {ALL_DIRS}\n\techo timestamp > {TIMESTAMP_FILE}\n\n".format(
+        TIMESTAMP_FILE = dirs_timestamp_file,
+        ALL_DIRS = " ".join(build_dirs).format(**common_vars)
+    ))
+
+    # Generate Rules
+    make_rules = []
+    for request in requests:
+        make_rules.extend(get_gnumake_rules_helper(request, **kwargs))
+
+    # Main Commands
+    for rule in make_rules:
+        if isinstance(rule, MakeFilesVar):
+            chunks.append("{NAME} = {FILE_LIST}\n\n".format(
+                NAME = rule.name,
+                FILE_LIST = files_to_makefile(rule.files, wrap = True, **kwargs),
+            ))
+        elif isinstance(rule, MakeStringVar):
+            chunks.append("define {NAME}\n{CONTENT}\nendef\nexport {NAME}\n\n".format(
+                NAME = rule.name,
+                CONTENT = rule.content
+            ))
+        else:
+            assert isinstance(rule, MakeRule)
+            # $(DIRS) is an order-only prerequisite (after the "|").
+            header_line = "{OUT_FILE}: {DEP_FILES} {DEP_LITERALS} | $(DIRS)".format(
+                OUT_FILE = files_to_makefile([rule.output_file], **kwargs),
+                DEP_FILES = files_to_makefile(rule.dep_files, wrap = True, **kwargs),
+                DEP_LITERALS = " ".join(rule.dep_literals)
+            )
+            if len(rule.cmds) == 0:
+                chunks.append("%s\n\n" % header_line)
+            else:
+                chunks.append("{HEADER_LINE}\n{RULE_LINES}\n\n".format(
+                    HEADER_LINE = header_line,
+                    RULE_LINES = "\n".join("\t%s" % cmd for cmd in rule.cmds)
+                ))
+
+    return "".join(chunks)
+
+def files_to_makefile(files, common_vars, wrap = False, **kwargs):
+    """Render a list of file objects as the text used for them in the Makefile.
+
+    Returns "" for an empty list and "dir/name" for a single file. Several
+    files sharing one directory become a single $(addprefix dir/,...) call;
+    otherwise each file is written out as "dir/name". With wrap set and more
+    than two files, entries are separated by a backslash line continuation
+    instead of a single space.
+    """
+    count = len(files)
+    if count == 0:
+        return ""
+    dirnames = [utils.dir_for(file).format(**common_vars) for file in files]
+    if count == 1:
+        return "%s/%s" % (dirnames[0], files[0].filename)
+    separator = " \\\n\t\t" if (wrap and count > 2) else " "
+    if len(set(dirnames)) == 1:
+        filenames = separator.join(file.filename for file in files)
+        return "$(addprefix %s/,%s)" % (dirnames[0], filenames)
+    paths = ["%s/%s" % (d, f.filename) for d, f in zip(dirnames, files)]
+    return separator.join(paths)
+
+def get_gnumake_rules_helper(request, common_vars, **kwargs):
+    """Translate one request into a list of Makefile items.
+
+    The returned list contains MakeRule, MakeFilesVar and MakeStringVar
+    entries:
+
+    - PrintFileRequest: a string variable with the content plus a rule that
+      echoes it into the output file.
+    - CopyRequest: a rule that copies the input file with "cp".
+    - VariableRequest: a files variable named after the request.
+    - SingleExecutionRequest: one rule, or a timestamp rule plus one empty
+      rule per output file when there are several outputs.
+    - RepeatedExecutionRequest: one rule per input/output pair.
+    """
+
+    if isinstance(request, PrintFileRequest):
+        var_name = "%s_CONTENT" % request.name.upper()
+        return [
+            MakeStringVar(
+                name = var_name,
+                content = request.content
+            ),
+            MakeRule(
+                name = request.name,
+                dep_literals = [],
+                dep_files = [],
+                output_file = request.output_file,
+                cmds = [
+                    "echo \"$${VAR_NAME}\" > {MAKEFILENAME}".format(
+                        VAR_NAME = var_name,
+                        MAKEFILENAME = files_to_makefile([request.output_file], common_vars),
+                        **common_vars
+                    )
+                ]
+            )
+        ]
+
+
+    if isinstance(request, CopyRequest):
+        return [
+            MakeRule(
+                name = request.name,
+                dep_literals = [],
+                dep_files = [request.input_file],
+                output_file = request.output_file,
+                cmds = ["cp %s %s" % (
+                    files_to_makefile([request.input_file], common_vars),
+                    files_to_makefile([request.output_file], common_vars))
+                ]
+            )
+        ]
+
+    if isinstance(request, VariableRequest):
+        return [
+            MakeFilesVar(
+                name = request.name.upper(),
+                files = request.input_files
+            )
+        ]
+
+    if request.tool.name == "make":
+        cmd_template = "$(MAKE) {ARGS}"
+    elif request.tool.name == "gentest":
+        cmd_template = "$(INVOKE) $(GENTEST) {ARGS}"
+    else:
+        assert isinstance(request.tool, IcuTool)
+        # {{ARGS}} survives this format() call as {ARGS}.
+        cmd_template = "$(INVOKE) $(TOOLBINDIR)/{TOOL} {{ARGS}}".format(
+            TOOL = request.tool.name
+        )
+
+    if isinstance(request, SingleExecutionRequest):
+        cmd = utils.format_single_request_command(request, cmd_template, common_vars)
+        dep_files = request.all_input_files()
+
+        if len(request.output_files) > 1:
+            # Special case for multiple output files: Makefile rules should have only one
+            # output file apiece. More information:
+            # https://www.gnu.org/software/automake/manual/html_node/Multiple-Outputs.html
+            timestamp_var_name = "%s_ALL" % request.name.upper()
+            timestamp_file = TmpFile("%s.timestamp" % request.name)
+            rules = [
+                MakeFilesVar(
+                    name = timestamp_var_name,
+                    files = [timestamp_file]
+                ),
+                MakeRule(
+                    name = "%s_all" % request.name,
+                    dep_literals = [],
+                    dep_files = dep_files,
+                    output_file = timestamp_file,
+                    cmds = [
+                        cmd,
+                        "echo timestamp > {MAKEFILENAME}".format(
+                            MAKEFILENAME = files_to_makefile([timestamp_file], common_vars)
+                        )
+                    ]
+                )
+            ]
+            # Each real output depends only on the timestamp file and has no
+            # commands of its own.
+            for i, file in enumerate(request.output_files):
+                rules += [
+                    MakeRule(
+                        name = "%s_%d" % (request.name, i),
+                        dep_literals = ["$(%s)" % timestamp_var_name],
+                        dep_files = [],
+                        output_file = file,
+                        cmds = []
+                    )
+                ]
+            return rules
+
+        elif len(dep_files) > 5:
+            # For nicer printing, for long input lists, use a helper variable.
+            dep_var_name = "%s_DEPS" % request.name.upper()
+            return [
+                MakeFilesVar(
+                    name = dep_var_name,
+                    files = dep_files
+                ),
+                MakeRule(
+                    name = request.name,
+                    dep_literals = ["$(%s)" % dep_var_name],
+                    dep_files = [],
+                    output_file = request.output_files[0],
+                    cmds = [cmd]
+                )
+            ]
+
+        else:
+            return [
+                MakeRule(
+                    name = request.name,
+                    dep_literals = [],
+                    dep_files = dep_files,
+                    output_file = request.output_files[0],
+                    cmds = [cmd]
+                )
+            ]
+
+    if isinstance(request, RepeatedExecutionRequest):
+        rules = []
+        dep_literals = []
+        # To keep from repeating the same dep files many times, make a variable.
+        if len(request.common_dep_files) > 0:
+            dep_var_name = "%s_DEPS" % request.name.upper()
+            dep_literals = ["$(%s)" % dep_var_name]
+            rules += [
+                MakeFilesVar(
+                    name = dep_var_name,
+                    files = request.common_dep_files
+                )
+            ]
+        # Add a rule for each individual file.
+        for loop_vars in utils.repeated_execution_request_looper(request):
+            (_, specific_dep_files, input_file, output_file) = loop_vars
+            # The suffix is the input's base name without its extension. The
+            # slice must be applied to the filename string; the indices are
+            # computed from it, and slicing input_file itself does not yield
+            # the name.
+            filename = input_file.filename
+            name_suffix = filename[filename.rfind("/")+1:filename.rfind(".")]
+            cmd = utils.format_repeated_request_command(
+                request,
+                cmd_template,
+                loop_vars,
+                common_vars
+            )
+            rules += [
+                MakeRule(
+                    name = "%s_%s" % (request.name, name_suffix),
+                    dep_literals = dep_literals,
+                    dep_files = specific_dep_files + [input_file],
+                    output_file = output_file,
+                    cmds = [cmd]
+                )
+            ]
+        return rules
+
+    assert False
diff --git a/intl/icu/source/python/icutools/databuilder/request_types.py b/intl/icu/source/python/icutools/databuilder/request_types.py
new file mode 100644
index 0000000000..aa70f8d918
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/request_types.py
@@ -0,0 +1,364 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+# Python 2/3 Compatibility (ICU-20299)
+# TODO(ICU-20301): Remove this.
+from __future__ import print_function
+
+from abc import abstractmethod
+import copy
+import sys
+
+from . import *
+from . import utils
+
+
+# TODO(ICU-20301): Remove arguments from all instances of super() in this file
+
+# Note: for this to be a proper abstract class, it should extend abc.ABC.
+# There is no nice way to do this that works in both Python 2 and 3.
+# TODO(ICU-20301): Make this inherit from abc.ABC.
+class AbstractRequest(object):
+    """Common base of all request types.
+
+    Subclasses declare their fields as instance attributes before calling
+    this constructor; keyword arguments may only name attributes that
+    already exist on the instance.
+    """
+
+    def __init__(self, **kwargs):
+
+        # Used for identification purposes
+        self.name = None
+
+        # The filter category that applies to this request
+        self.category = None
+
+        self._set_fields(kwargs)
+
+    def _set_fields(self, kwargs):
+        """Copy keyword arguments onto self; raise ValueError for unknown names."""
+        for field, value in list(kwargs.items()):
+            if not hasattr(self, field):
+                raise ValueError("Unknown argument: %s" % field)
+            # Lists are copied shallowly and dicts deeply, so the stored
+            # container is a different object from the caller's.
+            if isinstance(value, list):
+                value = copy.copy(value)
+            elif isinstance(value, dict):
+                value = copy.deepcopy(value)
+            setattr(self, field, value)
+
+    def apply_file_filter(self, filter):
+        """
+        Returns True if this request still has input files after filtering,
+        or False if the request is "empty" after filtering.
+        """
+        return True
+
+    def flatten(self, config, all_requests, common_vars):
+        """Return the list of requests this request expands to (itself)."""
+        return [self]
+
+    def all_input_files(self):
+        """Return the input files of this request (none by default)."""
+        return []
+
+    def all_output_files(self):
+        """Return the output files of this request (none by default)."""
+        return []
+
+
+class AbstractExecutionRequest(AbstractRequest):
+    """Base class for requests that run a tool on input files."""
+
+    def __init__(self, **kwargs):
+
+        # Names of targets (requests) or files that this request depends on.
+        # The entries of dep_targets may be any of the following types:
+        #
+        # 1. DepTarget, for the output of an execution request.
+        # 2. InFile, TmpFile, etc., for a specific file.
+        # 3. A list of InFile, TmpFile, etc., where the list is the same
+        #    length as self.input_files and self.output_files.
+        #
+        # In cases 1 and 2, the dependency is added to all rules that the
+        # request generates. In case 3, the dependency is added only to the
+        # rule that generates the output file at the same array index.
+        self.dep_targets = []
+
+        # Computed during self.flatten(); don't edit directly.
+        self.common_dep_files = []
+
+        # Primary input files
+        self.input_files = []
+
+        # Output files; for some subclasses, this must be the same length
+        # as input_files
+        self.output_files = []
+
+        # What tool to execute
+        self.tool = None
+
+        # Argument string to pass to the tool with optional placeholders
+        self.args = ""
+
+        # Placeholders to substitute into the argument string; if any of these
+        # have a list type, the list must be equal in length to input_files
+        self.format_with = {}
+
+        super(AbstractExecutionRequest, self).__init__(**kwargs)
+
+    def apply_file_filter(self, filter):
+        """Drop input files rejected by filter; return True if any remain."""
+        kept = 0
+        while kept < len(self.input_files):
+            if not filter.match(self.input_files[kept]):
+                # The next candidate slides into this index; do not advance.
+                self._del_at(kept)
+                continue
+            kept += 1
+        return kept > 0
+
+    def _del_at(self, i):
+        """Remove index i from input_files and from every parallel list."""
+        del self.input_files[i]
+        parallel_lists = [v for v in self.format_with.values() if isinstance(v, list)]
+        parallel_lists += [v for v in self.dep_targets if isinstance(v, list)]
+        for entries in parallel_lists:
+            # input_files has already shrunk by one at this point.
+            assert len(entries) == len(self.input_files) + 1
+            del entries[i]
+
+    def flatten(self, config, all_requests, common_vars):
+        """Resolve dep_targets into files, then flatten like the base class."""
+        self._dep_targets_to_files(all_requests)
+        return super(AbstractExecutionRequest, self).flatten(config, all_requests, common_vars)
+
+    def _dep_targets_to_files(self, all_requests):
+        """Move every entry of dep_targets into the matching dep-file list."""
+        if not self.dep_targets:
+            return
+        for dep_target in self.dep_targets:
+            if isinstance(dep_target, list):
+                if hasattr(self, "specific_dep_files"):
+                    # One dependency per input file, index by index.
+                    assert len(dep_target) == len(self.specific_dep_files)
+                    for file, out_list in zip(dep_target, self.specific_dep_files):
+                        assert hasattr(file, "filename")
+                        out_list.append(file)
+                else:
+                    self.common_dep_files += dep_target
+            elif isinstance(dep_target, DepTarget):
+                # For DepTarget entries, search for the target.
+                producer = next(
+                    (request for request in all_requests if request.name == dep_target.name),
+                    None
+                )
+                if producer is None:
+                    print("Warning: Unable to find target %s, a dependency of %s" % (
+                        dep_target.name,
+                        self.name
+                    ), file=sys.stderr)
+                else:
+                    self.common_dep_files += producer.all_output_files()
+            else:
+                # Copy file entries directly to dep_files.
+                assert hasattr(dep_target, "filename")
+                self.common_dep_files.append(dep_target)
+        self.dep_targets = []
+
+    def all_input_files(self):
+        """Return the common dependency files followed by the input files."""
+        return self.common_dep_files + self.input_files
+
+    def all_output_files(self):
+        """Return the output files."""
+        return self.output_files
+
+
+class SingleExecutionRequest(AbstractExecutionRequest):
+    """An execution request that adds no fields of its own.
+
+    Construction is inherited unchanged from AbstractExecutionRequest.
+    """
+    pass
+
+
+class RepeatedExecutionRequest(AbstractExecutionRequest):
+    """An execution request with per-input placeholders and dependencies."""
+
+    def __init__(self, **kwargs):
+
+        # Placeholders to substitute into the argument string unique to each
+        # iteration; all values must be lists equal in length to input_files
+        self.repeat_with = {}
+
+        # Lists for dep files that are specific to individual resource bundle files.
+        # input_files is optional, as in the base class, so a missing
+        # argument yields no per-file lists instead of raising KeyError.
+        self.specific_dep_files = [[] for _ in kwargs.get("input_files", [])]
+
+        super(RepeatedExecutionRequest, self).__init__(**kwargs)
+
+    def _del_at(self, i):
+        """Remove index i from the inputs and from all parallel per-input lists."""
+        super(RepeatedExecutionRequest, self)._del_at(i)
+        del self.output_files[i]
+        del self.specific_dep_files[i]
+        for v in self.repeat_with.values():
+            if isinstance(v, list):
+                del v[i]
+
+    def all_input_files(self):
+        """Return common deps, input files, and every per-file dependency."""
+        # The base class returns a fresh list, so extending it is safe.
+        files = super(RepeatedExecutionRequest, self).all_input_files()
+        for specific_file_list in self.specific_dep_files:
+            files += specific_file_list
+        return files
+
+
+class RepeatedOrSingleExecutionRequest(AbstractExecutionRequest):
+    """A request that flattens to a repeated or a single execution request.
+
+    The choice is made in flatten() from config.max_parallel.
+    """
+
+    def __init__(self, **kwargs):
+        self.repeat_with = {}
+        super(RepeatedOrSingleExecutionRequest, self).__init__(**kwargs)
+
+    def flatten(self, config, all_requests, common_vars):
+        """Build the concrete request for the configuration and flatten it."""
+        shared = dict(
+            name = self.name,
+            category = self.category,
+            dep_targets = self.dep_targets,
+            input_files = self.input_files,
+            output_files = self.output_files,
+            tool = self.tool,
+            args = self.args
+        )
+        if config.max_parallel:
+            new_request = RepeatedExecutionRequest(
+                format_with = self.format_with,
+                repeat_with = self.repeat_with,
+                **shared
+            )
+        else:
+            # A single execution has no per-iteration placeholders, so both
+            # placeholder dictionaries are merged into format_with.
+            new_request = SingleExecutionRequest(
+                format_with = utils.concat_dicts(self.format_with, self.repeat_with),
+                **shared
+            )
+        return new_request.flatten(config, all_requests, common_vars)
+
+    def _del_at(self, i):
+        """Remove index i from the inputs, outputs and repeat_with lists."""
+        super(RepeatedOrSingleExecutionRequest, self)._del_at(i)
+        del self.output_files[i]
+        for v in self.repeat_with.values():
+            if isinstance(v, list):
+                del v[i]
+
+
+class PrintFileRequest(AbstractRequest):
+    """A request to write a fixed string (content) into output_file."""
+
+    def __init__(self, **kwargs):
+        # Both fields must exist before the base constructor assigns kwargs.
+        self.output_file = None
+        self.content = None
+        super(PrintFileRequest, self).__init__(**kwargs)
+
+    def all_output_files(self):
+        """Return a one-element list holding the output file."""
+        outputs = [self.output_file]
+        return outputs
+
+
+class CopyRequest(AbstractRequest):
+    """A request to copy input_file to output_file."""
+
+    def __init__(self, **kwargs):
+        # Both fields must exist before the base constructor assigns kwargs.
+        self.input_file = None
+        self.output_file = None
+        super(CopyRequest, self).__init__(**kwargs)
+
+    def all_input_files(self):
+        """Return a one-element list holding the input file."""
+        inputs = [self.input_file]
+        return inputs
+
+    def all_output_files(self):
+        """Return a one-element list holding the output file."""
+        outputs = [self.output_file]
+        return outputs
+
+
+class VariableRequest(AbstractRequest):
+    """A request that only names a list of files (input_files)."""
+
+    def __init__(self, **kwargs):
+        # The field must exist before the base constructor assigns kwargs.
+        self.input_files = []
+        super(VariableRequest, self).__init__(**kwargs)
+
+    def all_input_files(self):
+        """Return the stored list of files (the same list object, not a copy)."""
+        return self.input_files
+
+
+class ListRequest(AbstractRequest):
+    """A request that lists all output files in a file and in a variable.
+
+    flatten() expands it into a PrintFileRequest (the list file) followed by
+    a VariableRequest named variable_name.
+    """
+
+    def __init__(self, **kwargs):
+        self.variable_name = None
+        self.output_file = None
+        self.include_tmp = None
+        super(ListRequest, self).__init__(**kwargs)
+
+    def flatten(self, config, all_requests, common_vars):
+        """Return the flattened print-file request plus the variable request."""
+        list_files = sorted(utils.get_all_output_files(all_requests))
+        if self.include_tmp:
+            variable_files = sorted(utils.get_all_output_files(all_requests, include_tmp=True))
+        else:
+            # Always include the list file itself
+            variable_files = list_files + [self.output_file]
+
+        print_request = PrintFileRequest(
+            name = self.name,
+            output_file = self.output_file,
+            content = "\n".join(file.filename for file in list_files)
+        )
+        flattened = print_request.flatten(config, all_requests, common_vars)
+
+        variable_request = VariableRequest(
+            name = self.variable_name,
+            input_files = variable_files
+        )
+        return flattened + variable_request.flatten(config, all_requests, common_vars)
+
+    def all_output_files(self):
+        """Return a one-element list holding the list file."""
+        return [self.output_file]
+
+
+class IndexRequest(AbstractRequest):
+    """A request that generates a locale index file and compiles it.
+
+    flatten() expands it into a PrintFileRequest that writes txt_file and a
+    SingleExecutionRequest that runs genrb on it to produce output_file.
+    """
+
+    def __init__(self, **kwargs):
+        self.installed_files = []
+        self.alias_files = []
+        self.txt_file = None
+        self.output_file = None
+        self.cldr_version = ""
+        self.args = ""
+        self.format_with = {}
+        super(IndexRequest, self).__init__(**kwargs)
+
+    @staticmethod
+    def _retain_matching(files, filter):
+        """Delete non-matching entries of files in place; return how many remain."""
+        kept = 0
+        while kept < len(files):
+            if filter.match(files[kept]):
+                kept += 1
+            else:
+                del files[kept]
+        return kept
+
+    def apply_file_filter(self, filter):
+        """Filter installed and alias files; return True if any file remains."""
+        installed_count = IndexRequest._retain_matching(self.installed_files, filter)
+        alias_count = IndexRequest._retain_matching(self.alias_files, filter)
+        return installed_count + alias_count > 0
+
+    def flatten(self, config, all_requests, common_vars):
+        """Return the print-file request followed by the genrb request."""
+        print_request = PrintFileRequest(
+            name = self.name,
+            output_file = self.txt_file,
+            content = self._generate_index_file(common_vars)
+        )
+        flattened = print_request.flatten(config, all_requests, common_vars)
+        compile_request = SingleExecutionRequest(
+            name = "%s_res" % self.name,
+            category = self.category,
+            input_files = [self.txt_file],
+            output_files = [self.output_file],
+            tool = IcuTool("genrb"),
+            args = self.args,
+            format_with = self.format_with
+        )
+        return flattened + compile_request.flatten(config, all_requests, common_vars)
+
+    def _generate_index_file(self, common_vars):
+        """Build the text of the index file; common_vars must supply INDEX_NAME."""
+        stem_of = IndexRequest.locale_file_stem
+        formatted_installed_locales = "\n".join(
+            " %s {\"\"}" % stem_of(f) for f in self.installed_files
+        )
+        formatted_alias_locales = "\n".join(
+            " %s {\"\"}" % stem_of(f) for f in self.alias_files
+        )
+        if self.cldr_version:
+            formatted_version = " CLDRVersion { \"%s\" }\n" % self.cldr_version
+        else:
+            formatted_version = ""
+        # TODO: CLDRVersion is required only in the base file
+        template = ("// Warning this file is automatically generated\n"
+            "{INDEX_NAME}:table(nofallback) {{\n"
+            "{FORMATTED_VERSION}"
+            " InstalledLocales:table {{\n"
+            "{FORMATTED_INSTALLED_LOCALES}\n"
+            " }}\n"
+            " AliasLocales:table {{\n"
+            "{FORMATTED_ALIAS_LOCALES}\n"
+            " }}\n"
+            "}}")
+        return template.format(
+            FORMATTED_VERSION = formatted_version,
+            FORMATTED_INSTALLED_LOCALES = formatted_installed_locales,
+            FORMATTED_ALIAS_LOCALES = formatted_alias_locales,
+            **common_vars
+        )
+
+    def all_input_files(self):
+        """Return the installed files followed by the alias files."""
+        return self.installed_files + self.alias_files
+
+    def all_output_files(self):
+        """Return a one-element list holding the output file."""
+        return [self.output_file]
+
+    @staticmethod
+    def locale_file_stem(f):
+        """Return f.filename without its directory part and last four characters."""
+        filename = f.filename
+        start = filename.rfind("/") + 1
+        return filename[start:-4]
diff --git a/intl/icu/source/python/icutools/databuilder/test/__init__.py b/intl/icu/source/python/icutools/databuilder/test/__init__.py
new file mode 100644
index 0000000000..dd12bfa16e
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/test/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
diff --git a/intl/icu/source/python/icutools/databuilder/test/__main__.py b/intl/icu/source/python/icutools/databuilder/test/__main__.py
new file mode 100644
index 0000000000..6ae2c0f7c9
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/test/__main__.py
@@ -0,0 +1,14 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+import unittest
+
+from . import filtration_test
+
+def load_tests(loader, tests, pattern):
+    """unittest load_tests hook: return a suite wrapping the filtration tests."""
+    return unittest.TestSuite([filtration_test.suite])
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/intl/icu/source/python/icutools/databuilder/test/filtration_test.py b/intl/icu/source/python/icutools/databuilder/test/filtration_test.py
new file mode 100644
index 0000000000..416223bd7e
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/test/filtration_test.py
@@ -0,0 +1,421 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+import io as pyio
+import json
+import os
+import unittest
+
+from .. import InFile
+from ..comment_stripper import CommentStripper
+from ..filtration import Filter
+
+# File stems (locale names without directory or ".txt") that every test
+# offers to the filter under test; see FiltrationTest._check_filter.
+EXAMPLE_FILE_STEMS = [
+ "af_NA",
+ "af_VARIANT",
+ "af_ZA_VARIANT",
+ "af_ZA",
+ "af",
+ "ar",
+ "ar_SA",
+ "ars",
+ "bs_BA",
+ "bs_Cyrl_BA",
+ "bs_Cyrl",
+ "bs_Latn_BA",
+ "bs_Latn",
+ "bs",
+ "en_001",
+ "en_150",
+ "en_DE",
+ "en_GB",
+ "en_US",
+ "root",
+ "sr_BA",
+ "sr_CS",
+ "sr_Cyrl_BA",
+ "sr_Cyrl_CS",
+ "sr_Cyrl_ME",
+ "sr_Cyrl",
+ "sr_Latn_BA",
+ "sr_Latn_CS",
+ "sr_Latn_ME_VARIANT",
+ "sr_Latn_ME",
+ "sr_Latn",
+ "sr_ME",
+ "sr",
+ "vai_Latn_LR",
+ "vai_Latn",
+ "vai_LR",
+ "vai_Vaii_LR",
+ "vai_Vaii",
+ "vai",
+ "yue",
+ "zh_CN",
+ "zh_Hans_CN",
+ "zh_Hans_HK",
+ "zh_Hans_MO",
+ "zh_Hans_SG",
+ "zh_Hans",
+ "zh_Hant_HK",
+ "zh_Hant_MO",
+ "zh_Hant_TW",
+ "zh_Hant",
+ "zh_HK",
+ "zh_MO",
+ "zh_SG",
+ "zh_TW",
+ "zh"
+]
+
+
+class TestIO(object):
+    """I/O object for the tests; reads locale dependencies from sample_data."""
+
+    def __init__(self):
+        pass
+
+    def read_locale_deps(self, tree):
+        """Return the parsed LOCALE_DEPS.json of tree, or None for other trees."""
+        if tree in ("brkitr", "locales", "rbnf"):
+            deps_path = os.path.join(
+                os.path.dirname(__file__),
+                "sample_data",
+                tree,
+                "LOCALE_DEPS.json"
+            )
+            # The sample files contain // comments, which CommentStripper
+            # handles before the text reaches the JSON parser.
+            with pyio.open(deps_path, "r", encoding="utf-8-sig") as f:
+                return json.load(CommentStripper(f))
+        return None
+
+
+class FiltrationTest(unittest.TestCase):
+    """Checks which EXAMPLE_FILE_STEMS each kind of filter matches."""
+
+    def test_exclude(self):
+        self._check_filter(Filter.create_from_json({
+            "filterType": "exclude"
+        }, TestIO()), [
+        ])
+
+    def test_default_whitelist(self):
+        self._check_filter(Filter.create_from_json({
+            "whitelist": [
+                "ars",
+                "zh_Hans"
+            ]
+        }, TestIO()), [
+            "ars",
+            "zh_Hans"
+        ])
+
+    def test_default_blacklist(self):
+        expected_matches = set(EXAMPLE_FILE_STEMS) - {
+            "ars",
+            "zh_Hans"
+        }
+        self._check_filter(Filter.create_from_json({
+            "blacklist": [
+                "ars",
+                "zh_Hans"
+            ]
+        }, TestIO()), expected_matches)
+
+    def test_language_whitelist(self):
+        self._check_filter(Filter.create_from_json({
+            "filterType": "language",
+            "whitelist": [
+                "af",
+                "bs"
+            ]
+        }, TestIO()), [
+            "root",
+            "af_NA",
+            "af_VARIANT",
+            "af_ZA_VARIANT",
+            "af_ZA",
+            "af",
+            "bs_BA",
+            "bs_Cyrl_BA",
+            "bs_Cyrl",
+            "bs_Latn_BA",
+            "bs_Latn",
+            "bs"
+        ])
+
+    def test_language_blacklist(self):
+        expected_matches = set(EXAMPLE_FILE_STEMS) - {
+            "af_NA",
+            "af_VARIANT",
+            "af_ZA_VARIANT",
+            "af_ZA",
+            "af"
+        }
+        self._check_filter(Filter.create_from_json({
+            "filterType": "language",
+            "blacklist": [
+                "af"
+            ]
+        }, TestIO()), expected_matches)
+
+    def test_regex_whitelist(self):
+        self._check_filter(Filter.create_from_json({
+            "filterType": "regex",
+            "whitelist": [
+                r"^ar.*$",
+                r"^zh$"
+            ]
+        }, TestIO()), [
+            "ar",
+            "ar_SA",
+            "ars",
+            "zh"
+        ])
+
+    def test_regex_blacklist(self):
+        expected_matches = set(EXAMPLE_FILE_STEMS) - {
+            "ar",
+            "ar_SA",
+            "ars",
+            "zh"
+        }
+        self._check_filter(Filter.create_from_json({
+            "filterType": "regex",
+            "blacklist": [
+                r"^ar.*$",
+                r"^zh$"
+            ]
+        }, TestIO()), expected_matches)
+
+    def test_locale_basic(self):
+        self._check_filter(Filter.create_from_json({
+            "filterType": "locale",
+            "whitelist": [
+                # Default scripts:
+                # sr => Cyrl
+                # vai => Vaii
+                # zh => Hans
+                "bs_BA", # is an alias to bs_Latn_BA
+                "en_DE",
+                "sr", # Language with no script
+                "vai_Latn", # Language with non-default script
+                "zh_Hans" # Language with default script
+            ]
+        }, TestIO()), [
+            "root",
+            # bs: should include the full dependency tree of bs_BA
+            "bs_BA",
+            "bs_Latn_BA",
+            "bs_Latn",
+            "bs",
+            # en: should include the full dependency tree of en_DE
+            "en",
+            "en_DE",
+            "en_150",
+            "en_001",
+            # sr: include Cyrl, the default, but not Latn.
+            "sr",
+            "sr_BA",
+            "sr_CS",
+            "sr_Cyrl",
+            "sr_Cyrl_BA",
+            "sr_Cyrl_CS",
+            "sr_Cyrl_ME",
+            # vai: include Latn but NOT Vaii.
+            "vai_Latn",
+            "vai_Latn_LR",
+            # zh: include Hans but NOT Hant.
+            "zh",
+            "zh_CN",
+            "zh_SG",
+            "zh_Hans",
+            "zh_Hans_CN",
+            "zh_Hans_HK",
+            "zh_Hans_MO",
+            "zh_Hans_SG"
+        ])
+
+    def test_locale_no_children(self):
+        self._check_filter(Filter.create_from_json({
+            "filterType": "locale",
+            "includeChildren": False,
+            "whitelist": [
+                # See comments in test_locale_basic.
+                "bs_BA",
+                "en_DE",
+                "sr",
+                "vai_Latn",
+                "zh_Hans"
+            ]
+        }, TestIO()), [
+            "root",
+            "bs_BA",
+            "bs_Latn_BA",
+            "bs_Latn",
+            "bs",
+            "en",
+            "en_DE",
+            "en_150",
+            "en_001",
+            "sr",
+            "vai_Latn",
+            "zh",
+            "zh_Hans",
+        ])
+
+    def test_locale_include_scripts(self):
+        self._check_filter(Filter.create_from_json({
+            "filterType": "locale",
+            "includeScripts": True,
+            "whitelist": [
+                # See comments in test_locale_basic.
+                "bs_BA",
+                "en_DE",
+                "sr",
+                "vai_Latn",
+                "zh_Hans"
+            ]
+        }, TestIO()), [
+            "root",
+            # bs: includeScripts only works for language-only (without region)
+            "bs_BA",
+            "bs_Latn_BA",
+            "bs_Latn",
+            "bs",
+            # en: should include the full dependency tree of en_DE
+            "en",
+            "en_DE",
+            "en_150",
+            "en_001",
+            # sr: include Latn, since no particular script was requested.
+            "sr_BA",
+            "sr_CS",
+            "sr_Cyrl_BA",
+            "sr_Cyrl_CS",
+            "sr_Cyrl_ME",
+            "sr_Cyrl",
+            "sr_Latn_BA",
+            "sr_Latn_CS",
+            "sr_Latn_ME_VARIANT",
+            "sr_Latn_ME",
+            "sr_Latn",
+            "sr_ME",
+            "sr",
+            # vai: do NOT include Vaii; the script was explicitly requested.
+            "vai_Latn_LR",
+            "vai_Latn",
+            # zh: do NOT include Hant; the script was explicitly requested.
+            "zh_CN",
+            "zh_SG",
+            "zh_Hans_CN",
+            "zh_Hans_HK",
+            "zh_Hans_MO",
+            "zh_Hans_SG",
+            "zh_Hans",
+            "zh"
+        ])
+
+    def test_locale_no_children_include_scripts(self):
+        self._check_filter(Filter.create_from_json({
+            "filterType": "locale",
+            "includeChildren": False,
+            "includeScripts": True,
+            "whitelist": [
+                # See comments in test_locale_basic.
+                "bs_BA",
+                "en_DE",
+                "sr",
+                "vai_Latn",
+                "zh_Hans"
+            ]
+        }, TestIO()), [
+            "root",
+            # bs: includeScripts only works for language-only (without region)
+            "bs_BA",
+            "bs_Latn_BA",
+            "bs_Latn",
+            "bs",
+            # en: should include the full dependency tree of en_DE
+            "en",
+            "en_DE",
+            "en_150",
+            "en_001",
+            # sr: include Cyrl and Latn but no other children
+            "sr",
+            "sr_Cyrl",
+            "sr_Latn",
+            # vai: include only the requested script
+            "vai_Latn",
+            # zh: include only the requested script
+            "zh",
+            "zh_Hans",
+        ])
+
+    def test_union(self):
+        self._check_filter(Filter.create_from_json({
+            "filterType": "union",
+            "unionOf": [
+                {
+                    "whitelist": [
+                        "ars",
+                        "zh_Hans"
+                    ]
+                },
+                {
+                    "filterType": "regex",
+                    "whitelist": [
+                        r"^bs.*$",
+                        r"^zh$"
+                    ]
+                }
+            ]
+        }, TestIO()), [
+            "ars",
+            "zh_Hans",
+            "bs_BA",
+            "bs_Cyrl_BA",
+            "bs_Cyrl",
+            "bs_Latn_BA",
+            "bs_Latn",
+            "bs",
+            "zh"
+        ])
+
+    def test_hk_deps_normal(self):
+        self._check_filter(Filter.create_from_json({
+            "filterType": "locale",
+            "whitelist": [
+                "zh_HK"
+            ]
+        }, TestIO()), [
+            "root",
+            "zh_Hant",
+            "zh_Hant_HK",
+            "zh_HK",
+        ])
+
+    def test_hk_deps_rbnf(self):
+        self._check_filter(Filter.create_from_json({
+            "filterType": "locale",
+            "whitelist": [
+                "zh_HK"
+            ]
+        }, TestIO()), [
+            "root",
+            "yue",
+            "zh_Hant_HK",
+            "zh_HK",
+        ], "rbnf")
+
+    def test_no_alias_parent_structure(self):
+        self._check_filter(Filter.create_from_json({
+            "filterType": "locale",
+            "whitelist": [
+                "zh_HK"
+            ]
+        }, TestIO()), [
+            "root",
+            "zh_HK",
+            "zh",
+        ], "brkitr")
+
+    def _check_filter(self, filter, expected_matches, tree="locales"):
+        # Offer every example stem to the filter as "<tree>/<stem>.txt" and
+        # require a match exactly for the stems listed in expected_matches.
+        for stem in EXAMPLE_FILE_STEMS:
+            candidate = InFile("%s/%s.txt" % (tree, stem))
+            self.assertEqual(filter.match(candidate), stem in expected_matches, stem)
+
+# Export the test for the runner.
+# unittest.makeSuite() is deprecated since Python 3.11 and removed in 3.13;
+# the default loader builds the same suite from the test_* methods.
+suite = unittest.defaultTestLoader.loadTestsFromTestCase(FiltrationTest)
diff --git a/intl/icu/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json b/intl/icu/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json
new file mode 100644
index 0000000000..674db09278
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json
@@ -0,0 +1,10 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+//////////////////////////////////////////////////////////////
+// This is a sample LOCALE_DEPS.json file for testing only. //
+//////////////////////////////////////////////////////////////
+
+{
+ "cldrVersion": "36.1"
+}
diff --git a/intl/icu/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json b/intl/icu/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json
new file mode 100644
index 0000000000..1456ea0d9a
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json
@@ -0,0 +1,197 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+//////////////////////////////////////////////////////////////
+// This is a sample LOCALE_DEPS.json file for testing only. //
+//////////////////////////////////////////////////////////////
+
+{
+ "cldrVersion": "36.1",
+ "aliases": {
+ "ars": "ar_SA",
+ "az_AZ": "az_Latn_AZ",
+ "bs_BA": "bs_Latn_BA",
+ "en_NH": "en_VU",
+ "en_RH": "en_ZW",
+ "ff_CM": "ff_Latn_CM",
+ "ff_GN": "ff_Latn_GN",
+ "ff_MR": "ff_Latn_MR",
+ "ff_SN": "ff_Latn_SN",
+ "in": "id",
+ "in_ID": "id_ID",
+ "iw": "he",
+ "iw_IL": "he_IL",
+ "mo": "ro",
+ "no": "nb",
+ "no_NO": "nb_NO",
+ "no_NO_NY": "nn_NO",
+ "pa_IN": "pa_Guru_IN",
+ "pa_PK": "pa_Arab_PK",
+ "sh": "sr_Latn",
+ "sh_BA": "sr_Latn_BA",
+ "sh_CS": "sr_Latn_RS",
+ "sh_YU": "sr_Latn_RS",
+ "shi_MA": "shi_Tfng_MA",
+ "sr_BA": "sr_Cyrl_BA",
+ "sr_CS": "sr_Cyrl_RS",
+ "sr_Cyrl_CS": "sr_Cyrl_RS",
+ "sr_Cyrl_YU": "sr_Cyrl_RS",
+ "sr_Latn_CS": "sr_Latn_RS",
+ "sr_Latn_YU": "sr_Latn_RS",
+ "sr_ME": "sr_Latn_ME",
+ "sr_RS": "sr_Cyrl_RS",
+ "sr_XK": "sr_Cyrl_XK",
+ "sr_YU": "sr_Cyrl_RS",
+ "tl": "fil",
+ "tl_PH": "fil_PH",
+ "uz_AF": "uz_Arab_AF",
+ "uz_UZ": "uz_Latn_UZ",
+ "vai_LR": "vai_Vaii_LR",
+ "yue_CN": "yue_Hans_CN",
+ "yue_HK": "yue_Hant_HK",
+ "zh_CN": "zh_Hans_CN",
+ "zh_HK": "zh_Hant_HK",
+ "zh_MO": "zh_Hant_MO",
+ "zh_SG": "zh_Hans_SG",
+ "zh_TW": "zh_Hant_TW"
+ },
+ "parents": {
+ "az_Cyrl": "root",
+ "bs_Cyrl": "root",
+ "en_150": "en_001",
+ "en_AG": "en_001",
+ "en_AI": "en_001",
+ "en_AT": "en_150",
+ "en_AU": "en_001",
+ "en_BB": "en_001",
+ "en_BE": "en_150",
+ "en_BM": "en_001",
+ "en_BS": "en_001",
+ "en_BW": "en_001",
+ "en_BZ": "en_001",
+ "en_CA": "en_001",
+ "en_CC": "en_001",
+ "en_CH": "en_150",
+ "en_CK": "en_001",
+ "en_CM": "en_001",
+ "en_CX": "en_001",
+ "en_CY": "en_001",
+ "en_DE": "en_150",
+ "en_DG": "en_001",
+ "en_DK": "en_150",
+ "en_DM": "en_001",
+ "en_ER": "en_001",
+ "en_FI": "en_150",
+ "en_FJ": "en_001",
+ "en_FK": "en_001",
+ "en_FM": "en_001",
+ "en_GB": "en_001",
+ "en_GD": "en_001",
+ "en_GG": "en_001",
+ "en_GH": "en_001",
+ "en_GI": "en_001",
+ "en_GM": "en_001",
+ "en_GY": "en_001",
+ "en_HK": "en_001",
+ "en_IE": "en_001",
+ "en_IL": "en_001",
+ "en_IM": "en_001",
+ "en_IN": "en_001",
+ "en_IO": "en_001",
+ "en_JE": "en_001",
+ "en_JM": "en_001",
+ "en_KE": "en_001",
+ "en_KI": "en_001",
+ "en_KN": "en_001",
+ "en_KY": "en_001",
+ "en_LC": "en_001",
+ "en_LR": "en_001",
+ "en_LS": "en_001",
+ "en_MG": "en_001",
+ "en_MO": "en_001",
+ "en_MS": "en_001",
+ "en_MT": "en_001",
+ "en_MU": "en_001",
+ "en_MW": "en_001",
+ "en_MY": "en_001",
+ "en_NA": "en_001",
+ "en_NF": "en_001",
+ "en_NG": "en_001",
+ "en_NL": "en_150",
+ "en_NR": "en_001",
+ "en_NU": "en_001",
+ "en_NZ": "en_001",
+ "en_PG": "en_001",
+ "en_PH": "en_001",
+ "en_PK": "en_001",
+ "en_PN": "en_001",
+ "en_PW": "en_001",
+ "en_RW": "en_001",
+ "en_SB": "en_001",
+ "en_SC": "en_001",
+ "en_SD": "en_001",
+ "en_SE": "en_150",
+ "en_SG": "en_001",
+ "en_SH": "en_001",
+ "en_SI": "en_150",
+ "en_SL": "en_001",
+ "en_SS": "en_001",
+ "en_SX": "en_001",
+ "en_SZ": "en_001",
+ "en_TC": "en_001",
+ "en_TK": "en_001",
+ "en_TO": "en_001",
+ "en_TT": "en_001",
+ "en_TV": "en_001",
+ "en_TZ": "en_001",
+ "en_UG": "en_001",
+ "en_VC": "en_001",
+ "en_VG": "en_001",
+ "en_VU": "en_001",
+ "en_WS": "en_001",
+ "en_ZA": "en_001",
+ "en_ZM": "en_001",
+ "en_ZW": "en_001",
+ "es_AR": "es_419",
+ "es_BO": "es_419",
+ "es_BR": "es_419",
+ "es_BZ": "es_419",
+ "es_CL": "es_419",
+ "es_CO": "es_419",
+ "es_CR": "es_419",
+ "es_CU": "es_419",
+ "es_DO": "es_419",
+ "es_EC": "es_419",
+ "es_GT": "es_419",
+ "es_HN": "es_419",
+ "es_MX": "es_419",
+ "es_NI": "es_419",
+ "es_PA": "es_419",
+ "es_PE": "es_419",
+ "es_PR": "es_419",
+ "es_PY": "es_419",
+ "es_SV": "es_419",
+ "es_US": "es_419",
+ "es_UY": "es_419",
+ "es_VE": "es_419",
+ "pa_Arab": "root",
+ "pt_AO": "pt_PT",
+ "pt_CH": "pt_PT",
+ "pt_CV": "pt_PT",
+ "pt_GQ": "pt_PT",
+ "pt_GW": "pt_PT",
+ "pt_LU": "pt_PT",
+ "pt_MO": "pt_PT",
+ "pt_MZ": "pt_PT",
+ "pt_ST": "pt_PT",
+ "pt_TL": "pt_PT",
+ "shi_Latn": "root",
+ "sr_Latn": "root",
+ "uz_Arab": "root",
+ "uz_Cyrl": "root",
+ "vai_Latn": "root",
+ "yue_Hans": "root",
+ "zh_Hant": "root",
+ "zh_Hant_MO": "zh_Hant_HK"
+ }
+}
diff --git a/intl/icu/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json b/intl/icu/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json
new file mode 100644
index 0000000000..c6ec208add
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json
@@ -0,0 +1,36 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+//////////////////////////////////////////////////////////////
+// This is a sample LOCALE_DEPS.json file for testing only. //
+//////////////////////////////////////////////////////////////
+
+{
+ "cldrVersion": "36.1",
+ "aliases": {
+ "ars": "ar_SA",
+ "in": "id",
+ "iw": "he",
+ "no": "nb",
+ "sh": "sr_Latn",
+ "zh_HK": "zh_Hant_HK",
+ "zh_Hant_HK": "yue",
+ "zh_MO": "zh_Hant_MO",
+ "zh_TW": "zh_Hant_TW"
+ },
+ "parents": {
+ "en_IN": "en_001",
+ "es_DO": "es_419",
+ "es_GT": "es_419",
+ "es_HN": "es_419",
+ "es_MX": "es_419",
+ "es_NI": "es_419",
+ "es_PA": "es_419",
+ "es_PR": "es_419",
+ "es_SV": "es_419",
+ "es_US": "es_419",
+ "sr_Latn": "root",
+ "yue_Hans": "root",
+ "zh_Hant": "root"
+ }
+}
diff --git a/intl/icu/source/python/icutools/databuilder/utils.py b/intl/icu/source/python/icutools/databuilder/utils.py
new file mode 100644
index 0000000000..3d53d18fae
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/utils.py
@@ -0,0 +1,143 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+# Python 2/3 Compatibility (ICU-20299)
+# TODO(ICU-20301): Remove this.
+from __future__ import print_function
+
+import sys
+
+from . import *
+
+
def dir_for(file):
    """Return the directory that *file* is located in.

    For a LocalFile the result is derived from its ``dirname`` via
    get_local_dirname(). For every other known file type the result is a
    ``{..._DIR}`` placeholder string.

    Raises:
        AssertionError: if *file* is not an instance of a known file type.
    """
    if isinstance(file, LocalFile):
        return get_local_dirname(file.dirname)
    if isinstance(file, SrcFile):
        return "{SRC_DIR}"
    if isinstance(file, InFile):
        return "{IN_DIR}"
    if isinstance(file, TmpFile):
        return "{TMP_DIR}"
    if isinstance(file, OutFile):
        return "{OUT_DIR}"
    if isinstance(file, PkgFile):
        return "{PKG_DIR}"
    # Raise explicitly instead of `assert False`: assert statements are
    # removed under `python -O`, which would let this function fall through
    # and return None for an unknown file type.
    raise AssertionError("Unknown file type: %r" % (file,))
+
+
# Maps the variable name that may follow a leading "$" in a local directory
# name to the placeholder that replaces it.
LOCAL_DIRNAME_SUBSTITUTIONS = {
    "SRC": "{SRC_DIR}",
    "FILTERS": "{FILTERS_DIR}",
    "CWD": "{CWD_DIR}"
}


def get_local_dirname(dirname):
    """Resolve a local directory name to an absolute path or placeholder path.

    Args:
        dirname: Either a path starting with "/" (returned unchanged), or a
            path starting with "$NAME" where NAME is a key of
            LOCAL_DIRNAME_SUBSTITUTIONS.

    Returns:
        The unchanged absolute path, or *dirname* with its "$NAME" prefix
        replaced by the corresponding placeholder.

    Raises:
        SystemExit: with exit code 1, after printing an error to stderr, if
            *dirname* is neither absolute nor relative to a known variable.
    """
    if dirname.startswith("/"):
        return dirname
    if dirname.startswith("$"):
        # Note: directory separator substitution happens later
        variable, sep, rest = dirname[1:].partition("/")
        if variable in LOCAL_DIRNAME_SUBSTITUTIONS:
            return LOCAL_DIRNAME_SUBSTITUTIONS[variable] + sep + rest
    print(
        "Error: Local directory must be absolute, or relative to one of: " +
        (", ".join("$%s" % v for v in LOCAL_DIRNAME_SUBSTITUTIONS)),
        file=sys.stderr
    )
    # sys.exit() rather than the bare exit(): the latter is injected by the
    # `site` module and is not guaranteed to exist in every interpreter setup.
    sys.exit(1)
+
+
# Names of all data trees known to the data builder. The tests in this
# package use such a name as the directory prefix of an input file,
# e.g. "locales/<stem>.txt".
ALL_TREES = [
    "locales", "curr", "lang", "region", "zone", "unit",
    "coll", "brkitr", "rbnf",
]
+
+
def concat_dicts(*dicts):
    """Merge any number of dicts into one new dict.

    The inputs are applied left to right, so for a key present in several
    of them the value from the right-most dict wins. None of the inputs is
    modified.

    Returns:
        A new dict; empty when called without arguments.
    """
    # There is not a super great way to do this in Python:
    merged = {}
    # The loop variable must not be called `dict`; that would shadow the
    # builtin for the rest of the function.
    for mapping in dicts:
        merged.update(mapping)
    return merged
+
+
def repeated_execution_request_looper(request):
    """Pair up the per-iteration data of a repeated execution request.

    ``request.repeat_with`` maps an option name to a list of values, one
    per iteration. It is transposed into one dict per iteration; when that
    yields nothing, one empty dict per input file is used instead.

    Returns:
        The result of zip() over (iteration options, specific dep files,
        input file, output file).
    """
    option_names = list(request.repeat_with)
    # dictionary of lists to list of dictionaries:
    per_iteration = []
    for values in zip(*request.repeat_with.values()):
        per_iteration.append(dict(zip(option_names, values)))
    if len(per_iteration) == 0:
        # No special options given in repeat_with
        per_iteration = [dict() for _ in range(len(request.input_files))]
    return zip(
        per_iteration,
        request.specific_dep_files,
        request.input_files,
        request.output_files
    )
+
+
def format_single_request_command(request, cmd_template, common_vars):
    """Render the command line for a single (non-repeated) request.

    ``request.args`` is formatted with INPUT_FILES and OUTPUT_FILES (lists
    of file names) plus the merged *common_vars* and
    ``request.format_with``; the result is substituted for ARGS in
    *cmd_template*.
    """
    input_names = [f.filename for f in request.input_files]
    output_names = [f.filename for f in request.output_files]
    extra_vars = concat_dicts(common_vars, request.format_with)
    rendered_args = request.args.format(
        INPUT_FILES=input_names,
        OUTPUT_FILES=output_names,
        **extra_vars
    )
    return cmd_template.format(ARGS=rendered_args)
+
+
def format_repeated_request_command(request, cmd_template, loop_vars, common_vars):
    """Render the command line for one iteration of a repeated request.

    *loop_vars* is a 4-tuple (iteration options, dep files, input file,
    output file); the dep files are not used here. ``request.args`` is
    formatted with INPUT_FILE and OUTPUT_FILE (file names) plus the merged
    *common_vars*, ``request.format_with`` and the iteration options; the
    result is substituted for ARGS in *cmd_template*.
    """
    iter_vars, _unused_dep_files, input_file, output_file = loop_vars
    input_name = input_file.filename
    output_name = output_file.filename
    extra_vars = concat_dicts(common_vars, request.format_with, iter_vars)
    rendered_args = request.args.format(
        INPUT_FILE=input_name,
        OUTPUT_FILE=output_name,
        **extra_vars
    )
    return cmd_template.format(ARGS=rendered_args)
+
+
def flatten_requests(requests, config, common_vars):
    """Flatten every request and return the concatenated results as one list.

    Each request's ``flatten`` is called with *config*, the complete
    *requests* collection and *common_vars*; the outputs are joined in
    request order.
    """
    flattened = []
    for req in requests:
        flattened.extend(req.flatten(config, requests, common_vars))
    return flattened
+
+
def get_all_output_files(requests, include_tmp=False):
    """Collect the unique output files of all *requests*.

    Args:
        requests: Objects providing ``all_output_files()``.
        include_tmp: When false (the default) only OutFile instances are
            returned. When true every file is returned, and files are
            treated as distinct if their types differ.

    Returns:
        A list of unique files, in no particular order.
    """
    collected = []
    for req in requests:
        collected.extend(req.all_output_files())

    if include_tmp:
        # Filter for unique values. NOTE: A plain set of the files is not
        # enough, because the same filename as OutFile and as TmpFile must
        # count as different, and by default they evaluate as equal; so the
        # type is made part of the uniqueness key.
        typed_keys = set((type(f), f) for f in collected)
        return [pair[1] for pair in typed_keys]

    # Keep only the files in OUT_DIR; uniqueness comes for free via set().
    return list(set(f for f in collected if isinstance(f, OutFile)))
+
+
def compute_directories(requests):
    """Return the sorted list of directories containing any output file.

    Every output file of *requests* (including non-OutFile ones) is mapped
    to "<dir_for(file)>/<filename>", and everything before the last "/" of
    that path is recorded as its directory.
    """
    parent_dirs = set()
    for out_file in get_all_output_files(requests, include_tmp=True):
        full_path = "%s/%s" % (dir_for(out_file), out_file.filename)
        last_sep = full_path.rfind("/")
        parent_dirs.add(full_path[:last_sep])
    return sorted(parent_dirs)
+
+
class SpaceSeparatedList(list):
    """A list whose str() is its items joined by single spaces.

    The items must already be strings.
    """

    def __str__(self):
        separator = " "
        return separator.join(self)