author     Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-07 19:33:14 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-07 19:33:14 +0000
commit     36d22d82aa202bb199967e9512281e9a53db42c9
tree       105e8c98ddea1c1e4784a60a5a6410fa416be2de /intl/icu/source/python
parent     Initial commit.
Adding upstream version 115.7.0esr.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat
17 files changed, 2678 insertions, 0 deletions
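For reference, the databuilder added below runs as a Python module. A plausible unix-exec invocation, assembled from the command-line flags defined in __main__.py (the directory paths here are illustrative, following the tool's own help text):

    $ LD_LIBRARY_PATH=lib PYTHONPATH=python python3 -m icutools.databuilder \
          --mode=unix-exec --src_dir data --out_dir icudata --tmp_dir icutmp \
          --tool_dir bin --filter_file filters.json

The optional --filter_file must point at an ICU data filter JSON file conforming to filtration_schema.json (also added below); the compiled data is then packaged separately with pkgdata, as the help text explains.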
diff --git a/intl/icu/source/python/icutools/__init__.py b/intl/icu/source/python/icutools/__init__.py new file mode 100644 index 0000000000..514ea27927 --- /dev/null +++ b/intl/icu/source/python/icutools/__init__.py @@ -0,0 +1,4 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +# This is the root ICU namespace for build tools. diff --git a/intl/icu/source/python/icutools/databuilder/__init__.py b/intl/icu/source/python/icutools/databuilder/__init__.py new file mode 100644 index 0000000000..be936166e7 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/__init__.py @@ -0,0 +1,16 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +from collections import namedtuple + +LocalFile = namedtuple("LocalFile", ["dirname", "filename"]) +SrcFile = namedtuple("SrcFile", ["filename"]) +InFile = namedtuple("InFile", ["filename"]) +TmpFile = namedtuple("TmpFile", ["filename"]) +OutFile = namedtuple("OutFile", ["filename"]) +PkgFile = namedtuple("PkgFile", ["filename"]) + +IcuTool = namedtuple("IcuTool", ["name"]) +SystemTool = namedtuple("SystemTool", ["name"]) + +DepTarget = namedtuple("DepTarget", ["name"]) diff --git a/intl/icu/source/python/icutools/databuilder/__main__.py b/intl/icu/source/python/icutools/databuilder/__main__.py new file mode 100644 index 0000000000..b72fa76629 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/__main__.py @@ -0,0 +1,377 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +# Python 2/3 Compatibility (ICU-20299) +# TODO(ICU-20301): Remove this. +from __future__ import print_function + +import argparse +import glob as pyglob +import io as pyio +import json +import os +import sys + +from . import * +from .comment_stripper import CommentStripper +from .request_types import CopyRequest +from .renderers import makefile, common_exec +from . import filtration, utils + +flag_parser = argparse.ArgumentParser( + description = """Generates rules for building ICU binary data files from text +and other input files in source control. + +Use the --mode option to declare how to execute those rules, either exporting +the rules to a Makefile or spawning child processes to run them immediately: + + --mode=gnumake prints a Makefile to standard out. + --mode=unix-exec spawns child processes in a Unix-like environment. + --mode=windows-exec spawns child processes in a Windows-like environment. + +Tips for --mode=unix-exec +========================= + +Create two empty directories for out_dir and tmp_dir. They will get filled +with a lot of intermediate files. + +Set LD_LIBRARY_PATH to include the lib directory. e.g., from icu4c/source: + + $ LD_LIBRARY_PATH=lib PYTHONPATH=python python3 -m icutools.databuilder ... + +Once icutools.databuilder finishes, you have compiled the data, but you have +not packaged it into a .dat or .so file. This is done by the separate pkgdata +tool in bin. Read the docs of pkgdata: + + $ LD_LIBRARY_PATH=lib ./bin/pkgdata --help + +Example command line to call pkgdata: + + $ LD_LIBRARY_PATH=lib ./bin/pkgdata -m common -p icudt63l -c \\ + -O data/icupkg.inc -s $OUTDIR -d $TMPDIR $TMPDIR/icudata.lst + +where $OUTDIR and $TMPDIR are your out and tmp directories, respectively. +The above command will create icudt63l.dat in the tmpdir. 
+ +Command-Line Arguments +====================== +""", + formatter_class = argparse.RawDescriptionHelpFormatter +) + +arg_group_required = flag_parser.add_argument_group("required arguments") +arg_group_required.add_argument( + "--mode", + help = "What to do with the generated rules.", + choices = ["gnumake", "unix-exec", "windows-exec", "bazel-exec"], + required = True +) + +flag_parser.add_argument( + "--src_dir", + help = "Path to data source folder (icu4c/source/data).", + default = "." +) +flag_parser.add_argument( + "--filter_file", + metavar = "PATH", + help = "Path to an ICU data filter JSON file.", + default = None +) +flag_parser.add_argument( + "--include_uni_core_data", + help = "Include the full Unicode core data in the dat file.", + default = False, + action = "store_true" +) +flag_parser.add_argument( + "--seqmode", + help = "Whether to optimize rules to be run sequentially (fewer threads) or in parallel (many threads). Defaults to 'sequential', which is better for unix-exec and windows-exec modes. 'parallel' is often better for massively parallel build systems.", + choices = ["sequential", "parallel"], + default = "sequential" +) +flag_parser.add_argument( + "--verbose", + help = "Print more verbose output (default false).", + default = False, + action = "store_true" +) + +arg_group_exec = flag_parser.add_argument_group("arguments for unix-exec and windows-exec modes") +arg_group_exec.add_argument( + "--out_dir", + help = "Path to where to save output data files.", + default = "icudata" +) +arg_group_exec.add_argument( + "--tmp_dir", + help = "Path to where to save temporary files.", + default = "icutmp" +) +arg_group_exec.add_argument( + "--tool_dir", + help = "Path to where to find binary tools (genrb, etc).", + default = "../bin" +) +arg_group_exec.add_argument( + "--tool_cfg", + help = "The build configuration of the tools. Used in 'windows-exec' mode only.", + default = "x86/Debug" +) + + +class Config(object): + + def __init__(self, args): + # Process arguments + self.max_parallel = (args.seqmode == "parallel") + + # Boolean: Whether to include core Unicode data files in the .dat file + self.include_uni_core_data = args.include_uni_core_data + + # Default fields before processing filter file + self.filters_json_data = {} + self.filter_dir = "ERROR_NO_FILTER_FILE" + + # Process filter file + if args.filter_file: + try: + with open(args.filter_file, "r") as f: + print("Note: Applying filters from %s." % args.filter_file, file=sys.stderr) + self._parse_filter_file(f) + except IOError: + print("Error: Could not read filter file %s." % args.filter_file, file=sys.stderr) + exit(1) + self.filter_dir = os.path.abspath(os.path.dirname(args.filter_file)) + + # Either "unihan" or "implicithan" + self.coll_han_type = "unihan" + if "collationUCAData" in self.filters_json_data: + self.coll_han_type = self.filters_json_data["collationUCAData"] + + # Either "additive" or "subtractive" + self.strategy = "subtractive" + if "strategy" in self.filters_json_data: + self.strategy = self.filters_json_data["strategy"] + + # True or False (could be extended later to support enum/list) + self.use_pool_bundle = True + if "usePoolBundle" in self.filters_json_data: + self.use_pool_bundle = self.filters_json_data["usePoolBundle"] + + # By default, exclude collation data that mimics the order of some large legacy charsets. + # We do this in "subtractive" strategy by inserting a resourceFilter. + # Later rules from an explicit filter file may override this default behavior. 
+ # (In "additive" strategy this is unnecessary.) + if self.strategy == "subtractive": + filters = self.filters_json_data.setdefault("resourceFilters", []) + omit_charset_collations = { + "categories": [ + "coll_tree" + ], + "rules": [ + "-/collations/big5han", + "-/collations/gb2312han" + ] + } + filters.insert(0, omit_charset_collations) + + def _parse_filter_file(self, f): + # Use the Hjson parser if it is available; otherwise, use vanilla JSON. + try: + import hjson + self.filters_json_data = hjson.load(f) + except ImportError: + self.filters_json_data = json.load(CommentStripper(f)) + + # Optionally pre-validate the JSON schema before further processing. + # Some schema errors will be caught later, but this step ensures + # maximal validity. + try: + import jsonschema + schema_path = os.path.join(os.path.dirname(__file__), "filtration_schema.json") + with open(schema_path) as schema_f: + schema = json.load(CommentStripper(schema_f)) + validator = jsonschema.Draft4Validator(schema) + for error in validator.iter_errors(self.filters_json_data, schema): + print("WARNING: ICU data filter JSON file:", error.message, + "at", "".join( + "[%d]" % part if isinstance(part, int) else ".%s" % part + for part in error.absolute_path + ), + file=sys.stderr) + except ImportError: + print("Tip: to validate your filter file, install the Pip package 'jsonschema'", file=sys.stderr) + pass + + +def add_copy_input_requests(requests, config, common_vars): + files_to_copy = set() + for request in requests: + request_files = request.all_input_files() + # Also add known dependency txt files as possible inputs. + # This is required for translit rule files. + if hasattr(request, "dep_targets"): + request_files += [ + f for f in request.dep_targets if isinstance(f, InFile) + ] + for f in request_files: + if isinstance(f, InFile): + files_to_copy.add(f) + + result = [] + id = 0 + + json_data = config.filters_json_data["fileReplacements"] + dirname = json_data["directory"] + for directive in json_data["replacements"]: + if type(directive) == str: + input_file = LocalFile(dirname, directive) + output_file = InFile(directive) + else: + input_file = LocalFile(dirname, directive["src"]) + output_file = InFile(directive["dest"]) + result += [ + CopyRequest( + name = "input_copy_%d" % id, + input_file = input_file, + output_file = output_file + ) + ] + files_to_copy.remove(output_file) + id += 1 + + for f in files_to_copy: + result += [ + CopyRequest( + name = "input_copy_%d" % id, + input_file = SrcFile(f.filename), + output_file = f + ) + ] + id += 1 + + result += requests + return result + + +class IO(object): + """I/O operations required when computing the build actions""" + + def __init__(self, src_dir): + self.src_dir = src_dir + + def glob(self, pattern): + absolute_paths = pyglob.glob(os.path.join(self.src_dir, pattern)) + # Strip off the absolute path suffix so we are left with a relative path. + relative_paths = [v[len(self.src_dir)+1:] for v in sorted(absolute_paths)] + # For the purposes of icutools.databuilder, force Unix-style directory separators. + # Within the Python code, including BUILDRULES.py and user-provided config files, + # directory separators are normalized to '/', including on Windows platforms. 
+ return [v.replace("\\", "/") for v in relative_paths] + + def read_locale_deps(self, tree): + return self._read_json("%s/LOCALE_DEPS.json" % tree) + + def _read_json(self, filename): + with pyio.open(os.path.join(self.src_dir, filename), "r", encoding="utf-8-sig") as f: + return json.load(CommentStripper(f)) + + +def main(argv): + args = flag_parser.parse_args(argv) + config = Config(args) + + if args.mode == "gnumake": + makefile_vars = { + "SRC_DIR": "$(srcdir)", + "IN_DIR": "$(srcdir)", + "INDEX_NAME": "res_index" + } + makefile_env = ["ICUDATA_CHAR", "OUT_DIR", "TMP_DIR"] + common = { + key: "$(%s)" % key + for key in list(makefile_vars.keys()) + makefile_env + } + common["FILTERS_DIR"] = config.filter_dir + common["CWD_DIR"] = os.getcwd() + else: + makefile_vars = None + common = { + "SRC_DIR": args.src_dir, + "IN_DIR": args.src_dir, + "OUT_DIR": args.out_dir, + "TMP_DIR": args.tmp_dir, + "FILTERS_DIR": config.filter_dir, + "CWD_DIR": os.getcwd(), + "INDEX_NAME": "res_index", + # TODO: Pull this from configure script: + "ICUDATA_CHAR": "l" + } + + # Automatically load BUILDRULES from the src_dir + sys.path.append(args.src_dir) + try: + import BUILDRULES + except ImportError: + print("Cannot find BUILDRULES! Did you set your --src_dir?", file=sys.stderr) + sys.exit(1) + + io = IO(args.src_dir) + requests = BUILDRULES.generate(config, io, common) + + if "fileReplacements" in config.filters_json_data: + tmp_in_dir = "{TMP_DIR}/in".format(**common) + if makefile_vars: + makefile_vars["IN_DIR"] = tmp_in_dir + else: + common["IN_DIR"] = tmp_in_dir + requests = add_copy_input_requests(requests, config, common) + + requests = filtration.apply_filters(requests, config, io) + requests = utils.flatten_requests(requests, config, common) + + build_dirs = utils.compute_directories(requests) + + if args.mode == "gnumake": + print(makefile.get_gnumake_rules( + build_dirs, + requests, + makefile_vars, + common_vars = common + )) + elif args.mode == "windows-exec": + return common_exec.run( + platform = "windows", + build_dirs = build_dirs, + requests = requests, + common_vars = common, + tool_dir = args.tool_dir, + tool_cfg = args.tool_cfg, + verbose = args.verbose, + ) + elif args.mode == "unix-exec": + return common_exec.run( + platform = "unix", + build_dirs = build_dirs, + requests = requests, + common_vars = common, + tool_dir = args.tool_dir, + verbose = args.verbose, + ) + elif args.mode == "bazel-exec": + return common_exec.run( + platform = "bazel", + build_dirs = build_dirs, + requests = requests, + common_vars = common, + tool_dir = args.tool_dir, + verbose = args.verbose, + ) + else: + print("Mode not supported: %s" % args.mode) + return 1 + return 0 + +if __name__ == "__main__": + exit(main(sys.argv[1:])) diff --git a/intl/icu/source/python/icutools/databuilder/comment_stripper.py b/intl/icu/source/python/icutools/databuilder/comment_stripper.py new file mode 100644 index 0000000000..4001f2f675 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/comment_stripper.py @@ -0,0 +1,51 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +import io + +class CommentStripper(object): + """Removes lines starting with "//" from a file stream.""" + + def __init__(self, f): + self.f = f + self.state = 0 + + def read(self, size=-1): + bytes = self.f.read(size) + # TODO: Do we need to read more bytes if comments were stripped + # in order to obey the size request? 
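+ # Note: despite the name, "bytes" is a text string here; callers open the stream in text mode (see Config._parse_filter_file and IO._read_json).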
+ return "".join(self._strip_comments(bytes)) + + def _strip_comments(self, bytes): + for byte in bytes: + if self.state == 0: + # state 0: start of a line + if byte == "/": + self.state = 1 + elif byte == "\n": + self.state = 0 + yield byte + else: + self.state = 2 + yield byte + elif self.state == 1: + # state 1: read a single '/' + if byte == "/": + self.state = 3 + elif byte == "\n": + self.state = 0 + yield "/" # the one that was skipped + yield "\n" + else: + self.state = 2 + yield "/" # the one that was skipped + yield byte + elif self.state == 2: + # state 2: middle of a line, no comment + if byte == "\n": + self.state = 0 + yield byte + elif self.state == 3: + # state 3: inside a comment + if byte == "\n": + self.state = 0 diff --git a/intl/icu/source/python/icutools/databuilder/filtration.py b/intl/icu/source/python/icutools/databuilder/filtration.py new file mode 100644 index 0000000000..e9339a0895 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/filtration.py @@ -0,0 +1,427 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +# Python 2/3 Compatibility (ICU-20299) +# TODO(ICU-20301): Remove this. +from __future__ import print_function + +from abc import abstractmethod +from collections import defaultdict +import re +import sys + +from . import * +from . import utils +from .request_types import * + + +# Note: for this to be a proper abstract class, it should extend abc.ABC. +# There is no nice way to do this that works in both Python 2 and 3. +# TODO(ICU-20301): Make this inherit from abc.ABC. +class Filter(object): + @staticmethod + def create_from_json(json_data, io): + assert io != None + if "filterType" in json_data: + filter_type = json_data["filterType"] + else: + filter_type = "file-stem" + + if filter_type == "file-stem": + return FileStemFilter(json_data) + elif filter_type == "language": + return LanguageFilter(json_data) + elif filter_type == "regex": + return RegexFilter(json_data) + elif filter_type == "exclude": + return ExclusionFilter() + elif filter_type == "union": + return UnionFilter(json_data, io) + elif filter_type == "locale": + return LocaleFilter(json_data, io) + else: + print("Error: Unknown filterType option: %s" % filter_type, file=sys.stderr) + return None + + def filter(self, request): + if not request.apply_file_filter(self): + return [] + for file in request.all_input_files(): + assert self.match(file) + return [request] + + @staticmethod + def _file_to_file_stem(file): + start = file.filename.rfind("/") + limit = file.filename.rfind(".") + return file.filename[start+1:limit] + + @staticmethod + def _file_to_subdir(file): + limit = file.filename.rfind("/") + if limit == -1: + return None + return file.filename[:limit] + + @abstractmethod + def match(self, file): + pass + + +class InclusionFilter(Filter): + def match(self, file): + return True + + +class ExclusionFilter(Filter): + def match(self, file): + return False + + +class IncludeExcludeFilter(Filter): + def __init__(self, json_data): + if "whitelist" in json_data: + self.is_includelist = True + self.includelist = json_data["whitelist"] + elif "includelist" in json_data: + self.is_includelist = True + self.includelist = json_data["includelist"] + elif "blacklist" in json_data: + self.is_includelist = False + self.excludelist = json_data["blacklist"] + elif "excludelist" in json_data: + self.is_includelist = False + self.excludelist = json_data["excludelist"] + else: + raise AssertionError("Need either 
includelist or excludelist: %s" % str(json_data)) + + def match(self, file): + file_stem = self._file_to_file_stem(file) + return self._should_include(file_stem) + + @abstractmethod + def _should_include(self, file_stem): + pass + + +class FileStemFilter(IncludeExcludeFilter): + def _should_include(self, file_stem): + if self.is_includelist: + return file_stem in self.includelist + else: + return file_stem not in self.excludelist + + +class LanguageFilter(IncludeExcludeFilter): + def _should_include(self, file_stem): + language = file_stem.split("_")[0] + if language == "root": + # Always include root.txt + return True + if self.is_includelist: + return language in self.includelist + else: + return language not in self.excludelist + + +class RegexFilter(IncludeExcludeFilter): + def __init__(self, *args): + # TODO(ICU-20301): Change this to: super().__init__(*args) + super(RegexFilter, self).__init__(*args) + if self.is_includelist: + self.includelist = [re.compile(pat) for pat in self.includelist] + else: + self.excludelist = [re.compile(pat) for pat in self.excludelist] + + def _should_include(self, file_stem): + if self.is_includelist: + for pattern in self.includelist: + if pattern.match(file_stem): + return True + return False + else: + for pattern in self.excludelist: + if pattern.match(file_stem): + return False + return True + + +class UnionFilter(Filter): + def __init__(self, json_data, io): + # Collect the sub-filters. + self.sub_filters = [] + for filter_json in json_data["unionOf"]: + self.sub_filters.append(Filter.create_from_json(filter_json, io)) + + def match(self, file): + """Match iff any of the sub-filters match.""" + for filter in self.sub_filters: + if filter.match(file): + return True + return False + + +LANGUAGE_SCRIPT_REGEX = re.compile(r"^([a-z]{2,3})_[A-Z][a-z]{3}$") +LANGUAGE_ONLY_REGEX = re.compile(r"^[a-z]{2,3}$") + +class LocaleFilter(Filter): + def __init__(self, json_data, io): + if "whitelist" in json_data: + self.locales_requested = list(json_data["whitelist"]) + elif "includelist" in json_data: + self.locales_requested = list(json_data["includelist"]) + else: + raise AssertionError("You must have an includelist in a locale filter") + self.include_children = json_data.get("includeChildren", True) + self.include_scripts = json_data.get("includeScripts", False) + + # Load the dependency graph from disk + self.dependency_data_by_tree = { + tree: io.read_locale_deps(tree) + for tree in utils.ALL_TREES + } + + def match(self, file): + tree = self._file_to_subdir(file) + assert tree is not None + locale = self._file_to_file_stem(file) + + # A locale is *required* if it is *requested* or an ancestor of a + # *requested* locale. + if locale in self._locales_required(tree): + return True + + # Resolve include_scripts and include_children. + return self._match_recursive(locale, tree) + + def _match_recursive(self, locale, tree): + # Base case: return True if we reached a *requested* locale, + # or False if we ascend out of the locale tree. + if locale is None: + return False + if locale in self.locales_requested: + return True + + # Check for alternative scripts. + # This causes sr_Latn to check sr instead of going directly to root. + if self.include_scripts: + match = LANGUAGE_SCRIPT_REGEX.match(locale) + if match and self._match_recursive(match.group(1), tree): + return True + + # Check if we are a descendant of a *requested* locale. 
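+ # e.g., with "sr" requested, sr_Cyrl_BA matches via the parent chain sr_Cyrl_BA -> sr_Cyrl -> sr (see test_locale_basic below).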
+ if self.include_children: + parent = self._get_parent_locale(locale, tree) + if self._match_recursive(parent, tree): + return True + + # No matches. + return False + + def _get_parent_locale(self, locale, tree): + """Gets the parent locale in the given tree, according to dependency data.""" + dependency_data = self.dependency_data_by_tree[tree] + if "parents" in dependency_data and locale in dependency_data["parents"]: + return dependency_data["parents"][locale] + if "aliases" in dependency_data and locale in dependency_data["aliases"]: + return dependency_data["aliases"][locale] + if LANGUAGE_ONLY_REGEX.match(locale): + return "root" + i = locale.rfind("_") + if i < 0: + assert locale == "root", "Invalid locale: %s/%s" % (tree, locale) + return None + return locale[:i] + + def _locales_required(self, tree): + """Returns a generator of all required locales in the given tree.""" + for locale in self.locales_requested: + while locale is not None: + yield locale + locale = self._get_parent_locale(locale, tree) + + +def apply_filters(requests, config, io): + """Runs the filters and returns a new list of requests.""" + requests = _apply_file_filters(requests, config, io) + requests = _apply_resource_filters(requests, config, io) + return requests + + +def _apply_file_filters(old_requests, config, io): + """Filters out entire files.""" + filters = _preprocess_file_filters(old_requests, config, io) + new_requests = [] + for request in old_requests: + category = request.category + if category in filters: + new_requests += filters[category].filter(request) + else: + new_requests.append(request) + return new_requests + + +def _preprocess_file_filters(requests, config, io): + all_categories = set( + request.category + for request in requests + ) + all_categories.remove(None) + all_categories = list(sorted(all_categories)) + json_data = config.filters_json_data + filters = {} + default_filter_json = "exclude" if config.strategy == "additive" else "include" + for category in all_categories: + filter_json = default_filter_json + # Special default for category "brkitr_lstm" and "brkitr_adaboost" as "exclude" for now. + if "brkitr_lstm" == category or "brkitr_adaboost" == category: + filter_json = "exclude" + # Figure out the correct filter to create for now. + if "featureFilters" in json_data and category in json_data["featureFilters"]: + filter_json = json_data["featureFilters"][category] + if filter_json == "include" and "localeFilter" in json_data and category.endswith("_tree"): + filter_json = json_data["localeFilter"] + # Resolve the filter JSON into a filter object + if filter_json == "exclude": + filters[category] = ExclusionFilter() + elif filter_json == "include": + pass # no-op + else: + filters[category] = Filter.create_from_json(filter_json, io) + if "featureFilters" in json_data: + for category in json_data["featureFilters"]: + if category not in all_categories: + print("Warning: category %s is not known" % category, file=sys.stderr) + return filters + + +class ResourceFilterInfo(object): + def __init__(self, category, strategy): + self.category = category + self.strategy = strategy + self.filter_tmp_dir = "filters/%s" % category + self.input_files = None + self.filter_files = None + self.rules_by_file = None + + def apply_to_requests(self, all_requests): + # Call this method only once per list of requests. 
+ assert self.input_files is None + for request in all_requests: + if request.category != self.category: + continue + if not isinstance(request, AbstractExecutionRequest): + continue + if request.tool != IcuTool("genrb"): + continue + if not request.input_files: + continue + self._set_files(request.input_files) + request.dep_targets += [self.filter_files[:]] + arg_str = "--filterDir {TMP_DIR}/%s" % self.filter_tmp_dir + request.args = "%s %s" % (arg_str, request.args) + + # Make sure we found the target request + if self.input_files is None: + print("WARNING: Category not found: %s" % self.category, file=sys.stderr) + self.input_files = [] + self.filter_files = [] + self.rules_by_file = [] + + def _set_files(self, files): + # Note: The input files to genrb for a certain category should always + # be the same. For example, there are often two genrb calls: one for + # --writePoolBundle, and the other for --usePoolBundle. They are both + # expected to have the same list of input files. + if self.input_files is not None: + assert self.input_files == files + return + self.input_files = list(files) + self.filter_files = [ + TmpFile("%s/%s" % (self.filter_tmp_dir, basename)) + for basename in ( + file.filename[file.filename.rfind("/")+1:] + for file in files + ) + ] + if self.strategy == "additive": + self.rules_by_file = [ + [r"-/", r"+/%%ALIAS", r"+/%%Parent"] + for _ in range(len(files)) + ] + else: + self.rules_by_file = [ + [r"+/"] + for _ in range(len(files)) + ] + + def add_rules(self, file_filter, rules): + for file, rule_list in zip(self.input_files, self.rules_by_file): + if file_filter.match(file): + rule_list += rules + + def make_requests(self): + # Map from rule list to filter files with that rule list + unique_rules = defaultdict(list) + for filter_file, rules in zip(self.filter_files, self.rules_by_file): + unique_rules[tuple(rules)].append(filter_file) + + new_requests = [] + i = 0 + for rules, filter_files in unique_rules.items(): + base_filter_file = filter_files[0] + new_requests += [ + PrintFileRequest( + name = "%s_print_%d" % (self.category, i), + output_file = base_filter_file, + content = self._generate_resource_filter_txt(rules) + ) + ] + i += 1 + for filter_file in filter_files[1:]: + new_requests += [ + CopyRequest( + name = "%s_copy_%d" % (self.category, i), + input_file = base_filter_file, + output_file = filter_file + ) + ] + i += 1 + return new_requests + + @staticmethod + def _generate_resource_filter_txt(rules): + result = "# Caution: This file is automatically generated\n\n" + result += "\n".join(rules) + return result + + +def _apply_resource_filters(all_requests, config, io): + """Creates filters for looking within resource bundle files.""" + json_data = config.filters_json_data + if "resourceFilters" not in json_data: + return all_requests + + collected = {} + for entry in json_data["resourceFilters"]: + if "files" in entry: + file_filter = Filter.create_from_json(entry["files"], io) + else: + file_filter = InclusionFilter() + for category in entry["categories"]: + # not defaultdict because we need to pass arguments to the constructor + if category not in collected: + filter_info = ResourceFilterInfo(category, config.strategy) + filter_info.apply_to_requests(all_requests) + collected[category] = filter_info + else: + filter_info = collected[category] + filter_info.add_rules(file_filter, entry["rules"]) + + # Add the filter generation requests to the beginning so that by default + # they are made before genrb gets run (order is required by windirect) + 
new_requests = [] + for filter_info in collected.values(): + new_requests += filter_info.make_requests() + new_requests += all_requests + return new_requests diff --git a/intl/icu/source/python/icutools/databuilder/filtration_schema.json b/intl/icu/source/python/icutools/databuilder/filtration_schema.json new file mode 100644 index 0000000000..3aed41a334 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/filtration_schema.json @@ -0,0 +1,206 @@ +// Copyright (C) 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +{ + "$id": "http://unicode.org/icu-filter-schema", + "$schema": "http://json-schema.org/draft-04/schema#", + "description": "JSON Schema for an ICU data filter file", + "type": "object", + "properties": { + "strategy": { + "type": "string", + "enum": ["additive", "subtractive"] + }, + "localeFilter": { "$ref": "#/definitions/filter" }, + "featureFilters": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { "$ref": "#/definitions/filter" }, + { + "type": "string", + "enum": ["include", "exclude"] + } + ] + } + }, + "resourceFilters": { + "type": "array", + "items": { + "type": "object", + "properties": { + "categories": { + "type": "array", + "items": { "type": "string" } + }, + "files": { "$ref": "#/definitions/filter" }, + "rules": { + "type": "array", + "items": { + "type": "string", + "pattern": "^[+-]/[\\S]*$" + } + } + }, + "required": ["categories", "rules"], + "additionalProperties": false + } + }, + "fileReplacements": { + "type": "object", + "properties": { + "directory": { + "type": "string", + "pattern": "^(\\$SRC|\\$FILTERS|\\$CWD|/$|/[^/]+)(/[^/]+)*$" + }, + "replacements": { + "type": "array", + "items": { + "oneOf": [ + { "type": "string" }, + { + "type": "object", + "properties": { + "src": { "type": "string" }, + "dest": { "type": "string" } + }, + "additionalProperties": false, + "required": ["src", "dest"] + } + ] + } + } + }, + "additionalProperties": false, + "required": ["directory", "replacements"] + }, + "collationUCAData": { + "type": "string", + "enum": ["unihan", "implicithan"] + }, + "usePoolBundle": { + "type": "boolean" + } + }, + "additionalProperties": false, + "definitions": { + "filter": { + "type": "object", + "oneOf": [ + { + "properties": { + "filterType": { + "$ref": "#/definitions/includeExcludeFilterTypes" + }, + "whitelist": { "$ref": "#/definitions/stringList" } + }, + "required": ["whitelist"], + "additionalProperties": false + }, + { + "properties": { + "filterType": { + "$ref": "#/definitions/includeExcludeFilterTypes" + }, + "blacklist": { "$ref": "#/definitions/stringList" } + }, + "required": ["blacklist"], + "additionalProperties": false + }, + { + "properties": { + "filterType": { + "$ref": "#/definitions/includeExcludeFilterTypes" + }, + "includelist": { "$ref": "#/definitions/stringList" } + }, + "required": ["includelist"], + "additionalProperties": false + }, + { + "properties": { + "filterType": { + "$ref": "#/definitions/includeExcludeFilterTypes" + }, + "excludelist": { "$ref": "#/definitions/stringList" } + }, + "required": ["excludelist"], + "additionalProperties": false + }, + { + "properties": { + "filterType": { + "type": "string", + "enum": ["exclude"] + } + }, + "required": ["filterType"], + "additionalProperties": false + }, + { + "properties": { + "filterType": { + "type": "string", + "enum": ["locale"] + }, + "includeChildren": { + "type": "boolean" + }, + "includeScripts": { + "type": "boolean" + }, + "whitelist": { "$ref": 
"#/definitions/stringList" } + }, + "required": ["filterType", "whitelist"], + "additionalProperties": false + }, + { + "properties": { + "filterType": { + "type": "string", + "enum": ["locale"] + }, + "includeChildren": { + "type": "boolean" + }, + "includeScripts": { + "type": "boolean" + }, + "includelist": { "$ref": "#/definitions/stringList" } + }, + "required": ["filterType", "includelist"], + "additionalProperties": false + }, + { + "properties": { + "filterType": { + "type": "string", + "enum": ["union"] + }, + "unionOf": { + "type": "array", + "items": { "$ref": "#/definitions/filter" } + } + }, + "required": ["filterType", "unionOf"], + "additionalProperties": false + } + ] + }, + "includeExcludeFilterTypes": { + "type": "string", + "enum": [ + "language", + "regex" + ] + }, + "stringList": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "uniqueItems": true + } + } +} diff --git a/intl/icu/source/python/icutools/databuilder/renderers/__init__.py b/intl/icu/source/python/icutools/databuilder/renderers/__init__.py new file mode 100644 index 0000000000..7c402c2b78 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/renderers/__init__.py @@ -0,0 +1,10 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +from collections import namedtuple + +MakeRule = namedtuple("MakeRule", ["name", "dep_literals", "dep_files", "output_file", "cmds"]) + +MakeFilesVar = namedtuple("MakeFilesVar", ["name", "files"]) + +MakeStringVar = namedtuple("MakeStringVar", ["name", "content"]) diff --git a/intl/icu/source/python/icutools/databuilder/renderers/common_exec.py b/intl/icu/source/python/icutools/databuilder/renderers/common_exec.py new file mode 100644 index 0000000000..91c12fdcf6 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/renderers/common_exec.py @@ -0,0 +1,155 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +# Python 2/3 Compatibility (ICU-20299) +# TODO(ICU-20301): Remove this. +from __future__ import print_function + +from . import * +from .. import * +from .. import utils +from ..request_types import * + +import os +import shutil +import subprocess +import sys + +def run(build_dirs, requests, common_vars, verbose=True, **kwargs): + for bd in build_dirs: + makedirs(bd.format(**common_vars)) + for request in requests: + status = run_helper(request, common_vars, verbose=verbose, **kwargs) + if status != 0: + print("!!! 
ERROR executing above command line: exit code %d" % status) + return 1 + if verbose: + print("All data build commands executed") + return 0 + +def makedirs(dirs): + """makedirs compatible between Python 2 and 3""" + try: + # Python 3 version + os.makedirs(dirs, exist_ok=True) + except TypeError as e: + # Python 2 version + try: + os.makedirs(dirs) + except OSError as e: + if e.errno != errno.EEXIST: + raise e + +def run_helper(request, common_vars, platform, tool_dir, verbose, tool_cfg=None, **kwargs): + if isinstance(request, PrintFileRequest): + output_path = "{DIRNAME}/{FILENAME}".format( + DIRNAME = utils.dir_for(request.output_file).format(**common_vars), + FILENAME = request.output_file.filename, + ) + if verbose: + print("Printing to file: %s" % output_path) + with open(output_path, "w") as f: + f.write(request.content) + return 0 + if isinstance(request, CopyRequest): + input_path = "{DIRNAME}/{FILENAME}".format( + DIRNAME = utils.dir_for(request.input_file).format(**common_vars), + FILENAME = request.input_file.filename, + ) + output_path = "{DIRNAME}/{FILENAME}".format( + DIRNAME = utils.dir_for(request.output_file).format(**common_vars), + FILENAME = request.output_file.filename, + ) + if verbose: + print("Copying file to: %s" % output_path) + shutil.copyfile(input_path, output_path) + return 0 + if isinstance(request, VariableRequest): + # No-op + return 0 + + assert isinstance(request.tool, IcuTool) + if platform == "windows": + cmd_template = "{TOOL_DIR}/{TOOL}/{TOOL_CFG}/{TOOL}.exe {{ARGS}}".format( + TOOL_DIR = tool_dir, + TOOL_CFG = tool_cfg, + TOOL = request.tool.name, + **common_vars + ) + elif platform == "unix": + cmd_template = "{TOOL_DIR}/{TOOL} {{ARGS}}".format( + TOOL_DIR = tool_dir, + TOOL = request.tool.name, + **common_vars + ) + elif platform == "bazel": + cmd_template = "{TOOL_DIR}/{TOOL}/{TOOL} {{ARGS}}".format( + TOOL_DIR = tool_dir, + TOOL = request.tool.name, + **common_vars + ) + else: + raise ValueError("Unknown platform: %s" % platform) + + if isinstance(request, RepeatedExecutionRequest): + for loop_vars in utils.repeated_execution_request_looper(request): + command_line = utils.format_repeated_request_command( + request, + cmd_template, + loop_vars, + common_vars + ) + if platform == "windows": + # Note: this / to \ substitution may be too aggressive? + command_line = command_line.replace("/", "\\") + returncode = run_shell_command(command_line, platform, verbose) + if returncode != 0: + return returncode + return 0 + if isinstance(request, SingleExecutionRequest): + command_line = utils.format_single_request_command( + request, + cmd_template, + common_vars + ) + if platform == "windows": + # Note: this / to \ substitution may be too aggressive? + command_line = command_line.replace("/", "\\") + returncode = run_shell_command(command_line, platform, verbose) + return returncode + assert False + +def run_shell_command(command_line, platform, verbose): + changed_windows_comspec = False + # If the command line length on Windows exceeds the absolute maximum that CMD supports (8191), then + # we temporarily switch over to use PowerShell for the command, and then switch back to CMD. + # We don't want to use PowerShell for everything though, as it tends to be slower. + if (platform == "windows"): + previous_comspec = os.environ["COMSPEC"] + # Add 7 to the length for the argument /c with quotes. 
+ # For example: C:\WINDOWS\system32\cmd.exe /c "<command_line>" + if ((len(previous_comspec) + len(command_line) + 7) > 8190): + if verbose: + print("Command length exceeds the max length for CMD on Windows, using PowerShell instead.") + os.environ["COMSPEC"] = 'powershell' + changed_windows_comspec = True + if verbose: + print("Running: %s" % command_line) + returncode = subprocess.call( + command_line, + shell = True + ) + else: + # Pipe output to /dev/null in quiet mode + with open(os.devnull, "w") as devnull: + returncode = subprocess.call( + command_line, + shell = True, + stdout = devnull, + stderr = devnull + ) + if changed_windows_comspec: + os.environ["COMSPEC"] = previous_comspec + if returncode != 0: + print("Command failed: %s" % command_line, file=sys.stderr) + return returncode diff --git a/intl/icu/source/python/icutools/databuilder/renderers/makefile.py b/intl/icu/source/python/icutools/databuilder/renderers/makefile.py new file mode 100644 index 0000000000..9b2005b07d --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/renderers/makefile.py @@ -0,0 +1,245 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +# Python 2/3 Compatibility (ICU-20299) +# TODO(ICU-20301): Remove this. +from __future__ import print_function + +from . import * +from .. import * +from .. import utils +from ..request_types import * + +def get_gnumake_rules(build_dirs, requests, makefile_vars, **kwargs): + makefile_string = "" + + # Common Variables + common_vars = kwargs["common_vars"] + for key, value in sorted(makefile_vars.items()): + makefile_string += "{KEY} = {VALUE}\n".format( + KEY = key, + VALUE = value + ) + makefile_string += "\n" + + # Directories + dirs_timestamp_file = "{TMP_DIR}/dirs.timestamp".format(**common_vars) + makefile_string += "DIRS = {TIMESTAMP_FILE}\n\n".format( + TIMESTAMP_FILE = dirs_timestamp_file + ) + makefile_string += "{TIMESTAMP_FILE}:\n\t$(MKINSTALLDIRS) {ALL_DIRS}\n\techo timestamp > {TIMESTAMP_FILE}\n\n".format( + TIMESTAMP_FILE = dirs_timestamp_file, + ALL_DIRS = " ".join(build_dirs).format(**common_vars) + ) + + # Generate Rules + make_rules = [] + for request in requests: + make_rules += get_gnumake_rules_helper(request, **kwargs) + + # Main Commands + for rule in make_rules: + if isinstance(rule, MakeFilesVar): + makefile_string += "{NAME} = {FILE_LIST}\n\n".format( + NAME = rule.name, + FILE_LIST = files_to_makefile(rule.files, wrap = True, **kwargs), + ) + continue + + if isinstance(rule, MakeStringVar): + makefile_string += "define {NAME}\n{CONTENT}\nendef\nexport {NAME}\n\n".format( + NAME = rule.name, + CONTENT = rule.content + ) + continue + + assert isinstance(rule, MakeRule) + header_line = "{OUT_FILE}: {DEP_FILES} {DEP_LITERALS} | $(DIRS)".format( + OUT_FILE = files_to_makefile([rule.output_file], **kwargs), + DEP_FILES = files_to_makefile(rule.dep_files, wrap = True, **kwargs), + DEP_LITERALS = " ".join(rule.dep_literals) + ) + + if len(rule.cmds) == 0: + makefile_string += "%s\n\n" % header_line + continue + + makefile_string += "{HEADER_LINE}\n{RULE_LINES}\n\n".format( + HEADER_LINE = header_line, + RULE_LINES = "\n".join("\t%s" % cmd for cmd in rule.cmds) + ) + + return makefile_string + +def files_to_makefile(files, common_vars, wrap = False, **kwargs): + if len(files) == 0: + return "" + dirnames = [utils.dir_for(file).format(**common_vars) for file in files] + join_str = " \\\n\t\t" if wrap and len(files) > 2 else " " + if len(files) == 1: + return 
"%s/%s" % (dirnames[0], files[0].filename) + elif len(set(dirnames)) == 1: + return "$(addprefix %s/,%s)" % (dirnames[0], join_str.join(file.filename for file in files)) + else: + return join_str.join("%s/%s" % (d, f.filename) for d,f in zip(dirnames, files)) + +def get_gnumake_rules_helper(request, common_vars, **kwargs): + + if isinstance(request, PrintFileRequest): + var_name = "%s_CONTENT" % request.name.upper() + return [ + MakeStringVar( + name = var_name, + content = request.content + ), + MakeRule( + name = request.name, + dep_literals = [], + dep_files = [], + output_file = request.output_file, + cmds = [ + "echo \"$${VAR_NAME}\" > {MAKEFILENAME}".format( + VAR_NAME = var_name, + MAKEFILENAME = files_to_makefile([request.output_file], common_vars), + **common_vars + ) + ] + ) + ] + + + if isinstance(request, CopyRequest): + return [ + MakeRule( + name = request.name, + dep_literals = [], + dep_files = [request.input_file], + output_file = request.output_file, + cmds = ["cp %s %s" % ( + files_to_makefile([request.input_file], common_vars), + files_to_makefile([request.output_file], common_vars)) + ] + ) + ] + + if isinstance(request, VariableRequest): + return [ + MakeFilesVar( + name = request.name.upper(), + files = request.input_files + ) + ] + + if request.tool.name == "make": + cmd_template = "$(MAKE) {ARGS}" + elif request.tool.name == "gentest": + cmd_template = "$(INVOKE) $(GENTEST) {ARGS}" + else: + assert isinstance(request.tool, IcuTool) + cmd_template = "$(INVOKE) $(TOOLBINDIR)/{TOOL} {{ARGS}}".format( + TOOL = request.tool.name + ) + + if isinstance(request, SingleExecutionRequest): + cmd = utils.format_single_request_command(request, cmd_template, common_vars) + dep_files = request.all_input_files() + + if len(request.output_files) > 1: + # Special case for multiple output files: Makefile rules should have only one + # output file apiece. More information: + # https://www.gnu.org/software/automake/manual/html_node/Multiple-Outputs.html + timestamp_var_name = "%s_ALL" % request.name.upper() + timestamp_file = TmpFile("%s.timestamp" % request.name) + rules = [ + MakeFilesVar( + name = timestamp_var_name, + files = [timestamp_file] + ), + MakeRule( + name = "%s_all" % request.name, + dep_literals = [], + dep_files = dep_files, + output_file = timestamp_file, + cmds = [ + cmd, + "echo timestamp > {MAKEFILENAME}".format( + MAKEFILENAME = files_to_makefile([timestamp_file], common_vars) + ) + ] + ) + ] + for i, file in enumerate(request.output_files): + rules += [ + MakeRule( + name = "%s_%d" % (request.name, i), + dep_literals = ["$(%s)" % timestamp_var_name], + dep_files = [], + output_file = file, + cmds = [] + ) + ] + return rules + + elif len(dep_files) > 5: + # For nicer printing, for long input lists, use a helper variable. + dep_var_name = "%s_DEPS" % request.name.upper() + return [ + MakeFilesVar( + name = dep_var_name, + files = dep_files + ), + MakeRule( + name = request.name, + dep_literals = ["$(%s)" % dep_var_name], + dep_files = [], + output_file = request.output_files[0], + cmds = [cmd] + ) + ] + + else: + return [ + MakeRule( + name = request.name, + dep_literals = [], + dep_files = dep_files, + output_file = request.output_files[0], + cmds = [cmd] + ) + ] + + if isinstance(request, RepeatedExecutionRequest): + rules = [] + dep_literals = [] + # To keep from repeating the same dep files many times, make a variable. 
+ if len(request.common_dep_files) > 0: + dep_var_name = "%s_DEPS" % request.name.upper() + dep_literals = ["$(%s)" % dep_var_name] + rules += [ + MakeFilesVar( + name = dep_var_name, + files = request.common_dep_files + ) + ] + # Add a rule for each individual file. + for loop_vars in utils.repeated_execution_request_looper(request): + (_, specific_dep_files, input_file, output_file) = loop_vars + name_suffix = input_file[input_file.filename.rfind("/")+1:input_file.filename.rfind(".")] + cmd = utils.format_repeated_request_command( + request, + cmd_template, + loop_vars, + common_vars + ) + rules += [ + MakeRule( + name = "%s_%s" % (request.name, name_suffix), + dep_literals = dep_literals, + dep_files = specific_dep_files + [input_file], + output_file = output_file, + cmds = [cmd] + ) + ] + return rules + + assert False diff --git a/intl/icu/source/python/icutools/databuilder/request_types.py b/intl/icu/source/python/icutools/databuilder/request_types.py new file mode 100644 index 0000000000..aa70f8d918 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/request_types.py @@ -0,0 +1,364 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +# Python 2/3 Compatibility (ICU-20299) +# TODO(ICU-20301): Remove this. +from __future__ import print_function + +from abc import abstractmethod +import copy +import sys + +from . import * +from . import utils + + +# TODO(ICU-20301): Remove arguments from all instances of super() in this file + +# Note: for this to be a proper abstract class, it should extend abc.ABC. +# There is no nice way to do this that works in both Python 2 and 3. +# TODO(ICU-20301): Make this inherit from abc.ABC. +class AbstractRequest(object): + def __init__(self, **kwargs): + + # Used for identification purposes + self.name = None + + # The filter category that applies to this request + self.category = None + + self._set_fields(kwargs) + + def _set_fields(self, kwargs): + for key, value in list(kwargs.items()): + if hasattr(self, key): + if isinstance(value, list): + value = copy.copy(value) + elif isinstance(value, dict): + value = copy.deepcopy(value) + setattr(self, key, value) + else: + raise ValueError("Unknown argument: %s" % key) + + def apply_file_filter(self, filter): + """ + Returns True if this request still has input files after filtering, + or False if the request is "empty" after filtering. + """ + return True + + def flatten(self, config, all_requests, common_vars): + return [self] + + def all_input_files(self): + return [] + + def all_output_files(self): + return [] + + +class AbstractExecutionRequest(AbstractRequest): + def __init__(self, **kwargs): + + # Names of targets (requests) or files that this request depends on. + # The entries of dep_targets may be any of the following types: + # + # 1. DepTarget, for the output of an execution request. + # 2. InFile, TmpFile, etc., for a specific file. + # 3. A list of InFile, TmpFile, etc., where the list is the same + # length as self.input_files and self.output_files. + # + # In cases 1 and 2, the dependency is added to all rules that the + # request generates. In case 3, the dependency is added only to the + # rule that generates the output file at the same array index. + self.dep_targets = [] + + # Computed during self.flatten(); don't edit directly. 
+ self.common_dep_files = [] + + # Primary input files + self.input_files = [] + + # Output files; for some subclasses, this must be the same length + # as input_files + self.output_files = [] + + # What tool to execute + self.tool = None + + # Argument string to pass to the tool with optional placeholders + self.args = "" + + # Placeholders to substitute into the argument string; if any of these + # have a list type, the list must be equal in length to input_files + self.format_with = {} + + super(AbstractExecutionRequest, self).__init__(**kwargs) + + def apply_file_filter(self, filter): + i = 0 + while i < len(self.input_files): + if filter.match(self.input_files[i]): + i += 1 + else: + self._del_at(i) + return i > 0 + + def _del_at(self, i): + del self.input_files[i] + for _, v in self.format_with.items(): + if isinstance(v, list): + assert len(v) == len(self.input_files) + 1 + del v[i] + for v in self.dep_targets: + if isinstance(v, list): + assert len(v) == len(self.input_files) + 1 + del v[i] + + def flatten(self, config, all_requests, common_vars): + self._dep_targets_to_files(all_requests) + return super(AbstractExecutionRequest, self).flatten(config, all_requests, common_vars) + + def _dep_targets_to_files(self, all_requests): + if not self.dep_targets: + return + for dep_target in self.dep_targets: + if isinstance(dep_target, list): + if hasattr(self, "specific_dep_files"): + assert len(dep_target) == len(self.specific_dep_files) + for file, out_list in zip(dep_target, self.specific_dep_files): + assert hasattr(file, "filename") + out_list.append(file) + else: + self.common_dep_files += dep_target + continue + if not isinstance(dep_target, DepTarget): + # Copy file entries directly to dep_files. + assert hasattr(dep_target, "filename") + self.common_dep_files.append(dep_target) + continue + # For DepTarget entries, search for the target. 
+ for request in all_requests: + if request.name == dep_target.name: + self.common_dep_files += request.all_output_files() + break + else: + print("Warning: Unable to find target %s, a dependency of %s" % ( + dep_target.name, + self.name + ), file=sys.stderr) + self.dep_targets = [] + + def all_input_files(self): + return self.common_dep_files + self.input_files + + def all_output_files(self): + return self.output_files + + +class SingleExecutionRequest(AbstractExecutionRequest): + def __init__(self, **kwargs): + super(SingleExecutionRequest, self).__init__(**kwargs) + + +class RepeatedExecutionRequest(AbstractExecutionRequest): + def __init__(self, **kwargs): + + # Placeholders to substitute into the argument string unique to each + # iteration; all values must be lists equal in length to input_files + self.repeat_with = {} + + # Lists for dep files that are specific to individual resource bundle files + self.specific_dep_files = [[] for _ in range(len(kwargs["input_files"]))] + + super(RepeatedExecutionRequest, self).__init__(**kwargs) + + def _del_at(self, i): + super(RepeatedExecutionRequest, self)._del_at(i) + del self.output_files[i] + del self.specific_dep_files[i] + for _, v in self.repeat_with.items(): + if isinstance(v, list): + del v[i] + + def all_input_files(self): + files = super(RepeatedExecutionRequest, self).all_input_files() + for specific_file_list in self.specific_dep_files: + files += specific_file_list + return files + + +class RepeatedOrSingleExecutionRequest(AbstractExecutionRequest): + def __init__(self, **kwargs): + self.repeat_with = {} + super(RepeatedOrSingleExecutionRequest, self).__init__(**kwargs) + + def flatten(self, config, all_requests, common_vars): + if config.max_parallel: + new_request = RepeatedExecutionRequest( + name = self.name, + category = self.category, + dep_targets = self.dep_targets, + input_files = self.input_files, + output_files = self.output_files, + tool = self.tool, + args = self.args, + format_with = self.format_with, + repeat_with = self.repeat_with + ) + else: + new_request = SingleExecutionRequest( + name = self.name, + category = self.category, + dep_targets = self.dep_targets, + input_files = self.input_files, + output_files = self.output_files, + tool = self.tool, + args = self.args, + format_with = utils.concat_dicts(self.format_with, self.repeat_with) + ) + return new_request.flatten(config, all_requests, common_vars) + + def _del_at(self, i): + super(RepeatedOrSingleExecutionRequest, self)._del_at(i) + del self.output_files[i] + for _, v in self.repeat_with.items(): + if isinstance(v, list): + del v[i] + + +class PrintFileRequest(AbstractRequest): + def __init__(self, **kwargs): + self.output_file = None + self.content = None + super(PrintFileRequest, self).__init__(**kwargs) + + def all_output_files(self): + return [self.output_file] + + +class CopyRequest(AbstractRequest): + def __init__(self, **kwargs): + self.input_file = None + self.output_file = None + super(CopyRequest, self).__init__(**kwargs) + + def all_input_files(self): + return [self.input_file] + + def all_output_files(self): + return [self.output_file] + + +class VariableRequest(AbstractRequest): + def __init__(self, **kwargs): + self.input_files = [] + super(VariableRequest, self).__init__(**kwargs) + + def all_input_files(self): + return self.input_files + + +class ListRequest(AbstractRequest): + def __init__(self, **kwargs): + self.variable_name = None + self.output_file = None + self.include_tmp = None + super(ListRequest, self).__init__(**kwargs) + + def 
flatten(self, config, all_requests, common_vars): + list_files = list(sorted(utils.get_all_output_files(all_requests))) + if self.include_tmp: + variable_files = list(sorted(utils.get_all_output_files(all_requests, include_tmp=True))) + else: + # Always include the list file itself + variable_files = list_files + [self.output_file] + return PrintFileRequest( + name = self.name, + output_file = self.output_file, + content = "\n".join(file.filename for file in list_files) + ).flatten(config, all_requests, common_vars) + VariableRequest( + name = self.variable_name, + input_files = variable_files + ).flatten(config, all_requests, common_vars) + + def all_output_files(self): + return [self.output_file] + + +class IndexRequest(AbstractRequest): + def __init__(self, **kwargs): + self.installed_files = [] + self.alias_files = [] + self.txt_file = None + self.output_file = None + self.cldr_version = "" + self.args = "" + self.format_with = {} + super(IndexRequest, self).__init__(**kwargs) + + def apply_file_filter(self, filter): + i = 0 + while i < len(self.installed_files): + if filter.match(self.installed_files[i]): + i += 1 + else: + del self.installed_files[i] + j = 0 + while j < len(self.alias_files): + if filter.match(self.alias_files[j]): + j += 1 + else: + del self.alias_files[j] + return i + j > 0 + + def flatten(self, config, all_requests, common_vars): + return ( + PrintFileRequest( + name = self.name, + output_file = self.txt_file, + content = self._generate_index_file(common_vars) + ).flatten(config, all_requests, common_vars) + + SingleExecutionRequest( + name = "%s_res" % self.name, + category = self.category, + input_files = [self.txt_file], + output_files = [self.output_file], + tool = IcuTool("genrb"), + args = self.args, + format_with = self.format_with + ).flatten(config, all_requests, common_vars) + ) + + def _generate_index_file(self, common_vars): + installed_locales = [IndexRequest.locale_file_stem(f) for f in self.installed_files] + alias_locales = [IndexRequest.locale_file_stem(f) for f in self.alias_files] + formatted_version = " CLDRVersion { \"%s\" }\n" % self.cldr_version if self.cldr_version else "" + formatted_installed_locales = "\n".join([" %s {\"\"}" % v for v in installed_locales]) + formatted_alias_locales = "\n".join([" %s {\"\"}" % v for v in alias_locales]) + # TODO: CLDRVersion is required only in the base file + return ("// Warning this file is automatically generated\n" + "{INDEX_NAME}:table(nofallback) {{\n" + "{FORMATTED_VERSION}" + " InstalledLocales:table {{\n" + "{FORMATTED_INSTALLED_LOCALES}\n" + " }}\n" + " AliasLocales:table {{\n" + "{FORMATTED_ALIAS_LOCALES}\n" + " }}\n" + "}}").format( + FORMATTED_VERSION = formatted_version, + FORMATTED_INSTALLED_LOCALES = formatted_installed_locales, + FORMATTED_ALIAS_LOCALES = formatted_alias_locales, + **common_vars + ) + + def all_input_files(self): + return self.installed_files + self.alias_files + + def all_output_files(self): + return [self.output_file] + + @staticmethod + def locale_file_stem(f): + return f.filename[f.filename.rfind("/")+1:-4] diff --git a/intl/icu/source/python/icutools/databuilder/test/__init__.py b/intl/icu/source/python/icutools/databuilder/test/__init__.py new file mode 100644 index 0000000000..dd12bfa16e --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/test/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. 
+# License & terms of use: http://www.unicode.org/copyright.html diff --git a/intl/icu/source/python/icutools/databuilder/test/__main__.py b/intl/icu/source/python/icutools/databuilder/test/__main__.py new file mode 100644 index 0000000000..6ae2c0f7c9 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/test/__main__.py @@ -0,0 +1,14 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +import unittest + +from . import filtration_test + +def load_tests(loader, tests, pattern): + suite = unittest.TestSuite() + suite.addTest(filtration_test.suite) + return suite + +if __name__ == '__main__': + unittest.main() diff --git a/intl/icu/source/python/icutools/databuilder/test/filtration_test.py b/intl/icu/source/python/icutools/databuilder/test/filtration_test.py new file mode 100644 index 0000000000..416223bd7e --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/test/filtration_test.py @@ -0,0 +1,421 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +import io as pyio +import json +import os +import unittest + +from .. import InFile +from ..comment_stripper import CommentStripper +from ..filtration import Filter + +EXAMPLE_FILE_STEMS = [ + "af_NA", + "af_VARIANT", + "af_ZA_VARIANT", + "af_ZA", + "af", + "ar", + "ar_SA", + "ars", + "bs_BA", + "bs_Cyrl_BA", + "bs_Cyrl", + "bs_Latn_BA", + "bs_Latn", + "bs", + "en_001", + "en_150", + "en_DE", + "en_GB", + "en_US", + "root", + "sr_BA", + "sr_CS", + "sr_Cyrl_BA", + "sr_Cyrl_CS", + "sr_Cyrl_ME", + "sr_Cyrl", + "sr_Latn_BA", + "sr_Latn_CS", + "sr_Latn_ME_VARIANT", + "sr_Latn_ME", + "sr_Latn", + "sr_ME", + "sr", + "vai_Latn_LR", + "vai_Latn", + "vai_LR", + "vai_Vaii_LR", + "vai_Vaii", + "vai", + "yue", + "zh_CN", + "zh_Hans_CN", + "zh_Hans_HK", + "zh_Hans_MO", + "zh_Hans_SG", + "zh_Hans", + "zh_Hant_HK", + "zh_Hant_MO", + "zh_Hant_TW", + "zh_Hant", + "zh_HK", + "zh_MO", + "zh_SG", + "zh_TW", + "zh" +] + + +class TestIO(object): + def __init__(self): + pass + + def read_locale_deps(self, tree): + if tree not in ("brkitr", "locales", "rbnf"): + return None + with pyio.open(os.path.join( + os.path.dirname(__file__), + "sample_data", + tree, + "LOCALE_DEPS.json" + ), "r", encoding="utf-8-sig") as f: + return json.load(CommentStripper(f)) + + +class FiltrationTest(unittest.TestCase): + + def test_exclude(self): + self._check_filter(Filter.create_from_json({ + "filterType": "exclude" + }, TestIO()), [ + ]) + + def test_default_whitelist(self): + self._check_filter(Filter.create_from_json({ + "whitelist": [ + "ars", + "zh_Hans" + ] + }, TestIO()), [ + "ars", + "zh_Hans" + ]) + + def test_default_blacklist(self): + expected_matches = set(EXAMPLE_FILE_STEMS) + expected_matches.remove("ars") + expected_matches.remove("zh_Hans") + self._check_filter(Filter.create_from_json({ + "blacklist": [ + "ars", + "zh_Hans" + ] + }, TestIO()), expected_matches) + + def test_language_whitelist(self): + self._check_filter(Filter.create_from_json({ + "filterType": "language", + "whitelist": [ + "af", + "bs" + ] + }, TestIO()), [ + "root", + "af_NA", + "af_VARIANT", + "af_ZA_VARIANT", + "af_ZA", + "af", + "bs_BA", + "bs_Cyrl_BA", + "bs_Cyrl", + "bs_Latn_BA", + "bs_Latn", + "bs" + ]) + + def test_language_blacklist(self): + expected_matches = set(EXAMPLE_FILE_STEMS) + expected_matches.remove("af_NA") + expected_matches.remove("af_VARIANT") + expected_matches.remove("af_ZA_VARIANT") + 
expected_matches.remove("af_ZA") + expected_matches.remove("af") + self._check_filter(Filter.create_from_json({ + "filterType": "language", + "blacklist": [ + "af" + ] + }, TestIO()), expected_matches) + + def test_regex_whitelist(self): + self._check_filter(Filter.create_from_json({ + "filterType": "regex", + "whitelist": [ + r"^ar.*$", + r"^zh$" + ] + }, TestIO()), [ + "ar", + "ar_SA", + "ars", + "zh" + ]) + + def test_regex_blacklist(self): + expected_matches = set(EXAMPLE_FILE_STEMS) + expected_matches.remove("ar") + expected_matches.remove("ar_SA") + expected_matches.remove("ars") + expected_matches.remove("zh") + self._check_filter(Filter.create_from_json({ + "filterType": "regex", + "blacklist": [ + r"^ar.*$", + r"^zh$" + ] + }, TestIO()), expected_matches) + + def test_locale_basic(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "whitelist": [ + # Default scripts: + # sr => Cyrl + # vai => Vaii + # zh => Hans + "bs_BA", # is an alias to bs_Latn_BA + "en_DE", + "sr", # Language with no script + "vai_Latn", # Language with non-default script + "zh_Hans" # Language with default script + ] + }, TestIO()), [ + "root", + # bs: should include the full dependency tree of bs_BA + "bs_BA", + "bs_Latn_BA", + "bs_Latn", + "bs", + # en: should include the full dependency tree of en_DE + "en", + "en_DE", + "en_150", + "en_001", + # sr: include Cyrl, the default, but not Latn. + "sr", + "sr_BA", + "sr_CS", + "sr_Cyrl", + "sr_Cyrl_BA", + "sr_Cyrl_CS", + "sr_Cyrl_ME", + # vai: include Latn but NOT Vaii. + "vai_Latn", + "vai_Latn_LR", + # zh: include Hans but NOT Hant. + "zh", + "zh_CN", + "zh_SG", + "zh_Hans", + "zh_Hans_CN", + "zh_Hans_HK", + "zh_Hans_MO", + "zh_Hans_SG" + ]) + + def test_locale_no_children(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "includeChildren": False, + "whitelist": [ + # See comments in test_locale_basic. + "bs_BA", + "en_DE", + "sr", + "vai_Latn", + "zh_Hans" + ] + }, TestIO()), [ + "root", + "bs_BA", + "bs_Latn_BA", + "bs_Latn", + "bs", + "en", + "en_DE", + "en_150", + "en_001", + "sr", + "vai_Latn", + "zh", + "zh_Hans", + ]) + + def test_locale_include_scripts(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "includeScripts": True, + "whitelist": [ + # See comments in test_locale_basic. + "bs_BA", + "en_DE", + "sr", + "vai_Latn", + "zh_Hans" + ] + }, TestIO()), [ + "root", + # bs: includeScripts only works for language-only (without region) + "bs_BA", + "bs_Latn_BA", + "bs_Latn", + "bs", + # en: should include the full dependency tree of en_DE + "en", + "en_DE", + "en_150", + "en_001", + # sr: include Latn, since no particular script was requested. + "sr_BA", + "sr_CS", + "sr_Cyrl_BA", + "sr_Cyrl_CS", + "sr_Cyrl_ME", + "sr_Cyrl", + "sr_Latn_BA", + "sr_Latn_CS", + "sr_Latn_ME_VARIANT", + "sr_Latn_ME", + "sr_Latn", + "sr_ME", + "sr", + # vai: do NOT include Vaii; the script was explicitly requested. + "vai_Latn_LR", + "vai_Latn", + # zh: do NOT include Hant; the script was explicitly requested. + "zh_CN", + "zh_SG", + "zh_Hans_CN", + "zh_Hans_HK", + "zh_Hans_MO", + "zh_Hans_SG", + "zh_Hans", + "zh" + ]) + + def test_locale_no_children_include_scripts(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "includeChildren": False, + "includeScripts": True, + "whitelist": [ + # See comments in test_locale_basic. 
+ "bs_BA", + "en_DE", + "sr", + "vai_Latn", + "zh_Hans" + ] + }, TestIO()), [ + "root", + # bs: includeScripts only works for language-only (without region) + "bs_BA", + "bs_Latn_BA", + "bs_Latn", + "bs", + # en: should include the full dependency tree of en_DE + "en", + "en_DE", + "en_150", + "en_001", + # sr: include Cyrl and Latn but no other children + "sr", + "sr_Cyrl", + "sr_Latn", + # vai: include only the requested script + "vai_Latn", + # zh: include only the requested script + "zh", + "zh_Hans", + ]) + + def test_union(self): + self._check_filter(Filter.create_from_json({ + "filterType": "union", + "unionOf": [ + { + "whitelist": [ + "ars", + "zh_Hans" + ] + }, + { + "filterType": "regex", + "whitelist": [ + r"^bs.*$", + r"^zh$" + ] + } + ] + }, TestIO()), [ + "ars", + "zh_Hans", + "bs_BA", + "bs_Cyrl_BA", + "bs_Cyrl", + "bs_Latn_BA", + "bs_Latn", + "bs", + "zh" + ]) + + def test_hk_deps_normal(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "whitelist": [ + "zh_HK" + ] + }, TestIO()), [ + "root", + "zh_Hant", + "zh_Hant_HK", + "zh_HK", + ]) + + def test_hk_deps_rbnf(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "whitelist": [ + "zh_HK" + ] + }, TestIO()), [ + "root", + "yue", + "zh_Hant_HK", + "zh_HK", + ], "rbnf") + + def test_no_alias_parent_structure(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "whitelist": [ + "zh_HK" + ] + }, TestIO()), [ + "root", + "zh_HK", + "zh", + ], "brkitr") + + def _check_filter(self, filter, expected_matches, tree="locales"): + for file_stem in EXAMPLE_FILE_STEMS: + is_match = filter.match(InFile("%s/%s.txt" % (tree, file_stem))) + expected_match = file_stem in expected_matches + self.assertEqual(is_match, expected_match, file_stem) + +# Export the test for the runner +suite = unittest.makeSuite(FiltrationTest) diff --git a/intl/icu/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json b/intl/icu/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json new file mode 100644 index 0000000000..674db09278 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json @@ -0,0 +1,10 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +////////////////////////////////////////////////////////////// +// This is a sample LOCALE_DEPS.json file for testing only. // +////////////////////////////////////////////////////////////// + +{ + "cldrVersion": "36.1" +} diff --git a/intl/icu/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json b/intl/icu/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json new file mode 100644 index 0000000000..1456ea0d9a --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json @@ -0,0 +1,197 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +////////////////////////////////////////////////////////////// +// This is a sample LOCALE_DEPS.json file for testing only. 
// +////////////////////////////////////////////////////////////// + +{ + "cldrVersion": "36.1", + "aliases": { + "ars": "ar_SA", + "az_AZ": "az_Latn_AZ", + "bs_BA": "bs_Latn_BA", + "en_NH": "en_VU", + "en_RH": "en_ZW", + "ff_CM": "ff_Latn_CM", + "ff_GN": "ff_Latn_GN", + "ff_MR": "ff_Latn_MR", + "ff_SN": "ff_Latn_SN", + "in": "id", + "in_ID": "id_ID", + "iw": "he", + "iw_IL": "he_IL", + "mo": "ro", + "no": "nb", + "no_NO": "nb_NO", + "no_NO_NY": "nn_NO", + "pa_IN": "pa_Guru_IN", + "pa_PK": "pa_Arab_PK", + "sh": "sr_Latn", + "sh_BA": "sr_Latn_BA", + "sh_CS": "sr_Latn_RS", + "sh_YU": "sr_Latn_RS", + "shi_MA": "shi_Tfng_MA", + "sr_BA": "sr_Cyrl_BA", + "sr_CS": "sr_Cyrl_RS", + "sr_Cyrl_CS": "sr_Cyrl_RS", + "sr_Cyrl_YU": "sr_Cyrl_RS", + "sr_Latn_CS": "sr_Latn_RS", + "sr_Latn_YU": "sr_Latn_RS", + "sr_ME": "sr_Latn_ME", + "sr_RS": "sr_Cyrl_RS", + "sr_XK": "sr_Cyrl_XK", + "sr_YU": "sr_Cyrl_RS", + "tl": "fil", + "tl_PH": "fil_PH", + "uz_AF": "uz_Arab_AF", + "uz_UZ": "uz_Latn_UZ", + "vai_LR": "vai_Vaii_LR", + "yue_CN": "yue_Hans_CN", + "yue_HK": "yue_Hant_HK", + "zh_CN": "zh_Hans_CN", + "zh_HK": "zh_Hant_HK", + "zh_MO": "zh_Hant_MO", + "zh_SG": "zh_Hans_SG", + "zh_TW": "zh_Hant_TW" + }, + "parents": { + "az_Cyrl": "root", + "bs_Cyrl": "root", + "en_150": "en_001", + "en_AG": "en_001", + "en_AI": "en_001", + "en_AT": "en_150", + "en_AU": "en_001", + "en_BB": "en_001", + "en_BE": "en_150", + "en_BM": "en_001", + "en_BS": "en_001", + "en_BW": "en_001", + "en_BZ": "en_001", + "en_CA": "en_001", + "en_CC": "en_001", + "en_CH": "en_150", + "en_CK": "en_001", + "en_CM": "en_001", + "en_CX": "en_001", + "en_CY": "en_001", + "en_DE": "en_150", + "en_DG": "en_001", + "en_DK": "en_150", + "en_DM": "en_001", + "en_ER": "en_001", + "en_FI": "en_150", + "en_FJ": "en_001", + "en_FK": "en_001", + "en_FM": "en_001", + "en_GB": "en_001", + "en_GD": "en_001", + "en_GG": "en_001", + "en_GH": "en_001", + "en_GI": "en_001", + "en_GM": "en_001", + "en_GY": "en_001", + "en_HK": "en_001", + "en_IE": "en_001", + "en_IL": "en_001", + "en_IM": "en_001", + "en_IN": "en_001", + "en_IO": "en_001", + "en_JE": "en_001", + "en_JM": "en_001", + "en_KE": "en_001", + "en_KI": "en_001", + "en_KN": "en_001", + "en_KY": "en_001", + "en_LC": "en_001", + "en_LR": "en_001", + "en_LS": "en_001", + "en_MG": "en_001", + "en_MO": "en_001", + "en_MS": "en_001", + "en_MT": "en_001", + "en_MU": "en_001", + "en_MW": "en_001", + "en_MY": "en_001", + "en_NA": "en_001", + "en_NF": "en_001", + "en_NG": "en_001", + "en_NL": "en_150", + "en_NR": "en_001", + "en_NU": "en_001", + "en_NZ": "en_001", + "en_PG": "en_001", + "en_PH": "en_001", + "en_PK": "en_001", + "en_PN": "en_001", + "en_PW": "en_001", + "en_RW": "en_001", + "en_SB": "en_001", + "en_SC": "en_001", + "en_SD": "en_001", + "en_SE": "en_150", + "en_SG": "en_001", + "en_SH": "en_001", + "en_SI": "en_150", + "en_SL": "en_001", + "en_SS": "en_001", + "en_SX": "en_001", + "en_SZ": "en_001", + "en_TC": "en_001", + "en_TK": "en_001", + "en_TO": "en_001", + "en_TT": "en_001", + "en_TV": "en_001", + "en_TZ": "en_001", + "en_UG": "en_001", + "en_VC": "en_001", + "en_VG": "en_001", + "en_VU": "en_001", + "en_WS": "en_001", + "en_ZA": "en_001", + "en_ZM": "en_001", + "en_ZW": "en_001", + "es_AR": "es_419", + "es_BO": "es_419", + "es_BR": "es_419", + "es_BZ": "es_419", + "es_CL": "es_419", + "es_CO": "es_419", + "es_CR": "es_419", + "es_CU": "es_419", + "es_DO": "es_419", + "es_EC": "es_419", + "es_GT": "es_419", + "es_HN": "es_419", + "es_MX": "es_419", + "es_NI": "es_419", + "es_PA": "es_419", + "es_PE": 
"es_419", + "es_PR": "es_419", + "es_PY": "es_419", + "es_SV": "es_419", + "es_US": "es_419", + "es_UY": "es_419", + "es_VE": "es_419", + "pa_Arab": "root", + "pt_AO": "pt_PT", + "pt_CH": "pt_PT", + "pt_CV": "pt_PT", + "pt_GQ": "pt_PT", + "pt_GW": "pt_PT", + "pt_LU": "pt_PT", + "pt_MO": "pt_PT", + "pt_MZ": "pt_PT", + "pt_ST": "pt_PT", + "pt_TL": "pt_PT", + "shi_Latn": "root", + "sr_Latn": "root", + "uz_Arab": "root", + "uz_Cyrl": "root", + "vai_Latn": "root", + "yue_Hans": "root", + "zh_Hant": "root", + "zh_Hant_MO": "zh_Hant_HK" + } +} diff --git a/intl/icu/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json b/intl/icu/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json new file mode 100644 index 0000000000..c6ec208add --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json @@ -0,0 +1,36 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +////////////////////////////////////////////////////////////// +// This is a sample LOCALE_DEPS.json file for testing only. // +////////////////////////////////////////////////////////////// + +{ + "cldrVersion": "36.1", + "aliases": { + "ars": "ar_SA", + "in": "id", + "iw": "he", + "no": "nb", + "sh": "sr_Latn", + "zh_HK": "zh_Hant_HK", + "zh_Hant_HK": "yue", + "zh_MO": "zh_Hant_MO", + "zh_TW": "zh_Hant_TW" + }, + "parents": { + "en_IN": "en_001", + "es_DO": "es_419", + "es_GT": "es_419", + "es_HN": "es_419", + "es_MX": "es_419", + "es_NI": "es_419", + "es_PA": "es_419", + "es_PR": "es_419", + "es_SV": "es_419", + "es_US": "es_419", + "sr_Latn": "root", + "yue_Hans": "root", + "zh_Hant": "root" + } +} diff --git a/intl/icu/source/python/icutools/databuilder/utils.py b/intl/icu/source/python/icutools/databuilder/utils.py new file mode 100644 index 0000000000..3d53d18fae --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/utils.py @@ -0,0 +1,143 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +# Python 2/3 Compatibility (ICU-20299) +# TODO(ICU-20301): Remove this. +from __future__ import print_function + +import sys + +from . 
import * + + +def dir_for(file): + if isinstance(file, LocalFile): + return get_local_dirname(file.dirname) + if isinstance(file, SrcFile): + return "{SRC_DIR}" + if isinstance(file, InFile): + return "{IN_DIR}" + if isinstance(file, TmpFile): + return "{TMP_DIR}" + if isinstance(file, OutFile): + return "{OUT_DIR}" + if isinstance(file, PkgFile): + return "{PKG_DIR}" + assert False + + +LOCAL_DIRNAME_SUBSTITUTIONS = { + "SRC": "{SRC_DIR}", + "FILTERS": "{FILTERS_DIR}", + "CWD": "{CWD_DIR}" +} + + +def get_local_dirname(dirname): + if dirname.startswith("/"): + return dirname + elif dirname.startswith("$"): + # Note: directory separator substitution happens later + sep_idx = dirname.find("/") + if sep_idx == -1: + sep_idx = len(dirname) + variable = dirname[1:sep_idx] + if variable in LOCAL_DIRNAME_SUBSTITUTIONS: + return LOCAL_DIRNAME_SUBSTITUTIONS[variable] + dirname[sep_idx:] + print( + "Error: Local directory must be absolute, or relative to one of: " + + (", ".join("$%s" % v for v in LOCAL_DIRNAME_SUBSTITUTIONS.keys())), + file=sys.stderr + ) + exit(1) + + +ALL_TREES = [ + "locales", + "curr", + "lang", + "region", + "zone", + "unit", + "coll", + "brkitr", + "rbnf", +] + + +def concat_dicts(*dicts): + # There is not a super great way to do this in Python: + new_dict = {} + for dict in dicts: + new_dict.update(dict) + return new_dict + + +def repeated_execution_request_looper(request): + # dictionary of lists to list of dictionaries: + ld = [ + dict(zip(request.repeat_with, t)) + for t in zip(*request.repeat_with.values()) + ] + if not ld: + # No special options given in repeat_with + ld = [{} for _ in range(len(request.input_files))] + return zip(ld, request.specific_dep_files, request.input_files, request.output_files) + + +def format_single_request_command(request, cmd_template, common_vars): + return cmd_template.format( + ARGS = request.args.format( + INPUT_FILES = [file.filename for file in request.input_files], + OUTPUT_FILES = [file.filename for file in request.output_files], + **concat_dicts(common_vars, request.format_with) + ) + ) + + +def format_repeated_request_command(request, cmd_template, loop_vars, common_vars): + (iter_vars, _, input_file, output_file) = loop_vars + return cmd_template.format( + ARGS = request.args.format( + INPUT_FILE = input_file.filename, + OUTPUT_FILE = output_file.filename, + **concat_dicts(common_vars, request.format_with, iter_vars) + ) + ) + + +def flatten_requests(requests, config, common_vars): + result = [] + for request in requests: + result += request.flatten(config, requests, common_vars) + return result + + +def get_all_output_files(requests, include_tmp=False): + files = [] + for request in requests: + files += request.all_output_files() + + # Filter out all files but those in OUT_DIR if necessary. + # It is also easy to filter for uniqueness; do it right now and return. + if not include_tmp: + files = (file for file in files if isinstance(file, OutFile)) + return list(set(files)) + + # Filter for unique values. NOTE: Cannot use set() because we need to accept same filename as + # OutFile and TmpFile as different, and by default they evaluate as equal. 
+ return [f for _, f in set((type(f), f) for f in files)] + + +def compute_directories(requests): + dirs = set() + for file in get_all_output_files(requests, include_tmp=True): + path = "%s/%s" % (dir_for(file), file.filename) + dirs.add(path[:path.rfind("/")]) + return list(sorted(dirs)) + + +class SpaceSeparatedList(list): + """A list that joins itself with spaces when converted to a string.""" + def __str__(self): + return " ".join(self)
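The tail of utils.py in this diff leans on two Python idioms that are easy to misread. First, repeated_execution_request_looper converts repeat_with, a dictionary of equal-length lists, into one small dictionary per input file. Second, get_all_output_files deduplicates on (type(f), f) rather than a bare set() because the file wrappers are namedtuples, which compare as plain tuples regardless of their type. Below is a minimal standalone sketch of both idioms, using only the standard library; the repeat_with keys shown are illustrative placeholders, not values taken from the actual build configuration.

from collections import namedtuple

OutFile = namedtuple("OutFile", ["filename"])
TmpFile = namedtuple("TmpFile", ["filename"])

# Idiom 1: dictionary of lists -> list of dictionaries, one per iteration.
repeat_with = {"EXAMPLE_KEY": ["a", "b"], "OTHER_KEY": ["x", "y"]}  # placeholder keys
per_iteration = [dict(zip(repeat_with, t)) for t in zip(*repeat_with.values())]
assert per_iteration == [
    {"EXAMPLE_KEY": "a", "OTHER_KEY": "x"},
    {"EXAMPLE_KEY": "b", "OTHER_KEY": "y"},
]

# Idiom 2: namedtuples compare by value only, so OutFile("x") == TmpFile("x");
# a bare set() would merge them. Pairing each file with its type keeps them apart.
files = [OutFile("icudt.lst"), TmpFile("icudt.lst"), OutFile("icudt.lst")]
assert OutFile("icudt.lst") == TmpFile("icudt.lst")  # tuple equality ignores type
unique = [f for _, f in set((type(f), f) for f in files)]
assert len(unique) == 2  # one OutFile and one TmpFile survive

The (type(f), f) pairing is the reason the include_tmp branch of get_all_output_files cannot simply reuse list(set(files)): a TmpFile and an OutFile naming the same path must both be kept, since they live in different directories at build time.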