# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html

# Python 2/3 Compatibility (ICU-20299)
# TODO(ICU-20301): Remove this.
from __future__ import print_function

import argparse
import glob as pyglob
import io as pyio
import json
import os
import sys

from . import *
from .comment_stripper import CommentStripper
from .request_types import CopyRequest
from .renderers import makefile, common_exec
from . import filtration, utils

# Text types under both Python 2 (str, unicode) and Python 3 (str, str).
_STRING_TYPES = (str, type(u""))

# Maps each executing --mode to the platform name handed to common_exec.run.
_EXEC_PLATFORMS = {
    "windows-exec": "windows",
    "unix-exec": "unix",
    "bazel-exec": "bazel",
}

flag_parser = argparse.ArgumentParser(
    description="""Generates rules for building ICU binary data files from text
and other input files in source control.

Use the --mode option to declare how to execute those rules, either exporting
the rules to a Makefile or spawning child processes to run them immediately:

  --mode=gnumake prints a Makefile to standard out.
  --mode=unix-exec spawns child processes in a Unix-like environment.
  --mode=windows-exec spawns child processes in a Windows-like environment.

Tips for --mode=unix-exec
=========================

Create two empty directories for out_dir and tmp_dir. They will get filled
with a lot of intermediate files.

Set LD_LIBRARY_PATH to include the lib directory. e.g., from icu4c/source:

  $ LD_LIBRARY_PATH=lib PYTHONPATH=python python3 -m icutools.databuilder ...

Once icutools.databuilder finishes, you have compiled the data, but you have
not packaged it into a .dat or .so file. This is done by the separate pkgdata
tool in bin. Read the docs of pkgdata:

  $ LD_LIBRARY_PATH=lib ./bin/pkgdata --help

Example command line to call pkgdata:

  $ LD_LIBRARY_PATH=lib ./bin/pkgdata -m common -p icudt63l -c \\
      -O data/icupkg.inc -s $OUTDIR -d $TMPDIR $TMPDIR/icudata.lst

where $OUTDIR and $TMPDIR are your out and tmp directories, respectively.
The above command will create icudt63l.dat in the tmpdir.

Command-Line Arguments
======================
""",
    formatter_class=argparse.RawDescriptionHelpFormatter
)

arg_group_required = flag_parser.add_argument_group("required arguments")
arg_group_required.add_argument(
    "--mode",
    help="What to do with the generated rules.",
    choices=["gnumake", "unix-exec", "windows-exec", "bazel-exec"],
    required=True
)

flag_parser.add_argument(
    "--src_dir",
    help="Path to data source folder (icu4c/source/data).",
    default="."
)
flag_parser.add_argument(
    "--filter_file",
    metavar="PATH",
    help="Path to an ICU data filter JSON file.",
    default=None
)
flag_parser.add_argument(
    "--include_uni_core_data",
    help="Include the full Unicode core data in the dat file.",
    default=False,
    action="store_true"
)
flag_parser.add_argument(
    "--seqmode",
    help=(
        "Whether to optimize rules to be run sequentially (fewer threads) or "
        "in parallel (many threads). Defaults to 'sequential', which is "
        "better for unix-exec and windows-exec modes. 'parallel' is often "
        "better for massively parallel build systems."
    ),
    choices=["sequential", "parallel"],
    default="sequential"
)
flag_parser.add_argument(
    "--verbose",
    help="Print more verbose output (default false).",
    default=False,
    action="store_true"
)

arg_group_exec = flag_parser.add_argument_group("arguments for unix-exec and windows-exec modes")
arg_group_exec.add_argument(
    "--out_dir",
    help="Path to where to save output data files.",
    default="icudata"
)
arg_group_exec.add_argument(
    "--tmp_dir",
    help="Path to where to save temporary files.",
    default="icutmp"
)
arg_group_exec.add_argument(
    "--tool_dir",
    help="Path to where to find binary tools (genrb, etc).",
    default="../bin"
)
arg_group_exec.add_argument(
    "--tool_cfg",
    help="The build configuration of the tools. Used in 'windows-exec' mode only.",
    default="x86/Debug"
)


class Config(object):
    """Build settings derived from the parsed flags and the optional filter file.

    Attributes set by ``__init__``:
        max_parallel: True when ``--seqmode`` is "parallel".
        include_uni_core_data: value of ``--include_uni_core_data``.
        filters_json_data: parsed filter file contents; ``{}`` when no filter
            file was given (a default "resourceFilters" entry may still be
            inserted, see ``__init__``).
        filter_dir: absolute directory containing the filter file, or the
            placeholder "ERROR_NO_FILTER_FILE" when none was given.
        coll_han_type: "collationUCAData" from the filter file, default "unihan".
        strategy: "strategy" from the filter file, default "subtractive".
        use_pool_bundle: "usePoolBundle" from the filter file, default True.
    """

    def __init__(self, args):
        """Populate the configuration from ``args`` (the argparse namespace).

        Exits the process with status 1 (after printing to stderr) when the
        filter file cannot be read.
        """
        # Process arguments
        self.max_parallel = (args.seqmode == "parallel")

        # Boolean: Whether to include core Unicode data files in the .dat file
        self.include_uni_core_data = args.include_uni_core_data

        # Default fields before processing filter file
        self.filters_json_data = {}
        self.filter_dir = "ERROR_NO_FILTER_FILE"

        # Process filter file
        if args.filter_file:
            try:
                # Decode as UTF-8 and tolerate a BOM, the same way
                # IO._read_json opens its JSON inputs, instead of relying on
                # the platform's default encoding.
                with pyio.open(args.filter_file, "r", encoding="utf-8-sig") as f:
                    print("Note: Applying filters from %s." % args.filter_file, file=sys.stderr)
                    self._parse_filter_file(f)
            except IOError:
                print("Error: Could not read filter file %s." % args.filter_file, file=sys.stderr)
                sys.exit(1)
            self.filter_dir = os.path.abspath(os.path.dirname(args.filter_file))

        # Either "unihan" or "implicithan"
        self.coll_han_type = self.filters_json_data.get("collationUCAData", "unihan")

        # Either "additive" or "subtractive"
        self.strategy = self.filters_json_data.get("strategy", "subtractive")

        # True or False (could be extended later to support enum/list)
        self.use_pool_bundle = self.filters_json_data.get("usePoolBundle", True)

        # By default, exclude collation data that mimics the order of some large legacy charsets.
        # We do this in "subtractive" strategy by inserting a resourceFilter.
        # Later rules from an explicit filter file may override this default behavior.
        # (In "additive" strategy this is unnecessary.)
        if self.strategy == "subtractive":
            filters = self.filters_json_data.setdefault("resourceFilters", [])
            omit_charset_collations = {
                "categories": [
                    "coll_tree"
                ],
                "rules": [
                    "-/collations/big5han",
                    "-/collations/gb2312han"
                ]
            }
            # Index 0 so that every rule from the filter file comes after it.
            filters.insert(0, omit_charset_collations)

    def _parse_filter_file(self, f):
        """Parse the open filter file ``f`` into ``self.filters_json_data``.

        Uses ``hjson`` when it is importable and falls back to the standard
        ``json`` module (with ``//`` comments removed by CommentStripper)
        otherwise. When ``jsonschema`` is importable, the result is checked
        against ``filtration_schema.json`` next to this module and every
        violation is printed to stderr as a warning; violations never abort.
        """
        # Use the Hjson parser if it is available; otherwise, use vanilla JSON.
        try:
            import hjson
        except ImportError:
            self.filters_json_data = json.load(CommentStripper(f))
        else:
            self.filters_json_data = hjson.load(f)

        # Optionally pre-validate the JSON schema before further processing.
        # Some schema errors will be caught later, but this step ensures
        # maximal validity.
        try:
            import jsonschema
        except ImportError:
            print("Tip: to validate your filter file, install the Pip package 'jsonschema'", file=sys.stderr)
            return

        schema_path = os.path.join(os.path.dirname(__file__), "filtration_schema.json")
        with open(schema_path) as schema_f:
            schema = json.load(CommentStripper(schema_f))
        validator = jsonschema.Draft4Validator(schema)
        # The validator is already bound to `schema`; passing it again to
        # iter_errors is deprecated in current jsonschema releases.
        for error in validator.iter_errors(self.filters_json_data):
            # Render the location as e.g. ".resourceFilters[0].rules".
            location = "".join(
                "[%d]" % part if isinstance(part, int) else ".%s" % part
                for part in error.absolute_path
            )
            print("WARNING: ICU data filter JSON file:", error.message, "at", location, file=sys.stderr)


def add_copy_input_requests(requests, config, common_vars):
    """Prepend CopyRequests that stage every InFile input of ``requests``.

    Each entry of ``config.filters_json_data["fileReplacements"]["replacements"]``
    is either a bare file name (same name for source and destination) or a
    mapping with "src" and "dest" keys. Replaced files are copied from the
    replacement directory (as LocalFile); every other InFile is copied from
    the source tree (as SrcFile).

    Args:
        requests: the build requests whose inputs need staging.
        config: Config whose filters_json_data contains "fileReplacements".
        common_vars: unused; kept for interface compatibility.

    Returns:
        A new list: the copy requests, named "input_copy_<n>", followed by
        the original ``requests``.

    Raises:
        KeyError: if "fileReplacements" or its "directory"/"replacements" keys
            are missing, or if a replacement's destination is not an InFile
            input of any request (this includes a destination listed twice).
    """
    files_to_copy = set()
    for request in requests:
        # Work on a copy: all_input_files() is defined elsewhere and may hand
        # back a list that the request still owns.
        request_files = list(request.all_input_files())
        # Also add known dependency txt files as possible inputs.
        # This is required for translit rule files.
        request_files.extend(getattr(request, "dep_targets", ()))
        files_to_copy.update(f for f in request_files if isinstance(f, InFile))

    result = []

    json_data = config.filters_json_data["fileReplacements"]
    dirname = json_data["directory"]
    for directive in json_data["replacements"]:
        # isinstance against both text types so that Python 2 `unicode`
        # strings produced by the JSON parser take the string branch too.
        if isinstance(directive, _STRING_TYPES):
            src_name = dest_name = directive
        else:
            src_name = directive["src"]
            dest_name = directive["dest"]
        output_file = InFile(dest_name)
        result.append(CopyRequest(
            name="input_copy_%d" % len(result),
            input_file=LocalFile(dirname, src_name),
            output_file=output_file
        ))
        # The replacement supersedes the copy from the source tree.
        files_to_copy.remove(output_file)

    # Sort so that rule numbering does not depend on set iteration order and
    # the generated output is reproducible from run to run.
    for f in sorted(files_to_copy, key=lambda in_file: in_file.filename):
        result.append(CopyRequest(
            name="input_copy_%d" % len(result),
            input_file=SrcFile(f.filename),
            output_file=f
        ))

    result += requests
    return result


class IO(object):
    """I/O operations required when computing the build actions"""

    def __init__(self, src_dir):
        """Remember ``src_dir``, the directory all lookups are relative to."""
        self.src_dir = src_dir

    def glob(self, pattern):
        """Return the sorted paths under ``src_dir`` that match ``pattern``.

        The returned paths are relative to ``src_dir`` and always use '/' as
        the directory separator.
        """
        absolute_paths = sorted(pyglob.glob(os.path.join(self.src_dir, pattern)))

        # Strip off the src_dir prefix so we are left with a relative path.
        # relpath (rather than slicing off len(src_dir)+1 characters) stays
        # correct when src_dir already ends with a directory separator.
        relative_paths = [os.path.relpath(v, self.src_dir) for v in absolute_paths]

        # For the purposes of icutools.databuilder, force Unix-style directory separators.
        # Within the Python code, including BUILDRULES.py and user-provided config files,
        # directory separators are normalized to '/', including on Windows platforms.
        return [v.replace("\\", "/") for v in relative_paths]

    def read_locale_deps(self, tree):
        """Return the parsed ``<tree>/LOCALE_DEPS.json`` from ``src_dir``."""
        return self._read_json("%s/LOCALE_DEPS.json" % tree)

    def _read_json(self, filename):
        """Parse ``filename`` (relative to ``src_dir``) as commented JSON.

        The file is decoded as UTF-8, with an optional BOM skipped.
        """
        with pyio.open(os.path.join(self.src_dir, filename), "r", encoding="utf-8-sig") as f:
            return json.load(CommentStripper(f))


def main(argv):
    """Generate the ICU data build rules and emit or execute them.

    Args:
        argv: command-line arguments, without the program name.

    Returns:
        The process exit status: 0 after printing the Makefile in "gnumake"
        mode, whatever ``common_exec.run`` returns in the exec modes, and 1
        for an unsupported mode.
    """
    args = flag_parser.parse_args(argv)
    config = Config(args)

    if args.mode == "gnumake":
        makefile_vars = {
            "SRC_DIR": "$(srcdir)",
            "IN_DIR": "$(srcdir)",
            "INDEX_NAME": "res_index"
        }
        makefile_env = ["ICUDATA_CHAR", "OUT_DIR", "TMP_DIR"]
        # In Makefile output every variable stays a $(NAME) reference.
        common = {
            key: "$(%s)" % key
            for key in list(makefile_vars) + makefile_env
        }
        common["FILTERS_DIR"] = config.filter_dir
        common["CWD_DIR"] = os.getcwd()
    else:
        makefile_vars = None
        common = {
            "SRC_DIR": args.src_dir,
            "IN_DIR": args.src_dir,
            "OUT_DIR": args.out_dir,
            "TMP_DIR": args.tmp_dir,
            "FILTERS_DIR": config.filter_dir,
            "CWD_DIR": os.getcwd(),
            "INDEX_NAME": "res_index",
            # TODO: Pull this from configure script:
            "ICUDATA_CHAR": "l"
        }

    # Automatically load BUILDRULES from the src_dir
    sys.path.append(args.src_dir)
    try:
        import BUILDRULES
    except ImportError:
        print("Cannot find BUILDRULES! Did you set your --src_dir?", file=sys.stderr)
        sys.exit(1)

    io = IO(args.src_dir)
    requests = BUILDRULES.generate(config, io, common)

    if "fileReplacements" in config.filters_json_data:
        # Inputs are staged into a scratch directory so that replaced files
        # can shadow the ones from the source tree.
        tmp_in_dir = "{TMP_DIR}/in".format(**common)
        if makefile_vars:
            makefile_vars["IN_DIR"] = tmp_in_dir
        else:
            common["IN_DIR"] = tmp_in_dir
        requests = add_copy_input_requests(requests, config, common)

    requests = filtration.apply_filters(requests, config, io)
    requests = utils.flatten_requests(requests, config, common)

    build_dirs = utils.compute_directories(requests)

    if args.mode == "gnumake":
        print(makefile.get_gnumake_rules(
            build_dirs,
            requests,
            makefile_vars,
            common_vars=common
        ))
        return 0

    platform = _EXEC_PLATFORMS.get(args.mode)
    if platform is None:
        # argparse's `choices` normally rejects this earlier; report on stderr
        # like every other diagnostic in this module.
        print("Mode not supported: %s" % args.mode, file=sys.stderr)
        return 1

    run_kwargs = {
        "platform": platform,
        "build_dirs": build_dirs,
        "requests": requests,
        "common_vars": common,
        "tool_dir": args.tool_dir,
        "verbose": args.verbose,
    }
    if platform == "windows":
        # Only the Windows runner is given the tool build configuration.
        run_kwargs["tool_cfg"] = args.tool_cfg
    return common_exec.run(**run_kwargs)


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))