diff options
Diffstat (limited to '')
-rw-r--r-- | python/mozbuild/mozbuild/vendor/vendor_manifest.py | 789 |
1 files changed, 789 insertions, 0 deletions
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, # You can obtain one at http://mozilla.org/MPL/2.0/.

import functools
import glob
import logging
import os
import re
import shutil
import stat
import sys
import tarfile
import tempfile
from collections import defaultdict

import mozfile
import mozpack.path as mozpath
import requests

from mozbuild.base import MozbuildObject
from mozbuild.vendor.rewrite_mozbuild import (
    MozBuildRewriteException,
    add_file_to_moz_build_file,
    remove_file_from_moz_build_file,
)

DEFAULT_EXCLUDE_FILES = [".git*", ".git*/**"]
DEFAULT_KEEP_FILES = ["**/moz.build", "**/moz.yaml"]
DEFAULT_INCLUDE_FILES = []


def throwe():
    """Raise a bare ``Exception``.

    ``VendorManifest.vendor`` installs this as the initial
    ``_extract_directory`` callable, so resolving ``{tmpextractdir}`` fails
    until ``fetch_and_unpack`` has replaced it with a real directory getter.
    """
    raise Exception


def _replace_in_file(file, pattern, replacement, regex=False):
    """Replace ``pattern`` with ``replacement`` inside ``file``, in place.

    :param file: path of the text file to rewrite.
    :param pattern: literal substring, or a regular expression when
        ``regex`` is true.
    :param replacement: replacement text (a ``re.sub`` template when
        ``regex`` is true).
    :param regex: select ``re.sub`` instead of ``str.replace``.
    :raises Exception: if the substitution leaves the contents unchanged.
    """
    with open(file) as f:
        contents = f.read()

    if regex:
        newcontents = re.sub(pattern, replacement, contents)
    else:
        newcontents = contents.replace(pattern, replacement)

    if newcontents == contents:
        raise Exception(
            "Could not find '%s' in %s to %sreplace with '%s'"
            % (pattern, file, "regex-" if regex else "", replacement)
        )

    with open(file, "w") as f:
        f.write(newcontents)


def list_of_paths_to_readable_string(paths):
    """Summarise ``paths`` as a short bracketed string, grouped by directory.

    Existing directories are listed by themselves. Files are grouped under
    their parent directory: one or two files are spelled out in full, three
    to nine are listed by name, and ten or more are only counted.
    """
    # From https://stackoverflow.com/a/41578071
    grouped = defaultdict(list)
    for item in paths:
        if os.path.isdir(item):
            # Touch the key so the directory shows up with an empty file list.
            grouped[item]
        else:
            parent, filename = os.path.split(item)
            grouped[parent].append(filename)

    parts = []
    for key, val in grouped.items():
        if not val:
            parts.append(key)
        elif len(val) < 3:
            parts.append(", ".join([os.path.join(key, v) for v in val]))
        elif len(val) < 10:
            parts.append(
                "%s items in %s: %s and %s"
                % (len(val), key, ", ".join(val[0:-1]), val[-1])
            )
        else:
            parts.append("%s (omitted) items in %s" % (len(val), key))

    return "[" + ", ".join(parts) + "]"


class VendorManifest(MozbuildObject):
    """Vendor an upstream library as described by a moz.yaml manifest.

    ``vendor`` is the entry point; it stores the manifest on the instance and
    dispatches to ``process_regular``, ``process_individual`` or
    ``process_rust`` depending on the manifest's ``vendoring.flavor``.
    """

    def should_perform_step(self, step):
        """Return False when ``step`` is listed in ``skip-vendoring-steps``."""
        return step not in self.manifest["vendoring"].get("skip-vendoring-steps", [])

    def vendor(
        self,
        command_context,
        yaml_file,
        manifest,
        revision,
        ignore_modified,
        check_for_update,
        force,
        add_to_exports,
        patch_mode,
    ):
        """Run the vendoring operation described by ``manifest``.

        :param command_context: mach command context, used to spawn the rust
            vendoring command for the ``rust`` flavor.
        :param yaml_file: path of the moz.yaml file being processed.
        :param manifest: parsed manifest; ``vendoring.vendor-directory`` is
            filled in with the yaml file's directory when absent.
        :param revision: upstream revision or tag to look up, or ``"tip"``
            to use upstream ``HEAD``.
        :param ignore_modified: forwarded to the spurious-update check.
        :param check_for_update: only print ``"<revision> <timestamp>"`` and
            return (not applicable to the ``individual-files`` flavor).
        :param force: proceed even when the in-tree revision already matches.
        :param add_to_exports: forwarded to ``update_moz_build``.
        :param patch_mode: when ``"only"`` and the manifest lists patches,
            apply the patches and return.
        :raises Exception: for an unknown flavor or source host.
        """
        self.manifest = manifest
        self.yaml_file = yaml_file
        self._extract_directory = throwe
        self.logInfo = functools.partial(self.log, logging.INFO, "vendor")
        if "vendor-directory" not in self.manifest["vendoring"]:
            self.manifest["vendoring"]["vendor-directory"] = os.path.dirname(
                self.yaml_file
            )

        # ==========================================================
        # If we're only patching; do that
        if "patches" in self.manifest["vendoring"] and patch_mode == "only":
            self.import_local_patches(
                self.manifest["vendoring"]["patches"],
                os.path.dirname(self.yaml_file),
                self.manifest["vendoring"]["vendor-directory"],
            )
            return

        # ==========================================================
        self.source_host = self.get_source_host()

        ref_type = self.manifest["vendoring"].get("tracking", "commit")
        flavor = self.manifest["vendoring"].get("flavor", "regular")
        # Individual files are special

        if revision == "tip":
            # This case allows us to force-update a tag-tracking library to master
            new_revision, timestamp = self.source_host.upstream_commit("HEAD")
        elif ref_type == "tag":
            new_revision, timestamp = self.source_host.upstream_tag(revision)
        else:
            new_revision, timestamp = self.source_host.upstream_commit(revision)

        self.logInfo(
            {"ref_type": ref_type, "ref": new_revision, "timestamp": timestamp},
            "Latest {ref_type} is {ref} from {timestamp}",
        )

        # ==========================================================
        if not force and self.manifest["origin"]["revision"] == new_revision:
            # We're up to date, don't do anything
            self.logInfo({}, "Latest upstream matches in-tree.")
            return
        elif flavor != "individual-files" and check_for_update:
            # The flavor name must match the one dispatched on below; the
            # previous spelling ("individual-file") never matched, so the
            # exclusion of that flavor silently never applied.
            # Only print the new revision to stdout
            print("%s %s" % (new_revision, timestamp))
            return

        # ==========================================================
        if flavor == "regular":
            self.process_regular(
                new_revision, timestamp, ignore_modified, add_to_exports
            )
        elif flavor == "individual-files":
            self.process_individual(new_revision, timestamp, ignore_modified)
        elif flavor == "rust":
            self.process_rust(
                command_context,
                self.manifest["origin"]["revision"],
                new_revision,
                timestamp,
                ignore_modified,
            )
        else:
            raise Exception("Unknown flavor")

    def _remind_about_patches(self):
        """Log a CRITICAL reminder when the manifest lists local patches."""
        if "patches" in self.manifest["vendoring"]:
            # Remind the user
            self.log(
                logging.CRITICAL,
                "vendor",
                {},
                "Patches present in manifest!!! Please run "
                "'./mach vendor --patch-mode only' after commiting changes.",
            )

    def process_rust(
        self, command_context, old_revision, new_revision, timestamp, ignore_modified
    ):
        """Update a rust-flavored library.

        Rewrites ``old_revision`` to ``new_revision`` in the ``Cargo.toml``
        next to the yaml file, runs the rust vendoring command, then updates
        the yaml file.
        """
        # First update the Cargo.toml
        cargo_file = os.path.join(os.path.dirname(self.yaml_file), "Cargo.toml")
        try:
            _replace_in_file(cargo_file, old_revision, new_revision)
        except Exception:
            # If we can't find it the first time, try again with a short hash
            _replace_in_file(cargo_file, old_revision[:8], new_revision)

        # Then call ./mach vendor rust
        from mozbuild.vendor.vendor_rust import VendorRust

        vendor_command = command_context._spawn(VendorRust)
        # NOTE(review): ignore_modified is hard-coded to True here and the
        # parameter is unused; presumably because Cargo.toml was just edited
        # above -- confirm this is intentional.
        vendor_command.vendor(
            ignore_modified=True, build_peers_said_large_imports_were_ok=False
        )

        self.update_yaml(new_revision, timestamp)

    def process_individual(self, new_revision, timestamp, ignore_modified):
        """Update a library that vendors a fixed list of individual files.

        Downloads every file named by ``individual-files`` or
        ``individual-files-list``, then runs the spurious-update check, the
        update actions and the yaml update.
        """
        # This design is used because there is no github API to query
        # for the last commit that modified a file; nor a way to get file
        # blame. So really all we can do is just download and replace the
        # files and see if they changed...

        def download_and_write_file(url, destination):
            self.logInfo(
                {"local_file": destination, "url": url},
                "Downloading {local_file} from {url}...",
            )

            with mozfile.NamedTemporaryFile() as tmpfile:
                req = requests.get(url, stream=True)
                # Fail loudly instead of writing an HTTP error page over the
                # vendored file.
                req.raise_for_status()
                for data in req.iter_content(4096):
                    tmpfile.write(data)
                tmpfile.seek(0)

                shutil.copy2(tmpfile.name, destination)

        # Only one of these loops will have content, so just do them both
        for f in self.manifest["vendoring"].get("individual-files", []):
            url = self.source_host.upstream_path_to_file(new_revision, f["upstream"])
            destination = self.get_full_path(f["destination"])
            download_and_write_file(url, destination)

        for f in self.manifest["vendoring"].get("individual-files-list", []):
            url = self.source_host.upstream_path_to_file(
                new_revision,
                self.manifest["vendoring"]["individual-files-default-upstream"] + f,
            )
            destination = self.get_full_path(
                self.manifest["vendoring"]["individual-files-default-destination"] + f
            )
            download_and_write_file(url, destination)

        self.spurious_check(new_revision, ignore_modified)

        self.logInfo({}, "Checking for update actions")
        self.update_files(new_revision)

        self.update_yaml(new_revision, timestamp)

        self.logInfo({"rev": new_revision}, "Updated to '{rev}'.")

        self._remind_about_patches()

    def process_regular(self, new_revision, timestamp, ignore_modified, add_to_exports):
        """Update a regular library by replacing its whole vendored tree.

        Each stage (fetch, hg-add, spurious-check, update-moz-yaml,
        update-moz-build) is skipped when named in ``skip-vendoring-steps``.
        """
        if self.should_perform_step("fetch"):
            self.fetch_and_unpack(new_revision)
        else:
            self.logInfo({}, "Skipping fetching upstream source.")

        self.logInfo({}, "Checking for update actions")
        self.update_files(new_revision)

        if self.should_perform_step("hg-add"):
            self.logInfo({}, "Registering changes with version control.")
            self.repository.add_remove_files(
                self.manifest["vendoring"]["vendor-directory"],
                os.path.dirname(self.yaml_file),
            )
        else:
            self.logInfo({}, "Skipping registering changes.")

        if self.should_perform_step("spurious-check"):
            self.logInfo({}, "Checking for a spurious update.")
            self.spurious_check(new_revision, ignore_modified)
        else:
            self.logInfo({}, "Skipping the spurious update check.")

        if self.should_perform_step("update-moz-yaml"):
            self.logInfo({}, "Updating moz.yaml.")
            self.update_yaml(new_revision, timestamp)
        else:
            self.logInfo({}, "Skipping updating the moz.yaml file.")

        if self.should_perform_step("update-moz-build"):
            self.logInfo({}, "Updating moz.build files")
            self.update_moz_build(
                self.manifest["vendoring"]["vendor-directory"],
                os.path.dirname(self.yaml_file),
                add_to_exports,
            )
        else:
            self.logInfo({}, "Skipping update of moz.build files")

        self.logInfo({"rev": new_revision}, "Updated to '{rev}'.")

        self._remind_about_patches()

    def get_source_host(self):
        """Return the host helper selected by ``vendoring.source-hosting``.

        The host modules are imported lazily so only the one in use is loaded.

        :raises Exception: if the hosting name is not recognised.
        """
        hosting = self.manifest["vendoring"]["source-hosting"]
        if hosting == "gitlab":
            from mozbuild.vendor.host_gitlab import GitLabHost

            return GitLabHost(self.manifest)
        elif hosting == "github":
            from mozbuild.vendor.host_github import GitHubHost

            return GitHubHost(self.manifest)
        elif hosting == "googlesource":
            from mozbuild.vendor.host_googlesource import GoogleSourceHost

            return GoogleSourceHost(self.manifest)
        elif hosting == "angle":
            from mozbuild.vendor.host_angle import AngleHost

            return AngleHost(self.manifest)
        elif hosting == "codeberg":
            from mozbuild.vendor.host_codeberg import CodebergHost

            return CodebergHost(self.manifest)
        else:
            raise Exception("Unknown source host: " + hosting)

    def get_full_path(self, path, support_cwd=False):
        """Expand a manifest path into an absolute path.

        Exactly one rule is applied, in this order: a leading ``{cwd}`` (only
        when ``support_cwd`` is true), ``{tmpextractdir}``, ``{yaml_dir}``,
        ``{vendor_dir}``; a path with none of these is joined onto the vendor
        directory.
        """
        if support_cwd and path[0:5] == "{cwd}":
            path = path.replace("{cwd}", ".")
        elif "{tmpextractdir}" in path:
            # _extract_directory() will throw an exception if it is invalid to use it
            path = path.replace("{tmpextractdir}", self._extract_directory())
        elif "{yaml_dir}" in path:
            path = path.replace("{yaml_dir}", os.path.dirname(self.yaml_file))
        elif "{vendor_dir}" in path:
            path = path.replace(
                "{vendor_dir}", self.manifest["vendoring"]["vendor-directory"]
            )
        else:
            path = mozpath.join(self.manifest["vendoring"]["vendor-directory"], path)
        return os.path.abspath(path)

    def convert_patterns_to_paths(self, directory, patterns):
        """Expand glob ``patterns`` under ``directory`` into a sorted file list.

        A pattern that names an existing directory matches everything below
        it. Directories themselves are never returned.
        """
        # glob.iglob uses shell-style wildcards for path name completion.
        # "recursive=True" enables the double asterisk "**" wildcard which matches
        # for nested directories as well as the directory we're searching in.
        paths = []
        for pattern in patterns:
            pattern_full_path = mozpath.join(directory, pattern)
            # If pattern is a directory recursively add contents of directory
            if os.path.isdir(pattern_full_path):
                # Append double asterisk to the end to make glob.iglob recursively match
                # contents of directory
                paths.extend(
                    glob.iglob(mozpath.join(pattern_full_path, "**"), recursive=True)
                )
            # Otherwise pattern is a file or wildcard expression so add it without altering it
            else:
                paths.extend(glob.iglob(pattern_full_path, recursive=True))
        # Remove folder names from list of paths in order to avoid prematurely
        # truncating directories elsewhere
        # Sort the final list to ensure we preserve 01_, 02_ ordering for e.g. *.patch globs
        final_paths = sorted(
            [mozpath.normsep(path) for path in paths if not os.path.isdir(path)]
        )
        return final_paths

    def fetch_and_unpack(self, revision):
        """Fetch and unpack upstream source.

        Downloads the snapshot archive for ``revision``, clears the vendor
        directory (apart from kept files), extracts the archive into a
        temporary directory after validating every member, applies the
        include/exclude patterns and copies the result into the vendor
        directory. On success ``self._extract_directory`` returns the
        temporary directory; on failure that directory is removed and the
        exception propagates.
        """

        def validate_tar_member(member, path):
            def is_within_directory(directory, target):
                real_directory = os.path.realpath(directory)
                real_target = os.path.realpath(target)
                # commonpath compares whole path components. The previous
                # commonprefix check was character-wise, so "/tmp/x-evil"
                # was accepted as being inside "/tmp/x".
                try:
                    common = os.path.commonpath([real_directory, real_target])
                except ValueError:
                    # Raised e.g. for paths on different drives.
                    return False
                return common == real_directory

            member_path = os.path.join(path, member.name)
            if not is_within_directory(path, member_path):
                raise Exception("Attempted path traversal in tar file: " + member.name)
            if member.issym():
                link_path = os.path.join(os.path.dirname(member_path), member.linkname)
                if not is_within_directory(path, link_path):
                    raise Exception(
                        "Attempted link path traversal in tar file: " + member.name
                    )
            if member.islnk():
                # Hard-link targets are relative to the archive root.
                link_path = os.path.join(path, member.linkname)
                if not is_within_directory(path, link_path):
                    raise Exception(
                        "Attempted link path traversal in tar file: " + member.name
                    )
            if member.mode & (stat.S_ISUID | stat.S_ISGID):
                raise Exception(
                    "Attempted setuid or setgid in tar file: " + member.name
                )

        def safe_extract(tar, path=".", *, numeric_owner=False):
            def _files(tar, path):
                for member in tar:
                    validate_tar_member(member, path)
                    yield member

            tar.extractall(path, members=_files(tar, path), numeric_owner=numeric_owner)

        url = self.source_host.upstream_snapshot(revision)
        self.logInfo({"url": url}, "Fetching code archive from {url}")

        with mozfile.NamedTemporaryFile() as tmptarfile:
            tmpextractdir = tempfile.TemporaryDirectory()
            try:
                req = requests.get(url, stream=True)
                # Stop before the vendor directory is cleaned if the download
                # failed.
                req.raise_for_status()
                for data in req.iter_content(4096):
                    tmptarfile.write(data)
                tmptarfile.seek(0)

                vendor_dir = mozpath.normsep(
                    self.manifest["vendoring"]["vendor-directory"]
                )
                if self.should_perform_step("keep"):
                    self.logInfo({}, "Retaining wanted in-tree files.")
                    to_keep = self.convert_patterns_to_paths(
                        vendor_dir,
                        self.manifest["vendoring"].get("keep", [])
                        + DEFAULT_KEEP_FILES
                        + self.manifest["vendoring"].get("patches", []),
                    )
                else:
                    self.logInfo({}, "Skipping retention of in-tree files.")
                    to_keep = []

                self.logInfo({"vd": vendor_dir}, "Cleaning {vd} to import changes.")
                # We use double asterisk wildcard here to get complete list of recursive contents
                for file in self.convert_patterns_to_paths(vendor_dir, ["**"]):
                    file = mozpath.normsep(file)
                    if file not in to_keep:
                        mozfile.remove(file)

                self.logInfo({"vd": vendor_dir}, "Unpacking upstream files for {vd}.")
                with tarfile.open(tmptarfile.name) as tar:
                    safe_extract(tar, tmpextractdir.name)

                    def get_first_dir(p):
                        halves = os.path.split(p)
                        return get_first_dir(halves[0]) if halves[0] else halves[1]

                    names = tar.getnames()
                    one_prefix = get_first_dir(names[0])
                    has_prefix = all(name.startswith(one_prefix) for name in names)

                # GitLab puts everything down a directory; move it up.
                if has_prefix:
                    tardir = mozpath.join(tmpextractdir.name, one_prefix)
                    mozfile.copy_contents(tardir, tmpextractdir.name)
                    mozfile.remove(tardir)

                if self.should_perform_step("include"):
                    self.logInfo({}, "Retaining wanted files from upstream changes.")
                    to_include = self.convert_patterns_to_paths(
                        tmpextractdir.name,
                        self.manifest["vendoring"].get("include", [])
                        + DEFAULT_INCLUDE_FILES,
                    )
                else:
                    self.logInfo({}, "Skipping retention of included files.")
                    to_include = []

                if self.should_perform_step("exclude"):
                    self.logInfo({}, "Removing excluded files from upstream changes.")
                    to_exclude = self.convert_patterns_to_paths(
                        tmpextractdir.name,
                        self.manifest["vendoring"].get("exclude", [])
                        + DEFAULT_EXCLUDE_FILES,
                    )
                else:
                    self.logInfo({}, "Skipping removing excluded files.")
                    to_exclude = []

                to_exclude = list(set(to_exclude) - set(to_include))
                if to_exclude:
                    self.logInfo(
                        {"files": list_of_paths_to_readable_string(to_exclude)},
                        "Removing: {files}",
                    )
                    for exclusion in to_exclude:
                        mozfile.remove(exclusion)

                # Clear out empty directories
                # removeEmpty() won't remove directories containing only empty directories
                # so just keep calling it as long as it's doing something
                def removeEmpty(tmpextractdir):
                    removed = False
                    folders = list(os.walk(tmpextractdir))[1:]
                    for folder in folders:
                        if not folder[2]:
                            try:
                                os.rmdir(folder[0])
                                removed = True
                            except OSError:
                                # Best effort: the directory still has
                                # sub-directories; a later pass may succeed.
                                pass
                    return removed

                while removeEmpty(tmpextractdir.name):
                    pass

                # Then copy over the directories
                if self.should_perform_step("move-contents"):
                    self.logInfo({"d": vendor_dir}, "Copying to {d}.")
                    mozfile.copy_contents(tmpextractdir.name, vendor_dir)
                else:
                    self.logInfo({}, "Skipping copying contents into tree.")
                # The closure keeps tmpextractdir referenced so later update
                # actions can still resolve {tmpextractdir}.
                self._extract_directory = lambda: tmpextractdir.name
            except Exception:
                tmpextractdir.cleanup()
                raise

    def update_yaml(self, revision, timestamp):
        """Rewrite the ``release:`` and ``revision:`` lines of the yaml file.

        :raises AssertionError: if the number of lines containing either key
            differs from the number of keys being replaced.
        """
        with open(self.yaml_file) as f:
            yaml = f.readlines()

        replaced = 0
        replacements = [
            [" release:", " %s (%s)." % (revision, timestamp)],
            [" revision:", " %s" % (revision)],
        ]

        for i, line in enumerate(yaml):
            for key, value in replacements:
                if key in line:
                    print("Found " + line)
                    replaced += 1
                    # A callable replacement inserts the new text literally,
                    # so backslashes in it are not treated as group
                    # references.
                    yaml[i] = re.sub(
                        key + r" [v\.a-f0-9]+.*$",
                        lambda _match, text=key + value: text,
                        yaml[i],
                    )

        # Raised explicitly (not via assert) so the check survives python -O.
        if len(replacements) != replaced:
            raise AssertionError(
                "Expected to update %d lines in %s but found %d"
                % (len(replacements), self.yaml_file, replaced)
            )

        with open(self.yaml_file, "wb") as f:
            f.write(("".join(yaml)).encode("utf-8"))

    def spurious_check(self, revision, ignore_modified):
        """Exit with status -2 when the update changed nothing of substance.

        Staged changes that are not listed under the manifest's ``generated``
        key count as real changes. When there are none, generated files are
        cleaned (or only reported if ``ignore_modified`` is set) and the
        process exits.
        """
        changed_files = {
            os.path.abspath(f)
            for f in self.repository.get_changed_files(mode="staged")
        }
        generated_files = {
            self.get_full_path(f)
            for f in self.manifest["vendoring"].get("generated", [])
        }
        changed_files = changed_files - generated_files
        if not changed_files:
            self.logInfo({"r": revision}, "Upstream {r} hasn't modified files locally.")
            # We almost certainly won't be here if ignore_modified was passed, because a modified
            # local file will show up as a changed_file, but we'll be safe anyway.
            if not ignore_modified and generated_files:
                for g in generated_files:
                    self.repository.clean_directory(g)
            elif generated_files:
                self.log(
                    logging.CRITICAL,
                    "vendor",
                    {"files": generated_files},
                    "Because you passed --ignore-modified we are not cleaning your"
                    + " working directory, but the following files were probably"
                    + " spuriously edited and can be reverted: {files}",
                )
            sys.exit(-2)

        self.logInfo(
            {"rev": revision, "num": len(changed_files)},
            "Version '{rev}' has changed {num} files.",
        )

    def update_files(self, revision):
        """Run the manifest's ``update-actions`` in order.

        Supported actions: ``copy-file``, ``move-file``, ``move-dir``,
        ``replace-in-file``, ``replace-in-file-regex``, ``delete-path``,
        ``run-script`` and ``run-command``. ``{revision}`` is substituted in
        replacement text and script arguments.

        :raises AssertionError: for an unknown action.
        """
        if "update-actions" not in self.manifest["vendoring"]:
            return

        for update in self.manifest["vendoring"]["update-actions"]:
            if update["action"] == "copy-file":
                src = self.get_full_path(update["from"])
                dst = self.get_full_path(update["to"])

                self.logInfo(
                    {"s": src, "d": dst}, "action: copy-file src: {s} dst: {d}"
                )

                with open(src) as f:
                    contents = f.read()
                with open(dst, "w") as f:
                    f.write(contents)
            elif update["action"] == "move-file":
                src = self.get_full_path(update["from"])
                dst = self.get_full_path(update["to"])

                self.logInfo(
                    {"s": src, "d": dst}, "action: move-file src: {s} dst: {d}"
                )

                shutil.move(src, dst)
            elif update["action"] == "move-dir":
                src = self.get_full_path(update["from"])
                dst = self.get_full_path(update["to"])

                self.logInfo(
                    {"src": src, "dst": dst}, "action: move-dir src: {src} dst: {dst}"
                )

                if not os.path.isdir(src):
                    raise Exception(
                        "Cannot move from a source directory %s that is not a directory"
                        % src
                    )
                os.makedirs(dst, exist_ok=True)

                # Merge src into dst file by file, so anything already in dst
                # that src does not overwrite is kept, then delete src.
                def copy_tree(src, dst):
                    names = os.listdir(src)
                    os.makedirs(dst, exist_ok=True)

                    for name in names:
                        srcname = os.path.join(src, name)
                        dstname = os.path.join(dst, name)

                        if os.path.isdir(srcname):
                            copy_tree(srcname, dstname)
                        else:
                            shutil.copy2(srcname, dstname)

                copy_tree(src, dst)
                shutil.rmtree(src)

            elif update["action"] in ["replace-in-file", "replace-in-file-regex"]:
                file = self.get_full_path(update["file"])

                self.logInfo({"file": file}, "action: replace-in-file file: {file}")

                replacement = update["with"].replace("{revision}", revision)
                _replace_in_file(
                    file,
                    update["pattern"],
                    replacement,
                    regex=update["action"] == "replace-in-file-regex",
                )
            elif update["action"] == "delete-path":
                path = self.get_full_path(update["path"])
                self.logInfo({"path": path}, "action: delete-path path: {path}")
                mozfile.remove(path)
            elif update["action"] in ["run-script", "run-command"]:
                if update["action"] == "run-script":
                    command = self.get_full_path(update["script"], support_cwd=True)
                else:
                    command = update["command"]

                run_dir = self.get_full_path(update["cwd"], support_cwd=True)

                args = []
                for a in update.get("args", []):
                    if a == "{revision}":
                        args.append(revision)
                    elif any(
                        s in a
                        for s in [
                            "{cwd}",
                            "{vendor_dir}",
                            "{yaml_dir}",
                            "{tmpextractdir}",
                        ]
                    ):
                        args.append(self.get_full_path(a, support_cwd=True))
                    else:
                        args.append(a)

                self.logInfo(
                    {
                        "command": command,
                        "run_dir": run_dir,
                        "args": args,
                        "type": update["action"],
                    },
                    "action: {type} command: {command} working dir: {run_dir} args: {args}",
                )
                extra_env = (
                    {"GECKO_PATH": os.getcwd()}
                    if "GECKO_PATH" not in os.environ
                    else {}
                )
                # We also add a signal to scripts that they are running under mach vendor
                extra_env["MACH_VENDOR"] = "1"
                self.run_process(
                    args=[command] + args,
                    cwd=run_dir,
                    log_name=command,
                    require_unix_environment=True,
                    append_env=extra_env,
                )
            else:
                # Raised explicitly (not via assert) so it survives python -O.
                raise AssertionError(
                    "Unknown action supplied (how did this pass validation?)"
                )

    def update_moz_build(self, vendoring_dir, moz_yaml_dir, add_to_exports):
        """Mirror added and removed files into the moz.build files.

        Added source files are always passed to ``add_file_to_moz_build_file``.
        Added header files are passed only when ``add_to_exports`` is true;
        otherwise a warning lists them. Exits with status -1 if any file
        could not be added or removed.
        """
        if vendoring_dir == moz_yaml_dir:
            vendoring_dir = moz_yaml_dir = None

        # If you edit this (especially for header files) you should double check
        # rewrite_mozbuild.py around 'assignment_type'
        source_suffixes = (".cc", ".c", ".cpp", ".S", ".asm")
        header_suffixes = (".h", ".hpp")

        files_removed = self.repository.get_changed_files(diff_filter="D")
        all_files_added = self.repository.get_changed_files(diff_filter="A")

        # Filter the files added to just source files we track in moz.build files.
        files_added = [f for f in all_files_added if f.endswith(source_suffixes)]
        # Headers are selected from the unfiltered list; selecting them from
        # the source-only list (as before) always produced an empty result.
        header_files_to_add = [
            f for f in all_files_added if f.endswith(header_suffixes)
        ]
        if add_to_exports:
            files_added += header_files_to_add
        elif header_files_to_add:
            self.log(
                logging.WARNING,
                "header_files_warning",
                {},
                (
                    "We found %s header files in the update, pass --add-to-exports if you want"
                    + " to attempt to include them in EXPORTS blocks: %s"
                )
                % (len(header_files_to_add), header_files_to_add),
            )

        self.logInfo(
            {"added": len(files_added), "removed": len(files_removed)},
            "Found {added} files added and {removed} files removed.",
        )

        should_abort = False
        for f in files_added:
            try:
                add_file_to_moz_build_file(f, moz_yaml_dir, vendoring_dir)
            except MozBuildRewriteException:
                self.log(
                    logging.ERROR,
                    "vendor",
                    {},
                    "Could not add %s to the appropriate moz.build file" % f,
                )
                should_abort = True

        for f in files_removed:
            try:
                remove_file_from_moz_build_file(f, moz_yaml_dir, vendoring_dir)
            except MozBuildRewriteException:
                self.log(
                    logging.ERROR,
                    "vendor",
                    {},
                    "Could not remove %s from the appropriate moz.build file" % f,
                )
                should_abort = True

        if should_abort:
            self.log(
                logging.ERROR,
                "vendor",
                {},
                "This is a deficiency in ./mach vendor . "
                "Please review the affected files before committing.",
            )
            # Exit with -1 to distinguish this from the Exception case of exiting with 1
            sys.exit(-1)

    def import_local_patches(self, patches, yaml_dir, vendor_dir):
        """Apply each patch matched by ``patches`` with ``patch -p1``.

        Patterns are resolved relative to ``yaml_dir`` and applied in sorted
        order inside ``vendor_dir``.
        """
        self.logInfo({}, "Importing local patches...")
        for patch in self.convert_patterns_to_paths(yaml_dir, patches):
            script = [
                "patch",
                "-p1",
                "--directory",
                vendor_dir,
                "--input",
                os.path.abspath(patch),
                "--no-backup-if-mismatch",
            ]
            self.run_process(
                args=script,
                log_name=script,
            )