Diffstat (limited to 'python/mozbuild/mozbuild/vendor')
-rw-r--r-- | python/mozbuild/mozbuild/vendor/__init__.py | 0
-rw-r--r-- | python/mozbuild/mozbuild/vendor/host_angle.py | 37
-rw-r--r-- | python/mozbuild/mozbuild/vendor/host_base.py | 77
-rw-r--r-- | python/mozbuild/mozbuild/vendor/host_codeberg.py | 28
-rw-r--r-- | python/mozbuild/mozbuild/vendor/host_github.py | 27
-rw-r--r-- | python/mozbuild/mozbuild/vendor/host_gitlab.py | 26
-rw-r--r-- | python/mozbuild/mozbuild/vendor/host_googlesource.py | 32
-rw-r--r-- | python/mozbuild/mozbuild/vendor/mach_commands.py | 232
-rw-r--r-- | python/mozbuild/mozbuild/vendor/moz.build | 8
-rw-r--r-- | python/mozbuild/mozbuild/vendor/moz_yaml.py | 770
-rw-r--r-- | python/mozbuild/mozbuild/vendor/rewrite_mozbuild.py | 1286
-rwxr-xr-x | python/mozbuild/mozbuild/vendor/test_vendor_changes.sh | 65
-rw-r--r-- | python/mozbuild/mozbuild/vendor/vendor_manifest.py | 789
-rw-r--r-- | python/mozbuild/mozbuild/vendor/vendor_python.py | 228
-rw-r--r-- | python/mozbuild/mozbuild/vendor/vendor_rust.py | 961
15 files changed, 4566 insertions, 0 deletions
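The patch below is driven by per-library moz.yaml manifests; the full schema is documented inside moz_yaml.py further down. As a rough orientation, load_moz_yaml() parses and validates such a manifest into a plain dict. The following sketch shows what that dict might look like for a GitHub-hosted library; every concrete value (library name, bug component, revision, paths) is a placeholder for illustration and not taken from any real manifest.

# Hypothetical result of load_moz_yaml("third_party/somelib/moz.yaml").
# Keys follow the schema-1 layout documented in moz_yaml.py below;
# the values are placeholders, not from an in-tree library.
example_manifest = {
    "schema": "1",
    "bugzilla": {"product": "Core", "component": "General"},
    "origin": {
        "name": "somelib",
        "description": "example third-party library",
        "url": "https://example.org/somelib",
        "release": "version 1.2.3",
        "revision": "0123456789abcdef0123456789abcdef01234567",
        "license": "MIT",
    },
    "vendoring": {
        "url": "https://github.com/example/somelib",
        "source-hosting": "github",
        "vendor-directory": "third_party/somelib",
    },
}

A manifest like this is what ./mach vendor path/to/moz.yaml hands to VendorManifest in mach_commands.py, and its source-hosting field selects one of the host_*.py backends added in this patch (github, gitlab, codeberg, googlesource, or angle).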
diff --git a/python/mozbuild/mozbuild/vendor/__init__.py b/python/mozbuild/mozbuild/vendor/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/python/mozbuild/mozbuild/vendor/__init__.py diff --git a/python/mozbuild/mozbuild/vendor/host_angle.py b/python/mozbuild/mozbuild/vendor/host_angle.py new file mode 100644 index 0000000000..9716c76a24 --- /dev/null +++ b/python/mozbuild/mozbuild/vendor/host_angle.py @@ -0,0 +1,37 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, # You can obtain one at http://mozilla.org/MPL/2.0/. + +import requests + +from mozbuild.vendor.host_base import BaseHost + + +class AngleHost(BaseHost): + def upstream_commit(self, revision): + raise Exception("Should not be called") + + def upstream_tag(self, revision): + data = requests.get("https://omahaproxy.appspot.com/all.json").json() + + for row in data: + if row["os"] == "win64": + for version in row["versions"]: + if version["channel"] == "beta": + branch = "chromium/" + version["true_branch"] + + if revision != "HEAD" and revision != branch: + raise Exception( + "Passing a --revision for Angle that is not HEAD " + + "or the true branch is not supported." + ) + + return ( + branch, + version["current_reldate"], + ) + + raise Exception("Could not find win64 beta version in the JSON response") + + def upstream_snapshot(self, revision): + raise Exception("Not supported for Angle") diff --git a/python/mozbuild/mozbuild/vendor/host_base.py b/python/mozbuild/mozbuild/vendor/host_base.py new file mode 100644 index 0000000000..2484d82e09 --- /dev/null +++ b/python/mozbuild/mozbuild/vendor/host_base.py @@ -0,0 +1,77 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +import os +import subprocess +import tempfile +import urllib + + +class BaseHost: + def __init__(self, manifest): + self.manifest = manifest + self.repo_url = urllib.parse.urlparse(self.manifest["vendoring"]["url"]) + + def upstream_tag(self, revision): + """Temporarily clone the repo to get the latest tag and timestamp""" + with tempfile.TemporaryDirectory() as temp_repo_clone: + starting_directory = os.getcwd() + os.chdir(temp_repo_clone) + subprocess.run( + [ + "git", + "clone", + "-c", + "core.autocrlf=input", + self.manifest["vendoring"]["url"], + self.manifest["origin"]["name"], + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + check=True, + ) + os.chdir("/".join([temp_repo_clone, self.manifest["origin"]["name"]])) + if revision == "HEAD": + tag = subprocess.run( + ["git", "--no-pager", "tag", "--sort=creatordate"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + check=True, + ).stdout.splitlines()[-1] + else: + try: + tag = subprocess.run( + ["git", "--no-pager", "tag", "-l", revision], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + check=True, + ).stdout.splitlines()[-1] + except IndexError: # 0 lines of output, the tag does not exist + raise Exception(f"Requested tag {revision} not found in source.") + + tag_timestamp = subprocess.run( + [ + "git", + "log", + "-1", + "--date=iso8601-strict", + "--format=%ad", + tag, + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + check=True, + ).stdout.splitlines()[-1] + os.chdir(starting_directory) + return tag, tag_timestamp + + def upstream_snapshot(self, revision): + raise Exception("Unimplemented for this subclass...") + + def upstream_path_to_file(self, revision, filepath): + raise Exception("Unimplemented for this subclass...") diff --git a/python/mozbuild/mozbuild/vendor/host_codeberg.py b/python/mozbuild/mozbuild/vendor/host_codeberg.py new file mode 100644 index 0000000000..158dd0472d --- /dev/null +++ b/python/mozbuild/mozbuild/vendor/host_codeberg.py @@ -0,0 +1,28 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, # You can obtain one at http://mozilla.org/MPL/2.0/. + +import requests + +from mozbuild.vendor.host_base import BaseHost + + +class CodebergHost(BaseHost): + def upstream_commit(self, revision): + """Query the codeberg api for a git commit id and timestamp.""" + codeberg_api = ( + self.repo_url.scheme + "://" + self.repo_url.netloc + "/api/v1/repos/" + ) + codeberg_api += self.repo_url.path[1:] + codeberg_api += "/git/commits" + req = requests.get("/".join([codeberg_api, revision])) + req.raise_for_status() + info = req.json() + return (info["sha"], info["created"]) + + def upstream_snapshot(self, revision): + codeberg_api = ( + self.repo_url.scheme + "://" + self.repo_url.netloc + "/api/v1/repos/" + ) + codeberg_api += self.repo_url.path[1:] + return "/".join([codeberg_api, "archive", revision + ".tar.gz"]) diff --git a/python/mozbuild/mozbuild/vendor/host_github.py b/python/mozbuild/mozbuild/vendor/host_github.py new file mode 100644 index 0000000000..eeaa4b9eaf --- /dev/null +++ b/python/mozbuild/mozbuild/vendor/host_github.py @@ -0,0 +1,27 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, # You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +import requests + +from mozbuild.vendor.host_base import BaseHost + + +class GitHubHost(BaseHost): + def upstream_commit(self, revision): + """Query the github api for a git commit id and timestamp.""" + github_api = "https://api.github.com" + repo = self.repo_url.path[1:].strip("/") + req = requests.get("/".join([github_api, "repos", repo, "commits", revision])) + req.raise_for_status() + info = req.json() + return (info["sha"], info["commit"]["committer"]["date"]) + + def upstream_snapshot(self, revision): + return "/".join( + [self.manifest["vendoring"]["url"], "archive", revision + ".tar.gz"] + ) + + def upstream_path_to_file(self, revision, filepath): + repo = self.repo_url.path[1:] + return "/".join(["https://raw.githubusercontent.com", repo, revision, filepath]) diff --git a/python/mozbuild/mozbuild/vendor/host_gitlab.py b/python/mozbuild/mozbuild/vendor/host_gitlab.py new file mode 100644 index 0000000000..8bfc3ddc79 --- /dev/null +++ b/python/mozbuild/mozbuild/vendor/host_gitlab.py @@ -0,0 +1,26 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, # You can obtain one at http://mozilla.org/MPL/2.0/. + +import requests + +from mozbuild.vendor.host_base import BaseHost + + +class GitLabHost(BaseHost): + def upstream_commit(self, revision): + """Query the gitlab api for a git commit id and timestamp.""" + gitlab_api = ( + self.repo_url.scheme + "://" + self.repo_url.netloc + "/api/v4/projects/" + ) + gitlab_api += self.repo_url.path[1:].replace("/", "%2F") + gitlab_api += "/repository/commits" + req = requests.get("/".join([gitlab_api, revision])) + req.raise_for_status() + info = req.json() + return (info["id"], info["committed_date"]) + + def upstream_snapshot(self, revision): + return "/".join( + [self.manifest["vendoring"]["url"], "-", "archive", revision + ".tar.gz"] + ) diff --git a/python/mozbuild/mozbuild/vendor/host_googlesource.py b/python/mozbuild/mozbuild/vendor/host_googlesource.py new file mode 100644 index 0000000000..c903bd99b5 --- /dev/null +++ b/python/mozbuild/mozbuild/vendor/host_googlesource.py @@ -0,0 +1,32 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, # You can obtain one at http://mozilla.org/MPL/2.0/. + +import requests + +from mozbuild.vendor.host_base import BaseHost + + +class GoogleSourceHost(BaseHost): + def upstream_commit(self, revision): + """Query for a git commit and timestamp.""" + url = "/".join( + [self.manifest["vendoring"]["url"], "+", revision + "?format=JSON"] + ) + req = requests.get(url) + req.raise_for_status() + try: + info = req.json() + except ValueError: + # As of 2017 May, googlesource sends 4 garbage characters + # at the beginning of the json response. Work around this. + # https://bugs.chromium.org/p/chromium/issues/detail?id=718550 + import json + + info = json.loads(req.text[4:]) + return (info["commit"], info["committer"]["time"]) + + def upstream_snapshot(self, revision): + return "/".join( + [self.manifest["vendoring"]["url"], "+archive", revision + ".tar.gz"] + ) diff --git a/python/mozbuild/mozbuild/vendor/mach_commands.py b/python/mozbuild/mozbuild/vendor/mach_commands.py new file mode 100644 index 0000000000..30fb0e16a5 --- /dev/null +++ b/python/mozbuild/mozbuild/vendor/mach_commands.py @@ -0,0 +1,232 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, # You can obtain one at http://mozilla.org/MPL/2.0/. + +import logging +import sys + +from mach.decorators import Command, CommandArgument, SubCommand + +from mozbuild.vendor.moz_yaml import MozYamlVerifyError, load_moz_yaml + + +# Fun quirk of ./mach - you can specify a default argument as well as subcommands. +# If the default argument matches a subcommand, the subcommand gets called. If it +# doesn't, we wind up in the default command. +@Command( + "vendor", + category="misc", + description="Vendor third-party dependencies into the source repository.", +) +@CommandArgument( + "--check-for-update", + action="store_true", + help="For scripted use, prints the new commit to update to, or nothing if up to date.", + default=False, +) +@CommandArgument( + "--add-to-exports", + action="store_true", + help="Will attempt to add new header files into any relevant EXPORTS block.", + default=False, +) +@CommandArgument( + "--ignore-modified", + action="store_true", + help="Ignore modified files in current checkout.", + default=False, +) +@CommandArgument("-r", "--revision", help="Repository tag or commit to update to.") +@CommandArgument( + "-f", + "--force", + action="store_true", + help="Force a re-vendor even if we're up to date", +) +@CommandArgument( + "--verify", "-v", action="store_true", help="(Only) verify the manifest." +) +@CommandArgument( + "--patch-mode", + help="Select how vendored patches will be imported. 'none' skips patch import, and" + "'only' imports patches and skips library vendoring.", + default="", +) +@CommandArgument("library", nargs=1, help="The moz.yaml file of the library to vendor.") +def vendor( + command_context, + library, + revision, + ignore_modified=False, + check_for_update=False, + add_to_exports=False, + force=False, + verify=False, + patch_mode="", +): + """ + Vendor third-party dependencies into the source repository. + + Vendoring rust and python can be done with ./mach vendor [rust/python]. + Vendoring other libraries can be done with ./mach vendor [arguments] path/to/file.yaml + """ + library = library[0] + assert library not in ["rust", "python"] + + command_context.populate_logger() + command_context.log_manager.enable_unstructured() + if check_for_update: + logging.disable(level=logging.CRITICAL) + + try: + manifest = load_moz_yaml(library) + if verify: + print("%s: OK" % library) + sys.exit(0) + except MozYamlVerifyError as e: + print(e) + sys.exit(1) + + if "vendoring" not in manifest: + raise Exception( + "Cannot perform update actions if we don't have a 'vendoring' section in the moz.yaml" + ) + + if patch_mode and patch_mode not in ["none", "only"]: + print( + "Unknown patch mode given '%s'. Please use one of: 'none' or 'only'." + % patch_mode + ) + sys.exit(1) + if ( + manifest["vendoring"].get("patches", []) + and not patch_mode + and not check_for_update + ): + print( + "Patch mode was not given when required. Please use one of: 'none' or 'only'" + ) + sys.exit(1) + if patch_mode == "only" and not manifest["vendoring"].get("patches", []): + print( + "Patch import was specified for %s but there are no vendored patches defined." + % library + ) + sys.exit(1) + + if not ignore_modified and not check_for_update: + check_modified_files(command_context) + elif ignore_modified and not check_for_update: + print( + "Because you passed --ignore-modified we will not be " + + "able to detect spurious upstream updates." 
+ ) + + if not revision: + revision = "HEAD" + + from mozbuild.vendor.vendor_manifest import VendorManifest + + vendor_command = command_context._spawn(VendorManifest) + vendor_command.vendor( + command_context, + library, + manifest, + revision, + ignore_modified, + check_for_update, + force, + add_to_exports, + patch_mode, + ) + + sys.exit(0) + + +def check_modified_files(command_context): + """ + Ensure that there aren't any uncommitted changes to files + in the working copy, since we're going to change some state + on the user. + """ + modified = command_context.repository.get_changed_files("M") + if modified: + command_context.log( + logging.ERROR, + "modified_files", + {}, + """You have uncommitted changes to the following files: + +{files} + +Please commit or stash these changes before vendoring, or re-run with `--ignore-modified`. +""".format( + files="\n".join(sorted(modified)) + ), + ) + sys.exit(1) + + +# ===================================================================== + + +@SubCommand( + "vendor", + "rust", + description="Vendor rust crates from crates.io into third_party/rust", +) +@CommandArgument( + "--ignore-modified", + action="store_true", + help="Ignore modified files in current checkout", + default=False, +) +@CommandArgument( + "--build-peers-said-large-imports-were-ok", + action="store_true", + help=( + "Permit overly-large files to be added to the repository. " + "To get permission to set this, raise a question in the #build " + "channel at https://chat.mozilla.org." + ), + default=False, +) +@CommandArgument( + "--issues-json", + help="Path to a code-review issues.json file to write out", +) +def vendor_rust(command_context, **kwargs): + from mozbuild.vendor.vendor_rust import VendorRust + + vendor_command = command_context._spawn(VendorRust) + issues_json = kwargs.pop("issues_json", None) + ok = vendor_command.vendor(**kwargs) + if issues_json: + with open(issues_json, "w") as fh: + fh.write(vendor_command.serialize_issues_json()) + sys.exit(0 if ok else 1) + + +# ===================================================================== + + +@SubCommand( + "vendor", + "python", + description="Vendor Python packages from pypi.org into third_party/python. " + "Some extra files like docs and tests will automatically be excluded." + "Installs the packages listed in third_party/python/requirements.in and " + "their dependencies.", + virtualenv_name="vendor", +) +@CommandArgument( + "--keep-extra-files", + action="store_true", + default=False, + help="Keep all files, including tests and documentation.", +) +def vendor_python(command_context, keep_extra_files): + from mozbuild.vendor.vendor_python import VendorPython + + vendor_command = command_context._spawn(VendorPython) + vendor_command.vendor(keep_extra_files) diff --git a/python/mozbuild/mozbuild/vendor/moz.build b/python/mozbuild/mozbuild/vendor/moz.build new file mode 100644 index 0000000000..315dc32600 --- /dev/null +++ b/python/mozbuild/mozbuild/vendor/moz.build @@ -0,0 +1,8 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +with Files("**"): + BUG_COMPONENT = ("Developer Infrastructure", "Mach Vendor & Updatebot") diff --git a/python/mozbuild/mozbuild/vendor/moz_yaml.py b/python/mozbuild/mozbuild/vendor/moz_yaml.py new file mode 100644 index 0000000000..51210e19b2 --- /dev/null +++ b/python/mozbuild/mozbuild/vendor/moz_yaml.py @@ -0,0 +1,770 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, # You can obtain one at http://mozilla.org/MPL/2.0/. + +# Utility package for working with moz.yaml files. +# +# Requires `pyyaml` and `voluptuous` +# (both are in-tree under third_party/python) + +import errno +import os +import re + +import voluptuous +import yaml +from voluptuous import ( + All, + Boolean, + FqdnUrl, + In, + Invalid, + Length, + Match, + Msg, + Required, + Schema, + Unique, +) +from yaml.error import MarkedYAMLError + +# TODO ensure this matches the approved list of licenses +VALID_LICENSES = [ + # Standard Licenses (as per https://spdx.org/licenses/) + "Apache-2.0", + "BSD-2-Clause", + "BSD-3-Clause", + "BSD-3-Clause-Clear", + "BSL-1.0", + "CC0-1.0", + "ISC", + "ICU", + "LGPL-2.1", + "LGPL-3.0", + "MIT", + "MPL-1.1", + "MPL-2.0", + "Unlicense", + "WTFPL", + "Zlib", + # Unique Licenses + "ACE", # http://www.cs.wustl.edu/~schmidt/ACE-copying.html + "Anti-Grain-Geometry", # http://www.antigrain.com/license/index.html + "JPNIC", # https://www.nic.ad.jp/ja/idn/idnkit/download/index.html + "Khronos", # https://www.khronos.org/openmaxdl + "libpng", # http://www.libpng.org/pub/png/src/libpng-LICENSE.txt + "Unicode", # http://www.unicode.org/copyright.html +] + +VALID_SOURCE_HOSTS = ["gitlab", "googlesource", "github", "angle", "codeberg"] + +""" +--- +# Third-Party Library Template +# All fields are mandatory unless otherwise noted + +# Version of this schema +schema: 1 + +bugzilla: + # Bugzilla product and component for this directory and subdirectories + product: product name + component: component name + +# Document the source of externally hosted code +origin: + + # Short name of the package/library + name: name of the package + + description: short (one line) description + + # Full URL for the package's homepage/etc + # Usually different from repository url + url: package's homepage url + + # Human-readable identifier for this version/release + # Generally "version NNN", "tag SSS", "bookmark SSS" + release: identifier + + # Revision to pull in + # Must be a long or short commit SHA (long preferred) + revision: sha + + # The package's license, where possible using the mnemonic from + # https://spdx.org/licenses/ + # Multiple licenses can be specified (as a YAML list) + # A "LICENSE" file must exist containing the full license text + license: MPL-2.0 + + # If the package's license is specified in a particular file, + # this is the name of the file. + # optional + license-file: COPYING + + # If there are any mozilla-specific notes you want to put + # about a library, they can be put here. + notes: Notes about the library + +# Configuration for the automated vendoring system. +# optional +vendoring: + + # Repository URL to vendor from + # eg. https://github.com/kinetiknz/nestegg + # Any repository host can be specified here, however initially we'll only + # support automated vendoring from selected sources. 
+ url: source url (generally repository clone url) + + # Type of hosting for the upstream repository + # Valid values are 'gitlab', 'github', googlesource + source-hosting: gitlab + + # Type of Vendoring + # This is either 'regular', 'individual-files', or 'rust' + # If omitted, will default to 'regular' + flavor: rust + + # Type of git reference (commit, tag) to track updates from. + # You cannot use tag tracking with the individual-files flavor + # If omitted, will default to tracking commits. + tracking: commit + + # Base directory of the location where the source files will live in-tree. + # If omitted, will default to the location the moz.yaml file is in. + vendor-directory: third_party/directory + + # Allows skipping certain steps of the vendoring process. + # Most useful if e.g. vendoring upstream is complicated and should be done by a script + # The valid steps that can be skipped are listed below + skip-vendoring-steps: + - fetch + - keep + - include + - exclude + - move-contents + - hg-add + - spurious-check + - update-moz-yaml + - update-moz-build + + # List of patch files to apply after vendoring. Applied in the order + # specified, and alphabetically if globbing is used. Patches must apply + # cleanly before changes are pushed. + # Patch files should be relative to the vendor-directory rather than the gecko + # root directory. + # All patch files are implicitly added to the keep file list. + # optional + patches: + - file + - path/to/file + - path/*.patch + - path/** # Captures all files and subdirectories below path + - path/* # Captures all files but _not_ subdirectories below path. Equivalent to `path/` + + # List of files that are not removed from the destination directory while vendoring + # in a new version of the library. Intended for mozilla files not present in upstream. + # Implicitly contains "moz.yaml", "moz.build", and any files referenced in + # "patches" + # optional + keep: + - file + - path/to/file + - another/path + - *.mozilla + + # Files/paths that will not be vendored from the upstream repository + # Implicitly contains ".git", and ".gitignore" + # optional + exclude: + - file + - path/to/file + - another/path + - docs + - src/*.test + + # Files/paths that will always be vendored from source repository, even if + # they would otherwise be excluded by "exclude". + # optional + include: + - file + - path/to/file + - another/path + - docs/LICENSE.* + + # Files that are modified as part of the update process. + # To avoid creating updates that don't update anything, ./mach vendor will detect + # if any in-tree files have changed. If there are files that are always changed + # during an update process (e.g. version numbers or source revisions), list them + # here to avoid having them counted as substative changes. + # This field does NOT support directories or globbing + # optional + generated: + - '{yaml_dir}/vcs_version.h' + + # If neither "exclude" or "include" are set, all files will be vendored + # Files/paths in "include" will always be vendored, even if excluded + # eg. excluding "docs/" then including "docs/LICENSE" will vendor just the + # LICENSE file from the docs directory + + # All three file/path parameters ("keep", "exclude", and "include") support + # filenames, directory names, and globs/wildcards. + + # Actions to take after updating. Applied in order. + # The action subfield is required. 
It must be one of: + # - copy-file + # - move-file + # - move-dir + # - replace-in-file + # - replace-in-file-regex + # - delete-path + # - run-script + # Unless otherwise noted, all subfields of action are required. + # + # If the action is copy-file, move-file, or move-dir: + # from is the source file + # to is the destination + # + # If the action is replace-in-file or replace-in-file-regex: + # pattern is what in the file to search for. It is an exact strng match. + # with is the string to replace it with. Accepts the special keyword + # '{revision}' for the commit we are updating to. + # File is the file to replace it in. + # + # If the action is delete-path + # path is the file or directory to recursively delete + # + # If the action is run-script: + # script is the script to run + # cwd is the directory the script should run with as its cwd + # args is a list of arguments to pass to the script + # + # If the action is run-command: + # command is the command to run + # Unlike run-script, `command` is _not_ processed to be relative + # to the vendor directory, and is passed directly to python's + # execution code without any path substitution or manipulation + # cwd is the directory the command should run with as its cwd + # args is a list of arguments to pass to the command + # + # + # Unless specified otherwise, all files/directories are relative to the + # vendor-directory. If the vendor-directory is different from the + # directory of the yaml file, the keyword '{yaml_dir}' may be used + # to make the path relative to that directory. + # 'run-script' supports the addictional keyword {cwd} which, if used, + # must only be used at the beginning of the path. + # + # optional + update-actions: + - action: copy-file + from: include/vcs_version.h.in + to: '{yaml_dir}/vcs_version.h' + + - action: replace-in-file + pattern: '@VCS_TAG@' + with: '{revision}' + file: '{yaml_dir}/vcs_version.h' + + - action: delete-path + path: '{yaml_dir}/config' + + - action: run-script + script: '{cwd}/generate_sources.sh' + cwd: '{yaml_dir}' + + +# Configuration for automatic updating system. +# optional +updatebot: + + # TODO: allow multiple users to be specified + # Phabricator username for a maintainer of the library, used for assigning + # reviewers. For a review group, preface with #, such as "#build"" + maintainer-phab: tjr + + # Bugzilla email address for a maintainer of the library, used for needinfos + maintainer-bz: tom@mozilla.com + + # Optional: A preset for ./mach try to use. If present, fuzzy-query and fuzzy-paths will + # be ignored. If it, fuzzy-query, and fuzzy-path are omitted, ./mach try auto will be used + try-preset: media + + # Optional: A query string for ./mach try fuzzy. If try-preset, it and fuzzy-paths are omitted + # then ./mach try auto will be used + fuzzy-query: media + + # Optional: An array of test paths for ./mach try fuzzy. If try-preset, it and fuzzy-query are + # omitted then ./mach try auto will be used + fuzzy-paths: ['media'] + + # The tasks that Updatebot can run. 
Only one of each task is currently permitted + # optional + tasks: + - type: commit-alert + branch: upstream-branch-name + cc: ["bugzilla@email.address", "another@example.com"] + needinfo: ["bugzilla@email.address", "another@example.com"] + enabled: True + filter: security + frequency: every + platform: windows + blocking: 1234 + - type: vendoring + branch: master + enabled: False + + # frequency can be 'every', 'release', 'N weeks', 'N commits' + # or 'N weeks, M commits' requiring satisfying both constraints. + frequency: 2 weeks +""" + +RE_SECTION = re.compile(r"^(\S[^:]*):").search +RE_FIELD = re.compile(r"^\s\s([^:]+):\s+(\S+)$").search + + +class MozYamlVerifyError(Exception): + def __init__(self, filename, error): + self.filename = filename + self.error = error + + def __str__(self): + return "%s: %s" % (self.filename, self.error) + + +def load_moz_yaml(filename, verify=True, require_license_file=True): + """Loads and verifies the specified manifest.""" + + # Load and parse YAML. + try: + with open(filename, "r") as f: + manifest = yaml.load(f, Loader=yaml.BaseLoader) + except IOError as e: + if e.errno == errno.ENOENT: + raise MozYamlVerifyError(filename, "Failed to find manifest: %s" % filename) + raise + except MarkedYAMLError as e: + raise MozYamlVerifyError(filename, e) + + if not verify: + return manifest + + # Verify schema. + if "schema" not in manifest: + raise MozYamlVerifyError(filename, 'Missing manifest "schema"') + if manifest["schema"] == "1": + schema = _schema_1() + schema_additional = _schema_1_additional + schema_transform = _schema_1_transform + else: + raise MozYamlVerifyError(filename, "Unsupported manifest schema") + + try: + schema(manifest) + schema_additional(filename, manifest, require_license_file=require_license_file) + manifest = schema_transform(manifest) + except (voluptuous.Error, ValueError) as e: + raise MozYamlVerifyError(filename, e) + + return manifest + + +def _schema_1(): + """Returns Voluptuous Schema object.""" + return Schema( + { + Required("schema"): "1", + Required("bugzilla"): { + Required("product"): All(str, Length(min=1)), + Required("component"): All(str, Length(min=1)), + }, + "origin": { + Required("name"): All(str, Length(min=1)), + Required("description"): All(str, Length(min=1)), + "notes": All(str, Length(min=1)), + Required("url"): FqdnUrl(), + Required("license"): Msg(License(), msg="Unsupported License"), + "license-file": All(str, Length(min=1)), + Required("release"): All(str, Length(min=1)), + # The following regex defines a valid git reference + # The first group [^ ~^:?*[\]] matches 0 or more times anything + # that isn't a Space, ~, ^, :, ?, *, or ] + # The second group [^ ~^:?*[\]\.]+ matches 1 or more times + # anything that isn't a Space, ~, ^, :, ?, *, [, ], or . 
+ "revision": Match(r"^[^ ~^:?*[\]]*[^ ~^:?*[\]\.]+$"), + }, + "updatebot": { + Required("maintainer-phab"): All(str, Length(min=1)), + Required("maintainer-bz"): All(str, Length(min=1)), + "try-preset": All(str, Length(min=1)), + "fuzzy-query": All(str, Length(min=1)), + "fuzzy-paths": All([str], Length(min=1)), + "tasks": All( + UpdatebotTasks(), + [ + { + Required("type"): In( + ["vendoring", "commit-alert"], + msg="Invalid type specified in tasks", + ), + "branch": All(str, Length(min=1)), + "enabled": Boolean(), + "cc": Unique([str]), + "needinfo": Unique([str]), + "filter": In( + ["none", "security", "source-extensions"], + msg="Invalid filter value specified in tasks", + ), + "source-extensions": Unique([str]), + "blocking": Match(r"^[0-9]+$"), + "frequency": Match( + r"^(every|release|[1-9][0-9]* weeks?|[1-9][0-9]* commits?|" + + r"[1-9][0-9]* weeks?, ?[1-9][0-9]* commits?)$" + ), + "platform": Match(r"^(windows|linux)$"), + } + ], + ), + }, + "vendoring": { + Required("url"): FqdnUrl(), + Required("source-hosting"): All( + str, + Length(min=1), + In(VALID_SOURCE_HOSTS, msg="Unsupported Source Hosting"), + ), + "tracking": Match(r"^(commit|tag)$"), + "flavor": Match(r"^(regular|rust|individual-files)$"), + "skip-vendoring-steps": Unique([str]), + "vendor-directory": All(str, Length(min=1)), + "patches": Unique([str]), + "keep": Unique([str]), + "exclude": Unique([str]), + "include": Unique([str]), + "generated": Unique([str]), + "individual-files": [ + { + Required("upstream"): All(str, Length(min=1)), + Required("destination"): All(str, Length(min=1)), + } + ], + "individual-files-default-upstream": All(str, Length(min=1)), + "individual-files-default-destination": All(str, Length(min=1)), + "individual-files-list": Unique([str]), + "update-actions": All( + UpdateActions(), + [ + { + Required("action"): In( + [ + "copy-file", + "move-file", + "move-dir", + "replace-in-file", + "replace-in-file-regex", + "run-script", + "run-command", + "delete-path", + ], + msg="Invalid action specified in update-actions", + ), + "from": All(str, Length(min=1)), + "to": All(str, Length(min=1)), + "pattern": All(str, Length(min=1)), + "with": All(str, Length(min=1)), + "file": All(str, Length(min=1)), + "script": All(str, Length(min=1)), + "command": All(str, Length(min=1)), + "args": All([All(str, Length(min=1))]), + "cwd": All(str, Length(min=1)), + "path": All(str, Length(min=1)), + } + ], + ), + }, + } + ) + + +def _schema_1_additional(filename, manifest, require_license_file=True): + """Additional schema/validity checks""" + + vendor_directory = os.path.dirname(filename) + if "vendoring" in manifest and "vendor-directory" in manifest["vendoring"]: + vendor_directory = manifest["vendoring"]["vendor-directory"] + + # LICENSE file must exist, except for Rust crates which are exempted + # because the license is required to be specified in the Cargo.toml file + if require_license_file and "origin" in manifest: + files = [f.lower() for f in os.listdir(vendor_directory)] + if ( + not ( + "license-file" in manifest["origin"] + and manifest["origin"]["license-file"].lower() in files + ) + and not ( + "license" in files + or "license.txt" in files + or "license.rst" in files + or "license.html" in files + or "license.md" in files + ) + and not ( + "vendoring" in manifest + and manifest["vendoring"].get("flavor", "regular") == "rust" + ) + ): + license = manifest["origin"]["license"] + if isinstance(license, list): + license = "/".join(license) + raise ValueError("Failed to find %s LICENSE file" % 
license) + + # Cannot vendor without an origin. + if "vendoring" in manifest and "origin" not in manifest: + raise ValueError('"vendoring" requires an "origin"') + + # Cannot vendor without a computer-readable revision. + if "vendoring" in manifest and "revision" not in manifest["origin"]: + raise ValueError( + 'If "vendoring" is present, "revision" must be present in "origin"' + ) + + # The Rust and Individual Flavor type precludes a lot of options + # individual-files could, in theory, use several of these, but until we have a use case let's + # disallow them so we're not worrying about whether they work. When we need them we can make + # sure they do. + if ( + "vendoring" in manifest + and manifest["vendoring"].get("flavor", "regular") != "regular" + ): + for i in [ + "skip-vendoring-steps", + "keep", + "exclude", + "include", + "generated", + ]: + if i in manifest["vendoring"]: + raise ValueError("A non-regular flavor of update cannot use '%s'" % i) + + if manifest["vendoring"].get("flavor", "regular") == "rust": + for i in [ + "update-actions", + ]: + if i in manifest["vendoring"]: + raise ValueError("A rust flavor of update cannot use '%s'" % i) + + # Ensure that only individual-files flavor uses those options + if ( + "vendoring" in manifest + and manifest["vendoring"].get("flavor", "regular") != "individual-files" + ): + if ( + "individual-files" in manifest["vendoring"] + or "individual-files-list" in manifest["vendoring"] + ): + raise ValueError( + "Only individual-files flavor of update can use 'individual-files'" + ) + + # Ensure that the individual-files flavor has all the correct options + if ( + "vendoring" in manifest + and manifest["vendoring"].get("flavor", "regular") == "individual-files" + ): + # Because the only way we can determine the latest tag is by doing a local clone, + # we don't want to do that for individual-files flavors because those flavors are + # usually on gigantic repos we don't want to clone for such a simple thing. + if manifest["vendoring"].get("tracking", "commit") == "tag": + raise ValueError( + "You cannot use tag tracking with the individual-files flavor. (Sorry.)" + ) + + # We need either individual-files or individual-files-list + if ( + "individual-files" not in manifest["vendoring"] + and "individual-files-list" not in manifest["vendoring"] + ): + raise ValueError( + "The individual-files flavor must include either " + + "'individual-files' or 'individual-files-list'" + ) + # For whichever we have, make sure we don't have the other and we don't have + # options we shouldn't or lack ones we should. 
+ if "individual-files" in manifest["vendoring"]: + if "individual-files-list" in manifest["vendoring"]: + raise ValueError( + "individual-files-list is mutually exclusive with individual-files" + ) + if "individual-files-default-upstream" in manifest["vendoring"]: + raise ValueError( + "individual-files-default-upstream can only be used with individual-files-list" + ) + if "individual-files-default-destination" in manifest["vendoring"]: + raise ValueError( + "individual-files-default-destination can only be used " + + "with individual-files-list" + ) + if "individual-files-list" in manifest["vendoring"]: + if "individual-files" in manifest["vendoring"]: + raise ValueError( + "individual-files is mutually exclusive with individual-files-list" + ) + if "individual-files-default-upstream" not in manifest["vendoring"]: + raise ValueError( + "individual-files-default-upstream must be used with individual-files-list" + ) + if "individual-files-default-destination" not in manifest["vendoring"]: + raise ValueError( + "individual-files-default-destination must be used with individual-files-list" + ) + + if "updatebot" in manifest: + # If there are Updatebot tasks, then certain fields must be present and + # defaults need to be set. + if "tasks" in manifest["updatebot"]: + if "vendoring" not in manifest or "url" not in manifest["vendoring"]: + raise ValueError( + "If Updatebot tasks are specified, a vendoring url must be included." + ) + + if "try-preset" in manifest["updatebot"]: + for f in ["fuzzy-query", "fuzzy-paths"]: + if f in manifest["updatebot"]: + raise ValueError( + "If 'try-preset' is specified, then %s cannot be" % f + ) + + # Check for a simple YAML file + with open(filename, "r") as f: + has_schema = False + for line in f.readlines(): + m = RE_SECTION(line) + if m: + if m.group(1) == "schema": + has_schema = True + break + if not has_schema: + raise ValueError("Not simple YAML") + + +# Do type conversion for the few things that need it. 
+# Everythig is parsed as a string to (a) not cause problems with revisions that +# are only numerals and (b) not strip leading zeros from the numbers if we just +# converted them to string +def _schema_1_transform(manifest): + if "updatebot" in manifest: + if "tasks" in manifest["updatebot"]: + for i in range(len(manifest["updatebot"]["tasks"])): + if "enabled" in manifest["updatebot"]["tasks"][i]: + val = manifest["updatebot"]["tasks"][i]["enabled"] + manifest["updatebot"]["tasks"][i]["enabled"] = ( + val.lower() == "true" or val.lower() == "yes" + ) + return manifest + + +class UpdateActions(object): + """Voluptuous validator which verifies the update actions(s) are valid.""" + + def __call__(self, values): + for v in values: + if "action" not in v: + raise Invalid("All file-update entries must specify a valid action") + if v["action"] in ["copy-file", "move-file", "move-dir"]: + if "from" not in v or "to" not in v or len(v.keys()) != 3: + raise Invalid( + "%s action must (only) specify 'from' and 'to' keys" + % v["action"] + ) + elif v["action"] in ["replace-in-file", "replace-in-file-regex"]: + if ( + "pattern" not in v + or "with" not in v + or "file" not in v + or len(v.keys()) != 4 + ): + raise Invalid( + "replace-in-file action must (only) specify " + + "'pattern', 'with', and 'file' keys" + ) + elif v["action"] == "delete-path": + if "path" not in v or len(v.keys()) != 2: + raise Invalid( + "delete-path action must (only) specify the 'path' key" + ) + elif v["action"] == "run-script": + if "script" not in v or "cwd" not in v: + raise Invalid( + "run-script action must specify 'script' and 'cwd' keys" + ) + if set(v.keys()) - set(["args", "cwd", "script", "action"]) != set(): + raise Invalid( + "run-script action may only specify 'script', 'cwd', and 'args' keys" + ) + elif v["action"] == "run-command": + if "command" not in v or "cwd" not in v: + raise Invalid( + "run-command action must specify 'command' and 'cwd' keys" + ) + if set(v.keys()) - set(["args", "cwd", "command", "action"]) != set(): + raise Invalid( + "run-command action may only specify 'command', 'cwd', and 'args' keys" + ) + else: + # This check occurs before the validator above, so the above is + # redundant but we leave it to be verbose. + raise Invalid("Supplied action " + v["action"] + " is invalid.") + return values + + def __repr__(self): + return "UpdateActions" + + +class UpdatebotTasks(object): + """Voluptuous validator which verifies the updatebot task(s) are valid.""" + + def __call__(self, values): + seenTaskTypes = set() + for v in values: + if "type" not in v: + raise Invalid("All updatebot tasks must specify a valid type") + + if v["type"] in seenTaskTypes: + raise Invalid("Only one type of each task is currently supported") + seenTaskTypes.add(v["type"]) + + if v["type"] == "vendoring": + for i in ["filter", "branch", "source-extensions"]: + if i in v: + raise Invalid( + "'%s' is only valid for commit-alert task types" % i + ) + elif v["type"] == "commit-alert": + pass + else: + # This check occurs before the validator above, so the above is + # redundant but we leave it to be verbose. 
+ raise Invalid("Supplied type " + v["type"] + " is invalid.") + return values + + def __repr__(self): + return "UpdatebotTasks" + + +class License(object): + """Voluptuous validator which verifies the license(s) are valid as per our + allow list.""" + + def __call__(self, values): + if isinstance(values, str): + values = [values] + elif not isinstance(values, list): + raise Invalid("Must be string or list") + for v in values: + if v not in VALID_LICENSES: + raise Invalid("Bad License") + return values + + def __repr__(self): + return "License" diff --git a/python/mozbuild/mozbuild/vendor/rewrite_mozbuild.py b/python/mozbuild/mozbuild/vendor/rewrite_mozbuild.py new file mode 100644 index 0000000000..8163c05dc3 --- /dev/null +++ b/python/mozbuild/mozbuild/vendor/rewrite_mozbuild.py @@ -0,0 +1,1286 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, # You can obtain one at http://mozilla.org/MPL/2.0/. + +# Utility package for working with moz.yaml files. +# +# Requires `pyyaml` and `voluptuous` +# (both are in-tree under third_party/python) + +""" +Problem: + ./mach vendor needs to be able to add or remove files from moz.build files automatically to + be able to effectively update a library automatically and send useful try runs in. + + So far, it has been difficult to do that. + + Why: + - Some files need to go into UNIFIED_SOURCES vs SOURCES + - Some files are os-specific, and need to go into per-OS conditionals + - Some files are both UNIFIED_SOURCES/SOURCES sensitive and OS-specific. + +Proposal: + Design an algorithm that maps a third party library file to a suspected moz.build location. + Run the algorithm on all files specified in all third party libraries' moz.build files. + See if the proposed place in the moz.build file matches the actual place. + +Initial Algorithm + Given a file, which includes the filename and the path from gecko root, we want to find the + correct moz.build file and location within that file. + Take the path of the file, and iterate up the directory tree, looking for moz.build files as + we go. + Consider each of these moz.build files, starting with the one closest to the file. + Within a moz.build file, identify the SOURCES or UNIFIED_SOURCES block(s) that contains a file + in the same directory path as the file to be added. + If there is only one such block, use that one. + If there are multiple blocks, look at the files within each block and note the longest length + of a common prefix (including partial filenames - if we just did full directories the + result would be the same as the prior step and we would not narrow the results down). Use + the block containing the longest prefix. (We call this 'guessing'.) + +Result of the proposal: + The initial implementation works on 1675 of 1977 elligible files. + The files it does not work on include: + - general failures. Such as when we find that avutil.cpp wants to be next to adler32.cpp + but avutil.cpp is in SOURCES and adler32.cpp is in UNIFIED_SOURCES. (And many similar + cases.) + - per-cpu-feature files, where only a single file is added under a conditional + - When guessing, because of a len(...) > longest_so_far comparison, we would prefer the + first block we found. 
+ - Changing this to prefer UNIFIED_SOURCES in the event of a tie + yielded 17 additional correct assignments (about a 1% improvement) + - As a result of the change immediately above, when guessing, because given equal + prefixes, we would prefer a UNIFIED_SOURCES block over other blocks, even if the other + blocks are longer + - Changing this (again) to prefer the block containing more files yielded 49 additional + correct assignments (about a 2.5% improvement) + + The files that are ineligible for consideration are: + - Those in libwebrtc + - Those specified in source assignments composed of generators (e.g. [f for f in '%.c']) + - Those specified in source assignments to subscripted variables + (e.g. SOURCES += foo['x86_files']) + + We needed to iterate up the directory and look at a different moz.build file _zero_ times. + This indicates this code is probably not needed, and therefore we will remove it from the + algorithm. + We needed to guess base on the longest prefix 944 times, indicating that this code is + absolutely crucial and should be double-checked. (And indeed, upon double-checking it, + bugs were identified.) + + After some initial testing, it was determined that this code completely fell down when the + vendoring directory differed from the moz.yaml directory (definitions below.) The code was + slightly refactored to handle this case, primarily by (a) re-inserting the logic to check + multiple moz.build files instead of the first and (b) handling some complicated normalization + notions (details in comments). + +Slightly Improved Algorithm Changes: + Don't bother iterating up the directory tree looking for moz.build files, just take the first. + When guessing, in the event of a common-prefix tie, prefer the block containing more files + + With these changes, we now Successfully Matched 1724 of 1977 files + +CODE CONCEPTS + +source-assignment + An assignment of files to a SOURCES or UNIFIED_SOURCES variable, such as + SOURCES += ['ffpvx.cpp'] + + We specifically look only for these two variable names to avoid identifying things + such as CXX_FLAGS. + + Sometimes; however, there is an intermediary variable, such as `SOURCES += celt_filenames` + In this situation we find the celt_filenames assignment, and treat it as a 'source-assignment' + +source-assignment-location + source-assignment-location is a human readable string that identifies where in the moz.build + file the source-assignment is. It can used to visually match the location upon manual + inspection; and given a source-assignment-location, re-identify it when iterating over all + source-assignments in a file. + + The actual string consists of the path from the root of the moz.build file to the + source-assignment, plus a suffix number. + + We suffix the final value with an incrementing counter. This is to support moz.build files + that, for whatever reason, use multiple SOURCES += [] list in the same basic block. This index + is per-file, so no two assignments in the same file (even if they have separate locations) + should have the same suffix. + + For example: + + When `SOURCES += ['ffpvx.xpp']` appears as the first line of the file (or any other + unindented-location) its source-assignment-location will be `> SOURCES 1`. 
+ + When `SOURCES += ['ffpvx.xpp']` appears inside a conditional such as + `CONFIG['OS_TARGET'] == 'WINNT'` then its source-assignment-location will be + `> if CONFIG['OS_TARGET'] == 'WINNT' > SOURCES 1` + + When SOURCES += ['ffpvx.xpp'] appears as the second line of the file, and a different + SOURCES += [] was the first line, then its source-assignment-location will be "> SOURCES 2". + + No two source-assignments may have the same source-assignment-location. If they do, we raise + an assert. + +file vs filename + a 'filename' is a string specifing the name and sometimes the path of a file. + a 'file' is an object you get from open()-ing a filename + + A variable that is a string should always use 'filename' + +vendoring directory vs moz.yaml directory + In many cases, a library's moz.yaml file, moz.build file(s), and sources files will all live + under a single directory. e.g. libjpeg + + In other cases, a library's source files are in one directory (we call this the 'vendoring + directory') and the moz.yaml file and moz.build file(s) are in another directory (we call this + the moz.yaml directory). e.g. libdav1d + +normalized-filename + A filename is 'normalized' if it has been expanded to the full path from the gecko root. This + requires a moz.build file. + + For example a filename `lib/opus.c` may be specified inside the `media/libopus/moz.build` + file. The filename is normalized by os.path.join()-ing the dirname of the moz.build file + (i.e. `media/libopus`) to the filename, resulting in `media/libopus/lib/opus.c` + + A filename that begins with '/' is presumed to already be specified relative to the gecko + root, and therefore is not modified. + + Normalization gets more complicated when dealing with separate vendoring and moz.yaml + directories. This is because a file can be considered normalized when it looks like + third_party/libdav1d/src/a.cpp + _or_ when it looks like + media/libdav1d/../../third_party/libdav1d/src/a.cpp + This is because in the moz.build file, it will be specified as + `../../third_party/libdav1d/src/a.cpp` and we 'normalize' it by prepending the path to the + moz.build file. + + Normalization is not just about having an 'absolute' path from gecko_root to file. In fact + it's not really about that at all - it's about matching filenames. Therefore when we are + dealing with separate vendoring and moz.yaml directories we will very quickly 're-normalize' + a normalized filename to get it into one of those foo/bar/../../third_party/... paths that + will make sense for the moz.build file we are interested in. + + Whenever a filename is normalized, it should be specified as such in the variable name, + either as a prefix (normalized_filename) or a suffix (target_filename_normalized) + +statistic + Using some hacky stuff, we report statistics about how many times we hit certain branches of + the code. + e.g. + - "How many times did we refine a guess based on prefix length" + - "How many times did we refine a guess based on the number of files in the block" + - "What is the histogram of guess candidates" + + We do this to identify how frequently certain code paths were taken, allowing us to identify + strange behavior and investigate outliers. This process lead to identifying bugs and small + improvements. 
+""" + +import ast +import copy +import os +import re +import shutil +import subprocess +import sys +from pprint import pprint + +try: + from mozbuild.frontend.sandbox import alphabetical_sorted +except Exception: + + def alphabetical_sorted(iterable, key=lambda x: x.lower(), reverse=False): + return sorted(iterable, key=key, reverse=reverse) + + +# This can be edited to enable better Python 3.8 behavior, but is set so that +# everything is consistent by default so errors can be detected more easily. +FORCE_DOWNGRADE_BEHAVIOR = True + +statistics = { + "guess_candidates": {}, + "number_refinements": {}, + "needed_to_guess": 0, + "length_logic": {}, +} + + +def log(*args, **kwargs): + # If is helpful to keep some logging statements around, but we don't want to print them + # unless we are debugging + # print(*args, **kwargs) + pass + + +############################################## + +import inspect + + +def node_to_name(code, node): + if ( + not FORCE_DOWNGRADE_BEHAVIOR + and sys.version_info[0] >= 3 + and sys.version_info[1] >= 8 + ): + return ast.get_source_segment(code, node) + + return node.__class__.__name__ + + +def get_attribute_label(node): + assert isinstance(node, ast.Attribute) + + label = "" + subtarget = node + while isinstance(subtarget, ast.Attribute): + label = subtarget.attr + ("." if label else "") + label + subtarget = subtarget.value + + if isinstance(subtarget, ast.Name): + label = subtarget.id + "." + label + elif isinstance(subtarget, ast.Subscript) and isinstance(subtarget.value, ast.Name): + label = subtarget.value.id + "." + label + else: + raise Exception( + "Unxpected subtarget of type %s found in get_attribute_label. label=%s" + % (subtarget, label) + ) + + return label + + +def ast_get_source_segment(code, node): + caller = inspect.stack()[1] + + if "sphinx" in caller.filename or ( + not FORCE_DOWNGRADE_BEHAVIOR + and sys.version_info[0] >= 3 + and sys.version_info[1] >= 8 + ): + return ast.original_get_source_segment(code, node) + + if caller.function == "assignment_node_to_source_filename_list": + return "" + + raise Exception( + "ast_get_source_segment is not available with this Python version. (ver=%s.%s, caller=%s)" + % (sys.version_info.major, sys.version_info.minor, caller.function) + ) + + +# Overwrite it so we don't accidently use it +if sys.version_info[0] >= 3 and sys.version_info[1] >= 8: + ast.original_get_source_segment = ast.get_source_segment + ast.get_source_segment = ast_get_source_segment + + +############################################## + + +def node_to_readable_file_location(code, node, child_node=None): + location = "" + + if isinstance(node.parent, ast.Module): + # The next node up is the root, don't go higher. + pass + else: + location += node_to_readable_file_location(code, node.parent, node) + + location += " > " + if isinstance(node, ast.Module): + raise Exception("We shouldn't see a Module") + elif isinstance(node, ast.If): + assert child_node + if child_node in node.body: + location += "if " + node_to_name(code, node.test) + else: + location += "else-of-if " + node_to_name(code, node.test) + elif isinstance(node, ast.For): + location += ( + "for " + + node_to_name(code, node.target) + + " in " + + node_to_name(code, node.iter) + ) + elif isinstance(node, ast.AugAssign): + if isinstance(node.target, ast.Name): + location += node.target.id + else: + location += node_to_name(code, node.target) + elif isinstance(node, ast.Assign): + # This assert would fire if we did e.g. some_sources = all_sources = [ ... 
] + assert len(node.targets) == 1, "Assignment node contains more than one target" + if isinstance(node.targets[0], ast.Name): + location += node.targets[0].id + else: + location += node_to_name(code, node.targets[0]) + else: + raise Exception("Got a node type I don't know how to handle: " + str(node)) + + return location + + +def assignment_node_to_source_filename_list(code, node): + """ + If the list of filenames is not a list of constants (e.g. it's a generated list) + it's (probably) infeasible to try and figure it out. At least we're not going to try + right now. Maybe in the future? + + If this happens, we'll return an empty list. The consequence of this is that we + won't be able to match a file against this list, so we may not be able to add it. + + (But if the file matches a generated list, perhaps it will be included in the + Sources list automatically?) + """ + if isinstance(node.value, ast.List) and "elts" in node.value._fields: + for f in node.value.elts: + if not isinstance(f, ast.Constant) and not isinstance(f, ast.Str): + log( + "Found non-constant source file name in list: ", + ast_get_source_segment(code, f), + ) + return [] + return [ + f.value if isinstance(f, ast.Constant) else f.s for f in node.value.elts + ] + elif isinstance(node.value, ast.ListComp): + # SOURCES += [f for f in foo if blah] + log("Could not find the files for " + ast_get_source_segment(code, node.value)) + elif isinstance(node.value, ast.Name) or isinstance(node.value, ast.Subscript): + # SOURCES += other_var + # SOURCES += files['X64_SOURCES'] + log("Could not find the files for " + ast_get_source_segment(code, node)) + elif isinstance(node.value, ast.Call): + # SOURCES += sorted(...) + log("Could not find the files for " + ast_get_source_segment(code, node)) + else: + raise Exception( + "Unexpected node received in assignment_node_to_source_filename_list: " + + str(node) + ) + return [] + + +def mozbuild_file_to_source_assignments(normalized_mozbuild_filename, assignment_type): + """ + Returns a dictionary of 'source-assignment-location' -> 'normalized source filename list' + contained in the moz.build file specified + + normalized_mozbuild_filename: the moz.build file to read + """ + source_assignments = {} + + if assignment_type == "source-files": + targets = ["SOURCES", "UNIFIED_SOURCES"] + else: + targets = ["EXPORTS"] + + # Parse the AST of the moz.build file + code = open(normalized_mozbuild_filename).read() + root = ast.parse(code) + + # Populate node parents. This allows us to walk up from a node to the root. 
+ # (Really I think python's ast class should do this, but it doesn't, so we monkey-patch it) + for node in ast.walk(root): + for child in ast.iter_child_nodes(node): + child.parent = node + + # Find all the assignments of SOURCES or UNIFIED_SOURCES + if assignment_type == "source-files": + source_assignment_nodes = [ + node + for node in ast.walk(root) + if isinstance(node, ast.AugAssign) + and isinstance(node.target, ast.Name) + and node.target.id in targets + ] + assert ( + len([n for n in source_assignment_nodes if not isinstance(n.op, ast.Add)]) + == 0 + ), "We got a Source assignment that wasn't +=" + + # Recurse and find nodes where we do SOURCES += other_var or SOURCES += FILES['foo'] + recursive_assignment_nodes = [ + node + for node in source_assignment_nodes + if isinstance(node.value, ast.Name) or isinstance(node.value, ast.Subscript) + ] + + recursive_assignment_nodes_names = [ + node.value.id + for node in recursive_assignment_nodes + if isinstance(node.value, ast.Name) + ] + + # TODO: We do not dig into subscript variables. These are currently only used by two + # libraries that use external sources.mozbuild files. + # recursive_assignment_nodes_names.extend([something<node> for node in + # recursive_assignment_nodes if isinstance(node.value, ast.Subscript)] + + additional_assignment_nodes = [ + node + for node in ast.walk(root) + if isinstance(node, ast.Assign) + and isinstance(node.targets[0], ast.Name) + and node.targets[0].id in recursive_assignment_nodes_names + ] + + # Remove the original, useless assignment node (the SOURCES += other_var) + for node in recursive_assignment_nodes: + source_assignment_nodes.remove(node) + # Add the other_var += [''] source-assignment + source_assignment_nodes.extend(additional_assignment_nodes) + else: + source_assignment_nodes = [ + node + for node in ast.walk(root) + if isinstance(node, ast.AugAssign) + and ( + (isinstance(node.target, ast.Name) and node.target.id == "EXPORTS") + or ( + isinstance(node.target, ast.Attribute) + and get_attribute_label(node.target).startswith("EXPORTS") + ) + ) + ] + source_assignment_nodes.extend( + [ + node + for node in ast.walk(root) + if isinstance(node, ast.Assign) + and ( + ( + isinstance(node.targets[0], ast.Name) + and node.targets[0].id == "EXPORTS" + ) + or ( + isinstance(node.targets[0], ast.Attribute) + and get_attribute_label(node.targets[0]).startswith("EXPORTS") + ) + ) + ] + ) + + # Get the source-assignment-location for the node: + assignment_index = 1 + for a in source_assignment_nodes: + source_assignment_location = ( + node_to_readable_file_location(code, a) + " " + str(assignment_index) + ) + source_filename_list = assignment_node_to_source_filename_list(code, a) + + if not source_filename_list: + # In some cases (like generated source file lists) we will have an empty list. 
+            # If that is the case, just omit the source assignment
+            continue
+
+        normalized_source_filename_list = [
+            normalize_filename(normalized_mozbuild_filename, f)
+            for f in source_filename_list
+        ]
+
+        if source_assignment_location in source_assignments:
+            source_assignment_location = node_to_readable_file_location(code, a)
+
+        assert (
+            source_assignment_location not in source_assignments
+        ), "In %s, two assignments have the same key ('%s')" % (
+            normalized_mozbuild_filename,
+            source_assignment_location,
+        )
+        source_assignments[source_assignment_location] = normalized_source_filename_list
+        assignment_index += 1
+
+    return (source_assignments, root, code)
+
+
+def unnormalize_filename(normalized_mozbuild_filename, normalized_filename):
+    if normalized_filename[0] == "/":
+        return normalized_filename
+
+    mozbuild_path = (
+        os.path.dirname(normalized_mozbuild_filename).replace(os.path.sep, "/") + "/"
+    )
+    return normalized_filename.replace(mozbuild_path, "")
+
+
+def normalize_filename(normalized_mozbuild_filename, filename):
+    if filename[0] == "/":
+        return filename
+
+    mozbuild_path = os.path.dirname(normalized_mozbuild_filename).replace(
+        os.path.sep, "/"
+    )
+    return os.path.join(mozbuild_path, filename).replace(os.path.sep, "/")
+
+
+def get_mozbuild_file_search_order(
+    normalized_filename,
+    moz_yaml_dir=None,
+    vendoring_dir=None,
+    all_mozbuild_filenames_normalized=None,
+):
+    """
+    Returns an ordered list of normalized moz.build filenames to consider for a given filename
+
+    normalized_filename: a source filename normalized to the gecko root
+
+    moz_yaml_dir: the path from gecko_root to the moz.yaml file (which is the root of the
+                  moz.build files)
+
+    vendoring_dir: the path to where the library's source files are
+
+    all_mozbuild_filenames_normalized: (optional) the list of all third-party moz.build files
+       If all_mozbuild_filenames_normalized is not specified, we look in the filesystem.
+
+    The list is built out of two distinct steps.
+
+    In Step 1 we will walk up a directory tree, looking for moz.build files. We append moz.build
+    files in this order, preferring the lowest moz.build we find, then moving on to one in a
+    higher directory.
+    The directory we start in is a little complicated. We take the series of subdirectories
+    between vendoring_dir and the file in question, and then append them to the moz.yaml
+    directory.
+
+    Example:
+
+    .. code-block:: python
+
+        When moz_yaml directory != vendoring_directory:
+            moz_yaml_dir = foo/bar/
+            vendoring_dir = third_party/baz/
+            normalized_filename = third_party/baz/asm/arm/a.S
+            starting_directory: foo/bar/asm/arm/
+        When moz_yaml directory == vendoring_directory
+            (In this case, these variables will actually be 'None' but the algorithm is the same)
+            moz_yaml_dir = foo/bar/
+            vendoring_dir = foo/bar/
+            normalized_filename = foo/bar/asm/arm/a.S
+            starting_directory: foo/bar/asm/arm/
+
+    In Step 2 we get a bit desperate. When the vendoring directory and the moz_yaml directory are
+    not the same, there is no guarantee that the moz_yaml directory will adhere to the same
+    directory structure as the vendoring directory. And indeed it doesn't in some cases
+    (e.g. libdav1d.)
+    So in this situation we start at the root of the moz_yaml directory and walk downwards, adding
+    _any_ moz.build file we encounter to the list. Later on (in all cases, not just
+    moz_yaml_dir != vendoring_dir) we only consider a moz.build file if it has source files whose
+    directory matches the normalized_filename, so this step, though desperate, is safe-ish and
+    believe it or not has worked for some file additions.
+    """
+    ordered_list = []
+
+    if all_mozbuild_filenames_normalized is None:
+        assert os.path.isfile(
+            ".arcconfig"
+        ), "We do not seem to be running from the gecko root"
+
+    # The first time around, this variable name is incorrect.
+    # It's actually the full path+filename, not a directory.
+    test_directory = None
+    if (moz_yaml_dir, vendoring_dir) == (None, None):
+        # In this situation, the library is vendored into the same directory as
+        # the moz.build files. We can start traversing directories up from the file to
+        # add to find the correct moz.build file
+        test_directory = normalized_filename
+    elif moz_yaml_dir and vendoring_dir:
+        # In this situation, the library is vendored in a different place (typically
+        # third_party/foo) from the moz.build files.
+        subdirectory_path = normalized_filename.replace(vendoring_dir, "")
+        test_directory = os.path.join(moz_yaml_dir, subdirectory_path)
+    else:
+        raise Exception("If moz_yaml_dir or vendoring_dir are specified, both must be")
+
+    # Step 1
+    while (
+        len(os.path.dirname(test_directory).replace(os.path.sep, "/")) > 1
+    ):  # While we are not at '/'
+        containing_directory = os.path.dirname(test_directory)
+
+        possible_normalized_mozbuild_filename = os.path.join(
+            containing_directory, "moz.build"
+        )
+
+        if not all_mozbuild_filenames_normalized:
+            if os.path.isfile(possible_normalized_mozbuild_filename):
+                ordered_list.append(possible_normalized_mozbuild_filename)
+        elif possible_normalized_mozbuild_filename in all_mozbuild_filenames_normalized:
+            ordered_list.append(possible_normalized_mozbuild_filename)
+
+        test_directory = containing_directory
+
+    # Step 2
+    if moz_yaml_dir:
+        for root, dirs, files in os.walk(moz_yaml_dir):
+            for f in files:
+                if f == "moz.build":
+                    ordered_list.append(os.path.join(root, f))
+
+    return ordered_list
+
+
+def get_closest_mozbuild_file(
+    normalized_filename,
+    moz_yaml_dir=None,
+    vendoring_dir=None,
+    all_mozbuild_filenames_normalized=None,
+):
+    """
+    Returns the closest moz.build file in the directory tree to a normalized filename
+    """
+    r = get_mozbuild_file_search_order(
+        normalized_filename,
+        moz_yaml_dir,
+        vendoring_dir,
+        all_mozbuild_filenames_normalized,
+    )
+    return r[0] if r else None
+
+
+def filenames_directory_is_in_filename_list(
+    filename_normalized, list_of_normalized_filenames
+):
+    """
+    Given a normalized filename and a list of normalized filenames, first turn them into a
+    containing directory, and a list of containing directories. Then test if the containing
+    directory of the filename is in the list.
+
+    ex:
+        f = filenames_directory_is_in_filename_list
+        f("foo/bar/a.c", ["foo/b.c"]) -> false
+        f("foo/bar/a.c", ["foo/b.c", "foo/bar/c.c"]) -> true
+        f("foo/bar/a.c", ["foo/b.c", "foo/bar/baz/d.c"]) -> false
+    """
+    path_list = set(
+        [
+            os.path.dirname(f).replace(os.path.sep, "/")
+            for f in list_of_normalized_filenames
+        ]
+    )
+    return os.path.dirname(filename_normalized).replace(os.path.sep, "/") in path_list
+
+
+def find_all_posible_assignments_from_filename(source_assignments, filename_normalized):
+    """
+    Given a list of source assignments and a normalized filename, narrow the list to assignments
+    that contain a file whose directory matches the filename's directory.
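+
+    ex (hypothetical assignment keys and paths):
+        f = find_all_posible_assignments_from_filename
+        f({"sources 1": ["foo/a.c"], "sources 2": ["bar/b.c"]}, "foo/new.c")
+            -> {"sources 1": ["foo/a.c"]}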
+ """ + possible_assignments = {} + for key, list_of_normalized_filenames in source_assignments.items(): + if not list_of_normalized_filenames: + continue + if filenames_directory_is_in_filename_list( + filename_normalized, list_of_normalized_filenames + ): + possible_assignments[key] = list_of_normalized_filenames + return possible_assignments + + +def guess_best_assignment(source_assignments, filename_normalized): + """ + Given several assignments, all of which contain the same directory as the filename, pick one + we think is best and return its source-assignment-location. + + We do this by looking at the filename itself (not just its directory) and picking the + assignment which contains a filename with the longest matching prefix. + + e.g: "foo/asm_neon.c" compared to ["foo/main.c", "foo/all_utility.c"], ["foo/asm_arm.c"] + -> ["foo/asm_arm.c"] (match of `foo/asm_`) + """ + length_of_longest_match = 0 + source_assignment_location_of_longest_match = None + statistic_number_refinements = 0 + statistic_length_logic = 0 + + for key, list_of_normalized_filenames in source_assignments.items(): + for f in list_of_normalized_filenames: + if filename_normalized == f: + # Do not cheat by matching the prefix of the exact file + continue + + prefix = os.path.commonprefix([filename_normalized, f]) + if len(prefix) > length_of_longest_match: + statistic_number_refinements += 1 + length_of_longest_match = len(prefix) + source_assignment_location_of_longest_match = key + elif len(prefix) == length_of_longest_match and len( + source_assignments[key] + ) > len(source_assignments[source_assignment_location_of_longest_match]): + statistic_number_refinements += 1 + statistic_length_logic += 1 + length_of_longest_match = len(prefix) + source_assignment_location_of_longest_match = key + return ( + source_assignment_location_of_longest_match, + (statistic_number_refinements, statistic_length_logic), + ) + + +def edit_moz_build_file_to_add_file( + normalized_mozbuild_filename, + unnormalized_filename_to_add, + unnormalized_list_of_files, +): + """ + This function edits the moz.build file in-place + + I had _really_ hoped to replace this whole damn thing with something that adds a + node to the AST, dumps the AST out, and then runs black on the file but there are + some issues: + - third party moz.build files (or maybe all moz.build files) aren't always run through black + - dumping the ast out losing comments + + """ + + # Make sure that we only write in forward slashes + if "\\" in unnormalized_filename_to_add: + unnormalized_filename_to_add = unnormalized_filename_to_add.replace("\\", "/") + + # add the file into the list, and then sort it in the same way the moz.build validator + # expects + unnormalized_list_of_files.append(unnormalized_filename_to_add) + unnormalized_list_of_files = alphabetical_sorted(unnormalized_list_of_files) + + # we're going to add our file by doing a find/replace of an adjacent file in the list + indx_of_addition = unnormalized_list_of_files.index(unnormalized_filename_to_add) + indx_of_addition + if indx_of_addition == 0: + target_indx = 1 + replace_before = False + else: + target_indx = indx_of_addition - 1 + replace_before = True + + find_str = unnormalized_list_of_files[target_indx] + + # We will only perform the first replacement. This is because sometimes there's moz.build + # code like: + # SOURCES += ['file.cpp'] + # SOURCES['file.cpp'].flags += ['-Winline'] + # If we replaced every time we found the target, we would be inserting into that second + # line. 
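+    #
+    # For illustration (hypothetical filenames): inserting 'bar.c' next to an adjacent
+    # entry 'foo.c' that sits on its own line rewrites
+    #     'foo.c',
+    # into
+    #     'foo.c',
+    #     'bar.c',
+    # reusing whichever quote character and indentation that line already had.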
+ did_replace = False + + with open(normalized_mozbuild_filename, mode="r") as file: + with open(normalized_mozbuild_filename + ".new", mode="wb") as output: + for line in file: + if not did_replace and find_str in line: + did_replace = True + + # Okay, we found the line we need to edit, now we need to be ugly about it + # Grab the type of quote used in this moz.build file: single or double + quote_type = line[line.index(find_str) - 1] + + if "[" not in line: + # We'll want to put our new file onto its own line + newline_to_add = "\n" + # And copy the indentation of the line we're adding adjacent to + indent_value = line[0 : line.index(quote_type)] + else: + # This is frustrating, we have the start of the array here. We aren't + # going to be able to indent things onto a newline properly. We're just + # going to have to stick it in on the same line. + newline_to_add = "" + indent_value = "" + + find_str = "%s%s%s" % (quote_type, find_str, quote_type) + if replace_before: + replacement_tuple = ( + find_str, + newline_to_add, + indent_value, + quote_type, + unnormalized_filename_to_add, + quote_type, + ) + replace_str = "%s,%s%s%s%s%s" % replacement_tuple + else: + replacement_tuple = ( + quote_type, + unnormalized_filename_to_add, + quote_type, + newline_to_add, + indent_value, + find_str, + ) + replace_str = "%s%s%s,%s%s%s" % replacement_tuple + + line = line.replace(find_str, replace_str) + + output.write((line.rstrip() + "\n").encode("utf-8")) + + shutil.move(normalized_mozbuild_filename + ".new", normalized_mozbuild_filename) + + +def edit_moz_build_file_to_remove_file( + normalized_mozbuild_filename, unnormalized_filename_to_remove +): + """ + This function edits the moz.build file in-place + """ + + simple_file_line = re.compile( + "^\s*['\"]" + unnormalized_filename_to_remove + "['\"],*$" + ) + did_replace = False + + with open(normalized_mozbuild_filename, mode="r") as file: + with open(normalized_mozbuild_filename + ".new", mode="wb") as output: + for line in file: + if not did_replace and unnormalized_filename_to_remove in line: + did_replace = True + + # If the line consists of just a single source file on it, then we're in the + # clear - we can just skip this line. + if simple_file_line.match(line): + # Do not output anything, just keep going. + continue + + # Okay, so the line is a little more complicated. 
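+                    # The entry shares its line with the list's opening or closing
+                    # bracket, so strip just the quoted filename (and any trailing
+                    # comma) from the line instead of skipping the whole line.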
+ quote_type = line[line.index(unnormalized_filename_to_remove) - 1] + + if "[" in line or "]" in line: + find_str = "%s%s%s,*" % ( + quote_type, + unnormalized_filename_to_remove, + quote_type, + ) + line = re.sub(find_str, "", line) + else: + raise Exception( + "Got an unusual type of line we're trying to remove a file from:", + line, + ) + + output.write((line.rstrip() + "\n").encode("utf-8")) + + shutil.move(normalized_mozbuild_filename + ".new", normalized_mozbuild_filename) + + +def validate_directory_parameters(moz_yaml_dir, vendoring_dir): + # Validate the parameters + assert (moz_yaml_dir, vendoring_dir) == (None, None) or ( + moz_yaml_dir and vendoring_dir + ), "If either moz_yaml_dir or vendoring_dir are specified, they both must be" + + if moz_yaml_dir is not None and vendoring_dir is not None: + # Ensure they are provided with trailing slashes + moz_yaml_dir += "/" if moz_yaml_dir[-1] != "/" else "" + vendoring_dir += "/" if vendoring_dir[-1] != "/" else "" + + return (moz_yaml_dir, vendoring_dir) + + +HAS_ABSOLUTE = 1 +HAS_TRAVERSE_CHILD = 2 +HAS_RELATIVE_CHILD = 2 # behaves the same as above + + +def get_file_reference_modes(source_assignments): + """ + Given a set of source assignments, this function traverses through the + files references in those assignments to see if the files are referenced + using absolute paths (relative to gecko root) or relative paths. + + It will return all the modes that are seen. + """ + modes = set() + + for key, list_of_normalized_filenames in source_assignments.items(): + if not list_of_normalized_filenames: + continue + for file in list_of_normalized_filenames: + if file[0] == "/": + modes.add(HAS_ABSOLUTE) + elif file[0:2] == "../": + modes.add(HAS_TRAVERSE_CHILD) + else: + modes.add(HAS_RELATIVE_CHILD) + return modes + + +def renormalize_filename( + mode, + moz_yaml_dir, + vendoring_dir, + normalized_mozbuild_filename, + normalized_filename_to_act_on, +): + """ + Edit the normalized_filename_to_act_on to either + - Make it an absolute path from gecko root (if we're in that mode) + - Get a relative path from the vendoring directory to the yaml directory where the + moz.build file is (If they are in separate directories) + """ + if mode == HAS_ABSOLUTE: + # If the moz.build file uses absolute paths from the gecko root, this is easy, + # all we need to do is prepend a '/' to indicate that + normalized_filename_to_act_on = "/" + normalized_filename_to_act_on + elif moz_yaml_dir and vendoring_dir: + # To re-normalize it in this case, we: + # (a) get the path from gecko_root to the moz.build file we are considering + # (b) compute a relative path from that directory to the file we want + # (c) because (b) started at the moz.build file's directory, it is not + # normalized to the gecko_root. 
Therefore we need to normalize it by
+        #     prepending (a)
+        a = os.path.dirname(normalized_mozbuild_filename).replace(os.path.sep, "/")
+        b = os.path.relpath(normalized_filename_to_act_on, start=a).replace(
+            os.path.sep, "/"
+        )
+        c = os.path.join(a, b).replace(os.path.sep, "/")
+        normalized_filename_to_act_on = c
+
+    return normalized_filename_to_act_on
+
+
+#########################################################
+# PUBLIC API
+#########################################################
+
+
+class MozBuildRewriteException(Exception):
+    pass
+
+
+def remove_file_from_moz_build_file(
+    normalized_filename_to_remove, moz_yaml_dir=None, vendoring_dir=None
+):
+    """
+    Given a filename, relative to the gecko root (aka normalized), we look for the nearest
+    moz.build file, look in that file for the file, and then edit that moz.build file in-place.
+    """
+    moz_yaml_dir, vendoring_dir = validate_directory_parameters(
+        moz_yaml_dir, vendoring_dir
+    )
+
+    all_possible_normalized_mozbuild_filenames = get_mozbuild_file_search_order(
+        normalized_filename_to_remove, moz_yaml_dir, vendoring_dir, None
+    )
+
+    # normalized_filename_to_remove is the path from gecko_root to the file. However, if we vendor
+    # separate from moz.yaml; then 'normalization' gets more complicated as explained above.
+    # We will need to re-normalize the filename for each moz.build file we want to test, so we
+    # save the original normalized filename for this purpose
+    original_normalized_filename_to_remove = normalized_filename_to_remove
+
+    # These are the two header file types specified in vendor_manifest.py > source_suffixes
+    if normalized_filename_to_remove.endswith(
+        ".h"
+    ) or normalized_filename_to_remove.endswith(".hpp"):
+        assignment_type = "header-files"
+    else:
+        assignment_type = "source-files"
+
+    for normalized_mozbuild_filename in all_possible_normalized_mozbuild_filenames:
+        source_assignments, root, code = mozbuild_file_to_source_assignments(
+            normalized_mozbuild_filename, assignment_type
+        )
+
+        modes = get_file_reference_modes(source_assignments)
+
+        for mode in modes:
+            normalized_filename_to_remove = renormalize_filename(
+                mode,
+                moz_yaml_dir,
+                vendoring_dir,
+                normalized_mozbuild_filename,
+                normalized_filename_to_remove,
+            )
+
+            for key in source_assignments:
+                normalized_source_filename_list = source_assignments[key]
+                if normalized_filename_to_remove in normalized_source_filename_list:
+                    unnormalized_filename_to_remove = unnormalize_filename(
+                        normalized_mozbuild_filename, normalized_filename_to_remove
+                    )
+                    edit_moz_build_file_to_remove_file(
+                        normalized_mozbuild_filename, unnormalized_filename_to_remove
+                    )
+                    return
+
+        normalized_filename_to_remove = original_normalized_filename_to_remove
+    raise MozBuildRewriteException("Could not remove " + normalized_filename_to_remove)
+
+
+def add_file_to_moz_build_file(
+    normalized_filename_to_add, moz_yaml_dir=None, vendoring_dir=None
+):
+    """
+    This is the overall function. Given a filename, relative to the gecko root (aka normalized),
+    we look for a moz.build file to add it to, look for the place in the moz.build file to add it,
+    and then edit that moz.build file in-place.
+
+    It accepts two optional parameters. If one is specified they both must be. If a library is
+    vendored in a separate place from the moz.yaml file, these parameters specify those two
+    directories.
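+
+    ex:
+        add_file_to_moz_build_file(
+            "third_party/jpeg-xl/lib/include/jxl/resizable_parallel_runner.h",
+            "media/libjxl",
+            "third_party/jpeg-xl",
+        )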
+ """ + moz_yaml_dir, vendoring_dir = validate_directory_parameters( + moz_yaml_dir, vendoring_dir + ) + + all_possible_normalized_mozbuild_filenames = get_mozbuild_file_search_order( + normalized_filename_to_add, moz_yaml_dir, vendoring_dir, None + ) + + # normalized_filename_to_add is the path from gecko_root to the file. However, if we vendor + # separate from moz.yaml; then 'normalization' gets more complicated as explained above. + # We will need to re-normalize the filename for each moz.build file we want to test, so we + # save the original normalized filename for this purpose + original_normalized_filename_to_add = normalized_filename_to_add + + if normalized_filename_to_add.endswith(".h") or normalized_filename_to_add.endswith( + ".hpp" + ): + assignment_type = "header-files" + else: + assignment_type = "source-files" + + for normalized_mozbuild_filename in all_possible_normalized_mozbuild_filenames: + source_assignments, root, code = mozbuild_file_to_source_assignments( + normalized_mozbuild_filename, assignment_type + ) + + modes = get_file_reference_modes(source_assignments) + + for mode in modes: + normalized_filename_to_add = renormalize_filename( + mode, + moz_yaml_dir, + vendoring_dir, + normalized_mozbuild_filename, + normalized_filename_to_add, + ) + + possible_assignments = find_all_posible_assignments_from_filename( + source_assignments, normalized_filename_to_add + ) + + if len(possible_assignments) == 0: + normalized_filename_to_add = original_normalized_filename_to_add + continue + + assert ( + len(possible_assignments) > 0 + ), "Could not find a single possible source assignment" + if len(possible_assignments) > 1: + best_guess, _ = guess_best_assignment( + possible_assignments, normalized_filename_to_add + ) + chosen_source_assignment_location = best_guess + else: + chosen_source_assignment_location = list(possible_assignments.keys())[0] + + guessed_list_containing_normalized_filenames = possible_assignments[ + chosen_source_assignment_location + ] + + # unnormalize filenames so we can edit the moz.build file. They rarely use full paths. + unnormalized_filename_to_add = unnormalize_filename( + normalized_mozbuild_filename, normalized_filename_to_add + ) + unnormalized_list_of_files = [ + unnormalize_filename(normalized_mozbuild_filename, f) + for f in guessed_list_containing_normalized_filenames + ] + + edit_moz_build_file_to_add_file( + normalized_mozbuild_filename, + unnormalized_filename_to_add, + unnormalized_list_of_files, + ) + return + + raise MozBuildRewriteException( + "Could not find a single moz.build file to add " + normalized_filename_to_add + ) + + +######################################################### +# TESTING CODE +######################################################### + + +def get_all_target_filenames_normalized(all_mozbuild_filenames_normalized): + """ + Given a list of moz.build files, returns all the files listed in all the souce assignments + in the file. 
+ + This function is only used for debug/testing purposes - there is no reason to call this + as part of 'the algorithm' + """ + all_target_filenames_normalized = [] + for normalized_mozbuild_filename in all_mozbuild_filenames_normalized: + source_assignments, root, code = mozbuild_file_to_source_assignments( + normalized_mozbuild_filename + ) + for key in source_assignments: + list_of_normalized_filenames = source_assignments[key] + all_target_filenames_normalized.extend(list_of_normalized_filenames) + + return all_target_filenames_normalized + + +def try_to_match_target_file( + all_mozbuild_filenames_normalized, target_filename_normalized +): + """ + Runs 'the algorithm' on a target file, and returns if the algorithm was successful + + all_mozbuild_filenames_normalized: the list of all third-party moz.build files + target_filename_normalized - the target filename, normalized to the gecko root + """ + + # We do not update the statistics for failed matches, so save a copy + global statistics + backup_statistics = copy.deepcopy(statistics) + + if "" == target_filename_normalized: + raise Exception("Received an empty target_filename_normalized") + + normalized_mozbuild_filename = get_closest_mozbuild_file( + target_filename_normalized, None, None, all_mozbuild_filenames_normalized + ) + if not normalized_mozbuild_filename: + return (False, "No moz.build file found") + + source_assignments, root, code = mozbuild_file_to_source_assignments( + normalized_mozbuild_filename + ) + possible_assignments = find_all_posible_assignments_from_filename( + source_assignments, target_filename_normalized + ) + + if len(possible_assignments) == 0: + raise Exception("No possible assignments were found") + elif len(possible_assignments) > 1: + ( + best_guess, + (statistic_number_refinements, statistic_length_logic), + ) = guess_best_assignment(possible_assignments, target_filename_normalized) + chosen_source_assignment_location = best_guess + + statistics["needed_to_guess"] += 1 + + if len(possible_assignments) not in statistics["guess_candidates"]: + statistics["guess_candidates"][len(possible_assignments)] = 0 + statistics["guess_candidates"][len(possible_assignments)] += 1 + + if statistic_number_refinements not in statistics["number_refinements"]: + statistics["number_refinements"][statistic_number_refinements] = 0 + statistics["number_refinements"][statistic_number_refinements] += 1 + + if statistic_length_logic not in statistics["length_logic"]: + statistics["length_logic"][statistic_length_logic] = 0 + statistics["length_logic"][statistic_length_logic] += 1 + + else: + chosen_source_assignment_location = list(possible_assignments.keys())[0] + + guessed_list_containing_normalized_filenames = possible_assignments[ + chosen_source_assignment_location + ] + + if target_filename_normalized in guessed_list_containing_normalized_filenames: + return (True, None) + + # Restore the copy of the statistics so we don't alter it for failed matches + statistics = backup_statistics + return (False, chosen_source_assignment_location) + + +def get_gecko_root(): + """ + Using __file__ as a base, find the gecko root + """ + gecko_root = None + directory_to_check = os.path.dirname(os.path.abspath(__file__)) + while not os.path.isfile(os.path.join(directory_to_check, ".arcconfig")): + directory_to_check = os.path.dirname(directory_to_check) + if directory_to_check == "/": + print("Could not find gecko root") + sys.exit(1) + + gecko_root = directory_to_check + return gecko_root + + +def get_all_mozbuild_filenames(gecko_root): 
+ """ + Find all the third party moz.build files in the gecko repo + """ + third_party_paths = open( + os.path.join(gecko_root, "tools", "rewriting", "ThirdPartyPaths.txt") + ).readlines() + all_mozbuild_filenames_normalized = [] + for path in third_party_paths: + # We need shell=True because some paths are specified as globs + # We need an exception handler because sometimes the directory doesn't exist and find barfs + try: + output = subprocess.check_output( + "find %s -name moz.build" % os.path.join(gecko_root, path.strip()), + shell=True, + ).decode("utf-8") + for f in output.split("\n"): + f = f.replace("//", "/").strip().replace(gecko_root, "")[1:] + if f: + all_mozbuild_filenames_normalized.append(f) + except Exception: + pass + + return all_mozbuild_filenames_normalized + + +def test_all_third_party_files(gecko_root, all_mozbuild_filenames_normalized): + """ + Run the algorithm on every source file in a third party moz.build file and output the results + """ + all_mozbuild_filenames_normalized = [ + f for f in all_mozbuild_filenames_normalized if "webrtc" not in f + ] + all_target_filenames_normalized = get_all_target_filenames_normalized( + all_mozbuild_filenames_normalized + ) + + total_attempted = 0 + failed_matched = [] + successfully_matched = 0 + + print("Going to try to match %i files..." % len(all_target_filenames_normalized)) + for target_filename_normalized in all_target_filenames_normalized: + result, wrong_guess = try_to_match_target_file( + all_mozbuild_filenames_normalized, target_filename_normalized + ) + + total_attempted += 1 + if result: + successfully_matched += 1 + else: + failed_matched.append((target_filename_normalized, wrong_guess)) + if total_attempted % 100 == 0: + print("Progress:", total_attempted) + + print( + "Successfully Matched %i of %i files" % (successfully_matched, total_attempted) + ) + if failed_matched: + print("Failed files:") + for f in failed_matched: + print("\t", f[0], f[1]) + print("Statistics:") + pprint(statistics) + + +if __name__ == "__main__": + gecko_root = get_gecko_root() + os.chdir(gecko_root) + + add_file_to_moz_build_file( + "third_party/jpeg-xl/lib/include/jxl/resizable_parallel_runner.h", + "media/libjxl", + "third_party/jpeg-xl", + ) + + # all_mozbuild_filenames_normalized = get_all_mozbuild_filenames(gecko_root) + # test_all_third_party_files(gecko_root, all_mozbuild_filenames_normalized) diff --git a/python/mozbuild/mozbuild/vendor/test_vendor_changes.sh b/python/mozbuild/mozbuild/vendor/test_vendor_changes.sh new file mode 100755 index 0000000000..3d0e390f7f --- /dev/null +++ b/python/mozbuild/mozbuild/vendor/test_vendor_changes.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +if [[ ! -f "CLOBBER" ]]; then + echo "Script should be run from mozilla-central root" + exit 1 +fi + +echo "THIS SCRIPT WILL REVERT AND PURGE UNCOMMIT LOCAL CHANGES" +echo "TYPE ok TO CONTINUE" +read CONFIRMATION +if [[ $CONFIRMATION != "ok" ]]; then + echo "Did not get 'ok', exiting" + exit 0 +fi + +ALL_MOZ_YAML_FILES=$(find . -name moz.yaml) + +for f in $ALL_MOZ_YAML_FILES; do + IFS='' read -r -d '' INPUT <<"EOF" +import sys +import yaml +enabled = False +with open(sys.argv[1]) as yaml_in: + o = yaml.safe_load(yaml_in) + if "updatebot" in o: + if 'tasks' in o["updatebot"]: + for t in o["updatebot"]["tasks"]: + if t["type"] == "vendoring": + if t.get("enabled", True) and t.get("platform", "Linux").lower() == "linux": + enabled = True +if enabled: + print(sys.argv[1]) +EOF + + FILE=$(python3 -c "$INPUT" $f) + + if [[ ! 
-z $FILE ]]; then + UPDATEBOT_YAML_FILES+=("$FILE") + fi +done + + +for FILE in "${UPDATEBOT_YAML_FILES[@]}"; do + REVISION=$(yq eval ".origin.revision" $FILE) + HAS_PATCHES=$(yq eval ".vendoring.patches | (. != null)" $FILE) + + echo "$FILE - $REVISION" + if [[ $HAS_PATCHES == "false" ]]; then + ./mach vendor $FILE --force --revision $REVISION + if [[ $? == 1 ]]; then + exit 1 + fi + else + ./mach vendor $FILE --force --revision $REVISION --patch-mode=none + if [[ $? == 1 ]]; then + exit 1 + fi + ./mach vendor $FILE --force --revision $REVISION --patch-mode=only --ignore-modified + if [[ $? == 1 ]]; then + exit 1 + fi + fi + hg revert . + hg purge +done diff --git a/python/mozbuild/mozbuild/vendor/vendor_manifest.py b/python/mozbuild/mozbuild/vendor/vendor_manifest.py new file mode 100644 index 0000000000..9de2c23e95 --- /dev/null +++ b/python/mozbuild/mozbuild/vendor/vendor_manifest.py @@ -0,0 +1,789 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, # You can obtain one at http://mozilla.org/MPL/2.0/. + +import functools +import glob +import logging +import os +import re +import shutil +import stat +import sys +import tarfile +import tempfile +from collections import defaultdict + +import mozfile +import mozpack.path as mozpath +import requests + +from mozbuild.base import MozbuildObject +from mozbuild.vendor.rewrite_mozbuild import ( + MozBuildRewriteException, + add_file_to_moz_build_file, + remove_file_from_moz_build_file, +) + +DEFAULT_EXCLUDE_FILES = [".git*", ".git*/**"] +DEFAULT_KEEP_FILES = ["**/moz.build", "**/moz.yaml"] +DEFAULT_INCLUDE_FILES = [] + + +def throwe(): + raise Exception + + +def _replace_in_file(file, pattern, replacement, regex=False): + with open(file) as f: + contents = f.read() + + if regex: + newcontents = re.sub(pattern, replacement, contents) + else: + newcontents = contents.replace(pattern, replacement) + + if newcontents == contents: + raise Exception( + "Could not find '%s' in %s to %sreplace with '%s'" + % (pattern, file, "regex-" if regex else "", replacement) + ) + + with open(file, "w") as f: + f.write(newcontents) + + +def list_of_paths_to_readable_string(paths): + # From https://stackoverflow.com/a/41578071 + dic = defaultdict(list) + for item in paths: + if os.path.isdir(item): # To check path is a directory + _ = dic[item] # will set default value as empty list + else: + path, file = os.path.split(item) + dic[path].append(file) + + final_string = "[" + for key, val in dic.items(): + if len(val) == 0: + final_string += key + ", " + elif len(val) < 3: + final_string += ", ".join([os.path.join(key, v) for v in val]) + ", " + elif len(val) < 10: + final_string += "%s items in %s: %s and %s, " % ( + len(val), + key, + ", ".join(val[0:-1]), + val[-1], + ) + else: + final_string += "%s (omitted) items in %s, " % (len(val), key) + + if final_string[-2:] == ", ": + final_string = final_string[:-2] + + return final_string + "]" + + +class VendorManifest(MozbuildObject): + def should_perform_step(self, step): + return step not in self.manifest["vendoring"].get("skip-vendoring-steps", []) + + def vendor( + self, + command_context, + yaml_file, + manifest, + revision, + ignore_modified, + check_for_update, + force, + add_to_exports, + patch_mode, + ): + self.manifest = manifest + self.yaml_file = yaml_file + self._extract_directory = throwe + self.logInfo = functools.partial(self.log, logging.INFO, "vendor") + if "vendor-directory" not in 
self.manifest["vendoring"]: + self.manifest["vendoring"]["vendor-directory"] = os.path.dirname( + self.yaml_file + ) + + # ========================================================== + # If we're only patching; do that + if "patches" in self.manifest["vendoring"] and patch_mode == "only": + self.import_local_patches( + self.manifest["vendoring"]["patches"], + os.path.dirname(self.yaml_file), + self.manifest["vendoring"]["vendor-directory"], + ) + return + + # ========================================================== + self.source_host = self.get_source_host() + + ref_type = self.manifest["vendoring"].get("tracking", "commit") + flavor = self.manifest["vendoring"].get("flavor", "regular") + # Individiual files are special + + if revision == "tip": + # This case allows us to force-update a tag-tracking library to master + new_revision, timestamp = self.source_host.upstream_commit("HEAD") + elif ref_type == "tag": + new_revision, timestamp = self.source_host.upstream_tag(revision) + else: + new_revision, timestamp = self.source_host.upstream_commit(revision) + + self.logInfo( + {"ref_type": ref_type, "ref": new_revision, "timestamp": timestamp}, + "Latest {ref_type} is {ref} from {timestamp}", + ) + + # ========================================================== + if not force and self.manifest["origin"]["revision"] == new_revision: + # We're up to date, don't do anything + self.logInfo({}, "Latest upstream matches in-tree.") + return + elif flavor != "individual-file" and check_for_update: + # Only print the new revision to stdout + print("%s %s" % (new_revision, timestamp)) + return + + # ========================================================== + if flavor == "regular": + self.process_regular( + new_revision, timestamp, ignore_modified, add_to_exports + ) + elif flavor == "individual-files": + self.process_individual(new_revision, timestamp, ignore_modified) + elif flavor == "rust": + self.process_rust( + command_context, + self.manifest["origin"]["revision"], + new_revision, + timestamp, + ignore_modified, + ) + else: + raise Exception("Unknown flavor") + + def process_rust( + self, command_context, old_revision, new_revision, timestamp, ignore_modified + ): + # First update the Cargo.toml + cargo_file = os.path.join(os.path.dirname(self.yaml_file), "Cargo.toml") + try: + _replace_in_file(cargo_file, old_revision, new_revision) + except Exception: + # If we can't find it the first time, try again with a short hash + _replace_in_file(cargo_file, old_revision[:8], new_revision) + + # Then call ./mach vendor rust + from mozbuild.vendor.vendor_rust import VendorRust + + vendor_command = command_context._spawn(VendorRust) + vendor_command.vendor( + ignore_modified=True, build_peers_said_large_imports_were_ok=False + ) + + self.update_yaml(new_revision, timestamp) + + def process_individual(self, new_revision, timestamp, ignore_modified): + # This design is used because there is no github API to query + # for the last commit that modified a file; nor a way to get file + # blame. So really all we can do is just download and replace the + # files and see if they changed... 
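+        # Both manifest forms are handled below: 'individual-files' (explicit
+        # upstream/destination pairs) and 'individual-files-list' (paths appended to
+        # the default upstream and destination prefixes).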
+ + def download_and_write_file(url, destination): + self.logInfo( + {"local_file": destination, "url": url}, + "Downloading {local_file} from {url}...", + ) + + with mozfile.NamedTemporaryFile() as tmpfile: + try: + req = requests.get(url, stream=True) + for data in req.iter_content(4096): + tmpfile.write(data) + tmpfile.seek(0) + + shutil.copy2(tmpfile.name, destination) + except Exception as e: + raise (e) + + # Only one of these loops will have content, so just do them both + for f in self.manifest["vendoring"].get("individual-files", []): + url = self.source_host.upstream_path_to_file(new_revision, f["upstream"]) + destination = self.get_full_path(f["destination"]) + download_and_write_file(url, destination) + + for f in self.manifest["vendoring"].get("individual-files-list", []): + url = self.source_host.upstream_path_to_file( + new_revision, + self.manifest["vendoring"]["individual-files-default-upstream"] + f, + ) + destination = self.get_full_path( + self.manifest["vendoring"]["individual-files-default-destination"] + f + ) + download_and_write_file(url, destination) + + self.spurious_check(new_revision, ignore_modified) + + self.logInfo({}, "Checking for update actions") + self.update_files(new_revision) + + self.update_yaml(new_revision, timestamp) + + self.logInfo({"rev": new_revision}, "Updated to '{rev}'.") + + if "patches" in self.manifest["vendoring"]: + # Remind the user + self.log( + logging.CRITICAL, + "vendor", + {}, + "Patches present in manifest!!! Please run " + "'./mach vendor --patch-mode only' after commiting changes.", + ) + + def process_regular(self, new_revision, timestamp, ignore_modified, add_to_exports): + + if self.should_perform_step("fetch"): + self.fetch_and_unpack(new_revision) + else: + self.logInfo({}, "Skipping fetching upstream source.") + + self.logInfo({}, "Checking for update actions") + self.update_files(new_revision) + + if self.should_perform_step("hg-add"): + self.logInfo({}, "Registering changes with version control.") + self.repository.add_remove_files( + self.manifest["vendoring"]["vendor-directory"], + os.path.dirname(self.yaml_file), + ) + else: + self.logInfo({}, "Skipping registering changes.") + + if self.should_perform_step("spurious-check"): + self.logInfo({}, "Checking for a spurious update.") + self.spurious_check(new_revision, ignore_modified) + else: + self.logInfo({}, "Skipping the spurious update check.") + + if self.should_perform_step("update-moz-yaml"): + self.logInfo({}, "Updating moz.yaml.") + self.update_yaml(new_revision, timestamp) + else: + self.logInfo({}, "Skipping updating the moz.yaml file.") + + if self.should_perform_step("update-moz-build"): + self.logInfo({}, "Updating moz.build files") + self.update_moz_build( + self.manifest["vendoring"]["vendor-directory"], + os.path.dirname(self.yaml_file), + add_to_exports, + ) + else: + self.logInfo({}, "Skipping update of moz.build files") + + self.logInfo({"rev": new_revision}, "Updated to '{rev}'.") + + if "patches" in self.manifest["vendoring"]: + # Remind the user + self.log( + logging.CRITICAL, + "vendor", + {}, + "Patches present in manifest!!! 
Please run " + "'./mach vendor --patch-mode only' after commiting changes.", + ) + + def get_source_host(self): + if self.manifest["vendoring"]["source-hosting"] == "gitlab": + from mozbuild.vendor.host_gitlab import GitLabHost + + return GitLabHost(self.manifest) + elif self.manifest["vendoring"]["source-hosting"] == "github": + from mozbuild.vendor.host_github import GitHubHost + + return GitHubHost(self.manifest) + elif self.manifest["vendoring"]["source-hosting"] == "googlesource": + from mozbuild.vendor.host_googlesource import GoogleSourceHost + + return GoogleSourceHost(self.manifest) + elif self.manifest["vendoring"]["source-hosting"] == "angle": + from mozbuild.vendor.host_angle import AngleHost + + return AngleHost(self.manifest) + elif self.manifest["vendoring"]["source-hosting"] == "codeberg": + from mozbuild.vendor.host_codeberg import CodebergHost + + return CodebergHost(self.manifest) + else: + raise Exception( + "Unknown source host: " + self.manifest["vendoring"]["source-hosting"] + ) + + def get_full_path(self, path, support_cwd=False): + if support_cwd and path[0:5] == "{cwd}": + path = path.replace("{cwd}", ".") + elif "{tmpextractdir}" in path: + # _extract_directory() will throw an exception if it is invalid to use it + path = path.replace("{tmpextractdir}", self._extract_directory()) + elif "{yaml_dir}" in path: + path = path.replace("{yaml_dir}", os.path.dirname(self.yaml_file)) + elif "{vendor_dir}" in path: + path = path.replace( + "{vendor_dir}", self.manifest["vendoring"]["vendor-directory"] + ) + else: + path = mozpath.join(self.manifest["vendoring"]["vendor-directory"], path) + return os.path.abspath(path) + + def convert_patterns_to_paths(self, directory, patterns): + # glob.iglob uses shell-style wildcards for path name completion. + # "recursive=True" enables the double asterisk "**" wildcard which matches + # for nested directories as well as the directory we're searching in. + paths = [] + for pattern in patterns: + pattern_full_path = mozpath.join(directory, pattern) + # If pattern is a directory recursively add contents of directory + if os.path.isdir(pattern_full_path): + # Append double asterisk to the end to make glob.iglob recursively match + # contents of directory + paths.extend( + glob.iglob(mozpath.join(pattern_full_path, "**"), recursive=True) + ) + # Otherwise pattern is a file or wildcard expression so add it without altering it + else: + paths.extend(glob.iglob(pattern_full_path, recursive=True)) + # Remove folder names from list of paths in order to avoid prematurely + # truncating directories elsewhere + # Sort the final list to ensure we preserve 01_, 02_ ordering for e.g. 
*.patch globs + final_paths = sorted( + [mozpath.normsep(path) for path in paths if not os.path.isdir(path)] + ) + return final_paths + + def fetch_and_unpack(self, revision): + """Fetch and unpack upstream source""" + + def validate_tar_member(member, path): + def is_within_directory(directory, target): + real_directory = os.path.realpath(directory) + real_target = os.path.realpath(target) + prefix = os.path.commonprefix([real_directory, real_target]) + return prefix == real_directory + + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted path traversal in tar file: " + member.name) + if member.issym(): + link_path = os.path.join(os.path.dirname(member_path), member.linkname) + if not is_within_directory(path, link_path): + raise Exception( + "Attempted link path traversal in tar file: " + member.name + ) + if member.mode & (stat.S_ISUID | stat.S_ISGID): + raise Exception( + "Attempted setuid or setgid in tar file: " + member.name + ) + + def safe_extract(tar, path=".", *, numeric_owner=False): + def _files(tar, path): + for member in tar: + validate_tar_member(member, path) + yield member + + tar.extractall(path, members=_files(tar, path), numeric_owner=numeric_owner) + + url = self.source_host.upstream_snapshot(revision) + self.logInfo({"url": url}, "Fetching code archive from {url}") + + with mozfile.NamedTemporaryFile() as tmptarfile: + tmpextractdir = tempfile.TemporaryDirectory() + try: + req = requests.get(url, stream=True) + for data in req.iter_content(4096): + tmptarfile.write(data) + tmptarfile.seek(0) + + vendor_dir = mozpath.normsep( + self.manifest["vendoring"]["vendor-directory"] + ) + if self.should_perform_step("keep"): + self.logInfo({}, "Retaining wanted in-tree files.") + to_keep = self.convert_patterns_to_paths( + vendor_dir, + self.manifest["vendoring"].get("keep", []) + + DEFAULT_KEEP_FILES + + self.manifest["vendoring"].get("patches", []), + ) + else: + self.logInfo({}, "Skipping retention of in-tree files.") + to_keep = [] + + self.logInfo({"vd": vendor_dir}, "Cleaning {vd} to import changes.") + # We use double asterisk wildcard here to get complete list of recursive contents + for file in self.convert_patterns_to_paths(vendor_dir, ["**"]): + file = mozpath.normsep(file) + if file not in to_keep: + mozfile.remove(file) + + self.logInfo({"vd": vendor_dir}, "Unpacking upstream files for {vd}.") + with tarfile.open(tmptarfile.name) as tar: + + safe_extract(tar, tmpextractdir.name) + + def get_first_dir(p): + halves = os.path.split(p) + return get_first_dir(halves[0]) if halves[0] else halves[1] + + one_prefix = get_first_dir(tar.getnames()[0]) + has_prefix = all( + map(lambda name: name.startswith(one_prefix), tar.getnames()) + ) + + # GitLab puts everything down a directory; move it up. 
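+                    # e.g. members named "libfoo-1.2.3/src/a.c" (hypothetical) all share
+                    # the "libfoo-1.2.3/" prefix, so the contents of that single top-level
+                    # directory are copied up into the extraction root.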
+ if has_prefix: + tardir = mozpath.join(tmpextractdir.name, one_prefix) + mozfile.copy_contents(tardir, tmpextractdir.name) + mozfile.remove(tardir) + + if self.should_perform_step("include"): + self.logInfo({}, "Retaining wanted files from upstream changes.") + to_include = self.convert_patterns_to_paths( + tmpextractdir.name, + self.manifest["vendoring"].get("include", []) + + DEFAULT_INCLUDE_FILES, + ) + else: + self.logInfo({}, "Skipping retention of included files.") + to_include = [] + + if self.should_perform_step("exclude"): + self.logInfo({}, "Removing excluded files from upstream changes.") + to_exclude = self.convert_patterns_to_paths( + tmpextractdir.name, + self.manifest["vendoring"].get("exclude", []) + + DEFAULT_EXCLUDE_FILES, + ) + else: + self.logInfo({}, "Skipping removing excluded files.") + to_exclude = [] + + to_exclude = list(set(to_exclude) - set(to_include)) + if to_exclude: + self.logInfo( + {"files": list_of_paths_to_readable_string(to_exclude)}, + "Removing: {files}", + ) + for exclusion in to_exclude: + mozfile.remove(exclusion) + + # Clear out empty directories + # removeEmpty() won't remove directories containing only empty directories + # so just keep callign it as long as it's doing something + def removeEmpty(tmpextractdir): + removed = False + folders = list(os.walk(tmpextractdir))[1:] + for folder in folders: + if not folder[2]: + try: + os.rmdir(folder[0]) + removed = True + except Exception: + pass + return removed + + while removeEmpty(tmpextractdir.name): + pass + + # Then copy over the directories + if self.should_perform_step("move-contents"): + self.logInfo({"d": vendor_dir}, "Copying to {d}.") + mozfile.copy_contents(tmpextractdir.name, vendor_dir) + else: + self.logInfo({}, "Skipping copying contents into tree.") + self._extract_directory = lambda: tmpextractdir.name + except Exception as e: + tmpextractdir.cleanup() + raise e + + def update_yaml(self, revision, timestamp): + with open(self.yaml_file) as f: + yaml = f.readlines() + + replaced = 0 + replacements = [ + [" release:", " %s (%s)." % (revision, timestamp)], + [" revision:", " %s" % (revision)], + ] + + for i in range(0, len(yaml)): + l = yaml[i] + + for r in replacements: + if r[0] in l: + print("Found " + l) + replaced += 1 + yaml[i] = re.sub(r[0] + " [v\.a-f0-9]+.*$", r[0] + r[1], yaml[i]) + + assert len(replacements) == replaced + + with open(self.yaml_file, "wb") as f: + f.write(("".join(yaml)).encode("utf-8")) + + def spurious_check(self, revision, ignore_modified): + changed_files = set( + [ + os.path.abspath(f) + for f in self.repository.get_changed_files(mode="staged") + ] + ) + generated_files = set( + [ + self.get_full_path(f) + for f in self.manifest["vendoring"].get("generated", []) + ] + ) + changed_files = set(changed_files) - generated_files + if not changed_files: + self.logInfo({"r": revision}, "Upstream {r} hasn't modified files locally.") + # We almost certainly won't be here if ignore_modified was passed, because a modified + # local file will show up as a changed_file, but we'll be safe anyway. 
+ if not ignore_modified and generated_files: + for g in generated_files: + self.repository.clean_directory(g) + elif generated_files: + self.log( + logging.CRITICAL, + "vendor", + {"files": generated_files}, + "Because you passed --ignore-modified we are not cleaning your" + + " working directory, but the following files were probably" + + " spuriously edited and can be reverted: {files}", + ) + sys.exit(-2) + + self.logInfo( + {"rev": revision, "num": len(changed_files)}, + "Version '{rev}' has changed {num} files.", + ) + + def update_files(self, revision): + if "update-actions" not in self.manifest["vendoring"]: + return + + for update in self.manifest["vendoring"]["update-actions"]: + if update["action"] == "copy-file": + src = self.get_full_path(update["from"]) + dst = self.get_full_path(update["to"]) + + self.logInfo( + {"s": src, "d": dst}, "action: copy-file src: {s} dst: {d}" + ) + + with open(src) as f: + contents = f.read() + with open(dst, "w") as f: + f.write(contents) + elif update["action"] == "move-file": + src = self.get_full_path(update["from"]) + dst = self.get_full_path(update["to"]) + + self.logInfo( + {"s": src, "d": dst}, "action: move-file src: {s} dst: {d}" + ) + + shutil.move(src, dst) + elif update["action"] == "move-dir": + src = self.get_full_path(update["from"]) + dst = self.get_full_path(update["to"]) + + self.logInfo( + {"src": src, "dst": dst}, "action: move-dir src: {src} dst: {dst}" + ) + + if not os.path.isdir(src): + raise Exception( + "Cannot move from a source directory %s that is not a directory" + % src + ) + os.makedirs(dst, exist_ok=True) + + def copy_tree(src, dst): + names = os.listdir(src) + os.makedirs(dst, exist_ok=True) + + for name in names: + srcname = os.path.join(src, name) + dstname = os.path.join(dst, name) + + if os.path.isdir(srcname): + copy_tree(srcname, dstname) + else: + shutil.copy2(srcname, dstname) + + copy_tree(src, dst) + shutil.rmtree(src) + + elif update["action"] in ["replace-in-file", "replace-in-file-regex"]: + file = self.get_full_path(update["file"]) + + self.logInfo({"file": file}, "action: replace-in-file file: {file}") + + replacement = update["with"].replace("{revision}", revision) + _replace_in_file( + file, + update["pattern"], + replacement, + regex=update["action"] == "replace-in-file-regex", + ) + elif update["action"] == "delete-path": + path = self.get_full_path(update["path"]) + self.logInfo({"path": path}, "action: delete-path path: {path}") + mozfile.remove(path) + elif update["action"] in ["run-script", "run-command"]: + if update["action"] == "run-script": + command = self.get_full_path(update["script"], support_cwd=True) + else: + command = update["command"] + + run_dir = self.get_full_path(update["cwd"], support_cwd=True) + + args = [] + for a in update.get("args", []): + if a == "{revision}": + args.append(revision) + elif any( + s in a + for s in [ + "{cwd}", + "{vendor_dir}", + "{yaml_dir}", + "{tmpextractdir}", + ] + ): + args.append(self.get_full_path(a, support_cwd=True)) + else: + args.append(a) + + self.logInfo( + { + "command": command, + "run_dir": run_dir, + "args": args, + "type": update["action"], + }, + "action: {type} command: {command} working dir: {run_dir} args: {args}", + ) + extra_env = ( + {"GECKO_PATH": os.getcwd()} + if "GECKO_PATH" not in os.environ + else {} + ) + # We also add a signal to scripts that they are running under mach vendor + extra_env["MACH_VENDOR"] = "1" + self.run_process( + args=[command] + args, + cwd=run_dir, + log_name=command, + 
require_unix_environment=True, + append_env=extra_env, + ) + else: + assert False, "Unknown action supplied (how did this pass validation?)" + + def update_moz_build(self, vendoring_dir, moz_yaml_dir, add_to_exports): + if vendoring_dir == moz_yaml_dir: + vendoring_dir = moz_yaml_dir = None + + # If you edit this (especially for header files) you should double check + # rewrite_mozbuild.py around 'assignment_type' + source_suffixes = [".cc", ".c", ".cpp", ".S", ".asm"] + header_suffixes = [".h", ".hpp"] + + files_removed = self.repository.get_changed_files(diff_filter="D") + files_added = self.repository.get_changed_files(diff_filter="A") + + # Filter the files added to just source files we track in moz.build files. + files_added = [ + f for f in files_added if any([f.endswith(s) for s in source_suffixes]) + ] + header_files_to_add = [ + f for f in files_added if any([f.endswith(s) for s in header_suffixes]) + ] + if add_to_exports: + files_added += header_files_to_add + elif header_files_to_add: + self.log( + logging.WARNING, + "header_files_warning", + {}, + ( + "We found %s header files in the update, pass --add-to-exports if you want" + + " to attempt to include them in EXPORTS blocks: %s" + ) + % (len(header_files_to_add), header_files_to_add), + ) + + self.logInfo( + {"added": len(files_added), "removed": len(files_removed)}, + "Found {added} files added and {removed} files removed.", + ) + + should_abort = False + for f in files_added: + try: + add_file_to_moz_build_file(f, moz_yaml_dir, vendoring_dir) + except MozBuildRewriteException: + self.log( + logging.ERROR, + "vendor", + {}, + "Could not add %s to the appropriate moz.build file" % f, + ) + should_abort = True + + for f in files_removed: + try: + remove_file_from_moz_build_file(f, moz_yaml_dir, vendoring_dir) + except MozBuildRewriteException: + self.log( + logging.ERROR, + "vendor", + {}, + "Could not remove %s from the appropriate moz.build file" % f, + ) + should_abort = True + + if should_abort: + self.log( + logging.ERROR, + "vendor", + {}, + "This is a deficiency in ./mach vendor . " + + "Please review the affected files before committing.", + ) + # Exit with -1 to distinguish this from the Exception case of exiting with 1 + sys.exit(-1) + + def import_local_patches(self, patches, yaml_dir, vendor_dir): + self.logInfo({}, "Importing local patches...") + for patch in self.convert_patterns_to_paths(yaml_dir, patches): + script = [ + "patch", + "-p1", + "--directory", + vendor_dir, + "--input", + os.path.abspath(patch), + "--no-backup-if-mismatch", + ] + self.run_process( + args=script, + log_name=script, + ) diff --git a/python/mozbuild/mozbuild/vendor/vendor_python.py b/python/mozbuild/mozbuild/vendor/vendor_python.py new file mode 100644 index 0000000000..db554e20d4 --- /dev/null +++ b/python/mozbuild/mozbuild/vendor/vendor_python.py @@ -0,0 +1,228 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +import os +import shutil +import subprocess +import sys +from pathlib import Path + +import mozfile +from mozfile import TemporaryDirectory +from mozpack.files import FileFinder + +from mozbuild.base import MozbuildObject + +EXCLUDED_PACKAGES = { + # dlmanager's package on PyPI only has metadata, but is missing the code. + # https://github.com/parkouss/dlmanager/issues/1 + "dlmanager", + # gyp's package on PyPI doesn't have any downloadable files. 
+ "gyp", + # We keep some wheels vendored in "_venv" for use in Mozharness + "_venv", + # We manage vendoring "vsdownload" with a moz.yaml file (there is no module + # on PyPI). + "vsdownload", + # The moz.build file isn't a vendored module, so don't delete it. + "moz.build", + "requirements.in", + # The ansicon package contains DLLs and we don't want to arbitrarily vendor + # them since they could be unsafe. This module should rarely be used in practice + # (it's a fallback for old versions of windows). We've intentionally vendored a + # modified 'dummy' version of it so that the dependency checks still succeed, but + # if it ever is attempted to be used, it will fail gracefully. + "ansicon", +} + + +class VendorPython(MozbuildObject): + def __init__(self, *args, **kwargs): + MozbuildObject.__init__(self, *args, virtualenv_name="vendor", **kwargs) + + def vendor(self, keep_extra_files=False): + from mach.python_lockfile import PoetryHandle + + self.populate_logger() + self.log_manager.enable_unstructured() + + vendor_dir = Path(self.topsrcdir) / "third_party" / "python" + requirements_in = vendor_dir / "requirements.in" + poetry_lockfile = vendor_dir / "poetry.lock" + _sort_requirements_in(requirements_in) + + with TemporaryDirectory() as work_dir: + work_dir = Path(work_dir) + poetry = PoetryHandle(work_dir) + poetry.add_requirements_in_file(requirements_in) + poetry.reuse_existing_lockfile(poetry_lockfile) + lockfiles = poetry.generate_lockfiles(do_update=False) + + # Vendoring packages is only viable if it's possible to have a single + # set of packages that work regardless of which environment they're used in. + # So, we scrub environment markers, so that we essentially ask pip to + # download "all dependencies for all environments". Pip will then either + # fetch them as requested, or intelligently raise an error if that's not + # possible (e.g.: if different versions of Python would result in different + # packages/package versions). + pip_lockfile_without_markers = work_dir / "requirements.no-markers.txt" + shutil.copy(str(lockfiles.pip_lockfile), str(pip_lockfile_without_markers)) + remove_environment_markers_from_requirements_txt( + pip_lockfile_without_markers + ) + + with TemporaryDirectory() as tmp: + # use requirements.txt to download archived source distributions of all + # packages + subprocess.check_call( + [ + sys.executable, + "-m", + "pip", + "download", + "-r", + str(pip_lockfile_without_markers), + "--no-deps", + "--dest", + tmp, + "--abi", + "none", + "--platform", + "any", + ] + ) + _purge_vendor_dir(vendor_dir) + self._extract(tmp, vendor_dir, keep_extra_files) + + requirements_out = vendor_dir / "requirements.txt" + + # since requirements.out and poetry.lockfile are both outputs from + # third party code, they may contain carriage returns on Windows. We + # should strip the carriage returns to maintain consistency in our output + # regardless of which platform is doing the vendoring. We can do this and + # the copying at the same time to minimize reads and writes. 
+ _copy_file_strip_carriage_return(lockfiles.pip_lockfile, requirements_out) + _copy_file_strip_carriage_return(lockfiles.poetry_lockfile, poetry_lockfile) + self.repository.add_remove_files(vendor_dir) + + def _extract(self, src, dest, keep_extra_files=False): + """extract source distribution into vendor directory""" + + ignore = () + if not keep_extra_files: + ignore = ("*/doc", "*/docs", "*/test", "*/tests", "**/.git") + finder = FileFinder(src) + for archive, _ in finder.find("*"): + _, ext = os.path.splitext(archive) + archive_path = os.path.join(finder.base, archive) + if ext == ".whl": + # Archive is named like "$package-name-1.0-py2.py3-none-any.whl", and should + # have four dashes that aren't part of the package name. + package_name, version, spec, abi, platform_and_suffix = archive.rsplit( + "-", 4 + ) + + if package_name in EXCLUDED_PACKAGES: + print( + f"'{package_name}' is on the exclusion list and will not be vendored." + ) + continue + + target_package_dir = os.path.join(dest, package_name) + os.mkdir(target_package_dir) + + # Extract all the contents of the wheel into the package subdirectory. + # We're expecting at least a code directory and a ".dist-info" directory, + # though there may be a ".data" directory as well. + mozfile.extract(archive_path, target_package_dir, ignore=ignore) + _denormalize_symlinks(target_package_dir) + else: + # Archive is named like "$package-name-1.0.tar.gz", and the rightmost + # dash should separate the package name from the rest of the archive + # specifier. + package_name, archive_postfix = archive.rsplit("-", 1) + package_dir = os.path.join(dest, package_name) + + if package_name in EXCLUDED_PACKAGES: + print( + f"'{package_name}' is on the exclusion list and will not be vendored." + ) + continue + + # The archive should only contain one top-level directory, which has + # the source files. We extract this directory directly to + # the vendor directory. + extracted_files = mozfile.extract(archive_path, dest, ignore=ignore) + assert len(extracted_files) == 1 + extracted_package_dir = extracted_files[0] + + # The extracted package dir includes the version in the name, + # which we don't we don't want. 
+ mozfile.move(extracted_package_dir, package_dir) + _denormalize_symlinks(package_dir) + + +def _sort_requirements_in(requirements_in: Path): + requirements = {} + with requirements_in.open(mode="r", newline="\n") as f: + comments = [] + for line in f.readlines(): + line = line.strip() + if not line or line.startswith("#"): + comments.append(line) + continue + name, version = line.split("==") + requirements[name] = version, comments + comments = [] + + with requirements_in.open(mode="w", newline="\n") as f: + for name, (version, comments) in sorted(requirements.items()): + if comments: + f.write("{}\n".format("\n".join(comments))) + f.write("{}=={}\n".format(name, version)) + + +def remove_environment_markers_from_requirements_txt(requirements_txt: Path): + with requirements_txt.open(mode="r", newline="\n") as f: + lines = f.readlines() + markerless_lines = [] + continuation_token = " \\" + for line in lines: + line = line.rstrip() + + if not line.startswith(" ") and not line.startswith("#") and ";" in line: + has_continuation_token = line.endswith(continuation_token) + # The first line of each requirement looks something like: + # package-name==X.Y; python_version>=3.7 + # We can scrub the environment marker by splitting on the semicolon + line = line.split(";")[0] + if has_continuation_token: + line += continuation_token + markerless_lines.append(line) + else: + markerless_lines.append(line) + + with requirements_txt.open(mode="w", newline="\n") as f: + f.write("\n".join(markerless_lines)) + + +def _purge_vendor_dir(vendor_dir): + for child in Path(vendor_dir).iterdir(): + if child.name not in EXCLUDED_PACKAGES: + mozfile.remove(str(child)) + + +def _denormalize_symlinks(target): + # If any files inside the vendored package were symlinks, turn them into normal files + # because hg.mozilla.org forbids symlinks in the repository. + link_finder = FileFinder(target) + for _, f in link_finder.find("**"): + if os.path.islink(f.path): + link_target = os.path.realpath(f.path) + os.unlink(f.path) + shutil.copyfile(link_target, f.path) + + +def _copy_file_strip_carriage_return(file_src: Path, file_dst): + shutil.copyfileobj(file_src.open(mode="r"), file_dst.open(mode="w", newline="\n")) diff --git a/python/mozbuild/mozbuild/vendor/vendor_rust.py b/python/mozbuild/mozbuild/vendor/vendor_rust.py new file mode 100644 index 0000000000..f87d2efde8 --- /dev/null +++ b/python/mozbuild/mozbuild/vendor/vendor_rust.py @@ -0,0 +1,961 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, # You can obtain one at http://mozilla.org/MPL/2.0/. + +import errno +import hashlib +import json +import logging +import os +import re +import subprocess +import typing +from collections import defaultdict +from itertools import dropwhile +from pathlib import Path + +import mozpack.path as mozpath +import toml +from looseversion import LooseVersion +from mozboot.util import MINIMUM_RUST_VERSION + +from mozbuild.base import BuildEnvironmentNotFoundException, MozbuildObject + +if typing.TYPE_CHECKING: + import datetime + +# Type of a TOML value. +TomlItem = typing.Union[ + str, + typing.List["TomlItem"], + typing.Dict[str, "TomlItem"], + bool, + int, + float, + "datetime.datetime", + "datetime.date", + "datetime.time", +] + + +CARGO_CONFIG_TEMPLATE = """\ +# This file contains vendoring instructions for cargo. +# It was generated by `mach vendor rust`. +# Please do not edit. 
+ +{config} + +# Take advantage of the fact that cargo will treat lines starting with # +# as comments to add preprocessing directives. This file can thus by copied +# as-is to $topsrcdir/.cargo/config with no preprocessing to be used there +# (for e.g. independent tasks building rust code), or be preprocessed by +# the build system to produce a .cargo/config with the right content. +#define REPLACE_NAME {replace_name} +#define VENDORED_DIRECTORY {directory} +# We explicitly exclude the following section when preprocessing because +# it would overlap with the preprocessed [source."@REPLACE_NAME@"], and +# cargo would fail. +#ifndef REPLACE_NAME +[source.{replace_name}] +directory = "{directory}" +#endif + +# Thankfully, @REPLACE_NAME@ is unlikely to be a legitimate source, so +# cargo will ignore it when it's here verbatim. +#filter substitution +[source."@REPLACE_NAME@"] +directory = "@top_srcdir@/@VENDORED_DIRECTORY@" +""" + + +CARGO_LOCK_NOTICE = """ +NOTE: `cargo vendor` may have made changes to your Cargo.lock. To restore your +Cargo.lock to the HEAD version, run `git checkout -- Cargo.lock` or +`hg revert Cargo.lock`. +""" + + +WINDOWS_UNDESIRABLE_REASON = """\ +The windows and windows-sys crates and their dependencies are too big to \ +vendor, and is a risk of version duplication due to its current update \ +cadence. Until this is worked out with upstream, we prefer to avoid them.\ +""" + +PACKAGES_WE_DONT_WANT = { + "windows-sys": WINDOWS_UNDESIRABLE_REASON, + "windows": WINDOWS_UNDESIRABLE_REASON, + "windows_aarch64_msvc": WINDOWS_UNDESIRABLE_REASON, + "windows_i686_gnu": WINDOWS_UNDESIRABLE_REASON, + "windows_i686_msvc": WINDOWS_UNDESIRABLE_REASON, + "windows_x86_64_gnu": WINDOWS_UNDESIRABLE_REASON, + "windows_x86_64_msvc": WINDOWS_UNDESIRABLE_REASON, +} + +PACKAGES_WE_ALWAYS_WANT_AN_OVERRIDE_OF = [ + "autocfg", + "cmake", + "vcpkg", +] + + +# Historically duplicated crates. Eventually we want this list to be empty. +# If you do need to make changes increasing the number of duplicates, please +# add a comment as to why. +TOLERATED_DUPES = { + "mio": 2, + # Transition from time 0.1 to 0.3 underway, but chrono is stuck on 0.1 + # and hasn't been updated in 1.5 years (an hypothetical update is + # expected to remove the dependency on time altogether). + "time": 2, +} + + +class VendorRust(MozbuildObject): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._issues = [] + + def serialize_issues_json(self): + return json.dumps( + { + "Cargo.lock": [ + { + "path": "Cargo.lock", + "column": None, + "line": None, + "level": "error" if level == logging.ERROR else "warning", + "message": msg, + } + for (level, msg) in self._issues + ] + } + ) + + def log(self, level, action, params, format_str): + if level >= logging.WARNING: + self._issues.append((level, format_str.format(**params))) + super().log(level, action, params, format_str) + + def get_cargo_path(self): + try: + return self.substs["CARGO"] + except (BuildEnvironmentNotFoundException, KeyError): + if "MOZ_AUTOMATION" in os.environ: + cargo = os.path.join( + os.environ["MOZ_FETCHES_DIR"], "rustc", "bin", "cargo" + ) + assert os.path.exists(cargo) + return cargo + # Default if this tree isn't configured. + from mozfile import which + + cargo = which("cargo") + if not cargo: + raise OSError( + errno.ENOENT, + ( + "Could not find 'cargo' on your $PATH. " + "Hint: have you run `mach build` or `mach configure`?" 
+ ), + ) + return cargo + + def check_cargo_version(self, cargo): + """ + Ensure that Cargo is new enough. + """ + out = ( + subprocess.check_output([cargo, "--version"]) + .splitlines()[0] + .decode("UTF-8") + ) + if not out.startswith("cargo"): + return False + version = LooseVersion(out.split()[1]) + # Cargo 1.68.0 changed vendoring in a way that creates a lot of noise + # if we go back and forth between vendoring with an older version and + # a newer version. Only allow the newer versions. + minimum_rust_version = MINIMUM_RUST_VERSION + if LooseVersion("1.68.0") >= MINIMUM_RUST_VERSION: + minimum_rust_version = "1.68.0" + if version < minimum_rust_version: + self.log( + logging.ERROR, + "cargo_version", + {}, + "Cargo >= {0} required (install Rust {0} or newer)".format( + minimum_rust_version + ), + ) + return False + self.log(logging.DEBUG, "cargo_version", {}, "cargo is new enough") + return True + + def has_modified_files(self): + """ + Ensure that there aren't any uncommitted changes to files + in the working copy, since we're going to change some state + on the user. Allow changes to Cargo.{toml,lock} since that's + likely to be a common use case. + """ + modified = [ + f + for f in self.repository.get_changed_files("M") + if os.path.basename(f) not in ("Cargo.toml", "Cargo.lock") + and not f.startswith("supply-chain/") + ] + if modified: + self.log( + logging.ERROR, + "modified_files", + {}, + """You have uncommitted changes to the following files: + +{files} + +Please commit or stash these changes before vendoring, or re-run with `--ignore-modified`. +""".format( + files="\n".join(sorted(modified)) + ), + ) + return modified + + def check_openssl(self): + """ + Set environment flags for building with openssl. + + MacOS doesn't include openssl, but the openssl-sys crate used by + mach-vendor expects one of the system. It's common to have one + installed in /usr/local/opt/openssl by homebrew, but custom link + flags are necessary to build against it. + """ + + test_paths = ["/usr/include", "/usr/local/include"] + if any( + [os.path.exists(os.path.join(path, "openssl/ssl.h")) for path in test_paths] + ): + # Assume we can use one of these system headers. + return None + + if os.path.exists("/usr/local/opt/openssl/include/openssl/ssl.h"): + # Found a likely homebrew install. + self.log( + logging.INFO, "openssl", {}, "Using OpenSSL in /usr/local/opt/openssl" + ) + return { + "OPENSSL_INCLUDE_DIR": "/usr/local/opt/openssl/include", + "OPENSSL_LIB_DIR": "/usr/local/opt/openssl/lib", + } + + self.log(logging.ERROR, "openssl", {}, "OpenSSL not found!") + return None + + def _ensure_cargo(self): + """ + Ensures all the necessary cargo bits are installed. + + Returns the path to cargo if successful, None otherwise. + """ + cargo = self.get_cargo_path() + if not self.check_cargo_version(cargo): + return None + return cargo + + # A whitelist of acceptable license identifiers for the + # packages.license field from https://spdx.org/licenses/. Cargo + # documentation claims that values are checked against the above + # list and that multiple entries can be separated by '/'. We + # choose to list all combinations instead for the sake of + # completeness and because some entries below obviously do not + # conform to the format prescribed in the documentation. + # + # It is insufficient to have additions to this whitelist reviewed + # solely by a build peer; any additions must be checked by somebody + # competent to review licensing minutiae. + + # Licenses for code used at runtime. 
Please see the above comment before + # adding anything to this list. + RUNTIME_LICENSE_WHITELIST = [ + "Apache-2.0", + "Apache-2.0 WITH LLVM-exception", + # BSD-2-Clause and BSD-3-Clause are ok, but packages using them + # must be added to the appropriate section of about:licenses. + # To encourage people to remember to do that, we do not whitelist + # the licenses themselves, and we require the packages to be added + # to RUNTIME_LICENSE_PACKAGE_WHITELIST below. + "CC0-1.0", + "ISC", + "MIT", + "MPL-2.0", + "Unicode-DFS-2016", + "Unlicense", + "Zlib", + ] + + # Licenses for code used at build time (e.g. code generators). Please see the above + # comments before adding anything to this list. + BUILDTIME_LICENSE_WHITELIST = { + "BSD-3-Clause": [ + "bindgen", + "fuchsia-zircon", + "fuchsia-zircon-sys", + "fuchsia-cprng", + "glsl", + "instant", + ] + } + + # This whitelist should only be used for packages that use an acceptable + # license, but that also need to explicitly mentioned in about:license. + RUNTIME_LICENSE_PACKAGE_WHITELIST = { + "BSD-2-Clause": [ + "arrayref", + "cloudabi", + "Inflector", + "mach", + "qlog", + ], + "BSD-3-Clause": [], + } + + # ICU4X is distributed as individual crates that all share the same LICENSE + # that will need to be individually added to the allow list below. We'll + # define the SHA256 once here, to make the review process easier as new + # ICU4X crates are vendored into the tree. + ICU4X_LICENSE_SHA256 = ( + "02420cc1b4c26d9a3318d60fd57048d015831249a5b776a1ada75cd227e78630" + ) + + # This whitelist should only be used for packages that use a + # license-file and for which the license-file entry has been + # reviewed. The table is keyed by package names and maps to the + # sha256 hash of the license file that we reviewed. + # + # As above, it is insufficient to have additions to this whitelist + # reviewed solely by a build peer; any additions must be checked by + # somebody competent to review licensing minutiae. + RUNTIME_LICENSE_FILE_PACKAGE_WHITELIST = { + # MIT + "deque": "6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb", + # we're whitelisting this fuchsia crate because it doesn't get built in the final + # product but has a license-file that needs ignoring + "fuchsia-cprng": "03b114f53e6587a398931762ee11e2395bfdba252a329940e2c8c9e81813845b", + # Old ICU4X crates for ICU4X 1.0, see comment above. + "yoke-derive": ICU4X_LICENSE_SHA256, + "zerofrom-derive": ICU4X_LICENSE_SHA256, + } + + @staticmethod + def runtime_license(package, license_string): + """Cargo docs say: + --- + https://doc.rust-lang.org/cargo/reference/manifest.html + + This is an SPDX 2.1 license expression for this package. Currently + crates.io will validate the license provided against a whitelist of + known license and exception identifiers from the SPDX license list + 2.4. Parentheses are not currently supported. + + Multiple licenses can be separated with a `/`, although that usage + is deprecated. Instead, use a license expression with AND and OR + operators to get more explicit semantics. + --- + But I have no idea how you can meaningfully AND licenses, so + we will abort if that is detected. We'll handle `/` and OR as + equivalent and approve is any is in our approved list.""" + + # This specific AND combination has been reviewed for encoding_rs. + if ( + license_string == "(Apache-2.0 OR MIT) AND BSD-3-Clause" + and package == "encoding_rs" + ): + return True + + # This specific AND combination has been reviewed for unicode-ident. 
+ if ( + license_string == "(MIT OR Apache-2.0) AND Unicode-DFS-2016" + and package == "unicode-ident" + ): + return True + + if re.search(r"\s+AND", license_string): + return False + + license_list = re.split(r"\s*/\s*|\s+OR\s+", license_string) + for license in license_list: + if license in VendorRust.RUNTIME_LICENSE_WHITELIST: + return True + if package in VendorRust.RUNTIME_LICENSE_PACKAGE_WHITELIST.get(license, []): + return True + return False + + def _check_licenses(self, vendor_dir: str) -> bool: + def verify_acceptable_license(package: str, license: str) -> bool: + self.log( + logging.DEBUG, "package_license", {}, "has license {}".format(license) + ) + + if not self.runtime_license(package, license): + if license not in self.BUILDTIME_LICENSE_WHITELIST: + self.log( + logging.ERROR, + "package_license_error", + {}, + """Package {} has a non-approved license: {}. + + Please request license review on the package's license. If the package's license + is approved, please add it to the whitelist of suitable licenses. + """.format( + package, license + ), + ) + return False + elif package not in self.BUILDTIME_LICENSE_WHITELIST[license]: + self.log( + logging.ERROR, + "package_license_error", + {}, + """Package {} has a license that is approved for build-time dependencies: + {} + but the package itself is not whitelisted as being a build-time only package. + + If your package is build-time only, please add it to the whitelist of build-time + only packages. Otherwise, you need to request license review on the package's license. + If the package's license is approved, please add it to the whitelist of suitable licenses. + """.format( + package, license + ), + ) + return False + return True + + def check_package(package_name: str) -> bool: + self.log( + logging.DEBUG, + "package_check", + {}, + "Checking license for {}".format(package_name), + ) + + toml_file = os.path.join(vendor_dir, package_name, "Cargo.toml") + with open(toml_file, encoding="utf-8") as fh: + toml_data = toml.load(fh) + + package_entry: typing.Dict[str, TomlItem] = toml_data["package"] + license = package_entry.get("license", None) + license_file = package_entry.get("license-file", None) + + if license is not None and type(license) is not str: + self.log( + logging.ERROR, + "package_invalid_license_format", + {}, + "package {} has an invalid `license` field (expected a string)".format( + package_name + ), + ) + return False + + if license_file is not None and type(license_file) is not str: + self.log( + logging.ERROR, + "package_invalid_license_format", + {}, + "package {} has an invalid `license-file` field (expected a string)".format( + package_name + ), + ) + return False + + # License information is optional for crates to provide, but + # we require it. + if not license and not license_file: + self.log( + logging.ERROR, + "package_no_license", + {}, + "package {} does not provide a license".format(package_name), + ) + return False + + # The Cargo.toml spec suggests that crates should either have + # `license` or `license-file`, but not both. We might as well + # be defensive about that, though. 
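(Aside on the expression handling in runtime_license() above: "/" and " OR " are treated as equivalent separators, and any expression containing AND is rejected apart from the two reviewed combinations. A small illustration, with example license strings:)

import re

SEPARATORS = r"\s*/\s*|\s+OR\s+"
assert re.split(SEPARATORS, "MIT OR Apache-2.0") == ["MIT", "Apache-2.0"]
assert re.split(SEPARATORS, "MIT/Apache-2.0") == ["MIT", "Apache-2.0"]
assert re.search(r"\s+AND", "MIT AND Zlib") is not None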
+ if license and license_file: + self.log( + logging.ERROR, + "package_many_licenses", + {}, + "package {} provides too many licenses".format(package_name), + ) + return False + + if license: + return verify_acceptable_license(package_name, license) + + # otherwise, it's a custom license in a separate file + assert license_file is not None + self.log( + logging.DEBUG, + "package_license_file", + {}, + "package has license-file {}".format(license_file), + ) + + if package_name not in self.RUNTIME_LICENSE_FILE_PACKAGE_WHITELIST: + self.log( + logging.ERROR, + "package_license_file_unknown", + {}, + """Package {} has an unreviewed license file: {}. + +Please request review on the provided license; if approved, the package can be added +to the whitelist of packages whose licenses are suitable. +""".format( + package_name, license_file + ), + ) + return False + + approved_hash = self.RUNTIME_LICENSE_FILE_PACKAGE_WHITELIST[package_name] + + with open( + os.path.join(vendor_dir, package_name, license_file), "rb" + ) as license_buf: + current_hash = hashlib.sha256(license_buf.read()).hexdigest() + + if current_hash != approved_hash: + self.log( + logging.ERROR, + "package_license_file_mismatch", + {}, + """Package {} has changed its license file: {} (hash {}). + +Please request review on the provided license; if approved, please update the +license file's hash. +""".format( + package_name, license_file, current_hash + ), + ) + return False + return True + + # Force all of the packages to be checked for license information + # before reducing via `all`, so all license issues are found in a + # single `mach vendor rust` invocation. + results = [ + check_package(p) + for p in os.listdir(vendor_dir) + if os.path.isdir(os.path.join(vendor_dir, p)) + ] + return all(results) + + def _check_build_rust(self, cargo_lock): + ret = True + crates = {} + for path in Path(self.topsrcdir).glob("build/rust/**/Cargo.toml"): + with open(path) as fh: + cargo_toml = toml.load(fh) + path = path.relative_to(self.topsrcdir) + package = cargo_toml["package"] + key = (package["name"], package["version"]) + if key in crates: + self.log( + logging.ERROR, + "build_rust", + { + "path": crates[key], + "path2": path, + "crate": key[0], + "version": key[1], + }, + "{path} and {path2} both contain {crate} {version}", + ) + ret = False + crates[key] = path + + for package in cargo_lock["package"]: + key = (package["name"], package["version"]) + if key in crates and "source" not in package: + crates.pop(key) + + for ((name, version), path) in crates.items(): + self.log( + logging.ERROR, + "build_rust", + {"path": path, "crate": name, "version": version}, + "{crate} {version} has an override in {path} that is not used", + ) + ret = False + return ret + + def vendor( + self, ignore_modified=False, build_peers_said_large_imports_were_ok=False + ): + from mozbuild.mach_commands import cargo_vet + + self.populate_logger() + self.log_manager.enable_unstructured() + if not ignore_modified and self.has_modified_files(): + return False + + cargo = self._ensure_cargo() + if not cargo: + self.log(logging.ERROR, "cargo_not_found", {}, "Cargo was not found.") + return False + + relative_vendor_dir = "third_party/rust" + vendor_dir = mozpath.join(self.topsrcdir, relative_vendor_dir) + + # We use check_call instead of mozprocess to ensure errors are displayed. + # We do an |update -p| here to regenerate the Cargo.lock file with minimal + # changes. 
See bug 1324462 + res = subprocess.run([cargo, "update", "-p", "gkrust"], cwd=self.topsrcdir) + if res.returncode: + self.log(logging.ERROR, "cargo_update_failed", {}, "Cargo update failed.") + return False + + with open(os.path.join(self.topsrcdir, "Cargo.lock")) as fh: + cargo_lock = toml.load(fh) + failed = False + for package in cargo_lock.get("patch", {}).get("unused", []): + self.log( + logging.ERROR, + "unused_patch", + {"crate": package["name"]}, + """Unused patch in top-level Cargo.toml for {crate}.""", + ) + failed = True + + if not self._check_build_rust(cargo_lock): + failed = True + + grouped = defaultdict(list) + for package in cargo_lock["package"]: + if package["name"] in PACKAGES_WE_ALWAYS_WANT_AN_OVERRIDE_OF: + # When the in-tree version is used, there is `source` for + # it in Cargo.lock, which is what we expect. + if package.get("source"): + self.log( + logging.ERROR, + "non_overridden", + { + "crate": package["name"], + "version": package["version"], + "source": package["source"], + }, + "Crate {crate} v{version} must be overridden but isn't " + "and comes from {source}.", + ) + failed = True + elif package["name"] in PACKAGES_WE_DONT_WANT: + self.log( + logging.ERROR, + "undesirable", + { + "crate": package["name"], + "version": package["version"], + "reason": PACKAGES_WE_DONT_WANT[package["name"]], + }, + "Crate {crate} is not desirable: {reason}", + ) + failed = True + grouped[package["name"]].append(package) + + for name, packages in grouped.items(): + # Allow to have crates of the same name when one depends on the other. + num = len( + [ + p + for p in packages + if all(d.split()[0] != name for d in p.get("dependencies", [])) + ] + ) + expected = TOLERATED_DUPES.get(name, 1) + if num > expected: + self.log( + logging.ERROR, + "duplicate_crate", + { + "crate": name, + "num": num, + "expected": expected, + "file": Path(__file__).relative_to(self.topsrcdir), + }, + "There are {num} different versions of crate {crate} " + "(expected {expected}). Please avoid the extra duplication " + "or adjust TOLERATED_DUPES in {file} if not possible " + "(but we'd prefer the former).", + ) + failed = True + elif num < expected and num > 1: + self.log( + logging.ERROR, + "less_duplicate_crate", + { + "crate": name, + "num": num, + "expected": expected, + "file": Path(__file__).relative_to(self.topsrcdir), + }, + "There are {num} different versions of crate {crate} " + "(expected {expected}). Please adjust TOLERATED_DUPES in " + "{file} to reflect this improvement.", + ) + failed = True + elif num < expected and num > 0: + self.log( + logging.ERROR, + "less_duplicate_crate", + { + "crate": name, + "file": Path(__file__).relative_to(self.topsrcdir), + }, + "Crate {crate} is not duplicated anymore. " + "Please adjust TOLERATED_DUPES in {file} to reflect this improvement.", + ) + failed = True + elif name in TOLERATED_DUPES and expected <= 1: + self.log( + logging.ERROR, + "broken_allowed_dupes", + { + "crate": name, + "file": Path(__file__).relative_to(self.topsrcdir), + }, + "Crate {crate} is not duplicated. Remove it from " + "TOLERATED_DUPES in {file}.", + ) + failed = True + + for name in TOLERATED_DUPES: + if name not in grouped: + self.log( + logging.ERROR, + "outdated_allowed_dupes", + { + "crate": name, + "file": Path(__file__).relative_to(self.topsrcdir), + }, + "Crate {crate} is not in Cargo.lock anymore. Remove it from " + "TOLERATED_DUPES in {file}.", + ) + failed = True + + # Only emit warnings for cargo-vet for now. 
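(Aside on the duplicate-crate check earlier in vendor(): a crate version only counts toward TOLERATED_DUPES when it does not itself depend on another version of the same crate. A minimal sketch using a hypothetical Cargo.lock fragment:)

packages = [
    {"name": "mio", "version": "0.8.0"},
    {"name": "mio", "version": "0.6.23", "dependencies": ["mio 0.8.0"]},
]
name = "mio"
num = len(
    [
        p
        for p in packages
        if all(d.split()[0] != name for d in p.get("dependencies", []))
    ]
)
assert num == 1  # the 0.6.23 copy depends on the other, so only one is counted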
+ env = os.environ.copy() + env["PATH"] = os.pathsep.join( + ( + str(Path(cargo).parent), + os.environ["PATH"], + ) + ) + flags = ["--output-format=json"] + if "MOZ_AUTOMATION" in os.environ: + flags.append("--locked") + flags.append("--frozen") + res = cargo_vet( + self, + flags, + stdout=subprocess.PIPE, + env=env, + ) + if res.returncode: + vet = json.loads(res.stdout) + logged_error = False + for failure in vet.get("failures", []): + failure["crate"] = failure.pop("name") + self.log( + logging.ERROR, + "cargo_vet_failed", + failure, + "Missing audit for {crate}:{version} (requires {missing_criteria})." + " Run `./mach cargo vet` for more information.", + ) + logged_error = True + # NOTE: This could log more information, but the violation JSON + # output isn't super stable yet, so it's probably simpler to tell + # the caller to run `./mach cargo vet` directly. + for key in vet.get("violations", {}).keys(): + self.log( + logging.ERROR, + "cargo_vet_failed", + {"key": key}, + "Violation conflict for {key}. Run `./mach cargo vet` for more information.", + ) + logged_error = True + if "error" in vet: + # NOTE: The error format produced by cargo-vet is from the + # `miette` crate, and can include a lot of metadata and context. + # If we want to show more details in the future, we can expand + # this rendering to also include things like source labels and + # related error metadata. + error = vet["error"] + self.log( + logging.ERROR, + "cargo_vet_failed", + error, + "Vet {severity}: {message}", + ) + if "help" in error: + self.log(logging.INFO, "cargo_vet_failed", error, " help: {help}") + for cause in error.get("causes", []): + self.log( + logging.INFO, + "cargo_vet_failed", + {"cause": cause}, + " cause: {cause}", + ) + for related in error.get("related", []): + self.log( + logging.INFO, + "cargo_vet_failed", + related, + " related {severity}: {message}", + ) + self.log( + logging.INFO, + "cargo_vet_failed", + {}, + "Run `./mach cargo vet` for more information.", + ) + logged_error = True + if not logged_error: + self.log( + logging.ERROR, + "cargo_vet_failed", + {}, + "Unknown vet error. Run `./mach cargo vet` for more information.", + ) + failed = True + + # If we failed when checking the crates list and/or running `cargo vet`, + # stop before invoking `cargo vendor`. + if failed: + return False + + res = subprocess.run( + [cargo, "vendor", vendor_dir], cwd=self.topsrcdir, stdout=subprocess.PIPE + ) + if res.returncode: + self.log(logging.ERROR, "cargo_vendor_failed", {}, "Cargo vendor failed.") + return False + output = res.stdout.decode("UTF-8") + + # Get the snippet of configuration that cargo vendor outputs, and + # update .cargo/config with it. + # XXX(bug 1576765): Hopefully do something better after + # https://github.com/rust-lang/cargo/issues/7280 is addressed. + config = "\n".join( + dropwhile(lambda l: not l.startswith("["), output.splitlines()) + ) + + # The config is toml; parse it as such. + config = toml.loads(config) + + # For each replace-with, extract their configuration and update the + # corresponding directory to be relative to topsrcdir. + replaces = { + v["replace-with"] for v in config["source"].values() if "replace-with" in v + } + + # We only really expect one replace-with + if len(replaces) != 1: + self.log( + logging.ERROR, + "vendor_failed", + {}, + """cargo vendor didn't output a unique replace-with. 
Found: %s.""" + % replaces, + ) + return False + + replace_name = replaces.pop() + replace = config["source"].pop(replace_name) + replace["directory"] = mozpath.relpath( + mozpath.normsep(os.path.normcase(replace["directory"])), + mozpath.normsep(os.path.normcase(self.topsrcdir)), + ) + + cargo_config = os.path.join(self.topsrcdir, ".cargo", "config.in") + with open(cargo_config, "w", encoding="utf-8", newline="\n") as fh: + fh.write( + CARGO_CONFIG_TEMPLATE.format( + config=toml.dumps(config), + replace_name=replace_name, + directory=replace["directory"], + ) + ) + + if not self._check_licenses(vendor_dir): + self.log( + logging.ERROR, + "license_check_failed", + {}, + """The changes from `mach vendor rust` will NOT be added to version control. + +{notice}""".format( + notice=CARGO_LOCK_NOTICE + ), + ) + self.repository.clean_directory(vendor_dir) + return False + + self.repository.add_remove_files(vendor_dir) + + # 100k is a reasonable upper bound on source file size. + FILESIZE_LIMIT = 100 * 1024 + large_files = set() + cumulative_added_size = 0 + for f in self.repository.get_changed_files("A"): + path = mozpath.join(self.topsrcdir, f) + size = os.stat(path).st_size + cumulative_added_size += size + if size > FILESIZE_LIMIT: + large_files.add(f) + + # Forcefully complain about large files being added, as history has + # shown that large-ish files typically are not needed. + if large_files and not build_peers_said_large_imports_were_ok: + self.log( + logging.ERROR, + "filesize_check", + {}, + """The following files exceed the filesize limit of {size}: + +{files} + +If you can't reduce the size of these files, talk to a build peer (on the #build +channel at https://chat.mozilla.org) about the particular large files you are +adding. + +The changes from `mach vendor rust` will NOT be added to version control. + +{notice}""".format( + files="\n".join(sorted(large_files)), + size=FILESIZE_LIMIT, + notice=CARGO_LOCK_NOTICE, + ), + ) + self.repository.forget_add_remove_files(vendor_dir) + self.repository.clean_directory(vendor_dir) + return False + + # Only warn for large imports, since we may just have large code + # drops from time to time (e.g. importing features into m-c). + SIZE_WARN_THRESHOLD = 5 * 1024 * 1024 + if cumulative_added_size >= SIZE_WARN_THRESHOLD: + self.log( + logging.WARN, + "filesize_check", + {}, + """Your changes add {size} bytes of added files. + +Please consider finding ways to reduce the size of the vendored packages. +For instance, check the vendored packages for unusually large test or +benchmark files that don't need to be published to crates.io and submit +a pull request upstream to ignore those files when publishing.""".format( + size=cumulative_added_size + ), + ) + return True |