Diffstat (limited to 'python/mozbuild/mozbuild/vendor')
-rw-r--r--  python/mozbuild/mozbuild/vendor/__init__.py               0
-rw-r--r--  python/mozbuild/mozbuild/vendor/host_angle.py            37
-rw-r--r--  python/mozbuild/mozbuild/vendor/host_base.py             77
-rw-r--r--  python/mozbuild/mozbuild/vendor/host_codeberg.py         28
-rw-r--r--  python/mozbuild/mozbuild/vendor/host_github.py           27
-rw-r--r--  python/mozbuild/mozbuild/vendor/host_gitlab.py           26
-rw-r--r--  python/mozbuild/mozbuild/vendor/host_googlesource.py     32
-rw-r--r--  python/mozbuild/mozbuild/vendor/mach_commands.py        232
-rw-r--r--  python/mozbuild/mozbuild/vendor/moz.build                 8
-rw-r--r--  python/mozbuild/mozbuild/vendor/moz_yaml.py             770
-rw-r--r--  python/mozbuild/mozbuild/vendor/rewrite_mozbuild.py    1286
-rwxr-xr-x  python/mozbuild/mozbuild/vendor/test_vendor_changes.sh   65
-rw-r--r--  python/mozbuild/mozbuild/vendor/vendor_manifest.py      789
-rw-r--r--  python/mozbuild/mozbuild/vendor/vendor_python.py        228
-rw-r--r--  python/mozbuild/mozbuild/vendor/vendor_rust.py          961
15 files changed, 4566 insertions, 0 deletions
diff --git a/python/mozbuild/mozbuild/vendor/__init__.py b/python/mozbuild/mozbuild/vendor/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/python/mozbuild/mozbuild/vendor/__init__.py
diff --git a/python/mozbuild/mozbuild/vendor/host_angle.py b/python/mozbuild/mozbuild/vendor/host_angle.py
new file mode 100644
index 0000000000..9716c76a24
--- /dev/null
+++ b/python/mozbuild/mozbuild/vendor/host_angle.py
@@ -0,0 +1,37 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import requests
+
+from mozbuild.vendor.host_base import BaseHost
+
+
+class AngleHost(BaseHost):
+ def upstream_commit(self, revision):
+ raise Exception("Should not be called")
+
+ def upstream_tag(self, revision):
+ data = requests.get("https://omahaproxy.appspot.com/all.json").json()
+
+ for row in data:
+ if row["os"] == "win64":
+ for version in row["versions"]:
+ if version["channel"] == "beta":
+ branch = "chromium/" + version["true_branch"]
+
+ if revision != "HEAD" and revision != branch:
+ raise Exception(
+ "Passing a --revision for Angle that is not HEAD "
+ + "or the true branch is not supported."
+ )
+
+ return (
+ branch,
+ version["current_reldate"],
+ )
+
+ raise Exception("Could not find win64 beta version in the JSON response")
+
+ def upstream_snapshot(self, revision):
+ raise Exception("Not supported for Angle")
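A minimal sketch of the omahaproxy payload shape the loop above assumes; the
field names match the code, but the values are illustrative:

    # Trimmed, hypothetical omahaproxy response; only the fields that
    # AngleHost.upstream_tag reads are shown.
    data = [
        {
            "os": "win64",
            "versions": [
                {
                    "channel": "beta",
                    "true_branch": "5735",
                    "current_reldate": "05/17/23",
                }
            ],
        }
    ]

    for row in data:
        if row["os"] == "win64":
            for version in row["versions"]:
                if version["channel"] == "beta":
                    # Mirrors the (branch, date) pair returned above.
                    print("chromium/" + version["true_branch"], version["current_reldate"])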
diff --git a/python/mozbuild/mozbuild/vendor/host_base.py b/python/mozbuild/mozbuild/vendor/host_base.py
new file mode 100644
index 0000000000..2484d82e09
--- /dev/null
+++ b/python/mozbuild/mozbuild/vendor/host_base.py
@@ -0,0 +1,77 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import os
+import subprocess
+import tempfile
+import urllib
+
+
+class BaseHost:
+ def __init__(self, manifest):
+ self.manifest = manifest
+ self.repo_url = urllib.parse.urlparse(self.manifest["vendoring"]["url"])
+
+ def upstream_tag(self, revision):
+ """Temporarily clone the repo to get the latest tag and timestamp"""
+ with tempfile.TemporaryDirectory() as temp_repo_clone:
+ starting_directory = os.getcwd()
+ os.chdir(temp_repo_clone)
+ subprocess.run(
+ [
+ "git",
+ "clone",
+ "-c",
+ "core.autocrlf=input",
+ self.manifest["vendoring"]["url"],
+ self.manifest["origin"]["name"],
+ ],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ universal_newlines=True,
+ check=True,
+ )
+ os.chdir("/".join([temp_repo_clone, self.manifest["origin"]["name"]]))
+ if revision == "HEAD":
+ tag = subprocess.run(
+ ["git", "--no-pager", "tag", "--sort=creatordate"],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ universal_newlines=True,
+ check=True,
+ ).stdout.splitlines()[-1]
+ else:
+ try:
+ tag = subprocess.run(
+ ["git", "--no-pager", "tag", "-l", revision],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ universal_newlines=True,
+ check=True,
+ ).stdout.splitlines()[-1]
+ except IndexError: # 0 lines of output, the tag does not exist
+ raise Exception(f"Requested tag {revision} not found in source.")
+
+ tag_timestamp = subprocess.run(
+ [
+ "git",
+ "log",
+ "-1",
+ "--date=iso8601-strict",
+ "--format=%ad",
+ tag,
+ ],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ universal_newlines=True,
+ check=True,
+ ).stdout.splitlines()[-1]
+ os.chdir(starting_directory)
+ return tag, tag_timestamp
+
+ def upstream_snapshot(self, revision):
+ raise Exception("Unimplemented for this subclass...")
+
+ def upstream_path_to_file(self, revision, filepath):
+ raise Exception("Unimplemented for this subclass...")
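A usage sketch for the base class, assuming network access and a cloneable
URL; the manifest fragment below carries only the keys BaseHost actually
reads (vendoring.url and origin.name), with illustrative values:

    from mozbuild.vendor.host_base import BaseHost

    # Hypothetical manifest fragment.
    manifest = {
        "origin": {"name": "nestegg"},
        "vendoring": {"url": "https://github.com/kinetiknz/nestegg"},
    }

    host = BaseHost(manifest)
    # Clones into a temp directory, then returns the newest tag and its
    # ISO-8601 commit timestamp.
    tag, timestamp = host.upstream_tag("HEAD")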
diff --git a/python/mozbuild/mozbuild/vendor/host_codeberg.py b/python/mozbuild/mozbuild/vendor/host_codeberg.py
new file mode 100644
index 0000000000..158dd0472d
--- /dev/null
+++ b/python/mozbuild/mozbuild/vendor/host_codeberg.py
@@ -0,0 +1,28 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import requests
+
+from mozbuild.vendor.host_base import BaseHost
+
+
+class CodebergHost(BaseHost):
+ def upstream_commit(self, revision):
+ """Query the codeberg api for a git commit id and timestamp."""
+ codeberg_api = (
+ self.repo_url.scheme + "://" + self.repo_url.netloc + "/api/v1/repos/"
+ )
+ codeberg_api += self.repo_url.path[1:]
+ codeberg_api += "/git/commits"
+ req = requests.get("/".join([codeberg_api, revision]))
+ req.raise_for_status()
+ info = req.json()
+ return (info["sha"], info["created"])
+
+ def upstream_snapshot(self, revision):
+ codeberg_api = (
+ self.repo_url.scheme + "://" + self.repo_url.netloc + "/api/v1/repos/"
+ )
+ codeberg_api += self.repo_url.path[1:]
+ return "/".join([codeberg_api, "archive", revision + ".tar.gz"])
diff --git a/python/mozbuild/mozbuild/vendor/host_github.py b/python/mozbuild/mozbuild/vendor/host_github.py
new file mode 100644
index 0000000000..eeaa4b9eaf
--- /dev/null
+++ b/python/mozbuild/mozbuild/vendor/host_github.py
@@ -0,0 +1,27 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import requests
+
+from mozbuild.vendor.host_base import BaseHost
+
+
+class GitHubHost(BaseHost):
+ def upstream_commit(self, revision):
+ """Query the github api for a git commit id and timestamp."""
+ github_api = "https://api.github.com"
+ repo = self.repo_url.path[1:].strip("/")
+ req = requests.get("/".join([github_api, "repos", repo, "commits", revision]))
+ req.raise_for_status()
+ info = req.json()
+ return (info["sha"], info["commit"]["committer"]["date"])
+
+ def upstream_snapshot(self, revision):
+ return "/".join(
+ [self.manifest["vendoring"]["url"], "archive", revision + ".tar.gz"]
+ )
+
+ def upstream_path_to_file(self, revision, filepath):
+ repo = self.repo_url.path[1:]
+ return "/".join(["https://raw.githubusercontent.com", repo, revision, filepath])
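For illustration, the URLs GitHubHost derives from a hypothetical manifest
(the repository name is an example, not taken from this patch):

    from mozbuild.vendor.host_github import GitHubHost

    manifest = {"vendoring": {"url": "https://github.com/kinetiknz/nestegg"}}
    host = GitHubHost(manifest)

    # upstream_commit queries
    #   https://api.github.com/repos/kinetiknz/nestegg/commits/HEAD
    # and returns the commit sha and committer date.
    sha, date = host.upstream_commit("HEAD")

    # The other two methods only build URLs:
    #   https://github.com/kinetiknz/nestegg/archive/<sha>.tar.gz
    #   https://raw.githubusercontent.com/kinetiknz/nestegg/<sha>/README.md
    print(host.upstream_snapshot(sha))
    print(host.upstream_path_to_file(sha, "README.md"))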
diff --git a/python/mozbuild/mozbuild/vendor/host_gitlab.py b/python/mozbuild/mozbuild/vendor/host_gitlab.py
new file mode 100644
index 0000000000..8bfc3ddc79
--- /dev/null
+++ b/python/mozbuild/mozbuild/vendor/host_gitlab.py
@@ -0,0 +1,26 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import requests
+
+from mozbuild.vendor.host_base import BaseHost
+
+
+class GitLabHost(BaseHost):
+ def upstream_commit(self, revision):
+ """Query the gitlab api for a git commit id and timestamp."""
+ gitlab_api = (
+ self.repo_url.scheme + "://" + self.repo_url.netloc + "/api/v4/projects/"
+ )
+ gitlab_api += self.repo_url.path[1:].replace("/", "%2F")
+ gitlab_api += "/repository/commits"
+ req = requests.get("/".join([gitlab_api, revision]))
+ req.raise_for_status()
+ info = req.json()
+ return (info["id"], info["committed_date"])
+
+ def upstream_snapshot(self, revision):
+ return "/".join(
+ [self.manifest["vendoring"]["url"], "-", "archive", revision + ".tar.gz"]
+ )
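Note that the project path is percent-encoded into the GitLab API URL. For a
hypothetical manifest this yields:

    from mozbuild.vendor.host_gitlab import GitLabHost

    # Hypothetical manifest; the project path is an example.
    manifest = {"vendoring": {"url": "https://gitlab.com/example/project"}}
    host = GitLabHost(manifest)

    # upstream_commit queries
    #   https://gitlab.com/api/v4/projects/example%2Fproject/repository/commits/HEAD
    sha, date = host.upstream_commit("HEAD")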
diff --git a/python/mozbuild/mozbuild/vendor/host_googlesource.py b/python/mozbuild/mozbuild/vendor/host_googlesource.py
new file mode 100644
index 0000000000..c903bd99b5
--- /dev/null
+++ b/python/mozbuild/mozbuild/vendor/host_googlesource.py
@@ -0,0 +1,32 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import requests
+
+from mozbuild.vendor.host_base import BaseHost
+
+
+class GoogleSourceHost(BaseHost):
+ def upstream_commit(self, revision):
+ """Query for a git commit and timestamp."""
+ url = "/".join(
+ [self.manifest["vendoring"]["url"], "+", revision + "?format=JSON"]
+ )
+ req = requests.get(url)
+ req.raise_for_status()
+ try:
+ info = req.json()
+ except ValueError:
+ # As of 2017 May, googlesource sends 4 garbage characters
+ # at the beginning of the json response. Work around this.
+ # https://bugs.chromium.org/p/chromium/issues/detail?id=718550
+ import json
+
+ info = json.loads(req.text[4:])
+ return (info["commit"], info["committer"]["time"])
+
+ def upstream_snapshot(self, revision):
+ return "/".join(
+ [self.manifest["vendoring"]["url"], "+archive", revision + ".tar.gz"]
+ )
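The "4 garbage characters" worked around above are gitiles' anti-XSSI guard,
)]}' followed by a newline. A self-contained sketch of the same workaround,
with a fabricated response body:

    import json

    # Hypothetical gitiles response body with the anti-XSSI prefix.
    raw = ")]}'\n{\"commit\": \"abc123\", \"committer\": {\"time\": \"Mon May 01 00:00:00 2017\"}}"

    # Stripping the four-character guard leaves valid JSON; json.loads
    # tolerates the leading newline.
    info = json.loads(raw[4:])
    print(info["commit"], info["committer"]["time"])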
diff --git a/python/mozbuild/mozbuild/vendor/mach_commands.py b/python/mozbuild/mozbuild/vendor/mach_commands.py
new file mode 100644
index 0000000000..30fb0e16a5
--- /dev/null
+++ b/python/mozbuild/mozbuild/vendor/mach_commands.py
@@ -0,0 +1,232 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import logging
+import sys
+
+from mach.decorators import Command, CommandArgument, SubCommand
+
+from mozbuild.vendor.moz_yaml import MozYamlVerifyError, load_moz_yaml
+
+
+# Fun quirk of ./mach - you can specify a default argument as well as subcommands.
+# If the default argument matches a subcommand, the subcommand gets called. If it
+# doesn't, we wind up in the default command.
+@Command(
+ "vendor",
+ category="misc",
+ description="Vendor third-party dependencies into the source repository.",
+)
+@CommandArgument(
+ "--check-for-update",
+ action="store_true",
+ help="For scripted use, prints the new commit to update to, or nothing if up to date.",
+ default=False,
+)
+@CommandArgument(
+ "--add-to-exports",
+ action="store_true",
+ help="Will attempt to add new header files into any relevant EXPORTS block.",
+ default=False,
+)
+@CommandArgument(
+ "--ignore-modified",
+ action="store_true",
+ help="Ignore modified files in current checkout.",
+ default=False,
+)
+@CommandArgument("-r", "--revision", help="Repository tag or commit to update to.")
+@CommandArgument(
+ "-f",
+ "--force",
+ action="store_true",
+ help="Force a re-vendor even if we're up to date",
+)
+@CommandArgument(
+ "--verify", "-v", action="store_true", help="(Only) verify the manifest."
+)
+@CommandArgument(
+ "--patch-mode",
+ help="Select how vendored patches will be imported. 'none' skips patch import, and"
+ "'only' imports patches and skips library vendoring.",
+ default="",
+)
+@CommandArgument("library", nargs=1, help="The moz.yaml file of the library to vendor.")
+def vendor(
+ command_context,
+ library,
+ revision,
+ ignore_modified=False,
+ check_for_update=False,
+ add_to_exports=False,
+ force=False,
+ verify=False,
+ patch_mode="",
+):
+ """
+ Vendor third-party dependencies into the source repository.
+
+ Vendoring rust and python can be done with ./mach vendor [rust/python].
+ Vendoring other libraries can be done with ./mach vendor [arguments] path/to/file.yaml
+ """
+ library = library[0]
+ assert library not in ["rust", "python"]
+
+ command_context.populate_logger()
+ command_context.log_manager.enable_unstructured()
+ if check_for_update:
+ logging.disable(level=logging.CRITICAL)
+
+ try:
+ manifest = load_moz_yaml(library)
+ if verify:
+ print("%s: OK" % library)
+ sys.exit(0)
+ except MozYamlVerifyError as e:
+ print(e)
+ sys.exit(1)
+
+ if "vendoring" not in manifest:
+ raise Exception(
+ "Cannot perform update actions if we don't have a 'vendoring' section in the moz.yaml"
+ )
+
+ if patch_mode and patch_mode not in ["none", "only"]:
+ print(
+ "Unknown patch mode given '%s'. Please use one of: 'none' or 'only'."
+ % patch_mode
+ )
+ sys.exit(1)
+ if (
+ manifest["vendoring"].get("patches", [])
+ and not patch_mode
+ and not check_for_update
+ ):
+ print(
+ "Patch mode was not given when required. Please use one of: 'none' or 'only'"
+ )
+ sys.exit(1)
+ if patch_mode == "only" and not manifest["vendoring"].get("patches", []):
+ print(
+ "Patch import was specified for %s but there are no vendored patches defined."
+ % library
+ )
+ sys.exit(1)
+
+ if not ignore_modified and not check_for_update:
+ check_modified_files(command_context)
+ elif ignore_modified and not check_for_update:
+ print(
+ "Because you passed --ignore-modified we will not be "
+ + "able to detect spurious upstream updates."
+ )
+
+ if not revision:
+ revision = "HEAD"
+
+ from mozbuild.vendor.vendor_manifest import VendorManifest
+
+ vendor_command = command_context._spawn(VendorManifest)
+ vendor_command.vendor(
+ command_context,
+ library,
+ manifest,
+ revision,
+ ignore_modified,
+ check_for_update,
+ force,
+ add_to_exports,
+ patch_mode,
+ )
+
+ sys.exit(0)
+
+
+def check_modified_files(command_context):
+ """
+ Ensure that there aren't any uncommitted changes to files
+ in the working copy, since we're going to change some state
+ on the user.
+ """
+ modified = command_context.repository.get_changed_files("M")
+ if modified:
+ command_context.log(
+ logging.ERROR,
+ "modified_files",
+ {},
+ """You have uncommitted changes to the following files:
+
+{files}
+
+Please commit or stash these changes before vendoring, or re-run with `--ignore-modified`.
+""".format(
+ files="\n".join(sorted(modified))
+ ),
+ )
+ sys.exit(1)
+
+
+# =====================================================================
+
+
+@SubCommand(
+ "vendor",
+ "rust",
+ description="Vendor rust crates from crates.io into third_party/rust",
+)
+@CommandArgument(
+ "--ignore-modified",
+ action="store_true",
+ help="Ignore modified files in current checkout",
+ default=False,
+)
+@CommandArgument(
+ "--build-peers-said-large-imports-were-ok",
+ action="store_true",
+ help=(
+ "Permit overly-large files to be added to the repository. "
+ "To get permission to set this, raise a question in the #build "
+ "channel at https://chat.mozilla.org."
+ ),
+ default=False,
+)
+@CommandArgument(
+ "--issues-json",
+ help="Path to a code-review issues.json file to write out",
+)
+def vendor_rust(command_context, **kwargs):
+ from mozbuild.vendor.vendor_rust import VendorRust
+
+ vendor_command = command_context._spawn(VendorRust)
+ issues_json = kwargs.pop("issues_json", None)
+ ok = vendor_command.vendor(**kwargs)
+ if issues_json:
+ with open(issues_json, "w") as fh:
+ fh.write(vendor_command.serialize_issues_json())
+ sys.exit(0 if ok else 1)
+
+
+# =====================================================================
+
+
+@SubCommand(
+ "vendor",
+ "python",
+ description="Vendor Python packages from pypi.org into third_party/python. "
+ "Some extra files like docs and tests will automatically be excluded."
+ "Installs the packages listed in third_party/python/requirements.in and "
+ "their dependencies.",
+ virtualenv_name="vendor",
+)
+@CommandArgument(
+ "--keep-extra-files",
+ action="store_true",
+ default=False,
+ help="Keep all files, including tests and documentation.",
+)
+def vendor_python(command_context, keep_extra_files):
+ from mozbuild.vendor.vendor_python import VendorPython
+
+ vendor_command = command_context._spawn(VendorPython)
+ vendor_command.vendor(keep_extra_files)
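Putting the three commands together, typical invocations look like the
following (the moz.yaml path is illustrative):

    ./mach vendor media/libdav1d/moz.yaml --patch-mode none
    ./mach vendor media/libdav1d/moz.yaml --check-for-update
    ./mach vendor rust --ignore-modified
    ./mach vendor python --keep-extra-files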
diff --git a/python/mozbuild/mozbuild/vendor/moz.build b/python/mozbuild/mozbuild/vendor/moz.build
new file mode 100644
index 0000000000..315dc32600
--- /dev/null
+++ b/python/mozbuild/mozbuild/vendor/moz.build
@@ -0,0 +1,8 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+with Files("**"):
+ BUG_COMPONENT = ("Developer Infrastructure", "Mach Vendor & Updatebot")
diff --git a/python/mozbuild/mozbuild/vendor/moz_yaml.py b/python/mozbuild/mozbuild/vendor/moz_yaml.py
new file mode 100644
index 0000000000..51210e19b2
--- /dev/null
+++ b/python/mozbuild/mozbuild/vendor/moz_yaml.py
@@ -0,0 +1,770 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Utility package for working with moz.yaml files.
+#
+# Requires `pyyaml` and `voluptuous`
+# (both are in-tree under third_party/python)
+
+import errno
+import os
+import re
+
+import voluptuous
+import yaml
+from voluptuous import (
+ All,
+ Boolean,
+ FqdnUrl,
+ In,
+ Invalid,
+ Length,
+ Match,
+ Msg,
+ Required,
+ Schema,
+ Unique,
+)
+from yaml.error import MarkedYAMLError
+
+# TODO ensure this matches the approved list of licenses
+VALID_LICENSES = [
+ # Standard Licenses (as per https://spdx.org/licenses/)
+ "Apache-2.0",
+ "BSD-2-Clause",
+ "BSD-3-Clause",
+ "BSD-3-Clause-Clear",
+ "BSL-1.0",
+ "CC0-1.0",
+ "ISC",
+ "ICU",
+ "LGPL-2.1",
+ "LGPL-3.0",
+ "MIT",
+ "MPL-1.1",
+ "MPL-2.0",
+ "Unlicense",
+ "WTFPL",
+ "Zlib",
+ # Unique Licenses
+ "ACE", # http://www.cs.wustl.edu/~schmidt/ACE-copying.html
+ "Anti-Grain-Geometry", # http://www.antigrain.com/license/index.html
+ "JPNIC", # https://www.nic.ad.jp/ja/idn/idnkit/download/index.html
+ "Khronos", # https://www.khronos.org/openmaxdl
+ "libpng", # http://www.libpng.org/pub/png/src/libpng-LICENSE.txt
+ "Unicode", # http://www.unicode.org/copyright.html
+]
+
+VALID_SOURCE_HOSTS = ["gitlab", "googlesource", "github", "angle", "codeberg"]
+
+"""
+---
+# Third-Party Library Template
+# All fields are mandatory unless otherwise noted
+
+# Version of this schema
+schema: 1
+
+bugzilla:
+ # Bugzilla product and component for this directory and subdirectories
+ product: product name
+ component: component name
+
+# Document the source of externally hosted code
+origin:
+
+ # Short name of the package/library
+ name: name of the package
+
+ description: short (one line) description
+
+ # Full URL for the package's homepage/etc
+ # Usually different from repository url
+ url: package's homepage url
+
+ # Human-readable identifier for this version/release
+ # Generally "version NNN", "tag SSS", "bookmark SSS"
+ release: identifier
+
+ # Revision to pull in
+ # Must be a long or short commit SHA (long preferred)
+ revision: sha
+
+ # The package's license, where possible using the mnemonic from
+ # https://spdx.org/licenses/
+ # Multiple licenses can be specified (as a YAML list)
+ # A "LICENSE" file must exist containing the full license text
+ license: MPL-2.0
+
+ # If the package's license is specified in a particular file,
+ # this is the name of the file.
+ # optional
+ license-file: COPYING
+
+ # If there are any mozilla-specific notes you want to put
+ # about a library, they can be put here.
+ notes: Notes about the library
+
+# Configuration for the automated vendoring system.
+# optional
+vendoring:
+
+ # Repository URL to vendor from
+ # eg. https://github.com/kinetiknz/nestegg
+ # Any repository host can be specified here, however initially we'll only
+ # support automated vendoring from selected sources.
+ url: source url (generally repository clone url)
+
+ # Type of hosting for the upstream repository
+ # Valid values are 'gitlab', 'github', 'googlesource', 'angle', and 'codeberg'
+ source-hosting: gitlab
+
+ # Type of Vendoring
+ # This is either 'regular', 'individual-files', or 'rust'
+ # If omitted, will default to 'regular'
+ flavor: rust
+
+ # Type of git reference (commit, tag) to track updates from.
+ # You cannot use tag tracking with the individual-files flavor
+ # If omitted, will default to tracking commits.
+ tracking: commit
+
+ # Base directory of the location where the source files will live in-tree.
+ # If omitted, will default to the location the moz.yaml file is in.
+ vendor-directory: third_party/directory
+
+ # Allows skipping certain steps of the vendoring process.
+ # Most useful if e.g. vendoring upstream is complicated and should be done by a script
+ # The valid steps that can be skipped are listed below
+ skip-vendoring-steps:
+ - fetch
+ - keep
+ - include
+ - exclude
+ - move-contents
+ - hg-add
+ - spurious-check
+ - update-moz-yaml
+ - update-moz-build
+
+ # List of patch files to apply after vendoring. Applied in the order
+ # specified, and alphabetically if globbing is used. Patches must apply
+ # cleanly before changes are pushed.
+ # Patch files should be relative to the vendor-directory rather than the gecko
+ # root directory.
+ # All patch files are implicitly added to the keep file list.
+ # optional
+ patches:
+ - file
+ - path/to/file
+ - path/*.patch
+ - path/** # Captures all files and subdirectories below path
+ - path/* # Captures all files but _not_ subdirectories below path. Equivalent to `path/`
+
+ # List of files that are not removed from the destination directory while vendoring
+ # in a new version of the library. Intended for mozilla files not present in upstream.
+ # Implicitly contains "moz.yaml", "moz.build", and any files referenced in
+ # "patches"
+ # optional
+ keep:
+ - file
+ - path/to/file
+ - another/path
+ - *.mozilla
+
+ # Files/paths that will not be vendored from the upstream repository
+ # Implicitly contains ".git", and ".gitignore"
+ # optional
+ exclude:
+ - file
+ - path/to/file
+ - another/path
+ - docs
+ - src/*.test
+
+ # Files/paths that will always be vendored from source repository, even if
+ # they would otherwise be excluded by "exclude".
+ # optional
+ include:
+ - file
+ - path/to/file
+ - another/path
+ - docs/LICENSE.*
+
+ # Files that are modified as part of the update process.
+ # To avoid creating updates that don't update anything, ./mach vendor will detect
+ # if any in-tree files have changed. If there are files that are always changed
+ # during an update process (e.g. version numbers or source revisions), list them
+ # here to avoid having them counted as substantive changes.
+ # This field does NOT support directories or globbing
+ # optional
+ generated:
+ - '{yaml_dir}/vcs_version.h'
+
+ # If neither "exclude" nor "include" is set, all files will be vendored
+ # Files/paths in "include" will always be vendored, even if excluded
+ # eg. excluding "docs/" then including "docs/LICENSE" will vendor just the
+ # LICENSE file from the docs directory
+
+ # All three file/path parameters ("keep", "exclude", and "include") support
+ # filenames, directory names, and globs/wildcards.
+
+ # Actions to take after updating. Applied in order.
+ # The action subfield is required. It must be one of:
+ # - copy-file
+ # - move-file
+ # - move-dir
+ # - replace-in-file
+ # - replace-in-file-regex
+ # - delete-path
+ # - run-script
+ # Unless otherwise noted, all subfields of action are required.
+ #
+ # If the action is copy-file, move-file, or move-dir:
+ # from is the source file
+ # to is the destination
+ #
+ # If the action is replace-in-file or replace-in-file-regex:
+ # pattern is what to search for in the file. It is an exact string match.
+ # with is the string to replace it with. Accepts the special keyword
+ # '{revision}' for the commit we are updating to.
+ # file is the file to replace it in.
+ #
+ # If the action is delete-path
+ # path is the file or directory to recursively delete
+ #
+ # If the action is run-script:
+ # script is the script to run
+ # cwd is the directory the script should run with as its cwd
+ # args is a list of arguments to pass to the script
+ #
+ # If the action is run-command:
+ # command is the command to run
+ # Unlike run-script, `command` is _not_ processed to be relative
+ # to the vendor directory, and is passed directly to python's
+ # execution code without any path substitution or manipulation
+ # cwd is the directory the command should run with as its cwd
+ # args is a list of arguments to pass to the command
+ #
+ #
+ # Unless specified otherwise, all files/directories are relative to the
+ # vendor-directory. If the vendor-directory is different from the
+ # directory of the yaml file, the keyword '{yaml_dir}' may be used
+ # to make the path relative to that directory.
+ # 'run-script' supports the additional keyword {cwd} which, if used,
+ # must only be used at the beginning of the path.
+ #
+ # optional
+ update-actions:
+ - action: copy-file
+ from: include/vcs_version.h.in
+ to: '{yaml_dir}/vcs_version.h'
+
+ - action: replace-in-file
+ pattern: '@VCS_TAG@'
+ with: '{revision}'
+ file: '{yaml_dir}/vcs_version.h'
+
+ - action: delete-path
+ path: '{yaml_dir}/config'
+
+ - action: run-script
+ script: '{cwd}/generate_sources.sh'
+ cwd: '{yaml_dir}'
+
+
+# Configuration for automatic updating system.
+# optional
+updatebot:
+
+ # TODO: allow multiple users to be specified
+ # Phabricator username for a maintainer of the library, used for assigning
+ # reviewers. For a review group, preface with #, such as "#build"
+ maintainer-phab: tjr
+
+ # Bugzilla email address for a maintainer of the library, used for needinfos
+ maintainer-bz: tom@mozilla.com
+
+ # Optional: A preset for ./mach try to use. If present, fuzzy-query and fuzzy-paths will
+ # be ignored. If try-preset, fuzzy-query, and fuzzy-paths are all omitted, ./mach try auto will be used
+ try-preset: media
+
+ # Optional: A query string for ./mach try fuzzy. If try-preset, fuzzy-query, and fuzzy-paths
+ # are all omitted, then ./mach try auto will be used
+ fuzzy-query: media
+
+ # Optional: An array of test paths for ./mach try fuzzy. If try-preset, fuzzy-query, and
+ # fuzzy-paths are all omitted, then ./mach try auto will be used
+ fuzzy-paths: ['media']
+
+ # The tasks that Updatebot can run. Only one of each task is currently permitted
+ # optional
+ tasks:
+ - type: commit-alert
+ branch: upstream-branch-name
+ cc: ["bugzilla@email.address", "another@example.com"]
+ needinfo: ["bugzilla@email.address", "another@example.com"]
+ enabled: True
+ filter: security
+ frequency: every
+ platform: windows
+ blocking: 1234
+ - type: vendoring
+ branch: master
+ enabled: False
+
+ # frequency can be 'every', 'release', 'N weeks', 'N commits'
+ # or 'N weeks, M commits' requiring satisfying both constraints.
+ frequency: 2 weeks
+"""
+
+RE_SECTION = re.compile(r"^(\S[^:]*):").search
+RE_FIELD = re.compile(r"^\s\s([^:]+):\s+(\S+)$").search
+
+
+class MozYamlVerifyError(Exception):
+ def __init__(self, filename, error):
+ self.filename = filename
+ self.error = error
+
+ def __str__(self):
+ return "%s: %s" % (self.filename, self.error)
+
+
+def load_moz_yaml(filename, verify=True, require_license_file=True):
+ """Loads and verifies the specified manifest."""
+
+ # Load and parse YAML.
+ try:
+ with open(filename, "r") as f:
+ manifest = yaml.load(f, Loader=yaml.BaseLoader)
+ except IOError as e:
+ if e.errno == errno.ENOENT:
+ raise MozYamlVerifyError(filename, "Failed to find manifest: %s" % filename)
+ raise
+ except MarkedYAMLError as e:
+ raise MozYamlVerifyError(filename, e)
+
+ if not verify:
+ return manifest
+
+ # Verify schema.
+ if "schema" not in manifest:
+ raise MozYamlVerifyError(filename, 'Missing manifest "schema"')
+ if manifest["schema"] == "1":
+ schema = _schema_1()
+ schema_additional = _schema_1_additional
+ schema_transform = _schema_1_transform
+ else:
+ raise MozYamlVerifyError(filename, "Unsupported manifest schema")
+
+ try:
+ schema(manifest)
+ schema_additional(filename, manifest, require_license_file=require_license_file)
+ manifest = schema_transform(manifest)
+ except (voluptuous.Error, ValueError) as e:
+ raise MozYamlVerifyError(filename, e)
+
+ return manifest
+
+
+def _schema_1():
+ """Returns Voluptuous Schema object."""
+ return Schema(
+ {
+ Required("schema"): "1",
+ Required("bugzilla"): {
+ Required("product"): All(str, Length(min=1)),
+ Required("component"): All(str, Length(min=1)),
+ },
+ "origin": {
+ Required("name"): All(str, Length(min=1)),
+ Required("description"): All(str, Length(min=1)),
+ "notes": All(str, Length(min=1)),
+ Required("url"): FqdnUrl(),
+ Required("license"): Msg(License(), msg="Unsupported License"),
+ "license-file": All(str, Length(min=1)),
+ Required("release"): All(str, Length(min=1)),
+ # The following regex defines a valid git reference
+ # The first group [^ ~^:?*[\]] matches 0 or more times anything
+ # that isn't a Space, ~, ^, :, ?, *, [, or ]
+ # The second group [^ ~^:?*[\]\.]+ matches 1 or more times
+ # anything that isn't a Space, ~, ^, :, ?, *, [, ], or .
+ "revision": Match(r"^[^ ~^:?*[\]]*[^ ~^:?*[\]\.]+$"),
+ },
+ "updatebot": {
+ Required("maintainer-phab"): All(str, Length(min=1)),
+ Required("maintainer-bz"): All(str, Length(min=1)),
+ "try-preset": All(str, Length(min=1)),
+ "fuzzy-query": All(str, Length(min=1)),
+ "fuzzy-paths": All([str], Length(min=1)),
+ "tasks": All(
+ UpdatebotTasks(),
+ [
+ {
+ Required("type"): In(
+ ["vendoring", "commit-alert"],
+ msg="Invalid type specified in tasks",
+ ),
+ "branch": All(str, Length(min=1)),
+ "enabled": Boolean(),
+ "cc": Unique([str]),
+ "needinfo": Unique([str]),
+ "filter": In(
+ ["none", "security", "source-extensions"],
+ msg="Invalid filter value specified in tasks",
+ ),
+ "source-extensions": Unique([str]),
+ "blocking": Match(r"^[0-9]+$"),
+ "frequency": Match(
+ r"^(every|release|[1-9][0-9]* weeks?|[1-9][0-9]* commits?|"
+ + r"[1-9][0-9]* weeks?, ?[1-9][0-9]* commits?)$"
+ ),
+ "platform": Match(r"^(windows|linux)$"),
+ }
+ ],
+ ),
+ },
+ "vendoring": {
+ Required("url"): FqdnUrl(),
+ Required("source-hosting"): All(
+ str,
+ Length(min=1),
+ In(VALID_SOURCE_HOSTS, msg="Unsupported Source Hosting"),
+ ),
+ "tracking": Match(r"^(commit|tag)$"),
+ "flavor": Match(r"^(regular|rust|individual-files)$"),
+ "skip-vendoring-steps": Unique([str]),
+ "vendor-directory": All(str, Length(min=1)),
+ "patches": Unique([str]),
+ "keep": Unique([str]),
+ "exclude": Unique([str]),
+ "include": Unique([str]),
+ "generated": Unique([str]),
+ "individual-files": [
+ {
+ Required("upstream"): All(str, Length(min=1)),
+ Required("destination"): All(str, Length(min=1)),
+ }
+ ],
+ "individual-files-default-upstream": All(str, Length(min=1)),
+ "individual-files-default-destination": All(str, Length(min=1)),
+ "individual-files-list": Unique([str]),
+ "update-actions": All(
+ UpdateActions(),
+ [
+ {
+ Required("action"): In(
+ [
+ "copy-file",
+ "move-file",
+ "move-dir",
+ "replace-in-file",
+ "replace-in-file-regex",
+ "run-script",
+ "run-command",
+ "delete-path",
+ ],
+ msg="Invalid action specified in update-actions",
+ ),
+ "from": All(str, Length(min=1)),
+ "to": All(str, Length(min=1)),
+ "pattern": All(str, Length(min=1)),
+ "with": All(str, Length(min=1)),
+ "file": All(str, Length(min=1)),
+ "script": All(str, Length(min=1)),
+ "command": All(str, Length(min=1)),
+ "args": All([All(str, Length(min=1))]),
+ "cwd": All(str, Length(min=1)),
+ "path": All(str, Length(min=1)),
+ }
+ ],
+ ),
+ },
+ }
+ )
+
+
+def _schema_1_additional(filename, manifest, require_license_file=True):
+ """Additional schema/validity checks"""
+
+ vendor_directory = os.path.dirname(filename)
+ if "vendoring" in manifest and "vendor-directory" in manifest["vendoring"]:
+ vendor_directory = manifest["vendoring"]["vendor-directory"]
+
+ # LICENSE file must exist, except for Rust crates which are exempted
+ # because the license is required to be specified in the Cargo.toml file
+ if require_license_file and "origin" in manifest:
+ files = [f.lower() for f in os.listdir(vendor_directory)]
+ if (
+ not (
+ "license-file" in manifest["origin"]
+ and manifest["origin"]["license-file"].lower() in files
+ )
+ and not (
+ "license" in files
+ or "license.txt" in files
+ or "license.rst" in files
+ or "license.html" in files
+ or "license.md" in files
+ )
+ and not (
+ "vendoring" in manifest
+ and manifest["vendoring"].get("flavor", "regular") == "rust"
+ )
+ ):
+ license = manifest["origin"]["license"]
+ if isinstance(license, list):
+ license = "/".join(license)
+ raise ValueError("Failed to find %s LICENSE file" % license)
+
+ # Cannot vendor without an origin.
+ if "vendoring" in manifest and "origin" not in manifest:
+ raise ValueError('"vendoring" requires an "origin"')
+
+ # Cannot vendor without a computer-readable revision.
+ if "vendoring" in manifest and "revision" not in manifest["origin"]:
+ raise ValueError(
+ 'If "vendoring" is present, "revision" must be present in "origin"'
+ )
+
+ # The rust and individual-files flavor types preclude a lot of options
+ # individual-files could, in theory, use several of these, but until we have a use case let's
+ # disallow them so we're not worrying about whether they work. When we need them we can make
+ # sure they do.
+ if (
+ "vendoring" in manifest
+ and manifest["vendoring"].get("flavor", "regular") != "regular"
+ ):
+ for i in [
+ "skip-vendoring-steps",
+ "keep",
+ "exclude",
+ "include",
+ "generated",
+ ]:
+ if i in manifest["vendoring"]:
+ raise ValueError("A non-regular flavor of update cannot use '%s'" % i)
+
+ if manifest["vendoring"].get("flavor", "regular") == "rust":
+ for i in [
+ "update-actions",
+ ]:
+ if i in manifest["vendoring"]:
+ raise ValueError("A rust flavor of update cannot use '%s'" % i)
+
+ # Ensure that only individual-files flavor uses those options
+ if (
+ "vendoring" in manifest
+ and manifest["vendoring"].get("flavor", "regular") != "individual-files"
+ ):
+ if (
+ "individual-files" in manifest["vendoring"]
+ or "individual-files-list" in manifest["vendoring"]
+ ):
+ raise ValueError(
+ "Only individual-files flavor of update can use 'individual-files'"
+ )
+
+ # Ensure that the individual-files flavor has all the correct options
+ if (
+ "vendoring" in manifest
+ and manifest["vendoring"].get("flavor", "regular") == "individual-files"
+ ):
+ # Because the only way we can determine the latest tag is by doing a local clone,
+ # we don't want to do that for individual-files flavors because those flavors are
+ # usually on gigantic repos we don't want to clone for such a simple thing.
+ if manifest["vendoring"].get("tracking", "commit") == "tag":
+ raise ValueError(
+ "You cannot use tag tracking with the individual-files flavor. (Sorry.)"
+ )
+
+ # We need either individual-files or individual-files-list
+ if (
+ "individual-files" not in manifest["vendoring"]
+ and "individual-files-list" not in manifest["vendoring"]
+ ):
+ raise ValueError(
+ "The individual-files flavor must include either "
+ + "'individual-files' or 'individual-files-list'"
+ )
+ # For whichever we have, make sure we don't have the other and we don't have
+ # options we shouldn't or lack ones we should.
+ if "individual-files" in manifest["vendoring"]:
+ if "individual-files-list" in manifest["vendoring"]:
+ raise ValueError(
+ "individual-files-list is mutually exclusive with individual-files"
+ )
+ if "individual-files-default-upstream" in manifest["vendoring"]:
+ raise ValueError(
+ "individual-files-default-upstream can only be used with individual-files-list"
+ )
+ if "individual-files-default-destination" in manifest["vendoring"]:
+ raise ValueError(
+ "individual-files-default-destination can only be used "
+ + "with individual-files-list"
+ )
+ if "individual-files-list" in manifest["vendoring"]:
+ if "individual-files" in manifest["vendoring"]:
+ raise ValueError(
+ "individual-files is mutually exclusive with individual-files-list"
+ )
+ if "individual-files-default-upstream" not in manifest["vendoring"]:
+ raise ValueError(
+ "individual-files-default-upstream must be used with individual-files-list"
+ )
+ if "individual-files-default-destination" not in manifest["vendoring"]:
+ raise ValueError(
+ "individual-files-default-destination must be used with individual-files-list"
+ )
+
+ if "updatebot" in manifest:
+ # If there are Updatebot tasks, then certain fields must be present and
+ # defaults need to be set.
+ if "tasks" in manifest["updatebot"]:
+ if "vendoring" not in manifest or "url" not in manifest["vendoring"]:
+ raise ValueError(
+ "If Updatebot tasks are specified, a vendoring url must be included."
+ )
+
+ if "try-preset" in manifest["updatebot"]:
+ for f in ["fuzzy-query", "fuzzy-paths"]:
+ if f in manifest["updatebot"]:
+ raise ValueError(
+ "If 'try-preset' is specified, then %s cannot be" % f
+ )
+
+ # Check for a simple YAML file
+ with open(filename, "r") as f:
+ has_schema = False
+ for line in f.readlines():
+ m = RE_SECTION(line)
+ if m:
+ if m.group(1) == "schema":
+ has_schema = True
+ break
+ if not has_schema:
+ raise ValueError("Not simple YAML")
+
+
+# Do type conversion for the few things that need it.
+# Everything is parsed as a string to (a) not cause problems with revisions that
+# are only numerals and (b) not strip leading zeros from numbers if we had
+# parsed them as integers and converted them back to strings
+def _schema_1_transform(manifest):
+ if "updatebot" in manifest:
+ if "tasks" in manifest["updatebot"]:
+ for i in range(len(manifest["updatebot"]["tasks"])):
+ if "enabled" in manifest["updatebot"]["tasks"][i]:
+ val = manifest["updatebot"]["tasks"][i]["enabled"]
+ manifest["updatebot"]["tasks"][i]["enabled"] = (
+ val.lower() == "true" or val.lower() == "yes"
+ )
+ return manifest
+
+
+class UpdateActions(object):
+ """Voluptuous validator which verifies the update actions(s) are valid."""
+
+ def __call__(self, values):
+ for v in values:
+ if "action" not in v:
+ raise Invalid("All file-update entries must specify a valid action")
+ if v["action"] in ["copy-file", "move-file", "move-dir"]:
+ if "from" not in v or "to" not in v or len(v.keys()) != 3:
+ raise Invalid(
+ "%s action must (only) specify 'from' and 'to' keys"
+ % v["action"]
+ )
+ elif v["action"] in ["replace-in-file", "replace-in-file-regex"]:
+ if (
+ "pattern" not in v
+ or "with" not in v
+ or "file" not in v
+ or len(v.keys()) != 4
+ ):
+ raise Invalid(
+ "replace-in-file action must (only) specify "
+ + "'pattern', 'with', and 'file' keys"
+ )
+ elif v["action"] == "delete-path":
+ if "path" not in v or len(v.keys()) != 2:
+ raise Invalid(
+ "delete-path action must (only) specify the 'path' key"
+ )
+ elif v["action"] == "run-script":
+ if "script" not in v or "cwd" not in v:
+ raise Invalid(
+ "run-script action must specify 'script' and 'cwd' keys"
+ )
+ if set(v.keys()) - set(["args", "cwd", "script", "action"]) != set():
+ raise Invalid(
+ "run-script action may only specify 'script', 'cwd', and 'args' keys"
+ )
+ elif v["action"] == "run-command":
+ if "command" not in v or "cwd" not in v:
+ raise Invalid(
+ "run-command action must specify 'command' and 'cwd' keys"
+ )
+ if set(v.keys()) - set(["args", "cwd", "command", "action"]) != set():
+ raise Invalid(
+ "run-command action may only specify 'command', 'cwd', and 'args' keys"
+ )
+ else:
+ # This check occurs before the validator above, so the above is
+ # redundant but we leave it to be verbose.
+ raise Invalid("Supplied action " + v["action"] + " is invalid.")
+ return values
+
+ def __repr__(self):
+ return "UpdateActions"
+
+
+class UpdatebotTasks(object):
+ """Voluptuous validator which verifies the updatebot task(s) are valid."""
+
+ def __call__(self, values):
+ seenTaskTypes = set()
+ for v in values:
+ if "type" not in v:
+ raise Invalid("All updatebot tasks must specify a valid type")
+
+ if v["type"] in seenTaskTypes:
+ raise Invalid("Only one type of each task is currently supported")
+ seenTaskTypes.add(v["type"])
+
+ if v["type"] == "vendoring":
+ for i in ["filter", "branch", "source-extensions"]:
+ if i in v:
+ raise Invalid(
+ "'%s' is only valid for commit-alert task types" % i
+ )
+ elif v["type"] == "commit-alert":
+ pass
+ else:
+ # This check occurs before the validator above, so the above is
+ # redundant but we leave it to be verbose.
+ raise Invalid("Supplied type " + v["type"] + " is invalid.")
+ return values
+
+ def __repr__(self):
+ return "UpdatebotTasks"
+
+
+class License(object):
+ """Voluptuous validator which verifies the license(s) are valid as per our
+ allow list."""
+
+ def __call__(self, values):
+ if isinstance(values, str):
+ values = [values]
+ elif not isinstance(values, list):
+ raise Invalid("Must be string or list")
+ for v in values:
+ if v not in VALID_LICENSES:
+ raise Invalid("Bad License")
+ return values
+
+ def __repr__(self):
+ return "License"
diff --git a/python/mozbuild/mozbuild/vendor/rewrite_mozbuild.py b/python/mozbuild/mozbuild/vendor/rewrite_mozbuild.py
new file mode 100644
index 0000000000..8163c05dc3
--- /dev/null
+++ b/python/mozbuild/mozbuild/vendor/rewrite_mozbuild.py
@@ -0,0 +1,1286 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Utility package for working with moz.yaml files.
+#
+# Requires `pyyaml` and `voluptuous`
+# (both are in-tree under third_party/python)
+
+"""
+Problem:
+ ./mach vendor needs to be able to add or remove files from moz.build files automatically to
+ be able to effectively update a library automatically and send useful try runs in.
+
+ So far, it has been difficult to do that.
+
+ Why:
+ - Some files need to go into UNIFIED_SOURCES vs SOURCES
+ - Some files are os-specific, and need to go into per-OS conditionals
+ - Some files are both UNIFIED_SOURCES/SOURCES sensitive and OS-specific.
+
+Proposal:
+ Design an algorithm that maps a third party library file to a suspected moz.build location.
+ Run the algorithm on all files specified in all third party libraries' moz.build files.
+ See if the proposed place in the moz.build file matches the actual place.
+
+Initial Algorithm
+ Given a file, which includes the filename and the path from gecko root, we want to find the
+ correct moz.build file and location within that file.
+ Take the path of the file, and iterate up the directory tree, looking for moz.build files as
+ we go.
+ Consider each of these moz.build files, starting with the one closest to the file.
+ Within a moz.build file, identify the SOURCES or UNIFIED_SOURCES block(s) that contains a file
+ in the same directory path as the file to be added.
+ If there is only one such block, use that one.
+ If there are multiple blocks, look at the files within each block and note the longest length
+ of a common prefix (including partial filenames - if we just did full directories the
+ result would be the same as the prior step and we would not narrow the results down). Use
+ the block containing the longest prefix. (We call this 'guessing'.)
+
+Result of the proposal:
+ The initial implementation works on 1675 of 1977 eligible files.
+ The files it does not work on include:
+ - general failures, such as when we find that avutil.cpp wants to be next to adler32.cpp
+ but avutil.cpp is in SOURCES and adler32.cpp is in UNIFIED_SOURCES. (And many similar
+ cases.)
+ - per-cpu-feature files, where only a single file is added under a conditional
+ - When guessing, because of a len(...) > longest_so_far comparison, we would prefer the
+ first block we found.
+ - Changing this to prefer UNIFIED_SOURCES in the event of a tie
+ yielded 17 additional correct assignments (about a 1% improvement)
+ - As a result of the change immediately above, when guessing, because given equal
+ prefixes, we would prefer a UNIFIED_SOURCES block over other blocks, even if the other
+ blocks are longer
+ - Changing this (again) to prefer the block containing more files yielded 49 additional
+ correct assignments (about a 2.5% improvement)
+
+ The files that are ineligible for consideration are:
+ - Those in libwebrtc
+ - Those specified in source assignments composed of generators (e.g. [f for f in '%.c'])
+ - Those specified in source assignments to subscripted variables
+ (e.g. SOURCES += foo['x86_files'])
+
+ We needed to iterate up the directory and look at a different moz.build file _zero_ times.
+ This indicates this code is probably not needed, and therefore we will remove it from the
+ algorithm.
+ We needed to guess based on the longest prefix 944 times, indicating that this code is
+ absolutely crucial and should be double-checked. (And indeed, upon double-checking it,
+ bugs were identified.)
+
+ After some initial testing, it was determined that this code completely fell down when the
+ vendoring directory differed from the moz.yaml directory (definitions below.) The code was
+ slightly refactored to handle this case, primarily by (a) re-inserting the logic to check
+ multiple moz.build files instead of the first and (b) handling some complicated normalization
+ notions (details in comments).
+
+Slightly Improved Algorithm Changes:
+ Don't bother iterating up the directory tree looking for moz.build files, just take the first.
+ When guessing, in the event of a common-prefix tie, prefer the block containing more files
+
+ With these changes, we now Successfully Matched 1724 of 1977 files
+
+CODE CONCEPTS
+
+source-assignment
+ An assignment of files to a SOURCES or UNIFIED_SOURCES variable, such as
+ SOURCES += ['ffpvx.cpp']
+
+ We specifically look only for these two variable names to avoid identifying things
+ such as CXX_FLAGS.
+
+ Sometimes, however, there is an intermediary variable, such as `SOURCES += celt_filenames`
+ In this situation we find the celt_filenames assignment, and treat it as a 'source-assignment'
+
+source-assignment-location
+ source-assignment-location is a human readable string that identifies where in the moz.build
+ file the source-assignment is. It can used to visually match the location upon manual
+ inspection; and given a source-assignment-location, re-identify it when iterating over all
+ source-assignments in a file.
+
+ The actual string consists of the path from the root of the moz.build file to the
+ source-assignment, plus a suffix number.
+
+ We suffix the final value with an incrementing counter. This is to support moz.build files
+ that, for whatever reason, use multiple SOURCES += [] lists in the same basic block. This index
+ is per-file, so no two assignments in the same file (even if they have separate locations)
+ should have the same suffix.
+
+ For example:
+
+ When `SOURCES += ['ffpvx.cpp']` appears as the first line of the file (or any other
+ unindented location) its source-assignment-location will be `> SOURCES 1`.
+
+ When `SOURCES += ['ffpvx.cpp']` appears inside a conditional such as
+ `CONFIG['OS_TARGET'] == 'WINNT'` then its source-assignment-location will be
+ `> if CONFIG['OS_TARGET'] == 'WINNT' > SOURCES 1`
+
+ When `SOURCES += ['ffpvx.cpp']` appears as the second line of the file, and a different
+ `SOURCES += []` was the first line, then its source-assignment-location will be `> SOURCES 2`.
+
+ No two source-assignments may have the same source-assignment-location. If they do, we raise
+ an assert.
+
+file vs filename
+ a 'filename' is a string specifying the name and sometimes the path of a file.
+ a 'file' is an object you get from open()-ing a filename
+
+ A variable that is a string should always use 'filename'
+
+vendoring directory vs moz.yaml directory
+ In many cases, a library's moz.yaml file, moz.build file(s), and sources files will all live
+ under a single directory. e.g. libjpeg
+
+ In other cases, a library's source files are in one directory (we call this the 'vendoring
+ directory') and the moz.yaml file and moz.build file(s) are in another directory (we call this
+ the moz.yaml directory). e.g. libdav1d
+
+normalized-filename
+ A filename is 'normalized' if it has been expanded to the full path from the gecko root. This
+ requires a moz.build file.
+
+ For example a filename `lib/opus.c` may be specified inside the `media/libopus/moz.build`
+ file. The filename is normalized by os.path.join()-ing the dirname of the moz.build file
+ (i.e. `media/libopus`) to the filename, resulting in `media/libopus/lib/opus.c`
+
+ A filename that begins with '/' is presumed to already be specified relative to the gecko
+ root, and therefore is not modified.
+
+ Normalization gets more complicated when dealing with separate vendoring and moz.yaml
+ directories. This is because a file can be considered normalized when it looks like
+ third_party/libdav1d/src/a.cpp
+ _or_ when it looks like
+ media/libdav1d/../../third_party/libdav1d/src/a.cpp
+ This is because in the moz.build file, it will be specified as
+ `../../third_party/libdav1d/src/a.cpp` and we 'normalize' it by prepending the path to the
+ moz.build file.
+
+ Normalization is not just about having an 'absolute' path from gecko_root to file. In fact
+ it's not really about that at all - it's about matching filenames. Therefore when we are
+ dealing with separate vendoring and moz.yaml directories we will very quickly 're-normalize'
+ a normalized filename to get it into one of those foo/bar/../../third_party/... paths that
+ will make sense for the moz.build file we are interested in.
+
+ Whenever a filename is normalized, it should be specified as such in the variable name,
+ either as a prefix (normalized_filename) or a suffix (target_filename_normalized)
+
+statistic
+ Using some hacky stuff, we report statistics about how many times we hit certain branches of
+ the code.
+ e.g.
+ - "How many times did we refine a guess based on prefix length"
+ - "How many times did we refine a guess based on the number of files in the block"
+ - "What is the histogram of guess candidates"
+
+ We do this to identify how frequently certain code paths were taken, allowing us to identify
+ strange behavior and investigate outliers. This process led to identifying bugs and small
+ improvements.
+"""
+
+import ast
+import copy
+import os
+import re
+import shutil
+import subprocess
+import sys
+from pprint import pprint
+
+try:
+ from mozbuild.frontend.sandbox import alphabetical_sorted
+except Exception:
+
+ def alphabetical_sorted(iterable, key=lambda x: x.lower(), reverse=False):
+ return sorted(iterable, key=key, reverse=reverse)
+
+
+# This can be edited to enable better Python 3.8 behavior, but is set so that
+# everything is consistent by default so errors can be detected more easily.
+FORCE_DOWNGRADE_BEHAVIOR = True
+
+statistics = {
+ "guess_candidates": {},
+ "number_refinements": {},
+ "needed_to_guess": 0,
+ "length_logic": {},
+}
+
+
+def log(*args, **kwargs):
+ # It is helpful to keep some logging statements around, but we don't want to print them
+ # unless we are debugging
+ # print(*args, **kwargs)
+ pass
+
+
+##############################################
+
+import inspect
+
+
+def node_to_name(code, node):
+ if (
+ not FORCE_DOWNGRADE_BEHAVIOR
+ and sys.version_info[0] >= 3
+ and sys.version_info[1] >= 8
+ ):
+ return ast.get_source_segment(code, node)
+
+ return node.__class__.__name__
+
+
+def get_attribute_label(node):
+ assert isinstance(node, ast.Attribute)
+
+ label = ""
+ subtarget = node
+ while isinstance(subtarget, ast.Attribute):
+ label = subtarget.attr + ("." if label else "") + label
+ subtarget = subtarget.value
+
+ if isinstance(subtarget, ast.Name):
+ label = subtarget.id + "." + label
+ elif isinstance(subtarget, ast.Subscript) and isinstance(subtarget.value, ast.Name):
+ label = subtarget.value.id + "." + label
+ else:
+ raise Exception(
+ "Unxpected subtarget of type %s found in get_attribute_label. label=%s"
+ % (subtarget, label)
+ )
+
+ return label
+
+
+def ast_get_source_segment(code, node):
+ caller = inspect.stack()[1]
+
+ if "sphinx" in caller.filename or (
+ not FORCE_DOWNGRADE_BEHAVIOR
+ and sys.version_info[0] >= 3
+ and sys.version_info[1] >= 8
+ ):
+ return ast.original_get_source_segment(code, node)
+
+ if caller.function == "assignment_node_to_source_filename_list":
+ return ""
+
+ raise Exception(
+ "ast_get_source_segment is not available with this Python version. (ver=%s.%s, caller=%s)"
+ % (sys.version_info.major, sys.version_info.minor, caller.function)
+ )
+
+
+# Overwrite it so we don't accidentally use it
+if sys.version_info[0] >= 3 and sys.version_info[1] >= 8:
+ ast.original_get_source_segment = ast.get_source_segment
+ ast.get_source_segment = ast_get_source_segment
+
+
+##############################################
+
+
+def node_to_readable_file_location(code, node, child_node=None):
+ location = ""
+
+ if isinstance(node.parent, ast.Module):
+ # The next node up is the root, don't go higher.
+ pass
+ else:
+ location += node_to_readable_file_location(code, node.parent, node)
+
+ location += " > "
+ if isinstance(node, ast.Module):
+ raise Exception("We shouldn't see a Module")
+ elif isinstance(node, ast.If):
+ assert child_node
+ if child_node in node.body:
+ location += "if " + node_to_name(code, node.test)
+ else:
+ location += "else-of-if " + node_to_name(code, node.test)
+ elif isinstance(node, ast.For):
+ location += (
+ "for "
+ + node_to_name(code, node.target)
+ + " in "
+ + node_to_name(code, node.iter)
+ )
+ elif isinstance(node, ast.AugAssign):
+ if isinstance(node.target, ast.Name):
+ location += node.target.id
+ else:
+ location += node_to_name(code, node.target)
+ elif isinstance(node, ast.Assign):
+ # This assert would fire if we did e.g. some_sources = all_sources = [ ... ]
+ assert len(node.targets) == 1, "Assignment node contains more than one target"
+ if isinstance(node.targets[0], ast.Name):
+ location += node.targets[0].id
+ else:
+ location += node_to_name(code, node.targets[0])
+ else:
+ raise Exception("Got a node type I don't know how to handle: " + str(node))
+
+ return location
+
+
+def assignment_node_to_source_filename_list(code, node):
+ """
+ If the list of filenames is not a list of constants (e.g. it's a generated list)
+ it's (probably) infeasible to try and figure it out. At least we're not going to try
+ right now. Maybe in the future?
+
+ If this happens, we'll return an empty list. The consequence of this is that we
+ won't be able to match a file against this list, so we may not be able to add it.
+
+ (But if the file matches a generated list, perhaps it will be included in the
+ Sources list automatically?)
+ """
+ if isinstance(node.value, ast.List) and "elts" in node.value._fields:
+ for f in node.value.elts:
+ if not isinstance(f, ast.Constant) and not isinstance(f, ast.Str):
+ log(
+ "Found non-constant source file name in list: ",
+ ast_get_source_segment(code, f),
+ )
+ return []
+ return [
+ f.value if isinstance(f, ast.Constant) else f.s for f in node.value.elts
+ ]
+ elif isinstance(node.value, ast.ListComp):
+ # SOURCES += [f for f in foo if blah]
+ log("Could not find the files for " + ast_get_source_segment(code, node.value))
+ elif isinstance(node.value, ast.Name) or isinstance(node.value, ast.Subscript):
+ # SOURCES += other_var
+ # SOURCES += files['X64_SOURCES']
+ log("Could not find the files for " + ast_get_source_segment(code, node))
+ elif isinstance(node.value, ast.Call):
+ # SOURCES += sorted(...)
+ log("Could not find the files for " + ast_get_source_segment(code, node))
+ else:
+ raise Exception(
+ "Unexpected node received in assignment_node_to_source_filename_list: "
+ + str(node)
+ )
+ return []
+
+
+def mozbuild_file_to_source_assignments(normalized_mozbuild_filename, assignment_type):
+ """
+ Returns a dictionary of 'source-assignment-location' -> 'normalized source filename list'
+ contained in the moz.build file specified
+
+ normalized_mozbuild_filename: the moz.build file to read
+ """
+ source_assignments = {}
+
+ if assignment_type == "source-files":
+ targets = ["SOURCES", "UNIFIED_SOURCES"]
+ else:
+ targets = ["EXPORTS"]
+
+ # Parse the AST of the moz.build file
+ code = open(normalized_mozbuild_filename).read()
+ root = ast.parse(code)
+
+ # Populate node parents. This allows us to walk up from a node to the root.
+ # (Really I think python's ast class should do this, but it doesn't, so we monkey-patch it)
+ for node in ast.walk(root):
+ for child in ast.iter_child_nodes(node):
+ child.parent = node
+
+ # Find all the assignments of SOURCES or UNIFIED_SOURCES
+ if assignment_type == "source-files":
+ source_assignment_nodes = [
+ node
+ for node in ast.walk(root)
+ if isinstance(node, ast.AugAssign)
+ and isinstance(node.target, ast.Name)
+ and node.target.id in targets
+ ]
+ assert (
+ len([n for n in source_assignment_nodes if not isinstance(n.op, ast.Add)])
+ == 0
+ ), "We got a Source assignment that wasn't +="
+
+ # Recurse and find nodes where we do SOURCES += other_var or SOURCES += FILES['foo']
+ recursive_assignment_nodes = [
+ node
+ for node in source_assignment_nodes
+ if isinstance(node.value, ast.Name) or isinstance(node.value, ast.Subscript)
+ ]
+
+ recursive_assignment_nodes_names = [
+ node.value.id
+ for node in recursive_assignment_nodes
+ if isinstance(node.value, ast.Name)
+ ]
+
+ # TODO: We do not dig into subscript variables. These are currently only used by two
+ # libraries that use external sources.mozbuild files.
+ # recursive_assignment_nodes_names.extend([something<node> for node in
+ # recursive_assignment_nodes if isinstance(node.value, ast.Subscript)]
+
+ additional_assignment_nodes = [
+ node
+ for node in ast.walk(root)
+ if isinstance(node, ast.Assign)
+ and isinstance(node.targets[0], ast.Name)
+ and node.targets[0].id in recursive_assignment_nodes_names
+ ]
+
+ # Remove the original, useless assignment node (the SOURCES += other_var)
+ for node in recursive_assignment_nodes:
+ source_assignment_nodes.remove(node)
+ # Add the other_var += [''] source-assignment
+ source_assignment_nodes.extend(additional_assignment_nodes)
+ else:
+ source_assignment_nodes = [
+ node
+ for node in ast.walk(root)
+ if isinstance(node, ast.AugAssign)
+ and (
+ (isinstance(node.target, ast.Name) and node.target.id == "EXPORTS")
+ or (
+ isinstance(node.target, ast.Attribute)
+ and get_attribute_label(node.target).startswith("EXPORTS")
+ )
+ )
+ ]
+ source_assignment_nodes.extend(
+ [
+ node
+ for node in ast.walk(root)
+ if isinstance(node, ast.Assign)
+ and (
+ (
+ isinstance(node.targets[0], ast.Name)
+ and node.targets[0].id == "EXPORTS"
+ )
+ or (
+ isinstance(node.targets[0], ast.Attribute)
+ and get_attribute_label(node.targets[0]).startswith("EXPORTS")
+ )
+ )
+ ]
+ )
+
+ # Get the source-assignment-location for the node:
+ assignment_index = 1
+ for a in source_assignment_nodes:
+ source_assignment_location = (
+ node_to_readable_file_location(code, a) + " " + str(assignment_index)
+ )
+ source_filename_list = assignment_node_to_source_filename_list(code, a)
+
+ if not source_filename_list:
+ # In some cases (like generated source file lists) we will have an empty list.
+ # If that is the case, just omit the source assignment
+ continue
+
+ normalized_source_filename_list = [
+ normalize_filename(normalized_mozbuild_filename, f)
+ for f in source_filename_list
+ ]
+
+ if source_assignment_location in source_assignments:
+ source_assignment_location = node_to_readable_file_location(code, a)
+
+ assert (
+ source_assignment_location not in source_assignments
+ ), "In %s, two assignments have the same key ('%s')" % (
+ normalized_mozbuild_filename,
+ source_assignment_location,
+ )
+ source_assignments[source_assignment_location] = normalized_source_filename_list
+ assignment_index += 1
+
+ return (source_assignments, root, code)
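+
+# Illustrative sketch (hypothetical moz.build) of the mapping returned; keys are
+# source-assignment-locations (a readable AST path plus a running index), values
+# are filename lists normalized to the gecko root:
+#
+#   source_assignments, root, code = mozbuild_file_to_source_assignments(
+#       "media/libfoo/moz.build", "source-files"
+#   )
+#   # source_assignments == {  (keys shown approximately)
+#   #     " > SOURCES 1": ["media/libfoo/a.c", "media/libfoo/b.c"],
+#   #     " > if CONFIG['CPU_ARCH'] == 'arm' > SOURCES 2": ["media/libfoo/asm_arm.c"],
+#   # }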
+
+
+def unnormalize_filename(normalized_mozbuild_filename, normalized_filename):
+ if normalized_filename[0] == "/":
+ return normalized_filename
+
+ mozbuild_path = (
+ os.path.dirname(normalized_mozbuild_filename).replace(os.path.sep, "/") + "/"
+ )
+ return normalized_filename.replace(mozbuild_path, "")
+
+
+def normalize_filename(normalized_mozbuild_filename, filename):
+ if filename[0] == "/":
+ return filename
+
+ mozbuild_path = os.path.dirname(normalized_mozbuild_filename).replace(
+ os.path.sep, "/"
+ )
+ return os.path.join(mozbuild_path, filename).replace(os.path.sep, "/")
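+
+# Round-trip sketch, assuming a hypothetical moz.build at media/libfoo/moz.build:
+#
+#   normalize_filename("media/libfoo/moz.build", "src/a.c")
+#   # -> "media/libfoo/src/a.c"    (now relative to the gecko root)
+#   unnormalize_filename("media/libfoo/moz.build", "media/libfoo/src/a.c")
+#   # -> "src/a.c"                 (back to moz.build-relative)
+#   normalize_filename("media/libfoo/moz.build", "/ipc/chromium/foo.c")
+#   # -> "/ipc/chromium/foo.c"     (absolute paths pass through unchanged)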
+
+
+def get_mozbuild_file_search_order(
+ normalized_filename,
+ moz_yaml_dir=None,
+ vendoring_dir=None,
+ all_mozbuild_filenames_normalized=None,
+):
+ """
+ Returns an ordered list of normalized moz.build filenames to consider for a given filename
+
+ normalized_filename: a source filename normalized to the gecko root
+
+ moz_yaml_dir: the path from gecko_root to the moz.yaml file (which is the root of the
+ moz.build files)
+
+    vendoring_dir: the path to where the library's source files are
+
+ all_mozbuild_filenames_normalized: (optional) the list of all third-party moz.build files
+ If all_mozbuild_filenames_normalized is not specified, we look in the filesystem.
+
+ The list is built out of two distinct steps.
+
+ In Step 1 we will walk up a directory tree, looking for moz.build files. We append moz.build
+ files in this order, preferring the lowest moz.build we find, then moving on to one in a
+ higher directory.
+ The directory we start in is a little complicated. We take the series of subdirectories
+ between vendoring_dir and the file in question, and then append them to the moz.yaml
+ directory.
+
+ Example:
+
+ .. code-block:: python
+
+ When moz_yaml directory != vendoring_directory:
+ moz_yaml_dir = foo/bar/
+ vendoring_dir = third_party/baz/
+ normalized_filename = third_party/baz/asm/arm/a.S
+ starting_directory: foo/bar/asm/arm/
+ When moz_yaml directory == vendoring_directory
+            (In this case, these variables will actually be 'None' but the algorithm is the same)
+ moz_yaml_dir = foo/bar/
+ vendoring_dir = foo/bar/
+ normalized_filename = foo/bar/asm/arm/a.S
+ starting_directory: foo/bar/asm/arm/
+
+    In Step 2 we get a bit desperate. When the vendoring directory and the moz_yaml directory are
+    not the same, there is no guarantee that the moz_yaml directory will adhere to the same
+    directory structure as the vendoring directory. And indeed it doesn't in some cases
+    (e.g. libdav1d.)
+    So in this situation we start at the root of the moz_yaml directory and walk downwards, adding
+    _any_ moz.build file we encounter to the list. Later on (in all cases, not just
+    moz_yaml_dir != vendoring_dir) we only consider a moz.build file if it has source files whose
+    directory matches the normalized_filename, so this step, though desperate, is safe-ish and
+    believe it or not has worked for some file additions.
+ """
+ ordered_list = []
+
+ if all_mozbuild_filenames_normalized is None:
+ assert os.path.isfile(
+ ".arcconfig"
+ ), "We do not seem to be running from the gecko root"
+
+ # The first time around, this variable name is incorrect.
+ # It's actually the full path+filename, not a directory.
+ test_directory = None
+ if (moz_yaml_dir, vendoring_dir) == (None, None):
+ # In this situation, the library is vendored into the same directory as
+ # the moz.build files. We can start traversing directories up from the file to
+ # add to find the correct moz.build file
+ test_directory = normalized_filename
+ elif moz_yaml_dir and vendoring_dir:
+ # In this situation, the library is vendored in a different place (typically
+ # third_party/foo) from the moz.build files.
+ subdirectory_path = normalized_filename.replace(vendoring_dir, "")
+ test_directory = os.path.join(moz_yaml_dir, subdirectory_path)
+ else:
+ raise Exception("If moz_yaml_dir or vendoring_dir are specified, both must be")
+
+ # Step 1
+ while (
+ len(os.path.dirname(test_directory).replace(os.path.sep, "/")) > 1
+ ): # While we are not at '/'
+ containing_directory = os.path.dirname(test_directory)
+
+ possible_normalized_mozbuild_filename = os.path.join(
+ containing_directory, "moz.build"
+ )
+
+ if not all_mozbuild_filenames_normalized:
+ if os.path.isfile(possible_normalized_mozbuild_filename):
+ ordered_list.append(possible_normalized_mozbuild_filename)
+ elif possible_normalized_mozbuild_filename in all_mozbuild_filenames_normalized:
+ ordered_list.append(possible_normalized_mozbuild_filename)
+
+ test_directory = containing_directory
+
+ # Step 2
+ if moz_yaml_dir:
+ for root, dirs, files in os.walk(moz_yaml_dir):
+ for f in files:
+ if f == "moz.build":
+ ordered_list.append(os.path.join(root, f))
+
+ return ordered_list
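+
+# Illustrative sketch of the resulting order for the docstring's hypothetical
+# layout (Step 1 walks upwards, Step 2 walks the moz_yaml_dir tree and may
+# re-add Step 1's entries):
+#
+#   get_mozbuild_file_search_order(
+#       "third_party/baz/asm/arm/a.S",
+#       moz_yaml_dir="foo/bar/",
+#       vendoring_dir="third_party/baz/",
+#   )
+#   # -> ["foo/bar/asm/arm/moz.build",   (Step 1, if it exists on disk)
+#   #     "foo/bar/asm/moz.build",       (Step 1, if it exists on disk)
+#   #     "foo/bar/moz.build",           (Step 1, if it exists on disk)
+#   #     ...every moz.build found under foo/bar/...]  (Step 2)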
+
+
+def get_closest_mozbuild_file(
+ normalized_filename,
+ moz_yaml_dir=None,
+ vendoring_dir=None,
+ all_mozbuild_filenames_normalized=None,
+):
+ """
+ Returns the closest moz.build file in the directory tree to a normalized filename
+ """
+ r = get_mozbuild_file_search_order(
+ normalized_filename,
+ moz_yaml_dir,
+ vendoring_dir,
+ all_mozbuild_filenames_normalized,
+ )
+ return r[0] if r else None
+
+
+def filenames_directory_is_in_filename_list(
+ filename_normalized, list_of_normalized_filenames
+):
+ """
+ Given a normalized filename and a list of normalized filenames, first turn them into a
+ containing directory, and a list of containing directories. Then test if the containing
+ directory of the filename is in the list.
+
+ ex:
+ f = filenames_directory_is_in_filename_list
+ f("foo/bar/a.c", ["foo/b.c"]) -> false
+ f("foo/bar/a.c", ["foo/b.c", "foo/bar/c.c"]) -> true
+ f("foo/bar/a.c", ["foo/b.c", "foo/bar/baz/d.c"]) -> false
+ """
+ path_list = set(
+ [
+ os.path.dirname(f).replace(os.path.sep, "/")
+ for f in list_of_normalized_filenames
+ ]
+ )
+ return os.path.dirname(filename_normalized).replace(os.path.sep, "/") in path_list
+
+
+def find_all_posible_assignments_from_filename(source_assignments, filename_normalized):
+ """
+ Given a list of source assignments and a normalized filename, narrow the list to assignments
+ that contain a file whose directory matches the filename's directory.
+ """
+ possible_assignments = {}
+ for key, list_of_normalized_filenames in source_assignments.items():
+ if not list_of_normalized_filenames:
+ continue
+ if filenames_directory_is_in_filename_list(
+ filename_normalized, list_of_normalized_filenames
+ ):
+ possible_assignments[key] = list_of_normalized_filenames
+ return possible_assignments
+
+
+def guess_best_assignment(source_assignments, filename_normalized):
+ """
+ Given several assignments, all of which contain the same directory as the filename, pick one
+ we think is best and return its source-assignment-location.
+
+ We do this by looking at the filename itself (not just its directory) and picking the
+ assignment which contains a filename with the longest matching prefix.
+
+ e.g: "foo/asm_neon.c" compared to ["foo/main.c", "foo/all_utility.c"], ["foo/asm_arm.c"]
+ -> ["foo/asm_arm.c"] (match of `foo/asm_`)
+ """
+ length_of_longest_match = 0
+ source_assignment_location_of_longest_match = None
+ statistic_number_refinements = 0
+ statistic_length_logic = 0
+
+ for key, list_of_normalized_filenames in source_assignments.items():
+ for f in list_of_normalized_filenames:
+ if filename_normalized == f:
+ # Do not cheat by matching the prefix of the exact file
+ continue
+
+ prefix = os.path.commonprefix([filename_normalized, f])
+ if len(prefix) > length_of_longest_match:
+ statistic_number_refinements += 1
+ length_of_longest_match = len(prefix)
+ source_assignment_location_of_longest_match = key
+ elif len(prefix) == length_of_longest_match and len(
+ source_assignments[key]
+ ) > len(source_assignments[source_assignment_location_of_longest_match]):
+ statistic_number_refinements += 1
+ statistic_length_logic += 1
+ length_of_longest_match = len(prefix)
+ source_assignment_location_of_longest_match = key
+ return (
+ source_assignment_location_of_longest_match,
+ (statistic_number_refinements, statistic_length_logic),
+ )
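+
+# Worked sketch of the longest-common-prefix heuristic with hypothetical inputs:
+#
+#   source_assignments = {
+#       " > SOURCES 1": ["foo/main.c", "foo/all_utility.c"],
+#       " > SOURCES 2": ["foo/asm_arm.c"],
+#   }
+#   guess_best_assignment(source_assignments, "foo/asm_neon.c")
+#   # -> (" > SOURCES 2", ...): os.path.commonprefix of "foo/asm_neon.c" and
+#   #    "foo/asm_arm.c" is "foo/asm_" (8 chars), beating "foo/" and "foo/a".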
+
+
+def edit_moz_build_file_to_add_file(
+ normalized_mozbuild_filename,
+ unnormalized_filename_to_add,
+ unnormalized_list_of_files,
+):
+ """
+ This function edits the moz.build file in-place
+
+    I had _really_ hoped to replace this whole damn thing with something that adds a
+    node to the AST, dumps the AST out, and then runs black on the file, but there are
+    some issues:
+    - third party moz.build files (or maybe all moz.build files) aren't always run through black
+    - dumping the AST out loses comments
+
+ """
+
+ # Make sure that we only write in forward slashes
+ if "\\" in unnormalized_filename_to_add:
+ unnormalized_filename_to_add = unnormalized_filename_to_add.replace("\\", "/")
+
+ # add the file into the list, and then sort it in the same way the moz.build validator
+ # expects
+ unnormalized_list_of_files.append(unnormalized_filename_to_add)
+ unnormalized_list_of_files = alphabetical_sorted(unnormalized_list_of_files)
+
+ # we're going to add our file by doing a find/replace of an adjacent file in the list
+ indx_of_addition = unnormalized_list_of_files.index(unnormalized_filename_to_add)
+ if indx_of_addition == 0:
+ target_indx = 1
+ replace_before = False
+ else:
+ target_indx = indx_of_addition - 1
+ replace_before = True
+
+ find_str = unnormalized_list_of_files[target_indx]
+
+ # We will only perform the first replacement. This is because sometimes there's moz.build
+ # code like:
+ # SOURCES += ['file.cpp']
+ # SOURCES['file.cpp'].flags += ['-Winline']
+ # If we replaced every time we found the target, we would be inserting into that second
+ # line.
+ did_replace = False
+
+ with open(normalized_mozbuild_filename, mode="r") as file:
+ with open(normalized_mozbuild_filename + ".new", mode="wb") as output:
+ for line in file:
+ if not did_replace and find_str in line:
+ did_replace = True
+
+ # Okay, we found the line we need to edit, now we need to be ugly about it
+ # Grab the type of quote used in this moz.build file: single or double
+ quote_type = line[line.index(find_str) - 1]
+
+ if "[" not in line:
+ # We'll want to put our new file onto its own line
+ newline_to_add = "\n"
+ # And copy the indentation of the line we're adding adjacent to
+ indent_value = line[0 : line.index(quote_type)]
+ else:
+ # This is frustrating, we have the start of the array here. We aren't
+ # going to be able to indent things onto a newline properly. We're just
+ # going to have to stick it in on the same line.
+ newline_to_add = ""
+ indent_value = ""
+
+ find_str = "%s%s%s" % (quote_type, find_str, quote_type)
+ if replace_before:
+ replacement_tuple = (
+ find_str,
+ newline_to_add,
+ indent_value,
+ quote_type,
+ unnormalized_filename_to_add,
+ quote_type,
+ )
+ replace_str = "%s,%s%s%s%s%s" % replacement_tuple
+ else:
+ replacement_tuple = (
+ quote_type,
+ unnormalized_filename_to_add,
+ quote_type,
+ newline_to_add,
+ indent_value,
+ find_str,
+ )
+ replace_str = "%s%s%s,%s%s%s" % replacement_tuple
+
+ line = line.replace(find_str, replace_str)
+
+ output.write((line.rstrip() + "\n").encode("utf-8"))
+
+ shutil.move(normalized_mozbuild_filename + ".new", normalized_mozbuild_filename)
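+
+# Before/after sketch of the insertion, assuming a hypothetical moz.build where
+# "bar.c" is added and its sorted neighbor "baz.c" becomes the find target
+# (indx_of_addition == 0, so replace_before is False and the new entry is
+# spliced in ahead of the match, copying its indentation):
+#
+#   before:
+#       SOURCES += [
+#           "baz.c",
+#       ]
+#   after:
+#       SOURCES += [
+#           "bar.c",
+#           "baz.c",
+#       ]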
+
+
+def edit_moz_build_file_to_remove_file(
+ normalized_mozbuild_filename, unnormalized_filename_to_remove
+):
+ """
+ This function edits the moz.build file in-place
+ """
+
+    simple_file_line = re.compile(
+        r"^\s*['\"]" + re.escape(unnormalized_filename_to_remove) + r"['\"],*$"
+    )
+ did_replace = False
+
+ with open(normalized_mozbuild_filename, mode="r") as file:
+ with open(normalized_mozbuild_filename + ".new", mode="wb") as output:
+ for line in file:
+ if not did_replace and unnormalized_filename_to_remove in line:
+ did_replace = True
+
+ # If the line consists of just a single source file on it, then we're in the
+ # clear - we can just skip this line.
+ if simple_file_line.match(line):
+ # Do not output anything, just keep going.
+ continue
+
+ # Okay, so the line is a little more complicated.
+ quote_type = line[line.index(unnormalized_filename_to_remove) - 1]
+
+ if "[" in line or "]" in line:
+                    find_str = "%s%s%s,*" % (
+                        quote_type,
+                        re.escape(unnormalized_filename_to_remove),
+                        quote_type,
+                    )
+                    line = re.sub(find_str, "", line)
+ else:
+ raise Exception(
+ "Got an unusual type of line we're trying to remove a file from:",
+ line,
+ )
+
+ output.write((line.rstrip() + "\n").encode("utf-8"))
+
+ shutil.move(normalized_mozbuild_filename + ".new", normalized_mozbuild_filename)
+
+
+def validate_directory_parameters(moz_yaml_dir, vendoring_dir):
+ # Validate the parameters
+ assert (moz_yaml_dir, vendoring_dir) == (None, None) or (
+ moz_yaml_dir and vendoring_dir
+ ), "If either moz_yaml_dir or vendoring_dir are specified, they both must be"
+
+ if moz_yaml_dir is not None and vendoring_dir is not None:
+ # Ensure they are provided with trailing slashes
+ moz_yaml_dir += "/" if moz_yaml_dir[-1] != "/" else ""
+ vendoring_dir += "/" if vendoring_dir[-1] != "/" else ""
+
+ return (moz_yaml_dir, vendoring_dir)
+
+
+HAS_ABSOLUTE = 1
+HAS_TRAVERSE_CHILD = 2
+HAS_RELATIVE_CHILD = 2 # behaves the same as above
+
+
+def get_file_reference_modes(source_assignments):
+ """
+ Given a set of source assignments, this function traverses through the
+ files references in those assignments to see if the files are referenced
+ using absolute paths (relative to gecko root) or relative paths.
+
+ It will return all the modes that are seen.
+ """
+ modes = set()
+
+ for key, list_of_normalized_filenames in source_assignments.items():
+ if not list_of_normalized_filenames:
+ continue
+ for file in list_of_normalized_filenames:
+ if file[0] == "/":
+ modes.add(HAS_ABSOLUTE)
+ elif file[0:2] == "../":
+ modes.add(HAS_TRAVERSE_CHILD)
+ else:
+ modes.add(HAS_RELATIVE_CHILD)
+ return modes
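+
+# Sketch of the mode detection on hypothetical assignments; a single moz.build
+# can mix styles, which is why a set of every observed mode is returned:
+#
+#   get_file_reference_modes({
+#       "A": ["/media/libfoo/a.c"],  # absolute from gecko root -> HAS_ABSOLUTE
+#       "B": ["../sibling/b.c"],     # traverses upwards -> HAS_TRAVERSE_CHILD
+#       "C": ["src/c.c"],            # relative child -> HAS_RELATIVE_CHILD
+#   })
+#   # -> {HAS_ABSOLUTE, HAS_TRAVERSE_CHILD} (HAS_RELATIVE_CHILD shares the same
+#   #    value as HAS_TRAVERSE_CHILD, so the set collapses to {1, 2})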
+
+
+def renormalize_filename(
+ mode,
+ moz_yaml_dir,
+ vendoring_dir,
+ normalized_mozbuild_filename,
+ normalized_filename_to_act_on,
+):
+ """
+ Edit the normalized_filename_to_act_on to either
+ - Make it an absolute path from gecko root (if we're in that mode)
+ - Get a relative path from the vendoring directory to the yaml directory where the
+ moz.build file is (If they are in separate directories)
+ """
+ if mode == HAS_ABSOLUTE:
+ # If the moz.build file uses absolute paths from the gecko root, this is easy,
+ # all we need to do is prepend a '/' to indicate that
+ normalized_filename_to_act_on = "/" + normalized_filename_to_act_on
+ elif moz_yaml_dir and vendoring_dir:
+ # To re-normalize it in this case, we:
+ # (a) get the path from gecko_root to the moz.build file we are considering
+ # (b) compute a relative path from that directory to the file we want
+ # (c) because (b) started at the moz.build file's directory, it is not
+ # normalized to the gecko_root. Therefore we need to normalize it by
+ # prepending (a)
+ a = os.path.dirname(normalized_mozbuild_filename).replace(os.path.sep, "/")
+ b = os.path.relpath(normalized_filename_to_act_on, start=a).replace(
+ os.path.sep, "/"
+ )
+ c = os.path.join(a, b).replace(os.path.sep, "/")
+ normalized_filename_to_act_on = c
+
+ return normalized_filename_to_act_on
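+
+# Worked sketch of the (a)/(b)/(c) path math above, with hypothetical directories
+# (note that os.path.join does not collapse the ".." segments):
+#
+#   a = "media/libfoo"    # directory of media/libfoo/moz.build
+#   b = os.path.relpath("third_party/foo/src/x.c", start="media/libfoo")
+#     == "../../third_party/foo/src/x.c"
+#   c = os.path.join(a, b) == "media/libfoo/../../third_party/foo/src/x.c"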
+
+
+#########################################################
+# PUBLIC API
+#########################################################
+
+
+class MozBuildRewriteException(Exception):
+ pass
+
+
+def remove_file_from_moz_build_file(
+ normalized_filename_to_remove, moz_yaml_dir=None, vendoring_dir=None
+):
+ """
+ Given a filename, relative to the gecko root (aka normalized), we look for the nearest
+ moz.build file, look in that file for the file, and then edit that moz.build file in-place.
+ """
+ moz_yaml_dir, vendoring_dir = validate_directory_parameters(
+ moz_yaml_dir, vendoring_dir
+ )
+
+ all_possible_normalized_mozbuild_filenames = get_mozbuild_file_search_order(
+ normalized_filename_to_remove, moz_yaml_dir, vendoring_dir, None
+ )
+
+    # normalized_filename_to_remove is the path from gecko_root to the file. However, if we
+    # vendor separately from moz.yaml, then 'normalization' gets more complicated as explained
+    # above. We will need to re-normalize the filename for each moz.build file we want to test,
+    # so we save the original normalized filename for this purpose
+ original_normalized_filename_to_remove = normalized_filename_to_remove
+
+    # These are the two header file types specified in vendor_manifest.py > header_suffixes
+ if normalized_filename_to_remove.endswith(
+ ".h"
+ ) or normalized_filename_to_remove.endswith(".hpp"):
+ assignment_type = "header-files"
+ else:
+ assignment_type = "source-files"
+
+ for normalized_mozbuild_filename in all_possible_normalized_mozbuild_filenames:
+ source_assignments, root, code = mozbuild_file_to_source_assignments(
+ normalized_mozbuild_filename, assignment_type
+ )
+
+ modes = get_file_reference_modes(source_assignments)
+
+ for mode in modes:
+ normalized_filename_to_remove = renormalize_filename(
+ mode,
+ moz_yaml_dir,
+ vendoring_dir,
+ normalized_mozbuild_filename,
+ normalized_filename_to_remove,
+ )
+
+ for key in source_assignments:
+ normalized_source_filename_list = source_assignments[key]
+ if normalized_filename_to_remove in normalized_source_filename_list:
+ unnormalized_filename_to_remove = unnormalize_filename(
+ normalized_mozbuild_filename, normalized_filename_to_remove
+ )
+ edit_moz_build_file_to_remove_file(
+ normalized_mozbuild_filename, unnormalized_filename_to_remove
+ )
+ return
+
+ normalized_filename_to_remove = original_normalized_filename_to_remove
+ raise MozBuildRewriteException("Could not remove " + normalized_filename_to_remove)
+
+
+def add_file_to_moz_build_file(
+ normalized_filename_to_add, moz_yaml_dir=None, vendoring_dir=None
+):
+ """
+ This is the overall function. Given a filename, relative to the gecko root (aka normalized),
+ we look for a moz.build file to add it to, look for the place in the moz.build file to add it,
+ and then edit that moz.build file in-place.
+
+    It accepts two optional parameters. If one is specified, they both must be. If a library is
+    vendored in a separate place from the moz.yaml file, these parameters specify those two
+    directories.
+ """
+ moz_yaml_dir, vendoring_dir = validate_directory_parameters(
+ moz_yaml_dir, vendoring_dir
+ )
+
+ all_possible_normalized_mozbuild_filenames = get_mozbuild_file_search_order(
+ normalized_filename_to_add, moz_yaml_dir, vendoring_dir, None
+ )
+
+    # normalized_filename_to_add is the path from gecko_root to the file. However, if we
+    # vendor separately from moz.yaml, then 'normalization' gets more complicated as explained
+    # above. We will need to re-normalize the filename for each moz.build file we want to test,
+    # so we save the original normalized filename for this purpose
+ original_normalized_filename_to_add = normalized_filename_to_add
+
+ if normalized_filename_to_add.endswith(".h") or normalized_filename_to_add.endswith(
+ ".hpp"
+ ):
+ assignment_type = "header-files"
+ else:
+ assignment_type = "source-files"
+
+ for normalized_mozbuild_filename in all_possible_normalized_mozbuild_filenames:
+ source_assignments, root, code = mozbuild_file_to_source_assignments(
+ normalized_mozbuild_filename, assignment_type
+ )
+
+ modes = get_file_reference_modes(source_assignments)
+
+ for mode in modes:
+ normalized_filename_to_add = renormalize_filename(
+ mode,
+ moz_yaml_dir,
+ vendoring_dir,
+ normalized_mozbuild_filename,
+ normalized_filename_to_add,
+ )
+
+ possible_assignments = find_all_posible_assignments_from_filename(
+ source_assignments, normalized_filename_to_add
+ )
+
+ if len(possible_assignments) == 0:
+ normalized_filename_to_add = original_normalized_filename_to_add
+ continue
+
+ assert (
+ len(possible_assignments) > 0
+ ), "Could not find a single possible source assignment"
+ if len(possible_assignments) > 1:
+ best_guess, _ = guess_best_assignment(
+ possible_assignments, normalized_filename_to_add
+ )
+ chosen_source_assignment_location = best_guess
+ else:
+ chosen_source_assignment_location = list(possible_assignments.keys())[0]
+
+ guessed_list_containing_normalized_filenames = possible_assignments[
+ chosen_source_assignment_location
+ ]
+
+ # unnormalize filenames so we can edit the moz.build file. They rarely use full paths.
+ unnormalized_filename_to_add = unnormalize_filename(
+ normalized_mozbuild_filename, normalized_filename_to_add
+ )
+ unnormalized_list_of_files = [
+ unnormalize_filename(normalized_mozbuild_filename, f)
+ for f in guessed_list_containing_normalized_filenames
+ ]
+
+ edit_moz_build_file_to_add_file(
+ normalized_mozbuild_filename,
+ unnormalized_filename_to_add,
+ unnormalized_list_of_files,
+ )
+ return
+
+ raise MozBuildRewriteException(
+ "Could not find a single moz.build file to add " + normalized_filename_to_add
+ )
+
+
+#########################################################
+# TESTING CODE
+#########################################################
+
+
+def get_all_target_filenames_normalized(all_mozbuild_filenames_normalized):
+ """
+    Given a list of moz.build files, returns all the files listed in all the source
+    assignments in those files.
+
+ This function is only used for debug/testing purposes - there is no reason to call this
+ as part of 'the algorithm'
+ """
+ all_target_filenames_normalized = []
+ for normalized_mozbuild_filename in all_mozbuild_filenames_normalized:
+        source_assignments, root, code = mozbuild_file_to_source_assignments(
+            normalized_mozbuild_filename, "source-files"
+        )
+ for key in source_assignments:
+ list_of_normalized_filenames = source_assignments[key]
+ all_target_filenames_normalized.extend(list_of_normalized_filenames)
+
+ return all_target_filenames_normalized
+
+
+def try_to_match_target_file(
+ all_mozbuild_filenames_normalized, target_filename_normalized
+):
+ """
+    Runs 'the algorithm' on a target file, and returns whether the algorithm was successful
+
+    all_mozbuild_filenames_normalized: the list of all third-party moz.build files
+    target_filename_normalized: the target filename, normalized to the gecko root
+ """
+
+ # We do not update the statistics for failed matches, so save a copy
+ global statistics
+ backup_statistics = copy.deepcopy(statistics)
+
+ if "" == target_filename_normalized:
+ raise Exception("Received an empty target_filename_normalized")
+
+ normalized_mozbuild_filename = get_closest_mozbuild_file(
+ target_filename_normalized, None, None, all_mozbuild_filenames_normalized
+ )
+ if not normalized_mozbuild_filename:
+ return (False, "No moz.build file found")
+
+    source_assignments, root, code = mozbuild_file_to_source_assignments(
+        normalized_mozbuild_filename, "source-files"
+    )
+ possible_assignments = find_all_posible_assignments_from_filename(
+ source_assignments, target_filename_normalized
+ )
+
+ if len(possible_assignments) == 0:
+ raise Exception("No possible assignments were found")
+ elif len(possible_assignments) > 1:
+ (
+ best_guess,
+ (statistic_number_refinements, statistic_length_logic),
+ ) = guess_best_assignment(possible_assignments, target_filename_normalized)
+ chosen_source_assignment_location = best_guess
+
+ statistics["needed_to_guess"] += 1
+
+ if len(possible_assignments) not in statistics["guess_candidates"]:
+ statistics["guess_candidates"][len(possible_assignments)] = 0
+ statistics["guess_candidates"][len(possible_assignments)] += 1
+
+ if statistic_number_refinements not in statistics["number_refinements"]:
+ statistics["number_refinements"][statistic_number_refinements] = 0
+ statistics["number_refinements"][statistic_number_refinements] += 1
+
+ if statistic_length_logic not in statistics["length_logic"]:
+ statistics["length_logic"][statistic_length_logic] = 0
+ statistics["length_logic"][statistic_length_logic] += 1
+
+ else:
+ chosen_source_assignment_location = list(possible_assignments.keys())[0]
+
+ guessed_list_containing_normalized_filenames = possible_assignments[
+ chosen_source_assignment_location
+ ]
+
+ if target_filename_normalized in guessed_list_containing_normalized_filenames:
+ return (True, None)
+
+ # Restore the copy of the statistics so we don't alter it for failed matches
+ statistics = backup_statistics
+ return (False, chosen_source_assignment_location)
+
+
+def get_gecko_root():
+ """
+ Using __file__ as a base, find the gecko root
+ """
+ gecko_root = None
+ directory_to_check = os.path.dirname(os.path.abspath(__file__))
+ while not os.path.isfile(os.path.join(directory_to_check, ".arcconfig")):
+ directory_to_check = os.path.dirname(directory_to_check)
+ if directory_to_check == "/":
+ print("Could not find gecko root")
+ sys.exit(1)
+
+ gecko_root = directory_to_check
+ return gecko_root
+
+
+def get_all_mozbuild_filenames(gecko_root):
+ """
+ Find all the third party moz.build files in the gecko repo
+ """
+ third_party_paths = open(
+ os.path.join(gecko_root, "tools", "rewriting", "ThirdPartyPaths.txt")
+ ).readlines()
+ all_mozbuild_filenames_normalized = []
+ for path in third_party_paths:
+ # We need shell=True because some paths are specified as globs
+ # We need an exception handler because sometimes the directory doesn't exist and find barfs
+ try:
+ output = subprocess.check_output(
+ "find %s -name moz.build" % os.path.join(gecko_root, path.strip()),
+ shell=True,
+ ).decode("utf-8")
+ for f in output.split("\n"):
+ f = f.replace("//", "/").strip().replace(gecko_root, "")[1:]
+ if f:
+ all_mozbuild_filenames_normalized.append(f)
+ except Exception:
+ pass
+
+ return all_mozbuild_filenames_normalized
+
+
+def test_all_third_party_files(gecko_root, all_mozbuild_filenames_normalized):
+ """
+ Run the algorithm on every source file in a third party moz.build file and output the results
+ """
+ all_mozbuild_filenames_normalized = [
+ f for f in all_mozbuild_filenames_normalized if "webrtc" not in f
+ ]
+ all_target_filenames_normalized = get_all_target_filenames_normalized(
+ all_mozbuild_filenames_normalized
+ )
+
+ total_attempted = 0
+ failed_matched = []
+ successfully_matched = 0
+
+ print("Going to try to match %i files..." % len(all_target_filenames_normalized))
+ for target_filename_normalized in all_target_filenames_normalized:
+ result, wrong_guess = try_to_match_target_file(
+ all_mozbuild_filenames_normalized, target_filename_normalized
+ )
+
+ total_attempted += 1
+ if result:
+ successfully_matched += 1
+ else:
+ failed_matched.append((target_filename_normalized, wrong_guess))
+ if total_attempted % 100 == 0:
+ print("Progress:", total_attempted)
+
+ print(
+ "Successfully Matched %i of %i files" % (successfully_matched, total_attempted)
+ )
+ if failed_matched:
+ print("Failed files:")
+ for f in failed_matched:
+ print("\t", f[0], f[1])
+ print("Statistics:")
+ pprint(statistics)
+
+
+if __name__ == "__main__":
+ gecko_root = get_gecko_root()
+ os.chdir(gecko_root)
+
+ add_file_to_moz_build_file(
+ "third_party/jpeg-xl/lib/include/jxl/resizable_parallel_runner.h",
+ "media/libjxl",
+ "third_party/jpeg-xl",
+ )
+
+ # all_mozbuild_filenames_normalized = get_all_mozbuild_filenames(gecko_root)
+ # test_all_third_party_files(gecko_root, all_mozbuild_filenames_normalized)
diff --git a/python/mozbuild/mozbuild/vendor/test_vendor_changes.sh b/python/mozbuild/mozbuild/vendor/test_vendor_changes.sh
new file mode 100755
index 0000000000..3d0e390f7f
--- /dev/null
+++ b/python/mozbuild/mozbuild/vendor/test_vendor_changes.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+if [[ ! -f "CLOBBER" ]]; then
+ echo "Script should be run from mozilla-central root"
+ exit 1
+fi
+
+echo "THIS SCRIPT WILL REVERT AND PURGE UNCOMMITTED LOCAL CHANGES"
+echo "TYPE ok TO CONTINUE"
+read CONFIRMATION
+if [[ $CONFIRMATION != "ok" ]]; then
+ echo "Did not get 'ok', exiting"
+ exit 0
+fi
+
+ALL_MOZ_YAML_FILES=$(find . -name moz.yaml)
+
+for f in $ALL_MOZ_YAML_FILES; do
+ IFS='' read -r -d '' INPUT <<"EOF"
+import sys
+import yaml
+enabled = False
+with open(sys.argv[1]) as yaml_in:
+ o = yaml.safe_load(yaml_in)
+ if "updatebot" in o:
+ if 'tasks' in o["updatebot"]:
+ for t in o["updatebot"]["tasks"]:
+ if t["type"] == "vendoring":
+ if t.get("enabled", True) and t.get("platform", "Linux").lower() == "linux":
+ enabled = True
+if enabled:
+ print(sys.argv[1])
+EOF
+
+ FILE=$(python3 -c "$INPUT" $f)
+
+ if [[ ! -z $FILE ]]; then
+ UPDATEBOT_YAML_FILES+=("$FILE")
+ fi
+done
+
+
+for FILE in "${UPDATEBOT_YAML_FILES[@]}"; do
+ REVISION=$(yq eval ".origin.revision" $FILE)
+ HAS_PATCHES=$(yq eval ".vendoring.patches | (. != null)" $FILE)
+
+ echo "$FILE - $REVISION"
+ if [[ $HAS_PATCHES == "false" ]]; then
+ ./mach vendor $FILE --force --revision $REVISION
+ if [[ $? == 1 ]]; then
+ exit 1
+ fi
+ else
+ ./mach vendor $FILE --force --revision $REVISION --patch-mode=none
+ if [[ $? == 1 ]]; then
+ exit 1
+ fi
+ ./mach vendor $FILE --force --revision $REVISION --patch-mode=only --ignore-modified
+ if [[ $? == 1 ]]; then
+ exit 1
+ fi
+ fi
+ hg revert .
+ hg purge
+done
diff --git a/python/mozbuild/mozbuild/vendor/vendor_manifest.py b/python/mozbuild/mozbuild/vendor/vendor_manifest.py
new file mode 100644
index 0000000000..9de2c23e95
--- /dev/null
+++ b/python/mozbuild/mozbuild/vendor/vendor_manifest.py
@@ -0,0 +1,789 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, # You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import functools
+import glob
+import logging
+import os
+import re
+import shutil
+import stat
+import sys
+import tarfile
+import tempfile
+from collections import defaultdict
+
+import mozfile
+import mozpack.path as mozpath
+import requests
+
+from mozbuild.base import MozbuildObject
+from mozbuild.vendor.rewrite_mozbuild import (
+ MozBuildRewriteException,
+ add_file_to_moz_build_file,
+ remove_file_from_moz_build_file,
+)
+
+DEFAULT_EXCLUDE_FILES = [".git*", ".git*/**"]
+DEFAULT_KEEP_FILES = ["**/moz.build", "**/moz.yaml"]
+DEFAULT_INCLUDE_FILES = []
+
+
+def throwe():
+ raise Exception
+
+
+def _replace_in_file(file, pattern, replacement, regex=False):
+ with open(file) as f:
+ contents = f.read()
+
+ if regex:
+ newcontents = re.sub(pattern, replacement, contents)
+ else:
+ newcontents = contents.replace(pattern, replacement)
+
+ if newcontents == contents:
+ raise Exception(
+ "Could not find '%s' in %s to %sreplace with '%s'"
+ % (pattern, file, "regex-" if regex else "", replacement)
+ )
+
+ with open(file, "w") as f:
+ f.write(newcontents)
+
+
+def list_of_paths_to_readable_string(paths):
+ # From https://stackoverflow.com/a/41578071
+ dic = defaultdict(list)
+ for item in paths:
+ if os.path.isdir(item): # To check path is a directory
+ _ = dic[item] # will set default value as empty list
+ else:
+ path, file = os.path.split(item)
+ dic[path].append(file)
+
+ final_string = "["
+ for key, val in dic.items():
+ if len(val) == 0:
+ final_string += key + ", "
+ elif len(val) < 3:
+ final_string += ", ".join([os.path.join(key, v) for v in val]) + ", "
+ elif len(val) < 10:
+ final_string += "%s items in %s: %s and %s, " % (
+ len(val),
+ key,
+ ", ".join(val[0:-1]),
+ val[-1],
+ )
+ else:
+ final_string += "%s (omitted) items in %s, " % (len(val), key)
+
+ if final_string[-2:] == ", ":
+ final_string = final_string[:-2]
+
+ return final_string + "]"
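+
+# Illustrative sketch of the summarization with hypothetical paths (none of which
+# exist on disk, so each is treated as a file):
+#
+#   list_of_paths_to_readable_string(
+#       ["a/1.c", "a/2.c", "b/1.c", "b/2.c", "b/3.c", "b/4.c"]
+#   )
+#   # -> "[a/1.c, a/2.c, 4 items in b: 1.c, 2.c, 3.c and 4.c]"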
+
+
+class VendorManifest(MozbuildObject):
+ def should_perform_step(self, step):
+ return step not in self.manifest["vendoring"].get("skip-vendoring-steps", [])
+
+ def vendor(
+ self,
+ command_context,
+ yaml_file,
+ manifest,
+ revision,
+ ignore_modified,
+ check_for_update,
+ force,
+ add_to_exports,
+ patch_mode,
+ ):
+ self.manifest = manifest
+ self.yaml_file = yaml_file
+ self._extract_directory = throwe
+ self.logInfo = functools.partial(self.log, logging.INFO, "vendor")
+ if "vendor-directory" not in self.manifest["vendoring"]:
+ self.manifest["vendoring"]["vendor-directory"] = os.path.dirname(
+ self.yaml_file
+ )
+
+ # ==========================================================
+ # If we're only patching; do that
+ if "patches" in self.manifest["vendoring"] and patch_mode == "only":
+ self.import_local_patches(
+ self.manifest["vendoring"]["patches"],
+ os.path.dirname(self.yaml_file),
+ self.manifest["vendoring"]["vendor-directory"],
+ )
+ return
+
+ # ==========================================================
+ self.source_host = self.get_source_host()
+
+ ref_type = self.manifest["vendoring"].get("tracking", "commit")
+ flavor = self.manifest["vendoring"].get("flavor", "regular")
+        # Individual files are special
+
+ if revision == "tip":
+ # This case allows us to force-update a tag-tracking library to master
+ new_revision, timestamp = self.source_host.upstream_commit("HEAD")
+ elif ref_type == "tag":
+ new_revision, timestamp = self.source_host.upstream_tag(revision)
+ else:
+ new_revision, timestamp = self.source_host.upstream_commit(revision)
+
+ self.logInfo(
+ {"ref_type": ref_type, "ref": new_revision, "timestamp": timestamp},
+ "Latest {ref_type} is {ref} from {timestamp}",
+ )
+
+ # ==========================================================
+ if not force and self.manifest["origin"]["revision"] == new_revision:
+ # We're up to date, don't do anything
+ self.logInfo({}, "Latest upstream matches in-tree.")
+ return
+        elif flavor != "individual-files" and check_for_update:
+ # Only print the new revision to stdout
+ print("%s %s" % (new_revision, timestamp))
+ return
+
+ # ==========================================================
+ if flavor == "regular":
+ self.process_regular(
+ new_revision, timestamp, ignore_modified, add_to_exports
+ )
+ elif flavor == "individual-files":
+ self.process_individual(new_revision, timestamp, ignore_modified)
+ elif flavor == "rust":
+ self.process_rust(
+ command_context,
+ self.manifest["origin"]["revision"],
+ new_revision,
+ timestamp,
+ ignore_modified,
+ )
+ else:
+ raise Exception("Unknown flavor")
+
+ def process_rust(
+ self, command_context, old_revision, new_revision, timestamp, ignore_modified
+ ):
+ # First update the Cargo.toml
+ cargo_file = os.path.join(os.path.dirname(self.yaml_file), "Cargo.toml")
+ try:
+ _replace_in_file(cargo_file, old_revision, new_revision)
+ except Exception:
+ # If we can't find it the first time, try again with a short hash
+ _replace_in_file(cargo_file, old_revision[:8], new_revision)
+
+ # Then call ./mach vendor rust
+ from mozbuild.vendor.vendor_rust import VendorRust
+
+ vendor_command = command_context._spawn(VendorRust)
+ vendor_command.vendor(
+ ignore_modified=True, build_peers_said_large_imports_were_ok=False
+ )
+
+ self.update_yaml(new_revision, timestamp)
+
+ def process_individual(self, new_revision, timestamp, ignore_modified):
+        # This design is used because there is no GitHub API to query for the last
+        # commit that modified a file, nor a way to get file blame. So really all
+        # we can do is just download and replace the files and see if they changed...
+
+ def download_and_write_file(url, destination):
+ self.logInfo(
+ {"local_file": destination, "url": url},
+ "Downloading {local_file} from {url}...",
+ )
+
+ with mozfile.NamedTemporaryFile() as tmpfile:
+                req = requests.get(url, stream=True)
+                for data in req.iter_content(4096):
+                    tmpfile.write(data)
+                tmpfile.seek(0)
+
+                shutil.copy2(tmpfile.name, destination)
+
+ # Only one of these loops will have content, so just do them both
+ for f in self.manifest["vendoring"].get("individual-files", []):
+ url = self.source_host.upstream_path_to_file(new_revision, f["upstream"])
+ destination = self.get_full_path(f["destination"])
+ download_and_write_file(url, destination)
+
+ for f in self.manifest["vendoring"].get("individual-files-list", []):
+ url = self.source_host.upstream_path_to_file(
+ new_revision,
+ self.manifest["vendoring"]["individual-files-default-upstream"] + f,
+ )
+ destination = self.get_full_path(
+ self.manifest["vendoring"]["individual-files-default-destination"] + f
+ )
+ download_and_write_file(url, destination)
+
+ self.spurious_check(new_revision, ignore_modified)
+
+ self.logInfo({}, "Checking for update actions")
+ self.update_files(new_revision)
+
+ self.update_yaml(new_revision, timestamp)
+
+ self.logInfo({"rev": new_revision}, "Updated to '{rev}'.")
+
+ if "patches" in self.manifest["vendoring"]:
+ # Remind the user
+ self.log(
+ logging.CRITICAL,
+ "vendor",
+                {},
+                "Patches present in manifest!!! Please run "
+                "'./mach vendor --patch-mode only' after committing changes.",
+ )
+
+ def process_regular(self, new_revision, timestamp, ignore_modified, add_to_exports):
+
+ if self.should_perform_step("fetch"):
+ self.fetch_and_unpack(new_revision)
+ else:
+ self.logInfo({}, "Skipping fetching upstream source.")
+
+ self.logInfo({}, "Checking for update actions")
+ self.update_files(new_revision)
+
+ if self.should_perform_step("hg-add"):
+ self.logInfo({}, "Registering changes with version control.")
+ self.repository.add_remove_files(
+ self.manifest["vendoring"]["vendor-directory"],
+ os.path.dirname(self.yaml_file),
+ )
+ else:
+ self.logInfo({}, "Skipping registering changes.")
+
+ if self.should_perform_step("spurious-check"):
+ self.logInfo({}, "Checking for a spurious update.")
+ self.spurious_check(new_revision, ignore_modified)
+ else:
+ self.logInfo({}, "Skipping the spurious update check.")
+
+ if self.should_perform_step("update-moz-yaml"):
+ self.logInfo({}, "Updating moz.yaml.")
+ self.update_yaml(new_revision, timestamp)
+ else:
+ self.logInfo({}, "Skipping updating the moz.yaml file.")
+
+ if self.should_perform_step("update-moz-build"):
+ self.logInfo({}, "Updating moz.build files")
+ self.update_moz_build(
+ self.manifest["vendoring"]["vendor-directory"],
+ os.path.dirname(self.yaml_file),
+ add_to_exports,
+ )
+ else:
+ self.logInfo({}, "Skipping update of moz.build files")
+
+ self.logInfo({"rev": new_revision}, "Updated to '{rev}'.")
+
+ if "patches" in self.manifest["vendoring"]:
+ # Remind the user
+ self.log(
+ logging.CRITICAL,
+ "vendor",
+                {},
+                "Patches present in manifest!!! Please run "
+                "'./mach vendor --patch-mode only' after committing changes.",
+ )
+
+ def get_source_host(self):
+ if self.manifest["vendoring"]["source-hosting"] == "gitlab":
+ from mozbuild.vendor.host_gitlab import GitLabHost
+
+ return GitLabHost(self.manifest)
+ elif self.manifest["vendoring"]["source-hosting"] == "github":
+ from mozbuild.vendor.host_github import GitHubHost
+
+ return GitHubHost(self.manifest)
+ elif self.manifest["vendoring"]["source-hosting"] == "googlesource":
+ from mozbuild.vendor.host_googlesource import GoogleSourceHost
+
+ return GoogleSourceHost(self.manifest)
+ elif self.manifest["vendoring"]["source-hosting"] == "angle":
+ from mozbuild.vendor.host_angle import AngleHost
+
+ return AngleHost(self.manifest)
+ elif self.manifest["vendoring"]["source-hosting"] == "codeberg":
+ from mozbuild.vendor.host_codeberg import CodebergHost
+
+ return CodebergHost(self.manifest)
+ else:
+ raise Exception(
+ "Unknown source host: " + self.manifest["vendoring"]["source-hosting"]
+ )
+
+ def get_full_path(self, path, support_cwd=False):
+ if support_cwd and path[0:5] == "{cwd}":
+ path = path.replace("{cwd}", ".")
+ elif "{tmpextractdir}" in path:
+ # _extract_directory() will throw an exception if it is invalid to use it
+ path = path.replace("{tmpextractdir}", self._extract_directory())
+ elif "{yaml_dir}" in path:
+ path = path.replace("{yaml_dir}", os.path.dirname(self.yaml_file))
+ elif "{vendor_dir}" in path:
+ path = path.replace(
+ "{vendor_dir}", self.manifest["vendoring"]["vendor-directory"]
+ )
+ else:
+ path = mozpath.join(self.manifest["vendoring"]["vendor-directory"], path)
+ return os.path.abspath(path)
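+
+    # Sketch of how the path templates resolve, assuming a hypothetical moz.yaml
+    # at media/libfoo/moz.yaml with vendor-directory third_party/foo (results are
+    # always made absolute; only one template is substituted per path):
+    #
+    #   get_full_path("src/a.c")                       # <abs>/third_party/foo/src/a.c
+    #   get_full_path("{yaml_dir}/patches")            # <abs>/media/libfoo/patches
+    #   get_full_path("{vendor_dir}/build")            # <abs>/third_party/foo/build
+    #   get_full_path("{cwd}/mach", support_cwd=True)  # <abs of cwd>/mach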
+
+ def convert_patterns_to_paths(self, directory, patterns):
+ # glob.iglob uses shell-style wildcards for path name completion.
+ # "recursive=True" enables the double asterisk "**" wildcard which matches
+ # for nested directories as well as the directory we're searching in.
+ paths = []
+ for pattern in patterns:
+ pattern_full_path = mozpath.join(directory, pattern)
+ # If pattern is a directory recursively add contents of directory
+ if os.path.isdir(pattern_full_path):
+ # Append double asterisk to the end to make glob.iglob recursively match
+ # contents of directory
+ paths.extend(
+ glob.iglob(mozpath.join(pattern_full_path, "**"), recursive=True)
+ )
+ # Otherwise pattern is a file or wildcard expression so add it without altering it
+ else:
+ paths.extend(glob.iglob(pattern_full_path, recursive=True))
+ # Remove folder names from list of paths in order to avoid prematurely
+ # truncating directories elsewhere
+ # Sort the final list to ensure we preserve 01_, 02_ ordering for e.g. *.patch globs
+ final_paths = sorted(
+ [mozpath.normsep(path) for path in paths if not os.path.isdir(path)]
+ )
+ return final_paths
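+
+    # Sketch with hypothetical patterns: directories expand recursively, wildcard
+    # expressions go to glob as-is, plain files come back unchanged, and directory
+    # entries themselves are dropped from the sorted result:
+    #
+    #   convert_patterns_to_paths("third_party/foo", ["src", "*.patch", "LICENSE"])
+    #   # -> ["third_party/foo/01_a.patch", "third_party/foo/02_b.patch",
+    #   #     "third_party/foo/LICENSE", "third_party/foo/src/x.c", ...]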
+
+ def fetch_and_unpack(self, revision):
+ """Fetch and unpack upstream source"""
+
+ def validate_tar_member(member, path):
+ def is_within_directory(directory, target):
+ real_directory = os.path.realpath(directory)
+ real_target = os.path.realpath(target)
+ prefix = os.path.commonprefix([real_directory, real_target])
+ return prefix == real_directory
+
+ member_path = os.path.join(path, member.name)
+ if not is_within_directory(path, member_path):
+ raise Exception("Attempted path traversal in tar file: " + member.name)
+ if member.issym():
+ link_path = os.path.join(os.path.dirname(member_path), member.linkname)
+ if not is_within_directory(path, link_path):
+ raise Exception(
+ "Attempted link path traversal in tar file: " + member.name
+ )
+ if member.mode & (stat.S_ISUID | stat.S_ISGID):
+ raise Exception(
+ "Attempted setuid or setgid in tar file: " + member.name
+ )
+
+ def safe_extract(tar, path=".", *, numeric_owner=False):
+ def _files(tar, path):
+ for member in tar:
+ validate_tar_member(member, path)
+ yield member
+
+ tar.extractall(path, members=_files(tar, path), numeric_owner=numeric_owner)
+
+ url = self.source_host.upstream_snapshot(revision)
+ self.logInfo({"url": url}, "Fetching code archive from {url}")
+
+ with mozfile.NamedTemporaryFile() as tmptarfile:
+ tmpextractdir = tempfile.TemporaryDirectory()
+ try:
+ req = requests.get(url, stream=True)
+ for data in req.iter_content(4096):
+ tmptarfile.write(data)
+ tmptarfile.seek(0)
+
+ vendor_dir = mozpath.normsep(
+ self.manifest["vendoring"]["vendor-directory"]
+ )
+ if self.should_perform_step("keep"):
+ self.logInfo({}, "Retaining wanted in-tree files.")
+ to_keep = self.convert_patterns_to_paths(
+ vendor_dir,
+ self.manifest["vendoring"].get("keep", [])
+ + DEFAULT_KEEP_FILES
+ + self.manifest["vendoring"].get("patches", []),
+ )
+ else:
+ self.logInfo({}, "Skipping retention of in-tree files.")
+ to_keep = []
+
+ self.logInfo({"vd": vendor_dir}, "Cleaning {vd} to import changes.")
+ # We use double asterisk wildcard here to get complete list of recursive contents
+ for file in self.convert_patterns_to_paths(vendor_dir, ["**"]):
+ file = mozpath.normsep(file)
+ if file not in to_keep:
+ mozfile.remove(file)
+
+ self.logInfo({"vd": vendor_dir}, "Unpacking upstream files for {vd}.")
+ with tarfile.open(tmptarfile.name) as tar:
+
+ safe_extract(tar, tmpextractdir.name)
+
+ def get_first_dir(p):
+ halves = os.path.split(p)
+ return get_first_dir(halves[0]) if halves[0] else halves[1]
+
+ one_prefix = get_first_dir(tar.getnames()[0])
+ has_prefix = all(
+ map(lambda name: name.startswith(one_prefix), tar.getnames())
+ )
+
+ # GitLab puts everything down a directory; move it up.
+ if has_prefix:
+ tardir = mozpath.join(tmpextractdir.name, one_prefix)
+ mozfile.copy_contents(tardir, tmpextractdir.name)
+ mozfile.remove(tardir)
+
+ if self.should_perform_step("include"):
+ self.logInfo({}, "Retaining wanted files from upstream changes.")
+ to_include = self.convert_patterns_to_paths(
+ tmpextractdir.name,
+ self.manifest["vendoring"].get("include", [])
+ + DEFAULT_INCLUDE_FILES,
+ )
+ else:
+ self.logInfo({}, "Skipping retention of included files.")
+ to_include = []
+
+ if self.should_perform_step("exclude"):
+ self.logInfo({}, "Removing excluded files from upstream changes.")
+ to_exclude = self.convert_patterns_to_paths(
+ tmpextractdir.name,
+ self.manifest["vendoring"].get("exclude", [])
+ + DEFAULT_EXCLUDE_FILES,
+ )
+ else:
+ self.logInfo({}, "Skipping removing excluded files.")
+ to_exclude = []
+
+ to_exclude = list(set(to_exclude) - set(to_include))
+ if to_exclude:
+ self.logInfo(
+ {"files": list_of_paths_to_readable_string(to_exclude)},
+ "Removing: {files}",
+ )
+ for exclusion in to_exclude:
+ mozfile.remove(exclusion)
+
+                # Clear out empty directories
+                # removeEmpty() won't remove directories containing only empty directories,
+                # so just keep calling it as long as it's doing something
+ def removeEmpty(tmpextractdir):
+ removed = False
+ folders = list(os.walk(tmpextractdir))[1:]
+ for folder in folders:
+ if not folder[2]:
+ try:
+ os.rmdir(folder[0])
+ removed = True
+ except Exception:
+ pass
+ return removed
+
+ while removeEmpty(tmpextractdir.name):
+ pass
+
+ # Then copy over the directories
+ if self.should_perform_step("move-contents"):
+ self.logInfo({"d": vendor_dir}, "Copying to {d}.")
+ mozfile.copy_contents(tmpextractdir.name, vendor_dir)
+ else:
+ self.logInfo({}, "Skipping copying contents into tree.")
+ self._extract_directory = lambda: tmpextractdir.name
+ except Exception as e:
+ tmpextractdir.cleanup()
+ raise e
+
+ def update_yaml(self, revision, timestamp):
+ with open(self.yaml_file) as f:
+ yaml = f.readlines()
+
+ replaced = 0
+ replacements = [
+ [" release:", " %s (%s)." % (revision, timestamp)],
+ [" revision:", " %s" % (revision)],
+ ]
+
+        for i in range(0, len(yaml)):
+            line = yaml[i]
+
+            for r in replacements:
+                if r[0] in line:
+                    print("Found " + line)
+                    replaced += 1
+                    yaml[i] = re.sub(r[0] + r" [v\.a-f0-9]+.*$", r[0] + r[1], yaml[i])
+
+ assert len(replacements) == replaced
+
+ with open(self.yaml_file, "wb") as f:
+ f.write(("".join(yaml)).encode("utf-8"))
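+
+    # Sketch of the two moz.yaml lines update_yaml rewrites in place (hypothetical
+    # values); the regex replaces everything after the key on each matching line:
+    #
+    #   release: abc123 (2023-01-01T00:00:00+00:00).  ->  release: <new> (<timestamp>).
+    #   revision: abc123                              ->  revision: <new>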
+
+ def spurious_check(self, revision, ignore_modified):
+ changed_files = set(
+ [
+ os.path.abspath(f)
+ for f in self.repository.get_changed_files(mode="staged")
+ ]
+ )
+ generated_files = set(
+ [
+ self.get_full_path(f)
+ for f in self.manifest["vendoring"].get("generated", [])
+ ]
+ )
+ changed_files = set(changed_files) - generated_files
+ if not changed_files:
+ self.logInfo({"r": revision}, "Upstream {r} hasn't modified files locally.")
+ # We almost certainly won't be here if ignore_modified was passed, because a modified
+ # local file will show up as a changed_file, but we'll be safe anyway.
+ if not ignore_modified and generated_files:
+ for g in generated_files:
+ self.repository.clean_directory(g)
+ elif generated_files:
+ self.log(
+ logging.CRITICAL,
+ "vendor",
+ {"files": generated_files},
+ "Because you passed --ignore-modified we are not cleaning your"
+ + " working directory, but the following files were probably"
+ + " spuriously edited and can be reverted: {files}",
+ )
+ sys.exit(-2)
+
+ self.logInfo(
+ {"rev": revision, "num": len(changed_files)},
+ "Version '{rev}' has changed {num} files.",
+ )
+
+ def update_files(self, revision):
+ if "update-actions" not in self.manifest["vendoring"]:
+ return
+
+ for update in self.manifest["vendoring"]["update-actions"]:
+ if update["action"] == "copy-file":
+ src = self.get_full_path(update["from"])
+ dst = self.get_full_path(update["to"])
+
+ self.logInfo(
+ {"s": src, "d": dst}, "action: copy-file src: {s} dst: {d}"
+ )
+
+ with open(src) as f:
+ contents = f.read()
+ with open(dst, "w") as f:
+ f.write(contents)
+ elif update["action"] == "move-file":
+ src = self.get_full_path(update["from"])
+ dst = self.get_full_path(update["to"])
+
+ self.logInfo(
+ {"s": src, "d": dst}, "action: move-file src: {s} dst: {d}"
+ )
+
+ shutil.move(src, dst)
+ elif update["action"] == "move-dir":
+ src = self.get_full_path(update["from"])
+ dst = self.get_full_path(update["to"])
+
+ self.logInfo(
+ {"src": src, "dst": dst}, "action: move-dir src: {src} dst: {dst}"
+ )
+
+ if not os.path.isdir(src):
+ raise Exception(
+ "Cannot move from a source directory %s that is not a directory"
+ % src
+ )
+ os.makedirs(dst, exist_ok=True)
+
+ def copy_tree(src, dst):
+ names = os.listdir(src)
+ os.makedirs(dst, exist_ok=True)
+
+ for name in names:
+ srcname = os.path.join(src, name)
+ dstname = os.path.join(dst, name)
+
+ if os.path.isdir(srcname):
+ copy_tree(srcname, dstname)
+ else:
+ shutil.copy2(srcname, dstname)
+
+ copy_tree(src, dst)
+ shutil.rmtree(src)
+
+ elif update["action"] in ["replace-in-file", "replace-in-file-regex"]:
+ file = self.get_full_path(update["file"])
+
+ self.logInfo({"file": file}, "action: replace-in-file file: {file}")
+
+ replacement = update["with"].replace("{revision}", revision)
+ _replace_in_file(
+ file,
+ update["pattern"],
+ replacement,
+ regex=update["action"] == "replace-in-file-regex",
+ )
+ elif update["action"] == "delete-path":
+ path = self.get_full_path(update["path"])
+ self.logInfo({"path": path}, "action: delete-path path: {path}")
+ mozfile.remove(path)
+ elif update["action"] in ["run-script", "run-command"]:
+ if update["action"] == "run-script":
+ command = self.get_full_path(update["script"], support_cwd=True)
+ else:
+ command = update["command"]
+
+ run_dir = self.get_full_path(update["cwd"], support_cwd=True)
+
+ args = []
+ for a in update.get("args", []):
+ if a == "{revision}":
+ args.append(revision)
+ elif any(
+ s in a
+ for s in [
+ "{cwd}",
+ "{vendor_dir}",
+ "{yaml_dir}",
+ "{tmpextractdir}",
+ ]
+ ):
+ args.append(self.get_full_path(a, support_cwd=True))
+ else:
+ args.append(a)
+
+ self.logInfo(
+ {
+ "command": command,
+ "run_dir": run_dir,
+ "args": args,
+ "type": update["action"],
+ },
+ "action: {type} command: {command} working dir: {run_dir} args: {args}",
+ )
+ extra_env = (
+ {"GECKO_PATH": os.getcwd()}
+ if "GECKO_PATH" not in os.environ
+ else {}
+ )
+ # We also add a signal to scripts that they are running under mach vendor
+ extra_env["MACH_VENDOR"] = "1"
+ self.run_process(
+ args=[command] + args,
+ cwd=run_dir,
+ log_name=command,
+ require_unix_environment=True,
+ append_env=extra_env,
+ )
+ else:
+ assert False, "Unknown action supplied (how did this pass validation?)"
+
+ def update_moz_build(self, vendoring_dir, moz_yaml_dir, add_to_exports):
+ if vendoring_dir == moz_yaml_dir:
+ vendoring_dir = moz_yaml_dir = None
+
+ # If you edit this (especially for header files) you should double check
+ # rewrite_mozbuild.py around 'assignment_type'
+ source_suffixes = [".cc", ".c", ".cpp", ".S", ".asm"]
+ header_suffixes = [".h", ".hpp"]
+
+ files_removed = self.repository.get_changed_files(diff_filter="D")
+ files_added = self.repository.get_changed_files(diff_filter="A")
+
+        # Filter the files added down to just the source and header files we track
+        # in moz.build files. (Collect the header files before narrowing files_added
+        # to source files; otherwise no header file could ever be found.)
+        header_files_to_add = [
+            f for f in files_added if any([f.endswith(s) for s in header_suffixes])
+        ]
+        files_added = [
+            f for f in files_added if any([f.endswith(s) for s in source_suffixes])
+        ]
+ if add_to_exports:
+ files_added += header_files_to_add
+ elif header_files_to_add:
+ self.log(
+ logging.WARNING,
+ "header_files_warning",
+ {},
+ (
+ "We found %s header files in the update, pass --add-to-exports if you want"
+ + " to attempt to include them in EXPORTS blocks: %s"
+ )
+ % (len(header_files_to_add), header_files_to_add),
+ )
+
+ self.logInfo(
+ {"added": len(files_added), "removed": len(files_removed)},
+ "Found {added} files added and {removed} files removed.",
+ )
+
+ should_abort = False
+ for f in files_added:
+ try:
+ add_file_to_moz_build_file(f, moz_yaml_dir, vendoring_dir)
+ except MozBuildRewriteException:
+ self.log(
+ logging.ERROR,
+ "vendor",
+ {},
+ "Could not add %s to the appropriate moz.build file" % f,
+ )
+ should_abort = True
+
+ for f in files_removed:
+ try:
+ remove_file_from_moz_build_file(f, moz_yaml_dir, vendoring_dir)
+ except MozBuildRewriteException:
+ self.log(
+ logging.ERROR,
+ "vendor",
+ {},
+ "Could not remove %s from the appropriate moz.build file" % f,
+ )
+ should_abort = True
+
+ if should_abort:
+ self.log(
+ logging.ERROR,
+ "vendor",
+                {},
+                "This is a deficiency in ./mach vendor. "
+                + "Please review the affected files before committing.",
+ )
+ # Exit with -1 to distinguish this from the Exception case of exiting with 1
+ sys.exit(-1)
+
+ def import_local_patches(self, patches, yaml_dir, vendor_dir):
+ self.logInfo({}, "Importing local patches...")
+ for patch in self.convert_patterns_to_paths(yaml_dir, patches):
+ script = [
+ "patch",
+ "-p1",
+ "--directory",
+ vendor_dir,
+ "--input",
+ os.path.abspath(patch),
+ "--no-backup-if-mismatch",
+ ]
+            self.run_process(
+                args=script,
+                log_name="patch",
+            )
diff --git a/python/mozbuild/mozbuild/vendor/vendor_python.py b/python/mozbuild/mozbuild/vendor/vendor_python.py
new file mode 100644
index 0000000000..db554e20d4
--- /dev/null
+++ b/python/mozbuild/mozbuild/vendor/vendor_python.py
@@ -0,0 +1,228 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import os
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+import mozfile
+from mozfile import TemporaryDirectory
+from mozpack.files import FileFinder
+
+from mozbuild.base import MozbuildObject
+
+EXCLUDED_PACKAGES = {
+ # dlmanager's package on PyPI only has metadata, but is missing the code.
+ # https://github.com/parkouss/dlmanager/issues/1
+ "dlmanager",
+ # gyp's package on PyPI doesn't have any downloadable files.
+ "gyp",
+ # We keep some wheels vendored in "_venv" for use in Mozharness
+ "_venv",
+ # We manage vendoring "vsdownload" with a moz.yaml file (there is no module
+ # on PyPI).
+ "vsdownload",
+ # The moz.build file isn't a vendored module, so don't delete it.
+ "moz.build",
+ "requirements.in",
+ # The ansicon package contains DLLs and we don't want to arbitrarily vendor
+ # them since they could be unsafe. This module should rarely be used in practice
+    # (it's a fallback for old versions of Windows). We've intentionally vendored a
+    # modified 'dummy' version of it so that the dependency checks still succeed, but
+    # if it is ever actually used, it will fail gracefully.
+ "ansicon",
+}
+
+
+class VendorPython(MozbuildObject):
+ def __init__(self, *args, **kwargs):
+ MozbuildObject.__init__(self, *args, virtualenv_name="vendor", **kwargs)
+
+ def vendor(self, keep_extra_files=False):
+ from mach.python_lockfile import PoetryHandle
+
+ self.populate_logger()
+ self.log_manager.enable_unstructured()
+
+ vendor_dir = Path(self.topsrcdir) / "third_party" / "python"
+ requirements_in = vendor_dir / "requirements.in"
+ poetry_lockfile = vendor_dir / "poetry.lock"
+ _sort_requirements_in(requirements_in)
+
+ with TemporaryDirectory() as work_dir:
+ work_dir = Path(work_dir)
+ poetry = PoetryHandle(work_dir)
+ poetry.add_requirements_in_file(requirements_in)
+ poetry.reuse_existing_lockfile(poetry_lockfile)
+ lockfiles = poetry.generate_lockfiles(do_update=False)
+
+ # Vendoring packages is only viable if it's possible to have a single
+ # set of packages that work regardless of which environment they're used in.
+            # So we scrub environment markers so that we essentially ask pip to
+ # download "all dependencies for all environments". Pip will then either
+ # fetch them as requested, or intelligently raise an error if that's not
+ # possible (e.g.: if different versions of Python would result in different
+ # packages/package versions).
+ pip_lockfile_without_markers = work_dir / "requirements.no-markers.txt"
+ shutil.copy(str(lockfiles.pip_lockfile), str(pip_lockfile_without_markers))
+ remove_environment_markers_from_requirements_txt(
+ pip_lockfile_without_markers
+ )
+
+ with TemporaryDirectory() as tmp:
+ # use requirements.txt to download archived source distributions of all
+ # packages
+ subprocess.check_call(
+ [
+ sys.executable,
+ "-m",
+ "pip",
+ "download",
+ "-r",
+ str(pip_lockfile_without_markers),
+ "--no-deps",
+ "--dest",
+ tmp,
+ "--abi",
+ "none",
+ "--platform",
+ "any",
+ ]
+ )
+ _purge_vendor_dir(vendor_dir)
+ self._extract(tmp, vendor_dir, keep_extra_files)
+
+ requirements_out = vendor_dir / "requirements.txt"
+
+        # Since the pip and poetry lockfiles are both outputs from third-party
+        # code, they may contain carriage returns on Windows. We
+ # should strip the carriage returns to maintain consistency in our output
+ # regardless of which platform is doing the vendoring. We can do this and
+ # the copying at the same time to minimize reads and writes.
+ _copy_file_strip_carriage_return(lockfiles.pip_lockfile, requirements_out)
+ _copy_file_strip_carriage_return(lockfiles.poetry_lockfile, poetry_lockfile)
+ self.repository.add_remove_files(vendor_dir)
+
+ def _extract(self, src, dest, keep_extra_files=False):
+ """extract source distribution into vendor directory"""
+
+ ignore = ()
+ if not keep_extra_files:
+ ignore = ("*/doc", "*/docs", "*/test", "*/tests", "**/.git")
+ finder = FileFinder(src)
+ for archive, _ in finder.find("*"):
+ _, ext = os.path.splitext(archive)
+ archive_path = os.path.join(finder.base, archive)
+ if ext == ".whl":
+ # Archive is named like "$package-name-1.0-py2.py3-none-any.whl", and should
+ # have four dashes that aren't part of the package name.
+ package_name, version, spec, abi, platform_and_suffix = archive.rsplit(
+ "-", 4
+ )
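+                # For example, a hypothetical "attrs-21.4.0-py2.py3-none-any.whl"
+                # splits into ("attrs", "21.4.0", "py2.py3", "none", "any.whl");
+                # rsplit only splits on the rightmost four dashes, so any dashes
+                # earlier in the name stay put.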
+
+ if package_name in EXCLUDED_PACKAGES:
+ print(
+ f"'{package_name}' is on the exclusion list and will not be vendored."
+ )
+ continue
+
+ target_package_dir = os.path.join(dest, package_name)
+ os.mkdir(target_package_dir)
+
+ # Extract all the contents of the wheel into the package subdirectory.
+ # We're expecting at least a code directory and a ".dist-info" directory,
+ # though there may be a ".data" directory as well.
+ mozfile.extract(archive_path, target_package_dir, ignore=ignore)
+ _denormalize_symlinks(target_package_dir)
+ else:
+ # Archive is named like "$package-name-1.0.tar.gz", and the rightmost
+ # dash should separate the package name from the rest of the archive
+ # specifier.
+ package_name, archive_postfix = archive.rsplit("-", 1)
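+                # For example, a hypothetical "attrs-21.4.0.tar.gz" splits
+                # into ("attrs", "21.4.0.tar.gz").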
+ package_dir = os.path.join(dest, package_name)
+
+ if package_name in EXCLUDED_PACKAGES:
+ print(
+ f"'{package_name}' is on the exclusion list and will not be vendored."
+ )
+ continue
+
+ # The archive should only contain one top-level directory, which has
+ # the source files. We extract this directory directly to
+ # the vendor directory.
+ extracted_files = mozfile.extract(archive_path, dest, ignore=ignore)
+ assert len(extracted_files) == 1
+ extracted_package_dir = extracted_files[0]
+
+ # The extracted package dir includes the version in the name,
+                # which we don't want.
+ mozfile.move(extracted_package_dir, package_dir)
+ _denormalize_symlinks(package_dir)
+
+
+def _sort_requirements_in(requirements_in: Path):
+ requirements = {}
+ with requirements_in.open(mode="r", newline="\n") as f:
+ comments = []
+ for line in f.readlines():
+ line = line.strip()
+ if not line or line.startswith("#"):
+ comments.append(line)
+ continue
+ name, version = line.split("==")
+ requirements[name] = version, comments
+ comments = []
+
+ with requirements_in.open(mode="w", newline="\n") as f:
+ for name, (version, comments) in sorted(requirements.items()):
+ if comments:
+ f.write("{}\n".format("\n".join(comments)))
+ f.write("{}=={}\n".format(name, version))
+
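+# A hedged illustration of the sort above (hypothetical requirements.in
+# content): the file
+#
+#     # needed by mach
+#     zope==5.0
+#     attrs==21.4.0
+#
+# is rewritten as
+#
+#     attrs==21.4.0
+#     # needed by mach
+#     zope==5.0
+#
+# entries are sorted by package name, and each comment block stays attached
+# to the requirement line that follows it.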
+
+def remove_environment_markers_from_requirements_txt(requirements_txt: Path):
+ with requirements_txt.open(mode="r", newline="\n") as f:
+ lines = f.readlines()
+ markerless_lines = []
+ continuation_token = " \\"
+ for line in lines:
+ line = line.rstrip()
+
+ if not line.startswith(" ") and not line.startswith("#") and ";" in line:
+ has_continuation_token = line.endswith(continuation_token)
+ # The first line of each requirement looks something like:
+ # package-name==X.Y; python_version>=3.7
+ # We can scrub the environment marker by splitting on the semicolon
+ line = line.split(";")[0]
+ if has_continuation_token:
+ line += continuation_token
+ markerless_lines.append(line)
+ else:
+ markerless_lines.append(line)
+
+ with requirements_txt.open(mode="w", newline="\n") as f:
+ f.write("\n".join(markerless_lines))
+
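+# A hedged illustration (hypothetical lockfile content): a requirement line
+# such as
+#
+#     attrs==21.4.0 ; python_version >= "3.6" \
+#
+# becomes "attrs==21.4.0  \", keeping the trailing continuation token so the
+# indented --hash lines that follow still attach to it; indented lines and
+# comment lines pass through unchanged.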
+
+def _purge_vendor_dir(vendor_dir):
+ for child in Path(vendor_dir).iterdir():
+ if child.name not in EXCLUDED_PACKAGES:
+ mozfile.remove(str(child))
+
+
+def _denormalize_symlinks(target):
+ # If any files inside the vendored package were symlinks, turn them into normal files
+ # because hg.mozilla.org forbids symlinks in the repository.
+ link_finder = FileFinder(target)
+ for _, f in link_finder.find("**"):
+ if os.path.islink(f.path):
+ link_target = os.path.realpath(f.path)
+ os.unlink(f.path)
+ shutil.copyfile(link_target, f.path)
+
+
+def _copy_file_strip_carriage_return(file_src: Path, file_dst):
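+    # Reading in text mode translates "\r\n" to "\n"; writing with
+    # newline="\n" prevents Python from translating it back on Windows.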
+ shutil.copyfileobj(file_src.open(mode="r"), file_dst.open(mode="w", newline="\n"))
diff --git a/python/mozbuild/mozbuild/vendor/vendor_rust.py b/python/mozbuild/mozbuild/vendor/vendor_rust.py
new file mode 100644
index 0000000000..f87d2efde8
--- /dev/null
+++ b/python/mozbuild/mozbuild/vendor/vendor_rust.py
@@ -0,0 +1,961 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import errno
+import hashlib
+import json
+import logging
+import os
+import re
+import subprocess
+import typing
+from collections import defaultdict
+from itertools import dropwhile
+from pathlib import Path
+
+import mozpack.path as mozpath
+import toml
+from looseversion import LooseVersion
+from mozboot.util import MINIMUM_RUST_VERSION
+
+from mozbuild.base import BuildEnvironmentNotFoundException, MozbuildObject
+
+if typing.TYPE_CHECKING:
+ import datetime
+
+# Type of a TOML value.
+TomlItem = typing.Union[
+ str,
+ typing.List["TomlItem"],
+ typing.Dict[str, "TomlItem"],
+ bool,
+ int,
+ float,
+ "datetime.datetime",
+ "datetime.date",
+ "datetime.time",
+]
+
+
+CARGO_CONFIG_TEMPLATE = """\
+# This file contains vendoring instructions for cargo.
+# It was generated by `mach vendor rust`.
+# Please do not edit.
+
+{config}
+
+# Take advantage of the fact that cargo will treat lines starting with #
+# as comments to add preprocessing directives. This file can thus be copied
+# as-is to $topsrcdir/.cargo/config with no preprocessing to be used there
+# (for e.g. independent tasks building rust code), or be preprocessed by
+# the build system to produce a .cargo/config with the right content.
+#define REPLACE_NAME {replace_name}
+#define VENDORED_DIRECTORY {directory}
+# We explicitly exclude the following section when preprocessing because
+# it would overlap with the preprocessed [source."@REPLACE_NAME@"], and
+# cargo would fail.
+#ifndef REPLACE_NAME
+[source.{replace_name}]
+directory = "{directory}"
+#endif
+
+# Thankfully, @REPLACE_NAME@ is unlikely to be a legitimate source, so
+# cargo will ignore it when it's here verbatim.
+#filter substitution
+[source."@REPLACE_NAME@"]
+directory = "@top_srcdir@/@VENDORED_DIRECTORY@"
+"""
+
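+# A hedged illustration: with replace_name="vendored-sources" and
+# directory="third_party/rust" (typical values, not guaranteed), the
+# unpreprocessed file keeps the literal section
+#
+#     [source.vendored-sources]
+#     directory = "third_party/rust"
+#
+# while the preprocessed copy drops it and substitutes the @...@
+# placeholders instead, pointing the vendored directory at the source root.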
+
+CARGO_LOCK_NOTICE = """
+NOTE: `cargo vendor` may have made changes to your Cargo.lock. To restore your
+Cargo.lock to the HEAD version, run `git checkout -- Cargo.lock` or
+`hg revert Cargo.lock`.
+"""
+
+
+WINDOWS_UNDESIRABLE_REASON = """\
+The windows and windows-sys crates and their dependencies are too big to \
+vendor, and they risk version duplication due to their current update \
+cadence. Until this is worked out with upstream, we prefer to avoid them.\
+"""
+
+PACKAGES_WE_DONT_WANT = {
+ "windows-sys": WINDOWS_UNDESIRABLE_REASON,
+ "windows": WINDOWS_UNDESIRABLE_REASON,
+ "windows_aarch64_msvc": WINDOWS_UNDESIRABLE_REASON,
+ "windows_i686_gnu": WINDOWS_UNDESIRABLE_REASON,
+ "windows_i686_msvc": WINDOWS_UNDESIRABLE_REASON,
+ "windows_x86_64_gnu": WINDOWS_UNDESIRABLE_REASON,
+ "windows_x86_64_msvc": WINDOWS_UNDESIRABLE_REASON,
+}
+
+PACKAGES_WE_ALWAYS_WANT_AN_OVERRIDE_OF = [
+ "autocfg",
+ "cmake",
+ "vcpkg",
+]
+
+
+# Historically duplicated crates. Eventually we want this list to be empty.
+# If you do need to make changes increasing the number of duplicates, please
+# add a comment as to why.
+TOLERATED_DUPES = {
+ "mio": 2,
+ # Transition from time 0.1 to 0.3 underway, but chrono is stuck on 0.1
+    # and hasn't been updated in 1.5 years (a hypothetical update is
+ # expected to remove the dependency on time altogether).
+ "time": 2,
+}
+
+
+class VendorRust(MozbuildObject):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self._issues = []
+
+ def serialize_issues_json(self):
+ return json.dumps(
+ {
+ "Cargo.lock": [
+ {
+ "path": "Cargo.lock",
+ "column": None,
+ "line": None,
+ "level": "error" if level == logging.ERROR else "warning",
+ "message": msg,
+ }
+ for (level, msg) in self._issues
+ ]
+ }
+ )
+
+ def log(self, level, action, params, format_str):
+ if level >= logging.WARNING:
+ self._issues.append((level, format_str.format(**params)))
+ super().log(level, action, params, format_str)
+
+ def get_cargo_path(self):
+ try:
+ return self.substs["CARGO"]
+ except (BuildEnvironmentNotFoundException, KeyError):
+ if "MOZ_AUTOMATION" in os.environ:
+ cargo = os.path.join(
+ os.environ["MOZ_FETCHES_DIR"], "rustc", "bin", "cargo"
+ )
+ assert os.path.exists(cargo)
+ return cargo
+ # Default if this tree isn't configured.
+ from mozfile import which
+
+ cargo = which("cargo")
+ if not cargo:
+ raise OSError(
+ errno.ENOENT,
+ (
+ "Could not find 'cargo' on your $PATH. "
+ "Hint: have you run `mach build` or `mach configure`?"
+ ),
+ )
+ return cargo
+
+ def check_cargo_version(self, cargo):
+ """
+ Ensure that Cargo is new enough.
+ """
+ out = (
+ subprocess.check_output([cargo, "--version"])
+ .splitlines()[0]
+ .decode("UTF-8")
+ )
+ if not out.startswith("cargo"):
+ return False
+ version = LooseVersion(out.split()[1])
+ # Cargo 1.68.0 changed vendoring in a way that creates a lot of noise
+ # if we go back and forth between vendoring with an older version and
+ # a newer version. Only allow the newer versions.
+ minimum_rust_version = MINIMUM_RUST_VERSION
+ if LooseVersion("1.68.0") >= MINIMUM_RUST_VERSION:
+ minimum_rust_version = "1.68.0"
+ if version < minimum_rust_version:
+ self.log(
+ logging.ERROR,
+ "cargo_version",
+ {},
+ "Cargo >= {0} required (install Rust {0} or newer)".format(
+ minimum_rust_version
+ ),
+ )
+ return False
+ self.log(logging.DEBUG, "cargo_version", {}, "cargo is new enough")
+ return True
+
+ def has_modified_files(self):
+ """
+ Ensure that there aren't any uncommitted changes to files
+ in the working copy, since we're going to change some state
+ on the user. Allow changes to Cargo.{toml,lock} since that's
+ likely to be a common use case.
+ """
+ modified = [
+ f
+ for f in self.repository.get_changed_files("M")
+ if os.path.basename(f) not in ("Cargo.toml", "Cargo.lock")
+ and not f.startswith("supply-chain/")
+ ]
+ if modified:
+ self.log(
+ logging.ERROR,
+ "modified_files",
+ {},
+ """You have uncommitted changes to the following files:
+
+{files}
+
+Please commit or stash these changes before vendoring, or re-run with `--ignore-modified`.
+""".format(
+ files="\n".join(sorted(modified))
+ ),
+ )
+ return modified
+
+ def check_openssl(self):
+ """
+ Set environment flags for building with openssl.
+
+        macOS doesn't include openssl, but the openssl-sys crate used by
+        mach-vendor expects a system copy. It's common to have one
+ installed in /usr/local/opt/openssl by homebrew, but custom link
+ flags are necessary to build against it.
+ """
+
+ test_paths = ["/usr/include", "/usr/local/include"]
+ if any(
+ [os.path.exists(os.path.join(path, "openssl/ssl.h")) for path in test_paths]
+ ):
+ # Assume we can use one of these system headers.
+ return None
+
+ if os.path.exists("/usr/local/opt/openssl/include/openssl/ssl.h"):
+ # Found a likely homebrew install.
+ self.log(
+ logging.INFO, "openssl", {}, "Using OpenSSL in /usr/local/opt/openssl"
+ )
+ return {
+ "OPENSSL_INCLUDE_DIR": "/usr/local/opt/openssl/include",
+ "OPENSSL_LIB_DIR": "/usr/local/opt/openssl/lib",
+ }
+
+ self.log(logging.ERROR, "openssl", {}, "OpenSSL not found!")
+ return None
+
+ def _ensure_cargo(self):
+ """
+ Ensures all the necessary cargo bits are installed.
+
+ Returns the path to cargo if successful, None otherwise.
+ """
+ cargo = self.get_cargo_path()
+ if not self.check_cargo_version(cargo):
+ return None
+ return cargo
+
+ # A whitelist of acceptable license identifiers for the
+    # package.license field from https://spdx.org/licenses/. Cargo
+ # documentation claims that values are checked against the above
+ # list and that multiple entries can be separated by '/'. We
+ # choose to list all combinations instead for the sake of
+ # completeness and because some entries below obviously do not
+ # conform to the format prescribed in the documentation.
+ #
+ # It is insufficient to have additions to this whitelist reviewed
+ # solely by a build peer; any additions must be checked by somebody
+ # competent to review licensing minutiae.
+
+ # Licenses for code used at runtime. Please see the above comment before
+ # adding anything to this list.
+ RUNTIME_LICENSE_WHITELIST = [
+ "Apache-2.0",
+ "Apache-2.0 WITH LLVM-exception",
+ # BSD-2-Clause and BSD-3-Clause are ok, but packages using them
+ # must be added to the appropriate section of about:licenses.
+ # To encourage people to remember to do that, we do not whitelist
+ # the licenses themselves, and we require the packages to be added
+ # to RUNTIME_LICENSE_PACKAGE_WHITELIST below.
+ "CC0-1.0",
+ "ISC",
+ "MIT",
+ "MPL-2.0",
+ "Unicode-DFS-2016",
+ "Unlicense",
+ "Zlib",
+ ]
+
+ # Licenses for code used at build time (e.g. code generators). Please see the above
+ # comments before adding anything to this list.
+ BUILDTIME_LICENSE_WHITELIST = {
+ "BSD-3-Clause": [
+ "bindgen",
+ "fuchsia-zircon",
+ "fuchsia-zircon-sys",
+ "fuchsia-cprng",
+ "glsl",
+ "instant",
+ ]
+ }
+
+ # This whitelist should only be used for packages that use an acceptable
+    # license, but that also need to be explicitly mentioned in about:license.
+ RUNTIME_LICENSE_PACKAGE_WHITELIST = {
+ "BSD-2-Clause": [
+ "arrayref",
+ "cloudabi",
+ "Inflector",
+ "mach",
+ "qlog",
+ ],
+ "BSD-3-Clause": [],
+ }
+
+    # ICU4X is distributed as individual crates that all share the same LICENSE;
+    # each crate will need to be individually added to the allow list below. We'll
+ # define the SHA256 once here, to make the review process easier as new
+ # ICU4X crates are vendored into the tree.
+ ICU4X_LICENSE_SHA256 = (
+ "02420cc1b4c26d9a3318d60fd57048d015831249a5b776a1ada75cd227e78630"
+ )
+
+ # This whitelist should only be used for packages that use a
+ # license-file and for which the license-file entry has been
+ # reviewed. The table is keyed by package names and maps to the
+ # sha256 hash of the license file that we reviewed.
+ #
+ # As above, it is insufficient to have additions to this whitelist
+ # reviewed solely by a build peer; any additions must be checked by
+ # somebody competent to review licensing minutiae.
+ RUNTIME_LICENSE_FILE_PACKAGE_WHITELIST = {
+ # MIT
+ "deque": "6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb",
+ # we're whitelisting this fuchsia crate because it doesn't get built in the final
+ # product but has a license-file that needs ignoring
+ "fuchsia-cprng": "03b114f53e6587a398931762ee11e2395bfdba252a329940e2c8c9e81813845b",
+ # Old ICU4X crates for ICU4X 1.0, see comment above.
+ "yoke-derive": ICU4X_LICENSE_SHA256,
+ "zerofrom-derive": ICU4X_LICENSE_SHA256,
+ }
+
+ @staticmethod
+ def runtime_license(package, license_string):
+ """Cargo docs say:
+ ---
+ https://doc.rust-lang.org/cargo/reference/manifest.html
+
+ This is an SPDX 2.1 license expression for this package. Currently
+ crates.io will validate the license provided against a whitelist of
+ known license and exception identifiers from the SPDX license list
+ 2.4. Parentheses are not currently supported.
+
+ Multiple licenses can be separated with a `/`, although that usage
+ is deprecated. Instead, use a license expression with AND and OR
+ operators to get more explicit semantics.
+ ---
+ But I have no idea how you can meaningfully AND licenses, so
+ we will abort if that is detected. We'll handle `/` and OR as
+        equivalent and approve if any of them is in our approved list."""
+
+ # This specific AND combination has been reviewed for encoding_rs.
+ if (
+ license_string == "(Apache-2.0 OR MIT) AND BSD-3-Clause"
+ and package == "encoding_rs"
+ ):
+ return True
+
+ # This specific AND combination has been reviewed for unicode-ident.
+ if (
+ license_string == "(MIT OR Apache-2.0) AND Unicode-DFS-2016"
+ and package == "unicode-ident"
+ ):
+ return True
+
+ if re.search(r"\s+AND", license_string):
+ return False
+
+ license_list = re.split(r"\s*/\s*|\s+OR\s+", license_string)
+ for license in license_list:
+ if license in VendorRust.RUNTIME_LICENSE_WHITELIST:
+ return True
+ if package in VendorRust.RUNTIME_LICENSE_PACKAGE_WHITELIST.get(license, []):
+ return True
+ return False
+
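+    # Hedged examples (illustrative license strings) of how runtime_license
+    # treats expressions:
+    #   "MIT OR Apache-2.0"  -> accepted: MIT is whitelisted
+    #   "MIT/Apache-2.0"     -> accepted: "/" is treated like OR
+    #   "GPL-3.0 AND MIT"    -> rejected: AND combinations abort
+    #   "BSD-2-Clause"       -> accepted only for packages listed in
+    #                           RUNTIME_LICENSE_PACKAGE_WHITELIST
+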
+ def _check_licenses(self, vendor_dir: str) -> bool:
+ def verify_acceptable_license(package: str, license: str) -> bool:
+ self.log(
+ logging.DEBUG, "package_license", {}, "has license {}".format(license)
+ )
+
+ if not self.runtime_license(package, license):
+ if license not in self.BUILDTIME_LICENSE_WHITELIST:
+ self.log(
+ logging.ERROR,
+ "package_license_error",
+ {},
+ """Package {} has a non-approved license: {}.
+
+                    Please request review of the package's license. If the license
+ is approved, please add it to the whitelist of suitable licenses.
+ """.format(
+ package, license
+ ),
+ )
+ return False
+ elif package not in self.BUILDTIME_LICENSE_WHITELIST[license]:
+ self.log(
+ logging.ERROR,
+ "package_license_error",
+ {},
+ """Package {} has a license that is approved for build-time dependencies:
+ {}
+ but the package itself is not whitelisted as being a build-time only package.
+
+ If your package is build-time only, please add it to the whitelist of build-time
+                    only packages. Otherwise, you need to request review of the package's license.
+                    If the license is approved, please add it to the whitelist of suitable licenses.
+ """.format(
+ package, license
+ ),
+ )
+ return False
+ return True
+
+ def check_package(package_name: str) -> bool:
+ self.log(
+ logging.DEBUG,
+ "package_check",
+ {},
+ "Checking license for {}".format(package_name),
+ )
+
+ toml_file = os.path.join(vendor_dir, package_name, "Cargo.toml")
+ with open(toml_file, encoding="utf-8") as fh:
+ toml_data = toml.load(fh)
+
+ package_entry: typing.Dict[str, TomlItem] = toml_data["package"]
+ license = package_entry.get("license", None)
+ license_file = package_entry.get("license-file", None)
+
+ if license is not None and type(license) is not str:
+ self.log(
+ logging.ERROR,
+ "package_invalid_license_format",
+ {},
+ "package {} has an invalid `license` field (expected a string)".format(
+ package_name
+ ),
+ )
+ return False
+
+ if license_file is not None and type(license_file) is not str:
+ self.log(
+ logging.ERROR,
+ "package_invalid_license_format",
+ {},
+ "package {} has an invalid `license-file` field (expected a string)".format(
+ package_name
+ ),
+ )
+ return False
+
+ # License information is optional for crates to provide, but
+ # we require it.
+ if not license and not license_file:
+ self.log(
+ logging.ERROR,
+ "package_no_license",
+ {},
+ "package {} does not provide a license".format(package_name),
+ )
+ return False
+
+ # The Cargo.toml spec suggests that crates should either have
+ # `license` or `license-file`, but not both. We might as well
+ # be defensive about that, though.
+ if license and license_file:
+ self.log(
+ logging.ERROR,
+ "package_many_licenses",
+ {},
+ "package {} provides too many licenses".format(package_name),
+ )
+ return False
+
+ if license:
+ return verify_acceptable_license(package_name, license)
+
+ # otherwise, it's a custom license in a separate file
+ assert license_file is not None
+ self.log(
+ logging.DEBUG,
+ "package_license_file",
+ {},
+ "package has license-file {}".format(license_file),
+ )
+
+ if package_name not in self.RUNTIME_LICENSE_FILE_PACKAGE_WHITELIST:
+ self.log(
+ logging.ERROR,
+ "package_license_file_unknown",
+ {},
+ """Package {} has an unreviewed license file: {}.
+
+Please request review on the provided license; if approved, the package can be added
+to the whitelist of packages whose licenses are suitable.
+""".format(
+ package_name, license_file
+ ),
+ )
+ return False
+
+ approved_hash = self.RUNTIME_LICENSE_FILE_PACKAGE_WHITELIST[package_name]
+
+ with open(
+ os.path.join(vendor_dir, package_name, license_file), "rb"
+ ) as license_buf:
+ current_hash = hashlib.sha256(license_buf.read()).hexdigest()
+
+ if current_hash != approved_hash:
+ self.log(
+ logging.ERROR,
+ "package_license_file_mismatch",
+ {},
+ """Package {} has changed its license file: {} (hash {}).
+
+Please request review on the provided license; if approved, please update the
+license file's hash.
+""".format(
+ package_name, license_file, current_hash
+ ),
+ )
+ return False
+ return True
+
+ # Force all of the packages to be checked for license information
+ # before reducing via `all`, so all license issues are found in a
+ # single `mach vendor rust` invocation.
+ results = [
+ check_package(p)
+ for p in os.listdir(vendor_dir)
+ if os.path.isdir(os.path.join(vendor_dir, p))
+ ]
+ return all(results)
+
+ def _check_build_rust(self, cargo_lock):
+ ret = True
+ crates = {}
+ for path in Path(self.topsrcdir).glob("build/rust/**/Cargo.toml"):
+ with open(path) as fh:
+ cargo_toml = toml.load(fh)
+ path = path.relative_to(self.topsrcdir)
+ package = cargo_toml["package"]
+ key = (package["name"], package["version"])
+ if key in crates:
+ self.log(
+ logging.ERROR,
+ "build_rust",
+ {
+ "path": crates[key],
+ "path2": path,
+ "crate": key[0],
+ "version": key[1],
+ },
+ "{path} and {path2} both contain {crate} {version}",
+ )
+ ret = False
+ crates[key] = path
+
+ for package in cargo_lock["package"]:
+ key = (package["name"], package["version"])
+ if key in crates and "source" not in package:
+ crates.pop(key)
+
+ for ((name, version), path) in crates.items():
+ self.log(
+ logging.ERROR,
+ "build_rust",
+ {"path": path, "crate": name, "version": version},
+ "{crate} {version} has an override in {path} that is not used",
+ )
+ ret = False
+ return ret
+
+ def vendor(
+ self, ignore_modified=False, build_peers_said_large_imports_were_ok=False
+ ):
+ from mozbuild.mach_commands import cargo_vet
+
+ self.populate_logger()
+ self.log_manager.enable_unstructured()
+ if not ignore_modified and self.has_modified_files():
+ return False
+
+ cargo = self._ensure_cargo()
+ if not cargo:
+ self.log(logging.ERROR, "cargo_not_found", {}, "Cargo was not found.")
+ return False
+
+ relative_vendor_dir = "third_party/rust"
+ vendor_dir = mozpath.join(self.topsrcdir, relative_vendor_dir)
+
+        # We run the command via subprocess instead of mozprocess to ensure errors are displayed.
+ # We do an |update -p| here to regenerate the Cargo.lock file with minimal
+ # changes. See bug 1324462
+ res = subprocess.run([cargo, "update", "-p", "gkrust"], cwd=self.topsrcdir)
+ if res.returncode:
+ self.log(logging.ERROR, "cargo_update_failed", {}, "Cargo update failed.")
+ return False
+
+ with open(os.path.join(self.topsrcdir, "Cargo.lock")) as fh:
+ cargo_lock = toml.load(fh)
+ failed = False
+ for package in cargo_lock.get("patch", {}).get("unused", []):
+ self.log(
+ logging.ERROR,
+ "unused_patch",
+ {"crate": package["name"]},
+ """Unused patch in top-level Cargo.toml for {crate}.""",
+ )
+ failed = True
+
+ if not self._check_build_rust(cargo_lock):
+ failed = True
+
+ grouped = defaultdict(list)
+ for package in cargo_lock["package"]:
+ if package["name"] in PACKAGES_WE_ALWAYS_WANT_AN_OVERRIDE_OF:
+ # When the in-tree version is used, there is `source` for
+ # it in Cargo.lock, which is what we expect.
+ if package.get("source"):
+ self.log(
+ logging.ERROR,
+ "non_overridden",
+ {
+ "crate": package["name"],
+ "version": package["version"],
+ "source": package["source"],
+ },
+ "Crate {crate} v{version} must be overridden but isn't "
+ "and comes from {source}.",
+ )
+ failed = True
+ elif package["name"] in PACKAGES_WE_DONT_WANT:
+ self.log(
+ logging.ERROR,
+ "undesirable",
+ {
+ "crate": package["name"],
+ "version": package["version"],
+ "reason": PACKAGES_WE_DONT_WANT[package["name"]],
+ },
+ "Crate {crate} is not desirable: {reason}",
+ )
+ failed = True
+ grouped[package["name"]].append(package)
+
+ for name, packages in grouped.items():
+            # Allow crates of the same name when one depends on the other.
+ num = len(
+ [
+ p
+ for p in packages
+ if all(d.split()[0] != name for d in p.get("dependencies", []))
+ ]
+ )
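+            # e.g. (hypothetically) if mio 0.8 depended on mio 0.6 during a
+            # migration, the 0.8 copy would list "mio" among its dependencies
+            # and be excluded here, so only the 0.6 copy would count toward
+            # the dupe total.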
+ expected = TOLERATED_DUPES.get(name, 1)
+ if num > expected:
+ self.log(
+ logging.ERROR,
+ "duplicate_crate",
+ {
+ "crate": name,
+ "num": num,
+ "expected": expected,
+ "file": Path(__file__).relative_to(self.topsrcdir),
+ },
+ "There are {num} different versions of crate {crate} "
+ "(expected {expected}). Please avoid the extra duplication "
+ "or adjust TOLERATED_DUPES in {file} if not possible "
+ "(but we'd prefer the former).",
+ )
+ failed = True
+ elif num < expected and num > 1:
+ self.log(
+ logging.ERROR,
+ "less_duplicate_crate",
+ {
+ "crate": name,
+ "num": num,
+ "expected": expected,
+ "file": Path(__file__).relative_to(self.topsrcdir),
+ },
+ "There are {num} different versions of crate {crate} "
+ "(expected {expected}). Please adjust TOLERATED_DUPES in "
+ "{file} to reflect this improvement.",
+ )
+ failed = True
+ elif num < expected and num > 0:
+ self.log(
+ logging.ERROR,
+ "less_duplicate_crate",
+ {
+ "crate": name,
+ "file": Path(__file__).relative_to(self.topsrcdir),
+ },
+ "Crate {crate} is not duplicated anymore. "
+ "Please adjust TOLERATED_DUPES in {file} to reflect this improvement.",
+ )
+ failed = True
+ elif name in TOLERATED_DUPES and expected <= 1:
+ self.log(
+ logging.ERROR,
+ "broken_allowed_dupes",
+ {
+ "crate": name,
+ "file": Path(__file__).relative_to(self.topsrcdir),
+ },
+ "Crate {crate} is not duplicated. Remove it from "
+ "TOLERATED_DUPES in {file}.",
+ )
+ failed = True
+
+ for name in TOLERATED_DUPES:
+ if name not in grouped:
+ self.log(
+ logging.ERROR,
+ "outdated_allowed_dupes",
+ {
+ "crate": name,
+ "file": Path(__file__).relative_to(self.topsrcdir),
+ },
+ "Crate {crate} is not in Cargo.lock anymore. Remove it from "
+ "TOLERATED_DUPES in {file}.",
+ )
+ failed = True
+
+        # Run cargo-vet and report any audit problems.
+ env = os.environ.copy()
+ env["PATH"] = os.pathsep.join(
+ (
+ str(Path(cargo).parent),
+ os.environ["PATH"],
+ )
+ )
+ flags = ["--output-format=json"]
+ if "MOZ_AUTOMATION" in os.environ:
+ flags.append("--locked")
+ flags.append("--frozen")
+ res = cargo_vet(
+ self,
+ flags,
+ stdout=subprocess.PIPE,
+ env=env,
+ )
+ if res.returncode:
+ vet = json.loads(res.stdout)
+ logged_error = False
+ for failure in vet.get("failures", []):
+ failure["crate"] = failure.pop("name")
+ self.log(
+ logging.ERROR,
+ "cargo_vet_failed",
+ failure,
+ "Missing audit for {crate}:{version} (requires {missing_criteria})."
+ " Run `./mach cargo vet` for more information.",
+ )
+ logged_error = True
+ # NOTE: This could log more information, but the violation JSON
+ # output isn't super stable yet, so it's probably simpler to tell
+ # the caller to run `./mach cargo vet` directly.
+ for key in vet.get("violations", {}).keys():
+ self.log(
+ logging.ERROR,
+ "cargo_vet_failed",
+ {"key": key},
+ "Violation conflict for {key}. Run `./mach cargo vet` for more information.",
+ )
+ logged_error = True
+ if "error" in vet:
+ # NOTE: The error format produced by cargo-vet is from the
+ # `miette` crate, and can include a lot of metadata and context.
+ # If we want to show more details in the future, we can expand
+ # this rendering to also include things like source labels and
+ # related error metadata.
+ error = vet["error"]
+ self.log(
+ logging.ERROR,
+ "cargo_vet_failed",
+ error,
+ "Vet {severity}: {message}",
+ )
+ if "help" in error:
+ self.log(logging.INFO, "cargo_vet_failed", error, " help: {help}")
+ for cause in error.get("causes", []):
+ self.log(
+ logging.INFO,
+ "cargo_vet_failed",
+ {"cause": cause},
+ " cause: {cause}",
+ )
+ for related in error.get("related", []):
+ self.log(
+ logging.INFO,
+ "cargo_vet_failed",
+ related,
+ " related {severity}: {message}",
+ )
+ self.log(
+ logging.INFO,
+ "cargo_vet_failed",
+ {},
+ "Run `./mach cargo vet` for more information.",
+ )
+ logged_error = True
+ if not logged_error:
+ self.log(
+ logging.ERROR,
+ "cargo_vet_failed",
+ {},
+ "Unknown vet error. Run `./mach cargo vet` for more information.",
+ )
+ failed = True
+
+ # If we failed when checking the crates list and/or running `cargo vet`,
+ # stop before invoking `cargo vendor`.
+ if failed:
+ return False
+
+ res = subprocess.run(
+ [cargo, "vendor", vendor_dir], cwd=self.topsrcdir, stdout=subprocess.PIPE
+ )
+ if res.returncode:
+ self.log(logging.ERROR, "cargo_vendor_failed", {}, "Cargo vendor failed.")
+ return False
+ output = res.stdout.decode("UTF-8")
+
+ # Get the snippet of configuration that cargo vendor outputs, and
+ # update .cargo/config with it.
+ # XXX(bug 1576765): Hopefully do something better after
+ # https://github.com/rust-lang/cargo/issues/7280 is addressed.
+ config = "\n".join(
+ dropwhile(lambda l: not l.startswith("["), output.splitlines())
+ )
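+        # For illustration (exact output may vary by cargo version): cargo
+        # vendor prints human-readable instructions first, then a TOML
+        # snippet such as
+        #
+        #     [source.crates-io]
+        #     replace-with = "vendored-sources"
+        #
+        #     [source.vendored-sources]
+        #     directory = "third_party/rust"
+        #
+        # dropwhile() discards everything before the first line starting
+        # with "[".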
+
+ # The config is toml; parse it as such.
+ config = toml.loads(config)
+
+ # For each replace-with, extract their configuration and update the
+ # corresponding directory to be relative to topsrcdir.
+ replaces = {
+ v["replace-with"] for v in config["source"].values() if "replace-with" in v
+ }
+
+ # We only really expect one replace-with
+ if len(replaces) != 1:
+ self.log(
+ logging.ERROR,
+ "vendor_failed",
+ {},
+ """cargo vendor didn't output a unique replace-with. Found: %s."""
+ % replaces,
+ )
+ return False
+
+ replace_name = replaces.pop()
+ replace = config["source"].pop(replace_name)
+ replace["directory"] = mozpath.relpath(
+ mozpath.normsep(os.path.normcase(replace["directory"])),
+ mozpath.normsep(os.path.normcase(self.topsrcdir)),
+ )
+
+ cargo_config = os.path.join(self.topsrcdir, ".cargo", "config.in")
+ with open(cargo_config, "w", encoding="utf-8", newline="\n") as fh:
+ fh.write(
+ CARGO_CONFIG_TEMPLATE.format(
+ config=toml.dumps(config),
+ replace_name=replace_name,
+ directory=replace["directory"],
+ )
+ )
+
+ if not self._check_licenses(vendor_dir):
+ self.log(
+ logging.ERROR,
+ "license_check_failed",
+ {},
+ """The changes from `mach vendor rust` will NOT be added to version control.
+
+{notice}""".format(
+ notice=CARGO_LOCK_NOTICE
+ ),
+ )
+ self.repository.clean_directory(vendor_dir)
+ return False
+
+ self.repository.add_remove_files(vendor_dir)
+
+ # 100k is a reasonable upper bound on source file size.
+ FILESIZE_LIMIT = 100 * 1024
+ large_files = set()
+ cumulative_added_size = 0
+ for f in self.repository.get_changed_files("A"):
+ path = mozpath.join(self.topsrcdir, f)
+ size = os.stat(path).st_size
+ cumulative_added_size += size
+ if size > FILESIZE_LIMIT:
+ large_files.add(f)
+
+ # Forcefully complain about large files being added, as history has
+ # shown that large-ish files typically are not needed.
+ if large_files and not build_peers_said_large_imports_were_ok:
+ self.log(
+ logging.ERROR,
+ "filesize_check",
+ {},
+ """The following files exceed the filesize limit of {size}:
+
+{files}
+
+If you can't reduce the size of these files, talk to a build peer (on the #build
+channel at https://chat.mozilla.org) about the particular large files you are
+adding.
+
+The changes from `mach vendor rust` will NOT be added to version control.
+
+{notice}""".format(
+ files="\n".join(sorted(large_files)),
+ size=FILESIZE_LIMIT,
+ notice=CARGO_LOCK_NOTICE,
+ ),
+ )
+ self.repository.forget_add_remove_files(vendor_dir)
+ self.repository.clean_directory(vendor_dir)
+ return False
+
+ # Only warn for large imports, since we may just have large code
+ # drops from time to time (e.g. importing features into m-c).
+ SIZE_WARN_THRESHOLD = 5 * 1024 * 1024
+ if cumulative_added_size >= SIZE_WARN_THRESHOLD:
+ self.log(
+ logging.WARN,
+ "filesize_check",
+ {},
+ """Your changes add {size} bytes of added files.
+
+Please consider finding ways to reduce the size of the vendored packages.
+For instance, check the vendored packages for unusually large test or
+benchmark files that don't need to be published to crates.io and submit
+a pull request upstream to ignore those files when publishing.""".format(
+ size=cumulative_added_size
+ ),
+ )
+ return True