Diffstat (limited to 'taskcluster/docker/funsize-update-generator/scripts')
-rw-r--r-- | taskcluster/docker/funsize-update-generator/scripts/funsize.py | 471
-rwxr-xr-x | taskcluster/docker/funsize-update-generator/scripts/mbsdiff_hook.sh | 157
2 files changed, 628 insertions, 0 deletions
diff --git a/taskcluster/docker/funsize-update-generator/scripts/funsize.py b/taskcluster/docker/funsize-update-generator/scripts/funsize.py
new file mode 100644
index 0000000000..84fd2fbd0b
--- /dev/null
+++ b/taskcluster/docker/funsize-update-generator/scripts/funsize.py
@@ -0,0 +1,471 @@
+#!/usr/bin/env python3
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import argparse
+import asyncio
+import configparser
+import json
+import logging
+import os
+import shutil
+import tempfile
+import time
+from contextlib import AsyncExitStack
+from distutils.util import strtobool
+from pathlib import Path
+
+import aiohttp
+from mardor.reader import MarReader
+from mardor.signing import get_keysize
+from scriptworker.utils import get_hash, retry_async
+
+log = logging.getLogger(__name__)
+
+
+ROOT_URL = os.environ.get(
+    "TASKCLUSTER_ROOT_URL", "https://firefox-ci-tc.services.mozilla.com"
+)
+QUEUE_PREFIX = f"{ROOT_URL}/api/queue/"
+ALLOWED_URL_PREFIXES = (
+    "http://download.cdn.mozilla.net/pub/mozilla.org/firefox/nightly/",
+    "http://download.cdn.mozilla.net/pub/firefox/nightly/",
+    "http://ftp.mozilla.org/",
+    "http://download.mozilla.org/",
+    "https://archive.mozilla.org/",
+    "http://archive.mozilla.org/",
+    QUEUE_PREFIX,
+)
+STAGING_URL_PREFIXES = (
+    "http://ftp.stage.mozaws.net/",
+    "https://ftp.stage.mozaws.net/",
+)
+
+BCJ_OPTIONS = {
+    "x86": ["--x86"],
+    "x86_64": ["--x86"],
+    "aarch64": [],
+    # macOS Universal Builds
+    "macos-x86_64-aarch64": [],
+}
+
+
+def verify_signature(mar, cert):
+    log.info("Checking %s signature", mar)
+    with open(mar, "rb") as mar_fh:
+        m = MarReader(mar_fh)
+        if not m.verify(verify_key=cert):
+            raise ValueError(
+                "MAR Signature invalid: %s (%s) against %s", mar, m.signature_type, cert
+            )
+
+
+def process_arguments():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--artifacts-dir", required=True)
+    parser.add_argument("--signing-cert", type=argparse.FileType("rb"), required=True)
+    parser.add_argument("--task-definition", required=True, type=argparse.FileType("r"))
+    parser.add_argument(
+        "--allow-staging-prefixes",
+        action="store_true",
+        default=strtobool(os.environ.get("FUNSIZE_ALLOW_STAGING_PREFIXES", "false")),
+        help="Allow files from staging buckets.",
+    )
+    parser.add_argument(
+        "-q",
+        "--quiet",
+        dest="log_level",
+        action="store_const",
+        const=logging.INFO,
+        default=logging.DEBUG,
+    )
+    parser.add_argument(
+        "--arch",
+        type=str,
+        required=True,
+        choices=BCJ_OPTIONS.keys(),
+        help="The architecture you are building.",
+    )
+    return parser.parse_args()
+
+
+def validate_mar_channel_id(mar, channel_ids):
+    log.info("Checking %s for MAR_CHANNEL_ID %s", mar, channel_ids)
+    # We may get a string with a list representation, or a single entry string.
+    channel_ids = set(channel_ids.split(","))
+
+    product_info = MarReader(open(mar, "rb")).productinfo
+    if not isinstance(product_info, tuple):
+        raise ValueError(
+            "Malformed product information in mar: {}".format(product_info)
+        )
+
+    found_channel_ids = set(product_info[1].split(","))
+
+    if not found_channel_ids.issubset(channel_ids):
+        raise ValueError(
+            "MAR_CHANNEL_ID mismatch, {} not in {}".format(product_info[1], channel_ids)
+        )
+
+    log.info("%s channel %s in %s", mar, product_info[1], channel_ids)
+
+
+async def retry_download(*args, semaphore=None, **kwargs):  # noqa: E999
+    """Retry download() calls."""
+    async with AsyncExitStack() as stack:
+        if semaphore:
+            await stack.enter_async_context(semaphore)
+        await retry_async(
+            download,
+            retry_exceptions=(aiohttp.ClientError, asyncio.TimeoutError),
+            args=args,
+            kwargs=kwargs,
+        )
+
+
+def verify_allowed_url(mar, allowed_url_prefixes):
+    if not any(mar.startswith(prefix) for prefix in allowed_url_prefixes):
+        raise ValueError(
+            "{mar} is not in allowed URL prefixes: {p}".format(
+                mar=mar, p=allowed_url_prefixes
+            )
+        )
+
+
+async def download(url, dest, mode=None):  # noqa: E999
+    log.info("Downloading %s to %s", url, dest)
+    chunk_size = 4096
+    bytes_downloaded = 0
+    async with aiohttp.ClientSession(raise_for_status=True) as session:
+        start = time.time()
+        async with session.get(url, timeout=120) as resp:
+            # Additional early logging for download timeouts.
+            log.debug("Fetching from url %s", resp.url)
+            for history in resp.history:
+                log.debug("Redirection history: %s", history.url)
+            log.debug("Headers for %s: %s", resp.url, resp.headers)
+            if "Content-Length" in resp.headers:
+                log.debug(
+                    "Content-Length expected for %s: %s",
+                    url,
+                    resp.headers["Content-Length"],
+                )
+            log_interval = chunk_size * 1024
+            with open(dest, "wb") as fd:
+                while True:
+                    chunk = await resp.content.read(chunk_size)
+                    if not chunk:
+                        break
+                    fd.write(chunk)
+                    bytes_downloaded += len(chunk)
+                    log_interval -= len(chunk)
+                    if log_interval <= 0:
+                        log.debug("Bytes downloaded for %s: %d", url, bytes_downloaded)
+                        log_interval = chunk_size * 1024
+            end = time.time()
+            log.info(
+                "Downloaded %s, %s bytes in %s seconds: sha256:%s",
+                url,
+                bytes_downloaded,
+                int(end - start),
+                get_hash(dest, hash_alg="sha256"),
+            )
+            if mode:
+                log.info("chmod %o %s", mode, dest)
+                os.chmod(dest, mode)
+
+
+async def download_buildsystem_bits(partials_config, downloads, tools_dir):
+    """Download external tools needed to make partials."""
+
+    # We're making the assumption that the "to" mar is the same for all,
+    # as that's the way this task is currently used.
+    to_url = extract_download_urls(partials_config, mar_type="to").pop()
+
+    repo = get_option(
+        downloads[to_url]["extracted_path"],
+        filename="platform.ini",
+        section="Build",
+        option="SourceRepository",
+    )
+    revision = get_option(
+        downloads[to_url]["extracted_path"],
+        filename="platform.ini",
+        section="Build",
+        option="SourceStamp",
+    )
+
+    urls = {
+        "make_incremental_update.sh": f"{repo}/raw-file/{revision}/tools/"
+        "update-packaging/make_incremental_update.sh",
+        "common.sh": f"{repo}/raw-file/{revision}/tools/update-packaging/common.sh",
+        "mar": "https://archive.mozilla.org/pub/mozilla.org/firefox/nightly/"
+        "latest-mozilla-central/mar-tools/linux64/mar",
+        "mbsdiff": "https://archive.mozilla.org/pub/mozilla.org/firefox/nightly/"
+        "latest-mozilla-central/mar-tools/linux64/mbsdiff",
+    }
+    for filename, url in urls.items():
+        filename = tools_dir / filename
+        await retry_download(url, dest=filename, mode=0o755)
+
+
+def find_file(directory, filename):
+    log.debug("Searching for %s in %s", filename, directory)
+    return next(Path(directory).rglob(filename))
+
+
+def get_option(directory, filename, section, option):
+    log.info("Extracting [%s]: %s from %s/**/%s", section, option, directory, filename)
+    f = find_file(directory, filename)
+    config = configparser.ConfigParser()
+    config.read(f)
+    rv = config.get(section, option)
+    log.info("Found %s", rv)
+    return rv
+
+
+def extract_download_urls(partials_config, mar_type):
+    """Extract a set of urls to download from the task configuration.
+
+    mar_type should be one of "from", "to"
+    """
+    return {definition[f"{mar_type}_mar"] for definition in partials_config}
+
+
+async def download_and_verify_mars(partials_config, allowed_url_prefixes, signing_cert):
+    """Download, check signature, channel ID and unpack MAR files."""
+    # Separate these categories so we can opt to perform checks on only 'to' downloads.
+    from_urls = extract_download_urls(partials_config, mar_type="from")
+    to_urls = extract_download_urls(partials_config, mar_type="to")
+    tasks = list()
+    downloads = dict()
+
+    semaphore = asyncio.Semaphore(2)  # Magic 2 to reduce network timeout errors.
+    for url in from_urls.union(to_urls):
+        verify_allowed_url(url, allowed_url_prefixes)
+        downloads[url] = {
+            "download_path": Path(tempfile.mkdtemp()) / Path(url).name,
+        }
+        tasks.append(
+            retry_download(url, downloads[url]["download_path"], semaphore=semaphore)
+        )
+
+    await asyncio.gather(*tasks)
+
+    for url in downloads:
+        # Verify signature, but not from an artifact as we don't
+        # depend on the signing task
+        if not os.getenv("MOZ_DISABLE_MAR_CERT_VERIFICATION") and not url.startswith(
+            QUEUE_PREFIX
+        ):
+            verify_signature(downloads[url]["download_path"], signing_cert)
+
+        # Only validate the target channel ID, as we update from beta->release
+        if url in to_urls:
+            validate_mar_channel_id(
+                downloads[url]["download_path"], os.environ["MAR_CHANNEL_ID"]
+            )
+
+        downloads[url]["extracted_path"] = tempfile.mkdtemp()
+        with open(downloads[url]["download_path"], "rb") as mar_fh:
+            log.info(
+                "Unpacking %s into %s",
+                downloads[url]["download_path"],
+                downloads[url]["extracted_path"],
+            )
+            m = MarReader(mar_fh)
+            m.extract(downloads[url]["extracted_path"])
+
+    return downloads
+
+
+async def run_command(cmd, cwd="/", env=None, label=None, silent=False):
+    log.info("Running: %s", cmd)
+    if not env:
+        env = dict()
+    process = await asyncio.create_subprocess_shell(
+        cmd,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+        cwd=cwd,
+        env=env,
+    )
+    if label:
+        label = "{}: ".format(label)
+    else:
+        label = ""
+
+    async def read_output(stream, label, printcmd):
+        while True:
+            line = await stream.readline()
+            if line == b"":
+                break
+            printcmd("%s%s", label, line.decode("utf-8").rstrip())
+
+    if silent:
+        await process.wait()
+    else:
+        await asyncio.gather(
+            read_output(process.stdout, label, log.info),
+            read_output(process.stderr, label, log.warning),
+        )
+        await process.wait()
+
+
+async def generate_partial(from_dir, to_dir, dest_mar, mar_data, tools_dir, arch):
+    log.info("Generating partial %s", dest_mar)
+    env = os.environ.copy()
+    env["LC_ALL"] = "C"
+    env["MAR"] = tools_dir / "mar"
+    env["MBSDIFF"] = tools_dir / "mbsdiff"
+    if arch:
+        env["BCJ_OPTIONS"] = " ".join(BCJ_OPTIONS[arch])
+    env["MOZ_PRODUCT_VERSION"] = mar_data["version"]
+    env["MAR_CHANNEL_ID"] = mar_data["MAR_CHANNEL_ID"]
+    env["BRANCH"] = mar_data["branch"]
+
+    make_incremental_update = tools_dir / "make_incremental_update.sh"
+    cmd = f"{make_incremental_update} {dest_mar} {from_dir} {to_dir}"
+
+    await run_command(cmd, cwd=dest_mar.parent, env=env, label=dest_mar.name)
+    validate_mar_channel_id(dest_mar, mar_data["MAR_CHANNEL_ID"])
+
+
+async def manage_partial(
+    partial_def, artifacts_dir, tools_dir, downloads, semaphore, arch=None
+):
+    from_url = partial_def["from_mar"]
+    to_url = partial_def["to_mar"]
+    from_path = downloads[from_url]["extracted_path"]
+    to_path = downloads[to_url]["extracted_path"]
+
+    mar_data = {
+        "MAR_CHANNEL_ID": os.environ["MAR_CHANNEL_ID"],
+        "version": get_option(
+            to_path, filename="application.ini", section="App", option="Version"
+        ),
+        "appName": get_option(
+            from_path, filename="application.ini", section="App", option="Name"
+        ),
+        # Use Gecko repo and rev from platform.ini, not application.ini
+        "repo": get_option(
+            to_path, filename="platform.ini", section="Build", option="SourceRepository"
+        ),
+        "revision": get_option(
+            to_path, filename="platform.ini", section="Build", option="SourceStamp"
+        ),
+        "locale": partial_def["locale"],
+        "from_mar": partial_def["from_mar"],
+        "from_size": os.path.getsize(downloads[from_url]["download_path"]),
+        "from_hash": get_hash(downloads[from_url]["download_path"], hash_alg="sha512"),
+        "from_buildid": get_option(
+            from_path, filename="application.ini", section="App", option="BuildID"
+        ),
+        "to_mar": partial_def["to_mar"],
+        "to_size": os.path.getsize(downloads[to_url]["download_path"]),
+        "to_hash": get_hash(downloads[to_url]["download_path"], hash_alg="sha512"),
+        "to_buildid": get_option(
+            to_path, filename="application.ini", section="App", option="BuildID"
+        ),
+        "mar": partial_def["dest_mar"],
+    }
+    # if branch not set explicitly use repo-name
+    mar_data["branch"] = partial_def.get("branch", Path(mar_data["repo"]).name)
+
+    for field in (
+        "update_number",
+        "previousVersion",
+        "previousBuildNumber",
+        "toVersion",
+        "toBuildNumber",
+    ):
+        if field in partial_def:
+            mar_data[field] = partial_def[field]
+
+    dest_mar = Path(artifacts_dir) / mar_data["mar"]
+
+    async with semaphore:
+        await generate_partial(from_path, to_path, dest_mar, mar_data, tools_dir, arch)
+
+    mar_data["size"] = os.path.getsize(dest_mar)
+    mar_data["hash"] = get_hash(dest_mar, hash_alg="sha512")
+    return mar_data
+
+
+async def async_main(args, signing_cert):
+    tasks = []
+
+    allowed_url_prefixes = list(ALLOWED_URL_PREFIXES)
+    if args.allow_staging_prefixes:
+        allowed_url_prefixes += STAGING_URL_PREFIXES
+
+    task = json.load(args.task_definition)
+
+    downloads = await download_and_verify_mars(
+        task["extra"]["funsize"]["partials"], allowed_url_prefixes, signing_cert
+    )
+
+    tools_dir = Path(tempfile.mkdtemp())
+    await download_buildsystem_bits(
+        partials_config=task["extra"]["funsize"]["partials"],
+        downloads=downloads,
+        tools_dir=tools_dir,
+    )
+
+    # May want to consider os.cpu_count() if we ever run on osx/win.
+    # sched_getaffinity is the list of cores we can run on, not the total.
+    semaphore = asyncio.Semaphore(len(os.sched_getaffinity(0)))
+    for definition in task["extra"]["funsize"]["partials"]:
+        tasks.append(
+            asyncio.ensure_future(
+                retry_async(
+                    manage_partial,
+                    retry_exceptions=(aiohttp.ClientError, asyncio.TimeoutError),
+                    kwargs=dict(
+                        partial_def=definition,
+                        artifacts_dir=args.artifacts_dir,
+                        tools_dir=tools_dir,
+                        arch=args.arch,
+                        downloads=downloads,
+                        semaphore=semaphore,
+                    ),
+                )
+            )
+        )
+    manifest = await asyncio.gather(*tasks)
+
+    for url in downloads:
+        downloads[url]["download_path"].unlink()
+        shutil.rmtree(downloads[url]["extracted_path"])
+    shutil.rmtree(tools_dir)
+
+    return manifest
+
+
+def main():
+    args = process_arguments()
+
+    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+    log.setLevel(args.log_level)
+
+    signing_cert = args.signing_cert.read()
+    assert get_keysize(signing_cert) == 4096
+
+    artifacts_dir = Path(args.artifacts_dir)
+    if not artifacts_dir.exists():
+        artifacts_dir.mkdir()
+
+    loop = asyncio.get_event_loop()
+    manifest = loop.run_until_complete(async_main(args, signing_cert))
+    loop.close()
+
+    manifest_file = artifacts_dir / "manifest.json"
+    with open(manifest_file, "w") as fp:
+        json.dump(manifest, fp, indent=2, sort_keys=True)
+
+    log.debug("{}".format(json.dumps(manifest, indent=2, sort_keys=True)))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/taskcluster/docker/funsize-update-generator/scripts/mbsdiff_hook.sh b/taskcluster/docker/funsize-update-generator/scripts/mbsdiff_hook.sh
new file mode 100755
index 0000000000..965d938247
--- /dev/null
+++ b/taskcluster/docker/funsize-update-generator/scripts/mbsdiff_hook.sh
@@ -0,0 +1,157 @@
+#!/bin/bash
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#
+# This tool contains functions that are to be used to handle/enable funsize
+# Author: Mihai Tabara
+#
+
+HOOK=
+AWS_BUCKET_NAME=
+LOCAL_CACHE_DIR=
+
+# Don't cache files smaller than this, as it's slower with S3
+# Bug 1437473
+CACHE_THRESHOLD=500000
+
+S3_CACHE_HITS=0
+S3_CACHE_MISSES=0
+
+getsha512(){
+    openssl sha512 "${1}" | awk '{print $2}'
+}
+
+print_usage(){
+    echo "$(basename "$0") [-S S3-BUCKET-NAME] [-c LOCAL-CACHE-DIR-PATH] [-g] [-u] PATH-FROM-URL PATH-TO-URL PATH-PATCH"
+    echo "Script that saves/retrieves presumptive patches to/from a cache; paths are passed as args"
+    echo ""
+    echo "-S S3-BUCKET-NAME - S3 bucket (and optional prefix) used as the remote patch cache"
+    echo "-c LOCAL-CACHE-DIR-PATH local path to which patches are cached"
+    echo "-g pre hook - tests whether patch already in cache"
+    echo "-u post hook - upload patch to cache for future use"
+    echo ""
+    echo "PATH-FROM-URL : path on disk for source file"
+    echo "PATH-TO-URL : path on disk for destination file"
+    echo "PATH-PATCH : path on disk for patch between source and destination"
+}
+
+upload_patch(){
+    if [ "$(stat -c "%s" "$2")" -lt ${CACHE_THRESHOLD} ]
+    then
+        return 0
+    fi
+    sha_from=$(getsha512 "$1")
+    sha_to=$(getsha512 "$2")
+    patch_path="$3"
+    patch_filename="$(basename "$3")"
+
+    # save to local cache first
+    if [ -n "$LOCAL_CACHE_DIR" ]; then
+        local_cmd="mkdir -p "$LOCAL_CACHE_DIR/$sha_from""
+        if $local_cmd >&2; then
+            cp -avf "${patch_path}" "$LOCAL_CACHE_DIR/$sha_from/$sha_to"
+            echo "${patch_path} saved on local cache."
+        fi
+    fi
+
+    if [ -n "${AWS_BUCKET_NAME}" ]; then
+        BUCKET_PATH="s3://${AWS_BUCKET_NAME}${sha_from}/${sha_to}/${patch_filename}"
+        if aws s3 cp "${patch_path}" "${BUCKET_PATH}"; then
+            echo "${patch_path} saved on s3://${AWS_BUCKET_NAME}"
+            return 0
+        fi
+        echo "${patch_path} failed to be uploaded to s3://${AWS_BUCKET_NAME}"
+        return 1
+    fi
+    return 0
+}
+
+get_patch(){
+    # $1 and $2 are the /path/to/filename
+    if [ "$(stat -c "%s" "$2")" -lt ${CACHE_THRESHOLD} ]
+    then
+        return 1
+    fi
+    sha_from=$(getsha512 "$1")
+    sha_to=$(getsha512 "$2")
+    destination_file="$3"
+    s3_filename="$(basename "$3")"
+
+    # Try to retrieve from local cache first.
+    if [ -n "$LOCAL_CACHE_DIR" ]; then
+        if [ -r "$LOCAL_CACHE_DIR/$sha_from/$sha_to" ]; then
+            cp -avf "$LOCAL_CACHE_DIR/$sha_from/$sha_to" "$destination_file"
+            echo "Successfully retrieved ${destination_file} from local cache."
+            return 0
+        fi
+    fi
+    # If not in the local cache, we might find it remotely.
+
+    if [ -n "${AWS_BUCKET_NAME}" ]; then
+        BUCKET_PATH="s3://${AWS_BUCKET_NAME}${sha_from}/${sha_to}/${s3_filename}"
+        if aws s3 ls "${BUCKET_PATH}"; then
+            ((S3_CACHE_HITS++))
+            echo "s3 cache hit for ${s3_filename} (${S3_CACHE_HITS} total hits)"
+            if aws s3 cp "${BUCKET_PATH}" "${destination_file}"; then
+                echo "Successfully retrieved ${destination_file} from s3://${AWS_BUCKET_NAME}"
+                return 0
+            else
+                echo "Failed to retrieve ${destination_file} from s3://${AWS_BUCKET_NAME}"
+                return 1
+            fi
+        # Not found, fall through to default error
+        else
+            ((S3_CACHE_MISSES++))
+            echo "s3 cache miss for ${s3_filename} (${S3_CACHE_MISSES} total misses)"
+        fi
+    fi
+    return 1
+}
+
+OPTIND=1
+
+while getopts ":S:c:gu" option; do
+    case $option in
+        S)
+            # This will probably be bucketname/path/prefix but we can use it either way
+            AWS_BUCKET_NAME="$OPTARG"
+            # Ensure trailing slash is there.
+            if [[ ! $AWS_BUCKET_NAME =~ .*/$ ]]; then
+                AWS_BUCKET_NAME="${AWS_BUCKET_NAME}/"
+            fi
+            ;;
+        c)
+            LOCAL_CACHE_DIR="$OPTARG"
+            ;;
+        g)
+            HOOK="PRE"
+            ;;
+        u)
+            HOOK="POST"
+            ;;
+        \?)
+            echo "Invalid option: -$OPTARG" >&2
+            print_usage
+            exit 1
+            ;;
+        :)
+            echo "Option -$OPTARG requires an argument." >&2
+            print_usage
+            exit 1
+            ;;
+        *)
+            echo "Unimplemented option: -$OPTARG" >&2
+            print_usage
+            exit 1
+            ;;
+    esac
+done
+shift $((OPTIND-1))
+
+if [ "$HOOK" == "PRE" ]; then
+    get_patch "$1" "$2" "$3"
+elif [ "$HOOK" == "POST" ]; then
+    upload_patch "$1" "$2" "$3"
+fi
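
For reference, a hypothetical invocation of funsize.py under the assumptions the script itself imposes (MAR_CHANNEL_ID must be set in the environment, the signing certificate must use a 4096-bit key, and the task definition JSON must carry extra.funsize.partials entries). The paths and file names below are illustrative only, not taken from any real task:

    # Sketch only; adjust paths to your worker environment.
    export MAR_CHANNEL_ID="firefox-mozilla-central"
    python3 funsize.py \
        --artifacts-dir /home/worker/artifacts \
        --task-definition /home/worker/task.json \
        --signing-cert /home/worker/keys/nightly_signing.der \
        --arch x86_64

On success, each generated partial MAR lands in the artifacts directory alongside a manifest.json describing sizes, hashes, and build IDs.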
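Likewise, a sketch of how mbsdiff_hook.sh can be exercised, based only on its own print_usage() text: -g is the pre hook that tries to pull a previously generated patch from the local or S3 cache, and -u is the post hook that stores a freshly generated patch. The bucket name and file paths are placeholders:

    # Pre hook: exits 0 and fills patch.bin if a cached patch exists for this from/to pair.
    ./mbsdiff_hook.sh -c /tmp/fs-cache -S example-bucket/funsize/ -g from.bin to.bin patch.bin
    # Post hook: uploads patch.bin (only if it exceeds CACHE_THRESHOLD) so later runs can reuse it.
    ./mbsdiff_hook.sh -c /tmp/fs-cache -S example-bucket/funsize/ -u from.bin to.bin patch.bin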