diff options
Diffstat (limited to 'tools/update-verify/scripts')
-rw-r--r-- | tools/update-verify/scripts/async_download.py | 362 | ||||
-rw-r--r-- | tools/update-verify/scripts/chunked-verify.py | 68 | ||||
-rwxr-xr-x | tools/update-verify/scripts/chunked-verify.sh | 72 |
3 files changed, 502 insertions, 0 deletions
diff --git a/tools/update-verify/scripts/async_download.py b/tools/update-verify/scripts/async_download.py new file mode 100644 index 0000000000..efedc8295f --- /dev/null +++ b/tools/update-verify/scripts/async_download.py @@ -0,0 +1,362 @@ +#!/usr/bin/env python3 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +import asyncio +import glob +import logging +import os +import sys +import xml.etree.ElementTree as ET +from os import path + +import aiohttp + +logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s") +log = logging.getLogger(__name__) + +UV_CACHE_PATH = os.getenv( + "UV_CACHE_PATH", os.path.join(path.dirname(__file__), "../release/updates/cache/") +) +UV_PARALLEL_DOWNLOADS = os.getenv("UV_PARALLEL_DOWNLOADS", 20) + +FTP_SERVER_TO = os.getenv("ftp_server_to", "http://stage.mozilla.org/pub/mozilla.org") +FTP_SERVER_FROM = os.getenv( + "ftp_server_from", "http://stage.mozilla.org/pub/mozilla.org" +) +AUS_SERVER = os.getenv("aus_server", "https://aus5.mozilla.org") + + +def create_cache(): + if not os.path.isdir(UV_CACHE_PATH): + os.mkdir(UV_CACHE_PATH) + + +def remove_cache(): + """ + Removes all files in the cache folder + We don't support folders or .dot(hidden) files + By not deleting the cache directory, it allows us to use Docker tmpfs mounts, + which are the only workaround to poor mount r/w performance on MacOS + Bug Reference: + https://forums.docker.com/t/file-access-in-mounted-volumes-extremely-slow-cpu-bound/8076/288 + """ + files = glob.glob(f"{UV_CACHE_PATH}/*") + for f in files: + os.remove(f) + + +def _cachepath(i, ext): + # Helper function: given an index, return a cache file path + return path.join(UV_CACHE_PATH, f"obj_{i:0>5}.{ext}") + + +async def fetch_url(url, path, connector): + """ + Fetch/download a file to a specific path + + Parameters + ---------- + url : str + URL to be fetched + + path : str + Path to save binary + + Returns + ------- + dict + Request result. If error result['error'] is True + """ + + def _result(response, error=False): + data = { + "headers": dict(response.headers), + "status": response.status, + "reason": response.reason, + "_request_info": str(response._request_info), + "url": url, + "path": path, + "error": error, + } + return data + + # Set connection timeout to 15 minutes + timeout = aiohttp.ClientTimeout(total=900) + + try: + async with aiohttp.ClientSession( + connector=connector, connector_owner=False, timeout=timeout + ) as session: + log.info(f"Retrieving {url}") + async with session.get( + url, headers={"Cache-Control": "max-stale=0"} + ) as response: + # Any response code > 299 means something went wrong + if response.status > 299: + log.info(f"Failed to download {url} with status {response.status}") + return _result(response, True) + + with open(path, "wb") as fd: + while True: + chunk = await response.content.read() + if not chunk: + break + fd.write(chunk) + result = _result(response) + log.info(f'Finished downloading {url}\n{result["headers"]}') + return result + + except ( + UnicodeDecodeError, # Data parsing + asyncio.TimeoutError, # Async timeout + aiohttp.ClientError, # aiohttp error + ) as e: + log.error("=============") + log.error(f"Error downloading {url}") + log.error(e) + log.error("=============") + return {"path": path, "url": url, "error": True} + + +async def download_multi(targets, sourceFunc): + """ + Download list of targets + + Parameters + ---------- + targets : list + List of urls to download + + sourceFunc : str + Source function name (for filename) + + Returns + ------- + tuple + List of responses (Headers) + """ + + targets = set(targets) + amount = len(targets) + + connector = aiohttp.TCPConnector( + limit=UV_PARALLEL_DOWNLOADS, # Simultaneous connections, per host + ttl_dns_cache=600, # Cache DNS for 10 mins + ) + + log.info(f"\nDownloading {amount} files ({UV_PARALLEL_DOWNLOADS} async limit)") + + # Transform targets into {url, path} objects + payloads = [ + {"url": url, "path": _cachepath(i, sourceFunc)} + for (i, url) in enumerate(targets) + ] + + downloads = [] + + fetches = [fetch_url(t["url"], t["path"], connector) for t in payloads] + + downloads.extend(await asyncio.gather(*fetches)) + connector.close() + + results = [] + # Remove file if download failed + for fetch in downloads: + # If there's an error, try to remove the file, but keep going if file not present + if fetch["error"]: + try: + os.unlink(fetch.get("path", None)) + except (TypeError, FileNotFoundError) as e: + log.info(f"Unable to cleanup error file: {e} continuing...") + continue + + results.append(fetch) + + return results + + +async def download_builds(verifyConfig): + """ + Given UpdateVerifyConfig, download and cache all necessary updater files + Include "to" and "from"/"updater_pacakge" + + Parameters + ---------- + verifyConfig : UpdateVerifyConfig + Chunked config + + Returns + ------- + list : List of file paths and urls to each updater file + """ + + updaterUrls = set() + for release in verifyConfig.releases: + ftpServerFrom = release["ftp_server_from"] + ftpServerTo = release["ftp_server_to"] + + for locale in release["locales"]: + toUri = verifyConfig.to + if toUri is not None and ftpServerTo is not None: + toUri = toUri.replace("%locale%", locale) + updaterUrls.add(f"{ftpServerTo}{toUri}") + + for reference in ("updater_package", "from"): + uri = release.get(reference, None) + if uri is None: + continue + uri = uri.replace("%locale%", locale) + # /ja-JP-mac/ locale is replaced with /ja/ for updater packages + uri = uri.replace("ja-JP-mac", "ja") + updaterUrls.add(f"{ftpServerFrom}{uri}") + + log.info(f"About to download {len(updaterUrls)} updater packages") + + updaterResults = await download_multi(list(updaterUrls), "updater.async.cache") + return updaterResults + + +def get_mar_urls_from_update(path): + """ + Given an update.xml file, return MAR URLs + + If update.xml doesn't have URLs, returns empty list + + Parameters + ---------- + path : str + Path to update.xml file + + Returns + ------- + list : List of URLs + """ + + result = [] + root = ET.parse(path).getroot() + for patch in root.findall("update/patch"): + url = patch.get("URL") + if url: + result.append(url) + return result + + +async def download_mars(updatePaths): + """ + Given list of update.xml paths, download MARs for each + + Parameters + ---------- + update_paths : list + List of paths to update.xml files + """ + + patchUrls = set() + for updatePath in updatePaths: + for url in get_mar_urls_from_update(updatePath): + patchUrls.add(url) + + log.info(f"About to download {len(patchUrls)} MAR packages") + marResults = await download_multi(list(patchUrls), "mar.async.cache") + return marResults + + +async def download_update_xml(verifyConfig): + """ + Given UpdateVerifyConfig, download and cache all necessary update.xml files + + Parameters + ---------- + verifyConfig : UpdateVerifyConfig + Chunked config + + Returns + ------- + list : List of file paths and urls to each update.xml file + """ + + xmlUrls = set() + product = verifyConfig.product + urlTemplate = ( + "{server}/update/3/{product}/{release}/{build}/{platform}/" + "{locale}/{channel}/default/default/default/update.xml?force=1" + ) + + for release in verifyConfig.releases: + for locale in release["locales"]: + xmlUrls.add( + urlTemplate.format( + server=AUS_SERVER, + product=product, + release=release["release"], + build=release["build_id"], + platform=release["platform"], + locale=locale, + channel=verifyConfig.channel, + ) + ) + + log.info(f"About to download {len(xmlUrls)} update.xml files") + xmlResults = await download_multi(list(xmlUrls), "update.xml.async.cache") + return xmlResults + + +async def _download_from_config(verifyConfig): + """ + Given an UpdateVerifyConfig object, download all necessary files to cache + + Parameters + ---------- + verifyConfig : UpdateVerifyConfig + The config - already chunked + """ + remove_cache() + create_cache() + + downloadList = [] + ################## + # Download files # + ################## + xmlFiles = await download_update_xml(verifyConfig) + downloadList.extend(xmlFiles) + downloadList += await download_mars(x["path"] for x in xmlFiles) + downloadList += await download_builds(verifyConfig) + + ##################### + # Create cache.list # + ##################### + cacheLinks = [] + + # Rename files and add to cache_links + for download in downloadList: + cacheLinks.append(download["url"]) + fileIndex = len(cacheLinks) + os.rename(download["path"], _cachepath(fileIndex, "cache")) + + cacheIndexPath = path.join(UV_CACHE_PATH, "urls.list") + with open(cacheIndexPath, "w") as cache: + cache.writelines(f"{l}\n" for l in cacheLinks) + + # Log cache + log.info("Cache index urls.list contents:") + with open(cacheIndexPath, "r") as cache: + for ln, url in enumerate(cache.readlines()): + line = url.replace("\n", "") + log.info(f"Line {ln+1}: {line}") + + return None + + +def download_from_config(verifyConfig): + """ + Given an UpdateVerifyConfig object, download all necessary files to cache + (sync function that calls the async one) + + Parameters + ---------- + verifyConfig : UpdateVerifyConfig + The config - already chunked + """ + return asyncio.run(_download_from_config(verifyConfig)) diff --git a/tools/update-verify/scripts/chunked-verify.py b/tools/update-verify/scripts/chunked-verify.py new file mode 100644 index 0000000000..8c4320d4cc --- /dev/null +++ b/tools/update-verify/scripts/chunked-verify.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +import logging +import os +import sys +from os import path +from tempfile import mkstemp + +sys.path.append(path.join(path.dirname(__file__), "../python")) +logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s") +log = logging.getLogger(__name__) + +from async_download import download_from_config +from mozrelease.update_verify import UpdateVerifyConfig +from util.commands import run_cmd + +UPDATE_VERIFY_COMMAND = ["bash", "verify.sh", "-c"] +UPDATE_VERIFY_DIR = path.join(path.dirname(__file__), "../release/updates") + + +if __name__ == "__main__": + from argparse import ArgumentParser + + parser = ArgumentParser("") + + parser.set_defaults( + chunks=None, + thisChunk=None, + ) + parser.add_argument("--verify-config", required=True, dest="verifyConfig") + parser.add_argument("--verify-channel", required=True, dest="verify_channel") + parser.add_argument("--chunks", required=True, dest="chunks", type=int) + parser.add_argument("--this-chunk", required=True, dest="thisChunk", type=int) + parser.add_argument("--diff-summary", required=True, type=str) + + options = parser.parse_args() + assert options.chunks and options.thisChunk, "chunks and this-chunk are required" + assert path.isfile(options.verifyConfig), "Update verify config must exist!" + verifyConfigFile = options.verifyConfig + + fd, configFile = mkstemp() + # Needs to be opened in "bytes" mode because we perform relative seeks on it + fh = os.fdopen(fd, "wb") + try: + verifyConfig = UpdateVerifyConfig() + verifyConfig.read(path.join(UPDATE_VERIFY_DIR, verifyConfigFile)) + myVerifyConfig = verifyConfig.getChunk(options.chunks, options.thisChunk) + # override the channel if explicitly set + if options.verify_channel: + myVerifyConfig.channel = options.verify_channel + myVerifyConfig.write(fh) + fh.close() + run_cmd(["cat", configFile]) + + # Before verifying, we want to download and cache all required files + download_from_config(myVerifyConfig) + + run_cmd( + UPDATE_VERIFY_COMMAND + [configFile], + cwd=UPDATE_VERIFY_DIR, + env={"DIFF_SUMMARY_LOG": path.abspath(options.diff_summary)}, + ) + finally: + if path.exists(configFile): + os.unlink(configFile) diff --git a/tools/update-verify/scripts/chunked-verify.sh b/tools/update-verify/scripts/chunked-verify.sh new file mode 100755 index 0000000000..ad6af19080 --- /dev/null +++ b/tools/update-verify/scripts/chunked-verify.sh @@ -0,0 +1,72 @@ +#!/bin/bash +set -ex +set -o pipefail +# This ugly hack is a cross-platform (Linux/Mac/Windows+MSYS) way to get the +# absolute path to the directory containing this script +pushd `dirname $0` &>/dev/null +MY_DIR=$(pwd) +popd &>/dev/null +SCRIPTS_DIR="$MY_DIR/.." +PYTHON='./mach python' +VERIFY_CONFIG="$MOZ_FETCHES_DIR/update-verify.cfg" + +while [ "$#" -gt 0 ]; do + case $1 in + # Parse total-chunks + --total-chunks=*) chunks="${1#*=}"; shift 1;; + --total-chunks) chunks="${2}"; shift 2;; + + # Parse this-chunk + --this-chunk=*) thisChunk="${1#*=}"; shift 1;; + --this-chunk) thisChunk="${2}"; shift 2;; + + # Stop if other parameters are sent + *) echo "Unknown parameter: ${1}"; exit 1;; + esac +done + +# Validate parameters +if [ -z "${chunks}" ]; then echo "Required parameter: --total-chunks"; exit 1; fi +if [ -z "${thisChunk}" ]; then echo "Required parameter: --this-chunk"; exit 1; fi + +# release promotion +if [ -n "$CHANNEL" ]; then + EXTRA_PARAMS="--verify-channel $CHANNEL" +else + EXTRA_PARAMS="" +fi +$PYTHON $MY_DIR/chunked-verify.py --chunks $chunks --this-chunk $thisChunk \ +--verify-config $VERIFY_CONFIG --diff-summary $PWD/diff-summary.log $EXTRA_PARAMS \ +2>&1 | tee $SCRIPTS_DIR/../verify_log.txt + +print_failed_msg() +{ + echo "-------------------------" + echo "This run has failed, see the above log" + echo + return 1 +} + +print_warning_msg() +{ + echo "-------------------------" + echo "This run has warnings, see the above log" + echo + return 2 +} + +set +x + +echo "Scanning log for failures and warnings" +echo "--------------------------------------" + +# Test for a failure, note we are set -e. +# Grep returns 0 on a match and 1 on no match +# Testing for failures first is important because it's OK to to mark as failed +# when there's failures+warnings, but not OK to mark as warnings in the same +# situation. +( ! grep 'TEST-UNEXPECTED-FAIL:' $SCRIPTS_DIR/../verify_log.txt ) || print_failed_msg +( ! grep 'WARN:' $SCRIPTS_DIR/../verify_log.txt ) || print_warning_msg + +echo "-------------------------" +echo "All is well" |