# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import os
import pathlib
import shutil
import socket
import subprocess
import tempfile
import threading
import traceback
from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer

import mozfile
from logger.logger import RaptorLogger

LOG = RaptorLogger(component="raptor-benchmark")
here = pathlib.Path(__file__).parent.resolve()


class Benchmark(object):
    """Utility class for running benchmarks in raptor.

    Constructing an instance prepares the benchmark sources (linking or
    copying them into place, cloning/updating an external git repository
    when the test declares one) and then starts a local HTTP server that
    serves them. Call `stop_http_server` to tear the server down.
    """

    def __init__(self, config, test, debug_mode=False):
        """Prepare the benchmark described by `test` and start serving it.

        `config` and `test` are dict-like objects; `test` is mutated when
        `config` carries repository overrides.
        """
        self.config = config
        self.test = test
        self.debug_mode = debug_mode

        self.httpd = None
        self.server_thread = None

        # Note that we can only change the repository, revision, and branch through here.
        # The path to the test should remain constant. If it needs to be changed, make a
        # patch that changes it for the new test.
        if self.config.get("benchmark_repository", None):
            self.test["repository"] = self.config["benchmark_repository"]
            self.test["repository_revision"] = self.config["benchmark_revision"]

            # NOTE(review): this stores the override under "branch", while
            # _update_benchmark_repo reads "repository_branch" -- confirm
            # which key is intended.
            if self.config.get("benchmark_branch", None):
                self.test["branch"] = self.config["benchmark_branch"]

        self.setup_benchmarks(
            os.getenv("MOZ_DEVELOPER_REPO_DIR"),
            os.getenv("MOZ_MOZBUILD_DIR"),
            run_local=self.config.get("run_local", False),
        )

        LOG.info(f"bench_dir: {self.bench_dir}")
        LOG.info("bench_dir contains:")
        LOG.info(list(self.bench_dir.iterdir()))

        # now have the benchmark source ready, go ahead and serve it up!
        self.start_http_server()

    def start_http_server(self):
        """Pick a free port and serve `self.bench_dir` from a background thread."""
        self.host = self.config["host"]

        # Pick a free port by binding to port 0; the context manager makes
        # sure the probe socket is closed even if something goes wrong.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.bind(("", 0))
            self.port = sock.getsockname()[1]

        _webserver = f"{self.host}:{self.port:d}"

        self.httpd = self.setup_webserver(_webserver)
        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
        self.server_thread.start()

    def setup_webserver(self, webserver):
        """Build (but do not start) the HTTP server for `webserver`.

        `webserver` is a "host:port" string. `self.host` and `self.port` are
        overwritten with the two (string) halves of it. The returned
        ThreadingHTTPServer serves files from `self.bench_dir` and adds a
        permissive CORS header to every response.
        """
        LOG.info(f"starting webserver on {webserver!r}")
        LOG.info(f"serving benchmarks from here: {self.bench_dir}")

        self.host, self.port = webserver.split(":")

        class CustomHandler(SimpleHTTPRequestHandler):
            # Values captured from the enclosing Benchmark instance at the
            # time the handler class is created.
            doc_root = self.bench_dir
            verbose = self.debug_mode or self.config.get("verbose", False)

            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs, directory=CustomHandler.doc_root)

            def log_message(self, *args):
                # Request logging is only emitted in verbose/debug mode.
                if CustomHandler.verbose:
                    super().log_message(*args)

            def end_headers(self):
                self.send_header("Access-Control-Allow-Origin", "*")
                super().end_headers()

        return ThreadingHTTPServer((self.host, int(self.port)), CustomHandler)

    def stop_http_server(self):
        """Stop the benchmark server and wait briefly for its thread.

        Failures are logged as warnings rather than raised so that teardown
        is best-effort.
        """
        try:
            if self.httpd:
                self.httpd.shutdown()
                # shutdown() only stops the serve_forever() loop; the
                # listening socket stays open until server_close() is called.
                self.httpd.server_close()
        except Exception:
            LOG.warning(f"Failed to stop benchmark server: {traceback.format_exc()}")

        try:
            if self.server_thread:
                self.server_thread.join(5)
        except Exception:
            LOG.warning(f"Failed to stop benchmark server: {traceback.format_exc()}")

    def _full_clone(self, benchmark_repository, dest):
        """Run a full `git clone` of `benchmark_repository` into `dest`.

        Raises subprocess.CalledProcessError if git exits with an error.
        """
        subprocess.check_call(
            [
                "git",
                "clone",
                "-c",
                "http.postBuffer=2147483648",
                "-c",
                "core.autocrlf=false",
                benchmark_repository,
                str(dest.resolve()),
            ]
        )

    def _get_benchmark_folder(self, benchmark_dest, run_local):
        """Return the folder under `benchmark_dest` that holds the benchmarks.

        The location differs between local runs and non-local runs.
        """
        if not run_local:
            # Not running locally: the benchmarks live under
            # tests/webkit/PerformanceTests.
            return pathlib.Path(benchmark_dest, "tests", "webkit", "PerformanceTests")
        return pathlib.Path(benchmark_dest, "testing", "raptor", "benchmarks")

    def _sparse_clone(self, benchmark_repository, dest):
        """Get a partial clone of the repo.

        This needs git version 2.30+ so it's currently unused but it works.
        See bug 1804694. This method should only be used in CI, locally we
        can simply pull the whole repo.
        """
        subprocess.check_call(
            [
                "git",
                "clone",
                "--depth",
                "1",
                "--filter",
                "blob:none",
                "--sparse",
                benchmark_repository,
                str(dest.resolve()),
            ]
        )
        subprocess.check_call(
            [
                "git",
                "sparse-checkout",
                "set",
                self.test.get("repository_path", "benchmarks"),
            ],
            cwd=dest,
        )

    def _copy_or_link_files(
        self,
        benchmark_path,
        benchmark_dest,
        skip_files_and_hidden=True,
        host_from_parent=True,
    ):
        """Make `benchmark_path` available inside `benchmark_dest`.

        Where symlinks are usable (non-Windows) the benchmark is linked;
        otherwise any previous copy is removed and the tree is copied.

        `skip_files_and_hidden` is accepted for interface compatibility but
        is not used by this method.

        When `host_from_parent` is true and `benchmark_path` contains at
        least one regular file at its top level, `self.test["test_url"]` is
        rewritten to include the benchmark folder name and the parent folder
        is returned instead of the linked/copied folder.

        Returns the pathlib.Path that should be hosted.
        """
        benchmark_dest.mkdir(parents=True, exist_ok=True)

        dest = pathlib.Path(benchmark_dest, benchmark_path.name)
        if hasattr(os, "symlink") and os.name != "nt":
            # A link left behind by a previous run may be dangling or may
            # point at a different checkout; replace it so that we neither
            # fail with FileExistsError nor serve outdated files.
            if dest.is_symlink() and (
                not dest.exists() or dest.resolve() != benchmark_path.resolve()
            ):
                dest.unlink()
            if not dest.exists():
                os.symlink(benchmark_path, dest)
        else:
            # Clobber the benchmark in case a recent update removed any files.
            mozfile.remove(str(dest.resolve()))
            shutil.copytree(benchmark_path, dest)

        if host_from_parent and any(
            path.is_file() for path in benchmark_path.iterdir()
        ):
            # Host the parent of this directory to prevent hosting issues
            # (e.g. linked files ending up with different routes)
            host_folder = dest.parent
            # NOTE(review): str.replace substitutes *every* "/" in test_url,
            # not only the first path separator -- confirm test_url can never
            # contain more than one "/" (e.g. a scheme or a nested path).
            self.test["test_url"] = self.test["test_url"].replace(
                "/", f"/{benchmark_path.name}/"
            )
            dest = host_folder

        return dest

    def _verify_benchmark_revision(self, benchmark_revision, external_repo_path):
        """Check that `benchmark_revision` names a commit in the repository.

        Logs an error and re-raises the underlying exception when the
        revision cannot be verified.
        """
        try:
            # Check if the given revision is valid
            subprocess.check_call(
                ["git", "rev-parse", "--verify", f"{benchmark_revision}^{{commit}}"],
                cwd=external_repo_path,
            )
            LOG.info("Given benchmark repository revision verified")
        except Exception:
            LOG.error(
                f"Given revision doesn't exist in this repository: {benchmark_revision}"
            )
            raise

    def _update_benchmark_repo(self, external_repo_path):
        """Check out and pull the branch to use in an existing clone.

        Uses `self.test["repository_branch"]` when present; otherwise the
        remote's default branch is determined and used.
        """
        default_branch = self.test.get("repository_branch", None)
        if default_branch is None:
            try:
                # Get the default branch name, and check if it's been updated
                default_branch = (
                    subprocess.check_output(
                        ["git", "rev-parse", "--abbrev-ref", "origin/HEAD"],
                        cwd=external_repo_path,
                    )
                    .decode("utf-8")
                    .strip()
                    .split("/")[-1]
                )
                remote_default_branch = (
                    subprocess.check_output(
                        ["git", "remote", "set-head", "origin", "-a"],
                        cwd=external_repo_path,
                    )
                    .decode("utf-8")
                    .strip()
                )
                # If the output of `set-head -a` doesn't mention the branch
                # we had recorded, take the last word of that output as the
                # new default branch.
                if default_branch not in remote_default_branch:
                    default_branch = remote_default_branch.split()[-1]
            except Exception:
                LOG.critical("Failed to find the default branch of the repository!")
                raise
        else:
            LOG.info(f"Using non-default branch {default_branch}")
            try:
                subprocess.check_call(["git", "pull", "--all"], cwd=external_repo_path)
            except subprocess.CalledProcessError:
                # Best-effort: the checkout below will fail if the branch is
                # really unavailable.
                LOG.info("Failed to pull new branches from remote")

        LOG.info(external_repo_path)
        subprocess.check_call(
            ["git", "checkout", default_branch], cwd=external_repo_path
        )
        subprocess.check_call(["git", "pull"], cwd=external_repo_path)

    def _setup_git_benchmarks(self, mozbuild_path, benchmark_dest, run_local=True):
        """Setup a benchmark from a github repository.

        Clones the repository (or updates an existing clone) under
        `<mozbuild_path>/performance-tests/`, checks out the requested
        revision and links/copies the benchmark into place.

        Returns the pathlib.Path that should be hosted.
        """
        benchmark_repository = self.test["repository"]
        benchmark_revision = self.test["repository_revision"]

        # Specifies where we can find the benchmark within the cloned repo, this is the
        # folder that will be hosted to run the test. If it isn't given, we'll host the
        # root of the repository.
        benchmark_repo_path = self.test.get("repository_path", "")

        # Get the performance-tests cache (if it exists), otherwise create a temp folder
        if mozbuild_path is None:
            mozbuild_path = tempfile.mkdtemp()

        external_repo_path = pathlib.Path(
            mozbuild_path, "performance-tests", benchmark_repository.split("/")[-1]
        )

        try:
            subprocess.check_output(["git", "--version"])
        except Exception:
            LOG.info(
                "Git is not available! Please install git and "
                "ensure it is included in the terminal path"
            )
            raise

        if not external_repo_path.is_dir():
            LOG.info(f"Cloning the benchmarks to {external_repo_path}")
            # Bug 1804694 - Use sparse checkouts instead of full clones
            # Locally, we should always do a full clone
            self._full_clone(benchmark_repository, external_repo_path)
        else:
            # Make sure that the repo origin wasn't changed
            url = (
                subprocess.check_output(
                    ["git", "config", "--get", "remote.origin.url"],
                    cwd=external_repo_path,
                )
                .decode("utf-8")
                .strip()
            )
            if url != benchmark_repository:
                LOG.info(
                    "Removing repo with a different remote origin before installing new one"
                )
                mozfile.remove(external_repo_path)
                self._full_clone(benchmark_repository, external_repo_path)
            else:
                self._update_benchmark_repo(external_repo_path)

        self._verify_benchmark_revision(benchmark_revision, external_repo_path)
        subprocess.check_call(
            ["git", "checkout", benchmark_revision], cwd=external_repo_path
        )

        benchmark_dest = pathlib.Path(
            self._get_benchmark_folder(benchmark_dest, run_local), self.test["name"]
        )
        benchmark_dest = self._copy_or_link_files(
            pathlib.Path(external_repo_path, benchmark_repo_path),
            benchmark_dest,
            skip_files_and_hidden=False,
            host_from_parent=self.test.get("host_from_parent", True),
        )

        return benchmark_dest

    def _setup_in_tree_benchmarks(self, topsrc_path, benchmark_dest, run_local=True):
        """Setup a benchmark that is found in-tree.

        This method will be deprecated once bug 1804578 is resolved (copying our
        in-tree benchmarks into a repo) to have a standard way of running benchmarks.
        """
        benchmark_dest = self._get_benchmark_folder(benchmark_dest, run_local)
        if not run_local:
            # If the test didn't specify a repo and we're in CI
            # then we'll find them here and we don't need to do anything else
            return benchmark_dest

        benchmark_dest = self._copy_or_link_files(
            pathlib.Path(topsrc_path, "third_party", "webkit", "PerformanceTests"),
            benchmark_dest,
        )

        return benchmark_dest

    def setup_benchmarks(
        self,
        topsrc_path,
        mozbuild_path,
        run_local=True,
    ):
        """Make sure benchmarks are linked to the proper location in the objdir.

        Benchmarks can either live in-tree or in an external repository. In the latter
        case also clone/update the repository if necessary.

        The resulting folder is stored in `self.bench_dir`.
        """
        # bench_dir is the base directory under which the benchmark is placed;
        # when running locally it comes from obj_path via mozharness/mach
        obj_path = self.config.get("obj_path", None)
        if obj_path is not None:
            bench_dir = pathlib.Path(obj_path)
        else:
            # in production it is ../tasks/task_N/build/tests/raptor/raptor/...
            # 'here' is that path, we can start with that
            bench_dir = pathlib.Path(here)

        if self.test.get("repository", None) is not None:
            # Setup benchmarks that are found on Github
            bench_dir = self._setup_git_benchmarks(
                mozbuild_path, bench_dir, run_local=run_local
            )
        else:
            # Setup the benchmarks that are available in-tree
            bench_dir = self._setup_in_tree_benchmarks(
                topsrc_path, bench_dir, run_local=run_local
            )

        self.bench_dir = bench_dir