diff options
Diffstat (limited to 'tools/tryselect/util')
-rw-r--r-- | tools/tryselect/util/__init__.py | 3 | ||||
-rw-r--r-- | tools/tryselect/util/dicttools.py | 50 | ||||
-rw-r--r-- | tools/tryselect/util/estimates.py | 126 | ||||
-rw-r--r-- | tools/tryselect/util/fzf.py | 423 | ||||
-rw-r--r-- | tools/tryselect/util/manage_estimates.py | 132 |
5 files changed, 734 insertions, 0 deletions
diff --git a/tools/tryselect/util/__init__.py b/tools/tryselect/util/__init__.py new file mode 100644 index 0000000000..c580d191c1 --- /dev/null +++ b/tools/tryselect/util/__init__.py @@ -0,0 +1,3 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. diff --git a/tools/tryselect/util/dicttools.py b/tools/tryselect/util/dicttools.py new file mode 100644 index 0000000000..465e4a43de --- /dev/null +++ b/tools/tryselect/util/dicttools.py @@ -0,0 +1,50 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import copy + + +def merge_to(source, dest): + """ + Merge dict and arrays (override scalar values) + + Keys from source override keys from dest, and elements from lists in source + are appended to lists in dest. + + :param dict source: to copy from + :param dict dest: to copy to (modified in place) + """ + + for key, value in source.items(): + # Override mismatching or empty types + if type(value) != type(dest.get(key)): # noqa + dest[key] = source[key] + continue + + # Merge dict + if isinstance(value, dict): + merge_to(value, dest[key]) + continue + + if isinstance(value, list): + dest[key] = dest[key] + source[key] + continue + + dest[key] = source[key] + + return dest + + +def merge(*objects): + """ + Merge the given objects, using the semantics described for merge_to, with + objects later in the list taking precedence. From an inheritance + perspective, "parents" should be listed before "children". + + Returns the result without modifying any arguments. + """ + if len(objects) == 1: + return copy.deepcopy(objects[0]) + return merge_to(objects[-1], merge(*objects[:-1])) diff --git a/tools/tryselect/util/estimates.py b/tools/tryselect/util/estimates.py new file mode 100644 index 0000000000..634b1de2a2 --- /dev/null +++ b/tools/tryselect/util/estimates.py @@ -0,0 +1,126 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import json +import os +from datetime import datetime, timedelta + +TASK_DURATION_CACHE = "task_duration_history.json" +GRAPH_QUANTILE_CACHE = "graph_quantile_cache.csv" +TASK_DURATION_TAG_FILE = "task_duration_tag.json" + + +def find_all_dependencies(graph, tasklist): + all_dependencies = dict() + + def find_dependencies(task): + dependencies = set() + if task in all_dependencies: + return all_dependencies[task] + if task not in graph: + # Don't add tasks (and so durations) for + # things optimised out. + return dependencies + dependencies.add(task) + for dep in graph.get(task, list()): + all_dependencies[dep] = find_dependencies(dep) + dependencies.update(all_dependencies[dep]) + return dependencies + + full_deps = set() + for task in tasklist: + full_deps.update(find_dependencies(task)) + + # Since these have been asked for, they're not inherited dependencies. + return sorted(full_deps - set(tasklist)) + + +def find_longest_path(graph, tasklist, duration_data): + + dep_durations = dict() + + def find_dependency_durations(task): + if task in dep_durations: + return dep_durations[task] + + durations = [find_dependency_durations(dep) for dep in graph.get(task, list())] + durations.append(0.0) + md = max(durations) + duration_data.get(task, 0.0) + dep_durations[task] = md + return md + + longest_paths = [find_dependency_durations(task) for task in tasklist] + # Default in case there are no tasks + if longest_paths: + return max(longest_paths) + else: + return 0 + + +def determine_percentile(quantiles_file, duration): + + duration = duration.total_seconds() + + with open(quantiles_file) as f: + f.readline() # skip header + boundaries = [float(l.strip()) for l in f.readlines()] + + boundaries.sort() + for i, v in enumerate(boundaries): + if duration < v: + break + # Estimate percentile from len(boundaries)-quantile + return int(100 * i / len(boundaries)) + + +def task_duration_data(cache_dir): + with open(os.path.join(cache_dir, TASK_DURATION_CACHE)) as f: + return json.load(f) + + +def duration_summary(graph_cache_file, tasklist, cache_dir): + durations = task_duration_data(cache_dir) + + graph = dict() + if graph_cache_file: + with open(graph_cache_file) as f: + graph = json.load(f) + dependencies = find_all_dependencies(graph, tasklist) + longest_path = find_longest_path(graph, tasklist, durations) + dependency_duration = 0.0 + for task in dependencies: + dependency_duration += int(durations.get(task, 0.0)) + + total_requested_duration = 0.0 + for task in tasklist: + duration = int(durations.get(task, 0.0)) + total_requested_duration += duration + output = dict() + + total_requested_duration = timedelta(seconds=total_requested_duration) + total_dependency_duration = timedelta(seconds=dependency_duration) + + output["selected_duration"] = total_requested_duration + output["dependency_duration"] = total_dependency_duration + output["dependency_count"] = len(dependencies) + output["selected_count"] = len(tasklist) + + percentile = None + graph_quantile_cache = os.path.join(cache_dir, GRAPH_QUANTILE_CACHE) + if os.path.isfile(graph_quantile_cache): + percentile = determine_percentile( + graph_quantile_cache, total_dependency_duration + total_requested_duration + ) + if percentile: + output["percentile"] = percentile + + output["wall_duration_seconds"] = timedelta(seconds=int(longest_path)) + output["eta_datetime"] = datetime.now() + timedelta(seconds=longest_path) + + output["task_durations"] = { + task: int(durations.get(task, 0.0)) for task in tasklist + } + + return output diff --git a/tools/tryselect/util/fzf.py b/tools/tryselect/util/fzf.py new file mode 100644 index 0000000000..a111177812 --- /dev/null +++ b/tools/tryselect/util/fzf.py @@ -0,0 +1,423 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import os +import platform +import shutil +import subprocess +import sys +from distutils.spawn import find_executable + +import mozfile +import six +from gecko_taskgraph.target_tasks import filter_by_uncommon_try_tasks +from mach.util import get_state_dir +from mozboot.util import http_download_and_save +from mozbuild.base import MozbuildObject +from mozterm import Terminal +from packaging.version import Version + +from ..push import check_working_directory +from ..tasks import generate_tasks +from ..util.manage_estimates import ( + download_task_history_data, + make_trimmed_taskgraph_cache, +) + +terminal = Terminal() + +here = os.path.abspath(os.path.dirname(__file__)) +build = MozbuildObject.from_environment(cwd=here) + +PREVIEW_SCRIPT = os.path.join(build.topsrcdir, "tools/tryselect/selectors/preview.py") + +FZF_MIN_VERSION = "0.20.0" +FZF_CURRENT_VERSION = "0.29.0" + +# It would make more sense to have the full filename be the key; but that makes +# the line too long and ./mach lint and black can't agree about what to about that. +# You can get these from the github release, e.g. +# https://github.com/junegunn/fzf/releases/download/0.24.1/fzf_0.24.1_checksums.txt +# However the darwin releases may not be included, so double check you have everything +FZF_CHECKSUMS = { + "linux_armv5.tar.gz": "61d3c2aa77b977ba694836fd1134da9272bd97ee490ececaf87959b985820111", + "linux_armv6.tar.gz": "db6b30fcbbd99ac4cf7e3ff6c5db1d3c0afcbe37d10ec3961bdc43e8c4f2e4f9", + "linux_armv7.tar.gz": "ed86f0e91e41d2cea7960a78e3eb175dc2a5fc1510380c195d0c3559bfdc701c", + "linux_arm64.tar.gz": "47988d8b68905541cbc26587db3ed1cfa8bc3aa8da535120abb4229b988f259e", + "linux_amd64.tar.gz": "0106f458b933be65edb0e8f0edb9a16291a79167836fd26a77ff5496269b5e9a", + "windows_armv5.zip": "08eaac45b3600d82608d292c23e7312696e7e11b6278b292feba25e8eb91c712", + "windows_armv6.zip": "8b6618726a9d591a45120fddebc29f4164e01ce6639ed9aa8fc79ab03eefcfed", + "windows_armv7.zip": "c167117b4c08f4f098446291115871ce5f14a8a8b22f0ca70e1b4342452ab5d7", + "windows_arm64.zip": "0cda7bf68850a3e867224a05949612405e63a4421d52396c1a6c9427d4304d72", + "windows_amd64.zip": "f0797ceee089017108c80b09086c71b8eec43d4af11ce939b78b1d5cfd202540", + "darwin_arm64.zip": "2571b4d381f1fc691e7603bbc8113a67116da2404751ebb844818d512dd62b4b", + "darwin_amd64.zip": "bc541e8ae0feb94efa96424bfe0b944f746db04e22f5cccfe00709925839a57f", + "openbsd_amd64.tar.gz": "b62343827ff83949c09d5e2c8ca0c1198d05f733c9a779ec37edd840541ccdab", + "freebsd_amd64.tar.gz": "f0367f2321c070d103589c7c7eb6a771bc7520820337a6c2fbb75be37ff783a9", +} + +FZF_INSTALL_MANUALLY = """ +The `mach try fuzzy` command depends on fzf. Please install it following the +appropriate instructions for your platform: + + https://github.com/junegunn/fzf#installation + +Only the binary is required, if you do not wish to install the shell and +editor integrations, download the appropriate binary and put it on your $PATH: + + https://github.com/junegunn/fzf/releases +""".lstrip() + +FZF_COULD_NOT_DETERMINE_PLATFORM = ( + """ +Could not automatically obtain the `fzf` binary because we could not determine +your Operating System. + +""".lstrip() + + FZF_INSTALL_MANUALLY +) + +FZF_COULD_NOT_DETERMINE_MACHINE = ( + """ +Could not automatically obtain the `fzf` binary because we could not determine +your machine type. It's reported as '%s' and we don't handle that case; but fzf +may still be available as a prebuilt binary. + +""".lstrip() + + FZF_INSTALL_MANUALLY +) + +FZF_NOT_SUPPORTED_X86 = ( + """ +We don't believe that a prebuilt binary for `fzf` if available on %s, but we +could be wrong. + +""".lstrip() + + FZF_INSTALL_MANUALLY +) + +FZF_NOT_FOUND = ( + """ +Could not find the `fzf` binary. + +""".lstrip() + + FZF_INSTALL_MANUALLY +) + +FZF_VERSION_FAILED = ( + """ +Could not obtain the 'fzf' version; we require version > 0.20.0 for some of +the features. + +""".lstrip() + + FZF_INSTALL_MANUALLY +) + +FZF_INSTALL_FAILED = ( + """ +Failed to install fzf. + +""".lstrip() + + FZF_INSTALL_MANUALLY +) + +FZF_HEADER = """ +For more shortcuts, see {t.italic_white}mach help try fuzzy{t.normal} and {t.italic_white}man fzf +{shortcuts} +""".strip() + +fzf_shortcuts = { + "ctrl-a": "select-all", + "ctrl-d": "deselect-all", + "ctrl-t": "toggle-all", + "alt-bspace": "beginning-of-line+kill-line", + "?": "toggle-preview", +} + +fzf_header_shortcuts = [ + ("select", "tab"), + ("accept", "enter"), + ("cancel", "ctrl-c"), + ("select-all", "ctrl-a"), + ("cursor-up", "up"), + ("cursor-down", "down"), +] + + +def run_cmd(cmd, cwd=None): + is_win = platform.system() == "Windows" + return subprocess.call(cmd, cwd=cwd, shell=True if is_win else False) + + +def get_fzf_platform(): + if platform.machine() in ["i386", "i686"]: + print(FZF_NOT_SUPPORTED_X86 % platform.machine()) + sys.exit(1) + + if platform.system().lower() == "windows": + if platform.machine().lower() in ["x86_64", "amd64"]: + return "windows_amd64.zip" + elif platform.machine().lower() == "arm64": + return "windows_arm64.zip" + else: + print(FZF_COULD_NOT_DETERMINE_MACHINE % platform.machine()) + sys.exit(1) + elif platform.system().lower() == "darwin": + if platform.machine().lower() in ["x86_64", "amd64"]: + return "darwin_amd64.zip" + elif platform.machine().lower() == "arm64": + return "darwin_arm64.zip" + else: + print(FZF_COULD_NOT_DETERMINE_MACHINE % platform.machine()) + sys.exit(1) + elif platform.system().lower() == "linux": + if platform.machine().lower() in ["x86_64", "amd64"]: + return "linux_amd64.tar.gz" + elif platform.machine().lower() == "arm64": + return "linux_arm64.tar.gz" + else: + print(FZF_COULD_NOT_DETERMINE_MACHINE % platform.machine()) + sys.exit(1) + else: + print(FZF_COULD_NOT_DETERMINE_PLATFORM) + sys.exit(1) + + +def get_fzf_state_dir(): + return os.path.join(get_state_dir(), "fzf") + + +def get_fzf_filename(): + return "fzf-%s-%s" % (FZF_CURRENT_VERSION, get_fzf_platform()) + + +def get_fzf_download_link(): + return "https://github.com/junegunn/fzf/releases/download/%s/%s" % ( + FZF_CURRENT_VERSION, + get_fzf_filename(), + ) + + +def clean_up_state_dir(): + """ + We used to have a checkout of fzf that we would update. + Now we only download the bin and cpin the hash; so if + we find the old git checkout, wipe it + """ + + fzf_path = os.path.join(get_state_dir(), "fzf") + git_path = os.path.join(fzf_path, ".git") + if os.path.isdir(git_path): + shutil.rmtree(fzf_path, ignore_errors=True) + + # Also delete any existing fzf binary + fzf_bin = find_executable("fzf", fzf_path) + if fzf_bin: + mozfile.remove(fzf_bin) + + # Make sure the state dir is present + if not os.path.isdir(fzf_path): + os.makedirs(fzf_path) + + +def download_and_install_fzf(): + clean_up_state_dir() + download_link = get_fzf_download_link() + download_file = get_fzf_filename() + download_destination_path = get_fzf_state_dir() + download_destination_file = os.path.join(download_destination_path, download_file) + http_download_and_save( + download_link, download_destination_file, FZF_CHECKSUMS[get_fzf_platform()] + ) + + mozfile.extract(download_destination_file, download_destination_path) + mozfile.remove(download_destination_file) + + +def get_fzf_version(fzf_bin): + cmd = [fzf_bin, "--version"] + try: + fzf_version = subprocess.check_output(cmd) + except subprocess.CalledProcessError: + print(FZF_VERSION_FAILED) + sys.exit(1) + + # Some fzf versions have extra, e.g 0.18.0 (ff95134) + fzf_version = six.ensure_text(fzf_version.split()[0]) + + return fzf_version + + +def should_force_fzf_update(fzf_bin): + fzf_version = get_fzf_version(fzf_bin) + + # 0.20.0 introduced passing selections through a temporary file, + # which is good for large ctrl-a actions. + if Version(fzf_version) < Version(FZF_MIN_VERSION): + print("fzf version is old, you must update to use ./mach try fuzzy.") + return True + return False + + +def fzf_bootstrap(update=False): + """ + Bootstrap fzf if necessary and return path to the executable. + + This function is a bit complicated. We fetch a new version of fzf if: + 1) an existing fzf is too outdated + 2) the user says --update and we are behind the recommended version + 3) no fzf can be found and + 3a) user passes --update + 3b) user agrees to a prompt + + """ + fzf_path = get_fzf_state_dir() + + fzf_bin = find_executable("fzf") + if not fzf_bin: + fzf_bin = find_executable("fzf", fzf_path) + + if fzf_bin and should_force_fzf_update(fzf_bin): # Case (1) + update = True + + if fzf_bin and not update: + return fzf_bin + + elif fzf_bin and update: + # Case 2 + fzf_version = get_fzf_version(fzf_bin) + if Version(fzf_version) < Version(FZF_CURRENT_VERSION) and update: + # Bug 1623197: We only want to run fzf's `install` if it's not in the $PATH + # Swap to os.path.commonpath when we're not on Py2 + if fzf_bin and update and not fzf_bin.startswith(fzf_path): + print( + "fzf installed somewhere other than {}, please update manually".format( + fzf_path + ) + ) + sys.exit(1) + + download_and_install_fzf() + print("Updated fzf to {}".format(FZF_CURRENT_VERSION)) + else: + print("fzf is the recommended version and does not need an update") + + else: # not fzf_bin: + if not update: + # Case 3b + install = input("Could not detect fzf, install it now? [y/n]: ") + if install.lower() != "y": + return + + # Case 3a and 3b-fall-through + download_and_install_fzf() + fzf_bin = find_executable("fzf", fzf_path) + print("Installed fzf to {}".format(fzf_path)) + + return fzf_bin + + +def format_header(): + shortcuts = [] + for action, key in fzf_header_shortcuts: + shortcuts.append( + "{t.white}{action}{t.normal}: {t.yellow}<{key}>{t.normal}".format( + t=terminal, action=action, key=key + ) + ) + return FZF_HEADER.format(shortcuts=", ".join(shortcuts), t=terminal) + + +def run_fzf(cmd, tasks): + env = dict(os.environ) + env.update( + {"PYTHONPATH": os.pathsep.join([p for p in sys.path if "requests" in p])} + ) + proc = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stdin=subprocess.PIPE, + env=env, + universal_newlines=True, + ) + out = proc.communicate("\n".join(tasks))[0].splitlines() + + selected = [] + query = None + if out: + query = out[0] + selected = out[1:] + return query, selected + + +def setup_tasks_for_fzf( + push, + parameters, + full=False, + disable_target_task_filter=False, + show_estimates=True, +): + check_working_directory(push) + tg = generate_tasks( + parameters, full=full, disable_target_task_filter=disable_target_task_filter + ) + all_tasks = sorted(tg.tasks.keys()) + + # graph_Cache created by generate_tasks, recreate the path to that file. + cache_dir = os.path.join( + get_state_dir(specific_to_topsrcdir=True), "cache", "taskgraph" + ) + if full: + graph_cache = os.path.join(cache_dir, "full_task_graph") + dep_cache = os.path.join(cache_dir, "full_task_dependencies") + target_set = os.path.join(cache_dir, "full_task_set") + else: + graph_cache = os.path.join(cache_dir, "target_task_graph") + dep_cache = os.path.join(cache_dir, "target_task_dependencies") + target_set = os.path.join(cache_dir, "target_task_set") + + if show_estimates: + download_task_history_data(cache_dir=cache_dir) + make_trimmed_taskgraph_cache(graph_cache, dep_cache, target_file=target_set) + + if not full and not disable_target_task_filter: + # Put all_tasks into a list because it's used multiple times, and "filter()" + # returns a consumable iterator. + all_tasks = list(filter(filter_by_uncommon_try_tasks, all_tasks)) + + return all_tasks, dep_cache, cache_dir + + +def build_base_cmd(fzf, dep_cache, cache_dir, show_estimates=True): + key_shortcuts = [k + ":" + v for k, v in fzf_shortcuts.items()] + base_cmd = [ + fzf, + "-m", + "--bind", + ",".join(key_shortcuts), + "--header", + format_header(), + "--preview-window=right:30%", + "--print-query", + ] + + if show_estimates: + base_cmd.extend( + [ + "--preview", + '{} {} -g {} -s -c {} -t "{{+f}}"'.format( + sys.executable, PREVIEW_SCRIPT, dep_cache, cache_dir + ), + ] + ) + else: + base_cmd.extend( + [ + "--preview", + '{} {} -t "{{+f}}"'.format(sys.executable, PREVIEW_SCRIPT), + ] + ) + + return base_cmd diff --git a/tools/tryselect/util/manage_estimates.py b/tools/tryselect/util/manage_estimates.py new file mode 100644 index 0000000000..23fa481228 --- /dev/null +++ b/tools/tryselect/util/manage_estimates.py @@ -0,0 +1,132 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import json +import os +from datetime import datetime, timedelta + +import requests +import six + +TASK_DURATION_URL = ( + "https://storage.googleapis.com/mozilla-mach-data/task_duration_history.json" +) +GRAPH_QUANTILES_URL = ( + "https://storage.googleapis.com/mozilla-mach-data/machtry_quantiles.csv" +) +from .estimates import GRAPH_QUANTILE_CACHE, TASK_DURATION_CACHE, TASK_DURATION_TAG_FILE + + +def check_downloaded_history(tag_file, duration_cache, quantile_cache): + if not os.path.isfile(tag_file): + return False + + try: + with open(tag_file) as f: + duration_tags = json.load(f) + download_date = datetime.strptime( + duration_tags.get("download_date"), "%Y-%M-%d" + ) + if download_date < datetime.now() - timedelta(days=7): + return False + except (OSError, ValueError): + return False + + if not os.path.isfile(duration_cache): + return False + # Check for old format version of file. + with open(duration_cache) as f: + data = json.load(f) + if isinstance(data, list): + return False + if not os.path.isfile(quantile_cache): + return False + + return True + + +def download_task_history_data(cache_dir): + """Fetch task duration data exported from BigQuery.""" + task_duration_cache = os.path.join(cache_dir, TASK_DURATION_CACHE) + task_duration_tag_file = os.path.join(cache_dir, TASK_DURATION_TAG_FILE) + graph_quantile_cache = os.path.join(cache_dir, GRAPH_QUANTILE_CACHE) + + if check_downloaded_history( + task_duration_tag_file, task_duration_cache, graph_quantile_cache + ): + return + + try: + os.unlink(task_duration_tag_file) + os.unlink(task_duration_cache) + os.unlink(graph_quantile_cache) + except OSError: + print("No existing task history to clean up.") + + try: + r = requests.get(TASK_DURATION_URL, stream=True) + r.raise_for_status() + except requests.exceptions.RequestException as exc: + # This is fine, the durations just won't be in the preview window. + print( + "Error fetching task duration cache from {}: {}".format( + TASK_DURATION_URL, exc + ) + ) + return + + # The data retrieved from google storage is a newline-separated + # list of json entries, which Python's json module can't parse. + duration_data = list() + for line in r.text.splitlines(): + duration_data.append(json.loads(line)) + + # Reformat duration data to avoid list of dicts, as this is slow in the preview window + duration_data = {d["name"]: d["mean_duration_seconds"] for d in duration_data} + + with open(task_duration_cache, "w") as f: + json.dump(duration_data, f, indent=4) + + try: + r = requests.get(GRAPH_QUANTILES_URL, stream=True) + r.raise_for_status() + except requests.exceptions.RequestException as exc: + # This is fine, the percentile just won't be in the preview window. + print( + "Error fetching task group percentiles from {}: {}".format( + GRAPH_QUANTILES_URL, exc + ) + ) + return + + with open(graph_quantile_cache, "w") as f: + f.write(six.ensure_text(r.content)) + + with open(task_duration_tag_file, "w") as f: + json.dump({"download_date": datetime.now().strftime("%Y-%m-%d")}, f, indent=4) + + +def make_trimmed_taskgraph_cache(graph_cache, dep_cache, target_file=None): + """Trim the taskgraph cache used for dependencies. + + Speeds up the fzf preview window to less human-perceptible + ranges.""" + if not os.path.isfile(graph_cache): + return + + target_task_set = set() + if target_file and os.path.isfile(target_file): + with open(target_file) as f: + target_task_set = set(json.load(f).keys()) + + with open(graph_cache) as f: + graph = json.load(f) + graph = { + name: list(defn["dependencies"].values()) + for name, defn in graph.items() + if name in target_task_set + } + with open(dep_cache, "w") as f: + json.dump(graph, f, indent=4) |