diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /tools/tryselect/util/manage_estimates.py | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 110.0.1. (refs: upstream/110.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | tools/tryselect/util/manage_estimates.py | 132 |
1 files changed, 132 insertions, 0 deletions
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""Maintain the on-disk caches of task duration and graph quantile data.

Also provides a helper that writes a trimmed dependency map derived from a
cached taskgraph, used to speed up the fzf preview window.
"""

import json
import os
from datetime import datetime, timedelta

import requests
import six

from .estimates import GRAPH_QUANTILE_CACHE, TASK_DURATION_CACHE, TASK_DURATION_TAG_FILE

TASK_DURATION_URL = (
    "https://storage.googleapis.com/mozilla-mach-data/task_duration_history.json"
)
GRAPH_QUANTILES_URL = (
    "https://storage.googleapis.com/mozilla-mach-data/machtry_quantiles.csv"
)

# Format of the "download_date" entry in the tag file.  Shared by the reader
# and the writer so the two cannot drift apart (note: "%m" is the month,
# "%M" would be the minute).
_DOWNLOAD_DATE_FORMAT = "%Y-%m-%d"
# Downloaded data older than this is considered stale and is fetched again.
_MAX_CACHE_AGE = timedelta(days=7)


def check_downloaded_history(tag_file, duration_cache, quantile_cache):
    """Return True if the previously downloaded history data is still usable.

    The data is considered usable only when all of the following hold:

    * ``tag_file`` exists, is valid JSON and records a ``download_date``
      that is at most seven days old;
    * ``duration_cache`` exists, is valid JSON and is not in the old
      list-based format;
    * ``quantile_cache`` exists.

    Any unreadable or malformed file is treated the same as a missing one
    (the function returns False) so that the caller downloads fresh data.
    """
    if not os.path.isfile(tag_file):
        return False

    try:
        with open(tag_file) as f:
            duration_tags = json.load(f)
        download_date = datetime.strptime(
            duration_tags.get("download_date"), _DOWNLOAD_DATE_FORMAT
        )
    except (OSError, ValueError, TypeError, AttributeError):
        # OSError: unreadable file.  ValueError: invalid JSON or date string.
        # TypeError: "download_date" missing or not a string.
        # AttributeError: the tag file does not contain a JSON object.
        return False
    if download_date < datetime.now() - _MAX_CACHE_AGE:
        return False

    if not os.path.isfile(duration_cache):
        return False
    # Check for old format version of file.
    try:
        with open(duration_cache) as f:
            data = json.load(f)
    except (OSError, ValueError):
        # A corrupt or unreadable cache is unusable; force a re-download.
        return False
    if isinstance(data, list):
        return False

    return os.path.isfile(quantile_cache)


def _remove_if_present(path):
    """Delete ``path``; return True if it was removed, False on OSError."""
    try:
        os.unlink(path)
    except OSError:
        return False
    return True


def download_task_history_data(cache_dir):
    """Fetch task duration data exported from BigQuery.

    Does nothing when the files already present in ``cache_dir`` pass
    ``check_downloaded_history``.  Otherwise any existing cache files are
    removed, the task durations and graph quantiles are downloaded and
    written into ``cache_dir``, and finally the tag file recording today's
    date is written.  A failed download is reported with ``print`` and the
    function returns early without writing the tag file.
    """
    task_duration_cache = os.path.join(cache_dir, TASK_DURATION_CACHE)
    task_duration_tag_file = os.path.join(cache_dir, TASK_DURATION_TAG_FILE)
    graph_quantile_cache = os.path.join(cache_dir, GRAPH_QUANTILE_CACHE)

    if check_downloaded_history(
        task_duration_tag_file, task_duration_cache, graph_quantile_cache
    ):
        return

    # Remove every file independently: a missing tag file must not prevent
    # the stale caches from being deleted.  A list (not a generator) is used
    # so that all three removals are always attempted.
    removed = [
        _remove_if_present(path)
        for path in (task_duration_tag_file, task_duration_cache, graph_quantile_cache)
    ]
    if not any(removed):
        print("No existing task history to clean up.")

    try:
        r = requests.get(TASK_DURATION_URL, stream=True)
        r.raise_for_status()
    except requests.exceptions.RequestException as exc:
        # This is fine, the durations just won't be in the preview window.
        print(
            "Error fetching task duration cache from {}: {}".format(
                TASK_DURATION_URL, exc
            )
        )
        return

    # The data retrieved from google storage is a newline-separated
    # list of json entries, which Python's json module can't parse.
    # Build a name -> mean duration mapping directly rather than a list of
    # dicts, as the latter is slow in the preview window.
    duration_data = {}
    for line in r.text.splitlines():
        if not line.strip():
            # Tolerate blank lines between entries.
            continue
        entry = json.loads(line)
        duration_data[entry["name"]] = entry["mean_duration_seconds"]

    with open(task_duration_cache, "w") as f:
        json.dump(duration_data, f, indent=4)

    try:
        r = requests.get(GRAPH_QUANTILES_URL, stream=True)
        r.raise_for_status()
    except requests.exceptions.RequestException as exc:
        # This is fine, the percentile just won't be in the preview window.
        print(
            "Error fetching task group percentiles from {}: {}".format(
                GRAPH_QUANTILES_URL, exc
            )
        )
        return

    with open(graph_quantile_cache, "w") as f:
        f.write(six.ensure_text(r.content))

    # Written last, so the tag only exists once both downloads succeeded.
    with open(task_duration_tag_file, "w") as f:
        json.dump(
            {"download_date": datetime.now().strftime(_DOWNLOAD_DATE_FORMAT)},
            f,
            indent=4,
        )


def make_trimmed_taskgraph_cache(graph_cache, dep_cache, target_file=None):
    """Trim the taskgraph cache used for dependencies.

    Speeds up the fzf preview window to less human-perceptible
    ranges.

    Reads the JSON taskgraph from ``graph_cache`` and writes to ``dep_cache``
    a JSON mapping of task name to the list of that task's dependency
    values.  Only tasks whose names appear as keys in the JSON object stored
    in ``target_file`` are kept; when ``target_file`` is not given or does
    not exist, the written mapping is empty.  Nothing is written when
    ``graph_cache`` does not exist.
    """
    if not os.path.isfile(graph_cache):
        return

    target_task_set = set()
    if target_file and os.path.isfile(target_file):
        with open(target_file) as f:
            target_task_set = set(json.load(f).keys())

    with open(graph_cache) as f:
        graph = json.load(f)
    graph = {
        name: list(defn["dependencies"].values())
        for name, defn in graph.items()
        if name in target_task_set
    }
    with open(dep_cache, "w") as f:
        json.dump(graph, f, indent=4)