Diffstat (limited to 'tools/tryselect/util')
-rw-r--r--  tools/tryselect/util/__init__.py            3
-rw-r--r--  tools/tryselect/util/dicttools.py          51
-rw-r--r--  tools/tryselect/util/estimates.py         127
-rw-r--r--  tools/tryselect/util/manage_estimates.py  131
4 files changed, 312 insertions(+), 0 deletions(-)
diff --git a/tools/tryselect/util/__init__.py b/tools/tryselect/util/__init__.py
new file mode 100644
index 0000000000..c580d191c1
--- /dev/null
+++ b/tools/tryselect/util/__init__.py
@@ -0,0 +1,3 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/tools/tryselect/util/dicttools.py b/tools/tryselect/util/dicttools.py
new file mode 100644
index 0000000000..be4b18e618
--- /dev/null
+++ b/tools/tryselect/util/dicttools.py
@@ -0,0 +1,51 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import copy
+
+
+def merge_to(source, dest):
+    """
+    Merge dicts and lists (overriding scalar values).
+
+    Keys from source override keys from dest, and elements from lists in source
+    are appended to lists in dest.
+
+    :param dict source: to copy from
+    :param dict dest: to copy to (modified in place)
+    """
+
+    for key, value in source.items():
+        # Override values whose types don't match (including keys absent from dest)
+        if type(value) != type(dest.get(key)):  # noqa
+            dest[key] = source[key]
+            continue
+
+        # Merge dicts recursively
+        if isinstance(value, dict):
+            merge_to(value, dest[key])
+            continue
+
+        if isinstance(value, list):
+            dest[key] = dest[key] + source[key]
+            continue
+
+        dest[key] = source[key]
+
+    return dest
+
+
+def merge(*objects):
+    """
+    Merge the given objects, using the semantics described for merge_to, with
+    objects later in the list taking precedence. From an inheritance
+    perspective, "parents" should be listed before "children".
+
+    Returns the result without modifying any arguments.
+    """
+    if len(objects) == 1:
+        return copy.deepcopy(objects[0])
+    return merge_to(objects[-1], merge(*objects[:-1]))
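
A minimal sketch of the merge semantics above (the sample data and the import
path are illustrative, not part of the patch): scalars are overridden, dicts
are merged recursively, lists are concatenated, and later arguments win.

    from tryselect.util.dicttools import merge  # import path depends on sys.path setup

    base = {"env": {"MOZ_LOG": "timestamp"}, "paths": ["caps"], "rebuild": 1}
    override = {"env": {"DISPLAY": ":0"}, "paths": ["dom"], "rebuild": 5}

    merge(base, override)
    # => {
    #     "env": {"MOZ_LOG": "timestamp", "DISPLAY": ":0"},  # dicts merged
    #     "paths": ["caps", "dom"],                          # lists concatenated
    #     "rebuild": 5,                                      # later object wins
    # }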
diff --git a/tools/tryselect/util/estimates.py b/tools/tryselect/util/estimates.py
new file mode 100644
index 0000000000..fa5f69fee1
--- /dev/null
+++ b/tools/tryselect/util/estimates.py
@@ -0,0 +1,127 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function
+
+import json
+import os
+from datetime import datetime, timedelta
+
+TASK_DURATION_CACHE = "task_duration_history.json"
+GRAPH_QUANTILE_CACHE = "graph_quantile_cache.csv"
+TASK_DURATION_TAG_FILE = "task_duration_tag.json"
+
+
+def find_all_dependencies(graph, tasklist):
+    all_dependencies = dict()
+
+    def find_dependencies(task):
+        dependencies = set()
+        if task in all_dependencies:
+            return all_dependencies[task]
+        if task not in graph:
+            # Don't add tasks (and so durations) for
+            # things optimised out.
+            return dependencies
+        dependencies.add(task)
+        for dep in graph.get(task, list()):
+            all_dependencies[dep] = find_dependencies(dep)
+            dependencies.update(all_dependencies[dep])
+        return dependencies
+
+    full_deps = set()
+    for task in tasklist:
+        full_deps.update(find_dependencies(task))
+
+    # Tasks that were explicitly requested don't count as inherited dependencies.
+    return sorted(full_deps - set(tasklist))
+
+
+def find_longest_path(graph, tasklist, duration_data):
+
+    dep_durations = dict()
+
+    def find_dependency_durations(task):
+        if task in dep_durations:
+            return dep_durations[task]
+
+        durations = [find_dependency_durations(dep) for dep in graph.get(task, list())]
+        durations.append(0.0)
+        md = max(durations) + duration_data.get(task, 0.0)
+        dep_durations[task] = md
+        return md
+
+    longest_paths = [find_dependency_durations(task) for task in tasklist]
+    # Default in case there are no tasks
+    if longest_paths:
+        return max(longest_paths)
+    else:
+        return 0
+
+
+def determine_quantile(quantiles_file, duration):
+
+    duration = duration.total_seconds()
+
+    with open(quantiles_file) as f:
+        f.readline()  # skip header
+        boundaries = [float(line.strip()) for line in f.readlines()]
+        boundaries.sort()
+
+    for i, v in enumerate(boundaries):
+        if duration < v:
+            break
+    # Scale to a percentage in case we weren't given exactly 100 boundaries
+    return int(100 * i / len(boundaries))
+
+
+def task_duration_data(cache_dir):
+    with open(os.path.join(cache_dir, TASK_DURATION_CACHE)) as f:
+        return json.load(f)
+
+
+def duration_summary(graph_cache_file, tasklist, cache_dir):
+    durations = task_duration_data(cache_dir)
+
+    graph = dict()
+    if graph_cache_file:
+        with open(graph_cache_file) as f:
+            graph = json.load(f)
+    dependencies = find_all_dependencies(graph, tasklist)
+    longest_path = find_longest_path(graph, tasklist, durations)
+    dependency_duration = 0.0
+    for task in dependencies:
+        dependency_duration += int(durations.get(task, 0.0))
+
+    total_requested_duration = 0.0
+    for task in tasklist:
+        duration = int(durations.get(task, 0.0))
+        total_requested_duration += duration
+    output = dict()
+
+    total_requested_duration = timedelta(seconds=total_requested_duration)
+    total_dependency_duration = timedelta(seconds=dependency_duration)
+
+    output["selected_duration"] = total_requested_duration
+    output["dependency_duration"] = total_dependency_duration
+    output["dependency_count"] = len(dependencies)
+    output["selected_count"] = len(tasklist)
+
+    quantile = None
+    graph_quantile_cache = os.path.join(cache_dir, GRAPH_QUANTILE_CACHE)
+    if os.path.isfile(graph_quantile_cache):
+        quantile = 100 - determine_quantile(
+            graph_quantile_cache, total_dependency_duration + total_requested_duration
+        )
+    if quantile:
+        output["quantile"] = quantile
+
+    output["wall_duration_seconds"] = timedelta(seconds=int(longest_path))
+    output["eta_datetime"] = datetime.now() + timedelta(seconds=longest_path)
+
+    output["task_durations"] = {
+        task: int(durations.get(task, 0.0)) for task in tasklist
+    }
+
+    return output
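
To make the estimate helpers concrete, here is a sketch with a toy graph (the
task names and durations are invented): find_all_dependencies returns the tasks
pulled in beyond the explicit selection, and find_longest_path returns the
critical path in seconds.

    from tryselect.util.estimates import find_all_dependencies, find_longest_path

    # The graph maps each task to the tasks it depends on; tasks missing from
    # the graph are treated as optimised out and contribute nothing.
    graph = {
        "test-a": ["build-a"],
        "build-a": ["toolchain"],
        "toolchain": [],
    }
    durations = {"toolchain": 600.0, "build-a": 1800.0, "test-a": 900.0}

    find_all_dependencies(graph, ["test-a"])
    # => ["build-a", "toolchain"]  (the selection itself is excluded)

    find_longest_path(graph, ["test-a"], durations)
    # => 3300.0  (toolchain -> build-a -> test-a: 600 + 1800 + 900 seconds)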
diff --git a/tools/tryselect/util/manage_estimates.py b/tools/tryselect/util/manage_estimates.py
new file mode 100644
index 0000000000..ce0ca0979d
--- /dev/null
+++ b/tools/tryselect/util/manage_estimates.py
@@ -0,0 +1,131 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function
+
+import json
+import os
+from datetime import datetime, timedelta
+import requests
+import six
+
+from .estimates import TASK_DURATION_CACHE, GRAPH_QUANTILE_CACHE, TASK_DURATION_TAG_FILE
+
+TASK_DURATION_URL = (
+    "https://storage.googleapis.com/mozilla-mach-data/task_duration_history.json"
+)
+GRAPH_QUANTILES_URL = (
+    "https://storage.googleapis.com/mozilla-mach-data/machtry_quantiles.csv"
+)
+
+
+def check_downloaded_history(tag_file, duration_cache, quantile_cache):
+    if not os.path.isfile(tag_file):
+        return False
+
+    try:
+        with open(tag_file) as f:
+            duration_tags = json.load(f)
+        download_date = datetime.strptime(
+            duration_tags.get("download_date"), "%Y-%m-%d"
+        )
+        if download_date < datetime.now() - timedelta(days=7):
+            return False
+    except (IOError, ValueError, TypeError):
+        return False
+
+    if not os.path.isfile(duration_cache):
+        return False
+    # Check for the old format version of the file.
+    with open(duration_cache) as f:
+        data = json.load(f)
+        if isinstance(data, list):
+            return False
+    if not os.path.isfile(quantile_cache):
+        return False
+
+    return True
+
+
+def download_task_history_data(cache_dir):
+    """Fetch task duration data exported from BigQuery."""
+    task_duration_cache = os.path.join(cache_dir, TASK_DURATION_CACHE)
+    task_duration_tag_file = os.path.join(cache_dir, TASK_DURATION_TAG_FILE)
+    graph_quantile_cache = os.path.join(cache_dir, GRAPH_QUANTILE_CACHE)
+
+    if check_downloaded_history(
+        task_duration_tag_file, task_duration_cache, graph_quantile_cache
+    ):
+        return
+
+    try:
+        os.unlink(task_duration_tag_file)
+        os.unlink(task_duration_cache)
+        os.unlink(graph_quantile_cache)
+    except OSError:
+        print("No existing task history to clean up.")
+
+    try:
+        r = requests.get(TASK_DURATION_URL, stream=True)
+    except requests.exceptions.RequestException as exc:
+        # This is fine; the durations just won't appear in the preview window.
+        print(
+            "Error fetching task duration cache from {}: {}".format(
+                TASK_DURATION_URL, exc
+            )
+        )
+        return
+
+    # The data retrieved from Google storage is a newline-separated list of
+    # JSON entries, which Python's json module can't parse in a single pass.
+    duration_data = list()
+    for line in r.text.splitlines():
+        duration_data.append(json.loads(line))
+
+    # Reformat to a flat dict, as a list of dicts is slow in the preview window
+    duration_data = {d["name"]: d["mean_duration_seconds"] for d in duration_data}
+
+    with open(task_duration_cache, "w") as f:
+        json.dump(duration_data, f, indent=4)
+
+    try:
+        r = requests.get(GRAPH_QUANTILES_URL, stream=True)
+    except requests.exceptions.RequestException as exc:
+        # This is fine; the percentile just won't appear in the preview window.
+        print(
+            "Error fetching task group percentiles from {}: {}".format(
+                GRAPH_QUANTILES_URL, exc
+            )
+        )
+        return
+
+    with open(graph_quantile_cache, "w") as f:
+        f.write(six.ensure_text(r.content))
+
+    with open(task_duration_tag_file, "w") as f:
+        json.dump({"download_date": datetime.now().strftime("%Y-%m-%d")}, f, indent=4)
+
+
+def make_trimmed_taskgraph_cache(graph_cache, dep_cache, target_file=None):
+    """Trim the taskgraph cache used for dependencies.
+
+    Speeds up the fzf preview window so that rendering delays are
+    barely human-perceptible."""
+    if not os.path.isfile(graph_cache):
+        return
+
+    target_task_set = set()
+    if target_file and os.path.isfile(target_file):
+        with open(target_file) as f:
+            target_task_set = set(json.load(f).keys())
+
+    with open(graph_cache) as f:
+        graph = json.load(f)
+    graph = {
+        name: list(defn["dependencies"].values())
+        for name, defn in graph.items()
+        if name in target_task_set
+    }
+    with open(dep_cache, "w") as f:
+        json.dump(graph, f, indent=4)
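
Putting the pieces together, a caller (in practice the mach try fzf front end;
the cache file names below are hypothetical) would refresh the caches and then
summarize a selection. Note that duration_summary assumes the duration cache
exists, so the download step should succeed first; note also that without a
target_file, make_trimmed_taskgraph_cache writes an empty graph.

    import os

    from tryselect.util.estimates import duration_summary
    from tryselect.util.manage_estimates import (
        download_task_history_data,
        make_trimmed_taskgraph_cache,
    )

    cache_dir = os.path.expanduser("~/.mozbuild/cache")          # illustrative
    graph_cache = os.path.join(cache_dir, "target_task_graph")   # hypothetical name
    dep_cache = os.path.join(cache_dir, "dependency_cache")      # hypothetical name
    target_file = os.path.join(cache_dir, "target_tasks")        # hypothetical name

    download_task_history_data(cache_dir)  # no-op if caches are under a week old
    make_trimmed_taskgraph_cache(graph_cache, dep_cache, target_file=target_file)

    summary = duration_summary(dep_cache, ["test-a"], cache_dir)
    print(summary["selected_duration"], summary.get("quantile"))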