Diffstat (limited to 'python/mozbuild/mozbuild/analyze/hg.py')
-rw-r--r--  python/mozbuild/mozbuild/analyze/hg.py  176
1 file changed, 176 insertions(+), 0 deletions(-)
diff --git a/python/mozbuild/mozbuild/analyze/hg.py b/python/mozbuild/mozbuild/analyze/hg.py
new file mode 100644
index 0000000000..605ff6838e
--- /dev/null
+++ b/python/mozbuild/mozbuild/analyze/hg.py
@@ -0,0 +1,176 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
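+# Cross-reference hg.mozilla.org pushlog data (how often each file changes)
+# with a per-file cost dictionary (times in milliseconds) to report which
+# files account for the most total cost over a window of pushes.
+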
+import bisect
+import gzip
+import json
+import math
+from collections import Counter
+from datetime import datetime, timedelta
+
+import mozpack.path as mozpath
+import requests
+
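+# Number of pushes requested per json-pushes call.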
+PUSHLOG_CHUNK_SIZE = 500
+
+URL = "https://hg.mozilla.org/mozilla-central/json-pushes?"
+
+
+def unix_epoch(date):
+    return (date - datetime(1970, 1, 1)).total_seconds()
+
+
+def unix_from_date(n, today):
+    return unix_epoch(today - timedelta(days=n))
+
+
+def get_lastpid(session):
+    return session.get(URL + "&version=2").json()["lastpushid"]
+
+
+def get_pushlog_chunk(session, start, end):
+    # returns pushes sorted by date
+    res = session.get(
+        URL + "version=1&startID={0}&endID={1}&full=1".format(start, end)
+    ).json()
+    return sorted(res.items(), key=lambda x: x[1]["date"])
+
+
+def collect_data(session, date):
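+    # Walk the pushlog backwards in PUSHLOG_CHUNK_SIZE chunks, newest first,
+    # until a chunk starts before `date`, then keep only the pushes dated at
+    # or after `date`.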
+    if date < 1206031764:  # first push
+        raise Exception("No pushes exist before March 20, 2008.")
+    lastpushid = get_lastpid(session)
+    data = []
+    start_id = lastpushid - PUSHLOG_CHUNK_SIZE
+    end_id = lastpushid + 1
+    while True:
+        res = get_pushlog_chunk(session, start_id, end_id)
+        starting_date = res[0][1]["date"]  # date of oldest push in chunk
+        dates = [x[1]["date"] for x in res]
+        if starting_date < date:
+            i = bisect.bisect_left(dates, date)
+            data.append(res[i:])
+            return data
+        else:
+            data.append(res)
+            end_id = start_id + 1
+            start_id = start_id - PUSHLOG_CHUNK_SIZE
+
+
+def get_data(epoch):
+    session = requests.Session()
+    data = collect_data(session, epoch)
+    return {k: v for sublist in data for (k, v) in sublist}
+
+
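+# Pushes from the most recent `days` days of the pushlog. `files` lists each
+# file once per push that touched it, so `file_count` maps a file to the
+# number of pushes that changed it.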
+class Pushlog(object):
+    def __init__(self, days):
+        info = get_data(unix_from_date(days, datetime.today()))
+        self.pushlog = info
+        self.pids = self.get_pids()
+        self.pushes = self.make_pushes()
+        self.files = [l for p in self.pushes for l in set(p.files)]
+        self.file_set = set(self.files)
+        self.file_count = Counter(self.files)
+
+    def make_pushes(self):
+        pids = self.pids
+        all_pushes = self.pushlog
+        return [Push(pid, all_pushes[str(pid)]) for pid in pids]
+
+    def get_pids(self):
+        # Push ids arrive as numeric strings; sort them numerically.
+        return sorted(self.pushlog.keys(), key=int)
+
+
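+# A single push: its id, its date, and the files touched by its changesets.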
+class Push(object):
+    def __init__(self, pid, p_dict):
+        self.id = pid
+        self.date = p_dict["date"]
+        self.files = [f for x in p_dict["changesets"] for f in x["files"]]
+
+
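+# Combine pushlog change counts with a per-file cost dictionary (times in
+# milliseconds) and rank files by total cost (cost * number of pushes).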
+class Report(object):
+    def __init__(self, days, path=None, cost_dict=None):
+        obj = Pushlog(days)
+        self.file_set = obj.file_set
+        self.file_count = obj.file_count
+        self.name = str(days) + "day_report"
+        self.cost_dict = self.get_cost_dict(path, cost_dict)
+
+    def get_cost_dict(self, path, cost_dict):
+        if path is not None:
+            with gzip.open(path) as file:
+                return json.loads(file.read())
+        elif cost_dict is not None:
+            return cost_dict
+        else:
+            raise Exception(
+                "Either a path to a gzipped JSON cost file or a cost "
+                "dictionary is required."
+            )
+
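+    # Build one row per changed file that has an entry in the cost dictionary:
+    # (file, cost in ms, number of pushes touching it, cost * count).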
+    def organize_data(self):
+        costs = self.cost_dict
+        counts = self.file_count
+        res = []
+        for f in self.file_set:
+            cost = costs.get(f)
+            count = counts.get(f)
+            if cost is not None:
+                res.append((f, cost, count, round(cost * count, 3)))
+        return res
+
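+    # Rows sorted by total cost (cost * count), most expensive first; for
+    # "html" and "pretty" output the millisecond columns are rendered as
+    # minutes:seconds.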
+    def get_sorted_report(self, format):
+        res = self.organize_data()
+        res.sort(key=(lambda x: x[3]), reverse=True)
+
+        def ms_to_mins_secs(ms):
+            secs = ms / 1000.0
+            mins = secs / 60
+            secs = secs % 60
+            return "%d:%02d" % (math.trunc(mins), int(round(secs)))
+
+        if format in ("html", "pretty"):
+            res = [
+                (f, ms_to_mins_secs(cost), count, ms_to_mins_secs(total))
+                for (f, cost, count, total) in res
+            ]
+
+        return res
+
+    def cut(self, size, lst):
+        if len(lst) <= size:
+            return lst
+        else:
+            return lst[:size]
+
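+    # Render the report with tablib: print it for "pretty", otherwise write
+    # <name>.<format> into `dst`.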
+    def generate_output(self, format, limit, dst):
+        import tablib
+
+        data = tablib.Dataset(headers=["FILE", "TIME", "CHANGES", "TOTAL"])
+        res = self.get_sorted_report(format)
+        if limit is not None:
+            res = self.cut(limit, res)
+        for x in res:
+            data.append(x)
+        if format == "pretty":
+            print(data)
+        else:
+            file_name = self.name + "." + format
+            # export() returns the serialized text for csv, json, and html.
+            content = data.export(format)
+            file_path = mozpath.join(dst, file_name)
+            with open(file_path, "w") as f:
+                f.write(content)
+            print("Created report: %s" % file_path)