diff options
Diffstat (limited to 'src/rocksdb/build_tools/benchmark_log_tool.py')
-rwxr-xr-x | src/rocksdb/build_tools/benchmark_log_tool.py | 238 |
1 files changed, 238 insertions, 0 deletions
#!/usr/bin/env python3
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
# This source code is licensed under both the GPLv2 (found in the
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).

"""Access the results of benchmark runs
Send these results on to OpenSearch graphing service
"""

import argparse
import itertools
import logging
import os
import re
import sys

import requests
from dateutil import parser

logging.basicConfig(level=logging.DEBUG)


class Configuration:
    """OpenSearch credentials as found in the environment.

    Each value is ``None`` when its variable is unset, so that merely
    importing this module (e.g. to run with ``--upload none``) does not
    require credentials to be present.
    """

    opensearch_user = os.environ.get("ES_USER")
    opensearch_pass = os.environ.get("ES_PASS")


class BenchmarkResultException(Exception):
    """Raised when a benchmark report cannot be parsed.

    Attributes:
        content: extra detail about the failure (the offending input).
    """

    def __init__(self, message, content):
        # Pass only the message on; passing `self` as well would put the
        # exception object itself into `args` and garble str(exc).
        super().__init__(message)
        self.content = content


class BenchmarkUtils:
    """Helpers for validating and reshaping benchmark result rows."""

    expected_keys = [
        "ops_sec",
        "mb_sec",
        "lsm_sz",
        "blob_sz",
        "c_wgb",
        "w_amp",
        "c_mbps",
        "c_wsecs",
        "c_csecs",
        "b_rgb",
        "b_wgb",
        "usec_op",
        "p50",
        "p99",
        "p99.9",
        "p99.99",
        "pmax",
        "uptime",
        "stall%",
        "Nstall",
        "u_cpu",
        "s_cpu",
        "rss",
        "test",
        "date",
        "version",
        "job_id",
    ]

    @staticmethod
    def sanity_check(row):
        """Return True if `row` is a usable benchmark record.

        A usable row has a non-empty "test", an "ops_sec" that converts
        with int(), and a "date" that dateutil can parse. Each rejection
        is logged together with the offending row.
        """
        if "test" not in row:
            logging.debug(f"not 'test' in row: {row}")
            return False
        if row["test"] == "":
            logging.debug(f"row['test'] == '': {row}")
            return False
        if "date" not in row:
            logging.debug(f"not 'date' in row: {row}")
            return False
        if "ops_sec" not in row:
            logging.debug(f"not 'ops_sec' in row: {row}")
            return False
        try:
            _ = int(row["ops_sec"])
        except (ValueError, TypeError):
            logging.debug(f"int(row['ops_sec']): {row}")
            return False
        try:
            (_, _) = parser.parse(row["date"], fuzzy_with_tokens=True)
        except (parser.ParserError, TypeError, OverflowError):
            # TypeError: the value is not a string, e.g. None for a row that
            # is shorter than the header. OverflowError: date out of range.
            logging.error(
                f"parser.parse((row['date']): not a valid format for date in row: {row}"
            )
            return False
        return True

    @staticmethod
    def conform_opensearch(row):
        """Return a copy of `row` shaped for upload to OpenSearch.

        Note that `row` itself is updated in place as well: its "date" is
        rewritten in ISO format and a "test_date" field is added. The
        returned dict has every "." in a key replaced by "_".

        Expects a row that has already passed `sanity_check`; an
        unparseable "date" raises from dateutil.
        """
        (dt, _) = parser.parse(row["date"], fuzzy_with_tokens=True)
        # create a test_date field, which was previously what was expected
        # repair the date field, which has what can be a WRONG ISO FORMAT,
        # (no leading 0 on single-digit day-of-month)
        # e.g. 2022-07-1T00:14:55 should be 2022-07-01T00:14:55
        iso_date = dt.isoformat()
        row["test_date"] = iso_date
        row["date"] = iso_date
        return {key.replace(".", "_"): value for key, value in row.items()}


class ResultParser:
    """Parser for tab-separated benchmark reports with a header line."""

    def __init__(self, field=r"(\w|[+-:.%])+", intrafield=r"(\s)+", separator="\t"):
        """Compile the patterns used to tokenize a line.

        Args:
            field: regex matching one field value.
            intrafield: regex matching the gap between two fields.
            separator: regex matching one true separator inside a gap.

        NOTE: in the default `field` pattern, "+-:" inside the character
        class is interpreted as a range ("+" through ":"), so it also
        accepts ",", "-", "/" and digits. It is kept as is because
        changing it would change which lines parse.
        """
        self.field = re.compile(field)
        self.intra = re.compile(intrafield)
        self.sep = re.compile(separator)

    def ignore(self, l_in: str):
        """Return True for lines to skip: empty ones and "#" comments."""
        return not l_in or l_in.startswith("#")

    def line(self, line_in: str):
        """Parse a line into items
        Being clever about separators

        Returns the list of field strings. Raises BenchmarkResultException
        when the remaining text is neither a field nor whitespace.
        """
        remaining = line_in
        row = []
        while remaining != "":
            match_item = self.field.match(remaining)
            if match_item:
                item = match_item.group(0)
                row.append(item)
                remaining = remaining[len(item) :]
                continue
            match_intra = self.intra.match(remaining)
            if not match_intra:
                raise BenchmarkResultException(
                    "Invalid TSV line", f"{line_in} at {remaining}"
                )
            intra = match_intra.group(0)
            # Count the separators
            # If there are >1 then generate extra blank fields
            # White space with no true separators fakes up a single separator
            sep_count = len(self.sep.split(intra)) - 1
            if sep_count == 0:
                sep_count = 1
            row.extend("" for _ in range(sep_count - 1))
            remaining = remaining[len(intra) :]
        return row

    def parse(self, lines):
        """Parse something that iterates lines

        The first non-ignored line is the header; each later line becomes
        a dict keyed by the header. Values beyond the header width are
        dropped and missing trailing values are None. Returns an empty
        list when there is no header line at all.
        """
        rows = [self.line(line) for line in lines if not self.ignore(line)]
        if not rows:
            return []
        header, *data_rows = rows
        width = len(header)
        return [
            dict(itertools.zip_longest(header, row[:width])) for row in data_rows
        ]


def load_report_from_tsv(filename: str):
    """Read the TSV report in `filename` and return its rows as dicts."""
    with open(filename, "r") as file:
        # Named result_parser so it does not shadow the dateutil `parser`.
        result_parser = ResultParser()
        report = result_parser.parse(file)
    logging.debug(f"Loaded TSV Report: {report}")
    return report


def push_report_to_opensearch(report, esdocument, timeout=60):
    """POST every sane row of `report` to the `esdocument` URL as JSON.

    Args:
        report: rows as produced by `load_report_from_tsv`.
        esdocument: URL of the OpenSearch document to post to.
        timeout: seconds to wait on each request before giving up.

    Raises:
        KeyError: if ES_USER or ES_PASS is not set in the environment.
        requests.HTTPError: if the server answers with an error status.
    """
    sanitized = [
        BenchmarkUtils.conform_opensearch(row)
        for row in report
        if BenchmarkUtils.sanity_check(row)
    ]
    logging.debug(
        f"upload {len(sanitized)} sane of {len(report)} benchmarks to opensearch"
    )
    # Read the credentials once; fail before the first request if missing.
    auth = (os.environ["ES_USER"], os.environ["ES_PASS"])
    for single_benchmark in sanitized:
        logging.debug(f"upload benchmark: {single_benchmark}")
        response = requests.post(
            esdocument,
            json=single_benchmark,
            auth=auth,
            # Without a timeout requests can block forever on a dead server.
            timeout=timeout,
        )
        logging.debug(
            f"Sent to OpenSearch, status: {response.status_code}, result: {response.text}"
        )
        response.raise_for_status()


def push_report_to_null(report):
    """Dry run: validate and conform each row, logging instead of uploading."""
    for row in report:
        if BenchmarkUtils.sanity_check(row):
            logging.debug(f"row {row}")
            conformed = BenchmarkUtils.conform_opensearch(row)
            logging.debug(f"conformed row {conformed}")


def main():
    """Tool for fetching, parsing and uploading benchmark results to OpenSearch / ElasticSearch
    This tool will

    (1) Open a local tsv benchmark report file
    (2) Upload to OpenSearch document, via https/JSON
    """

    # Named arg_parser so it does not shadow the dateutil `parser`.
    arg_parser = argparse.ArgumentParser(description="CircleCI benchmark scraper.")

    # --tsvfile is the name of the file to read results from
    # --esdocument is the ElasticSearch document to push these results into
    #
    arg_parser.add_argument(
        "--tsvfile",
        default="build_tools/circle_api_scraper_input.txt",
        help="File from which to read tsv report",
    )
    arg_parser.add_argument(
        "--esdocument",
        help="ElasticSearch/OpenSearch document URL to upload report into",
    )
    arg_parser.add_argument(
        "--upload", choices=["opensearch", "none"], default="opensearch"
    )

    args = arg_parser.parse_args()
    logging.debug(f"Arguments: {args}")
    if args.upload == "opensearch" and not args.esdocument:
        # Report the missing URL up front rather than as an invalid-URL
        # error from requests in the middle of the upload.
        arg_parser.error("--esdocument is required when --upload is opensearch")
    reports = load_report_from_tsv(args.tsvfile)
    if args.upload == "opensearch":
        push_report_to_opensearch(reports, args.esdocument)
    else:
        push_report_to_null(reports)


if __name__ == "__main__":
    sys.exit(main())