From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/rocksdb/tools/advisor/README.md | 96 ++++ src/rocksdb/tools/advisor/advisor/__init__.py | 0 src/rocksdb/tools/advisor/advisor/bench_runner.py | 39 ++ .../advisor/advisor/config_optimizer_example.py | 140 ++++++ .../tools/advisor/advisor/db_bench_runner.py | 237 ++++++++++ .../tools/advisor/advisor/db_config_optimizer.py | 293 ++++++++++++ src/rocksdb/tools/advisor/advisor/db_log_parser.py | 134 ++++++ .../tools/advisor/advisor/db_options_parser.py | 348 ++++++++++++++ .../tools/advisor/advisor/db_stats_fetcher.py | 346 ++++++++++++++ .../tools/advisor/advisor/db_timeseries_parser.py | 203 ++++++++ src/rocksdb/tools/advisor/advisor/ini_parser.py | 76 +++ src/rocksdb/tools/advisor/advisor/rule_parser.py | 510 +++++++++++++++++++++ .../tools/advisor/advisor/rule_parser_example.py | 98 ++++ src/rocksdb/tools/advisor/advisor/rules.ini | 214 +++++++++ src/rocksdb/tools/advisor/test/__init__.py | 0 src/rocksdb/tools/advisor/test/input_files/LOG-0 | 30 ++ src/rocksdb/tools/advisor/test/input_files/LOG-1 | 25 + .../tools/advisor/test/input_files/OPTIONS-000005 | 49 ++ .../test/input_files/log_stats_parser_keys_ts | 3 + .../tools/advisor/test/input_files/rules_err1.ini | 56 +++ .../tools/advisor/test/input_files/rules_err2.ini | 15 + .../tools/advisor/test/input_files/rules_err3.ini | 15 + .../tools/advisor/test/input_files/rules_err4.ini | 15 + .../tools/advisor/test/input_files/test_rules.ini | 47 ++ .../advisor/test/input_files/triggered_rules.ini | 83 ++++ .../tools/advisor/test/test_db_bench_runner.py | 141 ++++++ .../tools/advisor/test/test_db_log_parser.py | 96 ++++ .../tools/advisor/test/test_db_options_parser.py | 214 +++++++++ .../tools/advisor/test/test_db_stats_fetcher.py | 121 +++++ src/rocksdb/tools/advisor/test/test_rule_parser.py | 226 +++++++++ 30 files 
changed, 3870 insertions(+) create mode 100644 src/rocksdb/tools/advisor/README.md create mode 100644 src/rocksdb/tools/advisor/advisor/__init__.py create mode 100644 src/rocksdb/tools/advisor/advisor/bench_runner.py create mode 100644 src/rocksdb/tools/advisor/advisor/config_optimizer_example.py create mode 100644 src/rocksdb/tools/advisor/advisor/db_bench_runner.py create mode 100644 src/rocksdb/tools/advisor/advisor/db_config_optimizer.py create mode 100644 src/rocksdb/tools/advisor/advisor/db_log_parser.py create mode 100644 src/rocksdb/tools/advisor/advisor/db_options_parser.py create mode 100755 src/rocksdb/tools/advisor/advisor/db_stats_fetcher.py create mode 100644 src/rocksdb/tools/advisor/advisor/db_timeseries_parser.py create mode 100644 src/rocksdb/tools/advisor/advisor/ini_parser.py create mode 100644 src/rocksdb/tools/advisor/advisor/rule_parser.py create mode 100644 src/rocksdb/tools/advisor/advisor/rule_parser_example.py create mode 100644 src/rocksdb/tools/advisor/advisor/rules.ini create mode 100644 src/rocksdb/tools/advisor/test/__init__.py create mode 100644 src/rocksdb/tools/advisor/test/input_files/LOG-0 create mode 100644 src/rocksdb/tools/advisor/test/input_files/LOG-1 create mode 100644 src/rocksdb/tools/advisor/test/input_files/OPTIONS-000005 create mode 100644 src/rocksdb/tools/advisor/test/input_files/log_stats_parser_keys_ts create mode 100644 src/rocksdb/tools/advisor/test/input_files/rules_err1.ini create mode 100644 src/rocksdb/tools/advisor/test/input_files/rules_err2.ini create mode 100644 src/rocksdb/tools/advisor/test/input_files/rules_err3.ini create mode 100644 src/rocksdb/tools/advisor/test/input_files/rules_err4.ini create mode 100644 src/rocksdb/tools/advisor/test/input_files/test_rules.ini create mode 100644 src/rocksdb/tools/advisor/test/input_files/triggered_rules.ini create mode 100644 src/rocksdb/tools/advisor/test/test_db_bench_runner.py create mode 100644 src/rocksdb/tools/advisor/test/test_db_log_parser.py create 
mode 100644 src/rocksdb/tools/advisor/test/test_db_options_parser.py create mode 100644 src/rocksdb/tools/advisor/test/test_db_stats_fetcher.py create mode 100644 src/rocksdb/tools/advisor/test/test_rule_parser.py (limited to 'src/rocksdb/tools/advisor') diff --git a/src/rocksdb/tools/advisor/README.md b/src/rocksdb/tools/advisor/README.md new file mode 100644 index 000000000..b02d7ec50 --- /dev/null +++ b/src/rocksdb/tools/advisor/README.md @@ -0,0 +1,96 @@ +# Rocksdb Tuning Advisor + +## Motivation + +The performance of Rocksdb is contingent on its tuning. However, +because of the complexity of its underlying technology and a large number of +configurable parameters, a good configuration is sometimes hard to obtain. The aim of +the Python command-line tool, Rocksdb Advisor, is to automate the process of +suggesting improvements in the configuration based on advice from Rocksdb +experts. + +## Overview + +Experts share their wisdom as rules comprising conditions and suggestions in the INI format (refer to +[rules.ini](https://github.com/facebook/rocksdb/blob/main/tools/advisor/advisor/rules.ini)). +Users provide the Rocksdb configuration that they want to improve upon (as the +familiar Rocksdb OPTIONS file — +[example](https://github.com/facebook/rocksdb/blob/main/examples/rocksdb_option_file_example.ini)) +and the path of the file which contains Rocksdb logs and statistics. +The [Advisor](https://github.com/facebook/rocksdb/blob/main/tools/advisor/advisor/rule_parser_example.py) +creates appropriate DataSource objects (for Rocksdb +[logs](https://github.com/facebook/rocksdb/blob/main/tools/advisor/advisor/db_log_parser.py), +[options](https://github.com/facebook/rocksdb/blob/main/tools/advisor/advisor/db_options_parser.py), +[statistics](https://github.com/facebook/rocksdb/blob/main/tools/advisor/advisor/db_stats_fetcher.py) etc.) +and provides them to the [Rules Engine](https://github.com/facebook/rocksdb/blob/main/tools/advisor/advisor/rule_parser.py). 
+The Rules Engine uses rules from experts to parse data-sources and trigger appropriate rules. +The Advisor's output gives information about which rules were triggered, +why they were triggered and what each of them suggests. Each suggestion +provided by a triggered rule advises some action on a Rocksdb +configuration option, for example, increase CFOptions.write_buffer_size, +set bloom_bits to 2 etc. + +## Usage + +### Prerequisites +The tool needs the following to run: +* python3 + +### Running the tool +An example command to run the tool: + +```shell +cd rocksdb/tools/advisor +python3 -m advisor.rule_parser_example --rules_spec=advisor/rules.ini --rocksdb_options=test/input_files/OPTIONS-000005 --log_files_path_prefix=test/input_files/LOG-0 --stats_dump_period_sec=20 +``` + +### Command-line arguments + +Most important amongst all the input that the Advisor needs, are the rules +spec and starting Rocksdb configuration. The configuration is provided as the +familiar Rocksdb Options file (refer [example](https://github.com/facebook/rocksdb/blob/main/examples/rocksdb_option_file_example.ini)). +The Rules spec is written in the INI format (more details in +[rules.ini](https://github.com/facebook/rocksdb/blob/main/tools/advisor/advisor/rules.ini)). + +In brief, a Rule is made of conditions and is triggered when all its +constituent conditions are triggered. When triggered, a Rule suggests changes +(increase/decrease/set to a suggested value) to certain Rocksdb options that +aim to improve Rocksdb performance. Every Condition has a 'source' i.e. +the data source that would be checked for triggering that condition. +For example, a log Condition (with 'source=LOG') is triggered if a particular +'regex' is found in the Rocksdb LOG files. As of now the Rules Engine +supports 3 types of Conditions (and consequently data-sources): +LOG, OPTIONS, TIME_SERIES. 
The TIME_SERIES data can be sourced from the +Rocksdb [statistics](https://github.com/facebook/rocksdb/blob/main/include/rocksdb/statistics.h) +or [perf context](https://github.com/facebook/rocksdb/blob/main/include/rocksdb/perf_context.h). + +For more information about the remaining command-line arguments, run: + +```shell +cd rocksdb/tools/advisor +python3 -m advisor.rule_parser_example --help +``` + +### Sample output + +Here, a Rocksdb log-based rule has been triggered: + +```shell +Rule: stall-too-many-memtables +LogCondition: stall-too-many-memtables regex: Stopping writes because we have \d+ immutable memtables \(waiting for flush\), max_write_buffer_number is set to \d+ +Suggestion: inc-bg-flush option : DBOptions.max_background_flushes action : increase suggested_values : ['2'] +Suggestion: inc-write-buffer option : CFOptions.max_write_buffer_number action : increase +scope: col_fam: +{'default'} +``` + +## Running the tests + +Tests for the code have been added to the +[test/](https://github.com/facebook/rocksdb/tree/main/tools/advisor/test) +directory. For example, to run the unit tests for db_log_parser.py: + +```shell +cd rocksdb/tools/advisor +python3 -m unittest -v test.test_db_log_parser +``` diff --git a/src/rocksdb/tools/advisor/advisor/__init__.py b/src/rocksdb/tools/advisor/advisor/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/rocksdb/tools/advisor/advisor/bench_runner.py b/src/rocksdb/tools/advisor/advisor/bench_runner.py new file mode 100644 index 000000000..45d6c8313 --- /dev/null +++ b/src/rocksdb/tools/advisor/advisor/bench_runner.py @@ -0,0 +1,39 @@ +# Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +# This source code is licensed under both the GPLv2 (found in the +# COPYING file in the root directory) and Apache 2.0 License +# (found in the LICENSE.Apache file in the root directory). 
import re
from abc import ABC, abstractmethod


class BenchmarkRunner(ABC):
    """Abstract interface for a benchmark that the config optimizer can drive.

    Subclasses implement ``is_metric_better`` and ``run_experiment``; the
    log-file naming helper is shared by all runners.
    """

    # Characters that are NOT allowed in an info-log file name; each one is
    # replaced by '_'. Raw string: '\-' and '\.' are regex escapes, not
    # Python string escapes. Compiled once instead of on every call.
    _INFO_LOG_UNSAFE_CHARS = re.compile(r"[^0-9a-zA-Z\-_\.]")

    @staticmethod
    @abstractmethod
    def is_metric_better(new_metric, old_metric):
        """Return whether ``new_metric`` is an improvement over ``old_metric``."""
        pass

    @abstractmethod
    def run_experiment(self):
        """Run the benchmark; should return a list of DataSource objects."""
        pass

    @staticmethod
    def get_info_log_file_name(log_dir, db_path):
        """Return the name of the Rocksdb info LOG file for ``db_path``.

        Example: DB Path = /dev/shm and OPTIONS file has option
        db_log_dir=/tmp/rocks/, then the name of the log file will be
        'dev_shm_LOG' and its location will be /tmp/rocks. If db_log_dir is
        not specified in the OPTIONS file, then the location of the log file
        will be /dev/shm and the name of the file will be 'LOG'.

        Args:
            log_dir: value of db_log_dir; any falsy value (None, "") means
                the option is not set.
            db_path: path of the database, e.g. /dev/shm/dbbench.

        Returns:
            The log file name (without its directory).
        """
        if not log_dir:
            return "LOG"
        # refer GetInfoLogPrefix() in rocksdb/util/filename.cc
        # Drop the first character (the leading '/') and map every remaining
        # character outside [0-9a-zA-Z-_.] to '_' in a single pass.
        prefix = BenchmarkRunner._INFO_LOG_UNSAFE_CHARS.sub("_", db_path[1:])
        if not prefix.endswith("_"):
            prefix += "_"
        return prefix + "LOG"
import argparse
import importlib

from advisor.db_config_optimizer import ConfigOptimizer
from advisor.db_log_parser import NO_COL_FAMILY
from advisor.db_options_parser import DatabaseOptions
from advisor.rule_parser import RulesSpec


CONFIG_OPT_NUM_ITER = 10


def main(args):
    """Run the configuration optimizer for the parsed command-line ``args``.

    Builds the rules parser, the benchmark runner (class looked up by name in
    the module given on the command line) and the starting database options,
    hands them to ``ConfigOptimizer`` and prints where the final configuration
    was written.

    Args:
        args: the ``argparse.Namespace`` produced by ``_build_arg_parser()``.
    """
    # initialise the RulesSpec parser
    rule_spec_parser = RulesSpec(args.rules_spec)
    # initialise the benchmark runner; import_module returns the named
    # (sub)module itself, which is what __import__ with a fromlist did
    bench_runner_module = importlib.import_module(args.benchrunner_module)
    bench_runner_class = getattr(bench_runner_module, args.benchrunner_class)
    ods_args = {}
    if args.ods_client and args.ods_entity:
        ods_args["client_script"] = args.ods_client
        ods_args["entity"] = args.ods_entity
        # NOTE(review): the key prefix is treated as meaningful only together
        # with an ODS client and entity -- confirm against upstream nesting.
        if args.ods_key_prefix:
            ods_args["key_prefix"] = args.ods_key_prefix
    db_bench_runner = bench_runner_class(args.benchrunner_pos_args, ods_args)
    # initialise the database configuration
    db_options = DatabaseOptions(args.rocksdb_options, args.misc_options)
    # set the frequency at which stats are dumped in the LOG file
    db_log_dump_settings = {
        "DBOptions.stats_dump_period_sec": {NO_COL_FAMILY: args.stats_dump_period_sec}
    }
    db_options.update_options(db_log_dump_settings)
    # initialise the configuration optimizer
    config_optimizer = ConfigOptimizer(
        db_bench_runner, db_options, rule_spec_parser, args.base_db_path
    )
    # run the optimiser to improve the database configuration for given
    # benchmarks, with the help of expert-specified rules
    final_db_options = config_optimizer.run()
    # generate the final rocksdb options file
    print(
        "Final configuration in: " + final_db_options.generate_options_config("final")
    )
    print("Final miscellaneous options: " + repr(final_db_options.get_misc_options()))


def _build_arg_parser():
    """Return the argument parser for the config-optimizer command line.

    An example run of this tool from the command-line would look like:
    python3 -m advisor.config_optimizer_example
    --base_db_path=/tmp/rocksdbtest-155919/dbbench
    --rocksdb_options=temp/OPTIONS_boot.tmp --misc_options bloom_bits=2
    --rules_spec=advisor/rules.ini --stats_dump_period_sec=20
    --benchrunner_module=advisor.db_bench_runner
    --benchrunner_class=DBBenchRunner --benchrunner_pos_args ./../../db_bench
    readwhilewriting use_existing_db=true duration=90
    """
    parser = argparse.ArgumentParser(
        description=(
            "This script is used for searching for a better database configuration"
        )
    )
    parser.add_argument(
        "--rocksdb_options",
        required=True,
        type=str,
        help="path of the starting Rocksdb OPTIONS file",
    )
    # these are options that are column-family agnostic and are not yet
    # supported by the Rocksdb Options file: eg. bloom_bits=2
    parser.add_argument(
        "--misc_options",
        nargs="*",
        help="whitespace-separated list of options that are not supported "
        + "by the Rocksdb OPTIONS file, given in the "
        + '<option_name>=<option_value> format eg. "bloom_bits=2 '
        + 'rate_limiter_bytes_per_sec=128000000"',
    )
    parser.add_argument(
        "--base_db_path", required=True, type=str, help="path for the Rocksdb database"
    )
    parser.add_argument(
        "--rules_spec",
        required=True,
        type=str,
        help="path of the file containing the expert-specified Rules",
    )
    parser.add_argument(
        "--stats_dump_period_sec",
        required=True,
        type=int,
        help="the frequency (in seconds) at which STATISTICS are printed to "
        + "the Rocksdb LOG file",
    )
    # ODS arguments
    parser.add_argument("--ods_client", type=str, help="the ODS client binary")
    parser.add_argument(
        "--ods_entity",
        type=str,
        help="the servers for which the ODS stats need to be fetched",
    )
    parser.add_argument(
        "--ods_key_prefix",
        type=str,
        help="the prefix that needs to be attached to the keys of time "
        + "series to be fetched from ODS",
    )
    # benchrunner_module example: advisor.db_bench_runner
    parser.add_argument(
        "--benchrunner_module",
        required=True,
        type=str,
        help="the module containing the BenchmarkRunner class to be used by "
        + "the Optimizer, example: advisor.db_bench_runner",
    )
    # benchrunner_class example: DBBenchRunner
    parser.add_argument(
        "--benchrunner_class",
        required=True,
        type=str,
        help="the name of the BenchmarkRunner class to be used by the "
        + "Optimizer, should be present in the module provided in the "
        + "benchrunner_module argument, example: DBBenchRunner",
    )
    parser.add_argument(
        "--benchrunner_pos_args",
        nargs="*",
        help="whitespace-separated positional arguments that are passed on "
        + "to the constructor of the BenchmarkRunner class provided in the "
        + 'benchrunner_class argument, example: "use_existing_db=true '
        + 'duration=900"',
    )
    return parser


if __name__ == "__main__":
    main(_build_arg_parser().parse_args())
b/src/rocksdb/tools/advisor/advisor/db_bench_runner.py @@ -0,0 +1,237 @@ +# Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +# This source code is licensed under both the GPLv2 (found in the +# COPYING file in the root directory) and Apache 2.0 License +# (found in the LICENSE.Apache file in the root directory). + +import shutil +import subprocess +import time + +from advisor.bench_runner import BenchmarkRunner +from advisor.db_log_parser import DatabaseLogs, DataSource, NO_COL_FAMILY +from advisor.db_stats_fetcher import ( + DatabasePerfContext, + LogStatsParser, + OdsStatsFetcher, +) + + +""" +NOTE: This is not thread-safe, because the output file is simply overwritten. +""" + + +class DBBenchRunner(BenchmarkRunner): + OUTPUT_FILE = "temp/dbbench_out.tmp" + ERROR_FILE = "temp/dbbench_err.tmp" + DB_PATH = "DB path" + THROUGHPUT = "ops/sec" + PERF_CON = " PERF_CONTEXT:" + + @staticmethod + def is_metric_better(new_metric, old_metric): + # for db_bench 'throughput' is the metric returned by run_experiment + return new_metric >= old_metric + + @staticmethod + def get_opt_args_str(misc_options_dict): + # given a dictionary of options and their values, return a string + # that can be appended as command-line arguments + optional_args_str = "" + for option_name, option_value in misc_options_dict.items(): + if option_value: + optional_args_str += " --" + option_name + "=" + str(option_value) + return optional_args_str + + def __init__(self, positional_args, ods_args=None): + # parse positional_args list appropriately + self.db_bench_binary = positional_args[0] + self.benchmark = positional_args[1] + self.db_bench_args = None + if len(positional_args) > 2: + # options list with each option given as "