author     Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-27 18:24:20 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-27 18:24:20 +0000
commit     483eb2f56657e8e7f419ab1a4fab8dce9ade8609
tree       e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/seastar/tests/perf
parent     Initial commit.
Adding upstream version 14.2.21.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/seastar/tests/perf')
-rw-r--r--  src/seastar/tests/perf/CMakeLists.txt        77
-rw-r--r--  src/seastar/tests/perf/fstream_perf.cc       79
-rw-r--r--  src/seastar/tests/perf/future_util_perf.cc   75
-rw-r--r--  src/seastar/tests/perf/perf-tests.md        106
-rw-r--r--  src/seastar/tests/perf/perf_tests.cc        362
-rw-r--r--  src/seastar/tests/perf/perf_tests.hh        230
6 files changed, 929 insertions, 0 deletions
diff --git a/src/seastar/tests/perf/CMakeLists.txt b/src/seastar/tests/perf/CMakeLists.txt
new file mode 100644
index 00000000..bd97681c
--- /dev/null
+++ b/src/seastar/tests/perf/CMakeLists.txt
@@ -0,0 +1,77 @@
+#
+# This file is open source software, licensed to you under the terms
+# of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+# distributed with this work for additional information regarding copyright
+# ownership. You may not use this file except in compliance with the License.
+#
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+#
+# Copyright (C) 2018 Scylladb, Ltd.
+#
+
+add_library (seastar_perf_testing
+  perf_tests.hh
+  perf_tests.cc)
+
+target_link_libraries (seastar_perf_testing
+  PUBLIC
+    fmt::fmt
+    seastar_with_flags)
+
+# Logical target for all perf tests.
+add_custom_target (perf_tests)
+
+macro (seastar_add_test name)
+  set (args ${ARGN})
+
+  cmake_parse_arguments (
+    parsed_args
+    "NO_SEASTAR_PERF_TESTING_LIBRARY"
+    ""
+    "SOURCES"
+    ${args})
+
+  set (target test_perf_${name})
+  add_executable (${target} ${parsed_args_SOURCES})
+
+  if (parsed_args_NO_SEASTAR_PERF_TESTING_LIBRARY)
+    set (libraries seastar_with_flags)
+  else ()
+    set (libraries
+      seastar_with_flags
+      seastar_perf_testing)
+  endif ()
+
+  target_link_libraries (${target}
+    PRIVATE ${libraries})
+
+  target_include_directories (${target}
+    PRIVATE
+      ${CMAKE_CURRENT_SOURCE_DIR}
+      ${Seastar_SOURCE_DIR}/src)
+
+  set_target_properties (${target}
+    PROPERTIES
+      OUTPUT_NAME ${name})
+
+  add_dependencies (perf_tests ${target})
+  set (${name}_test ${target})
+endmacro ()
+
+seastar_add_test (fstream
+  SOURCES fstream_perf.cc
+  NO_SEASTAR_PERF_TESTING_LIBRARY)
+
+seastar_add_test (future_util
+  SOURCES future_util_perf.cc)
diff --git a/src/seastar/tests/perf/fstream_perf.cc b/src/seastar/tests/perf/fstream_perf.cc
new file mode 100644
index 00000000..b8cf4503
--- /dev/null
+++ b/src/seastar/tests/perf/fstream_perf.cc
@@ -0,0 +1,79 @@
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2016 ScyllaDB
+ */
+
+#include <seastar/core/reactor.hh>
+#include <seastar/core/fstream.hh>
+#include <seastar/core/file.hh>
+#include <seastar/core/app-template.hh>
+
+using namespace seastar;
+using namespace std::chrono_literals;
+
+int main(int ac, char** av) {
+    app_template at;
+    namespace bpo = boost::program_options;
+    at.add_options()
+            ("concurrency", bpo::value<unsigned>()->default_value(1), "Write operations to issue in parallel")
+            ("buffer-size", bpo::value<size_t>()->default_value(4096), "Write buffer size")
+            ("total-ops", bpo::value<unsigned>()->default_value(100000), "Total write operations to issue")
+            ("sloppy-size", bpo::value<bool>()->default_value(false), "Enable the sloppy-size optimization")
+            ;
+    return at.run(ac, av, [&at] {
+        auto concurrency = at.configuration()["concurrency"].as<unsigned>();
+        auto buffer_size = at.configuration()["buffer-size"].as<size_t>();
+        auto total_ops = at.configuration()["total-ops"].as<unsigned>();
+        auto sloppy_size = at.configuration()["sloppy-size"].as<bool>();
+        file_open_options foo;
+        foo.sloppy_size = sloppy_size;
+        return open_file_dma(
+                "testfile.tmp", open_flags::wo | open_flags::create | open_flags::exclusive,
+                foo).then([=] (file f) {
+            file_output_stream_options foso;
+            foso.buffer_size = buffer_size;
+            foso.preallocation_size = 32 << 20;
+            foso.write_behind = concurrency;
+            auto os = make_file_output_stream(f, foso);
+            return do_with(std::move(os), std::move(f), unsigned(0), [=] (output_stream<char>& os, file& f, unsigned& completed) {
+                auto start = std::chrono::steady_clock::now();
+                return repeat([=, &os, &completed] {
+                    if (completed == total_ops) {
+                        return make_ready_future<stop_iteration>(stop_iteration::yes);
+                    }
+                    char buf[buffer_size];
+                    memset(buf, 0, buffer_size);
+                    return os.write(buf, buffer_size).then([&completed] {
+                        ++completed;
+                        return stop_iteration::no;
+                    });
+                }).then([=, &os] {
+                    auto end = std::chrono::steady_clock::now();
+                    using fseconds = std::chrono::duration<float, std::ratio<1, 1>>;
+                    auto iops = total_ops / std::chrono::duration_cast<fseconds>(end - start).count();
+                    fmt::print("{:10} {:10} {:10} {:12}\n", "bufsize", "ops", "iodepth", "IOPS");
+                    fmt::print("{:10d} {:10d} {:10d} {:12.0f}\n", buffer_size, total_ops, concurrency, iops);
+                    return os.flush();
+                }).then([&os] {
+                    return os.close();
+                });
+            });
+        });
+    });
+}
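The benchmark above derives its IOPS figure by dividing the number of completed writes by the elapsed wall-clock time, going through a float-based `std::chrono` duration so that sub-second precision is not truncated away. A minimal standalone sketch of that arithmetic (the `compute_iops` helper is illustrative only, not part of the commit):

```c++
#include <chrono>
#include <cstdio>

// Mirrors the IOPS computation in fstream_perf.cc: casting to a float-based
// duration keeps fractional seconds that an integral std::chrono::seconds
// cast would discard.
double compute_iops(unsigned total_ops,
                    std::chrono::steady_clock::time_point start,
                    std::chrono::steady_clock::time_point end) {
    using fseconds = std::chrono::duration<float, std::ratio<1, 1>>;
    return total_ops / std::chrono::duration_cast<fseconds>(end - start).count();
}

int main() {
    auto start = std::chrono::steady_clock::now();
    // ... issue writes here ...
    auto end = start + std::chrono::milliseconds(2500);
    std::printf("%.0f IOPS\n", compute_iops(100000, start, end)); // prints 40000
}
```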
+ */ + +#include <boost/range.hpp> +#include <boost/range/irange.hpp> + +#include "perf_tests.hh" + +struct parallel_for_each { + std::vector<int> empty_range; + std::vector<int> range; + int value; + + parallel_for_each() + : empty_range() + , range(boost::copy_range<std::vector<int>>(boost::irange(1, 100))) + { } +}; + +PERF_TEST_F(parallel_for_each, empty) +{ + return seastar::parallel_for_each(empty_range, [] (int) -> future<> { + abort(); + }); +} + +[[gnu::noinline]] +future<> immediate(int v, int& vs) +{ + vs += v; + return make_ready_future<>(); +} + +PERF_TEST_F(parallel_for_each, immediate) +{ + return seastar::parallel_for_each(range, [this] (int v) { + return immediate(v, value); + }).then([this] { + perf_tests::do_not_optimize(value); + }); +} + +[[gnu::noinline]] +future<> suspend(int v, int& vs) +{ + vs += v; + return later(); +} + +PERF_TEST_F(parallel_for_each, suspend) +{ + return seastar::parallel_for_each(range, [this] (int v) { + return suspend(v, value); + }).then([this] { + perf_tests::do_not_optimize(value); + }); +} diff --git a/src/seastar/tests/perf/perf-tests.md b/src/seastar/tests/perf/perf-tests.md new file mode 100644 index 00000000..c42e7be4 --- /dev/null +++ b/src/seastar/tests/perf/perf-tests.md @@ -0,0 +1,106 @@ +# perf-tests + +`perf-tests` is a simple microbenchmarking framework. Its main purpose is to allow monitoring the impact that code changes have on performance. + +## Theory of operation + +The framework performs each test in several runs. During a run the microbenchmark code is executed in a loop and the average time of an iteration is computed. The shown results are median, median absolute deviation, maximum and minimum value of all the runs. + +``` +single run iterations: 0 +single run duration: 1.000s +number of runs: 5 + +test iterations median mad min max +combined.one_row 745336 691.218ns 0.175ns 689.073ns 696.476ns +combined.single_active 7871 85.271us 76.185ns 85.145us 108.316us +``` + +`perf-tests` allows limiting the number of iterations or the duration of each run. In the latter case there is an additional dry run used to estimate how many iterations can be run in the specified time. The measured runs are limited by that number of iterations. This means that there is no overhead caused by timers and that each run consists of the same number of iterations. + +### Flags + +* `-i <n>` or `--iterations <n>` – limits the number of iterations in each run to no more than `n` (0 for unlimited) +* `-d <t>` or `--duration <t>` – limits the duration of each run to no more than `t` seconds (0 for unlimited) +* `-r <n>` or `--runs <n>` – the number of runs of each test to execute +* `-t <regexs>` or `--tests <regexs>` – executes only tests which names match any regular expression in a comma-separated list `regexs` +* `--list` – lists all available tests + +## Example usage + +### Simple test + +Performance tests are defined in a similar manner to unit tests. Macro `PERF_TEST(test_group, test_case)` allows specifying the name of the test and the group it belongs to. Microbenchmark can either return nothing or a future. + +Compiler may attempt to optimise too much of the test logic. A way of preventing this is passing the final result of all computations to a function `perf_tests::do_not_optimize()`. That function should introduce little to none overhead, but forces the compiler to actually compute the value. 
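The three tests above bracket `parallel_for_each` behaviour: no work at all, work that completes immediately, and work that suspends via `later()`. The same fixture can host further variants; a hypothetical sketch (not in this commit) that traverses the same range sequentially with `seastar::do_for_each`, for comparison against the `immediate` case:

```c++
// Hypothetical companion test reusing the parallel_for_each fixture above:
// walks the same range sequentially with seastar::do_for_each so its cost
// can be compared side by side with the parallel variants.
PERF_TEST_F(parallel_for_each, sequential)
{
    return seastar::do_for_each(range, [this] (int v) {
        value += v;
        return make_ready_future<>();
    }).then([this] {
        perf_tests::do_not_optimize(value);
    });
}
```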
diff --git a/src/seastar/tests/perf/perf-tests.md b/src/seastar/tests/perf/perf-tests.md
new file mode 100644
index 00000000..c42e7be4
--- /dev/null
+++ b/src/seastar/tests/perf/perf-tests.md
@@ -0,0 +1,106 @@
+# perf-tests
+
+`perf-tests` is a simple microbenchmarking framework. Its main purpose is to allow monitoring the impact that code changes have on performance.
+
+## Theory of operation
+
+The framework performs each test in several runs. During a run the microbenchmark code is executed in a loop and the average time of an iteration is computed. The reported results are the median, the median absolute deviation, and the minimum and maximum values across all the runs.
+
+```
+single run iterations:    0
+single run duration:      1.000s
+number of runs:           5
+
+test                                      iterations      median         mad         min         max
+combined.one_row                              745336   691.218ns     0.175ns   689.073ns   696.476ns
+combined.single_active                          7871    85.271us    76.185ns    85.145us   108.316us
+```
+
+`perf-tests` allows limiting either the number of iterations or the duration of each run. In the latter case an additional dry run is used to estimate how many iterations can be executed in the specified time, and the measured runs are then capped at that number of iterations. This means that there is no overhead caused by timers and that each run consists of the same number of iterations.
+
+### Flags
+
+* `-i <n>` or `--iterations <n>` – limits the number of iterations in each run to no more than `n` (0 for unlimited)
+* `-d <t>` or `--duration <t>` – limits the duration of each run to no more than `t` seconds (0 for unlimited)
+* `-r <n>` or `--runs <n>` – the number of runs of each test to execute
+* `-t <regexs>` or `--tests <regexs>` – executes only the tests whose names match any regular expression in the comma-separated list `regexs`
+* `--list` – lists all available tests
+
+## Example usage
+
+### Simple test
+
+Performance tests are defined in a manner similar to unit tests. The macro `PERF_TEST(test_group, test_case)` specifies the name of the test and the group it belongs to. A microbenchmark can either return nothing or a future.
+
+The compiler may attempt to optimise away too much of the test logic. A way of preventing this is to pass the final result of all computations to the function `perf_tests::do_not_optimize()`. That function introduces little to no overhead, but forces the compiler to actually compute the value.
+
+```c++
+PERF_TEST(example, simple1)
+{
+    auto v = compute_value();
+    perf_tests::do_not_optimize(v);
+}
+
+PERF_TEST(example, simple2)
+{
+    return compute_different_value().then([] (auto v) {
+        perf_tests::do_not_optimize(v);
+    });
+}
+```
+
+### Fixtures
+
+As with unit tests, performance tests may benefit from a fixture that sets up a proper environment. Such tests should use the macro `PERF_TEST_F(test_group, test_case)`. The test itself will be a member function of a class derived from `test_group`.
+
+The constructor and destructor of a fixture are executed in the context of a Seastar thread, but the actual test logic is not. The same instance of a fixture is used for multiple iterations of the test.
+
+```c++
+class example {
+protected:
+    data_set _ds1;
+    data_set _ds2;
+private:
+    static data_set prepare_data_set();
+public:
+    example()
+        : _ds1(prepare_data_set())
+        , _ds2(prepare_data_set())
+    { }
+};
+
+PERF_TEST_F(example, fixture1)
+{
+    auto r = do_something_with(_ds1);
+    perf_tests::do_not_optimize(r);
+}
+
+PERF_TEST_F(example, fixture2)
+{
+    auto r = do_something_with(_ds1, _ds2);
+    perf_tests::do_not_optimize(r);
+}
+```
+
+### Custom time measurement
+
+Even with fixtures it may be necessary to do some costly initialisation during each iteration. Its impact can be reduced by marking the exact part of the test that should be measured, using the functions `perf_tests::start_measuring_time()` and `perf_tests::stop_measuring_time()`.
+
+```c++
+PERF_TEST(example, custom_time_measurement1)
+{
+    auto data = prepare_data();
+    perf_tests::start_measuring_time();
+    do_something(std::move(data));
+    perf_tests::stop_measuring_time();
+}
+
+PERF_TEST(example, custom_time_measurement2)
+{
+    auto data = prepare_data();
+    perf_tests::start_measuring_time();
+    return do_something_else(std::move(data)).finally([] {
+        perf_tests::stop_measuring_time();
+    });
+}
+```
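perf-tests.md quotes the median and the median absolute deviation as the headline statistics. For reference, a minimal sketch of how they can be derived from the per-run averages; it mirrors the sort-and-index logic of `performance_test::do_run()` in perf_tests.cc below (the `summarize` helper is illustrative only):

```c++
#include <algorithm>
#include <cmath>
#include <vector>

struct run_stats { double median, mad, min, max; };

// Mirrors do_run(): sort the per-run averages, take the middle element as
// the median (the upper median for an even count), then sort the absolute
// deviations from it and index the same midpoint for the MAD.
run_stats summarize(std::vector<double> results) {
    std::sort(results.begin(), results.end());
    auto mid = results.size() / 2;
    double median = results[mid];

    std::vector<double> diffs;
    for (auto x : results) {
        diffs.push_back(std::fabs(x - median));
    }
    std::sort(diffs.begin(), diffs.end());

    return { median, diffs[mid], results.front(), results.back() };
}
```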
diff --git a/src/seastar/tests/perf/perf_tests.cc b/src/seastar/tests/perf/perf_tests.cc
new file mode 100644
index 00000000..2b745fe3
--- /dev/null
+++ b/src/seastar/tests/perf/perf_tests.cc
@@ -0,0 +1,362 @@
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2018 ScyllaDB Ltd.
+ */
+
+#include "perf_tests.hh"
+
+#include <fstream>
+#include <regex>
+
+#include <boost/range.hpp>
+#include <boost/range/adaptors.hpp>
+#include <boost/range/algorithm.hpp>
+
+#include <fmt/ostream.h>
+
+#include <seastar/core/app-template.hh>
+#include <seastar/core/thread.hh>
+#include <seastar/json/formatter.hh>
+
+namespace perf_tests {
+namespace internal {
+
+namespace {
+
+// We need to use a signal-based timer instead of the Seastar ones so that
+// tests that do not suspend can still be interrupted.
+// This causes no overhead, though, since the timer is used only in the dry run.
+class signal_timer {
+    std::function<void()> _fn;
+    timer_t _timer;
+public:
+    explicit signal_timer(std::function<void()> fn) : _fn(fn) {
+        sigevent se{};
+        se.sigev_notify = SIGEV_SIGNAL;
+        se.sigev_signo = SIGALRM;
+        se.sigev_value.sival_ptr = this;
+        auto ret = timer_create(CLOCK_MONOTONIC, &se, &_timer);
+        if (ret) {
+            throw std::system_error(ret, std::system_category());
+        }
+    }
+
+    ~signal_timer() {
+        timer_delete(_timer);
+    }
+
+    void arm(std::chrono::steady_clock::duration dt) {
+        time_t sec = std::chrono::duration_cast<std::chrono::seconds>(dt).count();
+        auto nsec = std::chrono::duration_cast<std::chrono::nanoseconds>(dt).count();
+        nsec -= std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::seconds(sec)).count();
+
+        itimerspec ts{};
+        ts.it_value.tv_sec = sec;
+        ts.it_value.tv_nsec = nsec;
+        auto ret = timer_settime(_timer, 0, &ts, nullptr);
+        if (ret) {
+            throw std::system_error(ret, std::system_category());
+        }
+    }
+
+    void cancel() {
+        itimerspec ts{};
+        auto ret = timer_settime(_timer, 0, &ts, nullptr);
+        if (ret) {
+            throw std::system_error(ret, std::system_category());
+        }
+    }
+public:
+    static void init() {
+        struct sigaction sa{};
+        sa.sa_sigaction = &signal_timer::signal_handler;
+        sa.sa_flags = SA_SIGINFO;
+        auto ret = sigaction(SIGALRM, &sa, nullptr);
+        if (ret) {
+            throw std::system_error(ret, std::system_category());
+        }
+    }
+private:
+    static void signal_handler(int, siginfo_t* si, void*) {
+        auto t = static_cast<signal_timer*>(si->si_value.sival_ptr);
+        t->_fn();
+    }
+};
+
+}
+
+time_measurement measure_time;
+
+struct config;
+struct result;
+
+struct result_printer {
+    virtual ~result_printer() = default;
+
+    virtual void print_configuration(const config&) = 0;
+    virtual void print_result(const result&) = 0;
+};
+
+struct config {
+    uint64_t single_run_iterations;
+    std::chrono::nanoseconds single_run_duration;
+    unsigned number_of_runs;
+    std::vector<std::unique_ptr<result_printer>> printers;
+};
+
+struct result {
+    sstring test_name;
+
+    uint64_t total_iterations;
+    unsigned runs;
+
+    double median;
+    double mad;
+    double min;
+    double max;
+};
+
+namespace {
+
+struct duration {
+    double value;
+};
+
+static inline std::ostream& operator<<(std::ostream& os, duration d)
+{
+    auto value = d.value;
+    if (value < 1'000) {
+        os << fmt::format("{:.3f}ns", value);
+    } else if (value < 1'000'000) {
+        // fmt hasn't discovered unicode yet so we are stuck with uicroseconds
+        // See: https://github.com/fmtlib/fmt/issues/628
+        os << fmt::format("{:.3f}us", value / 1'000);
+    } else if (value < 1'000'000'000) {
+        os << fmt::format("{:.3f}ms", value / 1'000'000);
+    } else {
+        os << fmt::format("{:.3f}s", value / 1'000'000'000);
+    }
+    return os;
+}
+
+}
+
+static constexpr auto format_string = "{:<40} {:>11} {:>11} {:>11} {:>11} {:>11}\n";
+
+struct stdout_printer final : result_printer {
+    virtual void print_configuration(const config& c) override {
+        fmt::print("{:<25} {}\n{:<25} {}\n{:<25} {}\n\n",
+                   "single run iterations:", c.single_run_iterations,
+                   "single run duration:", duration { double(c.single_run_duration.count()) },
+                   "number of runs:", c.number_of_runs);
+        fmt::print(format_string, "test", "iterations", "median", "mad", "min", "max");
+    }
+
+    virtual void print_result(const result& r) override {
+        fmt::print(format_string, r.test_name, r.total_iterations / r.runs, duration { r.median },
+                   duration { r.mad }, duration { r.min }, duration { r.max });
+    }
+};
+
+class json_printer final : public result_printer {
+    std::string _output_file;
+
+    std::unordered_map<std::string,
+                       std::unordered_map<std::string,
+                                          std::unordered_map<std::string, double>>> _root;
+public:
+    explicit json_printer(const std::string& file) : _output_file(file) { }
+
+    ~json_printer() {
+        std::ofstream out(_output_file);
+        out << json::formatter::to_json(_root);
+    }
+
+    virtual void print_configuration(const config&) override { }
+
+    virtual void print_result(const result& r) override {
+        auto& result = _root["results"][r.test_name];
+        result["runs"] = r.runs;
+        result["total_iterations"] = r.total_iterations;
+        result["median"] = r.median;
+        result["mad"] = r.mad;
+        result["min"] = r.min;
+        result["max"] = r.max;
+    }
+};
+
+void performance_test::do_run(const config& conf)
+{
+    _max_single_run_iterations = conf.single_run_iterations;
+    if (!_max_single_run_iterations) {
+        _max_single_run_iterations = std::numeric_limits<uint64_t>::max();
+    }
+
+    signal_timer tmr([this] {
+        _max_single_run_iterations.store(0, std::memory_order_relaxed);
+    });
+
+    // dry run, estimate the number of iterations
+    if (conf.single_run_duration.count()) {
+        // switch out of seastar thread
+        later().then([&] {
+            tmr.arm(conf.single_run_duration);
+            return do_single_run().finally([&] {
+                tmr.cancel();
+                _max_single_run_iterations = _single_run_iterations;
+            });
+        }).get();
+    }
+
+    auto results = std::vector<double>(conf.number_of_runs);
+    uint64_t total_iterations = 0;
+    for (auto i = 0u; i < conf.number_of_runs; i++) {
+        // switch out of seastar thread
+        later().then([&] {
+            _single_run_iterations = 0;
+            return do_single_run().then([&] (clock_type::duration dt) {
+                double ns = std::chrono::duration_cast<std::chrono::nanoseconds>(dt).count();
+                results[i] = ns / _single_run_iterations;
+
+                total_iterations += _single_run_iterations;
+            });
+        }).get();
+    }
+
+    result r{};
+    r.test_name = name();
+    r.total_iterations = total_iterations;
+    r.runs = conf.number_of_runs;
+
+    auto mid = conf.number_of_runs / 2;
+
+    boost::range::sort(results);
+    r.median = results[mid];
+
+    auto diffs = boost::copy_range<std::vector<double>>(
+        results | boost::adaptors::transformed([&] (double x) { return fabs(x - r.median); })
+    );
+    boost::range::sort(diffs);
+    r.mad = diffs[mid];
+
+    r.min = results[0];
+    r.max = results[results.size() - 1];
+
+    for (auto& rp : conf.printers) {
+        rp->print_result(r);
+    }
+}
+
+void performance_test::run(const config& conf)
+{
+    set_up();
+    try {
+        do_run(conf);
+    } catch (...) {
+        tear_down();
+        throw;
+    }
+    tear_down();
+}
+
+std::vector<std::unique_ptr<performance_test>>& all_tests()
+{
+    static std::vector<std::unique_ptr<performance_test>> tests;
+    return tests;
+}
+
+void performance_test::register_test(std::unique_ptr<performance_test> test)
+{
+    all_tests().emplace_back(std::move(test));
+}
+
+void run_all(const std::vector<std::string>& tests, const config& conf)
+{
+    auto can_run = [tests = boost::copy_range<std::vector<std::regex>>(tests)] (auto&& test) {
+        auto it = boost::range::find_if(tests, [&test] (const std::regex& regex) {
+            return std::regex_match(test->name(), regex);
+        });
+        return tests.empty() || it != tests.end();
+    };
+
+    for (auto& rp : conf.printers) {
+        rp->print_configuration(conf);
+    }
+    for (auto&& test : all_tests() | boost::adaptors::filtered(std::move(can_run))) {
+        test->run(conf);
+    }
+}
+
+}
+}
+
+int main(int ac, char** av)
+{
+    using namespace perf_tests::internal;
+    namespace bpo = boost::program_options;
+
+    app_template app;
+    app.add_options()
+        ("iterations,i", bpo::value<size_t>()->default_value(0),
+            "number of iterations in a single run")
+        ("duration,d", bpo::value<double>()->default_value(1),
+            "duration of a single run in seconds")
+        ("runs,r", bpo::value<size_t>()->default_value(5), "number of runs")
+        ("test,t", bpo::value<std::vector<std::string>>(), "tests to execute")
+        ("no-stdout", "do not print to stdout")
+        ("json-output", bpo::value<std::string>(), "output json file")
+        ("list", "list available tests")
+        ;
+
+    return app.run(ac, av, [&] {
+        return async([&] {
+            signal_timer::init();
+
+            config conf;
+            conf.single_run_iterations = app.configuration()["iterations"].as<size_t>();
+            auto dur = std::chrono::duration<double>(app.configuration()["duration"].as<double>());
+            conf.single_run_duration = std::chrono::duration_cast<std::chrono::nanoseconds>(dur);
+            conf.number_of_runs = app.configuration()["runs"].as<size_t>();
+
+            std::vector<std::string> tests_to_run;
+            if (app.configuration().count("test")) {
+                tests_to_run = app.configuration()["test"].as<std::vector<std::string>>();
+            }
+
+            if (app.configuration().count("list")) {
+                fmt::print("available tests:\n");
+                for (auto&& t : all_tests()) {
+                    fmt::print("\t{}\n", t->name());
+                }
+                return;
+            }
+
+            if (!app.configuration().count("no-stdout")) {
+                conf.printers.emplace_back(std::make_unique<stdout_printer>());
+            }
+
+            if (app.configuration().count("json-output")) {
+                conf.printers.emplace_back(std::make_unique<json_printer>(
+                    app.configuration()["json-output"].as<std::string>()
+                ));
+            }
+
+            run_all(tests_to_run, conf);
+        });
+    });
+}
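`result_printer` is deliberately small, so output backends other than stdout and JSON can be added without touching the measurement code. A hypothetical CSV printer sketch (not part of the commit; it assumes the `config`/`result` structs and the `<fstream>` include from perf_tests.cc above):

```c++
// Hypothetical third backend: emits one CSV row per test result, implementing
// the same result_printer interface as stdout_printer and json_printer.
class csv_printer final : public result_printer {
    std::ofstream _out;
public:
    explicit csv_printer(const std::string& file) : _out(file) {
        _out << "test,runs,total_iterations,median,mad,min,max\n";
    }

    virtual void print_configuration(const config&) override { }

    virtual void print_result(const result& r) override {
        _out << r.test_name << ',' << r.runs << ',' << r.total_iterations << ','
             << r.median << ',' << r.mad << ',' << r.min << ',' << r.max << '\n';
    }
};
```

It could then be registered in `main()` next to the other printers, e.g. with `conf.printers.emplace_back(std::make_unique<csv_printer>("results.csv"))` (the file name here is illustrative).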
diff --git a/src/seastar/tests/perf/perf_tests.hh b/src/seastar/tests/perf/perf_tests.hh
new file mode 100644
index 00000000..be01298a
--- /dev/null
+++ b/src/seastar/tests/perf/perf_tests.hh
@@ -0,0 +1,230 @@
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2018 ScyllaDB Ltd.
+ */
+
+#pragma once
+
+#include <atomic>
+#include <memory>
+
+#include <fmt/format.h>
+
+#include <seastar/core/future.hh>
+#include <seastar/core/future-util.hh>
+
+
+using namespace seastar;
+
+namespace perf_tests {
+namespace internal {
+
+struct config;
+
+using clock_type = std::chrono::steady_clock;
+
+class performance_test {
+    std::string _test_case;
+    std::string _test_group;
+
+    uint64_t _single_run_iterations = 0;
+    std::atomic<uint64_t> _max_single_run_iterations;
+private:
+    void do_run(const config&);
+protected:
+    [[gnu::always_inline]] [[gnu::hot]]
+    bool stop_iteration() const {
+        return _single_run_iterations >= _max_single_run_iterations.load(std::memory_order_relaxed);
+    }
+
+    [[gnu::always_inline]] [[gnu::hot]]
+    void next_iteration() {
+        _single_run_iterations++;
+    }
+
+    virtual void set_up() = 0;
+    virtual void tear_down() noexcept = 0;
+    virtual future<clock_type::duration> do_single_run() = 0;
+public:
+    performance_test(const std::string& test_case, const std::string& test_group)
+        : _test_case(test_case)
+        , _test_group(test_group)
+    { }
+
+    virtual ~performance_test() = default;
+
+    const std::string& test_case() const { return _test_case; }
+    const std::string& test_group() const { return _test_group; }
+    std::string name() const { return fmt::format("{}.{}", test_group(), test_case()); }
+
+    void run(const config&);
+public:
+    static void register_test(std::unique_ptr<performance_test>);
+};
+
+// Helper for measuring time.
+// Each microbenchmark can either use the default behaviour which measures
+// only the start and stop time of the whole run or manually invoke
+// start_measuring_time() and stop_measuring_time() in order to measure
+// only parts of each iteration.
+class time_measurement {
+    clock_type::time_point _run_start_time;
+    clock_type::time_point _start_time;
+    clock_type::duration _total_time;
+public:
+    [[gnu::always_inline]] [[gnu::hot]]
+    void start_run() {
+        _total_time = { };
+        auto t = clock_type::now();
+        _run_start_time = t;
+        _start_time = t;
+    }
+
+    [[gnu::always_inline]] [[gnu::hot]]
+    clock_type::duration stop_run() {
+        auto t = clock_type::now();
+        if (_start_time == _run_start_time) {
+            return t - _start_time;
+        }
+        return _total_time;
+    }
+
+    [[gnu::always_inline]] [[gnu::hot]]
+    void start_iteration() {
+        _start_time = clock_type::now();
+    }
+
+    [[gnu::always_inline]] [[gnu::hot]]
+    void stop_iteration() {
+        auto t = clock_type::now();
+        _total_time += t - _start_time;
+    }
+};
+
+extern time_measurement measure_time;
+
+namespace {
+
+template<bool Condition, typename TrueFn, typename FalseFn>
+struct do_if_constexpr_ : FalseFn {
+    do_if_constexpr_(TrueFn, FalseFn false_fn) : FalseFn(std::move(false_fn)) { }
+    decltype(auto) operator()() const {
+        // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64095
+        return FalseFn::operator()(0);
+    }
+};
+template<typename TrueFn, typename FalseFn>
+struct do_if_constexpr_<true, TrueFn, FalseFn> : TrueFn {
+    do_if_constexpr_(TrueFn true_fn, FalseFn) : TrueFn(std::move(true_fn)) { }
+    decltype(auto) operator()() const { return TrueFn::operator()(0); }
+};
+
+template<bool Condition, typename TrueFn, typename FalseFn>
+do_if_constexpr_<Condition, TrueFn, FalseFn> if_constexpr_(TrueFn&& true_fn, FalseFn&& false_fn)
+{
+    return do_if_constexpr_<Condition, TrueFn, FalseFn>(std::forward<TrueFn>(true_fn),
+                                                        std::forward<FalseFn>(false_fn));
+}
+
+}
+
+template<typename Test>
+class concrete_performance_test final : public performance_test {
+    compat::optional<Test> _test;
+protected:
+    virtual void set_up() override {
+        _test.emplace();
+    }
+
+    virtual void tear_down() noexcept override {
+        _test = compat::nullopt;
+    }
+
+    [[gnu::hot]]
+    virtual future<clock_type::duration> do_single_run() override {
+        // Redundant 'this->'s courtesy of https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61636
+        return if_constexpr_<is_future<decltype(_test->run())>::value>([&] (auto&&...) {
+            measure_time.start_run();
+            return do_until([this] { return this->stop_iteration(); }, [this] {
+                this->next_iteration();
+                return _test->run();
+            }).then([] {
+                return measure_time.stop_run();
+            });
+        }, [&] (auto&&...) {
+            measure_time.start_run();
+            while (!stop_iteration()) {
+                this->next_iteration();
+                _test->run();
+            }
+            return make_ready_future<clock_type::duration>(measure_time.stop_run());
+        })();
+    }
+public:
+    using performance_test::performance_test;
+};
+
+void register_test(std::unique_ptr<performance_test>);
+
+template<typename Test>
+struct test_registrar {
+    test_registrar(const std::string& test_group, const std::string& test_case) {
+        auto test = std::make_unique<concrete_performance_test<Test>>(test_case, test_group);
+        performance_test::register_test(std::move(test));
+    }
+};
+
+}
+
+[[gnu::always_inline]]
+inline void start_measuring_time()
+{
+    internal::measure_time.start_iteration();
+}
+
+[[gnu::always_inline]]
+inline void stop_measuring_time()
+{
+    internal::measure_time.stop_iteration();
+}
+
+
+template<typename T>
+void do_not_optimize(const T& v)
+{
+    asm volatile("" : : "r,m" (v));
+}
+
+}
+
+#define PERF_TEST_F(test_group, test_case) \
+    struct test_##test_group##_##test_case : test_group { \
+        [[gnu::always_inline]] inline auto run(); \
+    }; \
+    static ::perf_tests::internal::test_registrar<test_##test_group##_##test_case> \
+    test_##test_group##_##test_case##_registrar(#test_group, #test_case); \
+    [[gnu::always_inline]] auto test_##test_group##_##test_case::run()
+
+#define PERF_TEST(test_group, test_case) \
+    struct test_##test_group##_##test_case { \
+        [[gnu::always_inline]] inline auto run(); \
+    }; \
+    static ::perf_tests::internal::test_registrar<test_##test_group##_##test_case> \
+    test_##test_group##_##test_case##_registrar(#test_group, #test_case); \
+    [[gnu::always_inline]] auto test_##test_group##_##test_case::run()
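A closing note on `do_not_optimize()` above: the empty `asm volatile` with an `"r,m"` input constraint tells GCC and Clang that the value is consumed, so the benchmarked expression must really be evaluated, yet no instructions are emitted for the statement itself. A standalone sketch of the same technique (the `sink`/`sum_to` names are illustrative, and the construct is GCC/Clang-specific):

```c++
#include <cstdint>

// Same trick as perf_tests::do_not_optimize(): an empty asm statement that
// claims to read v ("r,m" lets the compiler pick a register or a memory
// operand), forcing v to be materialised without emitting any code.
template<typename T>
inline void sink(const T& v) {
    asm volatile("" : : "r,m" (v));
}

uint64_t sum_to(uint64_t n) {
    uint64_t s = 0;
    for (uint64_t i = 0; i < n; i++) {
        s += i;
    }
    return s;
}

int main() {
    // Without sink(), the compiler may fold the loop into a closed-form
    // constant or delete it outright, leaving nothing to time.
    sink(sum_to(1'000'000));
    return 0;
}
```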