summaryrefslogtreecommitdiffstats
path: root/src/database/engine/dbengine-stresstest.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/database/engine/dbengine-stresstest.c456
1 files changed, 456 insertions, 0 deletions
diff --git a/src/database/engine/dbengine-stresstest.c b/src/database/engine/dbengine-stresstest.c
new file mode 100644
index 000000000..1d978cd52
--- /dev/null
+++ b/src/database/engine/dbengine-stresstest.c
@@ -0,0 +1,456 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "../../daemon/common.h"
+
+#ifdef ENABLE_DBENGINE
+
+static RRDHOST *dbengine_rrdhost_find_or_create(char *name) {
+ /* We don't want to drop metrics when generating load,
+ * we prefer to block data generation itself */
+
+ return rrdhost_find_or_create(
+ name,
+ name,
+ name,
+ os_type,
+ netdata_configured_timezone,
+ netdata_configured_abbrev_timezone,
+ netdata_configured_utc_offset,
+ program_name,
+ NETDATA_VERSION,
+ default_rrd_update_every,
+ default_rrd_history_entries,
+ RRD_MEMORY_MODE_DBENGINE,
+ health_plugin_enabled(),
+ default_rrdpush_enabled,
+ default_rrdpush_destination,
+ default_rrdpush_api_key,
+ default_rrdpush_send_charts_matching,
+ default_rrdpush_enable_replication,
+ default_rrdpush_seconds_to_replicate,
+ default_rrdpush_replication_step,
+ NULL,
+ 0
+ );
+}
+
+static inline void rrddim_set_by_pointer_fake_time(RRDDIM *rd, collected_number value, time_t now) {
+ rd->collector.last_collected_time.tv_sec = now;
+ rd->collector.last_collected_time.tv_usec = 0;
+ rd->collector.collected_value = value;
+ rrddim_set_updated(rd);
+
+ rd->collector.counter++;
+
+ collected_number v = (value >= 0) ? value : -value;
+ if(unlikely(v > rd->collector.collected_value_max)) rd->collector.collected_value_max = v;
+}
+
+struct dbengine_chart_thread {
+ uv_thread_t thread;
+ RRDHOST *host;
+ char *chartname; /* Will be prefixed by type, e.g. "example_local1.", "example_local2." etc */
+ unsigned dset_charts; /* number of charts */
+ unsigned dset_dims; /* dimensions per chart */
+ unsigned chart_i; /* current chart offset */
+ time_t time_present; /* current virtual time of the benchmark */
+ volatile time_t time_max; /* latest timestamp of stored values */
+ unsigned history_seconds; /* how far back in the past to go */
+
+ volatile long done; /* initialize to 0, set to 1 to stop thread */
+ struct completion charts_initialized;
+ unsigned long errors, stored_metrics_nr; /* statistics */
+
+ RRDSET *st;
+ RRDDIM *rd[]; /* dset_dims elements */
+};
+
+collected_number generate_dbengine_chart_value(int chart_i, int dim_i, time_t time_current)
+{
+ collected_number value;
+
+ value = ((collected_number)time_current) * (chart_i + 1);
+ value += ((collected_number)time_current) * (dim_i + 1);
+ value %= 1024LLU;
+
+ return value;
+}
+
+static void generate_dbengine_chart(void *arg)
+{
+ fprintf(stderr, "%s() running...\n", __FUNCTION__ );
+ struct dbengine_chart_thread *thread_info = (struct dbengine_chart_thread *)arg;
+ RRDHOST *host = thread_info->host;
+ char *chartname = thread_info->chartname;
+ const unsigned DSET_DIMS = thread_info->dset_dims;
+ unsigned history_seconds = thread_info->history_seconds;
+ time_t time_present = thread_info->time_present;
+
+ unsigned j, update_every = 1;
+ RRDSET *st;
+ RRDDIM *rd[DSET_DIMS];
+ char name[RRD_ID_LENGTH_MAX + 1];
+ time_t time_current;
+
+ // create the chart
+ snprintfz(name, RRD_ID_LENGTH_MAX, "example_local%u", thread_info->chart_i + 1);
+ thread_info->st = st = rrdset_create(host, name, chartname, chartname, "example", NULL, chartname, chartname,
+ chartname, NULL, 1, update_every, RRDSET_TYPE_LINE);
+ for (j = 0 ; j < DSET_DIMS ; ++j) {
+ snprintfz(name, RRD_ID_LENGTH_MAX, "%s%u", chartname, j + 1);
+
+ thread_info->rd[j] = rd[j] = rrddim_add(st, name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ }
+ completion_mark_complete(&thread_info->charts_initialized);
+
+ // feed it with the test data
+ time_current = time_present - history_seconds;
+ for (j = 0 ; j < DSET_DIMS ; ++j) {
+ rd[j]->collector.last_collected_time.tv_sec =
+ st->last_collected_time.tv_sec = st->last_updated.tv_sec = time_current - update_every;
+ rd[j]->collector.last_collected_time.tv_usec =
+ st->last_collected_time.tv_usec = st->last_updated.tv_usec = 0;
+ }
+ for( ; !thread_info->done && time_current < time_present ; time_current += update_every) {
+ st->usec_since_last_update = USEC_PER_SEC * update_every;
+
+ for (j = 0; j < DSET_DIMS; ++j) {
+ collected_number value;
+
+ value = generate_dbengine_chart_value(thread_info->chart_i, j, time_current);
+ rrddim_set_by_pointer_fake_time(rd[j], value, time_current);
+ ++thread_info->stored_metrics_nr;
+ }
+ rrdset_done(st);
+ thread_info->time_max = time_current;
+ }
+ for (j = 0; j < DSET_DIMS; ++j) {
+ rrdeng_store_metric_finalize((rd[j])->tiers[0].sch);
+ }
+}
+
+void generate_dbengine_dataset(unsigned history_seconds)
+{
+ fprintf(stderr, "%s() running...\n", __FUNCTION__ );
+ const int DSET_CHARTS = 16;
+ const int DSET_DIMS = 128;
+ const uint64_t EXPECTED_COMPRESSION_RATIO = 20;
+ RRDHOST *host = NULL;
+ struct dbengine_chart_thread **thread_info;
+ int i;
+ time_t time_present;
+
+ default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE;
+ default_rrdeng_page_cache_mb = 128;
+ // Worst case for uncompressible data
+ default_rrdeng_disk_quota_mb = (((uint64_t)DSET_DIMS * DSET_CHARTS) * sizeof(storage_number) * history_seconds) /
+ (1024 * 1024);
+ default_rrdeng_disk_quota_mb -= default_rrdeng_disk_quota_mb * EXPECTED_COMPRESSION_RATIO / 100;
+
+ nd_log_limits_unlimited();
+ fprintf(stderr, "Initializing localhost with hostname 'dbengine-dataset'");
+
+ host = dbengine_rrdhost_find_or_create("dbengine-dataset");
+ if (NULL == host)
+ return;
+
+ thread_info = mallocz(sizeof(*thread_info) * DSET_CHARTS);
+ for (i = 0 ; i < DSET_CHARTS ; ++i) {
+ thread_info[i] = mallocz(sizeof(*thread_info[i]) + sizeof(RRDDIM *) * DSET_DIMS);
+ }
+ fprintf(stderr, "\nRunning DB-engine workload generator\n");
+
+ time_present = now_realtime_sec();
+ for (i = 0 ; i < DSET_CHARTS ; ++i) {
+ thread_info[i]->host = host;
+ thread_info[i]->chartname = "random";
+ thread_info[i]->dset_charts = DSET_CHARTS;
+ thread_info[i]->chart_i = i;
+ thread_info[i]->dset_dims = DSET_DIMS;
+ thread_info[i]->history_seconds = history_seconds;
+ thread_info[i]->time_present = time_present;
+ thread_info[i]->time_max = 0;
+ thread_info[i]->done = 0;
+ completion_init(&thread_info[i]->charts_initialized);
+ fatal_assert(0 == uv_thread_create(&thread_info[i]->thread, generate_dbengine_chart, thread_info[i]));
+ completion_wait_for(&thread_info[i]->charts_initialized);
+ completion_destroy(&thread_info[i]->charts_initialized);
+ }
+ for (i = 0 ; i < DSET_CHARTS ; ++i) {
+ fatal_assert(0 == uv_thread_join(&thread_info[i]->thread));
+ }
+
+ for (i = 0 ; i < DSET_CHARTS ; ++i) {
+ freez(thread_info[i]);
+ }
+ freez(thread_info);
+ rrd_wrlock();
+ rrdhost_free___while_having_rrd_wrlock(localhost, true);
+ rrd_wrunlock();
+}
+
+struct dbengine_query_thread {
+ uv_thread_t thread;
+ RRDHOST *host;
+ char *chartname; /* Will be prefixed by type, e.g. "example_local1.", "example_local2." etc */
+ unsigned dset_charts; /* number of charts */
+ unsigned dset_dims; /* dimensions per chart */
+ time_t time_present; /* current virtual time of the benchmark */
+ unsigned history_seconds; /* how far back in the past to go */
+ volatile long done; /* initialize to 0, set to 1 to stop thread */
+ unsigned long errors, queries_nr, queried_metrics_nr; /* statistics */
+ uint8_t delete_old_data; /* if non zero then data are deleted when disk space is exhausted */
+
+ struct dbengine_chart_thread *chart_threads[]; /* dset_charts elements */
+};
+
+static void query_dbengine_chart(void *arg)
+{
+ fprintf(stderr, "%s() running...\n", __FUNCTION__ );
+ struct dbengine_query_thread *thread_info = (struct dbengine_query_thread *)arg;
+ const int DSET_CHARTS = thread_info->dset_charts;
+ const int DSET_DIMS = thread_info->dset_dims;
+ time_t time_after, time_before, time_min, time_approx_min, time_max, duration;
+ int i, j, update_every = 1;
+ RRDSET *st;
+ RRDDIM *rd;
+ uint8_t same;
+ time_t time_now, time_retrieved, end_time;
+ collected_number generatedv;
+ NETDATA_DOUBLE value, expected;
+ struct storage_engine_query_handle seqh;
+ size_t value_errors = 0, time_errors = 0;
+
+ do {
+ // pick a chart and dimension
+ i = random() % DSET_CHARTS;
+ st = thread_info->chart_threads[i]->st;
+ j = random() % DSET_DIMS;
+ rd = thread_info->chart_threads[i]->rd[j];
+
+ time_min = thread_info->time_present - thread_info->history_seconds + 1;
+ time_max = thread_info->chart_threads[i]->time_max;
+
+ if (thread_info->delete_old_data) {
+ /* A time window of twice the disk space is sufficient for compression space savings of up to 50% */
+ time_approx_min = time_max - (default_rrdeng_disk_quota_mb * 2 * 1024 * 1024) /
+ (((uint64_t) DSET_DIMS * DSET_CHARTS) * sizeof(storage_number));
+ time_min = MAX(time_min, time_approx_min);
+ }
+ if (!time_max) {
+ time_before = time_after = time_min;
+ } else {
+ time_after = time_min + random() % (MAX(time_max - time_min, 1));
+ duration = random() % 3600;
+ time_before = MIN(time_after + duration, time_max); /* up to 1 hour queries */
+ }
+
+ storage_engine_query_init(rd->tiers[0].seb, rd->tiers[0].smh, &seqh, time_after, time_before, STORAGE_PRIORITY_NORMAL);
+ ++thread_info->queries_nr;
+ for (time_now = time_after ; time_now <= time_before ; time_now += update_every) {
+ generatedv = generate_dbengine_chart_value(i, j, time_now);
+ expected = unpack_storage_number(pack_storage_number((NETDATA_DOUBLE) generatedv, SN_DEFAULT_FLAGS));
+
+ if (unlikely(storage_engine_query_is_finished(&seqh))) {
+ if (!thread_info->delete_old_data) { /* data validation only when we don't delete */
+ fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT
+ ", found data gap, ### ERROR 12 ###\n",
+ rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, expected);
+ ++thread_info->errors;
+ }
+ break;
+ }
+
+ STORAGE_POINT sp = storage_engine_query_next_metric(&seqh);
+ value = sp.sum;
+ time_retrieved = sp.start_time_s;
+ end_time = sp.end_time_s;
+
+ if (!netdata_double_isnumber(value)) {
+ if (!thread_info->delete_old_data) { /* data validation only when we don't delete */
+ fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT
+ ", found data gap, ### ERROR 13 ###\n",
+ rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, expected);
+ ++thread_info->errors;
+ }
+ break;
+ }
+ ++thread_info->queried_metrics_nr;
+
+ same = (roundndd(value) == roundndd(expected)) ? 1 : 0;
+ if (!same) {
+ if (!thread_info->delete_old_data) { /* data validation only when we don't delete */
+ if(!value_errors)
+ fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT
+ ", found " NETDATA_DOUBLE_FORMAT ", ### ERROR 14 ###\n",
+ rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, expected, value);
+ value_errors++;
+ thread_info->errors++;
+ }
+ }
+ if (end_time != time_now) {
+ if (!thread_info->delete_old_data) { /* data validation only when we don't delete */
+ if(!time_errors)
+ fprintf(stderr,
+ " DB-engine stresstest %s/%s: at %lu secs, found timestamp %lu ### ERROR 15 ###\n",
+ rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, (unsigned long) time_retrieved);
+ time_errors++;
+ thread_info->errors++;
+ }
+ }
+ }
+ storage_engine_query_finalize(&seqh);
+ } while(!thread_info->done);
+
+ if(value_errors)
+ fprintf(stderr, "%zu value errors encountered\n", value_errors);
+
+ if(time_errors)
+ fprintf(stderr, "%zu time errors encountered\n", time_errors);
+}
+
+void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsigned QUERY_THREADS,
+ unsigned RAMP_UP_SECONDS, unsigned PAGE_CACHE_MB, unsigned DISK_SPACE_MB)
+{
+ fprintf(stderr, "%s() running...\n", __FUNCTION__ );
+ const unsigned DSET_DIMS = 128;
+ const uint64_t EXPECTED_COMPRESSION_RATIO = 20;
+ const unsigned HISTORY_SECONDS = 3600 * 24 * 365 * 50; /* 50 year of history */
+ RRDHOST *host = NULL;
+ struct dbengine_chart_thread **chart_threads;
+ struct dbengine_query_thread **query_threads;
+ unsigned i, j;
+ time_t time_start, test_duration;
+
+ nd_log_limits_unlimited();
+
+ if (!TEST_DURATION_SEC)
+ TEST_DURATION_SEC = 10;
+ if (!DSET_CHARTS)
+ DSET_CHARTS = 1;
+ if (!QUERY_THREADS)
+ QUERY_THREADS = 1;
+ if (PAGE_CACHE_MB < RRDENG_MIN_PAGE_CACHE_SIZE_MB)
+ PAGE_CACHE_MB = RRDENG_MIN_PAGE_CACHE_SIZE_MB;
+
+ default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE;
+ default_rrdeng_page_cache_mb = PAGE_CACHE_MB;
+ if (DISK_SPACE_MB) {
+ fprintf(stderr, "By setting disk space limit data are allowed to be deleted. "
+ "Data validation is turned off for this run.\n");
+ default_rrdeng_disk_quota_mb = DISK_SPACE_MB;
+ } else {
+ // Worst case for uncompressible data
+ default_rrdeng_disk_quota_mb =
+ (((uint64_t) DSET_DIMS * DSET_CHARTS) * sizeof(storage_number) * HISTORY_SECONDS) / (1024 * 1024);
+ default_rrdeng_disk_quota_mb -= default_rrdeng_disk_quota_mb * EXPECTED_COMPRESSION_RATIO / 100;
+ }
+
+ fprintf(stderr, "Initializing localhost with hostname 'dbengine-stress-test'\n");
+
+ (void)sql_init_meta_database(DB_CHECK_NONE, 1);
+ host = dbengine_rrdhost_find_or_create("dbengine-stress-test");
+ if (NULL == host)
+ return;
+
+ chart_threads = mallocz(sizeof(*chart_threads) * DSET_CHARTS);
+ for (i = 0 ; i < DSET_CHARTS ; ++i) {
+ chart_threads[i] = mallocz(sizeof(*chart_threads[i]) + sizeof(RRDDIM *) * DSET_DIMS);
+ }
+ query_threads = mallocz(sizeof(*query_threads) * QUERY_THREADS);
+ for (i = 0 ; i < QUERY_THREADS ; ++i) {
+ query_threads[i] = mallocz(sizeof(*query_threads[i]) + sizeof(struct dbengine_chart_thread *) * DSET_CHARTS);
+ }
+ fprintf(stderr, "\nRunning DB-engine stress test, %u seconds writers ramp-up time,\n"
+ "%u seconds of concurrent readers and writers, %u writer threads, %u reader threads,\n"
+ "%u MiB of page cache.\n",
+ RAMP_UP_SECONDS, TEST_DURATION_SEC, DSET_CHARTS, QUERY_THREADS, PAGE_CACHE_MB);
+
+ time_start = now_realtime_sec() + HISTORY_SECONDS; /* move history to the future */
+ for (i = 0 ; i < DSET_CHARTS ; ++i) {
+ chart_threads[i]->host = host;
+ chart_threads[i]->chartname = "random";
+ chart_threads[i]->dset_charts = DSET_CHARTS;
+ chart_threads[i]->chart_i = i;
+ chart_threads[i]->dset_dims = DSET_DIMS;
+ chart_threads[i]->history_seconds = HISTORY_SECONDS;
+ chart_threads[i]->time_present = time_start;
+ chart_threads[i]->time_max = 0;
+ chart_threads[i]->done = 0;
+ chart_threads[i]->errors = chart_threads[i]->stored_metrics_nr = 0;
+ completion_init(&chart_threads[i]->charts_initialized);
+ fatal_assert(0 == uv_thread_create(&chart_threads[i]->thread, generate_dbengine_chart, chart_threads[i]));
+ }
+ /* barrier so that subsequent queries can access valid chart data */
+ for (i = 0 ; i < DSET_CHARTS ; ++i) {
+ completion_wait_for(&chart_threads[i]->charts_initialized);
+ completion_destroy(&chart_threads[i]->charts_initialized);
+ }
+ sleep(RAMP_UP_SECONDS);
+ /* at this point data have already began being written to the database */
+ for (i = 0 ; i < QUERY_THREADS ; ++i) {
+ query_threads[i]->host = host;
+ query_threads[i]->chartname = "random";
+ query_threads[i]->dset_charts = DSET_CHARTS;
+ query_threads[i]->dset_dims = DSET_DIMS;
+ query_threads[i]->history_seconds = HISTORY_SECONDS;
+ query_threads[i]->time_present = time_start;
+ query_threads[i]->done = 0;
+ query_threads[i]->errors = query_threads[i]->queries_nr = query_threads[i]->queried_metrics_nr = 0;
+ for (j = 0 ; j < DSET_CHARTS ; ++j) {
+ query_threads[i]->chart_threads[j] = chart_threads[j];
+ }
+ query_threads[i]->delete_old_data = DISK_SPACE_MB ? 1 : 0;
+ fatal_assert(0 == uv_thread_create(&query_threads[i]->thread, query_dbengine_chart, query_threads[i]));
+ }
+ sleep(TEST_DURATION_SEC);
+ /* stop workload */
+ for (i = 0 ; i < DSET_CHARTS ; ++i) {
+ chart_threads[i]->done = 1;
+ }
+ for (i = 0 ; i < QUERY_THREADS ; ++i) {
+ query_threads[i]->done = 1;
+ }
+ for (i = 0 ; i < DSET_CHARTS ; ++i) {
+ assert(0 == uv_thread_join(&chart_threads[i]->thread));
+ }
+ for (i = 0 ; i < QUERY_THREADS ; ++i) {
+ assert(0 == uv_thread_join(&query_threads[i]->thread));
+ }
+ test_duration = now_realtime_sec() - (time_start - HISTORY_SECONDS);
+ if (!test_duration)
+ test_duration = 1;
+ fprintf(stderr, "\nDB-engine stress test finished in %lld seconds.\n", (long long)test_duration);
+ unsigned long stored_metrics_nr = 0;
+ for (i = 0 ; i < DSET_CHARTS ; ++i) {
+ stored_metrics_nr += chart_threads[i]->stored_metrics_nr;
+ }
+ unsigned long queried_metrics_nr = 0;
+ for (i = 0 ; i < QUERY_THREADS ; ++i) {
+ queried_metrics_nr += query_threads[i]->queried_metrics_nr;
+ }
+ fprintf(stderr, "%u metrics were stored (dataset size of %lu MiB) in %u charts by 1 writer thread per chart.\n",
+ DSET_CHARTS * DSET_DIMS, stored_metrics_nr * sizeof(storage_number) / (1024 * 1024), DSET_CHARTS);
+ fprintf(stderr, "Metrics were being generated per 1 emulated second and time was accelerated.\n");
+ fprintf(stderr, "%lu metric data points were queried by %u reader threads.\n", queried_metrics_nr, QUERY_THREADS);
+ fprintf(stderr, "Query starting time is randomly chosen from the beginning of the time-series up to the time of\n"
+ "the latest data point, and ending time from 1 second up to 1 hour after the starting time.\n");
+ fprintf(stderr, "Performance is %lld written data points/sec and %lld read data points/sec.\n",
+ (long long)(stored_metrics_nr / test_duration), (long long)(queried_metrics_nr / test_duration));
+
+ for (i = 0 ; i < DSET_CHARTS ; ++i) {
+ freez(chart_threads[i]);
+ }
+ freez(chart_threads);
+ for (i = 0 ; i < QUERY_THREADS ; ++i) {
+ freez(query_threads[i]);
+ }
+ freez(query_threads);
+ rrd_wrlock();
+ rrdeng_prepare_exit((struct rrdengine_instance *)host->db[0].si);
+ rrdeng_exit((struct rrdengine_instance *)host->db[0].si);
+ rrdeng_enq_cmd(NULL, RRDENG_OPCODE_SHUTDOWN_EVLOOP, NULL, NULL, STORAGE_PRIORITY_BEST_EFFORT, NULL, NULL);
+ rrd_wrunlock();
+}
+
+#endif