diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-02-06 16:11:30 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-02-06 16:11:30 +0000 |
commit | aa2fe8ccbfcb117efa207d10229eeeac5d0f97c7 (patch) | |
tree | 941cbdd387b41c1a81587c20a6df9f0e5e0ff7ab /web/api | |
parent | Adding upstream version 1.37.1. (diff) | |
download | netdata-aa2fe8ccbfcb117efa207d10229eeeac5d0f97c7.tar.xz netdata-aa2fe8ccbfcb117efa207d10229eeeac5d0f97c7.zip |
Adding upstream version 1.38.0.upstream/1.38.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
25 files changed, 604 insertions, 339 deletions
diff --git a/web/api/README.md b/web/api/README.md index fc520a09a..82a55eb25 100644 --- a/web/api/README.md +++ b/web/api/README.md @@ -1,6 +1,10 @@ <!-- title: "API" -custom_edit_url: https://github.com/netdata/netdata/edit/master/web/api/README.md +custom_edit_url: "https://github.com/netdata/netdata/edit/master/web/api/README.md" +sidebar_label: "API" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Operations" --> # API diff --git a/web/api/badges/README.md b/web/api/badges/README.md index 84409471a..8f6eca62a 100644 --- a/web/api/badges/README.md +++ b/web/api/badges/README.md @@ -25,7 +25,7 @@ Similarly, there is [a chart that shows outbound bandwidth per class](http://lon The right one is a **volume** calculation. Netdata calculated the total of the last 86.400 seconds (a day) which gives `kilobits`, then divided it by 8 to make it KB, then by 1024 to make it MB and then by 1024 to make it GB. Calculations like this are quite accurate, since for every value collected, every second, Netdata interpolates it to second boundary using microsecond calculations. -Let's see a few more badge examples (they come from the [Netdata registry](/registry/README.md)): +Let's see a few more badge examples (they come from the [Netdata registry](https://github.com/netdata/netdata/blob/master/registry/README.md)): - **cpu usage of user `root`** (you can pick any user; 100% = 1 core). This will be `green <10%`, `yellow <20%`, `orange <50%`, `blue <100%` (1 core), `red` otherwise (you define thresholds and colors on the URL). diff --git a/web/api/badges/web_buffer_svg.c b/web/api/badges/web_buffer_svg.c index 080f2240f..ca0f4b7a0 100644 --- a/web/api/badges/web_buffer_svg.c +++ b/web/api/badges/web_buffer_svg.c @@ -913,7 +913,7 @@ int web_client_api_request_v1_badge(RRDHOST *host, struct web_client *w, char *u if(!strcmp(name, "chart")) chart = value; else if(!strcmp(name, "dimension") || !strcmp(name, "dim") || !strcmp(name, "dimensions") || !strcmp(name, "dims")) { if(!dimensions) - dimensions = buffer_create(100); + dimensions = buffer_create(100, &netdata_buffers_statistics.buffers_api); buffer_strcat(dimensions, "|"); buffer_strcat(dimensions, value); @@ -969,7 +969,7 @@ int web_client_api_request_v1_badge(RRDHOST *host, struct web_client *w, char *u ret = HTTP_RESP_OK; goto cleanup; } - st->last_accessed_time = now_realtime_sec(); + st->last_accessed_time_s = now_realtime_sec(); if(alarm) { rca = rrdcalc_from_rrdset_get(st, alarm); @@ -1110,14 +1110,14 @@ int web_client_api_request_v1_badge(RRDHOST *host, struct web_client *w, char *u ret = HTTP_RESP_INTERNAL_SERVER_ERROR; // if the collected value is too old, don't calculate its value - if (rrdset_last_entry_t(st) >= (now_realtime_sec() - (st->update_every * st->gap_when_lost_iterations_above))) + if (rrdset_last_entry_s(st) >= (now_realtime_sec() - (st->update_every * gap_when_lost_iterations_above))) ret = rrdset2value_api_v1(st, w->response.data, &n, (dimensions) ? buffer_tostring(dimensions) : NULL, points, after, before, group, group_options, 0, options, NULL, &latest_timestamp, NULL, NULL, NULL, &value_is_null, NULL, 0, 0, - QUERY_SOURCE_API_BADGE); + QUERY_SOURCE_API_BADGE, STORAGE_PRIORITY_NORMAL); // if the value cannot be calculated, show empty badge if (ret != HTTP_RESP_OK) { diff --git a/web/api/exporters/prometheus/README.md b/web/api/exporters/prometheus/README.md index cf7e2caa8..1ff86f4e0 100644 --- a/web/api/exporters/prometheus/README.md +++ b/web/api/exporters/prometheus/README.md @@ -5,6 +5,6 @@ custom_edit_url: https://github.com/netdata/netdata/edit/master/web/api/exporter # Prometheus exporter -Read the Prometheus exporter documentation: [Using Netdata with Prometheus](/exporting/prometheus/README.md). +Read the Prometheus exporter documentation: [Using Netdata with Prometheus](https://github.com/netdata/netdata/blob/master/exporting/prometheus/README.md). diff --git a/web/api/exporters/shell/allmetrics_shell.c b/web/api/exporters/shell/allmetrics_shell.c index 0ffbac67b..dded5a536 100644 --- a/web/api/exporters/shell/allmetrics_shell.c +++ b/web/api/exporters/shell/allmetrics_shell.c @@ -127,7 +127,7 @@ void rrd_stats_api_v1_charts_allmetrics_json(RRDHOST *host, const char *filter_s rrdset_family(st), rrdset_context(st), rrdset_units(st), - (int64_t)rrdset_last_entry_t(st)); + (int64_t) rrdset_last_entry_s(st)); chart_counter++; dimension_counter = 0; diff --git a/web/api/formatters/README.md b/web/api/formatters/README.md index 3e67ac6ee..4c281f064 100644 --- a/web/api/formatters/README.md +++ b/web/api/formatters/README.md @@ -12,18 +12,18 @@ The following formats are supported: | format|module|content type|description| |:----:|:----:|:----------:|:----------| -| `array`|[ssv](/web/api/formatters/ssv/README.md)|application/json|a JSON array| -| `csv`|[csv](/web/api/formatters/csv/README.md)|text/plain|a text table, comma separated, with a header line (dimension names) and `\r\n` at the end of the lines| -| `csvjsonarray`|[csv](/web/api/formatters/csv/README.md)|application/json|a JSON array, with each row as another array (the first row has the dimension names)| -| `datasource`|[json](/web/api/formatters/json/README.md)|application/json|a Google Visualization Provider `datasource` javascript callback| -| `datatable`|[json](/web/api/formatters/json/README.md)|application/json|a Google `datatable`| -| `html`|[csv](/web/api/formatters/csv/README.md)|text/html|an html table| -| `json`|[json](/web/api/formatters/json/README.md)|application/json|a JSON object| -| `jsonp`|[json](/web/api/formatters/json/README.md)|application/json|a JSONP javascript callback| -| `markdown`|[csv](/web/api/formatters/csv/README.md)|text/plain|a markdown table| -| `ssv`|[ssv](/web/api/formatters/ssv/README.md)|text/plain|a space separated list of values| -| `ssvcomma`|[ssv](/web/api/formatters/ssv/README.md)|text/plain|a comma separated list of values| -| `tsv`|[csv](/web/api/formatters/csv/README.md)|text/plain|a TAB delimited `csv` (MS Excel flavor)| +| `array`|[ssv](https://github.com/netdata/netdata/blob/master/web/api/formatters/ssv/README.md)|application/json|a JSON array| +| `csv`|[csv](https://github.com/netdata/netdata/blob/master/web/api/formatters/csv/README.md)|text/plain|a text table, comma separated, with a header line (dimension names) and `\r\n` at the end of the lines| +| `csvjsonarray`|[csv](https://github.com/netdata/netdata/blob/master/web/api/formatters/csv/README.md)|application/json|a JSON array, with each row as another array (the first row has the dimension names)| +| `datasource`|[json](https://github.com/netdata/netdata/blob/master/web/api/formatters/json/README.md)|application/json|a Google Visualization Provider `datasource` javascript callback| +| `datatable`|[json](https://github.com/netdata/netdata/blob/master/web/api/formatters/json/README.md)|application/json|a Google `datatable`| +| `html`|[csv](https://github.com/netdata/netdata/blob/master/web/api/formatters/csv/README.md)|text/html|an html table| +| `json`|[json](https://github.com/netdata/netdata/blob/master/web/api/formatters/json/README.md)|application/json|a JSON object| +| `jsonp`|[json](https://github.com/netdata/netdata/blob/master/web/api/formatters/json/README.md)|application/json|a JSONP javascript callback| +| `markdown`|[csv](https://github.com/netdata/netdata/blob/master/web/api/formatters/csv/README.md)|text/plain|a markdown table| +| `ssv`|[ssv](https://github.com/netdata/netdata/blob/master/web/api/formatters/ssv/README.md)|text/plain|a space separated list of values| +| `ssvcomma`|[ssv](https://github.com/netdata/netdata/blob/master/web/api/formatters/ssv/README.md)|text/plain|a comma separated list of values| +| `tsv`|[csv](https://github.com/netdata/netdata/blob/master/web/api/formatters/csv/README.md)|text/plain|a TAB delimited `csv` (MS Excel flavor)| For examples of each format, check the relative module documentation. diff --git a/web/api/formatters/charts2json.c b/web/api/formatters/charts2json.c index 1fc20b493..61a9ecf2f 100644 --- a/web/api/formatters/charts2json.c +++ b/web/api/formatters/charts2json.c @@ -10,7 +10,7 @@ const char* get_release_channel() { if (use_stable == -1) { char filename[FILENAME_MAX + 1]; snprintfz(filename, FILENAME_MAX, "%s/.environment", netdata_configured_user_config_dir); - procfile *ff = procfile_open(filename, "=", PROCFILE_FLAG_DEFAULT); + procfile *ff = procfile_open(filename, "=", PROCFILE_FLAG_ERROR_ON_ERROR_LOG); if (ff) { procfile_set_quotes(ff, "'\""); ff = procfile_readall(ff); @@ -78,7 +78,7 @@ void charts2json(RRDHOST *host, BUFFER *wb, int skip_volatile, int show_archived rrdset2json(st, wb, &dimensions, &memory, skip_volatile); c++; - st->last_accessed_time = now; + st->last_accessed_time_s = now; } } rrdset_foreach_done(st); @@ -102,10 +102,10 @@ void charts2json(RRDHOST *host, BUFFER *wb, int skip_volatile, int show_archived , dimensions , alarms , memory - , rrd_hosts_available + , rrdhost_hosts_available() ); - if(unlikely(rrd_hosts_available > 1)) { + if(unlikely(rrdhost_hosts_available() > 1)) { rrd_rdlock(); size_t found = 0; @@ -178,7 +178,7 @@ void chartcollectors2json(RRDHOST *host, BUFFER *wb) { }; sprintf(name, "%s:%s", col.plugin, col.module); dictionary_set(dict, name, &col, sizeof(struct collector)); - st->last_accessed_time = now; + st->last_accessed_time_s = now; } } rrdset_foreach_done(st); diff --git a/web/api/formatters/csv/README.md b/web/api/formatters/csv/README.md index df7c11efa..fc5ffec1b 100644 --- a/web/api/formatters/csv/README.md +++ b/web/api/formatters/csv/README.md @@ -5,7 +5,7 @@ custom_edit_url: https://github.com/netdata/netdata/edit/master/web/api/formatte # CSV formatter -The CSV formatter presents [results of database queries](/web/api/queries/README.md) in the following formats: +The CSV formatter presents [results of database queries](https://github.com/netdata/netdata/blob/master/web/api/queries/README.md) in the following formats: | format|content type|description| | :----:|:----------:|:----------| diff --git a/web/api/formatters/json/README.md b/web/api/formatters/json/README.md index a0f8108e7..75f729ada 100644 --- a/web/api/formatters/json/README.md +++ b/web/api/formatters/json/README.md @@ -5,7 +5,7 @@ custom_edit_url: https://github.com/netdata/netdata/edit/master/web/api/formatte # JSON formatter -The CSV formatter presents [results of database queries](/web/api/queries/README.md) in the following formats: +The CSV formatter presents [results of database queries](https://github.com/netdata/netdata/blob/master/web/api/queries/README.md) in the following formats: | format | content type | description| |:----:|:----------:|:----------| diff --git a/web/api/formatters/json_wrapper.c b/web/api/formatters/json_wrapper.c index 8b9b7522c..beb74912e 100644 --- a/web/api/formatters/json_wrapper.c +++ b/web/api/formatters/json_wrapper.c @@ -32,6 +32,53 @@ static int fill_formatted_callback(const char *name, const char *value, RRDLABEL return 1; } +void rrdr_show_plan(RRDR *r, BUFFER *wb, const char *kq, const char *sq __maybe_unused) { + QUERY_TARGET *qt = r->internal.qt; + + buffer_sprintf(wb, "\n\t%squery_plan%s: {", kq, kq); + + for(size_t m = 0; m < qt->query.used; m++) { + QUERY_METRIC *qm = &qt->query.array[m]; + + if(m) + buffer_strcat(wb, ","); + + buffer_sprintf(wb, "\n\t\t%s%s%s: {", kq, string2str(qm->dimension.id), kq); + + buffer_sprintf(wb, "\n\t\t\t%splans%s: [", kq, kq); + for(size_t p = 0; p < qm->plan.used ;p++) { + QUERY_PLAN_ENTRY *qp = &qm->plan.array[p]; + if(p) + buffer_strcat(wb, ","); + + buffer_strcat(wb, "\n\t\t\t\t{"); + buffer_sprintf(wb, "\n\t\t\t\t\t%stier%s: %zu,", kq, kq, qp->tier); + buffer_sprintf(wb, "\n\t\t\t\t\t%safter%s: %ld,", kq, kq, qp->after); + buffer_sprintf(wb, "\n\t\t\t\t\t%sbefore%s: %ld", kq, kq, qp->before); + buffer_strcat(wb, "\n\t\t\t\t}"); + } + buffer_strcat(wb, "\n\t\t\t],"); + + buffer_sprintf(wb, "\n\t\t\t%stiers%s: [", kq, kq); + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if(tier) + buffer_strcat(wb, ","); + + buffer_strcat(wb, "\n\t\t\t\t{"); + buffer_sprintf(wb, "\n\t\t\t\t\t%stier%s: %zu,", kq, kq, tier); + buffer_sprintf(wb, "\n\t\t\t\t\t%sdb_first_time%s: %ld,", kq, kq, qm->tiers[tier].db_first_time_s); + buffer_sprintf(wb, "\n\t\t\t\t\t%sdb_last_time%s: %ld,", kq, kq, qm->tiers[tier].db_last_time_s); + buffer_sprintf(wb, "\n\t\t\t\t\t%sweight%s: %ld", kq, kq, qm->tiers[tier].weight); + buffer_strcat(wb, "\n\t\t\t\t}"); + } + buffer_strcat(wb, "\n\t\t\t]"); + + buffer_strcat(wb, "\n\t\t}"); + } + + buffer_strcat(wb, "\n\t},"); +} + void rrdr_json_wrapper_begin(RRDR *r, BUFFER *wb, uint32_t format, RRDR_OPTIONS options, int string_value, RRDR_GROUPING group_method) { @@ -70,9 +117,9 @@ void rrdr_json_wrapper_begin(RRDR *r, BUFFER *wb, uint32_t format, RRDR_OPTIONS , kq, kq, sq, qt->id, sq , kq, kq, sq, qt->id, sq , kq, kq, (long long)r->update_every - , kq, kq, (long long)qt->db.minimum_latest_update_every - , kq, kq, (long long)qt->db.first_time_t - , kq, kq, (long long)qt->db.last_time_t + , kq, kq, (long long)qt->db.minimum_latest_update_every_s + , kq, kq, (long long)qt->db.first_time_s + , kq, kq, (long long)qt->db.last_time_s , kq, kq, (long long)r->before , kq, kq, (long long)r->after , kq, kq, sq, web_client_api_request_v1_data_group_to_string(group_method), sq @@ -369,9 +416,12 @@ void rrdr_json_wrapper_begin(RRDR *r, BUFFER *wb, uint32_t format, RRDR_OPTIONS for(size_t tier = 0; tier < storage_tiers ; tier++) buffer_sprintf(wb, "%s%zu", tier>0?", ":"", r->internal.tier_points_read[tier]); - buffer_strcat(wb, " ]"); + buffer_strcat(wb, " ],"); + + if(options & RRDR_OPTION_SHOW_PLAN) + rrdr_show_plan(r, wb, kq, sq); - buffer_sprintf(wb, ",\n %sresult%s: ", kq, kq); + buffer_sprintf(wb, "\n %sresult%s: ", kq, kq); if(string_value) buffer_strcat(wb, sq); //info("JSONWRAPPER(): %s: END", r->st->id); diff --git a/web/api/formatters/rrd2json.c b/web/api/formatters/rrd2json.c index 8bf547192..64cde5b2b 100644 --- a/web/api/formatters/rrd2json.c +++ b/web/api/formatters/rrd2json.c @@ -77,6 +77,7 @@ int rrdset2value_api_v1( , time_t timeout , size_t tier , QUERY_SOURCE query_source + , STORAGE_PRIORITY priority ) { int ret = HTTP_RESP_INTERNAL_SERVER_ERROR; @@ -94,7 +95,8 @@ int rrdset2value_api_v1( group_options, timeout, tier, - query_source); + query_source, + priority); if(!r) { if(value_is_null) *value_is_null = 1; diff --git a/web/api/formatters/rrd2json.h b/web/api/formatters/rrd2json.h index 048281d7e..88b9f773f 100644 --- a/web/api/formatters/rrd2json.h +++ b/web/api/formatters/rrd2json.h @@ -79,6 +79,7 @@ int rrdset2value_api_v1( , time_t timeout , size_t tier , QUERY_SOURCE query_source + , STORAGE_PRIORITY priority ); #endif /* NETDATA_RRD2JSON_H */ diff --git a/web/api/formatters/rrdset2json.c b/web/api/formatters/rrdset2json.c index 1e8106335..449d4ddf5 100644 --- a/web/api/formatters/rrdset2json.c +++ b/web/api/formatters/rrdset2json.c @@ -25,8 +25,8 @@ void chart_labels2json(RRDSET *st, BUFFER *wb, size_t indentation) // generate JSON for the /api/v1/chart API call void rrdset2json(RRDSET *st, BUFFER *wb, size_t *dimensions_count, size_t *memory_used, int skip_volatile) { - time_t first_entry_t = rrdset_first_entry_t(st); - time_t last_entry_t = rrdset_last_entry_t(st); + time_t first_entry_t = rrdset_first_entry_s(st); + time_t last_entry_t = rrdset_last_entry_s(st); buffer_sprintf( wb, @@ -83,7 +83,7 @@ void rrdset2json(RRDSET *st, BUFFER *wb, size_t *dimensions_count, size_t *memor "\t\t\t\"dimensions\": {\n", st->update_every); - unsigned long memory = sizeof(RRDSET) + st->memsize; + unsigned long memory = sizeof(RRDSET); size_t dimensions = 0; RRDDIM *rd; diff --git a/web/api/formatters/ssv/README.md b/web/api/formatters/ssv/README.md index d9e193d66..4ca2a64ca 100644 --- a/web/api/formatters/ssv/README.md +++ b/web/api/formatters/ssv/README.md @@ -5,7 +5,7 @@ custom_edit_url: https://github.com/netdata/netdata/edit/master/web/api/formatte # SSV formatter -The SSV formatter sums all dimensions in [results of database queries](/web/api/queries/README.md) +The SSV formatter sums all dimensions in [results of database queries](https://github.com/netdata/netdata/blob/master/web/api/queries/README.md) to a single value and returns a list of such values showing how it changes through time. It supports the following formats: diff --git a/web/api/formatters/value/README.md b/web/api/formatters/value/README.md index a51e32de7..5b75ded7c 100644 --- a/web/api/formatters/value/README.md +++ b/web/api/formatters/value/README.md @@ -5,7 +5,7 @@ custom_edit_url: https://github.com/netdata/netdata/edit/master/web/api/formatte # Value formatter -The Value formatter presents [results of database queries](/web/api/queries/README.md) as a single value. +The Value formatter presents [results of database queries](https://github.com/netdata/netdata/blob/master/web/api/queries/README.md) as a single value. To calculate the single value to be returned, it sums the values of all dimensions. @@ -18,7 +18,7 @@ The Value formatter respects the following API `&options=`: | `min2max` | yes | to return the delta from the minimum value to the maximum value (across dimensions)| The Value formatter is not exposed by the API by itself. -Instead it is used by the [`ssv`](/web/api/formatters/ssv/README.md) formatter -and [health monitoring queries](/health/README.md). +Instead it is used by the [`ssv`](https://github.com/netdata/netdata/blob/master/web/api/formatters/ssv/README.md) formatter +and [health monitoring queries](https://github.com/netdata/netdata/blob/master/health/README.md). diff --git a/web/api/formatters/value/value.c b/web/api/formatters/value/value.c index 46a71303e..915d58ac9 100644 --- a/web/api/formatters/value/value.c +++ b/web/api/formatters/value/value.c @@ -106,7 +106,7 @@ QUERY_VALUE rrdmetric2value(RRDHOST *host, struct rrdcontext_acquired *rca, struct rrdinstance_acquired *ria, struct rrdmetric_acquired *rma, time_t after, time_t before, RRDR_OPTIONS options, RRDR_GROUPING group_method, const char *group_options, - size_t tier, time_t timeout, QUERY_SOURCE query_source + size_t tier, time_t timeout, QUERY_SOURCE query_source, STORAGE_PRIORITY priority ) { QUERY_TARGET_REQUEST qtr = { .host = host, @@ -122,6 +122,7 @@ QUERY_VALUE rrdmetric2value(RRDHOST *host, .tier = tier, .timeout = timeout, .query_source = query_source, + .priority = priority, }; ONEWAYALLOC *owa = onewayalloc_create(16 * 1024); diff --git a/web/api/formatters/value/value.h b/web/api/formatters/value/value.h index 76b1869f3..3f7f51ccb 100644 --- a/web/api/formatters/value/value.h +++ b/web/api/formatters/value/value.h @@ -23,7 +23,7 @@ QUERY_VALUE rrdmetric2value(RRDHOST *host, struct rrdcontext_acquired *rca, struct rrdinstance_acquired *ria, struct rrdmetric_acquired *rma, time_t after, time_t before, RRDR_OPTIONS options, RRDR_GROUPING group_method, const char *group_options, - size_t tier, time_t timeout, QUERY_SOURCE query_source + size_t tier, time_t timeout, QUERY_SOURCE query_source, STORAGE_PRIORITY priority ); NETDATA_DOUBLE rrdr2value(RRDR *r, long i, RRDR_OPTIONS options, int *all_values_are_null, NETDATA_DOUBLE *anomaly_rate); diff --git a/web/api/health/README.md b/web/api/health/README.md index 9ec8f31c0..bfdd0ac68 100644 --- a/web/api/health/README.md +++ b/web/api/health/README.md @@ -72,7 +72,7 @@ You can access the API via GET requests, by adding the bearer token to an `Autho curl "http://NODE:19999/api/v1/manage/health?cmd=RESET" -H "X-Auth-Token: Mytoken" ``` -By default access to the health management API is only allowed from `localhost`. Accessing the API from anything else will return a 403 error with the message `You are not allowed to access this resource.`. You can change permissions by editing the `allow management from` variable in `netdata.conf` within the [web] section. See [web server access lists](/web/server/README.md#access-lists) for more information. +By default access to the health management API is only allowed from `localhost`. Accessing the API from anything else will return a 403 error with the message `You are not allowed to access this resource.`. You can change permissions by editing the `allow management from` variable in `netdata.conf` within the [web] section. See [web server access lists](https://github.com/netdata/netdata/blob/master/web/server/README.md#access-lists) for more information. The command `RESET` just returns Netdata to the default operation, with all health checks and notifications enabled. If you've configured and entered your token correctly, you should see the plain text response `All health checks and notifications are enabled`. @@ -126,7 +126,7 @@ curl "http://NODE:19999/api/v1/manage/health?cmd=SILENCE&context=load" -H "X-Aut #### Selection criteria -The `selection criteria` are key/value pairs, in the format `key : value`, where value is a Netdata [simple pattern](/libnetdata/simple_pattern/README.md). This means that you can create very powerful selectors (you will rarely need more than one or two). +The `selection criteria` are key/value pairs, in the format `key : value`, where value is a Netdata [simple pattern](https://github.com/netdata/netdata/blob/master/libnetdata/simple_pattern/README.md). This means that you can create very powerful selectors (you will rarely need more than one or two). The accepted keys for the `selection criteria` are the following: @@ -220,6 +220,6 @@ The file's location is configurable in `netdata.conf`. The default is shown belo ### Further reading -The test script under [tests/health_mgmtapi](/tests/health_mgmtapi/README.md) contains a series of tests that you can either run or read through to understand the various calls and responses better. +The test script under [tests/health_mgmtapi](https://github.com/netdata/netdata/blob/master/tests/health_mgmtapi/README.md) contains a series of tests that you can either run or read through to understand the various calls and responses better. diff --git a/web/api/health/health_cmdapi.c b/web/api/health/health_cmdapi.c index bad3e960a..7a939bc0f 100644 --- a/web/api/health/health_cmdapi.c +++ b/web/api/health/health_cmdapi.c @@ -196,7 +196,7 @@ int web_client_api_request_v1_mgmt_health(RRDHOST *host, struct web_client *w, c w->response.data = wb; buffer_no_cacheable(w->response.data); if (ret == HTTP_RESP_OK && config_changed) { - BUFFER *jsonb = buffer_create(200); + BUFFER *jsonb = buffer_create(200, &netdata_buffers_statistics.buffers_health); health_silencers2json(jsonb); health_silencers2file(jsonb); buffer_free(jsonb); diff --git a/web/api/queries/README.md b/web/api/queries/README.md index 44cdd05b4..2a17ac784 100644 --- a/web/api/queries/README.md +++ b/web/api/queries/README.md @@ -88,7 +88,7 @@ To disable alignment, pass `&options=unaligned` to the query. To execute the query, the engine evaluates all dimensions of the chart, one after another. -The engine does not evaluate dimensions that do not match the [simple pattern](/libnetdata/simple_pattern/README.md) +The engine does not evaluate dimensions that do not match the [simple pattern](https://github.com/netdata/netdata/blob/master/libnetdata/simple_pattern/README.md) given at the `dimensions` parameter, except when `options=percentage` is given (this option requires all the dimensions to be evaluated to find the percentage of each dimension vs to chart total). diff --git a/web/api/queries/query.c b/web/api/queries/query.c index 0365b6e96..1f10c5137 100644 --- a/web/api/queries/query.c +++ b/web/api/queries/query.c @@ -17,6 +17,8 @@ #include "percentile/percentile.h" #include "trimmed_mean/trimmed_mean.h" +#define POINTS_TO_EXPAND_QUERY 5 + // ---------------------------------------------------------------------------- static struct { @@ -694,7 +696,7 @@ static inline void rrdr_done(RRDR *r, long rrdr_line) { // tier management static bool query_metric_is_valid_tier(QUERY_METRIC *qm, size_t tier) { - if(!qm->tiers[tier].db_metric_handle || !qm->tiers[tier].db_first_time_t || !qm->tiers[tier].db_last_time_t || !qm->tiers[tier].db_update_every) + if(!qm->tiers[tier].db_metric_handle || !qm->tiers[tier].db_first_time_s || !qm->tiers[tier].db_last_time_s || !qm->tiers[tier].db_update_every_s) return false; return true; @@ -705,11 +707,11 @@ static size_t query_metric_first_working_tier(QUERY_METRIC *qm) { // find the db time-range for this tier for all metrics STORAGE_METRIC_HANDLE *db_metric_handle = qm->tiers[tier].db_metric_handle; - time_t first_t = qm->tiers[tier].db_first_time_t; - time_t last_t = qm->tiers[tier].db_last_time_t; - time_t update_every = qm->tiers[tier].db_update_every; + time_t first_time_s = qm->tiers[tier].db_first_time_s; + time_t last_time_s = qm->tiers[tier].db_last_time_s; + time_t update_every_s = qm->tiers[tier].db_update_every_s; - if(!db_metric_handle || !first_t || !last_t || !update_every) + if(!db_metric_handle || !first_time_s || !last_time_s || !update_every_s) continue; return tier; @@ -718,19 +720,23 @@ static size_t query_metric_first_working_tier(QUERY_METRIC *qm) { return 0; } -static long query_plan_points_coverage_weight(time_t db_first_t, time_t db_last_t, time_t db_update_every, time_t after_wanted, time_t before_wanted, size_t points_wanted, size_t tier __maybe_unused) { - if(db_first_t == 0 || db_last_t == 0 || db_update_every == 0) +static long query_plan_points_coverage_weight(time_t db_first_time_s, time_t db_last_time_s, time_t db_update_every_s, time_t after_wanted, time_t before_wanted, size_t points_wanted, size_t tier __maybe_unused) { + if(db_first_time_s == 0 || + db_last_time_s == 0 || + db_update_every_s == 0 || + db_first_time_s > before_wanted || + db_last_time_s < after_wanted) return -LONG_MAX; - time_t common_first_t = MAX(db_first_t, after_wanted); - time_t common_last_t = MIN(db_last_t, before_wanted); + long long common_first_t = MAX(db_first_time_s, after_wanted); + long long common_last_t = MIN(db_last_time_s, before_wanted); - long time_coverage = (common_last_t - common_first_t) * 1000000 / (before_wanted - after_wanted); - size_t points_wanted_in_coverage = points_wanted * time_coverage / 1000000; + long long time_coverage = (common_last_t - common_first_t) * 1000000LL / (before_wanted - after_wanted); + long long points_wanted_in_coverage = (long long)points_wanted * time_coverage / 1000000LL; - long points_available = (common_last_t - common_first_t) / db_update_every; - long points_delta = (long)(points_available - points_wanted_in_coverage); - long points_coverage = (points_delta < 0) ? (long)(points_available * time_coverage / points_wanted_in_coverage) : time_coverage; + long long points_available = (common_last_t - common_first_t) / db_update_every_s; + long long points_delta = (long)(points_available - points_wanted_in_coverage); + long long points_coverage = (points_delta < 0) ? (long)(points_available * time_coverage / points_wanted_in_coverage) : time_coverage; // a way to benefit higher tiers // points_coverage += (long)tier * 10000; @@ -738,7 +744,7 @@ static long query_plan_points_coverage_weight(time_t db_first_t, time_t db_last_ if(points_available <= 0) return -LONG_MAX; - return points_coverage; + return (long)(points_coverage + (25000LL * tier)); // 2.5% benefit for each higher tier } static size_t query_metric_best_tier_for_timeframe(QUERY_METRIC *qm, time_t after_wanted, time_t before_wanted, size_t points_wanted) { @@ -748,27 +754,49 @@ static size_t query_metric_best_tier_for_timeframe(QUERY_METRIC *qm, time_t afte if(unlikely(after_wanted == before_wanted || points_wanted <= 0)) return query_metric_first_working_tier(qm); - long weight[storage_tiers]; + time_t min_first_time_s = 0; + time_t max_last_time_s = 0; + + for(size_t tier = 0; tier < storage_tiers ; tier++) { + time_t first_time_s = qm->tiers[tier].db_first_time_s; + time_t last_time_s = qm->tiers[tier].db_last_time_s; + + if(!min_first_time_s || (first_time_s && first_time_s < min_first_time_s)) + min_first_time_s = first_time_s; + + if(!max_last_time_s || (last_time_s && last_time_s > max_last_time_s)) + max_last_time_s = last_time_s; + } for(size_t tier = 0; tier < storage_tiers ; tier++) { // find the db time-range for this tier for all metrics STORAGE_METRIC_HANDLE *db_metric_handle = qm->tiers[tier].db_metric_handle; - time_t first_t = qm->tiers[tier].db_first_time_t; - time_t last_t = qm->tiers[tier].db_last_time_t; - time_t update_every = qm->tiers[tier].db_update_every; - - if(!db_metric_handle || !first_t || !last_t || !update_every) { - weight[tier] = -LONG_MAX; + time_t first_time_s = qm->tiers[tier].db_first_time_s; + time_t last_time_s = qm->tiers[tier].db_last_time_s; + time_t update_every_s = qm->tiers[tier].db_update_every_s; + + if( !db_metric_handle || + !first_time_s || + !last_time_s || + !update_every_s || + first_time_s > before_wanted || + last_time_s < after_wanted + ) { + qm->tiers[tier].weight = -LONG_MAX; continue; } - weight[tier] = query_plan_points_coverage_weight(first_t, last_t, update_every, after_wanted, before_wanted, points_wanted, tier); + internal_fatal(first_time_s > before_wanted || last_time_s < after_wanted, "QUERY: invalid db durations"); + + qm->tiers[tier].weight = query_plan_points_coverage_weight( + min_first_time_s, max_last_time_s, update_every_s, + after_wanted, before_wanted, points_wanted, tier); } size_t best_tier = 0; for(size_t tier = 1; tier < storage_tiers ; tier++) { - if(weight[tier] >= weight[best_tier]) + if(qm->tiers[tier].weight >= qm->tiers[best_tier].weight) best_tier = tier; } @@ -788,38 +816,38 @@ static size_t rrddim_find_best_tier_for_timeframe(QUERY_TARGET *qt, time_t after for(size_t tier = 0; tier < storage_tiers ; tier++) { - time_t common_first_t = 0; - time_t common_last_t = 0; - time_t common_update_every = 0; + time_t common_first_time_s = 0; + time_t common_last_time_s = 0; + time_t common_update_every_s = 0; // find the db time-range for this tier for all metrics for(size_t i = 0, used = qt->query.used; i < used ; i++) { QUERY_METRIC *qm = &qt->query.array[i]; - time_t first_t = qm->tiers[tier].db_first_time_t; - time_t last_t = qm->tiers[tier].db_last_time_t; - time_t update_every = qm->tiers[tier].db_update_every; + time_t first_time_s = qm->tiers[tier].db_first_time_s; + time_t last_time_s = qm->tiers[tier].db_last_time_s; + time_t update_every_s = qm->tiers[tier].db_update_every_s; - if(!first_t || !last_t || !update_every) + if(!first_time_s || !last_time_s || !update_every_s) continue; - if(!common_first_t) - common_first_t = first_t; + if(!common_first_time_s) + common_first_time_s = first_time_s; else - common_first_t = MIN(first_t, common_first_t); + common_first_time_s = MIN(first_time_s, common_first_time_s); - if(!common_last_t) - common_last_t = last_t; + if(!common_last_time_s) + common_last_time_s = last_time_s; else - common_last_t = MAX(last_t, common_last_t); + common_last_time_s = MAX(last_time_s, common_last_time_s); - if(!common_update_every) - common_update_every = update_every; + if(!common_update_every_s) + common_update_every_s = update_every_s; else - common_update_every = MIN(update_every, common_update_every); + common_update_every_s = MIN(update_every_s, common_update_every_s); } - weight[tier] = query_plan_points_coverage_weight(common_first_t, common_last_t, common_update_every, after_wanted, before_wanted, points_wanted, tier); + weight[tier] = query_plan_points_coverage_weight(common_first_time_s, common_last_time_s, common_update_every_s, after_wanted, before_wanted, points_wanted, tier); } size_t best_tier = 0; @@ -842,19 +870,19 @@ static time_t rrdset_find_natural_update_every_for_timeframe(QUERY_TARGET *qt, t best_tier = rrddim_find_best_tier_for_timeframe(qt, after_wanted, before_wanted, points_wanted); // find the db minimum update every for this tier for all metrics - time_t common_update_every = default_rrd_update_every; + time_t common_update_every_s = default_rrd_update_every; for(size_t i = 0, used = qt->query.used; i < used ; i++) { QUERY_METRIC *qm = &qt->query.array[i]; - time_t update_every = qm->tiers[best_tier].db_update_every; + time_t update_every_s = qm->tiers[best_tier].db_update_every_s; if(!i) - common_update_every = update_every; + common_update_every_s = update_every_s; else - common_update_every = MIN(update_every, common_update_every); + common_update_every_s = MIN(update_every_s, common_update_every_s); } - return common_update_every; + return common_update_every_s; } // ---------------------------------------------------------------------------- @@ -888,17 +916,6 @@ QUERY_POINT QUERY_POINT_EMPTY = { #define query_point_set_id(point, point_id) debug_dummy() #endif -typedef struct query_plan_entry { - size_t tier; - time_t after; - time_t before; -} QUERY_PLAN_ENTRY; - -typedef struct query_plan { - size_t entries; - QUERY_PLAN_ENTRY data[RRD_STORAGE_TIERS*2]; -} QUERY_PLAN; - typedef struct query_engine_ops { // configuration RRDR *r; @@ -908,14 +925,15 @@ typedef struct query_engine_ops { TIER_QUERY_FETCH tier_query_fetch; // query planer - QUERY_PLAN plan; size_t current_plan; time_t current_plan_expire_time; + time_t plan_expanded_after; + time_t plan_expanded_before; // storage queries size_t tier; struct query_metric_tier *tier_ptr; - struct storage_engine_query_handle handle; + struct storage_engine_query_handle *handle; STORAGE_POINT (*next_metric)(struct storage_engine_query_handle *handle); int (*is_finished)(struct storage_engine_query_handle *handle); void (*finalize)(struct storage_engine_query_handle *handle); @@ -937,31 +955,128 @@ typedef struct query_engine_ops { // ---------------------------------------------------------------------------- // query planer -#define query_plan_should_switch_plan(ops, now) ((now) >= (ops).current_plan_expire_time) +#define query_plan_should_switch_plan(ops, now) ((now) >= (ops)->current_plan_expire_time) + +static size_t query_planer_expand_duration_in_points(time_t this_update_every, time_t next_update_every) { + + time_t delta = this_update_every - next_update_every; + if(delta < 0) delta = -delta; + + size_t points; + if(delta < this_update_every * POINTS_TO_EXPAND_QUERY) + points = POINTS_TO_EXPAND_QUERY; + else + points = (delta + this_update_every - 1) / this_update_every; + + return points; +} -static void query_planer_activate_plan(QUERY_ENGINE_OPS *ops, size_t plan_id, time_t overwrite_after) { - if(unlikely(plan_id >= ops->plan.entries)) - plan_id = ops->plan.entries - 1; +static void query_planer_initialize_plans(QUERY_ENGINE_OPS *ops) { + QUERY_METRIC *qm = ops->qm; + + for(size_t p = 0; p < qm->plan.used ; p++) { + size_t tier = qm->plan.array[p].tier; + time_t update_every = qm->tiers[tier].db_update_every_s; + + size_t points_to_add_to_after; + if(p > 0) { + // there is another plan before to this + + size_t tier0 = qm->plan.array[p - 1].tier; + time_t update_every0 = qm->tiers[tier0].db_update_every_s; + + points_to_add_to_after = query_planer_expand_duration_in_points(update_every, update_every0); + } + else + points_to_add_to_after = (tier == 0) ? 0 : POINTS_TO_EXPAND_QUERY; - time_t after = ops->plan.data[plan_id].after; - time_t before = ops->plan.data[plan_id].before; + size_t points_to_add_to_before; + if(p + 1 < qm->plan.used) { + // there is another plan after to this - if(overwrite_after > after && overwrite_after < before) - after = overwrite_after; + size_t tier1 = qm->plan.array[p+1].tier; + time_t update_every1 = qm->tiers[tier1].db_update_every_s; - ops->tier = ops->plan.data[plan_id].tier; - ops->tier_ptr = &ops->qm->tiers[ops->tier]; - ops->tier_ptr->eng->api.query_ops.init(ops->tier_ptr->db_metric_handle, &ops->handle, after, before); - ops->next_metric = ops->tier_ptr->eng->api.query_ops.next_metric; - ops->is_finished = ops->tier_ptr->eng->api.query_ops.is_finished; - ops->finalize = ops->tier_ptr->eng->api.query_ops.finalize; + points_to_add_to_before = query_planer_expand_duration_in_points(update_every, update_every1); + } + else + points_to_add_to_before = POINTS_TO_EXPAND_QUERY; + + time_t after = qm->plan.array[p].after - (time_t)(update_every * points_to_add_to_after); + time_t before = qm->plan.array[p].before + (time_t)(update_every * points_to_add_to_before); + + qm->plan.array[p].expanded_after = after; + qm->plan.array[p].expanded_before = before; + + struct query_metric_tier *tier_ptr = &qm->tiers[tier]; + tier_ptr->eng->api.query_ops.init( + tier_ptr->db_metric_handle, + &qm->plan.array[p].handle, + after, before, + ops->r->internal.qt->request.priority); + + qm->plan.array[p].next_metric = tier_ptr->eng->api.query_ops.next_metric; + qm->plan.array[p].is_finished = tier_ptr->eng->api.query_ops.is_finished; + qm->plan.array[p].finalize = tier_ptr->eng->api.query_ops.finalize; + qm->plan.array[p].initialized = true; + qm->plan.array[p].finalized = false; + } +} + +static void query_planer_finalize_plan(QUERY_ENGINE_OPS *ops, size_t plan_id) { + QUERY_METRIC *qm = ops->qm; + + if(qm->plan.array[plan_id].initialized && !qm->plan.array[plan_id].finalized) { + qm->plan.array[plan_id].finalize(&qm->plan.array[plan_id].handle); + qm->plan.array[plan_id].initialized = false; + qm->plan.array[plan_id].finalized = true; + qm->plan.array[plan_id].next_metric = NULL; + qm->plan.array[plan_id].is_finished = NULL; + qm->plan.array[plan_id].finalize = NULL; + + if(ops->current_plan == plan_id) { + ops->next_metric = NULL; + ops->is_finished = NULL; + ops->finalize = NULL; + } + } +} + +static void query_planer_finalize_remaining_plans(QUERY_ENGINE_OPS *ops) { + QUERY_METRIC *qm = ops->qm; + + for(size_t p = 0; p < qm->plan.used ; p++) + query_planer_finalize_plan(ops, p); +} + +static void query_planer_activate_plan(QUERY_ENGINE_OPS *ops, size_t plan_id, time_t overwrite_after __maybe_unused) { + QUERY_METRIC *qm = ops->qm; + + internal_fatal(plan_id >= qm->plan.used, "QUERY: invalid plan_id given"); + internal_fatal(!qm->plan.array[plan_id].initialized, "QUERY: plan has not been initialized"); + internal_fatal(qm->plan.array[plan_id].finalized, "QUERY: plan has been finalized"); + + internal_fatal(qm->plan.array[plan_id].after > qm->plan.array[plan_id].before, "QUERY: flipped after/before"); + + ops->tier = qm->plan.array[plan_id].tier; + ops->tier_ptr = &qm->tiers[ops->tier]; + ops->handle = &qm->plan.array[plan_id].handle; + ops->next_metric = qm->plan.array[plan_id].next_metric; + ops->is_finished = qm->plan.array[plan_id].is_finished; + ops->finalize = qm->plan.array[plan_id].finalize; ops->current_plan = plan_id; - ops->current_plan_expire_time = ops->plan.data[plan_id].before; + + if(plan_id + 1 < qm->plan.used && qm->plan.array[plan_id + 1].after < qm->plan.array[plan_id].before) + ops->current_plan_expire_time = qm->plan.array[plan_id + 1].after; + else + ops->current_plan_expire_time = qm->plan.array[plan_id].before; + + ops->plan_expanded_after = qm->plan.array[plan_id].expanded_after; + ops->plan_expanded_before = qm->plan.array[plan_id].expanded_before; } -static void query_planer_next_plan(QUERY_ENGINE_OPS *ops, time_t now, time_t last_point_end_time) { - internal_error(now < ops->current_plan_expire_time && now < ops->plan.data[ops->current_plan].before, - "QUERY: switching query plan too early!"); +static bool query_planer_next_plan(QUERY_ENGINE_OPS *ops, time_t now, time_t last_point_end_time) { + QUERY_METRIC *qm = ops->qm; size_t old_plan = ops->current_plan; @@ -969,32 +1084,26 @@ static void query_planer_next_plan(QUERY_ENGINE_OPS *ops, time_t now, time_t las do { ops->current_plan++; - if (ops->current_plan >= ops->plan.entries) { + if (ops->current_plan >= qm->plan.used) { ops->current_plan = old_plan; ops->current_plan_expire_time = ops->r->internal.qt->window.before; // let the query run with current plan // we will not switch it - return; + return false; } - next_plan_before_time = ops->plan.data[ops->current_plan].before; + next_plan_before_time = qm->plan.array[ops->current_plan].before; } while(now >= next_plan_before_time || last_point_end_time >= next_plan_before_time); - if(!query_metric_is_valid_tier(ops->qm, ops->plan.data[ops->current_plan].tier)) { + if(!query_metric_is_valid_tier(qm, qm->plan.array[ops->current_plan].tier)) { ops->current_plan = old_plan; ops->current_plan_expire_time = ops->r->internal.qt->window.before; - return; - } - - if(ops->finalize) { - ops->finalize(&ops->handle); - ops->finalize = NULL; - ops->is_finished = NULL; + return false; } - // internal_error(true, "QUERY: switched plan to %zu (all is %zu), previous expiration was %ld, this starts at %ld, now is %ld, last_point_end_time %ld", ops->current_plan, ops->plan.entries, ops->plan.data[ops->current_plan-1].before, ops->plan.data[ops->current_plan].after, now, last_point_end_time); - + query_planer_finalize_plan(ops, old_plan); query_planer_activate_plan(ops, ops->current_plan, MIN(now, last_point_end_time)); + return true; } static int compare_query_plan_entries_on_start_time(const void *a, const void *b) { @@ -1004,59 +1113,66 @@ static int compare_query_plan_entries_on_start_time(const void *a, const void *b } static bool query_plan(QUERY_ENGINE_OPS *ops, time_t after_wanted, time_t before_wanted, size_t points_wanted) { - //BUFFER *wb = buffer_create(1000); - //buffer_sprintf(wb, "QUERY PLAN for chart '%s' dimension '%s', from %ld to %ld:", rd->rrdset->name, rd->name, after_wanted, before_wanted); + QUERY_METRIC *qm = ops->qm; // put our selected tier as the first plan size_t selected_tier; if(ops->r->internal.query_options & RRDR_OPTION_SELECTED_TIER && ops->r->internal.qt->window.tier < storage_tiers - && query_metric_is_valid_tier(ops->qm, ops->r->internal.qt->window.tier)) { + && query_metric_is_valid_tier(qm, ops->r->internal.qt->window.tier)) { selected_tier = ops->r->internal.qt->window.tier; } else { - selected_tier = query_metric_best_tier_for_timeframe(ops->qm, after_wanted, before_wanted, points_wanted); + selected_tier = query_metric_best_tier_for_timeframe(qm, after_wanted, before_wanted, points_wanted); if(ops->r->internal.query_options & RRDR_OPTION_SELECTED_TIER) ops->r->internal.query_options &= ~RRDR_OPTION_SELECTED_TIER; + + if(!query_metric_is_valid_tier(qm, selected_tier)) + return false; + + if(qm->tiers[selected_tier].db_first_time_s > before_wanted || + qm->tiers[selected_tier].db_last_time_s < after_wanted) + return false; } - ops->plan.entries = 1; - ops->plan.data[0].tier = selected_tier; - ops->plan.data[0].after = ops->qm->tiers[selected_tier].db_first_time_t; - ops->plan.data[0].before = ops->qm->tiers[selected_tier].db_last_time_t; + qm->plan.used = 1; + qm->plan.array[0].tier = selected_tier; + qm->plan.array[0].after = (qm->tiers[selected_tier].db_first_time_s < after_wanted) ? after_wanted : qm->tiers[selected_tier].db_first_time_s; + qm->plan.array[0].before = (qm->tiers[selected_tier].db_last_time_s > before_wanted) ? before_wanted : qm->tiers[selected_tier].db_last_time_s; if(!(ops->r->internal.query_options & RRDR_OPTION_SELECTED_TIER)) { // the selected tier - time_t selected_tier_first_time_t = ops->plan.data[0].after; - time_t selected_tier_last_time_t = ops->plan.data[0].before; - - //buffer_sprintf(wb, ": SELECTED tier %zu, from %ld to %ld", selected_tier, ops->plan.data[0].after, ops->plan.data[0].before); + time_t selected_tier_first_time_s = qm->plan.array[0].after; + time_t selected_tier_last_time_s = qm->plan.array[0].before; // check if our selected tier can start the query - if (selected_tier_first_time_t > after_wanted) { + if (selected_tier_first_time_s > after_wanted) { // we need some help from other tiers for (size_t tr = (int)selected_tier + 1; tr < storage_tiers; tr++) { - if(!query_metric_is_valid_tier(ops->qm, tr)) + if(!query_metric_is_valid_tier(qm, tr)) continue; // find the first time of this tier - time_t first_time_t = ops->qm->tiers[tr].db_first_time_t; - - //buffer_sprintf(wb, ": EVAL AFTER tier %d, %ld", tier, first_time_t); + time_t tier_first_time_s = qm->tiers[tr].db_first_time_s; // can it help? - if (first_time_t < selected_tier_first_time_t) { + if (tier_first_time_s < selected_tier_first_time_s) { // it can help us add detail at the beginning of the query QUERY_PLAN_ENTRY t = { .tier = tr, - .after = (first_time_t < after_wanted) ? after_wanted : first_time_t, - .before = selected_tier_first_time_t}; - ops->plan.data[ops->plan.entries++] = t; + .after = (tier_first_time_s < after_wanted) ? after_wanted : tier_first_time_s, + .before = selected_tier_first_time_s, + .initialized = false, + .finalized = false, + }; + qm->plan.array[qm->plan.used++] = t; + + internal_fatal(!t.after || !t.before, "QUERY: invalid plan selected"); // prepare for the tier - selected_tier_first_time_t = t.after; + selected_tier_first_time_s = t.after; if (t.after <= after_wanted) break; @@ -1065,28 +1181,33 @@ static bool query_plan(QUERY_ENGINE_OPS *ops, time_t after_wanted, time_t before } // check if our selected tier can finish the query - if (selected_tier_last_time_t < before_wanted) { + if (selected_tier_last_time_s < before_wanted) { // we need some help from other tiers for (int tr = (int)selected_tier - 1; tr >= 0; tr--) { - if(!query_metric_is_valid_tier(ops->qm, tr)) + if(!query_metric_is_valid_tier(qm, tr)) continue; // find the last time of this tier - time_t last_time_t = ops->qm->tiers[tr].db_last_time_t; + time_t tier_last_time_s = qm->tiers[tr].db_last_time_s; - //buffer_sprintf(wb, ": EVAL BEFORE tier %d, %ld", tier, last_time_t); + //buffer_sprintf(wb, ": EVAL BEFORE tier %d, %ld", tier, last_time_s); // can it help? - if (last_time_t > selected_tier_last_time_t) { + if (tier_last_time_s > selected_tier_last_time_s) { // it can help us add detail at the end of the query QUERY_PLAN_ENTRY t = { .tier = tr, - .after = selected_tier_last_time_t, - .before = (last_time_t > before_wanted) ? before_wanted : last_time_t}; - ops->plan.data[ops->plan.entries++] = t; + .after = selected_tier_last_time_s, + .before = (tier_last_time_s > before_wanted) ? before_wanted : tier_last_time_s, + .initialized = false, + .finalized = false, + }; + qm->plan.array[qm->plan.used++] = t; // prepare for the tier - selected_tier_last_time_t = t.before; + selected_tier_last_time_s = t.before; + + internal_fatal(!t.after || !t.before, "QUERY: invalid plan selected"); if (t.before >= before_wanted) break; @@ -1096,26 +1217,21 @@ static bool query_plan(QUERY_ENGINE_OPS *ops, time_t after_wanted, time_t before } // sort the query plan - if(ops->plan.entries > 1) - qsort(&ops->plan.data, ops->plan.entries, sizeof(QUERY_PLAN_ENTRY), compare_query_plan_entries_on_start_time); + if(qm->plan.used > 1) + qsort(&qm->plan.array, qm->plan.used, sizeof(QUERY_PLAN_ENTRY), compare_query_plan_entries_on_start_time); - // make sure it has the whole timeframe we need - if(ops->plan.data[0].after < after_wanted) - ops->plan.data[0].after = after_wanted; - - if(ops->plan.data[ops->plan.entries - 1].before > before_wanted) - ops->plan.data[ops->plan.entries - 1].before = before_wanted; - - //buffer_sprintf(wb, ": FINAL STEPS %zu", ops->plan.entries); - - //for(size_t i = 0; i < ops->plan.entries ;i++) - // buffer_sprintf(wb, ": STEP %zu = use tier %zu from %ld to %ld", i+1, ops->plan.data[i].tier, ops->plan.data[i].after, ops->plan.data[i].before); - - //internal_error(true, "%s", buffer_tostring(wb)); - - if(!query_metric_is_valid_tier(ops->qm, ops->plan.data[0].tier)) + if(!query_metric_is_valid_tier(qm, qm->plan.array[0].tier)) return false; +#ifdef NETDATA_INTERNAL_CHECKS + for(size_t p = 0; p < qm->plan.used ;p++) { + internal_fatal(qm->plan.array[p].after > qm->plan.array[p].before, "QUERY: flipped after/before"); + internal_fatal(qm->plan.array[p].after < after_wanted, "QUERY: too small plan first time"); + internal_fatal(qm->plan.array[p].before > before_wanted, "QUERY: too big plan last time"); + } +#endif + + query_planer_initialize_plans(ops); query_planer_activate_plan(ops, 0, 0); return true; @@ -1146,24 +1262,45 @@ static bool query_plan(QUERY_ENGINE_OPS *ops, time_t after_wanted, time_t before #define query_add_point_to_group(r, point, ops) do { \ if(likely(netdata_double_isnumber((point).value))) { \ if(likely(fpclassify((point).value) != FP_ZERO)) \ - (ops).group_points_non_zero++; \ + (ops)->group_points_non_zero++; \ \ if(unlikely((point).flags & SN_FLAG_RESET)) \ - (ops).group_value_flags |= RRDR_VALUE_RESET; \ + (ops)->group_value_flags |= RRDR_VALUE_RESET; \ \ - (ops).grouping_add(r, (point).value); \ + (ops)->grouping_add(r, (point).value); \ } \ \ - (ops).group_points_added++; \ - (ops).group_anomaly_rate += (point).anomaly; \ + (ops)->group_points_added++; \ + (ops)->group_anomaly_rate += (point).anomaly; \ } while(0) -static inline void rrd2rrdr_do_dimension(RRDR *r, size_t dim_id_in_rrdr) { +static QUERY_ENGINE_OPS *rrd2rrdr_query_prep(RRDR *r, size_t dim_id_in_rrdr) { QUERY_TARGET *qt = r->internal.qt; - QUERY_METRIC *qm = &qt->query.array[dim_id_in_rrdr]; + + QUERY_ENGINE_OPS *ops = onewayalloc_mallocz(r->internal.owa, sizeof(QUERY_ENGINE_OPS)); + *ops = (QUERY_ENGINE_OPS) { + .r = r, + .qm = &qt->query.array[dim_id_in_rrdr], + .grouping_add = r->internal.grouping_add, + .grouping_flush = r->internal.grouping_flush, + .tier_query_fetch = r->internal.tier_query_fetch, + .view_update_every = r->update_every, + .query_granularity = (time_t)(r->update_every / r->group), + .group_value_flags = RRDR_VALUE_NOTHING, + }; + + if(!query_plan(ops, qt->window.after, qt->window.before, qt->window.points)) + return NULL; + + return ops; +} + +static void rrd2rrdr_query_execute(RRDR *r, size_t dim_id_in_rrdr, QUERY_ENGINE_OPS *ops) { + QUERY_TARGET *qt = r->internal.qt; + QUERY_METRIC *qm = &qt->query.array[dim_id_in_rrdr]; (void)qm; size_t points_wanted = qt->window.points; time_t after_wanted = qt->window.after; - time_t before_wanted = qt->window.before; + time_t before_wanted = qt->window.before; (void)before_wanted; // bool debug_this = false; // if(strcmp("user", string2str(rd->id)) == 0 && strcmp("system.cpu", string2str(rd->rrdset->id)) == 0) @@ -1174,39 +1311,30 @@ static inline void rrd2rrdr_do_dimension(RRDR *r, size_t dim_id_in_rrdr) { size_t points_added = 0; - QUERY_ENGINE_OPS ops = { - .r = r, - .qm = qm, - .grouping_add = r->internal.grouping_add, - .grouping_flush = r->internal.grouping_flush, - .tier_query_fetch = r->internal.tier_query_fetch, - .view_update_every = r->update_every, - .query_granularity = (time_t)(r->update_every / r->group), - .group_value_flags = RRDR_VALUE_NOTHING - }; - long rrdr_line = -1; bool use_anomaly_bit_as_value = (r->internal.query_options & RRDR_OPTION_ANOMALY_BIT) ? true : false; - if(!query_plan(&ops, after_wanted, before_wanted, points_wanted)) - return; - NETDATA_DOUBLE min = r->min, max = r->max; QUERY_POINT last2_point = QUERY_POINT_EMPTY; QUERY_POINT last1_point = QUERY_POINT_EMPTY; QUERY_POINT new_point = QUERY_POINT_EMPTY; - time_t now_start_time = after_wanted - ops.query_granularity; - time_t now_end_time = after_wanted + ops.view_update_every - ops.query_granularity; + // ONE POINT READ-AHEAD + // when we switch plans, we read-ahead a point from the next plan + // to join them smoothly at the exact time the next plan begins + STORAGE_POINT next1_point = STORAGE_POINT_UNSET; + + time_t now_start_time = after_wanted - ops->query_granularity; + time_t now_end_time = after_wanted + ops->view_update_every - ops->query_granularity; size_t db_points_read_since_plan_switch = 0; (void)db_points_read_since_plan_switch; // The main loop, based on the query granularity we need - for( ; points_added < points_wanted ; now_start_time = now_end_time, now_end_time += ops.view_update_every) { + for( ; points_added < points_wanted ; now_start_time = now_end_time, now_end_time += ops->view_update_every) { if(unlikely(query_plan_should_switch_plan(ops, now_end_time))) { - query_planer_next_plan(&ops, now_end_time, new_point.end_time); + query_planer_next_plan(ops, now_end_time, new_point.end_time); db_points_read_since_plan_switch = 0; } @@ -1219,7 +1347,7 @@ static inline void rrd2rrdr_do_dimension(RRDR *r, size_t dim_id_in_rrdr) { last1_point = new_point; } - if(unlikely(ops.is_finished(&ops.handle))) { + if(unlikely(ops->is_finished(ops->handle))) { if(count_same_end_time != 0) { last2_point = last1_point; last1_point = new_point; @@ -1235,29 +1363,62 @@ static inline void rrd2rrdr_do_dimension(RRDR *r, size_t dim_id_in_rrdr) { // fetch the new point { - db_points_read_since_plan_switch++; - STORAGE_POINT sp = ops.next_metric(&ops.handle); + STORAGE_POINT sp; + if(likely(storage_point_is_unset(next1_point))) { + db_points_read_since_plan_switch++; + sp = ops->next_metric(ops->handle); + } + else { + // ONE POINT READ-AHEAD + sp = next1_point; + storage_point_unset(next1_point); + db_points_read_since_plan_switch = 1; + } + + // ONE POINT READ-AHEAD + if(unlikely(query_plan_should_switch_plan(ops, sp.end_time_s) && + query_planer_next_plan(ops, now_end_time, new_point.end_time))) { + + // The end time of the current point, crosses our plans (tiers) + // so, we switched plan (tier) + // + // There are 2 cases now: + // + // A. the entire point of the previous plan is to the future of point from the next plan + // B. part of the point of the previous plan overlaps with the point from the next plan + + STORAGE_POINT sp2 = ops->next_metric(ops->handle); + + if(sp.start_time_s > sp2.start_time_s) + // the point from the previous plan is useless + sp = sp2; + else + // let the query run from the previous plan + // but setting this will also cut off the interpolation + // of the point from the previous plan + next1_point = sp2; + } - ops.db_points_read_per_tier[ops.tier]++; - ops.db_total_points_read++; + ops->db_points_read_per_tier[ops->tier]++; + ops->db_total_points_read++; - new_point.start_time = sp.start_time; - new_point.end_time = sp.end_time; + new_point.start_time = sp.start_time_s; + new_point.end_time = sp.end_time_s; new_point.anomaly = sp.count ? (NETDATA_DOUBLE)sp.anomaly_count * 100.0 / (NETDATA_DOUBLE)sp.count : 0.0; - query_point_set_id(new_point, ops.db_total_points_read); + query_point_set_id(new_point, ops->db_total_points_read); // if(debug_this) // info("QUERY: got point %zu, from time %ld to %ld // now from %ld to %ld // query from %ld to %ld", // new_point.id, new_point.start_time, new_point.end_time, now_start_time, now_end_time, after_wanted, before_wanted); // - // set the right value to the point we got - if(likely(!storage_point_is_unset(sp) && !storage_point_is_empty(sp))) { + // get the right value from the point we got + if(likely(!storage_point_is_unset(sp) && !storage_point_is_gap(sp))) { if(unlikely(use_anomaly_bit_as_value)) new_point.value = new_point.anomaly; else { - switch (ops.tier_query_fetch) { + switch (ops->tier_query_fetch) { default: case TIER_QUERY_FETCH_AVERAGE: new_point.value = sp.sum / sp.count; @@ -1284,19 +1445,30 @@ static inline void rrd2rrdr_do_dimension(RRDR *r, size_t dim_id_in_rrdr) { } // check if the db is giving us zero duration points - if(unlikely(new_point.start_time == new_point.end_time)) { - internal_error(true, "QUERY: '%s', dimension '%s' next_metric() returned point %zu start time %ld, end time %ld, that are both equal", - qt->id, string2str(qm->dimension.id), new_point.id, new_point.start_time, new_point.end_time); + if(unlikely(db_points_read_since_plan_switch > 1 && + new_point.start_time == new_point.end_time)) { - new_point.start_time = new_point.end_time - ops.tier_ptr->db_update_every; + internal_error(true, "QUERY: '%s', dimension '%s' next_metric() returned " + "point %zu from %ld to %ld, that are both equal", + qt->id, string2str(qm->dimension.id), + new_point.id, new_point.start_time, new_point.end_time); + + new_point.start_time = new_point.end_time - ops->tier_ptr->db_update_every_s; } // check if the db is advancing the query - if(unlikely(new_point.end_time <= last1_point.end_time)) { - internal_error(db_points_read_since_plan_switch > 1, - "QUERY: '%s', dimension '%s' next_metric() returned point %zu from %ld to %ld, before the last point %zu from %ld to %ld, now is %ld to %ld", - qt->id, string2str(qm->dimension.id), new_point.id, new_point.start_time, new_point.end_time, - last1_point.id, last1_point.start_time, last1_point.end_time, now_start_time, now_end_time); + if(unlikely(db_points_read_since_plan_switch > 1 && + new_point.end_time <= last1_point.end_time)) { + + internal_error(true, + "QUERY: '%s', dimension '%s' next_metric() returned " + "point %zu from %ld to %ld, before the " + "last point %zu from %ld to %ld, " + "now is %ld to %ld", + qt->id, string2str(qm->dimension.id), + new_point.id, new_point.start_time, new_point.end_time, + last1_point.id, last1_point.start_time, last1_point.end_time, + now_start_time, now_end_time); count_same_end_time++; continue; @@ -1321,12 +1493,16 @@ static inline void rrd2rrdr_do_dimension(RRDR *r, size_t dim_id_in_rrdr) { // at exactly the time we will want // we only log if this is not point 1 - internal_error(new_point.end_time < after_wanted && new_point.id > 1, - "QUERY: '%s', dimension '%s' next_metric() returned point %zu from %ld time %ld, which is entirely before our current timeframe %ld to %ld (and before the entire query, after %ld, before %ld)", + internal_error(new_point.end_time < ops->plan_expanded_after && + db_points_read_since_plan_switch > 1, + "QUERY: '%s', dimension '%s' next_metric() " + "returned point %zu from %ld time %ld, " + "which is entirely before our current timeframe %ld to %ld " + "(and before the entire query, after %ld, before %ld)", qt->id, string2str(qm->dimension.id), new_point.id, new_point.start_time, new_point.end_time, now_start_time, now_end_time, - after_wanted, before_wanted); + ops->plan_expanded_after, ops->plan_expanded_before); } } @@ -1339,20 +1515,31 @@ static inline void rrd2rrdr_do_dimension(RRDR *r, size_t dim_id_in_rrdr) { if(unlikely(count_same_end_time)) { internal_error(true, - "QUERY: '%s', dimension '%s', the database does not advance the query, it returned an end time less or equal to the end time of the last point we got %ld, %zu times", - qt->id, string2str(qm->dimension.id), last1_point.end_time, count_same_end_time); + "QUERY: '%s', dimension '%s', the database does not advance the query," + " it returned an end time less or equal to the end time of the last " + "point we got %ld, %zu times", + qt->id, string2str(qm->dimension.id), + last1_point.end_time, count_same_end_time); if(unlikely(new_point.end_time <= last1_point.end_time)) new_point.end_time = now_end_time; } + time_t stop_time = new_point.end_time; + if(unlikely(!storage_point_is_unset(next1_point))) { + // ONE POINT READ-AHEAD + // the point crosses the start time of the + // read ahead storage point we have read + stop_time = next1_point.start_time_s; + } + // the inner loop // we have 3 points in memory: last2, last1, new // we select the one to use based on their timestamps size_t iterations = 0; - for ( ; now_end_time <= new_point.end_time && points_added < points_wanted ; - now_end_time += ops.view_update_every, iterations++) { + for ( ; now_end_time <= stop_time && points_added < points_wanted ; + now_end_time += ops->view_update_every, iterations++) { // now_start_time is wrong in this loop // but, we don't need it @@ -1411,20 +1598,20 @@ static inline void rrd2rrdr_do_dimension(RRDR *r, size_t dim_id_in_rrdr) { RRDR_VALUE_FLAGS *rrdr_value_options_ptr = &r->o[rrdr_o_v_index]; // update the dimension options - if(likely(ops.group_points_non_zero)) + if(likely(ops->group_points_non_zero)) r->od[dim_id_in_rrdr] |= RRDR_DIMENSION_NONZERO; // store the specific point options - *rrdr_value_options_ptr = ops.group_value_flags; + *rrdr_value_options_ptr = ops->group_value_flags; // store the group value - NETDATA_DOUBLE group_value = ops.grouping_flush(r, rrdr_value_options_ptr); + NETDATA_DOUBLE group_value = ops->grouping_flush(r, rrdr_value_options_ptr); r->v[rrdr_o_v_index] = group_value; // we only store uint8_t anomaly rates, // so let's get double precision by storing // anomaly rates in the range 0 - 200 - r->ar[rrdr_o_v_index] = ops.group_anomaly_rate / (NETDATA_DOUBLE)ops.group_points_added; + r->ar[rrdr_o_v_index] = ops->group_anomaly_rate / (NETDATA_DOUBLE)ops->group_points_added; if(likely(points_added || dim_id_in_rrdr)) { // find the min/max across all dimensions @@ -1440,72 +1627,71 @@ static inline void rrd2rrdr_do_dimension(RRDR *r, size_t dim_id_in_rrdr) { } points_added++; - ops.group_points_added = 0; - ops.group_value_flags = RRDR_VALUE_NOTHING; - ops.group_points_non_zero = 0; - ops.group_anomaly_rate = 0; + ops->group_points_added = 0; + ops->group_value_flags = RRDR_VALUE_NOTHING; + ops->group_points_non_zero = 0; + ops->group_anomaly_rate = 0; } // the loop above increased "now" by query_granularity, // but the main loop will increase it too, // so, let's undo the last iteration of this loop if(iterations) - now_end_time -= ops.view_update_every; + now_end_time -= ops->view_update_every; } - ops.finalize(&ops.handle); + query_planer_finalize_remaining_plans(ops); r->internal.result_points_generated += points_added; - r->internal.db_points_read += ops.db_total_points_read; + r->internal.db_points_read += ops->db_total_points_read; for(size_t tr = 0; tr < storage_tiers ; tr++) - r->internal.tier_points_read[tr] += ops.db_points_read_per_tier[tr]; + r->internal.tier_points_read[tr] += ops->db_points_read_per_tier[tr]; r->min = min; r->max = max; r->before = max_date; - r->after = min_date - ops.view_update_every + ops.query_granularity; + r->after = min_date - ops->view_update_every + ops->query_granularity; rrdr_done(r, rrdr_line); internal_error(points_added != points_wanted, "QUERY: '%s', dimension '%s', requested %zu points, but RRDR added %zu (%zu db points read).", qt->id, string2str(qm->dimension.id), - (size_t)points_wanted, (size_t)points_added, ops.db_total_points_read); + (size_t)points_wanted, (size_t)points_added, ops->db_total_points_read); } // ---------------------------------------------------------------------------- // fill the gap of a tier void store_metric_at_tier(RRDDIM *rd, size_t tier, struct rrddim_tier *t, STORAGE_POINT sp, usec_t now_ut); -void store_metric_collection_completed(void); -void rrdr_fill_tier_gap_from_smaller_tiers(RRDDIM *rd, size_t tier, time_t now) { +void rrdr_fill_tier_gap_from_smaller_tiers(RRDDIM *rd, size_t tier, time_t now_s) { if(unlikely(tier >= storage_tiers)) return; if(storage_tiers_backfill[tier] == RRD_BACKFILL_NONE) return; - struct rrddim_tier *t = rd->tiers[tier]; + struct rrddim_tier *t = &rd->tiers[tier]; if(unlikely(!t)) return; - time_t latest_time_t = t->query_ops->latest_time(t->db_metric_handle); + time_t latest_time_s = t->query_ops->latest_time_s(t->db_metric_handle); time_t granularity = (time_t)t->tier_grouping * (time_t)rd->update_every; - time_t time_diff = now - latest_time_t; + time_t time_diff = now_s - latest_time_s; // if the user wants only NEW backfilling, and we don't have any data - if(storage_tiers_backfill[tier] == RRD_BACKFILL_NEW && latest_time_t <= 0) return; + if(storage_tiers_backfill[tier] == RRD_BACKFILL_NEW && latest_time_s <= 0) return; // there is really nothing we can do - if(now <= latest_time_t || time_diff < granularity) return; + if(now_s <= latest_time_s || time_diff < granularity) return; struct storage_engine_query_handle handle; // for each lower tier for(int read_tier = (int)tier - 1; read_tier >= 0 ; read_tier--){ - time_t smaller_tier_first_time = rd->tiers[read_tier]->query_ops->oldest_time(rd->tiers[read_tier]->db_metric_handle); - time_t smaller_tier_last_time = rd->tiers[read_tier]->query_ops->latest_time(rd->tiers[read_tier]->db_metric_handle); - if(smaller_tier_last_time <= latest_time_t) continue; // it is as bad as we are + time_t smaller_tier_first_time = rd->tiers[read_tier].query_ops->oldest_time_s(rd->tiers[read_tier].db_metric_handle); + time_t smaller_tier_last_time = rd->tiers[read_tier].query_ops->latest_time_s(rd->tiers[read_tier].db_metric_handle); + if(smaller_tier_last_time <= latest_time_s) continue; // it is as bad as we are - long after_wanted = (latest_time_t < smaller_tier_first_time) ? smaller_tier_first_time : latest_time_t; + long after_wanted = (latest_time_s < smaller_tier_first_time) ? smaller_tier_first_time : latest_time_s; long before_wanted = smaller_tier_last_time; - struct rrddim_tier *tmp = rd->tiers[read_tier]; - tmp->query_ops->init(tmp->db_metric_handle, &handle, after_wanted, before_wanted); + struct rrddim_tier *tmp = &rd->tiers[read_tier]; + tmp->query_ops->init(tmp->db_metric_handle, &handle, after_wanted, before_wanted, STORAGE_PRIORITY_HIGH); size_t points_read = 0; @@ -1514,9 +1700,9 @@ void rrdr_fill_tier_gap_from_smaller_tiers(RRDDIM *rd, size_t tier, time_t now) STORAGE_POINT sp = tmp->query_ops->next_metric(&handle); points_read++; - if(sp.end_time > latest_time_t) { - latest_time_t = sp.end_time; - store_metric_at_tier(rd, tier, t, sp, sp.end_time * USEC_PER_SEC); + if(sp.end_time_s > latest_time_s) { + latest_time_s = sp.end_time_s; + store_metric_at_tier(rd, tier, t, sp, sp.end_time_s * USEC_PER_SEC); } } @@ -1551,12 +1737,12 @@ static void rrd2rrdr_log_request_response_metadata(RRDR *r , const char *msg ) { - time_t first_entry_t = r->internal.qt->db.first_time_t; - time_t last_entry_t = r->internal.qt->db.last_time_t; + time_t first_entry_s = r->internal.qt->db.first_time_s; + time_t last_entry_s = r->internal.qt->db.last_time_s; internal_error( - true, - "rrd2rrdr() on %s update every %ld with %s grouping %s (group: %zu, resampling_time: %ld, resampling_group: %zu), " + true, + "rrd2rrdr() on %s update every %ld with %s grouping %s (group: %zu, resampling_time: %ld, resampling_group: %zu), " "after (got: %ld, want: %ld, req: %ld, db: %ld), " "before (got: %ld, want: %ld, req: %ld, db: %ld), " "duration (got: %ld, want: %ld, req: %ld, db: %ld), " @@ -1576,19 +1762,19 @@ static void rrd2rrdr_log_request_response_metadata(RRDR *r , r->after , after_wanted , after_requested - , first_entry_t + , first_entry_s // before , r->before , before_wanted , before_requested - , last_entry_t + , last_entry_s // duration , (long)(r->before - r->after + r->internal.qt->window.query_granularity) , (long)(before_wanted - after_wanted + r->internal.qt->window.query_granularity) , (long)before_requested - after_requested - , (long)((last_entry_t - first_entry_t) + r->internal.qt->window.query_granularity) + , (long)((last_entry_s - first_entry_s) + r->internal.qt->window.query_granularity) // points , r->rows @@ -1708,7 +1894,7 @@ bool query_target_calculate_window(QUERY_TARGET *qt) { time_t resampling_time_requested = qt->request.resampling_time; RRDR_OPTIONS options = qt->request.options; size_t tier = qt->request.tier; - time_t update_every = qt->db.minimum_latest_update_every; + time_t update_every = qt->db.minimum_latest_update_every_s; // RULES // points_requested = 0 @@ -1763,30 +1949,30 @@ bool query_target_calculate_window(QUERY_TARGET *qt) { if (after_wanted == 0 || before_wanted == 0) { relative_period_requested = true; - time_t first_entry_t = qt->db.first_time_t; - time_t last_entry_t = qt->db.last_time_t; + time_t first_entry_s = qt->db.first_time_s; + time_t last_entry_s = qt->db.last_time_s; - if (first_entry_t == 0 || last_entry_t == 0) { - internal_error(true, "QUERY: no data detected on query '%s' (db first_entry_t = %ld, last_entry_t = %ld", qt->id, first_entry_t, last_entry_t); + if (first_entry_s == 0 || last_entry_s == 0) { + internal_error(true, "QUERY: no data detected on query '%s' (db first_entry_t = %ld, last_entry_t = %ld", qt->id, first_entry_s, last_entry_s); query_debug_log_free(); return false; } - query_debug_log(":first_entry_t %ld, last_entry_t %ld", first_entry_t, last_entry_t); + query_debug_log(":first_entry_t %ld, last_entry_t %ld", first_entry_s, last_entry_s); if (after_wanted == 0) { - after_wanted = first_entry_t; + after_wanted = first_entry_s; query_debug_log(":zero after_wanted %ld", after_wanted); } if (before_wanted == 0) { - before_wanted = last_entry_t; + before_wanted = last_entry_s; before_is_aligned_to_db_end = true; query_debug_log(":zero before_wanted %ld", before_wanted); } if (points_wanted == 0) { - points_wanted = (last_entry_t - first_entry_t) / update_every; + points_wanted = (last_entry_s - first_entry_s) / update_every; query_debug_log(":zero points_wanted %zu", points_wanted); } } @@ -1804,7 +1990,7 @@ bool query_target_calculate_window(QUERY_TARGET *qt) { update_every = rrdset_find_natural_update_every_for_timeframe( qt, after_wanted, before_wanted, points_wanted, options, tier); - if (update_every <= 0) update_every = qt->db.minimum_latest_update_every; + if (update_every <= 0) update_every = qt->db.minimum_latest_update_every_s; query_debug_log(":natural update every %ld", update_every); } @@ -1975,7 +2161,8 @@ RRDR *rrd2rrdr_legacy( ONEWAYALLOC *owa, RRDSET *st, size_t points, time_t after, time_t before, RRDR_GROUPING group_method, time_t resampling_time, RRDR_OPTIONS options, const char *dimensions, - const char *group_options, time_t timeout, size_t tier, QUERY_SOURCE query_source) { + const char *group_options, time_t timeout, size_t tier, QUERY_SOURCE query_source, + STORAGE_PRIORITY priority) { QUERY_TARGET_REQUEST qtr = { .st = st, @@ -1990,6 +2177,7 @@ RRDR *rrd2rrdr_legacy( .timeout = timeout, .tier = tier, .query_source = query_source, + .priority = priority, }; return rrd2rrdr(owa, query_target_create(&qtr)); @@ -2056,16 +2244,48 @@ RRDR *rrd2rrdr(ONEWAYALLOC *owa, QUERY_TARGET *qt) { if (qt->request.timeout) now_realtime_timeval(&query_start_time); + size_t last_db_points_read = 0; + size_t last_result_points_generated = 0; + + QUERY_ENGINE_OPS **ops = onewayalloc_callocz(r->internal.owa, qt->query.used, sizeof(QUERY_ENGINE_OPS *)); + + size_t capacity = libuv_worker_threads * 2; + size_t max_queries_to_prepare = (qt->query.used > (capacity - 1)) ? (capacity - 1) : qt->query.used; + size_t queries_prepared = 0; + while(queries_prepared < max_queries_to_prepare) { + // preload another query + ops[queries_prepared] = rrd2rrdr_query_prep(r, queries_prepared); + queries_prepared++; + } + for(size_t c = 0, max = qt->query.used; c < max ; c++) { + + if(queries_prepared < max) { + // preload another query + ops[queries_prepared] = rrd2rrdr_query_prep(r, queries_prepared); + queries_prepared++; + } + // set the query target dimension options to rrdr r->od[c] = qt->query.array[c].dimension.options; - r->od[c] |= RRDR_DIMENSION_SELECTED; - // reset the grouping for the new dimension r->internal.grouping_reset(r); - rrd2rrdr_do_dimension(r, c); + if(ops[c]) { + r->od[c] |= RRDR_DIMENSION_SELECTED; + rrd2rrdr_query_execute(r, c, ops[c]); + } + + global_statistics_rrdr_query_completed( + 1, + r->internal.db_points_read - last_db_points_read, + r->internal.result_points_generated - last_result_points_generated, + qt->request.query_source); + + last_db_points_read = r->internal.db_points_read; + last_result_points_generated = r->internal.result_points_generated; + if (qt->request.timeout) now_realtime_timeval(&query_current_time); @@ -2106,6 +2326,12 @@ RRDR *rrd2rrdr(ONEWAYALLOC *owa, QUERY_TARGET *qt) { log_access("QUERY CANCELED RUNTIME EXCEEDED %0.2f ms (LIMIT %lld ms)", (NETDATA_DOUBLE)dt_usec(&query_start_time, &query_current_time) / 1000.0, (long long)qt->request.timeout); r->result_options |= RRDR_RESULT_OPTION_CANCEL; + + for(size_t i = c + 1; i < queries_prepared ; i++) { + if(ops[i]) + query_planer_finalize_remaining_plans(ops[i]); + } + break; } } @@ -2169,7 +2395,5 @@ RRDR *rrd2rrdr(ONEWAYALLOC *owa, QUERY_TARGET *qt) { } } - global_statistics_rrdr_query_completed(dimensions_used, r->internal.db_points_read, - r->internal.result_points_generated, qt->request.query_source); return r; } diff --git a/web/api/queries/rrdr.h b/web/api/queries/rrdr.h index 6151cddc7..e31a98099 100644 --- a/web/api/queries/rrdr.h +++ b/web/api/queries/rrdr.h @@ -40,7 +40,8 @@ typedef enum rrdr_options { RRDR_OPTION_RETURN_RAW = 0x00100000, // Return raw data for aggregating across multiple nodes RRDR_OPTION_RETURN_JWAR = 0x00200000, // Return anomaly rates in jsonwrap RRDR_OPTION_SELECTED_TIER = 0x00400000, // Use the selected tier for the query - RRDR_OPTION_ALL_DIMENSIONS = 0x00800000, // Return the full dimensions list + RRDR_OPTION_ALL_DIMENSIONS = 0x00800000, // Return the full dimensions list + RRDR_OPTION_SHOW_PLAN = 0x01000000, // Return the query plan in jsonwrap // internal ones - not to be exposed to the API RRDR_OPTION_INTERNAL_AR = 0x10000000, // internal use only, to let the formatters we want to render the anomaly rate @@ -138,7 +139,8 @@ RRDR *rrd2rrdr_legacy( ONEWAYALLOC *owa, RRDSET *st, size_t points, time_t after, time_t before, RRDR_GROUPING group_method, time_t resampling_time, RRDR_OPTIONS options, const char *dimensions, - const char *group_options, time_t timeout, size_t tier, QUERY_SOURCE query_source); + const char *group_options, time_t timeout, size_t tier, QUERY_SOURCE query_source, + STORAGE_PRIORITY priority); RRDR *rrd2rrdr(ONEWAYALLOC *owa, struct query_target *qt); bool query_target_calculate_window(struct query_target *qt); diff --git a/web/api/queries/weights.c b/web/api/queries/weights.c index a9555a66b..dc98aeedf 100644 --- a/web/api/queries/weights.c +++ b/web/api/queries/weights.c @@ -520,6 +520,7 @@ NETDATA_DOUBLE *rrd2rrdr_ks2( .group_options = group_options, .tier = tier, .query_source = QUERY_SOURCE_API_WEIGHTS, + .priority = STORAGE_PRIORITY_NORMAL, }; RRDR *r = rrd2rrdr(owa, query_target_create(&qtr)); @@ -638,7 +639,9 @@ static void rrdset_metric_correlations_volume( options |= RRDR_OPTION_MATCH_IDS | RRDR_OPTION_ABSOLUTE | RRDR_OPTION_NATURAL_POINTS; - QUERY_VALUE baseline_average = rrdmetric2value(host, rca, ria, rma, baseline_after, baseline_before, options, group_method, group_options, tier, 0, QUERY_SOURCE_API_WEIGHTS); + QUERY_VALUE baseline_average = rrdmetric2value(host, rca, ria, rma, baseline_after, baseline_before, + options, group_method, group_options, tier, 0, + QUERY_SOURCE_API_WEIGHTS, STORAGE_PRIORITY_NORMAL); merge_query_value_to_stats(&baseline_average, stats); if(!netdata_double_isnumber(baseline_average.value)) { @@ -646,7 +649,9 @@ static void rrdset_metric_correlations_volume( baseline_average.value = 0.0; } - QUERY_VALUE highlight_average = rrdmetric2value(host, rca, ria, rma, after, before, options, group_method, group_options, tier, 0, QUERY_SOURCE_API_WEIGHTS); + QUERY_VALUE highlight_average = rrdmetric2value(host, rca, ria, rma, after, before, + options, group_method, group_options, tier, 0, + QUERY_SOURCE_API_WEIGHTS, STORAGE_PRIORITY_NORMAL); merge_query_value_to_stats(&highlight_average, stats); if(!netdata_double_isnumber(highlight_average.value)) @@ -659,7 +664,9 @@ static void rrdset_metric_correlations_volume( char highlight_countif_options[50 + 1]; snprintfz(highlight_countif_options, 50, "%s" NETDATA_DOUBLE_FORMAT, highlight_average.value < baseline_average.value ? "<" : ">", baseline_average.value); - QUERY_VALUE highlight_countif = rrdmetric2value(host, rca, ria, rma, after, before, options, RRDR_GROUPING_COUNTIF, highlight_countif_options, tier, 0, QUERY_SOURCE_API_WEIGHTS); + QUERY_VALUE highlight_countif = rrdmetric2value(host, rca, ria, rma, after, before, + options, RRDR_GROUPING_COUNTIF, highlight_countif_options, tier, 0, + QUERY_SOURCE_API_WEIGHTS, STORAGE_PRIORITY_NORMAL); merge_query_value_to_stats(&highlight_countif, stats); if(!netdata_double_isnumber(highlight_countif.value)) { @@ -700,7 +707,10 @@ static void rrdset_weights_anomaly_rate( options |= RRDR_OPTION_MATCH_IDS | RRDR_OPTION_ANOMALY_BIT | RRDR_OPTION_NATURAL_POINTS; - QUERY_VALUE qv = rrdmetric2value(host, rca, ria, rma, after, before, options, group_method, group_options, tier, 0, QUERY_SOURCE_API_WEIGHTS); + QUERY_VALUE qv = rrdmetric2value(host, rca, ria, rma, after, before, + options, group_method, group_options, tier, 0, + QUERY_SOURCE_API_WEIGHTS, STORAGE_PRIORITY_NORMAL); + merge_query_value_to_stats(&qv, stats); if(netdata_double_isnumber(qv.value)) diff --git a/web/api/web_api_v1.c b/web/api/web_api_v1.c index 93f501f9e..1b38a33b1 100644 --- a/web/api/web_api_v1.c +++ b/web/api/web_api_v1.c @@ -41,6 +41,7 @@ static struct { , {"natural-points" , 0 , RRDR_OPTION_NATURAL_POINTS} , {"virtual-points" , 0 , RRDR_OPTION_VIRTUAL_POINTS} , {"all-dimensions" , 0 , RRDR_OPTION_ALL_DIMENSIONS} + , {"plan" , 0 , RRDR_OPTION_SHOW_PLAN} , {NULL , 0 , 0} }; @@ -311,7 +312,7 @@ inline int web_client_api_request_v1_alarm_count(RRDHOST *host, struct web_clien else if (!strcmp("CLEAR", value)) status = RRDCALC_STATUS_CLEAR; } else if(!strcmp(name, "context") || !strcmp(name, "ctx")) { - if(!contexts) contexts = buffer_create(255); + if(!contexts) contexts = buffer_create(255, &netdata_buffers_statistics.buffers_api); buffer_strcat(contexts, "|"); buffer_strcat(contexts, value); } @@ -388,7 +389,7 @@ inline int web_client_api_request_single_chart(RRDHOST *host, struct web_client } w->response.data->contenttype = CT_APPLICATION_JSON; - st->last_accessed_time = now_realtime_sec(); + st->last_accessed_time_s = now_realtime_sec(); callback(st, w->response.data); return HTTP_RESP_OK; @@ -459,7 +460,7 @@ static int web_client_api_request_v1_context(RRDHOST *host, struct web_client *w else if(!strcmp(name, "chart_label_key")) chart_label_key = value; else if(!strcmp(name, "chart_labels_filter")) chart_labels_filter = value; else if(!strcmp(name, "dimension") || !strcmp(name, "dim") || !strcmp(name, "dimensions") || !strcmp(name, "dims")) { - if(!dimensions) dimensions = buffer_create(100); + if(!dimensions) dimensions = buffer_create(100, &netdata_buffers_statistics.buffers_api); buffer_strcat(dimensions, "|"); buffer_strcat(dimensions, value); } @@ -520,7 +521,7 @@ static int web_client_api_request_v1_contexts(RRDHOST *host, struct web_client * else if(!strcmp(name, "chart_label_key")) chart_label_key = value; else if(!strcmp(name, "chart_labels_filter")) chart_labels_filter = value; else if(!strcmp(name, "dimension") || !strcmp(name, "dim") || !strcmp(name, "dimensions") || !strcmp(name, "dims")) { - if(!dimensions) dimensions = buffer_create(100); + if(!dimensions) dimensions = buffer_create(100, &netdata_buffers_statistics.buffers_api); buffer_strcat(dimensions, "|"); buffer_strcat(dimensions, value); } @@ -560,18 +561,6 @@ inline int web_client_api_request_v1_charts(RRDHOST *host, struct web_client *w, return HTTP_RESP_OK; } -inline int web_client_api_request_v1_archivedcharts(RRDHOST *host __maybe_unused, struct web_client *w, char *url) { - (void)url; - - buffer_flush(w->response.data); - w->response.data->contenttype = CT_APPLICATION_JSON; -#ifdef ENABLE_DBENGINE - if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) - sql_rrdset2json(host, w->response.data); -#endif - return HTTP_RESP_OK; -} - inline int web_client_api_request_v1_chart(RRDHOST *host, struct web_client *w, char *url) { return web_client_api_request_single_chart(host, w, url, rrd_stats_api_v1_chart); } @@ -637,7 +626,7 @@ inline int web_client_api_request_v1_data(RRDHOST *host, struct web_client *w, c else if(!strcmp(name, "chart_labels_filter")) chart_labels_filter = value; else if(!strcmp(name, "chart")) chart = value; else if(!strcmp(name, "dimension") || !strcmp(name, "dim") || !strcmp(name, "dimensions") || !strcmp(name, "dims")) { - if(!dimensions) dimensions = buffer_create(100); + if(!dimensions) dimensions = buffer_create(100, &netdata_buffers_statistics.buffers_api); buffer_strcat(dimensions, "|"); buffer_strcat(dimensions, value); } @@ -752,6 +741,7 @@ inline int web_client_api_request_v1_data(RRDHOST *host, struct web_client *w, c .chart_label_key = chart_label_key, .charts_labels_filter = chart_labels_filter, .query_source = QUERY_SOURCE_API_DATA, + .priority = STORAGE_PRIORITY_NORMAL, }; qt = query_target_create(&qtr); @@ -1068,15 +1058,13 @@ static inline void web_client_api_request_v1_info_mirrored_hosts(BUFFER *wb) { if (count > 0) buffer_strcat(wb, ",\n"); - netdata_mutex_lock(&host->receiver_lock); buffer_sprintf( wb, "\t\t{ \"guid\": \"%s\", \"hostname\": \"%s\", \"reachable\": %s, \"hops\": %d" , host->machine_guid , rrdhost_hostname(host) - , (host->receiver || host == localhost) ? "true" : "false" + , (host == localhost || !rrdhost_flag_check(host, RRDHOST_FLAG_ORPHAN)) ? "true" : "false" , host->system_info ? host->system_info->hops : (host == localhost) ? 0 : 1 ); - netdata_mutex_unlock(&host->receiver_lock); rrdhost_aclk_state_lock(host); if (host->aclk_state.claimed_id) @@ -1510,7 +1498,7 @@ int web_client_api_request_v1_functions(RRDHOST *host, struct web_client *w, cha } #ifndef ENABLE_DBENGINE -int web_client_api_request_v1_dbengine_stats(RRDHOST *host, struct web_client *w, char *url) { +int web_client_api_request_v1_dbengine_stats(RRDHOST *host __maybe_unused, struct web_client *w __maybe_unused, char *url __maybe_unused) { return HTTP_RESP_NOT_FOUND; } #else @@ -1519,12 +1507,6 @@ static void web_client_api_v1_dbengine_stats_for_tier(BUFFER *wb, size_t tier) { buffer_sprintf(wb, "\n\t\t\"default_granularity_secs\":%zu" - ",\n\t\t\"sizeof_metric\":%zu" - ",\n\t\t\"sizeof_metric_in_index\":%zu" - ",\n\t\t\"sizeof_page\":%zu" - ",\n\t\t\"sizeof_page_in_index\":%zu" - ",\n\t\t\"sizeof_extent\":%zu" - ",\n\t\t\"sizeof_page_in_extent\":%zu" ",\n\t\t\"sizeof_datafile\":%zu" ",\n\t\t\"sizeof_page_in_cache\":%zu" ",\n\t\t\"sizeof_point_data\":%zu" @@ -1540,8 +1522,8 @@ static void web_client_api_v1_dbengine_stats_for_tier(BUFFER *wb, size_t tier) { ",\n\t\t\"pages_uncompressed_bytes\":%zu" ",\n\t\t\"pages_duration_secs\":%lld" ",\n\t\t\"single_point_pages\":%zu" - ",\n\t\t\"first_t\":%llu" - ",\n\t\t\"last_t\":%llu" + ",\n\t\t\"first_t\":%ld" + ",\n\t\t\"last_t\":%ld" ",\n\t\t\"database_retention_secs\":%lld" ",\n\t\t\"average_compression_savings\":%0.2f" ",\n\t\t\"average_point_duration_secs\":%0.2f" @@ -1550,16 +1532,9 @@ static void web_client_api_v1_dbengine_stats_for_tier(BUFFER *wb, size_t tier) { ",\n\t\t\"average_page_size_bytes\":%0.2f" ",\n\t\t\"estimated_concurrently_collected_metrics\":%zu" ",\n\t\t\"currently_collected_metrics\":%zu" - ",\n\t\t\"max_concurrently_collected_metrics\":%zu" ",\n\t\t\"disk_space\":%zu" ",\n\t\t\"max_disk_space\":%zu" , stats.default_granularity_secs - , stats.sizeof_metric - , stats.sizeof_metric_in_index - , stats.sizeof_page - , stats.sizeof_page_in_index - , stats.sizeof_extent - , stats.sizeof_page_in_extent , stats.sizeof_datafile , stats.sizeof_page_in_cache , stats.sizeof_point_data @@ -1575,8 +1550,8 @@ static void web_client_api_v1_dbengine_stats_for_tier(BUFFER *wb, size_t tier) { , stats.pages_uncompressed_bytes , (long long)stats.pages_duration_secs , stats.single_point_pages - , stats.first_t - , stats.last_t + , stats.first_time_s + , stats.last_time_s , (long long)stats.database_retention_secs , stats.average_compression_savings , stats.average_point_duration_secs @@ -1585,7 +1560,6 @@ static void web_client_api_v1_dbengine_stats_for_tier(BUFFER *wb, size_t tier) { , stats.average_page_size_bytes , stats.estimated_concurrently_collected_metrics , stats.currently_collected_metrics - , stats.max_concurrently_collected_metrics , stats.disk_space , stats.max_disk_space ); @@ -1634,7 +1608,6 @@ static struct api_command { { "charts", 0, WEB_CLIENT_ACL_DASHBOARD | WEB_CLIENT_ACL_ACLK, web_client_api_request_v1_charts }, { "context", 0, WEB_CLIENT_ACL_DASHBOARD | WEB_CLIENT_ACL_ACLK, web_client_api_request_v1_context }, { "contexts", 0, WEB_CLIENT_ACL_DASHBOARD | WEB_CLIENT_ACL_ACLK, web_client_api_request_v1_contexts }, - { "archivedcharts", 0, WEB_CLIENT_ACL_DASHBOARD | WEB_CLIENT_ACL_ACLK, web_client_api_request_v1_archivedcharts }, // registry checks the ACL by itself, so we allow everything { "registry", 0, WEB_CLIENT_ACL_NOCHECK, web_client_api_request_v1_registry }, diff --git a/web/api/web_api_v1.h b/web/api/web_api_v1.h index e6682c99c..9dd6a1c23 100644 --- a/web/api/web_api_v1.h +++ b/web/api/web_api_v1.h @@ -9,7 +9,6 @@ #include "web/api/health/health_cmdapi.h" #include "web/api/queries/weights.h" -#define MAX_CHART_LABELS_FILTER (32) RRDR_OPTIONS web_client_api_request_v1_data_options(char *o); void web_client_api_request_v1_data_options_to_buffer(BUFFER *wb, RRDR_OPTIONS options); void web_client_api_request_v1_data_options_to_string(char *buf, size_t size, RRDR_OPTIONS options); @@ -24,7 +23,6 @@ int web_client_api_request_single_chart(RRDHOST *host, struct web_client *w, cha int web_client_api_request_v1_alarm_variables(RRDHOST *host, struct web_client *w, char *url); int web_client_api_request_v1_alarm_count(RRDHOST *host, struct web_client *w, char *url); int web_client_api_request_v1_charts(RRDHOST *host, struct web_client *w, char *url); -int web_client_api_request_v1_archivedcharts(RRDHOST *host, struct web_client *w, char *url); int web_client_api_request_v1_chart(RRDHOST *host, struct web_client *w, char *url); int web_client_api_request_v1_data(RRDHOST *host, struct web_client *w, char *url); int web_client_api_request_v1_registry(RRDHOST *host, struct web_client *w, char *url); |