summaryrefslogtreecommitdiffstats
path: root/aclk/aclk_query.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2023-05-08 16:27:04 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2023-05-08 16:27:04 +0000
commita836a244a3d2bdd4da1ee2641e3e957850668cea (patch)
treecb87c75b3677fab7144f868435243f864048a1e6 /aclk/aclk_query.c
parentAdding upstream version 1.38.1. (diff)
downloadnetdata-a836a244a3d2bdd4da1ee2641e3e957850668cea.tar.xz
netdata-a836a244a3d2bdd4da1ee2641e3e957850668cea.zip
Adding upstream version 1.39.0.upstream/1.39.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--aclk/aclk_query.c251
1 files changed, 130 insertions, 121 deletions
diff --git a/aclk/aclk_query.c b/aclk/aclk_query.c
index 9eced0811..46d1e1e5e 100644
--- a/aclk/aclk_query.c
+++ b/aclk/aclk_query.c
@@ -3,67 +3,97 @@
#include "aclk_query.h"
#include "aclk_stats.h"
#include "aclk_tx_msgs.h"
+#include "../../web/server/web_client_cache.h"
#define WEB_HDR_ACCEPT_ENC "Accept-Encoding:"
+#define ACLK_MAX_WEB_RESPONSE_SIZE (30 * 1024 * 1024)
pthread_cond_t query_cond_wait = PTHREAD_COND_INITIALIZER;
pthread_mutex_t query_lock_wait = PTHREAD_MUTEX_INITIALIZER;
#define QUERY_THREAD_LOCK pthread_mutex_lock(&query_lock_wait)
#define QUERY_THREAD_UNLOCK pthread_mutex_unlock(&query_lock_wait)
-static usec_t aclk_web_api_v1_request(RRDHOST *host, struct web_client *w, char *url)
-{
- usec_t t;
+struct pending_req_list {
+ const char *msg_id;
+ uint32_t hash;
- t = now_monotonic_high_precision_usec();
- w->response.code = web_client_api_request_v1(host, w, url);
- t = now_monotonic_high_precision_usec() - t;
+ int canceled;
- if (aclk_stats_enabled) {
- ACLK_STATS_LOCK;
- aclk_metrics_per_sample.cloud_q_process_total += t;
- aclk_metrics_per_sample.cloud_q_process_count++;
- if (aclk_metrics_per_sample.cloud_q_process_max < t)
- aclk_metrics_per_sample.cloud_q_process_max = t;
- ACLK_STATS_UNLOCK;
- }
+ struct pending_req_list *next;
+};
+
+static struct pending_req_list *pending_req_list_head = NULL;
+static pthread_mutex_t pending_req_list_lock = PTHREAD_MUTEX_INITIALIZER;
- return t;
+static struct pending_req_list *pending_req_list_add(const char *msg_id)
+{
+ struct pending_req_list *new = callocz(1, sizeof(struct pending_req_list));
+ new->msg_id = msg_id;
+ new->hash = simple_hash(msg_id);
+
+ pthread_mutex_lock(&pending_req_list_lock);
+ new->next = pending_req_list_head;
+ pending_req_list_head = new;
+ pthread_mutex_unlock(&pending_req_list_lock);
+ return new;
}
-static RRDHOST *node_id_2_rrdhost(const char *node_id)
+void pending_req_list_rm(const char *msg_id)
{
- int res;
- uuid_t node_id_bin, host_id_bin;
+ uint32_t hash = simple_hash(msg_id);
+ struct pending_req_list *prev = NULL;
- RRDHOST *host = find_host_by_node_id((char *)node_id);
- if (host)
- return host;
+ pthread_mutex_lock(&pending_req_list_lock);
+ struct pending_req_list *curr = pending_req_list_head;
- char host_id[UUID_STR_LEN];
- if (uuid_parse(node_id, node_id_bin)) {
- error("Couldn't parse UUID %s", node_id);
- return NULL;
+ while (curr) {
+ if (curr->hash == hash && strcmp(curr->msg_id, msg_id) == 0) {
+ if (prev)
+ prev->next = curr->next;
+ else
+ pending_req_list_head = curr->next;
+
+ freez(curr);
+ break;
+ }
+
+ prev = curr;
+ curr = curr->next;
}
- if ((res = get_host_id(&node_id_bin, &host_id_bin))) {
- error("node not found rc=%d", res);
- return NULL;
+ pthread_mutex_unlock(&pending_req_list_lock);
+}
+
+int mark_pending_req_cancelled(const char *msg_id)
+{
+ uint32_t hash = simple_hash(msg_id);
+
+ pthread_mutex_lock(&pending_req_list_lock);
+ struct pending_req_list *curr = pending_req_list_head;
+
+ while (curr) {
+ if (curr->hash == hash && strcmp(curr->msg_id, msg_id) == 0) {
+ curr->canceled = 1;
+ pthread_mutex_unlock(&pending_req_list_lock);
+ return 0;
+ }
+
+ curr = curr->next;
}
- uuid_unparse_lower(host_id_bin, host_id);
- return rrdhost_find_by_guid(host_id);
+ pthread_mutex_unlock(&pending_req_list_lock);
+ return 1;
}
-#define NODE_ID_QUERY "/node/"
-// TODO this function should be quarantied and written nicely
-// lots of skeletons from initial ACLK Legacy impl.
-// quick and dirty from the start
-static int http_api_v2(struct aclk_query_thread *query_thr, aclk_query_t query)
+static bool aclk_web_client_interrupt_cb(struct web_client *w __maybe_unused, void *data)
{
+ struct pending_req_list *req = (struct pending_req_list *)data;
+ return req->canceled;
+}
+
+static int http_api_v2(struct aclk_query_thread *query_thr, aclk_query_t query) {
int retval = 0;
- usec_t t;
BUFFER *local_buffer = NULL;
- BUFFER *log_buffer = buffer_create(NETDATA_WEB_REQUEST_URL_SIZE, &netdata_buffers_statistics.buffers_aclk);
- RRDHOST *query_host = localhost;
+ size_t size = 0;
+ size_t sent = 0;
#ifdef NETDATA_WITH_ZLIB
int z_ret;
@@ -71,73 +101,55 @@ static int http_api_v2(struct aclk_query_thread *query_thr, aclk_query_t query)
char *start, *end;
#endif
- struct web_client *w = (struct web_client *)callocz(1, sizeof(struct web_client));
- w->response.data = buffer_create(NETDATA_WEB_RESPONSE_INITIAL_SIZE, &netdata_buffers_statistics.buffers_aclk);
- w->response.header = buffer_create(NETDATA_WEB_RESPONSE_HEADER_SIZE, &netdata_buffers_statistics.buffers_aclk);
- w->response.header_output = buffer_create(NETDATA_WEB_RESPONSE_HEADER_SIZE, &netdata_buffers_statistics.buffers_aclk);
- strcpy(w->origin, "*"); // Simulate web_client_create_on_fd()
- w->cookie1[0] = 0; // Simulate web_client_create_on_fd()
- w->cookie2[0] = 0; // Simulate web_client_create_on_fd()
+ struct web_client *w = web_client_get_from_cache();
w->acl = WEB_CLIENT_ACL_ACLK;
+ w->mode = WEB_CLIENT_MODE_GET;
+ w->timings.tv_in = query->created_tv;
- buffer_strcat(log_buffer, query->data.http_api_v2.query);
- size_t size = 0;
- size_t sent = 0;
- w->tv_in = query->created_tv;
- now_realtime_timeval(&w->tv_ready);
-
- if (query->timeout) {
- int in_queue = (int) (dt_usec(&w->tv_in, &w->tv_ready) / 1000);
- if (in_queue > query->timeout) {
- log_access("QUERY CANCELED: QUEUE TIME EXCEEDED %d ms (LIMIT %d ms)", in_queue, query->timeout);
- retval = 1;
- w->response.code = HTTP_RESP_BACKEND_FETCH_FAILED;
- aclk_http_msg_v2_err(query_thr->client, query->callback_topic, query->msg_id, w->response.code, CLOUD_EC_SND_TIMEOUT, CLOUD_EMSG_SND_TIMEOUT, NULL, 0);
- goto cleanup;
- }
+ w->interrupt.callback = aclk_web_client_interrupt_cb;
+ w->interrupt.callback_data = pending_req_list_add(query->msg_id);
+
+ usec_t t;
+ web_client_timeout_checkpoint_set(w, query->timeout);
+ if(web_client_timeout_checkpoint_and_check(w, &t)) {
+ log_access("QUERY CANCELED: QUEUE TIME EXCEEDED %llu ms (LIMIT %d ms)", t / USEC_PER_MS, query->timeout);
+ retval = 1;
+ w->response.code = HTTP_RESP_BACKEND_FETCH_FAILED;
+ aclk_http_msg_v2_err(query_thr->client, query->callback_topic, query->msg_id, w->response.code, CLOUD_EC_SND_TIMEOUT, CLOUD_EMSG_SND_TIMEOUT, NULL, 0);
+ goto cleanup;
}
- if (!strncmp(query->data.http_api_v2.query, NODE_ID_QUERY, strlen(NODE_ID_QUERY))) {
- char *node_uuid = query->data.http_api_v2.query + strlen(NODE_ID_QUERY);
- char nodeid[UUID_STR_LEN];
- if (strlen(node_uuid) < (UUID_STR_LEN - 1)) {
- error_report(CLOUD_EMSG_MALFORMED_NODE_ID);
- retval = 1;
- w->response.code = 404;
- aclk_http_msg_v2_err(query_thr->client, query->callback_topic, query->msg_id, w->response.code, CLOUD_EC_MALFORMED_NODE_ID, CLOUD_EMSG_MALFORMED_NODE_ID, NULL, 0);
- goto cleanup;
- }
- strncpyz(nodeid, node_uuid, UUID_STR_LEN - 1);
-
- query_host = node_id_2_rrdhost(nodeid);
- if (!query_host) {
- error_report("Host with node_id \"%s\" not found! Returning 404 to Cloud!", nodeid);
- retval = 1;
- w->response.code = 404;
- aclk_http_msg_v2_err(query_thr->client, query->callback_topic, query->msg_id, w->response.code, CLOUD_EC_NODE_NOT_FOUND, CLOUD_EMSG_NODE_NOT_FOUND, NULL, 0);
- goto cleanup;
- }
+ web_client_decode_path_and_query_string(w, query->data.http_api_v2.query);
+ char *path = (char *)buffer_tostring(w->url_path_decoded);
+
+ if (aclk_stats_enabled) {
+ char *url_path_endpoint = strrchr(path, '/');
+ ACLK_STATS_LOCK;
+ int stat_idx = aclk_cloud_req_http_type_to_idx(url_path_endpoint ? url_path_endpoint + 1 : "other");
+ aclk_metrics_per_sample.cloud_req_http_by_type[stat_idx]++;
+ ACLK_STATS_UNLOCK;
}
- char *mysep = strchr(query->data.http_api_v2.query, '?');
- if (mysep) {
- url_decode_r(w->decoded_query_string, mysep, NETDATA_WEB_REQUEST_URL_SIZE + 1);
- *mysep = '\0';
- } else
- url_decode_r(w->decoded_query_string, query->data.http_api_v2.query, NETDATA_WEB_REQUEST_URL_SIZE + 1);
+ w->response.code = web_client_api_request_with_node_selection(localhost, w, path);
+ web_client_timeout_checkpoint_response_ready(w, &t);
- mysep = strrchr(query->data.http_api_v2.query, '/');
+ if(buffer_strlen(w->response.data) > ACLK_MAX_WEB_RESPONSE_SIZE) {
+ buffer_flush(w->response.data);
+ buffer_strcat(w->response.data, "response is too big");
+ w->response.data->content_type = CT_TEXT_PLAIN;
+ w->response.code = HTTP_RESP_CONTENT_TOO_LONG;
+ }
if (aclk_stats_enabled) {
ACLK_STATS_LOCK;
- int stat_idx = aclk_cloud_req_http_type_to_idx(mysep ? mysep + 1 : "other");
- aclk_metrics_per_sample.cloud_req_http_by_type[stat_idx]++;
+ aclk_metrics_per_sample.cloud_q_process_total += t;
+ aclk_metrics_per_sample.cloud_q_process_count++;
+ if (aclk_metrics_per_sample.cloud_q_process_max < t)
+ aclk_metrics_per_sample.cloud_q_process_max = t;
ACLK_STATS_UNLOCK;
}
- // execute the query
- t = aclk_web_api_v1_request(query_host, w, mysep ? mysep + 1 : "noop");
- size = (w->mode == WEB_CLIENT_MODE_FILECOPY) ? w->response.rlen : w->response.data->len;
+ size = w->response.data->len;
sent = size;
#ifdef NETDATA_WITH_ZLIB
@@ -152,8 +164,8 @@ static int http_api_v2(struct aclk_query_thread *query_thr, aclk_query_t query)
w->response.zstream.zfree = Z_NULL;
w->response.zstream.opaque = Z_NULL;
if(deflateInit2(&w->response.zstream, web_gzip_level, Z_DEFLATED, 15 + 16, 8, web_gzip_strategy) == Z_OK) {
- w->response.zinitialized = 1;
- w->response.zoutput = 1;
+ w->response.zinitialized = true;
+ w->response.zoutput = true;
} else
error("Failed to initialize zlib. Proceeding without compression.");
}
@@ -189,10 +201,10 @@ static int http_api_v2(struct aclk_query_thread *query_thr, aclk_query_t query)
}
#endif
- w->response.data->date = w->tv_ready.tv_sec;
+ w->response.data->date = w->timings.tv_ready.tv_sec;
web_client_build_http_header(w);
local_buffer = buffer_create(NETDATA_WEB_RESPONSE_INITIAL_SIZE, &netdata_buffers_statistics.buffers_aclk);
- local_buffer->contenttype = CT_APPLICATION_JSON;
+ local_buffer->content_type = CT_APPLICATION_JSON;
buffer_strcat(local_buffer, w->response.header_output->buffer);
@@ -217,7 +229,7 @@ static int http_api_v2(struct aclk_query_thread *query_thr, aclk_query_t query)
struct timeval tv;
cleanup:
- now_realtime_timeval(&tv);
+ now_monotonic_high_precision_timeval(&tv);
log_access("%llu: %d '[ACLK]:%d' '%s' (sent/all = %zu/%zu bytes %0.0f%%, prep/sent/total = %0.2f/%0.2f/%0.2f ms) %d '%s'",
w->id
, gettid()
@@ -226,24 +238,21 @@ cleanup:
, sent
, size
, size > sent ? -(((size - sent) / (double)size) * 100.0) : ((size > 0) ? (((sent - size ) / (double)size) * 100.0) : 0.0)
- , dt_usec(&w->tv_ready, &w->tv_in) / 1000.0
- , dt_usec(&tv, &w->tv_ready) / 1000.0
- , dt_usec(&tv, &w->tv_in) / 1000.0
+ , dt_usec(&w->timings.tv_ready, &w->timings.tv_in) / 1000.0
+ , dt_usec(&tv, &w->timings.tv_ready) / 1000.0
+ , dt_usec(&tv, &w->timings.tv_in) / 1000.0
, w->response.code
- , strip_control_characters((char *)buffer_tostring(log_buffer))
+ , strip_control_characters((char *)buffer_tostring(w->url_as_received))
);
+ web_client_release_to_cache(w);
+
+ pending_req_list_rm(query->msg_id);
+
#ifdef NETDATA_WITH_ZLIB
- if(w->response.zinitialized)
- deflateEnd(&w->response.zstream);
buffer_free(z_buffer);
#endif
- buffer_free(w->response.data);
- buffer_free(w->response.header);
- buffer_free(w->response.header_output);
- freez(w);
buffer_free(local_buffer);
- buffer_free(log_buffer);
return retval;
}
@@ -257,19 +266,19 @@ static int send_bin_msg(struct aclk_query_thread *query_thr, aclk_query_t query)
const char *aclk_query_get_name(aclk_query_type_t qt, int unknown_ok)
{
switch (qt) {
- case HTTP_API_V2: return "http_api_request_v2";
- case REGISTER_NODE: return "register_node";
- case NODE_STATE_UPDATE: return "node_state_update";
- case CHART_DIMS_UPDATE: return "chart_and_dim_update";
- case CHART_CONFIG_UPDATED: return "chart_config_updated";
- case CHART_RESET: return "reset_chart_messages";
- case RETENTION_UPDATED: return "update_retention_info";
- case UPDATE_NODE_INFO: return "update_node_info";
- case ALARM_LOG_HEALTH: return "alarm_log_health";
- case ALARM_PROVIDE_CFG: return "provide_alarm_config";
- case ALARM_SNAPSHOT: return "alarm_snapshot";
- case UPDATE_NODE_COLLECTORS: return "update_node_collectors";
- case PROTO_BIN_MESSAGE: return "generic_binary_proto_message";
+ case HTTP_API_V2: return "http_api_request_v2";
+ case REGISTER_NODE: return "register_node";
+ case NODE_STATE_UPDATE: return "node_state_update";
+ case CHART_DIMS_UPDATE: return "chart_and_dim_update";
+ case CHART_CONFIG_UPDATED: return "chart_config_updated";
+ case CHART_RESET: return "reset_chart_messages";
+ case RETENTION_UPDATED: return "update_retention_info";
+ case UPDATE_NODE_INFO: return "update_node_info";
+ case ALARM_PROVIDE_CHECKPOINT: return "alarm_checkpoint";
+ case ALARM_PROVIDE_CFG: return "provide_alarm_config";
+ case ALARM_SNAPSHOT: return "alarm_snapshot";
+ case UPDATE_NODE_COLLECTORS: return "update_node_collectors";
+ case PROTO_BIN_MESSAGE: return "generic_binary_proto_message";
default:
if (!unknown_ok)
error_report("Unknown query type used %d", (int) qt);