summaryrefslogtreecommitdiffstats
path: root/src/fluent-bit/plugins/in_podman_metrics/podman_metrics.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/fluent-bit/plugins/in_podman_metrics/podman_metrics.c')
-rw-r--r--src/fluent-bit/plugins/in_podman_metrics/podman_metrics.c515
1 files changed, 515 insertions, 0 deletions
diff --git a/src/fluent-bit/plugins/in_podman_metrics/podman_metrics.c b/src/fluent-bit/plugins/in_podman_metrics/podman_metrics.c
new file mode 100644
index 000000000..df64452ff
--- /dev/null
+++ b/src/fluent-bit/plugins/in_podman_metrics/podman_metrics.c
@@ -0,0 +1,515 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+
+/* Fluent Bit
+ * ==========
+ * Copyright (C) 2015-2022 The Fluent Bit Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fluent-bit/flb_input_plugin.h>
+#include <fluent-bit/flb_config.h>
+#include <fluent-bit/flb_metrics.h>
+#include <fluent-bit/flb_metrics_exporter.h>
+#include <fluent-bit/flb_jsmn.h>
+
+#include <monkey/mk_core/mk_list.h>
+
+#include "podman_metrics.h"
+#include "podman_metrics_config.h"
+#include "podman_metrics_data.h"
+
+/*
+ * Collect information about podman containers (ID and Name) from podman configuration
+ * file (default is /var/lib/containers/storage/overlay-containers/containers.json).
+ * Since flb_jsmn library show JSON as a tree, search for objects with parent 0 (objects
+ * that are children to root array, and in them, search for ID and name (which is also
+ * an array.
+ */
+static int collect_container_data(struct flb_in_metrics *ctx)
+{
+ /* Buffers for reading data from JSON */
+ char *buffer;
+ char name[CONTAINER_NAME_SIZE];
+ char id[CONTAINER_ID_SIZE];
+ char image_name[IMAGE_NAME_SIZE];
+ char metadata[CONTAINER_METADATA_SIZE];
+ char *metadata_token_start;
+ char *metadata_token_stop;
+ int metadata_token_size;
+
+ int array_id;
+ int r, i, j;
+ size_t read_bytes = 0;
+ int collected_containers = 0;
+ int token_len;
+
+ jsmn_parser p;
+ jsmntok_t t[JSON_TOKENS];
+
+ flb_utils_read_file(ctx->config, &buffer, &read_bytes);
+ if (!read_bytes) {
+ flb_plg_warn(ctx->ins, "Failed to open %s", ctx->config);
+ return -1;
+ }
+ buffer[read_bytes] = 0;
+ flb_plg_debug(ctx->ins, "Read %zu bytes", read_bytes);
+
+ jsmn_init(&p);
+ r = jsmn_parse(&p, buffer, strlen(buffer), t, sizeof(t) / sizeof(t[0]));
+ if (r < 0) {
+ flb_plg_warn(ctx->ins, "Failed to parse JSON %d: %s", r, buffer);
+ free(buffer);
+ return -1;
+ }
+
+ flb_plg_debug(ctx->ins, "Got %d nested tokens", t[0].size);
+
+ if (r < 1 || t[0].type != JSMN_ARRAY) {
+ flb_plg_warn(ctx->ins, "Expected array at the json root");
+ free(buffer);
+ return -1;
+ }
+
+ for (i=0; i<r; i++) {
+ if (t[i].type == JSMN_STRING) {
+ if (sizeof(JSON_FIELD_ID)-1 == t[i].end - t[i].start &&
+ strncmp(buffer + t[i].start, JSON_FIELD_ID, t[i].end - t[i].start) == 0) {
+ token_len = t[i + 1].end - t[i + 1].start;
+ strncpy(id, buffer + t[i+1].start, t[i + 1].end - t[i + 1].start);
+ id[token_len] = '\0';
+ flb_plg_trace(ctx->ins, "Found id %s", id);
+ }
+ else if (sizeof(JSON_FIELD_NAMES)-1 == t[i].end - t[i].start &&
+ strncmp(buffer + t[i].start, JSON_FIELD_NAMES, t[i].end - t[i].start) == 0) {
+ array_id = i + 1;
+ if (t[array_id].type == JSMN_ARRAY) {
+ j = array_id + 1;
+ while (t[j].parent == array_id)
+ {
+ strncpy(name, buffer + t[j].start, t[j].end - t[j].start);
+ name[t[j].end - t[j].start] = '\0';
+ flb_plg_trace(ctx->ins, "Found name %s", name);
+ j++;
+ }
+ }
+ }
+ else if (sizeof(JSON_FIELD_METADATA)-1 == t[i].end - t[i].start &&
+ strncmp(buffer + t[i].start, JSON_FIELD_METADATA, t[i].end - t[i].start) == 0) {
+ token_len = t[i + 1].end - t[i + 1].start;
+ strncpy(metadata, buffer + t[i+1].start, t[i + 1].end - t[i + 1].start);
+ metadata[token_len] = '\0';
+
+ metadata_token_start = strstr(metadata, JSON_SUBFIELD_IMAGE_NAME);
+ if (metadata_token_start) {
+ metadata_token_stop = strstr(metadata_token_start + JSON_SUBFIELD_SIZE_IMAGE_NAME+1, "\\\"");
+ metadata_token_size = metadata_token_stop - metadata_token_start - JSON_SUBFIELD_SIZE_IMAGE_NAME;
+
+ strncpy(image_name, metadata_token_start+JSON_SUBFIELD_SIZE_IMAGE_NAME, metadata_token_size);
+ image_name[metadata_token_size] = '\0';
+
+ flb_plg_trace(ctx->ins, "Found image name %s", image_name);
+ add_container_to_list(ctx, id, name, image_name);
+ }
+ else {
+ flb_plg_warn(ctx->ins, "Image name was not found for %s", id);
+ add_container_to_list(ctx, id, name, "unknown");
+ }
+ collected_containers++;
+ }
+ }
+ }
+
+ flb_plg_debug(ctx->ins, "Collected %d containers from podman config file", collected_containers);
+ free(buffer);
+ return collected_containers;
+}
+
+/*
+ * Create structure instance based on previously found id, name and image name. Set all its values (like
+ * memory or cpu to UINT64_MAX, in case it won't be found later. This function also adds this structure
+ * to internal list, so it can be found by iteration later on.
+ */
+static int add_container_to_list(struct flb_in_metrics *ctx, flb_sds_t id, flb_sds_t name, flb_sds_t image_name)
+{
+ struct container *cnt;
+ cnt = flb_malloc(sizeof(struct container));
+ if (!cnt) {
+ flb_errno();
+ return -1;
+ }
+ cnt->id = flb_sds_create(id);
+ cnt->name = flb_sds_create(name);
+ cnt->image_name = flb_sds_create(image_name);
+
+ cnt->memory_usage = UINT64_MAX;
+ cnt->memory_max_usage = UINT64_MAX;
+ cnt->memory_limit = UINT64_MAX;
+ cnt->rss = UINT64_MAX;
+ cnt->cpu_user = UINT64_MAX;
+ cnt->cpu = UINT64_MAX;
+
+ mk_list_init(&cnt->net_data);
+
+ mk_list_add(&cnt->_head, &ctx->items);
+ return 0;
+}
+
+/*
+ * Iterate over container list and remove collected data
+ */
+static int destroy_container_list(struct flb_in_metrics *ctx)
+{
+ struct container *cnt;
+ struct net_iface *iface;
+ struct sysfs_path *pth;
+ struct mk_list *head;
+ struct mk_list *tmp;
+ struct mk_list *inner_head;
+ struct mk_list *inner_tmp;
+
+ mk_list_foreach_safe(head, tmp, &ctx->items) {
+ cnt = mk_list_entry(head, struct container, _head);
+ flb_plg_debug(ctx->ins, "Destroying container data (id: %s, name: %s", cnt->id, cnt->name);
+
+ flb_sds_destroy(cnt->id);
+ flb_sds_destroy(cnt->name);
+ flb_sds_destroy(cnt->image_name);
+ mk_list_foreach_safe(inner_head, inner_tmp, &cnt->net_data) {
+ iface = mk_list_entry(inner_head, struct net_iface, _head);
+ flb_sds_destroy(iface->name);
+ mk_list_del(&iface->_head);
+ flb_free(iface);
+ }
+ mk_list_del(&cnt->_head);
+ flb_free(cnt);
+ }
+
+ mk_list_foreach_safe(head, tmp, &ctx->sysfs_items) {
+ pth = mk_list_entry(head, struct sysfs_path, _head);
+ flb_plg_trace(ctx->ins, "Destroying sysfs data (name: %s", pth->path);
+ flb_sds_destroy(pth->path);
+ mk_list_del(&pth->_head);
+ flb_free(pth);
+ }
+ return 0;
+}
+
+
+/*
+ * Create counter for given metric name, using name, image name and value as counter labels. Counters
+ * are created per counter name, so they are "shared" between multiple containers - counter
+ * name remains the same, only labels like ID are changed.
+ * This function creates counter only once per counter name - every next call only sets counter
+ * value for specific labels.
+ */
+static int create_counter(struct flb_in_metrics *ctx, struct cmt_counter **counter, flb_sds_t id, flb_sds_t name, flb_sds_t image_name, flb_sds_t metric_prefix,
+ flb_sds_t *fields, flb_sds_t metric_name, flb_sds_t description, flb_sds_t interface, uint64_t value)
+{
+ flb_sds_t *labels;
+ uint64_t fvalue = value;
+
+ int label_count;
+ if (value == UINT64_MAX) {
+ flb_plg_debug(ctx->ins, "Ignoring invalid counter for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
+ return -1;
+ }
+
+ if (strcmp(metric_name, COUNTER_CPU) == 0 || strcmp(metric_name, COUNTER_CPU_USER) == 0) {
+ fvalue = fvalue / 1000000000;
+ flb_plg_trace(ctx->ins, "Converting %s from nanoseconds to seconds (%lu -> %lu)", metric_name, value, fvalue);
+
+ }
+
+ if (interface == NULL) {
+ labels = (char *[]){id, name, image_name};
+ label_count = 3;
+ }
+ else {
+ labels = (char *[]){id, name, image_name, interface};
+ label_count = 4;
+ }
+
+ /* if counter was not yet created, it means that this function is called for the first time per counter type */
+ if (*counter == NULL) {
+ flb_plg_debug(ctx->ins, "Creating counter for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
+ *counter = cmt_counter_create(ctx->ins->cmt, COUNTER_PREFIX, metric_prefix, metric_name, description, label_count, fields);
+ }
+
+ /* Allow setting value that is not grater that current one (if, for example, memory usage stays exactly the same) */
+ cmt_counter_allow_reset(*counter);
+ flb_plg_debug(ctx->ins, "Set counter for %s, %s_%s_%s: %lu", name, COUNTER_PREFIX, metric_prefix, metric_name, fvalue);
+ if (cmt_counter_set(*counter, cfl_time_now(), fvalue, label_count, labels) == -1) {
+ flb_plg_warn(ctx->ins, "Failed to set counter for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Create gauge for given metric name, using name, image name and value as counter labels. Gauges
+ * are created per counter name, so they are "shared" between multiple containers - counter
+ * name remains the same, only labels like ID are changed.
+ * This function creates gauge only once per counter name - every next call only sets gauge
+ * value for specific labels.
+ */
+static int create_gauge(struct flb_in_metrics *ctx, struct cmt_gauge **gauge, flb_sds_t id, flb_sds_t name, flb_sds_t image_name, flb_sds_t metric_prefix,
+ flb_sds_t *fields, flb_sds_t metric_name, flb_sds_t description, flb_sds_t interface, uint64_t value)
+{
+ flb_sds_t *labels;
+ int label_count;
+ if (value == UINT64_MAX) {
+ flb_plg_debug(ctx->ins, "Ignoring invalid gauge for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
+ return -1;
+ }
+
+ labels = (char *[]){id, name, image_name};
+ label_count = 3;
+
+ /* if gauge was not yet created, it means that this function is called for the first time per counter type */
+ if (*gauge == NULL) {
+ flb_plg_debug(ctx->ins, "Creating gauge for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
+ *gauge = cmt_gauge_create(ctx->ins->cmt, COUNTER_PREFIX, metric_prefix, metric_name, description, label_count, fields);
+ }
+
+ flb_plg_debug(ctx->ins, "Set gauge for %s, %s_%s_%s: %lu", name, COUNTER_PREFIX, metric_prefix, metric_name, value);
+ if (cmt_gauge_set(*gauge, cfl_time_now(), value, label_count, labels) == -1) {
+ flb_plg_warn(ctx->ins, "Failed to set gauge for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Call create_counter for every counter type defined in this plugin.
+ *
+ * Currently supported counters are:
+ * - container_memory_usage_bytes
+ * - container_memory_max_usage_bytes
+ * - container_memory_rss
+ * - container_spec_memory_limit_bytes
+ * - container_cpu_user_seconds_total
+ * - container_cpu_usage_seconds_total
+ * - container_network_receive_bytes_total
+ * - container_network_receive_errors_total
+ * - container_network_transmit_bytes_total
+ * - container_network_transmit_errors_total
+ */
+static int create_counters(struct flb_in_metrics *ctx)
+{
+ struct container *cnt;
+ struct net_iface *iface;
+ struct mk_list *head;
+ struct mk_list *tmp;
+ struct mk_list *inner_head;
+ struct mk_list *inner_tmp;
+
+ mk_list_foreach_safe(head, tmp, &ctx->items)
+ {
+ cnt = mk_list_entry(head, struct container, _head);
+ create_counter(ctx, &ctx->c_memory_usage, cnt->id, cnt->name, cnt->image_name, COUNTER_MEMORY_PREFIX, FIELDS_METRIC, COUNTER_MEMORY_USAGE,
+ DESCRIPTION_MEMORY_USAGE, NULL, cnt->memory_usage);
+ create_counter(ctx, &ctx->c_memory_max_usage, cnt->id, cnt->name, cnt->image_name, COUNTER_MEMORY_PREFIX, FIELDS_METRIC, COUNTER_MEMORY_MAX_USAGE,
+ DESCRIPTION_MEMORY_MAX_USAGE, NULL, cnt->memory_max_usage);
+ create_counter(ctx, &ctx->c_memory_limit, cnt->id, cnt->name, cnt->image_name, COUNTER_SPEC_MEMORY_PREFIX, FIELDS_METRIC, COUNTER_MEMORY_LIMIT,
+ DESCRIPTION_MEMORY_LIMIT, NULL, cnt->memory_limit);
+ create_gauge(ctx, &ctx->g_rss, cnt->id, cnt->name, cnt->image_name, COUNTER_MEMORY_PREFIX, FIELDS_METRIC, GAUGE_MEMORY_RSS,
+ DESCRIPTION_MEMORY_RSS, NULL, cnt->rss);
+ create_counter(ctx, &ctx->c_cpu_user, cnt->id, cnt->name, cnt->image_name, COUNTER_CPU_PREFIX, FIELDS_METRIC, COUNTER_CPU_USER,
+ DESCRIPTION_CPU_USER, NULL, cnt->cpu_user);
+ create_counter(ctx, &ctx->c_cpu, cnt->id, cnt->name, cnt->image_name, COUNTER_CPU_PREFIX, FIELDS_METRIC, COUNTER_CPU,
+ DESCRIPTION_CPU, NULL, cnt->cpu);
+ mk_list_foreach_safe(inner_head, inner_tmp, &cnt->net_data)
+ {
+ iface = mk_list_entry(inner_head, struct net_iface, _head);
+ create_counter(ctx, &ctx->rx_bytes, cnt->id, cnt->name, cnt->image_name, COUNTER_NETWORK_PREFIX, FIELDS_METRIC_WITH_IFACE, COUNTER_RX_BYTES,
+ DESCRIPTION_RX_BYTES, iface->name, iface->rx_bytes);
+ create_counter(ctx, &ctx->rx_errors, cnt->id, cnt->name, cnt->image_name, COUNTER_NETWORK_PREFIX, FIELDS_METRIC_WITH_IFACE, COUNTER_RX_ERRORS,
+ DESCRIPTION_RX_ERRORS, iface->name, iface->rx_errors);
+ create_counter(ctx, &ctx->tx_bytes, cnt->id, cnt->name, cnt->image_name, COUNTER_NETWORK_PREFIX, FIELDS_METRIC_WITH_IFACE, COUNTER_TX_BYTES,
+ DESCRIPTION_TX_BYTES, iface->name, iface->tx_bytes);
+ create_counter(ctx, &ctx->tx_errors, cnt->id, cnt->name, cnt->image_name, COUNTER_NETWORK_PREFIX, FIELDS_METRIC_WITH_IFACE, COUNTER_TX_ERRORS,
+ DESCRIPTION_TX_ERRORS, iface->name, iface->tx_errors);
+ }
+ }
+ return 0;
+}
+
+/* Main function. Destroy (optionally) previous data, gather container data and
+ * create counters.
+ */
+static int scrape_metrics(struct flb_config *config, struct flb_in_metrics *ctx)
+{
+ uint64_t start_ts = cfl_time_now();
+ flb_plg_debug(ctx->ins, "Starting to scrape podman metrics");
+ if (destroy_container_list(ctx) == -1) {
+ flb_plg_error(ctx->ins, "Could not destroy previous container data");
+ return -1;
+ }
+
+ if (collect_container_data(ctx) == -1) {
+ flb_plg_error(ctx->ins, "Could not collect container ids");
+ return -1;
+ }
+
+ if (collect_sysfs_directories(ctx, ctx->sysfs_path) == -1)
+ {
+ flb_plg_error(ctx->ins, "Could not collect sysfs data");
+ return -1;
+ }
+
+ if (ctx->cgroup_version == CGROUP_V1) {
+ if (fill_counters_with_sysfs_data_v1(ctx) == -1) {
+ flb_plg_error(ctx->ins, "Could not collect V1 sysfs data");
+ return -1;
+ }
+ }
+ else if (ctx->cgroup_version == CGROUP_V2) {
+ if (fill_counters_with_sysfs_data_v2(ctx) == -1) {
+ flb_plg_error(ctx->ins, "Could not collect V2 sysfs data");
+ return -1;
+ }
+ }
+
+ if (create_counters(ctx) == -1) {
+ flb_plg_error(ctx->ins, "Could not create container counters");
+ return -1;
+ }
+
+ if (flb_input_metrics_append(ctx->ins, NULL, 0, ctx->ins->cmt) == -1) {
+ flb_plg_error(ctx->ins, "Could not append metrics");
+ return -1;
+ }
+
+ flb_plg_info(ctx->ins, "Scraping metrics took %luns", cfl_time_now() - start_ts);
+ return 0;
+}
+
+/*
+ * Call scrape_metrics function every `scrape interval`.
+ */
+static int cb_metrics_collect_runtime(struct flb_input_instance *ins, struct flb_config *config, void *in_context)
+{
+ return scrape_metrics(config, in_context);
+}
+
+/*
+ * Initialize plugin, setup config file path and (optionally) scrape container
+ * data (if `scrape_at_start` is set).
+ */
+static int in_metrics_init(struct flb_input_instance *in, struct flb_config *config, void *data)
+{
+ struct flb_in_metrics *ctx;
+ int coll_fd_runtime;
+
+ ctx = flb_calloc(1, sizeof(struct flb_in_metrics));
+ if (!ctx) {
+ return -1;
+ }
+ ctx->ins = in;
+
+ ctx->c_memory_usage = NULL;
+ ctx->c_memory_max_usage = NULL;
+ ctx->g_rss = NULL;
+ ctx->c_memory_limit = NULL;
+ ctx->c_cpu_user = NULL;
+ ctx->c_cpu = NULL;
+ ctx->rx_bytes = NULL;
+ ctx->rx_errors = NULL;
+ ctx->tx_bytes = NULL;
+ ctx->tx_errors = NULL;
+
+ if (flb_input_config_map_set(in, (void *) ctx) == -1) {
+ flb_free(ctx);
+ return -1;
+ }
+
+ flb_input_set_context(in, ctx);
+ coll_fd_runtime = flb_input_set_collector_time(in, cb_metrics_collect_runtime, ctx->scrape_interval, 0, config);
+ if (coll_fd_runtime == -1) {
+ flb_plg_error(ctx->ins, "Could not set collector for podman metrics plugin");
+ return -1;
+ }
+ ctx->coll_fd_runtime = coll_fd_runtime;
+
+ if (ctx->podman_config_path) {
+ flb_plg_info(ctx->ins, "Using config file %s", ctx->podman_config_path);
+ ctx->config = flb_sds_create(ctx->podman_config_path);
+ }
+ else {
+ flb_plg_info(ctx->ins, "Using default config file %s", PODMAN_CONFIG_DEFAULT_PATH);
+ ctx->config = flb_sds_create(PODMAN_CONFIG_DEFAULT_PATH);
+ }
+
+ if (get_cgroup_version(ctx) == CGROUP_V2) {
+ flb_plg_info(ctx->ins, "Detected cgroups v2");
+ ctx->cgroup_version = CGROUP_V2;
+ }
+ else {
+ flb_plg_info(ctx->ins, "Detected cgroups v1");
+ ctx->cgroup_version = CGROUP_V1;
+ }
+
+ mk_list_init(&ctx->items);
+ mk_list_init(&ctx->sysfs_items);
+
+ if (ctx->scrape_interval >= 2 && ctx->scrape_on_start) {
+ flb_plg_info(ctx->ins, "Generating podman metrics (initial scrape)");
+ if (scrape_metrics(config, ctx) == -1) {
+ flb_plg_error(ctx->ins, "Could not start collector for podman metrics plugin");
+ flb_sds_destroy(ctx->config);
+ destroy_container_list(ctx);
+ flb_free(ctx);
+ return -1;
+ }
+ }
+
+ flb_plg_info(ctx->ins, "Generating podman metrics");
+
+ return 0;
+}
+
+/*
+ * Function called at plugin exit - destroy collected container data list.
+ */
+static int in_metrics_exit(void *data, struct flb_config *config)
+{
+ struct flb_in_metrics *ctx = data;
+
+ if (!ctx) {
+ return 0;
+ }
+
+ flb_sds_destroy(ctx->config);
+ destroy_container_list(ctx);
+ flb_free(ctx);
+ return 0;
+}
+
+/*
+ * Function called at plugin pause.
+ */
+static void in_metrics_pause(void *data, struct flb_config *config)
+{
+ struct flb_in_metrics *ctx = data;
+ flb_input_collector_pause(ctx->coll_fd_runtime, ctx->ins);
+}
+
+/*
+ * Function called at plugin resume.
+ */
+static void in_metrics_resume(void *data, struct flb_config *config)
+{
+ struct flb_in_metrics *ctx = data;
+ flb_input_collector_resume(ctx->coll_fd_runtime, ctx->ins);
+}