summaryrefslogtreecommitdiffstats
path: root/src/fluent-bit/plugins/in_node_exporter_metrics/ne_cpu_linux.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/fluent-bit/plugins/in_node_exporter_metrics/ne_cpu_linux.c')
-rw-r--r--src/fluent-bit/plugins/in_node_exporter_metrics/ne_cpu_linux.c396
1 files changed, 396 insertions, 0 deletions
diff --git a/src/fluent-bit/plugins/in_node_exporter_metrics/ne_cpu_linux.c b/src/fluent-bit/plugins/in_node_exporter_metrics/ne_cpu_linux.c
new file mode 100644
index 000000000..8963f0c55
--- /dev/null
+++ b/src/fluent-bit/plugins/in_node_exporter_metrics/ne_cpu_linux.c
@@ -0,0 +1,396 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+
+/* Fluent Bit
+ * ==========
+ * Copyright (C) 2015-2022 The Fluent Bit Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fluent-bit/flb_info.h>
+#include <fluent-bit/flb_input_plugin.h>
+
+#include "ne.h"
+#include "ne_utils.h"
+
+#include <unistd.h>
+
+/*
+ * See kernel documentation for a description:
+ * https://www.kernel.org/doc/html/latest/filesystems/proc.html
+ *
+ * user: normal processes executing in user mode
+ * nice: niced processes executing in user mode
+ * system: processes executing in kernel mode
+ * idle: twiddling thumbs
+ * iowait: In a word, iowait stands for waiting for I/O to complete. But there are several problems:
+ * irq: servicing interrupts
+ * softirq: servicing softirqs
+ * steal: involuntary wait
+ * guest: running a normal guest
+ * guest_nice: running a niced guest
+ *
+ * Ensure to pick the correct version of the documentation, older versions here:
+ * https://github.com/torvalds/linux/tree/master/Documentation
+ */
+struct cpu_stat_info {
+ double user;
+ double nice;
+ double system;
+ double idle;
+ double iowait;
+ double irq;
+ double softirq;
+ double steal;
+ double guest;
+ double guest_nice;
+};
+
+/*
+ * Thermal throttle stats, reads /sys/devices/system/cpu/cpu*
+ * ----------------------------------------------------------
+ */
+static inline int cpu_thermal_init(struct flb_ne *ctx)
+{
+ struct cmt_counter *c;
+
+ c = cmt_counter_create(ctx->cmt, "node", "cpu", "core_throttles_total",
+ "Number of times this CPU core has been throttled.",
+ 2, (char *[]) {"core", "package"});
+ if (!c) {
+ return -1;
+ }
+ ctx->cpu_core_throttles = c;
+
+
+ c = cmt_counter_create(ctx->cmt, "node", "cpu", "package_throttles_total",
+ "Number of times this CPU package has been throttled.",
+ 1, (char *[]) {"package"});
+ if (!c) {
+ return -1;
+ }
+ ctx->cpu_package_throttles = c;
+
+ return 0;
+}
+
+static int cpu_thermal_update(struct flb_ne *ctx, uint64_t ts)
+{
+ int ret;
+ uint64_t core_id = 0;
+ uint64_t physical_package_id = 0;
+ uint64_t core_throttle_count;
+ uint64_t package_throttle_count;
+ char tmp1[32];
+ char tmp2[32];
+ struct mk_list *head;
+ struct mk_list list;
+ struct flb_slist_entry *entry;
+ const char *pattern = "/devices/system/cpu/cpu[0-9]*";
+ /* Status arrays */
+ uint64_t core_throttles_set[32][256];
+ uint64_t package_throttles_set[32];
+
+ ret = ne_utils_path_scan(ctx, ctx->path_sysfs, pattern, NE_SCAN_DIR, &list);
+ if (ret != 0) {
+ return -1;
+ }
+
+ if (mk_list_size(&list) == 0) {
+ return 0;
+ }
+
+ /* Reset arrays status */
+ memset(&core_throttles_set, 0, sizeof(core_throttles_set));
+ memset(&package_throttles_set, 0, sizeof(package_throttles_set));
+
+ /* Process entries */
+ mk_list_foreach(head, &list) {
+ entry = mk_list_entry(head, struct flb_slist_entry, _head);
+
+ /* Core ID */
+ ret = ne_utils_file_read_uint64(ctx->path_sysfs,
+ entry->str,
+ "topology", "core_id",
+ &core_id);
+ if (ret != 0) {
+ continue;
+ }
+
+ /* Physical ID */
+ ret = ne_utils_file_read_uint64(ctx->path_sysfs,
+ entry->str,
+ "topology", "physical_package_id",
+ &physical_package_id);
+ if (ret != 0) {
+ continue;
+ }
+
+ /* Only update this kv pair once */
+ if (core_throttles_set[physical_package_id][core_id] != 0) {
+ continue;
+ }
+ core_throttles_set[physical_package_id][core_id] = 1;
+
+ /* Package Metric: node_cpu_core_throttles_total */
+ ret = ne_utils_file_read_uint64(ctx->path_sysfs,
+ entry->str,
+ "thermal_throttle", "core_throttle_count",
+ &core_throttle_count);
+ if (ret != 0) {
+ flb_plg_debug(ctx->ins,
+ "CPU is missing core_throttle_count: %s",
+ entry->str);
+ }
+ else {
+ snprintf(tmp1, sizeof(tmp1) -1, "%" PRIu64, core_id);
+ snprintf(tmp2, sizeof(tmp2) -1, "%" PRIu64, physical_package_id);
+
+ /* Set new value */
+ cmt_counter_set(ctx->cpu_core_throttles, ts,
+ (double) core_throttle_count,
+ 2, (char *[]) {tmp1, tmp2});
+ }
+
+ /* Only update this entry once */
+ if (package_throttles_set[physical_package_id] != 0) {
+ continue;
+ }
+ package_throttles_set[physical_package_id] = 1;
+
+ /* Package Metric: node_cpu_package_throttles_total */
+ ret = ne_utils_file_read_uint64(ctx->path_sysfs,
+ entry->str,
+ "thermal_throttle", "package_throttle_count",
+ &package_throttle_count);
+ if (ret != 0) {
+ flb_plg_debug(ctx->ins,
+ "CPU is missing package_throttle_count: %s",
+ entry->str);
+ }
+ else {
+ /* Set new value */
+ cmt_counter_set(ctx->cpu_package_throttles, ts,
+ (double) package_throttle_count,
+ 1, (char *[]) {tmp2});
+ }
+ }
+ flb_slist_destroy(&list);
+
+ /*
+ * FIXME: continue fixing this:
+ *
+ * https://github.com/prometheus/node_exporter/blob/master/collector/cpu_linux.go#L194
+ */
+
+ return 0;
+}
+
+/*
+ * CPU stats, reads /proc/stat
+ * ---------------------------
+ */
+static inline int cpu_stat_init(struct flb_ne *ctx)
+{
+ struct cmt_counter *c;
+
+ c = cmt_counter_create(ctx->cmt, "node", "cpu", "seconds_total",
+ "Seconds the CPUs spent in each mode.",
+ 2, (char *[]) {"cpu", "mode"});
+ if (!c) {
+ return -1;
+ }
+ ctx->cpu_seconds = c;
+
+ c = cmt_counter_create(ctx->cmt, "node", "cpu", "guest_seconds_total",
+ "Seconds the CPUs spent in guests (VMs) for each mode.",
+ 2, (char *[]) {"cpu", "mode"});
+ if (!c) {
+ return -1;
+ }
+ ctx->cpu_guest_seconds = c;
+
+ return 0;
+}
+
+static int stat_line(char *line, struct cpu_stat_info *st)
+{
+ int ret;
+ double user_hz = sysconf(_SC_CLK_TCK);
+ const char *cpu_fmt = "%lf %lf %lf %lf %lf %lf %lf %lf %lf %lf";
+
+ ret = sscanf(line, cpu_fmt,
+ &st->user,
+ &st->nice,
+ &st->system,
+ &st->idle,
+ &st->iowait,
+ &st->irq,
+ &st->softirq,
+ &st->steal,
+ &st->guest,
+ &st->guest_nice);
+
+ /* On some older kernels the 'guest_nice' value may be missing */
+ if (ret < 9) {
+ return -1;
+ }
+ /* Ensure we zero initialise it */
+ if ( ret == 9 ) {
+ st->guest_nice = 0;
+ }
+
+ /* Convert to seconds based on USER_HZ kernel param */
+ st->user /= user_hz;
+ st->nice /= user_hz;
+ st->system /= user_hz;
+ st->idle /= user_hz;
+ st->iowait /= user_hz;
+ st->irq /= user_hz;
+ st->softirq /= user_hz;
+ st->steal /= user_hz;
+ st->guest /= user_hz;
+ st->guest_nice /= user_hz;
+
+ return 0;
+}
+
+static int cpu_stat_set_metrics(struct flb_ne *ctx, char *cpu_id,
+ struct cpu_stat_info *st, uint64_t ts)
+{
+
+ /* CPU seconds */
+ cmt_counter_set(ctx->cpu_seconds, ts,
+ st->idle,
+ 2, (char *[]) {cpu_id, "idle"});
+
+ cmt_counter_set(ctx->cpu_seconds, ts,
+ st->iowait,
+ 2, (char *[]) {cpu_id, "iowait"});
+
+ cmt_counter_set(ctx->cpu_seconds, ts,
+ st->irq,
+ 2, (char *[]) {cpu_id, "irq"});
+
+ cmt_counter_set(ctx->cpu_seconds, ts,
+ st->nice,
+ 2, (char *[]) {cpu_id, "nice"});
+
+ cmt_counter_set(ctx->cpu_seconds, ts,
+ st->softirq,
+ 2, (char *[]) {cpu_id, "softirq"});
+
+
+ cmt_counter_set(ctx->cpu_seconds, ts,
+ st->steal,
+ 2, (char *[]) {cpu_id, "steal"});
+
+ cmt_counter_set(ctx->cpu_seconds, ts,
+ st->system,
+ 2, (char *[]) {cpu_id, "system"});
+
+ cmt_counter_set(ctx->cpu_seconds, ts,
+ st->user,
+ 2, (char *[]) {cpu_id, "user"});
+
+ /* CPU Guest Seconds */
+ cmt_counter_set(ctx->cpu_guest_seconds, ts,
+ st->guest,
+ 2, (char *[]) {cpu_id, "user"});
+
+ cmt_counter_set(ctx->cpu_guest_seconds, ts,
+ st->guest_nice,
+ 2, (char *[]) {cpu_id, "nice"});
+
+ return 0;
+}
+
+static int cpu_stat_update(struct flb_ne *ctx, uint64_t ts)
+{
+ int len;
+ int ret;
+ char *p;
+ char tmp[32];
+ struct mk_list list;
+ struct mk_list *head;
+ struct flb_slist_entry *line;
+ struct cpu_stat_info st = {0};
+
+ ret = ne_utils_file_read_lines(ctx->path_procfs, "/stat", &list);
+ if (ret == -1) {
+ return -1;
+ }
+
+ mk_list_foreach(head, &list) {
+ line = mk_list_entry(head, struct flb_slist_entry, _head);
+
+ if (strncmp(line->str, "cpu ", 4) == 0) {
+ /* CPU total, we skip this state since we care only about per core stats */
+ continue;
+ }
+ else if (strncmp(line->str, "cpu", 3) == 0) {
+ /* CPU ID (per core) */
+ p = strchr(line->str + 3, ' ');
+ len = p - (line->str + 3);
+ memcpy(tmp, line->str + 3, len);
+ tmp[len] = '\0';
+
+ /* Capture metrics */
+ ret = stat_line(p, &st);
+ if (ret != 0) {
+ flb_plg_error(ctx->ins,
+ "could not process line: %s", line->str);
+ continue;
+ }
+
+ /* Update our counters */
+ cpu_stat_set_metrics(ctx, tmp, &st, ts);
+ }
+ }
+
+ flb_slist_destroy(&list);
+ return 0;
+}
+
+int ne_cpu_init(struct flb_ne *ctx)
+{
+ int ret;
+
+ /* CPU Thermal */
+ ret = cpu_thermal_init(ctx);
+ if (ret == -1) {
+ flb_plg_error(ctx->ins, "could not initialize cpu_thermal metrics");
+ return -1;
+ }
+
+ /* CPU Stats */
+ ret = cpu_stat_init(ctx);
+ if (ret == -1) {
+ flb_plg_error(ctx->ins, "could not initialize cpu_stat metrics");
+ return -1;
+ }
+ cpu_stat_init(ctx);
+ return 0;
+}
+
+int ne_cpu_update(struct flb_ne *ctx)
+{
+ uint64_t ts;
+
+ ts = cfl_time_now();
+
+ cpu_thermal_update(ctx, ts);
+ cpu_stat_update(ctx, ts);
+
+ return 0;
+}