diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-08-10 09:18:49 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-08-10 09:18:49 +0000 |
commit | dd814a7c1a8de056a79f7238578b09236edd5506 (patch) | |
tree | 429e7eed5a634a4efe9a6877ce66da8e64aa1782 /collectors | |
parent | Adding upstream version 1.41.0. (diff) | |
download | netdata-dd814a7c1a8de056a79f7238578b09236edd5506.tar.xz netdata-dd814a7c1a8de056a79f7238578b09236edd5506.zip |
Adding upstream version 1.42.0. (upstream/1.42.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'collectors')
194 files changed, 22092 insertions, 14511 deletions
diff --git a/collectors/COLLECTORS.md b/collectors/COLLECTORS.md index dbf2a9a1..aa56ac70 100644 --- a/collectors/COLLECTORS.md +++ b/collectors/COLLECTORS.md @@ -451,7 +451,7 @@ The Netdata Agent can collect these system- and hardware-level metrics using a v - [MegaRAID controllers](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/megacli/README.md): Collect adapter, physical drives, and battery stats using the `megacli` tool. -- [NVIDIA GPU](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/nvidia_smi/README.md): Monitor +- [NVIDIA GPU](https://github.com/netdata/go.d.plugin/blob/master/modules/nvidia_smi/README.md): Monitor performance metrics (memory usage, fan speed, pcie bandwidth utilization, temperature, and more) using the `nvidia-smi` tool. - [Sensors](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/sensors/README.md): Reads system @@ -669,6 +669,7 @@ $ sudo systemctl restart netdata - [netdata-debsecan](https://github.com/nodiscc/netdata-debsecan) - Check/graph the number of CVEs in currently installed packages. - [netdata-logcount](https://github.com/nodiscc/netdata-logcount) - Check/graph the number of syslog messages, by level over time. - [netdata-apt](https://github.com/nodiscc/netdata-apt) - Check/graph and alert on the number of upgradeable packages, and available distribution upgrades. +- [diskquota](https://github.com/netdata/community/tree/main/collectors/python.d.plugin/diskquota) - Monitors the defined quotas on one or more filesystems depending on configuration. 
## Etc diff --git a/collectors/Makefile.am b/collectors/Makefile.am index 2aec3dd3..d477e5b8 100644 --- a/collectors/Makefile.am +++ b/collectors/Makefile.am @@ -25,6 +25,7 @@ SUBDIRS = \ statsd.plugin \ ebpf.plugin \ tc.plugin \ + systemd-journal.plugin \ $(NULL) usercustompluginsconfigdir=$(configdir)/custom-plugins.d diff --git a/collectors/all.h b/collectors/all.h index 0ce40c75..22b75aaa 100644 --- a/collectors/all.h +++ b/collectors/all.h @@ -23,18 +23,6 @@ #define NETDATA_CHART_PRIO_SYSTEM_IO 150 #define NETDATA_CHART_PRIO_SYSTEM_PGPGIO 151 #define NETDATA_CHART_PRIO_SYSTEM_RAM 200 -#define NETDATA_CHART_PRIO_SYSTEM_SWAP 201 -#define NETDATA_CHART_PRIO_SYSTEM_SWAP_CALLS 202 -#define NETDATA_CHART_PRIO_SYSTEM_SWAPIO 250 -#define NETDATA_CHART_PRIO_SYSTEM_ZSWAPIO 300 -#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_COMPRESS_RATIO 301 -#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_TOT_SIZE 302 -#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_STORED_PAGE 303 -#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS 304 -#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_LIM_HIT 305 -#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_WRT_BACK_PAGES 306 -#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_SAME_FILL_PAGE 307 -#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_DUPP_ENTRY 308 #define NETDATA_CHART_PRIO_SYSTEM_NET 500 #define NETDATA_CHART_PRIO_SYSTEM_IPV4 500 // freebsd only #define NETDATA_CHART_PRIO_SYSTEM_IP 501 @@ -87,17 +75,35 @@ #define NETDATA_CHART_PRIO_MEM_SYSTEM_AVAILABLE 1010 #define NETDATA_CHART_PRIO_MEM_SYSTEM_OOM_KILL 1020 #define NETDATA_CHART_PRIO_MEM_SYSTEM_COMMITTED 1030 -#define NETDATA_CHART_PRIO_MEM_SYSTEM_PGFAULTS 1040 +#define NETDATA_CHART_PRIO_MEM_SWAP 1035 +#define NETDATA_CHART_PRIO_MEM_SWAP_CALLS 1037 +#define NETDATA_CHART_PRIO_MEM_SWAPIO 1038 +#define NETDATA_CHART_PRIO_MEM_ZSWAP 1036 +#define NETDATA_CHART_PRIO_MEM_ZSWAPIO 1037 +#define NETDATA_CHART_PRIO_MEM_ZSWAP_COMPRESS_RATIO 1038 +#define NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_TOT_SIZE 1039 +#define 
NETDATA_CHART_PRIO_MEM_ZSWAP_STORED_PAGE 1040 +#define NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS 1041 +#define NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_LIM_HIT 1042 +#define NETDATA_CHART_PRIO_MEM_ZSWAP_WRT_BACK_PAGES 1043 +#define NETDATA_CHART_PRIO_MEM_ZSWAP_SAME_FILL_PAGE 1044 +#define NETDATA_CHART_PRIO_MEM_ZSWAP_DUPP_ENTRY 1045 +#define NETDATA_CHART_PRIO_MEM_SYSTEM_PGFAULTS 1050 #define NETDATA_CHART_PRIO_MEM_KERNEL 1100 #define NETDATA_CHART_PRIO_MEM_SLAB 1200 +#define NETDATA_CHART_PRIO_MEM_RECLAIMING 1210 +#define NETDATA_CHART_PRIO_MEM_HIGH_LOW 1211 +#define NETDATA_CHART_PRIO_MEM_CMA 1212 #define NETDATA_CHART_PRIO_MEM_HUGEPAGES 1250 -#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FAULTS 1251 -#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FILE 1252 -#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_ZERO 1253 -#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_KHUGEPAGED 1254 -#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SPLITS 1255 -#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SWAPOUT 1256 -#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_COMPACT 1257 +#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_DETAILS 1251 +#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FAULTS 1252 +#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FILE 1253 +#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_ZERO 1254 +#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_KHUGEPAGED 1255 +#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SPLITS 1256 +#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SWAPOUT 1257 +#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_COMPACT 1258 +#define NETDATA_CHART_PRIO_MEM_DIRECTMAP 1260 #define NETDATA_CHART_PRIO_MEM_KSM 1300 #define NETDATA_CHART_PRIO_MEM_KSM_SAVINGS 1301 #define NETDATA_CHART_PRIO_MEM_KSM_RATIOS 1302 @@ -367,10 +373,13 @@ #define NETDATA_CHART_PRIO_POWER_SUPPLY_ENERGY 9502 #define NETDATA_CHART_PRIO_POWER_SUPPLY_VOLTAGE 9503 +// Linux powercap + +#define NETDATA_CHART_PRIO_POWERCAP 9600 // Wireless -#define NETDATA_CHART_PRIO_WIRELESS_IFACE 7110 +#define NETDATA_CHART_PRIO_WIRELESS_IFACE 7110 // CGROUPS @@ -381,19 +390,20 @@ #define 
NETDATA_CHART_PRIO_STATSD_PRIVATE 90000 // many charts -// INTERNAL NETDATA INFO +// PCI + +#define NETDATA_CHART_PRIO_PCI_AER 100000 -#define NETDATA_CHART_PRIO_CHECKS 99999 +// AMD GPUs -#define NETDATA_CHART_PRIO_NETDATA_TIMEX 132030 -#define NETDATA_CHART_PRIO_NETDATA_TC_TIME 1000100 +#define NETDATA_CHART_PRIO_DRM_AMDGPU 39000 // NETDATA ML CHARTS // [ml] charts -#define ML_CHART_PRIO_DIMENSIONS 39181 -#define ML_CHART_PRIO_ANOMALY_RATE 39182 -#define ML_CHART_PRIO_DETECTOR_EVENTS 39183 +#define ML_CHART_PRIO_DIMENSIONS 39181 +#define ML_CHART_PRIO_ANOMALY_RATE 39182 +#define ML_CHART_PRIO_DETECTOR_EVENTS 39183 // [netdata.ml] charts #define NETDATA_ML_CHART_RUNNING 890001 diff --git a/collectors/apps.plugin/README.md b/collectors/apps.plugin/README.md index ad4e0882..fd5371f0 100644 --- a/collectors/apps.plugin/README.md +++ b/collectors/apps.plugin/README.md @@ -30,7 +30,7 @@ a predefined set of members (of course, only process groups found running are re Unlike traditional process monitoring tools (like `top`), `apps.plugin` is able to account the resource utilization of exit processes. Their utilization is accounted at their currently running parents. So, `apps.plugin` is perfectly able to measure the resources used by shell scripts and other processes -that fork/spawn other short lived processes hundreds of times per second. +that fork/spawn other short-lived processes hundreds of times per second. ## Charts @@ -75,7 +75,7 @@ The above are reported: - For **Applications** per target configured. - For **Users** per username or UID (when the username is not available). -- For **User Groups** per groupname or GID (when groupname is not available). +- For **User Groups** per group name or GID (when group name is not available). ## Performance @@ -183,8 +183,7 @@ If this fails (i.e. `setcap` fails), `apps.plugin` is setuid to `root`. 
There are a few cases, like `docker` and `virtuozzo` containers, where `setcap` succeeds, but the capabilities are silently ignored (in `lxc` containers `setcap` fails). -In these cases ()`setcap` succeeds but capabilities do not work), you will have to setuid -to root `apps.plugin` by running these commands: +In this case, you will have to setuid to root `apps.plugin` by running these commands: ```sh chown root:netdata /usr/libexec/netdata/plugins.d/apps.plugin @@ -200,7 +199,7 @@ iterating forever, collecting metrics for each running process and sending them This is a one-way communication, from `apps.plugin` to Netdata. So, since `apps.plugin` cannot be instructed by Netdata for the actions it performs, -we think it is pretty safe to allow it have these increased privileges. +we think it is pretty safe to allow it to have these increased privileges. Keep in mind that `apps.plugin` will still run without escalated permissions, but it will not be able to collect all the information. @@ -219,7 +218,7 @@ Here is an example for the process group `sql` at `https://registry.my-netdata.i ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.processes&dimensions=sql&value_color=green%3E0%7Cred) -Netdata is able give you a lot more badges for your app. +Netdata is able to give you a lot more badges for your app. 
Examples below for process group `sql`: - CPU usage: ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.cpu&dimensions=sql&value_color=green=0%7Corange%3C50%7Cred) @@ -227,7 +226,7 @@ Examples below for process group `sql`: - Disk Physical Writes ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.pwrites&dimensions=sql&value_color=green%3C100%7Corange%3C1000%7Cred) - Disk Logical Reads ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.lreads&dimensions=sql&value_color=green%3C100%7Corange%3C1000%7Cred) - Disk Logical Writes ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.lwrites&dimensions=sql&value_color=green%3C100%7Corange%3C1000%7Cred) -- Open Files ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.files&dimensions=sql&value_color=green%3E30%7Cred) +- Open Files ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.fds_files&dimensions=sql&value_color=green%3E30%7Cred) - Real Memory ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.mem&dimensions=sql&value_color=green%3C100%7Corange%3C200%7Cred) - Virtual Memory ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.vmem&dimensions=sql&value_color=green%3C100%7Corange%3C1000%7Cred) - Swap Memory ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.swap&dimensions=sql&value_color=green=0%7Cred) @@ -235,8 +234,8 @@ Examples below for process group `sql`: - Processes ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.processes&dimensions=sql&value_color=green%3E0%7Cred) - Threads ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.threads&dimensions=sql&value_color=green%3E=28%7Cred) - Major Faults (swap activity) ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.major_faults&dimensions=sql&value_color=green=0%7Cred) -- Open Pipes 
![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.pipes&dimensions=sql&value_color=green=0%7Cred) -- Open Sockets ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.sockets&dimensions=sql&value_color=green%3E=3%7Cred) +- Open Pipes ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.fds_pipes&dimensions=sql&value_color=green=0%7Cred) +- Open Sockets ![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.fds_sockets&dimensions=sql&value_color=green%3E=3%7Cred) For more information about badges check [Generating Badges](https://github.com/netdata/netdata/blob/master/web/api/badges/README.md) diff --git a/collectors/apps.plugin/apps_plugin.c b/collectors/apps.plugin/apps_plugin.c index 105c5426..94f997e8 100644 --- a/collectors/apps.plugin/apps_plugin.c +++ b/collectors/apps.plugin/apps_plugin.c @@ -13,7 +13,7 @@ #define APPS_PLUGIN_PROCESSES_FUNCTION_DESCRIPTION "Detailed information on the currently running processes." #define APPS_PLUGIN_FUNCTIONS() do { \ - fprintf(stdout, PLUGINSD_KEYWORD_FUNCTION " \"processes\" 10 \"%s\"\n", APPS_PLUGIN_PROCESSES_FUNCTION_DESCRIPTION); \ + fprintf(stdout, PLUGINSD_KEYWORD_FUNCTION " \"processes\" %d \"%s\"\n", PLUGINS_FUNCTIONS_TIMEOUT_DEFAULT, APPS_PLUGIN_PROCESSES_FUNCTION_DESCRIPTION); \ } while(0) @@ -144,12 +144,13 @@ static const char *proc_states[] = { // log each problem once per process // log flood protection flags (log_thrown) typedef enum __attribute__((packed)) { - PID_LOG_IO = (1 << 0), - PID_LOG_STATUS = (1 << 1), - PID_LOG_CMDLINE = (1 << 2), - PID_LOG_FDS = (1 << 3), - PID_LOG_STAT = (1 << 4), - PID_LOG_LIMITS = (1 << 5), + PID_LOG_IO = (1 << 0), + PID_LOG_STATUS = (1 << 1), + PID_LOG_CMDLINE = (1 << 2), + PID_LOG_FDS = (1 << 3), + PID_LOG_STAT = (1 << 4), + PID_LOG_LIMITS = (1 << 5), + PID_LOG_LIMITS_DETAIL = (1 << 6), } PID_LOG; static int @@ -1362,6 +1363,9 @@ static inline kernel_uint_t get_proc_pid_limits_limit(char *buf, const char *key char 
*v = &line[key_len]; while(isspace(*v)) v++; + if(strcmp(v, "unlimited") == 0) + return 0; + return str2ull(v, NULL); } @@ -1373,11 +1377,17 @@ static inline int read_proc_pid_limits(struct pid_stat *p, void *ptr) { #else static char proc_pid_limits_buffer[MAX_PROC_PID_LIMITS + 1]; int ret = 0; + bool read_limits = false; + + errno = 0; + proc_pid_limits_buffer[0] = '\0'; kernel_uint_t all_fds = pid_openfds_sum(p); - if(all_fds < p->limits.max_open_files / 2 && p->io_collected_usec > p->last_limits_collected_usec && p->io_collected_usec - p->last_limits_collected_usec <= 60 * USEC_PER_SEC) + if(all_fds < p->limits.max_open_files / 2 && p->io_collected_usec > p->last_limits_collected_usec && p->io_collected_usec - p->last_limits_collected_usec <= 60 * USEC_PER_SEC) { // too frequent, we want to collect limits once per minute + ret = 1; goto cleanup; + } if(unlikely(!p->limits_filename)) { char filename[FILENAME_MAX + 1]; @@ -1394,8 +1404,25 @@ static inline int read_proc_pid_limits(struct pid_stat *p, void *ptr) { if(bytes <= 0) goto cleanup; + // make it '\0' terminated + if(bytes < MAX_PROC_PID_LIMITS) + proc_pid_limits_buffer[bytes] = '\0'; + else + proc_pid_limits_buffer[MAX_PROC_PID_LIMITS - 1] = '\0'; + p->limits.max_open_files = get_proc_pid_limits_limit(proc_pid_limits_buffer, PROC_PID_LIMITS_MAX_OPEN_FILES_KEY, sizeof(PROC_PID_LIMITS_MAX_OPEN_FILES_KEY) - 1, 0); + if(p->limits.max_open_files == 1) { + // it seems a bug in the kernel or something similar + // it sets max open files to 1 but the number of files + // the process has open are more than 1... 
+ // https://github.com/netdata/netdata/issues/15443 + p->limits.max_open_files = 0; + ret = 1; + goto cleanup; + } + p->last_limits_collected_usec = p->io_collected_usec; + read_limits = true; ret = 1; @@ -1405,6 +1432,62 @@ cleanup: else p->openfds_limits_percent = 0.0; + if(p->openfds_limits_percent > 100.0) { + if(!(p->log_thrown & PID_LOG_LIMITS_DETAIL)) { + char *line; + + if(!read_limits) { + proc_pid_limits_buffer[0] = '\0'; + line = "NOT READ"; + } + else { + line = strstr(proc_pid_limits_buffer, PROC_PID_LIMITS_MAX_OPEN_FILES_KEY); + if (line) { + line++; // skip the initial newline + + char *end = strchr(line, '\n'); + if (end) + *end = '\0'; + } + } + + netdata_log_info( + "FDS_LIMITS: PID %d (%s) is using " + "%0.2f %% of its fds limits, " + "open fds = %llu (" + "files = %llu, " + "pipes = %llu, " + "sockets = %llu, " + "inotifies = %llu, " + "eventfds = %llu, " + "timerfds = %llu, " + "signalfds = %llu, " + "eventpolls = %llu " + "other = %llu " + "), open fds limit = %llu, " + "%s, " + "original line [%s]", + p->pid, p->comm, p->openfds_limits_percent, all_fds, + p->openfds.files, + p->openfds.pipes, + p->openfds.sockets, + p->openfds.inotifies, + p->openfds.eventfds, + p->openfds.timerfds, + p->openfds.signalfds, + p->openfds.eventpolls, + p->openfds.other, + p->limits.max_open_files, + read_limits ? 
"and we have read the limits AFTER counting the fds" + : "but we have read the limits BEFORE counting the fds", + line); + + p->log_thrown |= PID_LOG_LIMITS_DETAIL; + } + } + else + p->log_thrown &= ~PID_LOG_LIMITS_DETAIL; + return ret; #endif } @@ -4489,7 +4572,7 @@ static int check_capabilities() { } #endif -netdata_mutex_t mutex = NETDATA_MUTEX_INITIALIZER; +static netdata_mutex_t mutex = NETDATA_MUTEX_INITIALIZER; #define PROCESS_FILTER_CATEGORY "category:" #define PROCESS_FILTER_USER "user:" @@ -4542,15 +4625,6 @@ static void get_MemTotal(void) { #endif } -static void apps_plugin_function_error(const char *transaction, int code, const char *msg) { - char buffer[PLUGINSD_LINE_MAX + 1]; - json_escape_string(buffer, msg, PLUGINSD_LINE_MAX); - - pluginsd_function_result_begin_to_stdout(transaction, code, "application/json", now_realtime_sec()); - fprintf(stdout, "{\"status\":%d,\"error_message\":\"%s\"}", code, buffer); - pluginsd_function_result_end_to_stdout(); -} - static void apps_plugin_function_processes_help(const char *transaction) { pluginsd_function_result_begin_to_stdout(transaction, HTTP_RESP_OK, "text/plain", now_realtime_sec() + 3600); fprintf(stdout, "%s", @@ -4598,7 +4672,7 @@ static void apps_plugin_function_processes_help(const char *transaction) { buffer_json_add_array_item_double(wb, _tmp); \ } while(0) -static void apps_plugin_function_processes(const char *transaction, char *function __maybe_unused, char *line_buffer __maybe_unused, int line_max __maybe_unused, int timeout __maybe_unused) { +static void function_processes(const char *transaction, char *function __maybe_unused, char *line_buffer __maybe_unused, int line_max __maybe_unused, int timeout __maybe_unused) { struct pid_stat *p; char *words[PLUGINSD_MAX_WORDS] = { NULL }; @@ -4619,21 +4693,21 @@ static void apps_plugin_function_processes(const char *transaction, char *functi if(!category && strncmp(keyword, PROCESS_FILTER_CATEGORY, strlen(PROCESS_FILTER_CATEGORY)) == 0) { category = 
find_target_by_name(apps_groups_root_target, &keyword[strlen(PROCESS_FILTER_CATEGORY)]); if(!category) { - apps_plugin_function_error(transaction, HTTP_RESP_BAD_REQUEST, "No category with that name found."); + pluginsd_function_json_error(transaction, HTTP_RESP_BAD_REQUEST, "No category with that name found."); return; } } else if(!user && strncmp(keyword, PROCESS_FILTER_USER, strlen(PROCESS_FILTER_USER)) == 0) { user = find_target_by_name(users_root_target, &keyword[strlen(PROCESS_FILTER_USER)]); if(!user) { - apps_plugin_function_error(transaction, HTTP_RESP_BAD_REQUEST, "No user with that name found."); + pluginsd_function_json_error(transaction, HTTP_RESP_BAD_REQUEST, "No user with that name found."); return; } } else if(strncmp(keyword, PROCESS_FILTER_GROUP, strlen(PROCESS_FILTER_GROUP)) == 0) { group = find_target_by_name(groups_root_target, &keyword[strlen(PROCESS_FILTER_GROUP)]); if(!group) { - apps_plugin_function_error(transaction, HTTP_RESP_BAD_REQUEST, "No group with that name found."); + pluginsd_function_json_error(transaction, HTTP_RESP_BAD_REQUEST, "No group with that name found."); return; } } @@ -4659,7 +4733,7 @@ static void apps_plugin_function_processes(const char *transaction, char *functi else { char msg[PLUGINSD_LINE_MAX]; snprintfz(msg, PLUGINSD_LINE_MAX, "Invalid parameter '%s'", keyword); - apps_plugin_function_error(transaction, HTTP_RESP_BAD_REQUEST, msg); + pluginsd_function_json_error(transaction, HTTP_RESP_BAD_REQUEST, msg); return; } } @@ -4672,7 +4746,7 @@ static void apps_plugin_function_processes(const char *transaction, char *functi unsigned int io_divisor = 1024 * RATES_DETAIL; BUFFER *wb = buffer_create(PLUGINSD_LINE_MAX, NULL); - buffer_json_initialize(wb, "\"", "\"", 0, true, false); + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_NEWLINE_ON_ARRAY_ITEMS); buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); buffer_json_member_add_string(wb, "type", "table"); buffer_json_member_add_time_t(wb, 
"update_every", update_every); @@ -5149,7 +5223,7 @@ static void apps_plugin_function_processes(const char *transaction, char *functi RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_VISIBLE, NULL); buffer_rrdf_table_add_field(wb, field_id++, "Uptime", "Uptime in seconds", RRDF_FIELD_TYPE_DURATION, - RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_DURATION, 2, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_DURATION_S, 2, "seconds", Uptime_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_MAX, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_VISIBLE, NULL); @@ -5449,9 +5523,9 @@ static void apps_plugin_function_processes(const char *transaction, char *functi pluginsd_function_result_end_to_stdout(); } -bool apps_plugin_exit = false; +static bool apps_plugin_exit = false; -void *reader_main(void *arg __maybe_unused) { +static void *reader_main(void *arg __maybe_unused) { char buffer[PLUGINSD_LINE_MAX + 1]; char *s = NULL; @@ -5483,9 +5557,9 @@ void *reader_main(void *arg __maybe_unused) { netdata_mutex_lock(&mutex); if(strncmp(function, "processes", strlen("processes")) == 0) - apps_plugin_function_processes(transaction, function, buffer, PLUGINSD_LINE_MAX + 1, timeout); + function_processes(transaction, function, buffer, PLUGINSD_LINE_MAX + 1, timeout); else - apps_plugin_function_error(transaction, HTTP_RESP_NOT_FOUND, "No function with this name found in apps.plugin."); + pluginsd_function_json_error(transaction, HTTP_RESP_NOT_FOUND, "No function with this name found in apps.plugin."); fflush(stdout); netdata_mutex_unlock(&mutex); @@ -5613,6 +5687,8 @@ int main(int argc, char **argv) { netdata_thread_create(&reader_thread, "APPS_READER", NETDATA_THREAD_OPTION_DONT_LOG, reader_main, NULL); netdata_mutex_lock(&mutex); + APPS_PLUGIN_FUNCTIONS(); + usec_t step = update_every * USEC_PER_SEC; global_iterations_counter = 1; heartbeat_t hb; diff --git a/collectors/apps.plugin/multi_metadata.yaml b/collectors/apps.plugin/metadata.yaml index 2bdb3dbf..9794a5ea 100644 --- 
a/collectors/apps.plugin/multi_metadata.yaml +++ b/collectors/apps.plugin/metadata.yaml @@ -1,4 +1,4 @@ -name: apps.plugin +plugin_name: apps.plugin modules: # removed system.processes_state - meta: @@ -6,54 +6,55 @@ modules: module_name: apps monitored_instance: name: Applications - link: '' - categories: [] - icon_filename: '' + link: "" + categories: + - data-collection.processes-and-system-services + icon_filename: "applications.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: - - applications - - processes - - os - - host monitoring + - applications + - processes + - os + - host monitoring most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Monitor Applications for optimal software performance and resource usage." + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -225,56 +226,57 @@ modules: module_name: groups monitored_instance: name: User Groups - link: '' - categories: [] - icon_filename: '' + link: "" + categories: + - data-collection.processes-and-system-services + icon_filename: "user.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: - - groups - - processes - - user auditing - - authorization - - os - - host monitoring + - 
groups + - processes + - user auditing + - authorization + - os + - host monitoring most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "This integration monitors resource utilization on a user groups context." + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -446,54 +448,55 @@ modules: module_name: users monitored_instance: name: Users - link: '' - categories: [] - icon_filename: '' + link: "" + categories: + - data-collection.processes-and-system-services + icon_filename: "users.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: - - users - - processes - - os - - host monitoring + - users + - processes + - os + - host monitoring most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "This integration monitors resource utilization on a user context." 
+ method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: diff --git a/collectors/apps.plugin/metrics.csv b/collectors/apps.plugin/metrics.csv deleted file mode 100644 index afda7a86..00000000 --- a/collectors/apps.plugin/metrics.csv +++ /dev/null @@ -1,81 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -system.processes_state,,"running, sleeping_interruptible, sleeping_uninterruptible, zombie, stopped",processes,"System Processes State",line,,apps.plugin,system -apps.cpu,,a dimension per app group,percentage,"Apps CPU Time (100% = 1 core)",stacked,,apps.plugin,apps -apps.cpu_user,,a dimension per app group,percentage,"Apps CPU User Time (100% = 1 core)",stacked,,apps.plugin,apps -apps.cpu_system,,a dimension per app group,percentage,"Apps CPU System Time (100% = 1 core)",stacked,,apps.plugin,apps -apps.cpu_guest,,a dimension per app group,percentage,"Apps CPU Guest Time (100% = 1 core)",stacked,,apps.plugin,apps -apps.mem,,a dimension per app group,MiB,"Apps Real Memory (w/o shared)",stacked,,apps.plugin,apps -apps.rss,,a dimension per app group,MiB,"Apps Resident Set Size (w/shared)",stacked,,apps.plugin,apps -apps.vmem,,a dimension per app group,MiB,"Apps Virtual Memory Size",stacked,,apps.plugin,apps -apps.swap,,a dimension per app group,MiB,"Apps Swap Memory",stacked,,apps.plugin,apps -apps.major_faults,,a dimension per app group,"page 
faults/s","Apps Major Page Faults (swap read)",stacked,,apps.plugin,apps -apps.minor_faults,,a dimension per app group,"page faults/s","Apps Minor Page Faults (swap read)",stacked,,apps.plugin,apps -apps.preads,,a dimension per app group,"KiB/s","Apps Disk Reads",stacked,,apps.plugin,apps -apps.pwrites,,a dimension per app group,"KiB/s","Apps Disk Writes",stacked,,apps.plugin,apps -apps.lreads,,a dimension per app group,"KiB/s","Apps Disk Logical Reads",stacked,,apps.plugin,apps -apps.lwrites,,a dimension per app group,"KiB/s","Apps I/O Logical Writes",stacked,,apps.plugin,apps -apps.threads,,a dimension per app group,threads,"Apps Threads",stacked,,apps.plugin,apps -apps.processes,,a dimension per app group,processes,"Apps Processes",stacked,,apps.plugin,apps -apps.voluntary_ctxt_switches,,a dimension per app group,processes,"Apps Voluntary Context Switches",stacked,,apps.plugin,apps -apps.involuntary_ctxt_switches,,a dimension per app group,processes,"Apps Involuntary Context Switches",stacked,,apps.plugin,apps -apps.uptime,,a dimension per app group,seconds,"Apps Carried Over Uptime",line,,apps.plugin,apps -apps.uptime_min,,a dimension per app group,seconds,"Apps Minimum Uptime",line,,apps.plugin,apps -apps.uptime_avg,,a dimension per app group,seconds,"Apps Average Uptime",line,,apps.plugin,apps -apps.uptime_max,,a dimension per app group,seconds,"Apps Maximum Uptime",line,,apps.plugin,apps -apps.files,,a dimension per app group,"open files","Apps Open Files",stacked,,apps.plugin,apps -apps.sockets,,a dimension per app group,"open sockets","Apps Open Sockets",stacked,,apps.plugin,apps -apps.pipes,,a dimension per app group,"open pipes","Apps Open Pipes",stacked,,apps.plugin,apps -groups.cpu,,a dimension per user group,percentage,"User Groups CPU Time (100% = 1 core)",stacked,,apps.plugin,groups -groups.cpu_user,,a dimension per user group,percentage,"User Groups CPU User Time (100% = 1 core)",stacked,,apps.plugin,groups -groups.cpu_system,,a dimension per user 
group,percentage,"User Groups CPU System Time (100% = 1 core)",stacked,,apps.plugin,groups -groups.cpu_guest,,a dimension per user group,percentage,"User Groups CPU Guest Time (100% = 1 core)",stacked,,apps.plugin,groups -groups.mem,,a dimension per user group,MiB,"User Groups Real Memory (w/o shared)",stacked,,apps.plugin,groups -groups.rss,,a dimension per user group,MiB,"User Groups Resident Set Size (w/shared)",stacked,,apps.plugin,groups -groups.vmem,,a dimension per user group,MiB,"User Groups Virtual Memory Size",stacked,,apps.plugin,groups -groups.swap,,a dimension per user group,MiB,"User Groups Swap Memory",stacked,,apps.plugin,groups -groups.major_faults,,a dimension per user group,"page faults/s","User Groups Major Page Faults (swap read)",stacked,,apps.plugin,groups -groups.minor_faults,,a dimension per user group,"page faults/s","User Groups Page Faults (swap read)",stacked,,apps.plugin,groups -groups.preads,,a dimension per user group,"KiB/s","User Groups Disk Reads",stacked,,apps.plugin,groups -groups.pwrites,,a dimension per user group,"KiB/s","User Groups Disk Writes",stacked,,apps.plugin,groups -groups.lreads,,a dimension per user group,"KiB/s","User Groups Disk Logical Reads",stacked,,apps.plugin,groups -groups.lwrites,,a dimension per user group,"KiB/s","User Groups I/O Logical Writes",stacked,,apps.plugin,groups -groups.threads,,a dimension per user group,threads,"User Groups Threads",stacked,,apps.plugin,groups -groups.processes,,a dimension per user group,processes,"User Groups Processes",stacked,,apps.plugin,groups -groups.voluntary_ctxt_switches,,a dimension per app group,processes,"User Groups Voluntary Context Switches",stacked,,apps.plugin,groups -groups.involuntary_ctxt_switches,,a dimension per app group,processes,"User Groups Involuntary Context Switches",stacked,,apps.plugin,groups -groups.uptime,,a dimension per user group,seconds,"User Groups Carried Over Uptime",line,,apps.plugin,groups -groups.uptime_min,,a dimension per user 
group,seconds,"User Groups Minimum Uptime",line,,apps.plugin,groups -groups.uptime_avg,,a dimension per user group,seconds,"User Groups Average Uptime",line,,apps.plugin,groups -groups.uptime_max,,a dimension per user group,seconds,"User Groups Maximum Uptime",line,,apps.plugin,groups -groups.files,,a dimension per user group,"open files","User Groups Open Files",stacked,,apps.plugin,groups -groups.sockets,,a dimension per user group,"open sockets","User Groups Open Sockets",stacked,,apps.plugin,groups -groups.pipes,,a dimension per user group,"open pipes","User Groups Open Pipes",stacked,,apps.plugin,groups -users.cpu,,a dimension per user,percentage,"Users CPU Time (100% = 1 core)",stacked,,apps.plugin,users -users.cpu_user,,a dimension per user,percentage,"Users CPU User Time (100% = 1 core)",stacked,,apps.plugin,users -users.cpu_system,,a dimension per user,percentage,"Users CPU System Time (100% = 1 core)",stacked,,apps.plugin,users -users.cpu_guest,,a dimension per user,percentage,"Users CPU Guest Time (100% = 1 core)",stacked,,apps.plugin,users -users.mem,,a dimension per user,MiB,"Users Real Memory (w/o shared)",stacked,,apps.plugin,users -users.rss,,a dimension per user,MiB,"Users Resident Set Size (w/shared)",stacked,,apps.plugin,users -users.vmem,,a dimension per user,MiB,"Users Virtual Memory Size",stacked,,apps.plugin,users -users.swap,,a dimension per user,MiB,"Users Swap Memory",stacked,,apps.plugin,users -users.major_faults,,a dimension per user,"page faults/s","Users Major Page Faults (swap read)",stacked,,apps.plugin,users -users.minor_faults,,a dimension per user,"page faults/s","Users Page Faults (swap read)",stacked,,apps.plugin,users -users.preads,,a dimension per user,"KiB/s","Users Disk Reads",stacked,,apps.plugin,users -users.pwrites,,a dimension per user,"KiB/s","Users Disk Writes",stacked,,apps.plugin,users -users.lreads,,a dimension per user,"KiB/s","Users Disk Logical Reads",stacked,,apps.plugin,users -users.lwrites,,a dimension per 
user,"KiB/s","Users I/O Logical Writes",stacked,,apps.plugin,users -users.threads,,a dimension per user,threads,"Users Threads",stacked,,apps.plugin,users -users.processes,,a dimension per user,processes,"Users Processes",stacked,,apps.plugin,users -users.voluntary_ctxt_switches,,a dimension per app group,processes,"Users Voluntary Context Switches",stacked,,apps.plugin,users -users.involuntary_ctxt_switches,,a dimension per app group,processes,"Users Involuntary Context Switches",stacked,,apps.plugin,users -users.uptime,,a dimension per user,seconds,"Users Carried Over Uptime",line,,apps.plugin,users -users.uptime_min,,a dimension per user,seconds,"Users Minimum Uptime",line,,apps.plugin,users -users.uptime_avg,,a dimension per user,seconds,"Users Average Uptime",line,,apps.plugin,users -users.uptime_max,,a dimension per user,seconds,"Users Maximum Uptime",line,,apps.plugin,users -users.files,,a dimension per user,"open files","Users Open Files",stacked,,apps.plugin,users -users.sockets,,a dimension per user,"open sockets","Users Open Sockets",stacked,,apps.plugin,users -users.pipes,,a dimension per user,"open pipes","Users Open Pipes",stacked,,apps.plugin,users -netdata.apps_cpu,,"user, system",milliseconds/s,"Apps Plugin CPU",stacked,,apps.plugin,netdata -netdata.apps_sizes,,"calls, files, filenames, inode_changes, link_changes, pids, fds, targets, new_pids",files/s,"Apps Plugin Files",line,,apps.plugin,netdata -netdata.apps_fix,,"utime, stime, gtime, minflt, majflt",percentage,"Apps Plugin Normalization Ratios",line,,apps.plugin,netdata -netdata.apps_children_fix,,"utime, stime, gtime, minflt, majflt",percentage,"Apps Plugin Exited Children Normalization Ratios",line,,apps.plugin,netdata
\ No newline at end of file diff --git a/collectors/cgroups.plugin/multi_metadata.yaml b/collectors/cgroups.plugin/metadata.yaml index b2b13c2d..b342d30a 100644 --- a/collectors/cgroups.plugin/multi_metadata.yaml +++ b/collectors/cgroups.plugin/metadata.yaml @@ -1,4 +1,4 @@ -name: cgroups.plugin +plugin_name: cgroups.plugin modules: - &module meta: &meta @@ -9,7 +9,7 @@ modules: link: "" categories: - data-collection.containers-and-vms - icon_filename: netdata.png + icon_filename: container.svg related_resources: integrations: list: [] @@ -18,14 +18,14 @@ modules: keywords: - containers most_popular: true - overview: - data_collection: - metrics_description: "" + overview: &overview + data_collection: &data_collection + metrics_description: "Monitor Containers for performance, resource usage, and health status." method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: description: "" default_behavior: @@ -398,22 +398,26 @@ modules: chart_type: line dimensions: - name: mtu - - <<: *module meta: <<: *meta monitored_instance: name: Kubernetes Containers link: https://kubernetes.io/ - icon_filename: k8s.png + icon_filename: kubernetes.svg categories: - - data-collection.containers-vms + - data-collection.containers-and-vms - data-collection.kubernetes keywords: - k8s - kubernetes - pods - containers + overview: + <<: *overview + data-collection: + <<: *data_collection + metrics_description: Monitor Kubernetes Clusters for performance, resource usage, and health status. 
alerts: - name: k8s_cgroup_10min_cpu_usage link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf @@ -792,16 +796,23 @@ modules: chart_type: line dimensions: - name: mtu - - <<: *module meta: <<: *meta monitored_instance: name: Systemd Services link: "" - icon_filename: systemd.png + icon_filename: systemd.svg categories: - data-collection.systemd + keywords: + - systemd + - services + overview: + <<: *overview + data-collection: + <<: *data_collection + metrics_desctiption: "Monitor Systemd Services for performance, resource usage, and health status." alerts: [] metrics: folding: @@ -964,14 +975,18 @@ modules: monitored_instance: name: Virtual Machines link: "" - icon_filename: k8s.png + icon_filename: container.svg categories: - - data-collection.containers-vms - - data-collection.kubernetes + - data-collection.containers-and-vms keywords: - vms - virtualization - container + overview: + <<: *overview + data_collection: + <<: *data_collection + metrics_description: "Monitor Virtual Machines for performance, resource usage, and health status." - <<: *module meta: <<: *meta @@ -980,11 +995,16 @@ modules: link: "" icon_filename: lxc.png categories: - - data-collection.containers-vms + - data-collection.containers-and-vms keywords: - lxc - lxd - container + overview: + <<: *overview + data_collection: + <<: *data_collection + metrics_description: "Monitor LXC Containers for performance, resource usage, and health status." - <<: *module meta: <<: *meta @@ -993,22 +1013,32 @@ modules: link: "" icon_filename: libvirt.png categories: - - data-collection.containers-vms + - data-collection.containers-and-vms keywords: - libvirt - container + overview: + <<: *overview + data_collection: + <<: *data_collection + metrics_description: "Monitor Libvirt for performance, resource usage, and health status." 
- <<: *module meta: <<: *meta monitored_instance: name: oVirt Containers link: "" - icon_filename: ovirt.png + icon_filename: ovirt.svg categories: - - data-collection.containers-vms + - data-collection.containers-and-vms keywords: - ovirt - container + overview: + <<: *overview + data_collection: + <<: *data_collection + metrics_description: "Monitor oVirt for performance, resource usage, and health status." - <<: *module meta: <<: *meta @@ -1017,7 +1047,12 @@ modules: link: "" icon_filename: proxmox.png categories: - - data-collection.containers-vms + - data-collection.containers-and-vms keywords: - proxmox - container + overview: + <<: *overview + data_collection: + <<: *data_collection + metrics_description: "Monitor Proxmox for performance, resource usage, and health status." diff --git a/collectors/cgroups.plugin/metrics.csv b/collectors/cgroups.plugin/metrics.csv deleted file mode 100644 index aae057ba..00000000 --- a/collectors/cgroups.plugin/metrics.csv +++ /dev/null @@ -1,109 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -cgroup.cpu_limit,cgroup,used,percentage,"CPU Usage within the limits",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.cpu,cgroup,"user, system",percentage,"CPU Usage (100% = 1 core)",stacked,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.cpu_per_core,cgroup,a dimension per core,percentage,"CPU Usage (100% = 1 core) Per Core",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.throttled,cgroup,throttled,percentage,"CPU Throttled Runnable Periods",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.throttled_duration,cgroup,duration,ms,"CPU Throttled Time Duration",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.cpu_shares,cgroup,shares,shares,"CPU Time Relative Share",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.mem,cgroup,"cache, rss, swap, rss_huge, mapped_file",MiB,"Memory 
Usage",stacked,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.writeback,cgroup,"dirty, writeback",MiB,"Writeback Memory",area,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.mem_activity,cgroup,"in, out",MiB/s,"Memory Activity",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.pgfaults,cgroup,"pgfault, swap",MiB/s,"Memory Page Faults",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.mem_usage,cgroup,"ram, swap",MiB,"Used Memory",stacked,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.mem_usage_limit,cgroup,"available, used",MiB,"Used RAM within the limits",stacked,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.mem_utilization,cgroup,utilization,percentage,"Memory Utilization",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.mem_failcnt,cgroup,failures,count,"Memory Limit Failures",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.io,cgroup,"read, write",KiB/s,"I/O Bandwidth (all disks)",area,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.serviced_ops,cgroup,"read, write",operations/s,"Serviced I/O Operations (all disks)",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.throttle_io,cgroup,"read, write",KiB/s,"Throttle I/O Bandwidth (all disks)",area,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.throttle_serviced_ops,cgroup,"read, write",operations/s,"Throttle Serviced I/O Operations (all disks)",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.queued_ops,cgroup,"read, write",operations,"Queued I/O Operations (all disks)",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.merged_ops,cgroup,"read, write",operations/s,"Merged I/O Operations (all disks)",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.cpu_some_pressure,cgroup,"some10, some60, some300",percentage,"CPU some pressure",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup 
-cgroup.cpu_some_pressure_stall_time,cgroup,time,ms,"CPU some pressure stall time",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.cpu_full_pressure,cgroup,"some10, some60, some300",percentage,"CPU full pressure",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.cpu_full_pressure_stall_time,cgroup,time,ms,"CPU full pressure stall time",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.memory_some_pressure,cgroup,"some10, some60, some300",percentage,"Memory some pressure",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.memory_some_pressure_stall_time,cgroup,time,ms,"Memory some pressure stall time",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.memory_full_pressure,cgroup,"some10, some60, some300",percentage,"Memory full pressure",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.memory_full_pressure_stall_time,cgroup,time,ms,"Memory full pressure stall time",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.io_some_pressure,cgroup,"some10, some60, some300",percentage,"I/O some pressure",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.io_some_pressure_stall_time,cgroup,time,ms,"I/O some pressure stall time",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.io_full_pressure,cgroup,"some10, some60, some300",percentage,"I/O some pressure",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.io_full_pressure_stall_time,cgroup,time,ms,"I/O some pressure stall time",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup -cgroup.net_net,"cgroup, network device","received, sent",kilobits/s,"Bandwidth",area,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev -cgroup.net_packets,"cgroup, network device","received, sent, multicast",pps,"Packets",line,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev -cgroup.net_errors,"cgroup, network 
device","inbound, outbound",errors/s,"Interface Errors",line,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev -cgroup.net_drops,"cgroup, network device","inbound, outbound",errors/s,"Interface Drops",line,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev -cgroup.net_fifo,"cgroup, network device","receive, transmit",errors/s,"Interface FIFO Buffer Errors",line,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev -cgroup.net_compressed,"cgroup, network device","receive, sent",pps,"Interface FIFO Buffer Errors",line,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev -cgroup.net_events,"cgroup, network device","frames, collisions, carrier",events/s,"Network Interface Events",line,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev -cgroup.net_operstate,"cgroup, network device","up, down, notpresent, lowerlayerdown, testing, dormant, unknown",state,"Interface Operational State",line,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev -cgroup.net_carrier,"cgroup, network device","up, down",state,"Interface Physical Link State",line,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev -cgroup.net_mtu,"cgroup, network device",mtu,octets,"Interface MTU",line,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev -k8s.cgroup.cpu_limit,k8s cgroup,used,percentage,"CPU Usage within the limits",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.cpu,k8s cgroup,"user, system",percentage,"CPU Usage (100% = 1000 mCPU)",stacked,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, 
k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.cpu_per_core,k8s cgroup,a dimension per core,percentage,"CPU Usage (100% = 1000 mCPU) Per Core",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.throttled,k8s cgroup,throttled,percentage,"CPU Throttled Runnable Periods",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.throttled_duration,k8s cgroup,duration,ms,"CPU Throttled Time Duration",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.cpu_shares,k8s cgroup,shares,shares,"CPU Time Relative Share",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.mem,k8s cgroup,"cache, rss, swap, rss_huge, mapped_file",MiB,"Memory Usage",stacked,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.writeback,k8s cgroup,"dirty, writeback",MiB,"Writeback Memory",area,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.mem_activity,k8s cgroup,"in, out",MiB/s,"Memory Activity",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, 
k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.pgfaults,k8s cgroup,"pgfault, swap",MiB/s,"Memory Page Faults",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.mem_usage,k8s cgroup,"ram, swap",MiB,"Used Memory",stacked,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.mem_usage_limit,k8s cgroup,"available, used",MiB,"Used RAM within the limits",stacked,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.mem_utilization,k8s cgroup,utilization,percentage,"Memory Utilization",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.mem_failcnt,k8s cgroup,failures,count,"Memory Limit Failures",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.io,k8s cgroup,"read, write",KiB/s,"I/O Bandwidth (all disks)",area,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.serviced_ops,k8s cgroup,"read, write",operations/s,"Serviced I/O Operations (all 
disks)",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.throttle_io,k8s cgroup,"read, write",KiB/s,"Throttle I/O Bandwidth (all disks)",area,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.throttle_serviced_ops,k8s cgroup,"read, write",operations/s,"Throttle Serviced I/O Operations (all disks)",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.queued_ops,k8s cgroup,"read, write",operations,"Queued I/O Operations (all disks)",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.merged_ops,k8s cgroup,"read, write",operations/s,"Merged I/O Operations (all disks)",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.cpu_some_pressure,k8s cgroup,"some10, some60, some300",percentage,"CPU some pressure",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.cpu_some_pressure_stall_time,k8s cgroup,time,ms,"CPU some pressure stall time",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, 
k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.cpu_full_pressure,k8s cgroup,"some10, some60, some300",percentage,"CPU full pressure",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.cpu_full_pressure_stall_time,k8s cgroup,time,ms,"CPU full pressure stall time",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.memory_some_pressure,k8s cgroup,"some10, some60, some300",percentage,"Memory some pressure",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.memory_some_pressure_stall_time,k8s cgroup,time,ms,"Memory some pressure stall time",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.memory_full_pressure,k8s cgroup,"some10, some60, some300",percentage,"Memory full pressure",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.memory_full_pressure_stall_time,k8s cgroup,time,ms,"Memory full pressure stall time",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup 
-k8s.cgroup.io_some_pressure,k8s cgroup,"some10, some60, some300",percentage,"I/O some pressure",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.io_some_pressure_stall_time,k8s cgroup,time,ms,"I/O some pressure stall time",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.io_full_pressure,k8s cgroup,"some10, some60, some300",percentage,"I/O some pressure",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.io_full_pressure_stall_time,k8s cgroup,time,ms,"I/O some pressure stall time",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup -k8s.cgroup.net_net,"k8s cgroup, network device","received, sent",kilobits/s,"Bandwidth",area,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/proc/net/dev -k8s.cgroup.net_packets,"k8s cgroup, network device","received, sent, multicast",pps,"Packets",line,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/proc/net/dev -k8s.cgroup.net_errors,"k8s cgroup, network device","inbound, outbound",errors/s,"Interface 
Errors",line,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/proc/net/dev -k8s.cgroup.net_drops,"k8s cgroup, network device","inbound, outbound",errors/s,"Interface Drops",line,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/proc/net/dev -k8s.cgroup.net_fifo,"k8s cgroup, network device","receive, transmit",errors/s,"Interface FIFO Buffer Errors",line,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/proc/net/dev -k8s.cgroup.net_compressed,"k8s cgroup, network device","receive, sent",pps,"Interface FIFO Buffer Errors",line,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/proc/net/dev -k8s.cgroup.net_events,"k8s cgroup, network device","frames, collisions, carrier",events/s,"Network Interface Events",line,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/proc/net/dev -k8s.cgroup.net_operstate,"k8s cgroup, network device","up, down, notpresent, lowerlayerdown, testing, dormant, unknown",state,"Interface Operational State",line,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, 
k8s_cluster_id",cgroups.plugin,/proc/net/dev -k8s.cgroup.net_carrier,"k8s cgroup, network device","up, down",state,"Interface Physical Link State",line,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/proc/net/dev -k8s.cgroup.net_mtu,"k8s cgroup, network device",mtu,octets,"Interface MTU",line,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/proc/net/dev -services.cpu,,a dimension per systemd service,percentage,"Systemd Services CPU utilization (100% = 1 core)",stacked,,cgroups.plugin,systemd -services.mem_usage,,a dimension per systemd service,MiB,"Systemd Services Used Memory",stacked,,cgroups.plugin,systemd -services.mem_rss,,a dimension per systemd service,MiB,"Systemd Services RSS Memory",stacked,,cgroups.plugin,systemd -services.mem_mapped,,a dimension per systemd service,MiB,"Systemd Services Mapped Memory",stacked,,cgroups.plugin,systemd -services.mem_cache,,a dimension per systemd service,MiB,"Systemd Services Cache Memory",stacked,,cgroups.plugin,systemd -services.mem_writeback,,a dimension per systemd service,MiB,"Systemd Services Writeback Memory",stacked,,cgroups.plugin,systemd -services.mem_pgfault,,a dimension per systemd service,MiB/s,"Systemd Services Memory Minor Page Faults",stacked,,cgroups.plugin,systemd -services.mem_pgmajfault,,a dimension per systemd service,MiB/s,"Systemd Services Memory Major Page Faults",stacked,,cgroups.plugin,systemd -services.mem_pgpgin,,a dimension per systemd service,MiB/s,"Systemd Services Memory Charging Activity",stacked,,cgroups.plugin,systemd -services.mem_pgpgout,,a dimension per systemd service,MiB/s,"Systemd Services Memory Uncharging Activity",stacked,,cgroups.plugin,systemd 
-services.mem_failcnt,,a dimension per systemd service,failures,"Systemd Services Memory Limit Failures",stacked,,cgroups.plugin,systemd -services.swap_usage,,a dimension per systemd service,MiB,"Systemd Services Swap Memory Used",stacked,,cgroups.plugin,systemd -services.io_read,,a dimension per systemd service,KiB/s,"Systemd Services Disk Read Bandwidth",stacked,,cgroups.plugin,systemd -services.io_write,,a dimension per systemd service,KiB/s,"Systemd Services Disk Write Bandwidth",stacked,,cgroups.plugin,systemd -services.io_ops_read,,a dimension per systemd service,operations/s,"Systemd Services Disk Read Operations",stacked,,cgroups.plugin,systemd -services.io_ops_write,,a dimension per systemd service,operations/s,"Systemd Services Disk Write Operations",stacked,,cgroups.plugin,systemd -services.throttle_io_read,,a dimension per systemd service,KiB/s,"Systemd Services Throttle Disk Read Bandwidth",stacked,,cgroups.plugin,systemd -services.services.throttle_io_write,,a dimension per systemd service,KiB/s,"Systemd Services Throttle Disk Write Bandwidth",stacked,,cgroups.plugin,systemd -services.throttle_io_ops_read,,a dimension per systemd service,operations/s,"Systemd Services Throttle Disk Read Operations",stacked,,cgroups.plugin,systemd -throttle_io_ops_write,,a dimension per systemd service,operations/s,"Systemd Services Throttle Disk Write Operations",stacked,,cgroups.plugin,systemd -services.queued_io_ops_read,,a dimension per systemd service,operations/s,"Systemd Services Queued Disk Read Operations",stacked,,cgroups.plugin,systemd -services.queued_io_ops_write,,a dimension per systemd service,operations/s,"Systemd Services Queued Disk Write Operations",stacked,,cgroups.plugin,systemd -services.merged_io_ops_read,,a dimension per systemd service,operations/s,"Systemd Services Merged Disk Read Operations",stacked,,cgroups.plugin,systemd -services.merged_io_ops_write,,a dimension per systemd service,operations/s,"Systemd Services Merged Disk Write 
Operations",stacked,,cgroups.plugin,systemd
\ No newline at end of file diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c index fb805e63..9c7488c8 100644 --- a/collectors/cgroups.plugin/sys_fs_cgroup.c +++ b/collectors/cgroups.plugin/sys_fs_cgroup.c @@ -62,6 +62,8 @@ static int cgroup_enable_pressure_io_some = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_pressure_io_full = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_pressure_memory_some = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_irq_some = CONFIG_BOOLEAN_NO; +static int cgroup_enable_pressure_irq_full = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES; static int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO; @@ -474,6 +476,7 @@ void read_cgroup_plugin_configuration() { " !*.mount " " !*.partition " " !*.service " + " !*.service/udev " " !*.socket " " !*.slice " " !*.swap " @@ -828,6 +831,7 @@ struct cgroup { struct pressure cpu_pressure; struct pressure io_pressure; struct pressure memory_pressure; + struct pressure irq_pressure; // per cgroup charts RRDSET *st_cpu; @@ -1451,28 +1455,33 @@ static inline void cgroup2_read_pressure(struct pressure *res) { return; } - res->some.share_time.value10 = strtod(procfile_lineword(ff, 0, 2), NULL); - res->some.share_time.value60 = strtod(procfile_lineword(ff, 0, 4), NULL); - res->some.share_time.value300 = strtod(procfile_lineword(ff, 0, 6), NULL); - res->some.total_time.value_total = str2ull(procfile_lineword(ff, 0, 8), NULL) / 1000; // us->ms + bool did_some = false, did_full = false; - if (lines > 2) { - res->full.share_time.value10 = strtod(procfile_lineword(ff, 1, 2), NULL); - res->full.share_time.value60 = strtod(procfile_lineword(ff, 1, 4), NULL); - res->full.share_time.value300 = strtod(procfile_lineword(ff, 1, 6), NULL); - res->full.total_time.value_total = str2ull(procfile_lineword(ff, 1, 8), NULL) / 1000; // us->ms + 
for(size_t l = 0; l < lines ;l++) { + const char *key = procfile_lineword(ff, l, 0); + if(strcmp(key, "some") == 0) { + res->some.share_time.value10 = strtod(procfile_lineword(ff, l, 2), NULL); + res->some.share_time.value60 = strtod(procfile_lineword(ff, l, 4), NULL); + res->some.share_time.value300 = strtod(procfile_lineword(ff, l, 6), NULL); + res->some.total_time.value_total = str2ull(procfile_lineword(ff, l, 8), NULL) / 1000; // us->ms + did_some = true; + } + else if(strcmp(key, "full") == 0) { + res->full.share_time.value10 = strtod(procfile_lineword(ff, l, 2), NULL); + res->full.share_time.value60 = strtod(procfile_lineword(ff, l, 4), NULL); + res->full.share_time.value300 = strtod(procfile_lineword(ff, l, 6), NULL); + res->full.total_time.value_total = str2ull(procfile_lineword(ff, l, 8), NULL) / 1000; // us->ms + did_full = true; + } } - res->updated = 1; + res->updated = (did_full || did_some) ? 1 : 0; - if (unlikely(res->some.enabled == CONFIG_BOOLEAN_AUTO)) { - res->some.enabled = CONFIG_BOOLEAN_YES; - if (lines > 2) { - res->full.enabled = CONFIG_BOOLEAN_YES; - } else { - res->full.enabled = CONFIG_BOOLEAN_NO; - } - } + if(unlikely(res->some.enabled == CONFIG_BOOLEAN_AUTO)) + res->some.enabled = (did_some) ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO; + + if(unlikely(res->full.enabled == CONFIG_BOOLEAN_AUTO)) + res->full.enabled = (did_full) ? 
CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO; } } @@ -1637,6 +1646,7 @@ static inline void read_cgroup(struct cgroup *cg) { cgroup2_read_pressure(&cg->cpu_pressure); cgroup2_read_pressure(&cg->io_pressure); cgroup2_read_pressure(&cg->memory_pressure); + cgroup2_read_pressure(&cg->irq_pressure); cgroup_read_memory(&cg->memory, 1); } } @@ -1851,6 +1861,7 @@ static inline void cgroup_free(struct cgroup *cg) { free_pressure(&cg->cpu_pressure); free_pressure(&cg->io_pressure); free_pressure(&cg->memory_pressure); + free_pressure(&cg->irq_pressure); freez(cg->id); freez(cg->intermediate_id); @@ -2465,6 +2476,18 @@ static inline void discovery_update_filenames() { netdata_log_debug(D_CGROUP, "memory.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); } } + + if (unlikely((cgroup_enable_pressure_irq_some || cgroup_enable_pressure_irq_full) && !cg->irq_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/irq.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->irq_pressure.filename = strdupz(filename); + cg->irq_pressure.some.enabled = cgroup_enable_pressure_irq_some; + cg->irq_pressure.full.enabled = cgroup_enable_pressure_irq_full; + netdata_log_debug(D_CGROUP, "irq.pressure filename for cgroup '%s': '%s'", cg->id, cg->irq_pressure.filename); + } else { + netdata_log_debug(D_CGROUP, "irq.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); + } + } } } } @@ -4643,6 +4666,112 @@ void update_cgroup_charts(int update_every) { update_pressure_charts(pcs); } + res = &cg->irq_pressure; + + if (likely(res->updated && res->some.enabled)) { + struct pressure_charts *pcs; + pcs = &res->some; + + if (unlikely(!pcs->share_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "IRQ some pressure"); + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "irq_some_pressure" + , NULL + , "interrupts" + , k8s_is_kubepod(cg) ? 
"k8s.cgroup.irq_some_pressure" : "cgroup.irq_some_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2310 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "IRQ some pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "irq_some_pressure_stall_time" + , NULL + , "interrupts" + , k8s_is_kubepod(cg) ? "k8s.cgroup.irq_some_pressure_stall_time" : "cgroup.irq_some_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2330 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + update_pressure_charts(pcs); + } + + if (likely(res->updated && res->full.enabled)) { + struct pressure_charts *pcs; + pcs = &res->full; + + if (unlikely(!pcs->share_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "IRQ full pressure"); + + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "irq_full_pressure" + , NULL + , "interrupts" + , k8s_is_kubepod(cg) ? 
"k8s.cgroup.irq_full_pressure" : "cgroup.irq_full_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2350 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "IRQ full pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "irq_full_pressure_stall_time" + , NULL + , "interrupts" + , k8s_is_kubepod(cg) ? "k8s.cgroup.irq_full_pressure_stall_time" : "cgroup.irq_full_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2370 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + update_pressure_charts(pcs); + } + res = &cg->io_pressure; if (likely(res->updated && res->some.enabled)) { diff --git a/collectors/charts.d.plugin/ap/metadata.yaml b/collectors/charts.d.plugin/ap/metadata.yaml index 344b6817..c4e96a14 100644 --- a/collectors/charts.d.plugin/ap/metadata.yaml +++ b/collectors/charts.d.plugin/ap/metadata.yaml @@ -1,107 +1,143 @@ -meta: - plugin_name: charts.d.plugin - module_name: ap - monitored_instance: - name: Access Points - link: '' - categories: - - data-collection.networking-stack-and-network-interfaces - icon_filename: 'netdata.png' - related_resources: - integrations: - list: [] - 
info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Maintain surveillance over Access Points, ensuring optimal wireless network connectivity and performance. Monitor and troubleshoot in realtime for high-quality network operations.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: wireless device - description: "" - labels: [] +plugin_name: charts.d.plugin +modules: + - meta: + plugin_name: charts.d.plugin + module_name: ap + monitored_instance: + name: Access Points + link: "https://learn.netdata.cloud/docs/data-collection/networking-stack-and-network-interfaces/linux-access-points" + categories: + - data-collection.linux-systems.network-metrics + icon_filename: "network-wired.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - ap + - access + - point + - wireless + - network + most_popular: false + overview: + data_collection: + metrics_description: "The ap collector visualizes data related to wireless access points." + method_description: "It uses the `iw` command line utility to detect access points. For each interface that is of `type AP`, it then runs `iw INTERFACE station dump` and collects statistics." 
+ supported_platforms: + include: [Linux] + exclude: [] + multi_instance: false + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "The plugin is able to auto-detect if you are running access points on your linux box." + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: "`iw` utility." + description: "Make sure the `iw` utility is installed." + configuration: + file: + name: charts.d/ap.conf + options: + description: | + The config file is sourced by the charts.d plugin. It's a standard bash file. + + The following collapsed table contains all the options that can be configured for the ap collector. + folding: + title: "Config options" + enabled: true + list: + - name: ap_update_every + description: The data collection frequency. If unset, will inherit the netdata update frequency. + default_value: 1 + required: false + - name: ap_priority + description: Controls the order of charts at the netdata dashboard. + default_value: 6900 + required: false + - name: ap_retries + description: The number of retries to do in case of failure before disabling the collector. 
+ default_value: 10 + required: false + examples: + folding: + enabled: false + title: "Config" + list: + - name: Change the collection frequency + description: Specify a custom collection frequency (update_every) for this collector + config: | + # the data collection frequency + # if unset, will inherit the netdata update frequency + ap_update_every=10 + + # the charts priority on the dashboard + #ap_priority=6900 + + # the number of retries to do in case of failure + # before disabling the module + #ap_retries=10 + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: ap.clients - description: Connected clients to ${ssid} on ${dev} - unit: "clients" - chart_type: line - dimensions: - - name: clients - - name: ap.net - description: Bandwidth for ${ssid} on ${dev} - unit: "kilobits/s" - chart_type: area - dimensions: - - name: received - - name: sent - - name: ap.packets - description: Packets for ${ssid} on ${dev} - unit: "packets/s" - chart_type: line - dimensions: - - name: received - - name: sent - - name: ap.issues - description: Transmit Issues for ${ssid} on ${dev} - unit: "issues/s" - chart_type: line - dimensions: - - name: retries - - name: failures - - name: ap.signal - description: Average Signal for ${ssid} on ${dev} - unit: "dBm" - chart_type: line - dimensions: - - name: average signal - - name: ap.bitrate - description: Bitrate for ${ssid} on ${dev} - unit: "Mbps" - chart_type: line - dimensions: - - name: receive - - name: transmit - - name: expected + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: wireless device + description: "These metrics refer to the entire monitored application." 
+ labels: [] + metrics: + - name: ap.clients + description: Connected clients to ${ssid} on ${dev} + unit: "clients" + chart_type: line + dimensions: + - name: clients + - name: ap.net + description: Bandwidth for ${ssid} on ${dev} + unit: "kilobits/s" + chart_type: area + dimensions: + - name: received + - name: sent + - name: ap.packets + description: Packets for ${ssid} on ${dev} + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ap.issues + description: Transmit Issues for ${ssid} on ${dev} + unit: "issues/s" + chart_type: line + dimensions: + - name: retries + - name: failures + - name: ap.signal + description: Average Signal for ${ssid} on ${dev} + unit: "dBm" + chart_type: line + dimensions: + - name: average signal + - name: ap.bitrate + description: Bitrate for ${ssid} on ${dev} + unit: "Mbps" + chart_type: line + dimensions: + - name: receive + - name: transmit + - name: expected diff --git a/collectors/charts.d.plugin/ap/metrics.csv b/collectors/charts.d.plugin/ap/metrics.csv deleted file mode 100644 index 8428cf6d..00000000 --- a/collectors/charts.d.plugin/ap/metrics.csv +++ /dev/null @@ -1,7 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -ap.clients,wireless device,clients,clients,"Connected clients to ${ssid} on ${dev}",line,,charts.d.plugin,ap -ap.net,wireless device,"received, sent",kilobits/s,"Bandwidth for ${ssid} on ${dev}",area,,charts.d.plugin,ap -ap.packets,wireless device,"received, sent",packets/s,"Packets for ${ssid} on ${dev}",line,,charts.d.plugin,ap -ap.issues,wireless device,"retries, failures",issues/s,"Transmit Issues for ${ssid} on ${dev}",line,,charts.d.plugin,ap -ap.signal,wireless device,"average signal",dBm,"Average Signal for ${ssid} on ${dev}",line,,charts.d.plugin,ap -ap.bitrate,wireless device,"receive, transmit, expected",Mbps,"Bitrate for ${ssid} on ${dev}",line,,charts.d.plugin,ap
\ No newline at end of file diff --git a/collectors/charts.d.plugin/apcupsd/metadata.yaml b/collectors/charts.d.plugin/apcupsd/metadata.yaml index 203f0482..d078074b 100644 --- a/collectors/charts.d.plugin/apcupsd/metadata.yaml +++ b/collectors/charts.d.plugin/apcupsd/metadata.yaml @@ -1,144 +1,198 @@ -meta: - plugin_name: charts.d.plugin - module_name: apcupsd - monitored_instance: - name: APC UPS - link: '' - categories: - - data-collection.ups - icon_filename: 'apc.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor APC UPS performance with Netdata for optimal uninterruptible power supply operations. Enhance your power supply reliability with real-time APC UPS metrics.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: -- name: apcupsd_ups_charge - link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf - metric: apcupsd.charge - info: average UPS charge over the last minute - os: "*" -- name: apcupsd_10min_ups_load - link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf - metric: apcupsd.load - info: average UPS load over the last 10 minutes - os: "*" -- name: apcupsd_last_collected_secs - link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf - metric: apcupsd.load - info: number of seconds since the last successful data collection 
-metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: ups - description: "" - labels: [] +plugin_name: charts.d.plugin +modules: + - meta: + plugin_name: charts.d.plugin + module_name: apcupsd + monitored_instance: + name: APC UPS + link: "https://www.apc.com" + categories: + - data-collection.ups + icon_filename: "apc.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - ups + - apc + - power + - supply + - battery + - apcupsd + most_popular: false + overview: + data_collection: + metrics_description: "Monitor APC UPS performance with Netdata for optimal uninterruptible power supply operations. Enhance your power supply reliability with real-time APC UPS metrics." + method_description: "The collector uses the `apcaccess` tool to contact the `apcupsd` daemon and get the APC UPS statistics." + supported_platforms: + include: [] + exclude: [] + multi_instance: false + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "By default, with no configuration provided, the collector will try to contact 127.0.0.1:3551 using the `apcaccess` utility." + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: "Required software" + description: "Make sure the `apcaccess` and `apcupsd` are installed and running." + configuration: + file: + name: charts.d/apcupsd.conf + options: + description: | + The config file is sourced by the charts.d plugin. It's a standard bash file. + + The following collapsed table contains all the options that can be configured for the apcupsd collector. + folding: + title: "Config options" + enabled: true + list: + - name: apcupsd_sources + description: This is an array of apcupsd sources. You can have multiple entries there. Please refer to the example below on how to set it. 
+ default_value: "127.0.0.1:3551" + required: false + - name: apcupsd_timeout + description: How long to wait for apcupsd to respond. + default_value: 3 + required: false + - name: apcupsd_update_every + description: The data collection frequency. If unset, will inherit the netdata update frequency. + default_value: 1 + required: false + - name: apcupsd_priority + description: The charts priority on the dashboard. + default_value: 90000 + required: false + - name: apcupsd_retries + description: The number of retries to do in case of failure before disabling the collector. + default_value: 10 + required: false + examples: + folding: + enabled: false + title: "Config" + list: + - name: Multiple apcupsd sources + description: Specify multiple apcupsd sources along with a custom update interval + config: | + # add all your APC UPSes in this array - uncomment it too + declare -A apcupsd_sources=( + ["local"]="127.0.0.1:3551" + ["remote"]="1.2.3.4:3551" + ) + + # how long to wait for apcupsd to respond + #apcupsd_timeout=3 + + # the data collection frequency + # if unset, will inherit the netdata update frequency + apcupsd_update_every=5 + + # the charts priority on the dashboard + #apcupsd_priority=90000 + + # the number of retries to do in case of failure + # before disabling the module + #apcupsd_retries=10 + troubleshooting: + problems: + list: [] + alerts: + - name: apcupsd_ups_charge + link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf + metric: apcupsd.charge + info: average UPS charge over the last minute + os: "*" + - name: apcupsd_10min_ups_load + link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf + metric: apcupsd.load + info: average UPS load over the last 10 minutes + os: "*" + - name: apcupsd_last_collected_secs + link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf + metric: apcupsd.load + info: number of seconds since the last successful data collection metrics: 
- - name: apcupsd.charge - description: UPS Charge - unit: "percentage" - chart_type: area - dimensions: - - name: charge - - name: apcupsd.battery.voltage - description: UPS Battery Voltage - unit: "Volts" - chart_type: line - dimensions: - - name: voltage - - name: nominal - - name: apcupsd.input.voltage - description: UPS Input Voltage - unit: "Volts" - chart_type: line - dimensions: - - name: voltage - - name: min - - name: max - - name: apcupsd.output.voltage - description: UPS Output Voltage - unit: "Volts" - chart_type: line - dimensions: - - name: absolute - - name: nominal - - name: apcupsd.input.frequency - description: UPS Input Voltage - unit: "Hz" - chart_type: line - dimensions: - - name: frequency - - name: apcupsd.load - description: UPS Load - unit: "percentage" - chart_type: area - dimensions: - - name: load - - name: apcupsd.load_usage - description: UPS Load Usage - unit: "Watts" - chart_type: area - dimensions: - - name: load - - name: apcupsd.temperature - description: UPS Temperature - unit: "Celsius" - chart_type: line - dimensions: - - name: temp - - name: apcupsd.time - description: UPS Time Remaining - unit: "Minutes" - chart_type: area - dimensions: - - name: time - - name: apcupsd.online - description: UPS ONLINE flag - unit: "boolean" - chart_type: line - dimensions: - - name: online + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: ups + description: "Metrics related to UPS. Each UPS provides its own set of the following metrics." 
+ labels: [] + metrics: + - name: apcupsd.charge + description: UPS Charge + unit: "percentage" + chart_type: area + dimensions: + - name: charge + - name: apcupsd.battery.voltage + description: UPS Battery Voltage + unit: "Volts" + chart_type: line + dimensions: + - name: voltage + - name: nominal + - name: apcupsd.input.voltage + description: UPS Input Voltage + unit: "Volts" + chart_type: line + dimensions: + - name: voltage + - name: min + - name: max + - name: apcupsd.output.voltage + description: UPS Output Voltage + unit: "Volts" + chart_type: line + dimensions: + - name: absolute + - name: nominal + - name: apcupsd.input.frequency + description: UPS Input Voltage + unit: "Hz" + chart_type: line + dimensions: + - name: frequency + - name: apcupsd.load + description: UPS Load + unit: "percentage" + chart_type: area + dimensions: + - name: load + - name: apcupsd.load_usage + description: UPS Load Usage + unit: "Watts" + chart_type: area + dimensions: + - name: load + - name: apcupsd.temperature + description: UPS Temperature + unit: "Celsius" + chart_type: line + dimensions: + - name: temp + - name: apcupsd.time + description: UPS Time Remaining + unit: "Minutes" + chart_type: area + dimensions: + - name: time + - name: apcupsd.online + description: UPS ONLINE flag + unit: "boolean" + chart_type: line + dimensions: + - name: online diff --git a/collectors/charts.d.plugin/apcupsd/metrics.csv b/collectors/charts.d.plugin/apcupsd/metrics.csv deleted file mode 100644 index 828abf1f..00000000 --- a/collectors/charts.d.plugin/apcupsd/metrics.csv +++ /dev/null @@ -1,11 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -apcupsd.charge,ups,charge,percentage,"UPS Charge",area,,charts.d.plugin,apcupsd -apcupsd.battery.voltage,ups,"voltage, nominal",Volts,"UPS Battery Voltage",line,,charts.d.plugin,apcupsd -apcupsd.input.voltage,ups,"voltage, min, max",Volts,"UPS Input Voltage",line,,charts.d.plugin,apcupsd 
-apcupsd.output.voltage,ups,"absolute, nominal",Volts,"UPS Output Voltage",line,,charts.d.plugin,apcupsd -apcupsd.input.frequency,ups,frequency,Hz,"UPS Input Voltage",line,,charts.d.plugin,apcupsd -apcupsd.load,ups,load,percentage,"UPS Load",area,,charts.d.plugin,apcupsd -apcupsd.load_usage,ups,load,Watts,"UPS Load Usage",area,,charts.d.plugin,apcupsd -apcupsd.temperature,ups,temp,Celsius,"UPS Temperature",line,,charts.d.plugin,apcupsd -apcupsd.time,ups,time,Minutes,"UPS Time Remaining",area,,charts.d.plugin,apcupsd -apcupsd.online,ups,online,boolean,"UPS ONLINE flag",line,,charts.d.plugin,apcupsd
\ No newline at end of file diff --git a/collectors/charts.d.plugin/libreswan/metadata.yaml b/collectors/charts.d.plugin/libreswan/metadata.yaml index 480db363..484d79ed 100644 --- a/collectors/charts.d.plugin/libreswan/metadata.yaml +++ b/collectors/charts.d.plugin/libreswan/metadata.yaml @@ -1,79 +1,143 @@ -meta: - plugin_name: charts.d.plugin - module_name: libreswan - monitored_instance: - name: Libreswan - link: '' - categories: - - data-collection.vpns - icon_filename: 'libreswan.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor Libreswan performance for optimal IPsec VPN operations. Improve your VPN operations with Netdata''s real-time metrics and built-in alerts.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: IPSEC tunnel - description: "" - labels: [] +plugin_name: charts.d.plugin +modules: + - meta: + plugin_name: charts.d.plugin + module_name: libreswan + monitored_instance: + name: Libreswan + link: "https://libreswan.org/" + categories: + - data-collection.vpns + icon_filename: "libreswan.png" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - vpn + - libreswan + - network + - ipsec + most_popular: false + 
overview: + data_collection: + metrics_description: "Monitor Libreswan performance for optimal IPsec VPN operations. Improve your VPN operations with Netdata's real-time metrics and built-in alerts." + method_description: "The collector uses the `ipsec` command to collect the information it needs." + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: "Permissions to execute `ipsec`" + description: | + The plugin executes 2 commands to collect all the information it needs: + + ```sh + ipsec whack --status + ipsec whack --trafficstatus + ``` + + The first command is used to extract the currently established tunnels, their IDs and their names. + The second command is used to extract the current uptime and traffic. + + Most probably user `netdata` will not be able to query libreswan, so the `ipsec` commands will be denied. + The plugin attempts to run `ipsec` as `sudo ipsec ...`, to get access to libreswan statistics. + + To allow user `netdata` to execute `sudo ipsec ...`, create the file `/etc/sudoers.d/netdata` with this content: + + ``` + netdata ALL = (root) NOPASSWD: /sbin/ipsec whack --status + netdata ALL = (root) NOPASSWD: /sbin/ipsec whack --trafficstatus + ``` + + Make sure the path `/sbin/ipsec` matches your setup (execute `which ipsec` to find the right path). + configuration: + file: + name: charts.d/libreswan.conf + options: + description: | + The config file is sourced by the charts.d plugin. It's a standard bash file. + + The following collapsed table contains all the options that can be configured for the libreswan collector. + folding: + title: "Config options" + enabled: true + list: + - name: libreswan_update_every + description: The data collection frequency. If unset, will inherit the netdata update frequency. 
+ default_value: 1 + required: false + - name: libreswan_priority + description: The charts priority on the dashboard + default_value: 90000 + required: false + - name: libreswan_retries + description: The number of retries to do in case of failure before disabling the collector. + default_value: 10 + required: false + - name: libreswan_sudo + description: Whether to run `ipsec` with `sudo` or not. + default_value: 1 + required: false + examples: + folding: + enabled: false + title: "Config" + list: + - name: Run `ipsec` without sudo + description: Run the `ipsec` utility without sudo + config: | + # the data collection frequency + # if unset, will inherit the netdata update frequency + #libreswan_update_every=1 + + # the charts priority on the dashboard + #libreswan_priority=90000 + + # the number of retries to do in case of failure + # before disabling the module + #libreswan_retries=10 + + # set to 1, to run ipsec with sudo (the default) + # set to 0, to run ipsec without sudo + libreswan_sudo=0 + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: libreswan.net - description: LibreSWAN Tunnel ${name} Traffic - unit: "kilobits/s" - chart_type: area - dimensions: - - name: in - - name: out - - name: libreswan.uptime - description: LibreSWAN Tunnel ${name} Uptime - unit: "seconds" - chart_type: line - dimensions: - - name: uptime + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: IPSEC tunnel + description: "Metrics related to IPSEC tunnels. Each tunnel provides its own set of the following metrics." 
+ labels: [] + metrics: + - name: libreswan.net + description: LibreSWAN Tunnel ${name} Traffic + unit: "kilobits/s" + chart_type: area + dimensions: + - name: in + - name: out + - name: libreswan.uptime + description: LibreSWAN Tunnel ${name} Uptime + unit: "seconds" + chart_type: line + dimensions: + - name: uptime diff --git a/collectors/charts.d.plugin/libreswan/metrics.csv b/collectors/charts.d.plugin/libreswan/metrics.csv deleted file mode 100644 index e81c43b2..00000000 --- a/collectors/charts.d.plugin/libreswan/metrics.csv +++ /dev/null @@ -1,3 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -libreswan.net,IPSEC tunnel,"in, out",kilobits/s,"LibreSWAN Tunnel ${name} Traffic",area,,charts.d.plugin,libreswan -libreswan.uptime,IPSEC tunnel,uptime,seconds,"LibreSWAN Tunnel ${name} Uptime",line,,charts.d.plugin,libreswan
\ No newline at end of file diff --git a/collectors/charts.d.plugin/nut/metadata.yaml b/collectors/charts.d.plugin/nut/metadata.yaml index ce6e5700..ea2e6b2e 100644 --- a/collectors/charts.d.plugin/nut/metadata.yaml +++ b/collectors/charts.d.plugin/nut/metadata.yaml @@ -1,152 +1,219 @@ -meta: - plugin_name: charts.d.plugin - module_name: nut - monitored_instance: - name: UPS/PDU - link: '' - categories: - - data-collection.ups - icon_filename: 'plug-circle-bolt.svg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Examine UPS/PDU metrics with Netdata for insights into power device performance. Improve your power device performance with comprehensive dashboards and anomaly detection.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: -- name: nut_ups_charge - link: https://github.com/netdata/netdata/blob/master/health/health.d/nut.conf - metric: nut.charge - info: average UPS charge over the last minute - os: "*" -- name: nut_10min_ups_load - link: https://github.com/netdata/netdata/blob/master/health/health.d/nut.conf - metric: nut.load - info: average UPS load over the last 10 minutes - os: "*" -- name: nut_last_collected_secs - link: https://github.com/netdata/netdata/blob/master/health/health.d/nut.conf - metric: nut.load - info: number of seconds since the last successful data collection -metrics: - folding: - title: Metrics 
- enabled: false - description: "" - availability: [] - scopes: - - name: ups - description: "" - labels: [] +plugin_name: charts.d.plugin +modules: + - meta: + plugin_name: charts.d.plugin + module_name: nut + monitored_instance: + name: Network UPS Tools (NUT) + link: '' + categories: + - data-collection.ups + icon_filename: 'plug-circle-bolt.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: + - nut + - network ups tools + - ups + - pdu + most_popular: false + overview: + data_collection: + metrics_description: 'Examine UPS/PDU metrics with Netdata for insights into power device performance. Improve your power device performance with comprehensive dashboards and anomaly detection.' + method_description: 'This collector uses the `nut` (Network UPS Tools) to query statistics for multiple UPS devices.' + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: + - title: "Required software" + description: "Make sure the Network UPS Tools (`nut`) is installed and can detect your UPS devices." + configuration: + file: + name: charts.d/nut.conf + options: + description: | + The config file is sourced by the charts.d plugin. It's a standard bash file. + + The following collapsed table contains all the options that can be configured for the nut collector. + folding: + title: "Config options" + enabled: true + list: + - name: nut_ups + description: A space separated list of UPS names. If empty, the list returned by `upsc -l` will be used. + default_value: "" + required: false + - name: nut_names + description: Each line represents an alias for one UPS. If empty, the FQDN will be used. 
+ default_value: "" + required: false + - name: nut_timeout + description: How long to wait for nut to respond. + default_value: 2 + required: false + - name: nut_clients_chart + description: Set this to 1 to enable another chart showing the number of UPS clients connected to `upsd`. + default_value: 1 + required: false + - name: nut_update_every + description: The data collection frequency. If unset, will inherit the netdata update frequency. + default_value: 2 + required: false + - name: nut_priority + description: The charts priority on the dashboard + default_value: 90000 + required: false + - name: nut_retries + description: The number of retries to do in case of failure before disabling the collector. + default_value: 10 + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Provide names to UPS devices + description: Map aliases to UPS devices + config: | + # a space separated list of UPS names + # if empty, the list returned by 'upsc -l' will be used + #nut_ups= + + # each line represents an alias for one UPS + # if empty, the FQDN will be used + nut_names["XXXXXX"]="UPS-office" + nut_names["YYYYYY"]="UPS-rack" + + # how much time in seconds, to wait for nut to respond + #nut_timeout=2 + + # set this to 1, to enable another chart showing the number + # of UPS clients connected to upsd + #nut_clients_chart=1 + + # the data collection frequency + # if unset, will inherit the netdata update frequency + #nut_update_every=2 + + # the charts priority on the dashboard + #nut_priority=90000 + + # the number of retries to do in case of failure + # before disabling the module + #nut_retries=10 + troubleshooting: + problems: + list: [] + alerts: + - name: nut_ups_charge + link: https://github.com/netdata/netdata/blob/master/health/health.d/nut.conf + metric: nut.charge + info: average UPS charge over the last minute + os: "*" + - name: nut_10min_ups_load + link: 
https://github.com/netdata/netdata/blob/master/health/health.d/nut.conf + metric: nut.load + info: average UPS load over the last 10 minutes + os: "*" + - name: nut_last_collected_secs + link: https://github.com/netdata/netdata/blob/master/health/health.d/nut.conf + metric: nut.load + info: number of seconds since the last successful data collection metrics: - - name: nut.charge - description: UPS Charge - unit: "percentage" - chart_type: area - dimensions: - - name: charge - - name: nut.runtime - description: UPS Runtime - unit: "seconds" - chart_type: line - dimensions: - - name: runtime - - name: nut.battery.voltage - description: UPS Battery Voltage - unit: "Volts" - chart_type: line - dimensions: - - name: voltage - - name: high - - name: low - - name: nominal - - name: nut.input.voltage - description: UPS Input Voltage - unit: "Volts" - chart_type: line - dimensions: - - name: voltage - - name: fault - - name: nominal - - name: nut.input.current - description: UPS Input Current - unit: "Ampere" - chart_type: line - dimensions: - - name: nominal - - name: nut.input.frequency - description: UPS Input Frequency - unit: "Hz" - chart_type: line - dimensions: - - name: frequency - - name: nominal - - name: nut.output.voltage - description: UPS Output Voltage - unit: "Volts" - chart_type: line - dimensions: - - name: voltage - - name: nut.load - description: UPS Load - unit: "percentage" - chart_type: area - dimensions: - - name: load - - name: nut.load_usage - description: UPS Load Usage - unit: "Watts" - chart_type: area - dimensions: - - name: load_usage - - name: nut.temperature - description: UPS Temperature - unit: "temperature" - chart_type: line - dimensions: - - name: temp - - name: nut.clients - description: UPS Connected Clients - unit: "clients" - chart_type: area - dimensions: - - name: clients + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: ups + description: "Metrics related to UPS. 
Each UPS provides its own set of the following metrics." + labels: [] + metrics: + - name: nut.charge + description: UPS Charge + unit: "percentage" + chart_type: area + dimensions: + - name: charge + - name: nut.runtime + description: UPS Runtime + unit: "seconds" + chart_type: line + dimensions: + - name: runtime + - name: nut.battery.voltage + description: UPS Battery Voltage + unit: "Volts" + chart_type: line + dimensions: + - name: voltage + - name: high + - name: low + - name: nominal + - name: nut.input.voltage + description: UPS Input Voltage + unit: "Volts" + chart_type: line + dimensions: + - name: voltage + - name: fault + - name: nominal + - name: nut.input.current + description: UPS Input Current + unit: "Ampere" + chart_type: line + dimensions: + - name: nominal + - name: nut.input.frequency + description: UPS Input Frequency + unit: "Hz" + chart_type: line + dimensions: + - name: frequency + - name: nominal + - name: nut.output.voltage + description: UPS Output Voltage + unit: "Volts" + chart_type: line + dimensions: + - name: voltage + - name: nut.load + description: UPS Load + unit: "percentage" + chart_type: area + dimensions: + - name: load + - name: nut.load_usage + description: UPS Load Usage + unit: "Watts" + chart_type: area + dimensions: + - name: load_usage + - name: nut.temperature + description: UPS Temperature + unit: "temperature" + chart_type: line + dimensions: + - name: temp + - name: nut.clients + description: UPS Connected Clients + unit: "clients" + chart_type: area + dimensions: + - name: clients diff --git a/collectors/charts.d.plugin/nut/metrics.csv b/collectors/charts.d.plugin/nut/metrics.csv deleted file mode 100644 index 2abd5725..00000000 --- a/collectors/charts.d.plugin/nut/metrics.csv +++ /dev/null @@ -1,12 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -nut.charge,ups,charge,percentage,"UPS Charge",area,,charts.d.plugin,nut -nut.runtime,ups,runtime,seconds,"UPS 
Runtime",line,,charts.d.plugin,nut -nut.battery.voltage,ups,"voltage, high, low, nominal",Volts,"UPS Battery Voltage",line,,charts.d.plugin,nut -nut.input.voltage,ups,"voltage, fault, nominal",Volts,"UPS Input Voltage",line,,charts.d.plugin,nut -nut.input.current,ups,nominal,Ampere,"UPS Input Current",line,,charts.d.plugin,nut -nut.input.frequency,ups,"frequency, nominal",Hz,"UPS Input Frequency",line,,charts.d.plugin,nut -nut.output.voltage,ups,voltage,Volts,"UPS Output Voltage",line,,charts.d.plugin,nut -nut.load,ups,load,percentage,"UPS Load",area,,charts.d.plugin,nut -nut.load_usage,ups,load_usage,Watts,"UPS Load Usage",area,,charts.d.plugin,nut -nut.temperature,ups,temp,temperature,"UPS Temperature",line,,charts.d.plugin,nut -nut.clients,ups,clients,clients,"UPS Connected Clients",area,,charts.d.plugin,nut
\ No newline at end of file diff --git a/collectors/charts.d.plugin/opensips/metadata.yaml b/collectors/charts.d.plugin/opensips/metadata.yaml index 90a2ebe7..27f66328 100644 --- a/collectors/charts.d.plugin/opensips/metadata.yaml +++ b/collectors/charts.d.plugin/opensips/metadata.yaml @@ -1,215 +1,267 @@ -meta: - plugin_name: charts.d.plugin - module_name: opensips - monitored_instance: - name: OpenSIPS - link: '' - categories: - - data-collection.telephony-servers - icon_filename: 'opensips.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Examine OpenSIPS metrics for insights into SIP server operations. Study call rates, error rates, and response times for reliable voice over IP services.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: charts.d.plugin +modules: + - meta: + plugin_name: charts.d.plugin + module_name: opensips + monitored_instance: + name: OpenSIPS + link: "https://opensips.org/" + categories: + - data-collection.telephony-servers + icon_filename: "opensips.png" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - opensips + - sip + - voice + - video + - stream + 
most_popular: false + overview: + data_collection: + metrics_description: "Examine OpenSIPS metrics for insights into SIP server operations. Study call rates, error rates, and response times for reliable voice over IP services." + method_description: "The collector uses the `opensipsctl` command line utility to gather OpenSIPS metrics." + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "The collector will attempt to call `opensipsctl` along with a default number of parameters, even without any configuration." + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: "Required software" + description: "The collector requires the `opensipsctl` to be installed." + configuration: + file: + name: charts.d/opensips.conf + options: + description: | + The config file is sourced by the charts.d plugin. It's a standard bash file. + + The following collapsed table contains all the options that can be configured for the opensips collector. + folding: + title: "Config options" + enabled: true + list: + - name: opensips_opts + description: Specify parameters to the `opensipsctl` command. If the default value fails to get global status, set here whatever options are needed to connect to the opensips server. + default_value: "fifo get_statistics all" + required: false + - name: opensips_cmd + description: If `opensipsctl` is not in $PATH, specify it's full path here. + default_value: "" + required: false + - name: opensips_timeout + description: How long to wait for `opensipsctl` to respond. + default_value: 2 + required: false + - name: opensips_update_every + description: The data collection frequency. If unset, will inherit the netdata update frequency. + default_value: 5 + required: false + - name: opensips_priority + description: The charts priority on the dashboard. 
+ default_value: 80000 + required: false + - name: opensips_retries + description: The number of retries to do in case of failure before disabling the collector. + default_value: 10 + required: false + examples: + folding: + enabled: false + title: "Config" + list: + - name: Custom `opensipsctl` command + description: Set a custom path to the `opensipsctl` command + config: | + #opensips_opts="fifo get_statistics all" + opensips_cmd=/opt/opensips/bin/opensipsctl + #opensips_timeout=2 + + # the data collection frequency + # if unset, will inherit the netdata update frequency + #opensips_update_every=5 + + # the charts priority on the dashboard + #opensips_priority=80000 + + # the number of retries to do in case of failure + # before disabling the module + #opensips_retries=10 + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: opensips.dialogs_active - description: OpenSIPS Active Dialogs - unit: "dialogs" - chart_type: area - dimensions: - - name: active - - name: early - - name: opensips.users - description: OpenSIPS Users - unit: "users" - chart_type: line - dimensions: - - name: registered - - name: location - - name: contacts - - name: expires - - name: opensips.registrar - description: OpenSIPS Registrar - unit: "registrations/s" - chart_type: line - dimensions: - - name: accepted - - name: rejected - - name: opensips.transactions - description: OpenSIPS Transactions - unit: "transactions/s" - chart_type: line - dimensions: - - name: UAS - - name: UAC - - name: opensips.core_rcv - description: OpenSIPS Core Receives - unit: "queries/s" - chart_type: line - dimensions: - - name: requests - - name: replies - - name: opensips.core_fwd - description: OpenSIPS Core Forwards - unit: "queries/s" - chart_type: line - dimensions: - - name: requests - - name: replies - - name: opensips.core_drop - description: OpenSIPS Core Drops - unit: "queries/s" - chart_type: line - dimensions: - - name: requests - - name: replies - - name: opensips.core_err - 
description: OpenSIPS Core Errors - unit: "queries/s" - chart_type: line - dimensions: - - name: requests - - name: replies - - name: opensips.core_bad - description: OpenSIPS Core Bad - unit: "queries/s" - chart_type: line - dimensions: - - name: bad_URIs_rcvd - - name: unsupported_methods - - name: bad_msg_hdr - - name: opensips.tm_replies - description: OpenSIPS TM Replies - unit: "replies/s" - chart_type: line - dimensions: - - name: received - - name: relayed - - name: local - - name: opensips.transactions_status - description: OpenSIPS Transactions Status - unit: "transactions/s" - chart_type: line - dimensions: - - name: 2xx - - name: 3xx - - name: 4xx - - name: 5xx - - name: 6xx - - name: opensips.transactions_inuse - description: OpenSIPS InUse Transactions - unit: "transactions" - chart_type: line - dimensions: - - name: inuse - - name: opensips.sl_replies - description: OpenSIPS SL Replies - unit: "replies/s" - chart_type: line - dimensions: - - name: 1xx - - name: 2xx - - name: 3xx - - name: 4xx - - name: 5xx - - name: 6xx - - name: sent - - name: error - - name: ACKed - - name: opensips.dialogs - description: OpenSIPS Dialogs - unit: "dialogs/s" - chart_type: line - dimensions: - - name: processed - - name: expire - - name: failed - - name: opensips.net_waiting - description: OpenSIPS Network Waiting - unit: "kilobytes" - chart_type: line - dimensions: - - name: UDP - - name: TCP - - name: opensips.uri_checks - description: OpenSIPS URI Checks - unit: "checks / sec" - chart_type: line - dimensions: - - name: positive - - name: negative - - name: opensips.traces - description: OpenSIPS Traces - unit: "traces / sec" - chart_type: line - dimensions: - - name: requests - - name: replies - - name: opensips.shmem - description: OpenSIPS Shared Memory - unit: "kilobytes" - chart_type: line - dimensions: - - name: total - - name: used - - name: real_used - - name: max_used - - name: free - - name: opensips.shmem_fragment - description: OpenSIPS Shared Memory 
Fragmentation - unit: "fragments" - chart_type: line - dimensions: - - name: fragments + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: opensips.dialogs_active + description: OpenSIPS Active Dialogs + unit: "dialogs" + chart_type: area + dimensions: + - name: active + - name: early + - name: opensips.users + description: OpenSIPS Users + unit: "users" + chart_type: line + dimensions: + - name: registered + - name: location + - name: contacts + - name: expires + - name: opensips.registrar + description: OpenSIPS Registrar + unit: "registrations/s" + chart_type: line + dimensions: + - name: accepted + - name: rejected + - name: opensips.transactions + description: OpenSIPS Transactions + unit: "transactions/s" + chart_type: line + dimensions: + - name: UAS + - name: UAC + - name: opensips.core_rcv + description: OpenSIPS Core Receives + unit: "queries/s" + chart_type: line + dimensions: + - name: requests + - name: replies + - name: opensips.core_fwd + description: OpenSIPS Core Forwards + unit: "queries/s" + chart_type: line + dimensions: + - name: requests + - name: replies + - name: opensips.core_drop + description: OpenSIPS Core Drops + unit: "queries/s" + chart_type: line + dimensions: + - name: requests + - name: replies + - name: opensips.core_err + description: OpenSIPS Core Errors + unit: "queries/s" + chart_type: line + dimensions: + - name: requests + - name: replies + - name: opensips.core_bad + description: OpenSIPS Core Bad + unit: "queries/s" + chart_type: line + dimensions: + - name: bad_URIs_rcvd + - name: unsupported_methods + - name: bad_msg_hdr + - name: opensips.tm_replies + description: OpenSIPS TM Replies + unit: "replies/s" + chart_type: line + dimensions: + - name: received + - name: relayed + - name: local + - name: opensips.transactions_status + description: OpenSIPS 
Transactions Status + unit: "transactions/s" + chart_type: line + dimensions: + - name: 2xx + - name: 3xx + - name: 4xx + - name: 5xx + - name: 6xx + - name: opensips.transactions_inuse + description: OpenSIPS InUse Transactions + unit: "transactions" + chart_type: line + dimensions: + - name: inuse + - name: opensips.sl_replies + description: OpenSIPS SL Replies + unit: "replies/s" + chart_type: line + dimensions: + - name: 1xx + - name: 2xx + - name: 3xx + - name: 4xx + - name: 5xx + - name: 6xx + - name: sent + - name: error + - name: ACKed + - name: opensips.dialogs + description: OpenSIPS Dialogs + unit: "dialogs/s" + chart_type: line + dimensions: + - name: processed + - name: expire + - name: failed + - name: opensips.net_waiting + description: OpenSIPS Network Waiting + unit: "kilobytes" + chart_type: line + dimensions: + - name: UDP + - name: TCP + - name: opensips.uri_checks + description: OpenSIPS URI Checks + unit: "checks / sec" + chart_type: line + dimensions: + - name: positive + - name: negative + - name: opensips.traces + description: OpenSIPS Traces + unit: "traces / sec" + chart_type: line + dimensions: + - name: requests + - name: replies + - name: opensips.shmem + description: OpenSIPS Shared Memory + unit: "kilobytes" + chart_type: line + dimensions: + - name: total + - name: used + - name: real_used + - name: max_used + - name: free + - name: opensips.shmem_fragment + description: OpenSIPS Shared Memory Fragmentation + unit: "fragments" + chart_type: line + dimensions: + - name: fragments diff --git a/collectors/charts.d.plugin/opensips/metrics.csv b/collectors/charts.d.plugin/opensips/metrics.csv deleted file mode 100644 index 2efab370..00000000 --- a/collectors/charts.d.plugin/opensips/metrics.csv +++ /dev/null @@ -1,20 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -opensips.dialogs_active,,"active, early",dialogs,"OpenSIPS Active Dialogs",area,,charts.d.plugin,opensips -opensips.users,,"registered, 
location, contacts, expires",users,"OpenSIPS Users",line,,charts.d.plugin,opensips -opensips.registrar,,"accepted, rejected",registrations/s,"OpenSIPS Registrar",line,,charts.d.plugin,opensips -opensips.transactions,,"UAS, UAC",transactions/s,"OpenSIPS Transactions",line,,charts.d.plugin,opensips -opensips.core_rcv,,"requests, replies",queries/s,"OpenSIPS Core Receives",line,,charts.d.plugin,opensips -opensips.core_fwd,,"requests, replies",queries/s,"OpenSIPS Core Forwards",line,,charts.d.plugin,opensips -opensips.core_drop,,"requests, replies",queries/s,"OpenSIPS Core Drops",line,,charts.d.plugin,opensips -opensips.core_err,,"requests, replies",queries/s,"OpenSIPS Core Errors",line,,charts.d.plugin,opensips -opensips.core_bad,,"bad_URIs_rcvd, unsupported_methods, bad_msg_hdr",queries/s,"OpenSIPS Core Bad",line,,charts.d.plugin,opensips -opensips.tm_replies,,"received, relayed, local",replies/s,"OpenSIPS TM Replies",line,,charts.d.plugin,opensips -opensips.transactions_status,,"2xx, 3xx, 4xx, 5xx, 6xx",transactions/s,"OpenSIPS Transactions Status",line,,charts.d.plugin,opensips -opensips.transactions_inuse,,inuse,transactions,"OpenSIPS InUse Transactions",line,,charts.d.plugin,opensips -opensips.sl_replies,,"1xx, 2xx, 3xx, 4xx, 5xx, 6xx, sent, error, ACKed",replies/s,OpenSIPS SL Replies,line,,charts.d.plugin,opensips -opensips.dialogs,,"processed, expire, failed",dialogs/s,"OpenSIPS Dialogs",line,,charts.d.plugin,opensips -opensips.net_waiting,,"UDP, TCP",kilobytes,"OpenSIPS Network Waiting",line,,charts.d.plugin,opensips -opensips.uri_checks,,"positive, negative","checks / sec","OpenSIPS URI Checks",line,,charts.d.plugin,opensips -opensips.traces,,"requests, replies","traces / sec","OpenSIPS Traces",line,,charts.d.plugin,opensips -opensips.shmem,,"total, used, real_used, max_used, free",kilobytes,"OpenSIPS Shared Memory",line,,charts.d.plugin,opensips -opensips.shmem_fragment,,fragments,fragments,"OpenSIPS Shared Memory Fragmentation",line,,charts.d.plugin,opensips
\ No newline at end of file diff --git a/collectors/charts.d.plugin/sensors/metadata.yaml b/collectors/charts.d.plugin/sensors/metadata.yaml index e56b97e6..33beaad2 100644 --- a/collectors/charts.d.plugin/sensors/metadata.yaml +++ b/collectors/charts.d.plugin/sensors/metadata.yaml @@ -1,107 +1,169 @@ -meta: - plugin_name: charts.d.plugin - module_name: sensors - monitored_instance: - name: charts.d sensors - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: sensor chip - description: "" - labels: [] +plugin_name: charts.d.plugin +modules: + - meta: + plugin_name: charts.d.plugin + module_name: sensors + monitored_instance: + name: Linux Sensors (sysfs) + link: "https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface" + categories: + - data-collection.hardware-devices-and-sensors + icon_filename: "microchip.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - sensors + - sysfs + - hwmon + - rpi + - raspberry pi + most_popular: false + overview: + data_collection: + metrics_description: | + Use this collector when `lm-sensors` doesn't work 
on your device (e.g. for RPi temperatures). + For all other cases use the [Python collector](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/sensors), which supports multiple jobs, is more efficient and performs calculations on top of the kernel provided values." + method_description: | + It will provide charts for all configured system sensors, by reading sensors directly from the kernel. + The values graphed are the raw hardware values of the sensors. + supported_platforms: + include: [Linux] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "By default, the collector will try to read entries under `/sys/devices`" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: charts.d/sensors.conf + options: + description: | + The config file is sourced by the charts.d plugin. It's a standard bash file. + + The following collapsed table contains all the options that can be configured for the sensors collector. + folding: + title: "Config options" + enabled: true + list: + - name: sensors_sys_dir + description: The directory the kernel exposes sensor data. + default_value: "/sys/devices" + required: false + - name: sensors_sys_depth + description: How deep in the tree to check for sensor data. + default_value: 10 + required: false + - name: sensors_source_update + description: If set to 1, the script will overwrite internal script functions with code generated ones. + default_value: 1 + required: false + - name: sensors_update_every + description: The data collection frequency. If unset, will inherit the netdata update frequency. + default_value: 1 + required: false + - name: sensors_priority + description: The charts priority on the dashboard. 
+ default_value: 90000 + required: false + - name: sensors_retries + description: The number of retries to do in case of failure before disabling the collector. + default_value: 10 + required: false + examples: + folding: + enabled: false + title: "Config" + list: + - name: Set sensors path depth + description: Set a different sensors path depth + config: | + # the directory the kernel keeps sensor data + #sensors_sys_dir="/sys/devices" + + # how deep in the tree to check for sensor data + sensors_sys_depth=5 + + # if set to 1, the script will overwrite internal + # script functions with code generated ones + # leave to 1, is faster + #sensors_source_update=1 + + # the data collection frequency + # if unset, will inherit the netdata update frequency + #sensors_update_every= + + # the charts priority on the dashboard + #sensors_priority=90000 + + # the number of retries to do in case of failure + # before disabling the module + #sensors_retries=10 + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: sensors.temp - description: Temperature - unit: "Celsius" - chart_type: line - dimensions: - - name: '{filename}' - - name: sensors.volt - description: Voltage - unit: "Volts" - chart_type: line - dimensions: - - name: '{filename}' - - name: sensors.curr - description: Current - unit: "Ampere" - chart_type: line - dimensions: - - name: '{filename}' - - name: sensors.power - description: Power - unit: "Watt" - chart_type: line - dimensions: - - name: '{filename}' - - name: sensors.fans - description: Fans Speed - unit: "Rotations / Minute" - chart_type: line - dimensions: - - name: '{filename}' - - name: sensors.energy - description: Energy - unit: "Joule" - chart_type: area - dimensions: - - name: '{filename}' - - name: sensors.humidity - description: Humidity - unit: "Percent" - chart_type: line - dimensions: - - name: '{filename}' + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: sensor chip + 
description: "Metrics related to sensor chips. Each chip provides its own set of the following metrics." + labels: [] + metrics: + - name: sensors.temp + description: Temperature + unit: "Celsius" + chart_type: line + dimensions: + - name: "{filename}" + - name: sensors.volt + description: Voltage + unit: "Volts" + chart_type: line + dimensions: + - name: "{filename}" + - name: sensors.curr + description: Current + unit: "Ampere" + chart_type: line + dimensions: + - name: "{filename}" + - name: sensors.power + description: Power + unit: "Watt" + chart_type: line + dimensions: + - name: "{filename}" + - name: sensors.fans + description: Fans Speed + unit: "Rotations / Minute" + chart_type: line + dimensions: + - name: "{filename}" + - name: sensors.energy + description: Energy + unit: "Joule" + chart_type: area + dimensions: + - name: "{filename}" + - name: sensors.humidity + description: Humidity + unit: "Percent" + chart_type: line + dimensions: + - name: "{filename}" diff --git a/collectors/charts.d.plugin/sensors/metrics.csv b/collectors/charts.d.plugin/sensors/metrics.csv deleted file mode 100644 index 5b5a4c57..00000000 --- a/collectors/charts.d.plugin/sensors/metrics.csv +++ /dev/null @@ -1,8 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -sensors.temp,sensor chip,"{filename}",Celsius,"Temperature",line,,charts.d.plugin,sensors -sensors.volt,sensor chip,"{filename}",Volts,"Voltage",line,,charts.d.plugin,sensors -sensors.curr,sensor chip,"{filename}",Ampere,"Current",line,,charts.d.plugin,sensors -sensors.power,sensor chip,"{filename}",Watt,"Power",line,,charts.d.plugin,sensors -sensors.fans,sensor chip,"{filename}","Rotations / Minute","Fans Speed",line,,charts.d.plugin,sensors -sensors.energy,sensor chip,"{filename}",Joule,"Energy",area,,charts.d.plugin,sensors -sensors.humidity,sensor chip,"{filename}",Percent,"Humidity",line,,charts.d.plugin,sensors
\ No newline at end of file diff --git a/collectors/cups.plugin/metadata.yaml b/collectors/cups.plugin/metadata.yaml index c8a7e083..a416d392 100644 --- a/collectors/cups.plugin/metadata.yaml +++ b/collectors/cups.plugin/metadata.yaml @@ -1,118 +1,131 @@ -meta: - plugin_name: cups.plugin - module_name: cups.plugin - monitored_instance: - name: CUPS - link: '' - categories: - - data-collection.hardware-devices-and-sensors - icon_filename: 'cups.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor CUPS performance for achieving optimal printing system operations. Monitor job statuses, queue lengths, and error rates to ensure smooth printing tasks.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: cups.plugin +modules: + - meta: + plugin_name: cups.plugin + module_name: cups.plugin + monitored_instance: + name: CUPS + link: "https://www.cups.org/" + categories: + - data-collection.hardware-devices-and-sensors + icon_filename: "cups.png" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: "Monitor CUPS performance 
for achieving optimal printing system operations. Monitor job statuses, queue lengths, and error rates to ensure smooth printing tasks." + method_description: "The plugin uses CUPS shared library to connect and monitor the server." + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "The plugin needs to access the server. Netdata sets permissions during installation time to reach the server through its library." + default_behavior: + auto_detection: + description: "The plugin detects when CUPS server is running and tries to connect to it." + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Minimum setup + description: "The CUPS server must be installed and running." + configuration: + file: + name: "netdata.conf" + section_name: "[plugin:cups]" + description: "The netdata main configuration file." + options: + description: "" + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency. 
+ default_value: 1 + required: false + - name: command options + description: Additional parameters for the collector + default_value: "" + required: false + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: cups.dests_state - description: Destinations by state - unit: "dests" - chart_type: stacked - dimensions: - - name: idle - - name: printing - - name: stopped - - name: cups.dests_option - description: Destinations by option - unit: "dests" - chart_type: line - dimensions: - - name: total - - name: acceptingjobs - - name: shared - - name: cups.job_num - description: Active jobs - unit: "jobs" - chart_type: stacked - dimensions: - - name: pending - - name: held - - name: processing - - name: cups.job_size - description: Active jobs size - unit: "KB" - chart_type: stacked - dimensions: - - name: pending - - name: held - - name: processing - - name: destination - description: "" - labels: [] - metrics: - - name: cups.destination_job_num - description: Active jobs of {destination} - unit: "jobs" - chart_type: stacked - dimensions: - - name: pending - - name: held - - name: processing - - name: cups.destination_job_size - description: Active jobs size of {destination} - unit: "KB" - chart_type: stacked - dimensions: - - name: pending - - name: held - - name: processing + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: cups.dests_state + description: Destinations by state + unit: "dests" + chart_type: stacked + dimensions: + - name: idle + - name: printing + - name: stopped + - name: cups.dests_option + description: Destinations by option + unit: "dests" + chart_type: line + dimensions: + - name: total + - name: acceptingjobs + - name: shared + - name: cups.job_num + description: Active jobs + unit: "jobs" + chart_type: stacked + dimensions: + - name: pending + - name: held + 
- name: processing + - name: cups.job_size + description: Active jobs size + unit: "KB" + chart_type: stacked + dimensions: + - name: pending + - name: held + - name: processing + - name: destination + description: "" + labels: [] + metrics: + - name: cups.destination_job_num + description: Active jobs of {destination} + unit: "jobs" + chart_type: stacked + dimensions: + - name: pending + - name: held + - name: processing + - name: cups.destination_job_size + description: Active jobs size of {destination} + unit: "KB" + chart_type: stacked + dimensions: + - name: pending + - name: held + - name: processing diff --git a/collectors/cups.plugin/metrics.csv b/collectors/cups.plugin/metrics.csv deleted file mode 100644 index 0262f58a..00000000 --- a/collectors/cups.plugin/metrics.csv +++ /dev/null @@ -1,7 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -cups.dests_state,,"idle, printing, stopped",dests,"Destinations by state",stacked,,cups.plugin, -cups.dests_option,,"total, acceptingjobs, shared",dests,"Destinations by option",line,,cups.plugin, -cups.job_num,,"pending, held, processing",jobs,"Active jobs",stacked,,cups.plugin, -cups.job_size,,"pending, held, processing",KB,"Active jobs size",stacked,,cups.plugin, -cups.destination_job_num,destination,"pending, held, processing",jobs,"Active jobs of {destination}",stacked,,cups.plugin, -cups.destination_job_size,destination,"pending, held, processing",KB,"Active jobs size of {destination}",stacked,,cups.plugin,
\ No newline at end of file diff --git a/collectors/debugfs.plugin/debugfs_plugin.c b/collectors/debugfs.plugin/debugfs_plugin.c index 1c5bf106..c189f908 100644 --- a/collectors/debugfs.plugin/debugfs_plugin.c +++ b/collectors/debugfs.plugin/debugfs_plugin.c @@ -20,6 +20,9 @@ static struct debugfs_module { .func = do_debugfs_extfrag}, { .name = "/sys/kernel/debug/zswap", .enabled = CONFIG_BOOLEAN_YES, .func = do_debugfs_zswap}, + // Linux powercap metrics is here because it needs privilege to read each RAPL zone + { .name = "/sys/devices/virtual/powercap", .enabled = CONFIG_BOOLEAN_YES, + .func = do_sys_devices_virtual_powercap}, // The terminator { .name = NULL, .enabled = CONFIG_BOOLEAN_NO, .func = NULL} diff --git a/collectors/debugfs.plugin/debugfs_plugin.h b/collectors/debugfs.plugin/debugfs_plugin.h index c53187d6..903e4a19 100644 --- a/collectors/debugfs.plugin/debugfs_plugin.h +++ b/collectors/debugfs.plugin/debugfs_plugin.h @@ -9,6 +9,7 @@ int do_debugfs_extfrag(int update_every, const char *name); int do_debugfs_zswap(int update_every, const char *name); +int do_sys_devices_virtual_powercap(int update_every, const char *name); void debugfs2lower(char *name); const char *debugfs_rrdset_type_name(RRDSET_TYPE chart_type); const char *debugfs_rrd_algorithm_name(RRD_ALGORITHM algorithm); diff --git a/collectors/debugfs.plugin/debugfs_zswap.c b/collectors/debugfs.plugin/debugfs_zswap.c index c8fc0f03..502a04f1 100644 --- a/collectors/debugfs.plugin/debugfs_zswap.c +++ b/collectors/debugfs.plugin/debugfs_zswap.c @@ -38,7 +38,7 @@ static struct netdata_zswap_metric zswap_calculated_metrics[] = { .charttype = RRDSET_TYPE_LINE, .enabled = CONFIG_BOOLEAN_YES, .chart_created = CONFIG_BOOLEAN_NO, - .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_COMPRESS_RATIO, + .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_COMPRESS_RATIO, .divisor = 100, .convertv = NULL, .value = -1}, @@ -71,7 +71,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = { .charttype = RRDSET_TYPE_AREA, 
.enabled = CONFIG_BOOLEAN_YES, .chart_created = CONFIG_BOOLEAN_NO, - .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_TOT_SIZE, + .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_TOT_SIZE, .divisor = 1, .convertv = NULL, .value = -1}, @@ -84,7 +84,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = { .charttype = RRDSET_TYPE_AREA, .enabled = CONFIG_BOOLEAN_YES, .chart_created = CONFIG_BOOLEAN_NO, - .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_STORED_PAGE, + .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_STORED_PAGE, .divisor = 1, .convertv = pages_to_bytes, .value = -1}, @@ -97,7 +97,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = { .charttype = RRDSET_TYPE_LINE, .enabled = CONFIG_BOOLEAN_YES, .chart_created = CONFIG_BOOLEAN_NO, - .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_LIM_HIT, + .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_LIM_HIT, .divisor = 1, .convertv = NULL, .value = -1}, @@ -110,7 +110,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = { .charttype = RRDSET_TYPE_AREA, .enabled = CONFIG_BOOLEAN_YES, .chart_created = CONFIG_BOOLEAN_NO, - .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_WRT_BACK_PAGES, + .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_WRT_BACK_PAGES, .divisor = 1, .convertv = pages_to_bytes, .value = -1}, @@ -123,7 +123,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = { .charttype = RRDSET_TYPE_AREA, .enabled = CONFIG_BOOLEAN_YES, .chart_created = CONFIG_BOOLEAN_NO, - .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_SAME_FILL_PAGE, + .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_SAME_FILL_PAGE, .divisor = 1, .convertv = pages_to_bytes, .value = -1}, @@ -136,7 +136,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = { .charttype = RRDSET_TYPE_LINE, .enabled = CONFIG_BOOLEAN_YES, .chart_created = CONFIG_BOOLEAN_NO, - .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_DUPP_ENTRY, + .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_DUPP_ENTRY, .divisor = 1, .convertv = NULL, .value = -1}, @@ -175,7 +175,7 @@ static struct 
netdata_zswap_metric zswap_rejected_metrics[] = { .charttype = RRDSET_TYPE_STACKED, .enabled = CONFIG_BOOLEAN_YES, .chart_created = CONFIG_BOOLEAN_NO, - .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS, + .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS, .divisor = 1, .convertv = NULL, .value = -1}, @@ -188,7 +188,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = { .charttype = RRDSET_TYPE_STACKED, .enabled = CONFIG_BOOLEAN_YES, .chart_created = CONFIG_BOOLEAN_NO, - .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS, + .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS, .divisor = 1, .convertv = NULL, .value = -1}, @@ -201,7 +201,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = { .charttype = RRDSET_TYPE_STACKED, .enabled = CONFIG_BOOLEAN_YES, .chart_created = CONFIG_BOOLEAN_NO, - .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS, + .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS, .divisor = 1, .convertv = NULL, .value = -1}, @@ -214,7 +214,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = { .charttype = RRDSET_TYPE_STACKED, .enabled = CONFIG_BOOLEAN_YES, .chart_created = CONFIG_BOOLEAN_NO, - .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS, + .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS, .divisor = 1, .convertv = NULL, .value = -1}, @@ -227,7 +227,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = { .charttype = RRDSET_TYPE_STACKED, .enabled = CONFIG_BOOLEAN_YES, .chart_created = CONFIG_BOOLEAN_NO, - .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS, + .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS, .divisor = 1, .convertv = NULL, .value = -1}, @@ -266,7 +266,7 @@ zswap_send_chart(struct netdata_zswap_metric *metric, int update_every, const ch { fprintf( stdout, - "CHART system.zswap_%s '' '%s' '%s' 'zswap' '' '%s' %d %d '%s' 'debugfs.plugin' '%s'\n", + "CHART mem.zswap_%s '' '%s' '%s' 'zswap' '' '%s' %d %d '%s' 'debugfs.plugin' '%s'\n", metric->chart_id, metric->title, metric->units, @@ -291,7 +291,7 @@ static void 
zswap_send_dimension(struct netdata_zswap_metric *metric) static void zswap_send_begin(struct netdata_zswap_metric *metric) { - fprintf(stdout, "BEGIN system.zswap_%s\n", metric->chart_id); + fprintf(stdout, "BEGIN mem.zswap_%s\n", metric->chart_id); } static void zswap_send_set(struct netdata_zswap_metric *metric) diff --git a/collectors/debugfs.plugin/metadata.yaml b/collectors/debugfs.plugin/metadata.yaml new file mode 100644 index 00000000..d3bf0a0d --- /dev/null +++ b/collectors/debugfs.plugin/metadata.yaml @@ -0,0 +1,395 @@ +plugin_name: debugfs.plugin +modules: + - meta: + plugin_name: debugfs.plugin + module_name: /sys/kernel/debug/extfrag + monitored_instance: + name: System Memory Fragmentation + link: 'https://www.kernel.org/doc/html/next/admin-guide/sysctl/vm.html' + categories: + - data-collection.linux-systems.memory-metrics + icon_filename: 'microchip.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: + - extfrag + - extfrag_threshold + - memory fragmentation + most_popular: false + overview: + data_collection: + metrics_description: 'Collects memory fragmentation statistics from the Linux kernel' + method_description: 'Parse data from `debugfs` file' + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: false + additional_permissions: + description: > + This integration requires read access to files under `/sys/kernel/debug/extfrag`, which are accessible + only to the root user by default. Netdata uses Linux Capabilities to give the plugin access to + debugfs. `CAP_DAC_READ_SEARCH` is added automatically during installation. This capability allows bypassing + file read permission checks and directory read and execute permission checks. If file capabilities are not + usable, then the plugin is instead installed with the SUID bit set in permissions so that it runs as root. 
+ default_behavior: + auto_detection: + description: > + Assuming that debugfs is mounted and the required permissions are available, this integration will + automatically run by default. + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: + - title: 'filesystem' + description: > + The debugfs filesystem must be mounted on your host for plugin to collect data. + You can run the command-line (`sudo mount -t debugfs none /sys/kernel/debug/`) to mount it locally. + It is also recommended to modify your fstab (5) avoiding necessity to mount the filesystem + before starting netdata. + configuration: + file: + name: 'netdata.conf' + section_name: '[plugin:debugfs]' + description: 'This is netdata main configuration file.' + options: + description: '' + folding: + title: 'Config options' + enabled: true + list: + - name: update every + description: Data collection frequency. + default_value: 1 + required: false + - name: command options + description: Additional parameters for the collector + default_value: "" + required: false + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "Monitor the overall memory fragmentation of the system." + availability: [] + scopes: + - name: node + description: "Memory fragmentation statistics for each NUMA node in the system." + labels: + - name: numa_node + description: The NUMA node the metrics are associated with.
+ metrics: + - name: mem.fragmentation_index_dma + description: Memory fragmentation index for each order + unit: "index" + chart_type: line + dimensions: + - name: order0 + - name: order1 + - name: order2 + - name: order3 + - name: order4 + - name: order5 + - name: order6 + - name: order7 + - name: order8 + - name: order9 + - name: order10 + - name: mem.fragmentation_index_dma32 + description: Memory fragmentation index for each order + unit: "index" + chart_type: line + dimensions: + - name: order0 + - name: order1 + - name: order2 + - name: order3 + - name: order4 + - name: order5 + - name: order6 + - name: order7 + - name: order8 + - name: order9 + - name: order10 + - name: mem.fragmentation_index_normal + description: Memory fragmentation index for each order + unit: "index" + chart_type: line + dimensions: + - name: order0 + - name: order1 + - name: order2 + - name: order3 + - name: order4 + - name: order5 + - name: order6 + - name: order7 + - name: order8 + - name: order9 + - name: order10 + - meta: + plugin_name: debugfs.plugin + module_name: /sys/kernel/debug/zswap + monitored_instance: + name: Linux ZSwap + link: 'https://www.kernel.org/doc/html/latest/admin-guide/mm/zswap.html' + categories: + - data-collection.linux-systems.memory-metrics + icon_filename: 'microchip.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: + - swap + - zswap + - frontswap + - swap cache + most_popular: false + overview: + data_collection: + metrics_description: > + Collects zswap performance metrics on Linux systems. + method_description: 'Parse data from `debugfs` file.' + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: false + additional_permissions: + description: > + This integration requires read access to files under `/sys/kernel/debug/zswap`, which are accessible + only to the root user by default. Netdata uses Linux Capabilities to give the plugin access to + debugfs.
`CAP_DAC_READ_SEARCH` is added automatically during installation. This capability allows bypassing + file read permission checks and directory read and execute permission checks. If file capabilities are not + usable, then the plugin is instead installed with the SUID bit set in permissions so that it runs as root. + default_behavior: + auto_detection: + description: > + Assuming that debugfs is mounted and the required permissions are available, this integration will + automatically detect whether or not the system is using zswap. + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: + - title: 'filesystem' + description: > + The debugfs filesystem must be mounted on your host for plugin to collect data. + You can run the command-line (`sudo mount -t debugfs none /sys/kernel/debug/`) to mount it locally. + It is also recommended to modify your fstab (5) avoiding necessity to mount the filesystem + before starting netdata. + configuration: + file: + name: 'netdata.conf' + section_name: '[plugin:debugfs]' + description: 'This is netdata main configuration file.' + options: + description: '' + folding: + title: 'Config options' + enabled: true + list: + - name: update every + description: Data collection frequency. + default_value: 1 + required: false + - name: command options + description: Additional parameters for the collector + default_value: "" + required: false + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "Monitor the performance statistics of zswap." + availability: [] + scopes: + - name: global + description: "Global zswap performance metrics."
+ labels: [] + metrics: + - name: mem.zswap_pool_compression_ratio + description: Zswap compression ratio + unit: "ratio" + chart_type: line + dimensions: + - name: compression_ratio + - name: mem.zswap_pool_compressed_size + description: Zswap compressed bytes currently stored + unit: "bytes" + chart_type: area + dimensions: + - name: compressed_size + - name: mem.zswap_pool_raw_size + description: Zswap uncompressed bytes currently stored + unit: "bytes" + chart_type: area + dimensions: + - name: uncompressed_size + - name: mem.zswap_rejections + description: Zswap rejections + unit: "rejections/s" + chart_type: stacked + dimensions: + - name: compress_poor + - name: kmemcache_fail + - name: alloc_fail + - name: reclaim_fail + - name: mem.zswap_pool_limit_hit + description: Zswap pool limit was reached + unit: "events/s" + chart_type: line + dimensions: + - name: limit + - name: mem.zswap_written_back_raw_bytes + description: Zswap uncompressed bytes written back when pool limit was reached + unit: "bytes/s" + chart_type: area + dimensions: + - name: written_back + - name: mem.zswap_same_filled_raw_size + description: Zswap same-value filled uncompressed bytes currently stored + unit: "bytes" + chart_type: area + dimensions: + - name: same_filled + - name: mem.zswap_duplicate_entry + description: Zswap duplicate store was encountered + unit: "entries/s" + chart_type: line + dimensions: + - name: duplicate + - meta: + plugin_name: debugfs.plugin + module_name: intel_rapl + monitored_instance: + name: Power Capping + link: 'https://www.kernel.org/doc/html/next/power/powercap/powercap.html' + categories: + - data-collection.linux-systems.kernel-metrics + icon_filename: 'powersupply.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: + - power capping + - energy + most_popular: false + overview: + data_collection: + metrics_description: > + Collects power capping
performance metrics on Linux systems. + method_description: 'Parse data from `debugfs` file.' + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: false + additional_permissions: + description: > + This integration requires read access to files under `/sys/devices/virtual/powercap`, which are accessible + only to the root user by default. Netdata uses Linux Capabilities to give the plugin access to + debugfs. `CAP_DAC_READ_SEARCH` is added automatically during installation. This capability allows bypassing + file read permission checks and directory read and execute permission checks. If file capabilities are not + usable, then the plugin is instead installed with the SUID bit set in permissions so that it runs as root. + default_behavior: + auto_detection: + description: > + Assuming that debugfs is mounted and the required permissions are available, this integration will + automatically detect whether or not the system exposes Intel RAPL power capping zones. + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: + - title: 'filesystem' + description: > + The debugfs filesystem must be mounted on your host for plugin to collect data. + You can run the command-line (`sudo mount -t debugfs none /sys/kernel/debug/`) to mount it locally. + It is also recommended to modify your fstab (5) avoiding necessity to mount the filesystem + before starting netdata. + configuration: + file: + name: 'netdata.conf' + section_name: '[plugin:debugfs]' + description: 'This is netdata main configuration file.' + options: + description: '' + folding: + title: 'Config options' + enabled: true + list: + - name: update every + description: Data collection frequency.
+ default_value: 1 + required: false + - name: command options + description: Additional parameters for the collector + default_value: "" + required: false + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "Monitor the power consumption of Intel RAPL zones." + availability: [] + scopes: + - name: global + description: "Global Intel RAPL zones." + labels: [] + metrics: + - name: cpu.powercap_intel_rapl_zone + description: Intel RAPL Zone Power Consumption + unit: "Watts" + chart_type: line + dimensions: + - name: Power + - name: cpu.powercap_intel_rapl_subzones + description: Intel RAPL Subzones Power Consumption + unit: "Watts" + chart_type: line + dimensions: + - name: dram + - name: core + - name: uncore diff --git a/collectors/debugfs.plugin/metrics.csv b/collectors/debugfs.plugin/metrics.csv deleted file mode 100644 index a2138394..00000000 --- a/collectors/debugfs.plugin/metrics.csv +++ /dev/null @@ -1,12 +0,0 @@ -metric,scode,dimensions,unit,description,chart_type,labels,plugin,module -mem.fragmentation_index_dma,node,"order0, order1, order2, order3, order4, order5, order6, order7, order8, order9, order10",index,Memory fragmentation index for each order,line,numa_node,debugfs.plugin,/sys/kernel/debug/extfrag -mem.fragmentation_index_dma32,node,"order0, order1, order2, order3, order4, order5, order6, order7, order8, order9, order10",index,Memory fragmentation index for each order,line,numa_node,debugfs.plugin,/sys/kernel/debug/extfrag -mem.fragmentation_index_normal,node,"order0, order1, order2, order3, order4, order5, order6, order7, order8, order9, order10",index,Memory fragmentation index for each order,line,numa_node,debugfs.plugin,/sys/kernel/debug/extfrag -system.zswap_pool_compression_ratio,,compression_ratio,ratio,Zswap compression ratio,line,,debugfs.plugin,/sys/kernel/debug/zswap
-system.zswap_pool_compressed_size,,compressed_size,bytes,Zswap compressed bytes currently stored,area,,debugfs.plugin,/sys/kernel/debug/zswap -system.zswap_pool_raw_size,,uncompressed_size,bytes,Zswap uncompressed bytes currently stored,area,,debugfs.plugin,/sys/kernel/debug/zswap -system.zswap_rejections,,"compress_poor, kmemcache_fail, alloc_fail, reclaim_fail",rejections/s,Zswap rejections,stacked,,debugfs.plugin,/sys/kernel/debug/zswap -system.zswap_pool_limit_hit,,limit,events/s,Zswap pool limit was reached,line,,debugfs.plugin,/sys/kernel/debug/zswap -system.zswap_written_back_raw_bytes,,written_back,bytes/s,Zswap uncomressed bytes written back when pool limit was reached,area,,debugfs.plugin,/sys/kernel/debug/zswap -system.zswap_same_filled_raw_size,,same_filled,bytes,Zswap same-value filled uncompressed bytes currently stored,area,,debugfs.plugin,/sys/kernel/debug/zswap -system.zswap_duplicate_entry,,duplicate,entries/s,Zswap duplicate store was encountered,line,,debugfs.plugin,/sys/kernel/debug/zswap diff --git a/collectors/debugfs.plugin/multi_metadata.yaml b/collectors/debugfs.plugin/multi_metadata.yaml deleted file mode 100644 index 652aff7a..00000000 --- a/collectors/debugfs.plugin/multi_metadata.yaml +++ /dev/null @@ -1,233 +0,0 @@ -name: debugfs.plugin -modules: - - meta: - plugin_name: debugfs.plugin - module_name: /sys/kernel/debug/extfrag - monitored_instance: - name: debugfs /sys/kernel/debug/extfrag - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] 
- configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: node - description: "" - labels: - - name: numa_node - description: TBD - metrics: - - name: mem.fragmentation_index_dma - description: Memory fragmentation index for each order - unit: "index" - chart_type: line - dimensions: - - name: order0 - - name: order1 - - name: order2 - - name: order3 - - name: order4 - - name: order5 - - name: order6 - - name: order7 - - name: order8 - - name: order9 - - name: order10 - - name: mem.fragmentation_index_dma32 - description: Memory fragmentation index for each order - unit: "index" - chart_type: line - dimensions: - - name: order0 - - name: order1 - - name: order2 - - name: order3 - - name: order4 - - name: order5 - - name: order6 - - name: order7 - - name: order8 - - name: order9 - - name: order10 - - name: mem.fragmentation_index_normal - description: Memory fragmentation index for each order - unit: "index" - chart_type: line - dimensions: - - name: order0 - - name: order1 - - name: order2 - - name: order3 - - name: order4 - - name: order5 - - name: order6 - - name: order7 - - name: order8 - - name: order9 - - name: order10 - - meta: - plugin_name: debugfs.plugin - module_name: /sys/kernel/debug/zswap - monitored_instance: - name: debugfs /sys/kernel/debug/zswap - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - 
default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] - metrics: - - name: system.zswap_pool_compression_ratio - description: Zswap compression ratio - unit: "ratio" - chart_type: line - dimensions: - - name: compression_ratio - - name: system.zswap_pool_compressed_size - description: Zswap compressed bytes currently stored - unit: "bytes" - chart_type: area - dimensions: - - name: compressed_size - - name: system.zswap_pool_raw_size - description: Zswap uncompressed bytes currently stored - unit: "bytes" - chart_type: area - dimensions: - - name: uncompressed_size - - name: system.zswap_rejections - description: Zswap rejections - unit: "rejections/s" - chart_type: stacked - dimensions: - - name: compress_poor - - name: kmemcache_fail - - name: alloc_fail - - name: reclaim_fail - - name: system.zswap_pool_limit_hit - description: Zswap pool limit was reached - unit: "events/s" - chart_type: line - dimensions: - - name: limit - - name: system.zswap_written_back_raw_bytes - description: Zswap uncomressed bytes written back when pool limit was reached - unit: "bytes/s" - chart_type: area - dimensions: - - name: written_back - - name: system.zswap_same_filled_raw_size - description: Zswap same-value filled uncompressed bytes currently stored - unit: "bytes" - chart_type: area - dimensions: - - name: same_filled - - name: system.zswap_duplicate_entry - description: Zswap duplicate store was encountered - unit: "entries/s" - chart_type: line - dimensions: - - name: 
duplicate diff --git a/collectors/debugfs.plugin/sys_devices_virtual_powercap.c b/collectors/debugfs.plugin/sys_devices_virtual_powercap.c new file mode 100644 index 00000000..5f22b19e --- /dev/null +++ b/collectors/debugfs.plugin/sys_devices_virtual_powercap.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "debugfs_plugin.h" + +struct zone_t { + char *zone_chart_id; + char *subzone_chart_id; + char *name; + char *path; + + unsigned long long max_energy_range_uj; + unsigned long long energy_uj; + + struct zone_t *subzones; + + struct zone_t *prev, *next; +}; + +static struct zone_t *rapl_zones = NULL; + +static bool get_measurement(const char *path, unsigned long long *energy_uj) { + return read_single_number_file(path, energy_uj) == 0; +} + +static struct zone_t *get_rapl_zone(const char *control_type __maybe_unused, struct zone_t *parent __maybe_unused, const char *dirname) { + char temp[FILENAME_MAX + 1]; + snprintfz(temp, FILENAME_MAX, "%s/%s", dirname, "name"); + + char name[FILENAME_MAX + 1] = ""; + if (read_file(temp, name, sizeof(name) - 1) != 0) + return NULL; + + char *trimmed = trim(name); + if (unlikely(trimmed == NULL || trimmed[0] == 0)) + return NULL; + + snprintfz(temp, FILENAME_MAX, "%s/%s", dirname, "max_energy_range_uj"); + unsigned long long max_energy_range_uj = 0; + if (unlikely(read_single_number_file(temp, &max_energy_range_uj) != 0)) { + collector_error("Cannot read %s", temp); + return NULL; + } + + snprintfz(temp, FILENAME_MAX, "%s/%s", dirname, "energy_uj"); + unsigned long long energy_uj; + if (unlikely(!get_measurement(temp, &energy_uj))) { + collector_info("%s: Cannot read %s", trimmed, temp); + return NULL; + } + + struct zone_t *zone = callocz(1, sizeof(*zone)); + + zone->name = strdupz(trimmed); + zone->path = strdupz(temp); + + zone->max_energy_range_uj = max_energy_range_uj; + zone->energy_uj = energy_uj; + + collector_info("Found zone: \"%s\"", zone->name); + + return zone; +} + +static struct zone_t 
*look_for_rapl_zones(const char *control_type, struct zone_t *parent, const char *path, int depth) { + if(depth > 2) + return NULL; + + struct zone_t *base = NULL; + + DIR *dir = opendir(path); + if (unlikely(dir == NULL)) + return NULL; + + struct dirent *de = NULL; + while ((de = readdir(dir))) { + if (de->d_type != DT_DIR || de->d_name[0] == '.') + continue; + + if(strncmp(de->d_name, "intel-rapl:", 11) != 0) + continue; + + char zone_path[FILENAME_MAX + 1]; + snprintfz(zone_path, FILENAME_MAX, "%s/%s", path, de->d_name); + + struct zone_t *zone = get_rapl_zone(control_type, parent, zone_path); + if(zone) { + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(base, zone, prev, next); + + if(!parent) + zone->subzones = look_for_rapl_zones(control_type, zone, zone_path, depth + 1); + } + } + + closedir(dir); + return base; +} + +static struct zone_t *get_main_rapl_zones(void) { + struct zone_t *base = NULL; + + char dirname[FILENAME_MAX + 1]; + snprintfz(dirname, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/virtual/powercap"); + + DIR *dir = opendir(dirname); + if (unlikely(dir == NULL)) + return 0; + + struct dirent *de = NULL; + while ((de = readdir(dir))) { + if (de->d_type != DT_DIR || de->d_name[0] == '.') + continue; + + if(strncmp(de->d_name, "intel-rapl", 10) != 0) + continue; + + char control_type_path[FILENAME_MAX + 1]; + snprintfz(control_type_path, FILENAME_MAX, "%s/%s", dirname, de->d_name); + + collector_info("Looking at control type \"%s\"", de->d_name); + struct zone_t *zone = look_for_rapl_zones(de->d_name, NULL, control_type_path, 0); + if(zone) + DOUBLE_LINKED_LIST_APPEND_LIST_UNSAFE(base, zone, prev, next); + } + closedir(dir); + + return base; +} + +int do_sys_devices_virtual_powercap(int update_every, const char *name __maybe_unused) { + + if (unlikely(!rapl_zones)) { + rapl_zones = get_main_rapl_zones(); + if (unlikely(!rapl_zones)) { + collector_info("Failed to find powercap zones."); + return 1; + } + } + + for(struct zone_t 
*zone = rapl_zones; zone ; zone = zone->next) { + if(!zone->zone_chart_id) { + char id[1000 + 1]; + snprintf(id, 1000, "cpu.powercap_intel_rapl_zone_%s", zone->name); + zone->zone_chart_id = strdupz(id); + + fprintf(stdout, + "CHART '%s' '' 'Intel RAPL Zone Power Consumption' 'Watts' 'powercap' '%s' '%s' %d %d '' 'debugfs.plugin' 'intel_rapl'\n", + zone->zone_chart_id, + "cpu.powercap_intel_rapl_zone", + debugfs_rrdset_type_name(RRDSET_TYPE_LINE), + NETDATA_CHART_PRIO_POWERCAP, + update_every); + + fprintf(stdout, + "CLABEL 'zone' '%s' 0\n" + "CLABEL_COMMIT\n", + zone->name); + + fprintf(stdout, + "DIMENSION 'power' '' %s 1 1000000 ''\n", + debugfs_rrd_algorithm_name(RRD_ALGORITHM_INCREMENTAL)); + + // for the sub-zones + snprintf(id, 1000, "cpu.powercap_intel_rapl_subzones_%s", zone->name); + zone->subzone_chart_id = strdupz(id); + fprintf(stdout, + "CHART '%s' '' 'Intel RAPL Subzones Power Consumption' 'Watts' 'powercap' '%s' '%s' %d %d '' 'debugfs.plugin' 'intel_rapl'\n", + zone->subzone_chart_id, + "cpu.powercap_intel_rapl_subzones", + debugfs_rrdset_type_name(RRDSET_TYPE_LINE), + NETDATA_CHART_PRIO_POWERCAP + 1, + update_every); + + fprintf(stdout, + "CLABEL 'zone' '%s' 0\n" + "CLABEL_COMMIT\n", + zone->name); + + for(struct zone_t *subzone = zone->subzones; subzone ; subzone = subzone->next) { + fprintf(stdout, + "DIMENSION '%s' '' %s 1 1000000 ''\n", + subzone->name, + debugfs_rrd_algorithm_name(RRD_ALGORITHM_INCREMENTAL)); + } + } + + if(get_measurement(zone->path, &zone->energy_uj)) { + fprintf(stdout, + "BEGIN '%s'\n" + "SET power = %lld\n" + "END\n" + , zone->zone_chart_id + , zone->energy_uj); + } + + if(zone->subzones) { + fprintf(stdout, + "BEGIN '%s'\n", + zone->subzone_chart_id); + + for (struct zone_t *subzone = zone->subzones; subzone; subzone = subzone->next) { + if(get_measurement(subzone->path, &subzone->energy_uj)) { + fprintf(stdout, + "SET '%s' = %lld\n", + subzone->name, + subzone->energy_uj); + } + } + + fprintf(stdout, "END\n"); + } + + } 
+ + fflush(stdout); + + return 0; +} diff --git a/collectors/diskspace.plugin/metadata.yaml b/collectors/diskspace.plugin/metadata.yaml index 0e8e7f35..cb45edf3 100644 --- a/collectors/diskspace.plugin/metadata.yaml +++ b/collectors/diskspace.plugin/metadata.yaml @@ -1,98 +1,139 @@ -meta: - plugin_name: diskspace.plugin - module_name: diskspace.plugin - monitored_instance: - name: Disk space - link: '' - categories: - - data-collection.linux-systems - icon_filename: 'hard-drive.svg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor Disk space metrics for proficient storage management. Keep track of usage, free space, and error rates to prevent disk space issues.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: -- name: disk_space_usage - link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf - metric: disk.space - info: disk ${label:mount_point} space utilization - os: "linux freebsd" -- name: disk_inode_usage - link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf - metric: disk.inodes - info: disk ${label:mount_point} inode utilization - os: "linux freebsd" -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: mount point - description: "" - labels: - - name: mount_point - description: TBD - - name: filesystem 
- description: TBD - - name: mount_root - description: TBD +plugin_name: diskspace.plugin +modules: + - meta: + plugin_name: diskspace.plugin + module_name: diskspace.plugin + monitored_instance: + name: Disk space + link: "" + categories: + - data-collection.linux-systems + icon_filename: "hard-drive.svg" + related_resources: + integrations: + list: + - plugin_name: ebpf.plugin + module_name: disk + info_provided_to_referring_integrations: + description: "" + keywords: + - disk + - I/O + - space + - inode + most_popular: false + overview: + data_collection: + metrics_description: "Monitor Disk space metrics for proficient storage management. Keep track of usage, free space, and error rates to prevent disk space issues." + method_description: "" + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "The plugin reads data from `/proc/self/mountinfo` and `/proc/diskstats file`." + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: "netdata.conf" + section_name: "[plugin:proc:diskspace]" + description: "This is netdata main configuration file" + options: + description: "You can also specify per mount point `[plugin:proc:diskspace:mountpoint]`" + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency. + default_value: 1 + required: false + - name: remove charts of unmounted disks + description: Remove chart when a device is unmounted on host. + default_value: yes + required: false + - name: check for new mount points every + description: Parse proc files frequency. + default_value: 15 + required: false + - name: exclude space metrics on paths + description: Do not show metrics (charts) for listed paths. This option accepts netdata simple pattern. 
+ default_value: /proc/* /sys/* /var/run/user/* /run/user/* /snap/* /var/lib/docker/* + required: false + - name: exclude space metrics on filesystems + description: Do not show metrics (charts) for listed filesystems. This option accepts netdata simple pattern. + default_value: "*gvfs *gluster* *s3fs *ipfs *davfs2 *httpfs *sshfs *gdfs *moosefs fusectl autofs" + required: false + - name: exclude inode metrics on filesystems + description: Do not show metrics (charts) for listed filesystems. This option accepts netdata simple pattern. + default_value: msdosfs msdos vfat overlayfs aufs* *unionfs + required: false + - name: space usage for all disks + description: Define if plugin will show metrics for space usage. When value is set to `auto` plugin will try to access information to display if filesystem or path was not discarded with previous option. + default_value: auto + required: false + - name: inodes usage for all disks + description: Define if plugin will show metrics for inode usage. When value is set to `auto` plugin will try to access information to display if filesystem or path was not discarded with previous option. 
+ default_value: auto + required: false + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: disk_space_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf + metric: disk.space + info: disk ${label:mount_point} space utilization + os: "linux freebsd" + - name: disk_inode_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf + metric: disk.inodes + info: disk ${label:mount_point} inode utilization + os: "linux freebsd" metrics: - - name: disk.space - description: Disk Space Usage - unit: "GiB" - chart_type: stacked - dimensions: - - name: avail - - name: used - - name: reserved_for_root - - name: disk.inodes - description: Disk Files (inodes) Usage - unit: "inodes" - chart_type: stacked - dimensions: - - name: avail - - name: used - - name: reserved_for_root + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: mount point + description: "" + labels: + - name: mount_point + description: Path used to mount a filesystem + - name: filesystem + description: The filesystem used to format a partition. + - name: mount_root + description: Root directory where mount points are present. 
+ metrics: + - name: disk.space + description: Disk Space Usage + unit: "GiB" + chart_type: stacked + dimensions: + - name: avail + - name: used + - name: reserved_for_root + - name: disk.inodes + description: Disk Files (inodes) Usage + unit: "inodes" + chart_type: stacked + dimensions: + - name: avail + - name: used + - name: reserved_for_root diff --git a/collectors/diskspace.plugin/metrics.csv b/collectors/diskspace.plugin/metrics.csv deleted file mode 100644 index 2b61ee9a..00000000 --- a/collectors/diskspace.plugin/metrics.csv +++ /dev/null @@ -1,3 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -disk.space,mount point,"avail, used, reserved_for_root",GiB,"Disk Space Usage",stacked,"mount_point, filesystem, mount_root",diskspace.plugin, -disk.inodes,mount point,"avail, used, reserved_for_root",inodes,"Disk Files (inodes) Usage",stacked,"mount_point, filesystem, mount_root",diskspace.plugin,
\ No newline at end of file diff --git a/collectors/ebpf.plugin/ebpf.c b/collectors/ebpf.plugin/ebpf.c index 72aedba6..84404730 100644 --- a/collectors/ebpf.plugin/ebpf.c +++ b/collectors/ebpf.plugin/ebpf.c @@ -60,7 +60,7 @@ ebpf_module_t ebpf_modules[] = { NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0 }, - { .thread_name = "socket", .config_name = "socket", .thread_description = NETDATA_EBPF_SOCKET_MODULE_DESC, + { .thread_name = "socket", .config_name = "socket", .thread_description = NETDATA_EBPF_SOCKET_MODULE_DESC, .enabled = 0, .start_routine = ebpf_socket_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, @@ -1229,7 +1229,7 @@ void write_histogram_chart(char *family, char *name, const netdata_idx_t *hist, */ int ebpf_statistic_create_aral_chart(char *name, ebpf_module_t *em) { - static int priority = 140100; + static int priority = NETATA_EBPF_ORDER_STAT_ARAL_BEGIN; char *mem = { NETDATA_EBPF_STAT_DIMENSION_MEMORY }; char *aral = { NETDATA_EBPF_STAT_DIMENSION_ARAL }; @@ -1327,6 +1327,49 @@ void ebpf_send_data_aral_chart(ARAL *memory, ebpf_module_t *em) /***************************************************************** * + * FUNCTIONS TO READ GLOBAL HASH TABLES + * + *****************************************************************/ + +/** + * Read Global Table Stats + * + * Read data from specified table (map_fd) using array allocated inside thread(values) and storing + * them in stats vector starting from the first position. + * + * For PID tables is recommended to use a function to parse the specific data. + * + * @param stats vector used to store data + * @param values helper to read data from hash tables. 
+ * @param map_fd table that has data + * @param maps_per_core Is necessary to read data from all cores? + * @param begin initial value to query hash table + * @param end last value that will not be used. + */ +void ebpf_read_global_table_stats(netdata_idx_t *stats, + netdata_idx_t *values, + int map_fd, + int maps_per_core, + uint32_t begin, + uint32_t end) +{ + uint32_t idx, order; + + for (idx = begin, order = 0; idx < end; idx++, order++) { + if (!bpf_map_lookup_elem(map_fd, &idx, values)) { + int i; + int before = (maps_per_core) ? ebpf_nprocs: 1; + netdata_idx_t total = 0; + for (i = 0; i < before; i++) + total += values[i]; + + stats[order] = total; + } + } +} + +/***************************************************************** + * * FUNCTIONS TO DEFINE OPTIONS * *****************************************************************/ @@ -2454,6 +2497,47 @@ static char *hash_table_stat = {"hash_table"}; static char *hash_table_core[NETDATA_EBPF_LOAD_STAT_END] = {"per_core", "unique"}; /** + * Send Hash Table PID data + * + * Send all information associated with a specific pid table. + * + * @param chart chart id + * @param idx index position in hash_table_stats + */ +static inline void ebpf_send_hash_table_pid_data(char *chart, uint32_t idx) +{ + int i; + write_begin_chart(NETDATA_MONITORING_FAMILY, chart); + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { + ebpf_module_t *wem = &ebpf_modules[i]; + if (wem->apps_routine) + write_chart_dimension((char *)wem->thread_name, + (wem->enabled < NETDATA_THREAD_EBPF_STOPPING) ? + wem->hash_table_stats[idx]: + 0); + } + write_end_chart(); +} + +/** + * Send Global Hash Table data + * + * Send all information associated with a specific pid table. 
+ * + */ +static inline void ebpf_send_global_hash_table_data() +{ + int i; + write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_GLOBAL_ELEMENTS); + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { + ebpf_module_t *wem = &ebpf_modules[i]; + write_chart_dimension((char *)wem->thread_name, + (wem->enabled < NETDATA_THREAD_EBPF_STOPPING) ? NETDATA_CONTROLLER_END: 0); + } + write_end_chart(); +} + +/** * Send Statistic Data * * Send statistic information to netdata. @@ -2500,6 +2584,11 @@ void ebpf_send_statistic_data() write_chart_dimension(hash_table_core[NETDATA_EBPF_THREAD_PER_CORE], (long long)plugin_statistics.hash_percpu); write_chart_dimension(hash_table_core[NETDATA_EBPF_THREAD_UNIQUE], (long long)plugin_statistics.hash_unique); write_end_chart(); + + ebpf_send_global_hash_table_data(); + + ebpf_send_hash_table_pid_data(NETDATA_EBPF_HASH_TABLES_INSERT_PID_ELEMENTS, NETDATA_EBPF_GLOBAL_TABLE_PID_TABLE_ADD); + ebpf_send_hash_table_pid_data(NETDATA_EBPF_HASH_TABLES_REMOVE_PID_ELEMENTS, NETDATA_EBPF_GLOBAL_TABLE_PID_TABLE_DEL); } /** @@ -2681,6 +2770,66 @@ static inline void ebpf_create_statistic_hash_per_core(int update_every) ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); } +/** + * Hash table global elements + * + * Write to standard output current values inside global tables. 
+ * + * @param update_every time used to update charts + */ +static void ebpf_create_statistic_hash_global_elements(int update_every) +{ + ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, + NETDATA_EBPF_HASH_TABLES_GLOBAL_ELEMENTS, + "Controllers inside global table", + "rows", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_EBPF_ORDER_STAT_HASH_GLOBAL_TABLE_TOTAL, + update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + + int i; + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { + ebpf_write_global_dimension((char *)ebpf_modules[i].thread_name, + (char *)ebpf_modules[i].thread_name, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + } +} + +/** + * Hash table global elements + * + * Write to standard output current values inside global tables. + * + * @param update_every time used to update charts + * @param id chart id + * @param title chart title + * @param order ordder chart will be shown on dashboard. + */ +static void ebpf_create_statistic_hash_pid_table(int update_every, char *id, char *title, int order) +{ + ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, + id, + title, + "rows", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + order, + update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + + int i; + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { + ebpf_module_t *wem = &ebpf_modules[i]; + if (wem->apps_routine) + ebpf_write_global_dimension((char *)wem->thread_name, + (char *)wem->thread_name, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + } +} /** * Create Statistics Charts @@ -2718,6 +2867,20 @@ static void ebpf_create_statistic_charts(int update_every) ebpf_create_statistic_hash_tables(update_every); ebpf_create_statistic_hash_per_core(update_every); + + ebpf_create_statistic_hash_global_elements(update_every); + + ebpf_create_statistic_hash_pid_table(update_every, + NETDATA_EBPF_HASH_TABLES_INSERT_PID_ELEMENTS, + "Elements inserted into PID table", + NETDATA_EBPF_ORDER_STAT_HASH_PID_TABLE_ADDED); + + 
ebpf_create_statistic_hash_pid_table(update_every, + NETDATA_EBPF_HASH_TABLES_REMOVE_PID_ELEMENTS, + "Elements removed from PID table", + NETDATA_EBPF_ORDER_STAT_HASH_PID_TABLE_REMOVED); + + fflush(stdout); } /***************************************************************** diff --git a/collectors/ebpf.plugin/ebpf.h b/collectors/ebpf.plugin/ebpf.h index f008d21a..78e3a925 100644 --- a/collectors/ebpf.plugin/ebpf.h +++ b/collectors/ebpf.plugin/ebpf.h @@ -69,8 +69,6 @@ typedef struct netdata_syscall_stat { struct netdata_syscall_stat *next; // Link list } netdata_syscall_stat_t; -typedef uint64_t netdata_idx_t; - typedef struct netdata_publish_syscall { char *dimension; char *name; @@ -169,6 +167,9 @@ typedef struct ebpf_tracepoint { #define NETDATA_EBPF_KERNEL_MEMORY "ebpf_kernel_memory" #define NETDATA_EBPF_HASH_TABLES_LOADED "ebpf_hash_tables_count" #define NETDATA_EBPF_HASH_TABLES_PER_CORE "ebpf_hash_tables_per_core" +#define NETDATA_EBPF_HASH_TABLES_GLOBAL_ELEMENTS "ebpf_hash_tables_global_elements" +#define NETDATA_EBPF_HASH_TABLES_INSERT_PID_ELEMENTS "ebpf_hash_tables_insert_pid_elements" +#define NETDATA_EBPF_HASH_TABLES_REMOVE_PID_ELEMENTS "ebpf_hash_tables_remove_pid_elements" // Log file #define NETDATA_DEVELOPER_LOG_FILE "developer.log" @@ -319,6 +320,9 @@ void ebpf_update_disabled_plugin_stats(ebpf_module_t *em); ARAL *ebpf_allocate_pid_aral(char *name, size_t size); void ebpf_unload_legacy_code(struct bpf_object *objects, struct bpf_link **probe_links); +void ebpf_read_global_table_stats(netdata_idx_t *stats, netdata_idx_t *values, int map_fd, + int maps_per_core, uint32_t begin, uint32_t end); + extern ebpf_filesystem_partitions_t localfs[]; extern ebpf_sync_syscalls_t local_syscalls[]; extern int ebpf_exit_plugin; diff --git a/collectors/ebpf.plugin/ebpf_cachestat.c b/collectors/ebpf.plugin/ebpf_cachestat.c index 72c33794..affecdea 100644 --- a/collectors/ebpf.plugin/ebpf_cachestat.c +++ b/collectors/ebpf.plugin/ebpf_cachestat.c @@ -854,26 +854,24 @@ 
void ebpf_cachestat_create_apps_charts(struct ebpf_module *em, void *ptr) * * Read the table with number of calls for all functions * + * @param stats vector used to read data from control table. * @param maps_per_core do I need to read all cores? */ -static void ebpf_cachestat_read_global_table(int maps_per_core) +static void ebpf_cachestat_read_global_tables(netdata_idx_t *stats, int maps_per_core) { - uint32_t idx; - netdata_idx_t *val = cachestat_hash_values; - netdata_idx_t *stored = cachestat_values; - int fd = cachestat_maps[NETDATA_CACHESTAT_GLOBAL_STATS].map_fd; - - for (idx = NETDATA_KEY_CALLS_ADD_TO_PAGE_CACHE_LRU; idx < NETDATA_CACHESTAT_END; idx++) { - if (!bpf_map_lookup_elem(fd, &idx, stored)) { - int i; - int end = (maps_per_core) ? ebpf_nprocs: 1; - netdata_idx_t total = 0; - for (i = 0; i < end; i++) - total += stored[i]; - - val[idx] = total; - } - } + ebpf_read_global_table_stats(cachestat_hash_values, + cachestat_values, + cachestat_maps[NETDATA_CACHESTAT_GLOBAL_STATS].map_fd, + maps_per_core, + NETDATA_KEY_CALLS_ADD_TO_PAGE_CACHE_LRU, + NETDATA_CACHESTAT_END); + + ebpf_read_global_table_stats(stats, + cachestat_values, + cachestat_maps[NETDATA_CACHESTAT_CTRL].map_fd, + maps_per_core, + NETDATA_CONTROLLER_PID_TABLE_ADD, + NETDATA_CONTROLLER_END); } /** @@ -1288,6 +1286,8 @@ static void cachestat_collector(ebpf_module_t *em) //This will be cancelled by its parent uint32_t running_time = 0; uint32_t lifetime = em->lifetime; + netdata_idx_t *stats = em->hash_table_stats; + memset(stats, 0, sizeof(em->hash_table_stats)); while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); @@ -1296,7 +1296,7 @@ static void cachestat_collector(ebpf_module_t *em) counter = 0; netdata_apps_integration_flags_t apps = em->apps_charts; - ebpf_cachestat_read_global_table(maps_per_core); + ebpf_cachestat_read_global_tables(stats, maps_per_core); pthread_mutex_lock(&collect_data_mutex); if (apps) 
ebpf_read_cachestat_apps_table(maps_per_core); diff --git a/collectors/ebpf.plugin/ebpf_dcstat.c b/collectors/ebpf.plugin/ebpf_dcstat.c index dba3f44d..feb935b9 100644 --- a/collectors/ebpf.plugin/ebpf_dcstat.c +++ b/collectors/ebpf.plugin/ebpf_dcstat.c @@ -690,26 +690,24 @@ static void ebpf_update_dc_cgroup(int maps_per_core) * * Read the table with number of calls for all functions * + * @param stats vector used to read data from control table. * @param maps_per_core do I need to read all cores? */ -static void ebpf_dc_read_global_table(int maps_per_core) +static void ebpf_dc_read_global_tables(netdata_idx_t *stats, int maps_per_core) { - uint32_t idx; - netdata_idx_t *val = dcstat_hash_values; - netdata_idx_t *stored = dcstat_values; - int fd = dcstat_maps[NETDATA_DCSTAT_GLOBAL_STATS].map_fd; - - for (idx = NETDATA_KEY_DC_REFERENCE; idx < NETDATA_DIRECTORY_CACHE_END; idx++) { - if (!bpf_map_lookup_elem(fd, &idx, stored)) { - int i; - int end = (maps_per_core) ? ebpf_nprocs: 1; - netdata_idx_t total = 0; - for (i = 0; i < end; i++) - total += stored[i]; - - val[idx] = total; - } - } + ebpf_read_global_table_stats(dcstat_hash_values, + dcstat_values, + dcstat_maps[NETDATA_DCSTAT_GLOBAL_STATS].map_fd, + maps_per_core, + NETDATA_KEY_DC_REFERENCE, + NETDATA_DIRECTORY_CACHE_END); + + ebpf_read_global_table_stats(stats, + dcstat_values, + dcstat_maps[NETDATA_DCSTAT_CTRL].map_fd, + maps_per_core, + NETDATA_CONTROLLER_PID_TABLE_ADD, + NETDATA_CONTROLLER_END); } /** @@ -1169,6 +1167,8 @@ static void dcstat_collector(ebpf_module_t *em) int maps_per_core = em->maps_per_core; uint32_t running_time = 0; uint32_t lifetime = em->lifetime; + netdata_idx_t *stats = em->hash_table_stats; + memset(stats, 0, sizeof(em->hash_table_stats)); while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); @@ -1177,7 +1177,7 @@ static void dcstat_collector(ebpf_module_t *em) counter = 0; netdata_apps_integration_flags_t apps = em->apps_charts; - 
ebpf_dc_read_global_table(maps_per_core); + ebpf_dc_read_global_tables(stats, maps_per_core); pthread_mutex_lock(&collect_data_mutex); if (apps) read_dc_apps_table(maps_per_core); diff --git a/collectors/ebpf.plugin/ebpf_fd.c b/collectors/ebpf.plugin/ebpf_fd.c index 214d2955..f039647a 100644 --- a/collectors/ebpf.plugin/ebpf_fd.c +++ b/collectors/ebpf.plugin/ebpf_fd.c @@ -360,7 +360,7 @@ static inline int ebpf_fd_load_and_attach(struct fd_bpf *obj, ebpf_module_t *em) if (!ret) { ebpf_fd_set_hash_tables(obj); - ebpf_update_controller(fd_maps[NETDATA_CACHESTAT_CTRL].map_fd, em); + ebpf_update_controller(fd_maps[NETDATA_FD_CONTROLLER].map_fd, em); } return ret; @@ -624,26 +624,24 @@ static void ebpf_fd_send_data(ebpf_module_t *em) * * Read the table with number of calls for all functions * + * @param stats vector used to read data from control table. * @param maps_per_core do I need to read all cores? */ -static void ebpf_fd_read_global_table(int maps_per_core) +static void ebpf_fd_read_global_tables(netdata_idx_t *stats, int maps_per_core) { - uint32_t idx; - netdata_idx_t *val = fd_hash_values; - netdata_idx_t *stored = fd_values; - int fd = fd_maps[NETDATA_FD_GLOBAL_STATS].map_fd; - - for (idx = NETDATA_KEY_CALLS_DO_SYS_OPEN; idx < NETDATA_FD_COUNTER; idx++) { - if (!bpf_map_lookup_elem(fd, &idx, stored)) { - int i; - int end = (maps_per_core) ? 
ebpf_nprocs: 1; - netdata_idx_t total = 0; - for (i = 0; i < end; i++) - total += stored[i]; - - val[idx] = total; - } - } + ebpf_read_global_table_stats(fd_hash_values, + fd_values, + fd_maps[NETDATA_FD_GLOBAL_STATS].map_fd, + maps_per_core, + NETDATA_KEY_CALLS_DO_SYS_OPEN, + NETDATA_FD_COUNTER); + + ebpf_read_global_table_stats(stats, + fd_values, + fd_maps[NETDATA_FD_CONTROLLER].map_fd, + maps_per_core, + NETDATA_CONTROLLER_PID_TABLE_ADD, + NETDATA_CONTROLLER_END); } /** @@ -1136,6 +1134,8 @@ static void fd_collector(ebpf_module_t *em) int maps_per_core = em->maps_per_core; uint32_t running_time = 0; uint32_t lifetime = em->lifetime; + netdata_idx_t *stats = em->hash_table_stats; + memset(stats, 0, sizeof(em->hash_table_stats)); while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); @@ -1144,7 +1144,7 @@ static void fd_collector(ebpf_module_t *em) counter = 0; netdata_apps_integration_flags_t apps = em->apps_charts; - ebpf_fd_read_global_table(maps_per_core); + ebpf_fd_read_global_tables(stats, maps_per_core); pthread_mutex_lock(&collect_data_mutex); if (apps) read_fd_apps_table(maps_per_core); diff --git a/collectors/ebpf.plugin/ebpf_functions.c b/collectors/ebpf.plugin/ebpf_functions.c index cc26044c..7a43692b 100644 --- a/collectors/ebpf.plugin/ebpf_functions.c +++ b/collectors/ebpf.plugin/ebpf_functions.c @@ -206,7 +206,7 @@ static void ebpf_function_thread_manipulation(const char *transaction, time_t expires = now_realtime_sec() + em->update_every; BUFFER *wb = buffer_create(PLUGINSD_LINE_MAX, NULL); - buffer_json_initialize(wb, "\"", "\"", 0, true, false); + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_NEWLINE_ON_ARRAY_ITEMS); buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); buffer_json_member_add_string(wb, "type", "table"); buffer_json_member_add_time_t(wb, "update_every", em->update_every); diff --git a/collectors/ebpf.plugin/ebpf_oomkill.c b/collectors/ebpf.plugin/ebpf_oomkill.c 
index 66421d27..84830160 100644 --- a/collectors/ebpf.plugin/ebpf_oomkill.c +++ b/collectors/ebpf.plugin/ebpf_oomkill.c @@ -419,6 +419,7 @@ static void oomkill_collector(ebpf_module_t *em) int counter = update_every - 1; uint32_t running_time = 0; uint32_t lifetime = em->lifetime; + netdata_idx_t *stats = em->hash_table_stats; while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -432,6 +433,9 @@ static void oomkill_collector(ebpf_module_t *em) continue; } + stats[NETDATA_CONTROLLER_PID_TABLE_ADD] += (uint64_t) count; + stats[NETDATA_CONTROLLER_PID_TABLE_DEL] += (uint64_t) count; + pthread_mutex_lock(&collect_data_mutex); pthread_mutex_lock(&lock); if (cgroups) { diff --git a/collectors/ebpf.plugin/ebpf_process.c b/collectors/ebpf.plugin/ebpf_process.c index 4d915e13..3537efc5 100644 --- a/collectors/ebpf.plugin/ebpf_process.c +++ b/collectors/ebpf.plugin/ebpf_process.c @@ -267,26 +267,22 @@ void ebpf_process_send_apps_data(struct ebpf_target *root, ebpf_module_t *em) * * @param maps_per_core do I need to read all cores? */ -static void ebpf_read_process_hash_global_tables(int maps_per_core) +static void ebpf_read_process_hash_global_tables(netdata_idx_t *stats, int maps_per_core) { - uint64_t idx; netdata_idx_t res[NETDATA_KEY_END_VECTOR]; - - netdata_idx_t *val = process_hash_values; - int fd = process_maps[NETDATA_PROCESS_GLOBAL_TABLE].map_fd; - for (idx = 0; idx < NETDATA_KEY_END_VECTOR; idx++) { - if (!bpf_map_lookup_elem(fd, &idx, val)) { - uint64_t total = 0; - int i; - int end = (maps_per_core) ? 
ebpf_nprocs : 1; - for (i = 0; i < end; i++) - total += val[i]; - - res[idx] = total; - } else { - res[idx] = 0; - } - } + ebpf_read_global_table_stats(res, + process_hash_values, + process_maps[NETDATA_PROCESS_GLOBAL_TABLE].map_fd, + maps_per_core, + 0, + NETDATA_KEY_END_VECTOR); + + ebpf_read_global_table_stats(stats, + process_hash_values, + process_maps[NETDATA_PROCESS_CTRL_TABLE].map_fd, + maps_per_core, + NETDATA_CONTROLLER_PID_TABLE_ADD, + NETDATA_CONTROLLER_END); process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_EXIT].call = res[NETDATA_KEY_CALLS_DO_EXIT]; process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK].call = res[NETDATA_KEY_CALLS_RELEASE_TASK]; @@ -747,7 +743,6 @@ static void ebpf_process_exit(void *ptr) ebpf_statistic_obsolete_aral_chart(em, process_disable_priority); #endif - fflush(stdout); pthread_mutex_unlock(&lock); } @@ -1121,6 +1116,8 @@ static void process_collector(ebpf_module_t *em) int maps_per_core = em->maps_per_core; uint32_t running_time = 0; uint32_t lifetime = em->lifetime; + netdata_idx_t *stats = em->hash_table_stats; + memset(stats, 0, sizeof(em->hash_table_stats)); while (!ebpf_exit_plugin && running_time < lifetime) { usec_t dt = heartbeat_next(&hb, USEC_PER_SEC); (void)dt; @@ -1130,7 +1127,7 @@ static void process_collector(ebpf_module_t *em) if (++counter == update_every) { counter = 0; - ebpf_read_process_hash_global_tables(maps_per_core); + ebpf_read_process_hash_global_tables(stats, maps_per_core); netdata_apps_integration_flags_t apps_enabled = em->apps_charts; pthread_mutex_lock(&collect_data_mutex); diff --git a/collectors/ebpf.plugin/ebpf_process.h b/collectors/ebpf.plugin/ebpf_process.h index 48267d87..d49e3845 100644 --- a/collectors/ebpf.plugin/ebpf_process.h +++ b/collectors/ebpf.plugin/ebpf_process.h @@ -48,7 +48,11 @@ enum netdata_ebpf_stats_order { NETDATA_EBPF_ORDER_STAT_LOAD_METHOD, NETDATA_EBPF_ORDER_STAT_KERNEL_MEMORY, NETDATA_EBPF_ORDER_STAT_HASH_TABLES, - NETDATA_EBPF_ORDER_STAT_HASH_CORE + 
NETDATA_EBPF_ORDER_STAT_HASH_CORE, + NETDATA_EBPF_ORDER_STAT_HASH_GLOBAL_TABLE_TOTAL, + NETDATA_EBPF_ORDER_STAT_HASH_PID_TABLE_ADDED, + NETDATA_EBPF_ORDER_STAT_HASH_PID_TABLE_REMOVED, + NETATA_EBPF_ORDER_STAT_ARAL_BEGIN }; enum netdata_ebpf_load_mode_stats{ diff --git a/collectors/ebpf.plugin/ebpf_shm.c b/collectors/ebpf.plugin/ebpf_shm.c index 78ada81f..baeb7204 100644 --- a/collectors/ebpf.plugin/ebpf_shm.c +++ b/collectors/ebpf.plugin/ebpf_shm.c @@ -646,30 +646,24 @@ static void shm_send_global() * * Read the table with number of calls for all functions * + * @param stats vector used to read data from control table. * @param maps_per_core do I need to read all cores? */ -static void ebpf_shm_read_global_table(int maps_per_core) +static void ebpf_shm_read_global_table(netdata_idx_t *stats, int maps_per_core) { - netdata_idx_t *stored = shm_values; - netdata_idx_t *val = shm_hash_values; - int fd = shm_maps[NETDATA_SHM_GLOBAL_TABLE].map_fd; - size_t length = sizeof(netdata_idx_t); - if (maps_per_core) - length *= ebpf_nprocs; - - uint32_t i, end = NETDATA_SHM_END; - for (i = NETDATA_KEY_SHMGET_CALL; i < end; i++) { - if (!bpf_map_lookup_elem(fd, &i, stored)) { - int j; - int last = (maps_per_core) ? 
ebpf_nprocs : 1; - netdata_idx_t total = 0; - for (j = 0; j < last; j++) - total += stored[j]; - - val[i] = total; - memset(stored, 0 , length); - } - } + ebpf_read_global_table_stats(shm_hash_values, + shm_values, + shm_maps[NETDATA_SHM_GLOBAL_TABLE].map_fd, + maps_per_core, + NETDATA_KEY_SHMGET_CALL, + NETDATA_SHM_END); + + ebpf_read_global_table_stats(stats, + shm_values, + shm_maps[NETDATA_SHM_CONTROLLER].map_fd, + maps_per_core, + NETDATA_CONTROLLER_PID_TABLE_ADD, + NETDATA_CONTROLLER_END); } /** @@ -1039,6 +1033,8 @@ static void shm_collector(ebpf_module_t *em) int maps_per_core = em->maps_per_core; uint32_t running_time = 0; uint32_t lifetime = em->lifetime; + netdata_idx_t *stats = em->hash_table_stats; + memset(stats, 0, sizeof(em->hash_table_stats)); while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -1046,7 +1042,7 @@ static void shm_collector(ebpf_module_t *em) counter = 0; netdata_apps_integration_flags_t apps = em->apps_charts; - ebpf_shm_read_global_table(maps_per_core); + ebpf_shm_read_global_table(stats, maps_per_core); pthread_mutex_lock(&collect_data_mutex); if (apps) { read_shm_apps_table(maps_per_core); diff --git a/collectors/ebpf.plugin/ebpf_socket.c b/collectors/ebpf.plugin/ebpf_socket.c index 2cad8bdf..e4798b30 100644 --- a/collectors/ebpf.plugin/ebpf_socket.c +++ b/collectors/ebpf.plugin/ebpf_socket.c @@ -2205,33 +2205,25 @@ void *ebpf_socket_read_hash(void *ptr) /** * Read the hash table and store data to allocated vectors. * + * @param stats vector used to read data from control table. * @param maps_per_core do I need to read all cores? 
*/ -static void read_hash_global_tables(int maps_per_core) +static void ebpf_socket_read_hash_global_tables(netdata_idx_t *stats, int maps_per_core) { - uint64_t idx; netdata_idx_t res[NETDATA_SOCKET_COUNTER]; - - netdata_idx_t *val = socket_hash_values; - size_t length = sizeof(netdata_idx_t); - if (maps_per_core) - length *= ebpf_nprocs; - - int fd = socket_maps[NETDATA_SOCKET_GLOBAL].map_fd; - for (idx = 0; idx < NETDATA_SOCKET_COUNTER; idx++) { - if (!bpf_map_lookup_elem(fd, &idx, val)) { - uint64_t total = 0; - int i; - int end = (maps_per_core) ? ebpf_nprocs : 1; - for (i = 0; i < end; i++) - total += val[i]; - - res[idx] = total; - memset(socket_hash_values, 0, length); - } else { - res[idx] = 0; - } - } + ebpf_read_global_table_stats(res, + socket_hash_values, + socket_maps[NETDATA_SOCKET_GLOBAL].map_fd, + maps_per_core, + NETDATA_KEY_CALLS_TCP_SENDMSG, + NETDATA_SOCKET_COUNTER); + + ebpf_read_global_table_stats(stats, + socket_hash_values, + socket_maps[NETDATA_SOCKET_TABLE_CTRL].map_fd, + maps_per_core, + NETDATA_CONTROLLER_PID_TABLE_ADD, + NETDATA_CONTROLLER_END); socket_aggregated_data[NETDATA_IDX_TCP_SENDMSG].call = res[NETDATA_KEY_CALLS_TCP_SENDMSG]; socket_aggregated_data[NETDATA_IDX_TCP_CLEANUP_RBUF].call = res[NETDATA_KEY_CALLS_TCP_CLEANUP_RBUF]; @@ -2930,6 +2922,8 @@ static void socket_collector(ebpf_module_t *em) int counter = update_every - 1; uint32_t running_time = 0; uint32_t lifetime = em->lifetime; + netdata_idx_t *stats = em->hash_table_stats; + memset(stats, 0, sizeof(em->hash_table_stats)); while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -2939,7 +2933,7 @@ static void socket_collector(ebpf_module_t *em) netdata_apps_integration_flags_t socket_apps_enabled = em->apps_charts; if (socket_global_enabled) { read_listen_table(); - read_hash_global_tables(maps_per_core); + ebpf_socket_read_hash_global_tables(stats, maps_per_core); } 
pthread_mutex_lock(&collect_data_mutex); diff --git a/collectors/ebpf.plugin/ebpf_swap.c b/collectors/ebpf.plugin/ebpf_swap.c index 9a1640a3..359fe230 100644 --- a/collectors/ebpf.plugin/ebpf_swap.c +++ b/collectors/ebpf.plugin/ebpf_swap.c @@ -322,13 +322,13 @@ void ebpf_obsolete_swap_apps_charts(struct ebpf_module *em) */ static void ebpf_obsolete_swap_global(ebpf_module_t *em) { - ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, + ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, NETDATA_MEM_SWAP_CHART, "Calls to access swap memory", EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_SWAP_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NULL, - NETDATA_CHART_PRIO_SYSTEM_SWAP_CALLS, + NETDATA_CHART_PRIO_MEM_SWAP_CALLS, em->update_every); } @@ -507,7 +507,7 @@ static void read_swap_apps_table(int maps_per_core) */ static void swap_send_global() { - write_io_chart(NETDATA_MEM_SWAP_CHART, NETDATA_EBPF_SYSTEM_GROUP, + write_io_chart(NETDATA_MEM_SWAP_CHART, NETDATA_EBPF_MEMORY_GROUP, swap_publish_aggregated[NETDATA_KEY_SWAP_WRITEPAGE_CALL].dimension, (long long) swap_hash_values[NETDATA_KEY_SWAP_WRITEPAGE_CALL], swap_publish_aggregated[NETDATA_KEY_SWAP_READPAGE_CALL].dimension, @@ -519,26 +519,24 @@ static void swap_send_global() * * Read the table with number of calls to all functions * + * @param stats vector used to read data from control table. * @param maps_per_core do I need to read all cores? */ -static void ebpf_swap_read_global_table(int maps_per_core) -{ - netdata_idx_t *stored = swap_values; - netdata_idx_t *val = swap_hash_values; - int fd = swap_maps[NETDATA_SWAP_GLOBAL_TABLE].map_fd; - - uint32_t i, end = NETDATA_SWAP_END; - for (i = NETDATA_KEY_SWAP_READPAGE_CALL; i < end; i++) { - if (!bpf_map_lookup_elem(fd, &i, stored)) { - int j; - int last = (maps_per_core) ? 
ebpf_nprocs : 1; - netdata_idx_t total = 0; - for (j = 0; j < last; j++) - total += stored[j]; - - val[i] = total; - } - } +static void ebpf_swap_read_global_table(netdata_idx_t *stats, int maps_per_core) +{ + ebpf_read_global_table_stats(swap_hash_values, + swap_values, + swap_maps[NETDATA_SWAP_GLOBAL_TABLE].map_fd, + maps_per_core, + NETDATA_KEY_SWAP_READPAGE_CALL, + NETDATA_SWAP_END); + + ebpf_read_global_table_stats(stats, + swap_values, + swap_maps[NETDATA_SWAP_CONTROLLER].map_fd, + maps_per_core, + NETDATA_CONTROLLER_PID_TABLE_ADD, + NETDATA_CONTROLLER_END); } /** @@ -804,6 +802,8 @@ static void swap_collector(ebpf_module_t *em) int maps_per_core = em->maps_per_core; uint32_t running_time = 0; uint32_t lifetime = em->lifetime; + netdata_idx_t *stats = em->hash_table_stats; + memset(stats, 0, sizeof(em->hash_table_stats)); while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -811,7 +811,7 @@ static void swap_collector(ebpf_module_t *em) counter = 0; netdata_apps_integration_flags_t apps = em->apps_charts; - ebpf_swap_read_global_table(maps_per_core); + ebpf_swap_read_global_table(stats, maps_per_core); pthread_mutex_lock(&collect_data_mutex); if (apps) read_swap_apps_table(maps_per_core); @@ -914,12 +914,12 @@ static void ebpf_swap_allocate_global_vectors(int apps) */ static void ebpf_create_swap_charts(int update_every) { - ebpf_create_chart(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_MEM_SWAP_CHART, + ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_MEM_SWAP_CHART, "Calls to access swap memory", EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_SWAP_SUBMENU, NULL, NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_SYSTEM_SWAP_CALLS, + NETDATA_CHART_PRIO_MEM_SWAP_CALLS, ebpf_create_global_dimension, swap_publish_aggregated, NETDATA_SWAP_END, update_every, NETDATA_EBPF_MODULE_NAME_SWAP); diff --git a/collectors/ebpf.plugin/ebpf_vfs.c b/collectors/ebpf.plugin/ebpf_vfs.c index 
5747a240..e566e169 100644 --- a/collectors/ebpf.plugin/ebpf_vfs.c +++ b/collectors/ebpf.plugin/ebpf_vfs.c @@ -964,30 +964,25 @@ static void ebpf_vfs_send_data(ebpf_module_t *em) /** * Read the hash table and store data to allocated vectors. * + * @param stats vector used to read data from control table. * @param maps_per_core do I need to read all cores? */ -static void ebpf_vfs_read_global_table(int maps_per_core) +static void ebpf_vfs_read_global_table(netdata_idx_t *stats, int maps_per_core) { - uint64_t idx; netdata_idx_t res[NETDATA_VFS_COUNTER]; - - netdata_idx_t *val = vfs_hash_values; - size_t length = sizeof(netdata_idx_t); - if (maps_per_core) - length *= ebpf_nprocs; - - int fd = vfs_maps[NETDATA_VFS_ALL].map_fd; - for (idx = 0; idx < NETDATA_VFS_COUNTER; idx++) { - uint64_t total = 0; - if (!bpf_map_lookup_elem(fd, &idx, val)) { - int i; - int end = (maps_per_core) ? ebpf_nprocs : 1; - for (i = 0; i < end; i++) - total += val[i]; - } - res[idx] = total; - memset(val, 0, length); - } + ebpf_read_global_table_stats(res, + vfs_hash_values, + vfs_maps[NETDATA_VFS_ALL].map_fd, + maps_per_core, + NETDATA_KEY_CALLS_VFS_WRITE, + NETDATA_VFS_COUNTER); + + ebpf_read_global_table_stats(stats, + vfs_hash_values, + vfs_maps[NETDATA_VFS_CTRL].map_fd, + maps_per_core, + NETDATA_CONTROLLER_PID_TABLE_ADD, + NETDATA_CONTROLLER_END); vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_UNLINK].ncall = res[NETDATA_KEY_CALLS_VFS_UNLINK]; vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ].ncall = res[NETDATA_KEY_CALLS_VFS_READ] + @@ -1963,6 +1958,8 @@ static void vfs_collector(ebpf_module_t *em) int maps_per_core = em->maps_per_core; uint32_t running_time = 0; uint32_t lifetime = em->lifetime; + netdata_idx_t *stats = em->hash_table_stats; + memset(stats, 0, sizeof(em->hash_table_stats)); while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -1970,7 +1967,7 @@ static void 
vfs_collector(ebpf_module_t *em) counter = 0; netdata_apps_integration_flags_t apps = em->apps_charts; - ebpf_vfs_read_global_table(maps_per_core); + ebpf_vfs_read_global_table(stats, maps_per_core); pthread_mutex_lock(&collect_data_mutex); if (apps) ebpf_vfs_read_apps(maps_per_core); diff --git a/collectors/ebpf.plugin/metadata.yaml b/collectors/ebpf.plugin/metadata.yaml new file mode 100644 index 00000000..23232677 --- /dev/null +++ b/collectors/ebpf.plugin/metadata.yaml @@ -0,0 +1,3308 @@ +plugin_name: ebpf.plugin +modules: + - meta: + plugin_name: ebpf.plugin + module_name: filedescriptor + monitored_instance: + name: eBPF Filedescriptor + link: "https://kernel.org/" + categories: + - data-collection.ebpf + icon_filename: "ebpf.jpg" + related_resources: + integrations: + list: + - plugin_name: apps.plugin + module_name: apps + - plugin_name: cgroups.plugin + module_name: cgroups + info_provided_to_referring_integrations: + description: "" + keywords: + - file + - eBPF + - fd + - open + - close + most_popular: false + overview: + data_collection: + metrics_description: "Monitor calls for functions responsible to open or close a file descriptor and possible errors." + method_description: "Attach tracing (kprobe and trampoline) to internal kernel functions according options used to compile kernel." + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "The plugin needs setuid because it loads data inside kernel. Netdata sets necessary permissions during installation time." + default_behavior: + auto_detection: + description: "The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) and presence of BTF files to decide which eBPF program will be attached." 
+ limits: + description: "" + performance_impact: + description: "Depending on the kernel version and how frequently files are opened and closed, this thread will add overhead every time that an internal kernel function monitored by this thread is called. The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology." + setup: + prerequisites: + list: + - title: Compile kernel + description: | + Check if your kernel was compiled with necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside /boot/config file. Some cited names can be different according to the preferences of Linux distributions. + When you do not have options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution, this last is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files + with different names. + + Now follow steps: + 1. Copy the configuration file to /usr/src/linux/.config. + 2. Select the necessary options: make oldconfig + 3. Compile your kernel image: make bzImage + 4. Compile your modules: make modules + 5. Copy your new kernel image for boot loader directory + 6. Install the new modules: make modules_install + 7. Generate an initial ramdisk image (`initrd`) if it is necessary. + 8. Update your boot loader + configuration: + file: + name: "ebpf.d/fd.conf" + description: "Overwrite default configuration helping to reduce memory usage. You can also select charts visible on dashboard." + options: + description: | + All options are defined inside section `[global]`. + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency.
+ default_value: 5 + required: false + - name: ebpf load mode + description: Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). + default_value: entry + required: false + - name: apps + description: Enable or disable integration with apps.plugin + default_value: no + required: false + - name: cgroups + description: Enable or disable integration with cgroup.plugin + default_value: no + required: false + - name: pid table size + description: Number of elements stored inside hash tables used to monitor calls per PID. + default_value: 32768 + required: false + - name: ebpf type format + description: "Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load)." + default_value: auto + required: false + - name: ebpf co-re tracing + description: "Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code)." + default_value: trampoline + required: false + - name: maps per core + description: Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. + default_value: yes + required: false + - name: lifetime + description: Set default lifetime for thread when enabled by cloud. + default_value: 300 + required: false + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: cgroup + description: "These Metrics show grouped information per cgroup/service." 
+ labels: [] + metrics: + - name: cgroup.fd_open + description: Number of open files + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: cgroup.fd_open_error + description: Fails to open files + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: cgroup.fd_closed + description: Files closed + unit: "calls/s" + chart_type: line + dimensions: + - name: close + - name: cgroup.fd_close_error + description: Fails to close files + unit: "calls/s" + chart_type: line + dimensions: + - name: close + - name: services.file_open + description: Number of open files + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.file_open_error + description: Fails to open files + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.file_closed + description: Files closed + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.file_close_error + description: Fails to close files + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: global + description: "These metrics show total number of calls to functions inside kernel." + labels: [] + metrics: + - name: filesystem.file_descriptor + description: Open and close calls + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: close + - name: filesystem.file_error + description: Open fails + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: close + - name: apps + description: "These Metrics show grouped information per apps group." 
+ labels: [] + metrics: + - name: apps.file_open + description: Number of open files + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.file_open_error + description: Fails to open files + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.file_closed + description: Files closed + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.file_close_error + description: Fails to close files + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - meta: + plugin_name: ebpf.plugin + module_name: processes + monitored_instance: + name: eBPF Processes + link: "https://kernel.org/" + categories: + - data-collection.ebpf + icon_filename: "ebpf.jpg" + related_resources: + integrations: + list: + - plugin_name: apps.plugin + module_name: apps + - plugin_name: cgroups.plugin + module_name: cgroups + info_provided_to_referring_integrations: + description: "" + keywords: + - thread + - fork + - process + - eBPF + most_popular: false + overview: + data_collection: + metrics_description: "Monitor calls for function creating tasks (threads and processes) inside Linux kernel." + method_description: "Attach tracing (kprobe or tracepoint, and trampoline) to internal kernel functions." + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "The plugin needs setuid because it loads data inside kernel. Netdata sets necessary permissions during installation time." + default_behavior: + auto_detection: + description: "The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT), files inside debugfs, and presence of BTF files to decide which eBPF program will be attached."
+ limits: + description: "" + performance_impact: + description: "This thread will add overhead every time that an internal kernel function monitored by this thread is called." + setup: + prerequisites: + list: + - title: Compile kernel + description: | + Check if your kernel was compiled with necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside /boot/config file. Some cited names can be different according to the preferences of Linux distributions. + When you do not have options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution, this last is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files + with different names. + + Now follow steps: + 1. Copy the configuration file to /usr/src/linux/.config. + 2. Select the necessary options: make oldconfig + 3. Compile your kernel image: make bzImage + 4. Compile your modules: make modules + 5. Copy your new kernel image for boot loader directory + 6. Install the new modules: make modules_install + 7. Generate an initial ramdisk image (`initrd`) if it is necessary. + 8. Update your boot loader + - title: Debug Filesystem + description: | + This thread needs to attach a tracepoint to monitor when a process schedules an exit event. To allow this specific feature, it is necessary to mount `debugfs` (`mount -t debugfs none /sys/kernel/debug/`). + configuration: + file: + name: "ebpf.d/process.conf" + description: "Overwrite default configuration helping to reduce memory usage. You can also select charts visible on dashboard." + options: + description: | + All options are defined inside section `[global]`. + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency.
+ default_value: 5 + required: false + - name: ebpf load mode + description: Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). + default_value: entry + required: false + - name: apps + description: Enable or disable integration with apps.plugin + default_value: no + required: false + - name: cgroups + description: Enable or disable integration with cgroup.plugin + default_value: no + required: false + - name: pid table size + description: Number of elements stored inside hash tables used to monitor calls per PID. + default_value: 32768 + required: false + - name: ebpf type format + description: "Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load)." + default_value: auto + required: false + - name: ebpf co-re tracing + description: "Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). This plugin will always try to attach a tracepoint, so option here will impact only function used to monitor task (thread and process) creation." + default_value: trampoline + required: false + - name: maps per core + description: Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. + default_value: yes + required: false + - name: lifetime + description: Set default lifetime for thread when enabled by cloud. 
+ default_value: 300 + required: false + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics show total number of calls to functions inside kernel." + labels: [] + metrics: + - name: system.process_thread + description: Start process + unit: "calls/s" + chart_type: line + dimensions: + - name: process + - name: system.process_status + description: Process not closed + unit: "difference" + chart_type: line + dimensions: + - name: process + - name: zombie + - name: system.exit + description: Exit process + unit: "calls/s" + chart_type: line + dimensions: + - name: process + - name: system.task_error + description: Fails to create process + unit: "calls/s" + chart_type: line + dimensions: + - name: task + - name: apps + description: "These Metrics show grouped information per apps group." + labels: [] + metrics: + - name: apps.process_create + description: Process started + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.thread_create + description: Threads started + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.task_exit + description: Tasks starts exit process + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.task_close + description: Tasks closed + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.task_error + description: Errors to create process or threads + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: cgroup + description: "These Metrics show grouped information per cgroup/service." 
+ labels: [] + metrics: + - name: cgroup.process_create + description: Process started + unit: "calls/s" + chart_type: line + dimensions: + - name: process + - name: cgroup.thread_create + description: Threads started + unit: "calls/s" + chart_type: line + dimensions: + - name: thread + - name: cgroup.task_exit + description: Tasks starts exit process + unit: "calls/s" + chart_type: line + dimensions: + - name: exit + - name: cgroup.task_close + description: Tasks closed + unit: "calls/s" + chart_type: line + dimensions: + - name: process + - name: cgroup.task_error + description: Errors to create process or threads + unit: "calls/s" + chart_type: line + dimensions: + - name: process + - name: services.process_create + description: Process started + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.thread_create + description: Threads started + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.task_close + description: Tasks starts exit process + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.task_exit + description: Tasks closed + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.task_error + description: Errors to create process or threads + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - meta: + plugin_name: ebpf.plugin + module_name: disk + monitored_instance: + name: eBPF Disk + link: "https://kernel.org/" + categories: + - data-collection.ebpf + icon_filename: "ebpf.jpg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - hard Disk + - eBPF + - latency + - partition + most_popular: false + overview: + data_collection: + metrics_description: "Measure latency for I/O events on disk." 
+ method_description: "Attach tracepoints to internal kernel functions." + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "The plugin needs setuid because it loads data inside kernel. Netdata sets necessary permissions during installation time." + default_behavior: + auto_detection: + description: "The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT), files inside debugfs, and presence of BTF files to decide which eBPF program will be attached." + limits: + description: "" + performance_impact: + description: "This thread will add overhead every time that an internal kernel function monitored by this thread is called." + setup: + prerequisites: + list: + - title: Compile kernel + description: | + Check if your kernel was compiled with necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside /boot/config file. Some cited names can be different according to the preferences of Linux distributions. + When you do not have options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution, this last is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files + with different names. + + Now follow steps: + 1. Copy the configuration file to /usr/src/linux/.config. + 2. Select the necessary options: make oldconfig + 3. Compile your kernel image: make bzImage + 4. Compile your modules: make modules + 5. Copy your new kernel image for boot loader directory + 6. Install the new modules: make modules_install + 7. Generate an initial ramdisk image (`initrd`) if it is necessary. + 8. Update your boot loader + - title: Debug Filesystem + description: | + This thread needs to attach a tracepoint to monitor when a process schedules an exit event.
To allow this specific feature, it is necessary to mount `debugfs` (`mount -t debugfs none /sys/kernel/debug/`). + configuration: + file: + name: "ebpf.d/disk.conf" + description: "Overwrite default configuration reducing number of I/O events." + options: + description: | + All options are defined inside section `[global]`. + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency. + default_value: 5 + required: false + - name: ebpf load mode + description: Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). + default_value: entry + required: false + - name: lifetime + description: Set default lifetime for thread when enabled by cloud. + default_value: 300 + required: false + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: disk + description: "These metrics measure latency for I/O events on every hard disk present on host." + labels: [] + metrics: + - name: disk.latency_io + description: Disk latency + unit: "calls/s" + chart_type: stacked + dimensions: + - name: latency + - meta: + plugin_name: ebpf.plugin + module_name: hardirq + monitored_instance: + name: eBPF Hardirq + link: "https://kernel.org/" + categories: + - data-collection.ebpf + icon_filename: "ebpf.jpg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - HardIRQ + - eBPF + most_popular: false + overview: + data_collection: + metrics_description: "Monitor latency for each HardIRQ available." + method_description: "Attach tracepoints to internal kernel functions."
+ supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "The plugin needs setuid because it loads data inside kernel. Netdata sets necessary permissions during installation time." + default_behavior: + auto_detection: + description: "The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT), files inside debugfs, and presence of BTF files to decide which eBPF program will be attached." + limits: + description: "" + performance_impact: + description: "This thread will add overhead every time that an internal kernel function monitored by this thread is called." + setup: + prerequisites: + list: + - title: Compile kernel + description: | + Check if your kernel was compiled with necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside /boot/config file. Some cited names can be different according to the preferences of Linux distributions. + When you do not have options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution, this last is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files + with different names. + + Now follow steps: + 1. Copy the configuration file to /usr/src/linux/.config. + 2. Select the necessary options: make oldconfig + 3. Compile your kernel image: make bzImage + 4. Compile your modules: make modules + 5. Copy your new kernel image for boot loader directory + 6. Install the new modules: make modules_install + 7. Generate an initial ramdisk image (`initrd`) if it is necessary. + 8. Update your boot loader + - title: Debug Filesystem + description: | + This thread needs to attach a tracepoint to monitor when a process schedules an exit event. To allow this specific feature, it is necessary to mount `debugfs` (`mount -t debugfs none /sys/kernel/debug/`).
+ configuration: + file: + name: "ebpf.d/hardirq.conf" + description: "Overwrite default configuration reducing number of I/O events." + options: + description: | + All options are defined inside section `[global]`. + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency. + default_value: 5 + required: false + - name: ebpf load mode + description: Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). + default_value: entry + required: false + - name: lifetime + description: Set default lifetime for thread when enabled by cloud. + default_value: 300 + required: false + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics show latest timestamp for each hardIRQ available on host." + labels: [] + metrics: + - name: system.hardirq_latency + description: Hard IRQ latency + unit: "milliseconds" + chart_type: stacked + dimensions: + - name: hardirq names + - meta: + plugin_name: ebpf.plugin + module_name: cachestat + monitored_instance: + name: eBPF Cachestat + link: "https://kernel.org/" + categories: + - data-collection.ebpf + icon_filename: "ebpf.jpg" + related_resources: + integrations: + list: + - plugin_name: apps.plugin + module_name: apps + - plugin_name: cgroups.plugin + module_name: cgroups + info_provided_to_referring_integrations: + description: "" + keywords: + - Page cache + - Hit ratio + - eBPF + most_popular: false + overview: + data_collection: + metrics_description: "Monitor Linux page cache events giving for users a general vision about how his kernel is manipulating files." + method_description: "Attach tracing (kprobe, trampoline) to internal kernel functions according options used to compile kernel." 
+ supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "The plugin needs setuid because it loads data inside kernel. Netdata sets necessary permissions during installation time." + default_behavior: + auto_detection: + description: "The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) and presence of BTF files to decide which eBPF program will be attached." + limits: + description: "" + performance_impact: + description: "This thread will add overhead every time that an internal kernel function monitored by this thread is called. The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology." + setup: + prerequisites: + list: + - title: Compile kernel + description: | + Check if your kernel was compiled with necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside /boot/config file. Some cited names can be different according to the preferences of Linux distributions. + When you do not have options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution, this last is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files + with different names. + + Now follow steps: + 1. Copy the configuration file to /usr/src/linux/.config. + 2. Select the necessary options: make oldconfig + 3. Compile your kernel image: make bzImage + 4. Compile your modules: make modules + 5. Copy your new kernel image for boot loader directory + 6. Install the new modules: make modules_install + 7. Generate an initial ramdisk image (`initrd`) if it is necessary. + 8. Update your boot loader + configuration: + file: + name: "ebpf.d/cachestat.conf" + description: "Overwrite default configuration helping to reduce memory usage.
You can also select charts visible on dashboard." + options: + description: | + All options are defined inside section `[global]`. + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency. + default_value: 5 + required: false + - name: ebpf load mode + description: Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). + default_value: entry + required: false + - name: apps + description: Enable or disable integration with apps.plugin + default_value: no + required: false + - name: cgroups + description: Enable or disable integration with cgroup.plugin + default_value: no + required: false + - name: pid table size + description: Number of elements stored inside hash tables used to monitor calls per PID. + default_value: 32768 + required: false + - name: ebpf type format + description: "Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load)." + default_value: auto + required: false + - name: ebpf co-re tracing + description: "Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code)." + default_value: trampoline + required: false + - name: maps per core + description: Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. + default_value: yes + required: false + - name: lifetime + description: Set default lifetime for thread when enabled by cloud. 
+ default_value: 300 + required: false + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics show total number of calls to functions inside kernel." + labels: [] + metrics: + - name: mem.cachestat_ratio + description: Hit ratio + unit: "%" + chart_type: line + dimensions: + - name: ratio + - name: mem.cachestat_dirties + description: Number of dirty pages + unit: "page/s" + chart_type: line + dimensions: + - name: dirty + - name: mem.cachestat_hits + description: Number of accessed files + unit: "hits/s" + chart_type: line + dimensions: + - name: hit + - name: mem.cachestat_misses + description: Files out of page cache + unit: "misses/s" + chart_type: line + dimensions: + - name: miss + - name: apps + description: "These Metrics show grouped information per apps group." + labels: [] + metrics: + - name: apps.cachestat_ratio + description: Hit ratio + unit: "%" + chart_type: line + dimensions: + - name: a dimension per app group + - name: apps.cachestat_dirties + description: Number of dirty pages + unit: "page/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.cachestat_hits + description: Number of accessed files + unit: "hits/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.cachestat_misses + description: Files out of page cache + unit: "misses/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.cachestat_ratio + description: Hit ratio + unit: "%" + chart_type: line + dimensions: + - name: ratio + - name: cgroup.cachestat_dirties + description: Number of dirty pages + unit: "page/s" + chart_type: line + dimensions: + - name: dirty + - name: cgroup.cachestat_hits + description: 
Number of accessed files + unit: "hits/s" + chart_type: line + dimensions: + - name: hit + - name: cgroup.cachestat_misses + description: Files out of page cache + unit: "misses/s" + chart_type: line + dimensions: + - name: miss + - name: services.cachestat_ratio + description: Hit ratio + unit: "%" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: services.cachestat_dirties + description: Number of dirty pages + unit: "page/s" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: services.cachestat_hits + description: Number of accessed files + unit: "hits/s" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: services.cachestat_misses + description: Files out of page cache + unit: "misses/s" + chart_type: line + dimensions: + - name: a dimension per systemd service + - meta: + plugin_name: ebpf.plugin + module_name: sync + monitored_instance: + name: eBPF Sync + link: "https://kernel.org/" + categories: + - data-collection.ebpf + icon_filename: "ebpf.jpg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - syscall + - eBPF + - hard disk + - memory + most_popular: false + overview: + data_collection: + metrics_description: "Monitor syscall responsible to move data from memory to storage device." + method_description: "Attach tracing (kprobe, trampoline) to internal kernel functions according options used to compile kernel." + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "The plugin needs setuid because it loads data inside kernel. Netada sets necessary permission during installation time." 
+ default_behavior: + auto_detection: + description: "The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT, CONFIG_HAVE_SYSCALL_TRACEPOINTS), files inside debugfs, and presence of BTF files to decide which eBPF program will be attached." + limits: + description: "" + performance_impact: + description: "This thread will add overhead every time that an internal kernel function monitored by this thread is called. The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology." + setup: + prerequisites: + list: + - title: Compile kernel + description: | + Check if your kernel was compiled with necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside /boot/config file. Some cited names can be different according to the preferences of Linux distributions. + When you do not have options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution, the latter is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files + with different names. + + Now follow steps: + 1. Copy the configuration file to /usr/src/linux/.config. + 2. Select the necessary options: make oldconfig + 3. Compile your kernel image: make bzImage + 4. Compile your modules: make modules + 5. Copy your new kernel image for boot loader directory + 6. Install the new modules: make modules_install + 7. Generate an initial ramdisk image (`initrd`) if it is necessary. + 8. Update your boot loader + - title: Debug Filesystem + description: | + This thread needs to attach a tracepoint to monitor when a process schedules an exit event. To allow this specific feature, it is necessary to mount `debugfs` (`mount -t debugfs none /sys/kernel/debug`).
+ configuration: + file: + name: "ebpf.d/sync.conf" + description: "Overwrite default configuration and allows user to select charts visible on dashboard." + options: + description: | + This configuration file have two different sections. The `[global]` overwrites all default options, while `[syscalls]` allow user to select the syscall to monitor. + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency. + default_value: 5 + required: false + - name: ebpf load mode + description: Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). + default_value: entry + required: false + - name: apps + description: Enable or disable integration with apps.plugin + default_value: no + required: false + - name: cgroups + description: Enable or disable integration with cgroup.plugin + default_value: no + required: false + - name: pid table size + description: Number of elements stored inside hash tables used to monitor calls per PID. + default_value: 32768 + required: false + - name: ebpf type format + description: "Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load)." + default_value: auto + required: false + - name: ebpf co-re tracing + description: "Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code)." + default_value: trampoline + required: false + - name: maps per core + description: Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. 
+ default_value: yes + required: false + - name: lifetime + description: Set default lifetime for thread when enabled by cloud. + default_value: 300 + required: false + - name: sync + description: Enable or disable monitoring for syscall `sync` + default_value: yes + required: false + - name: msync + description: Enable or disable monitoring for syscall `msync` + default_value: yes + required: false + - name: fsync + description: Enable or disable monitoring for syscall `fsync` + default_value: yes + required: false + - name: fdatasync + description: Enable or disable monitoring for syscall `fdatasync` + default_value: yes + required: false + - name: syncfs + description: Enable or disable monitoring for syscall `syncfs` + default_value: yes + required: false + - name: sync_file_range + description: Enable or disable monitoring for syscall `sync_file_range` + default_value: yes + required: false + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: sync_freq + link: https://github.com/netdata/netdata/blob/master/health/health.d/synchronization.conf + metric: mem.sync + info: + number of sync() system calls. Every call causes all pending modifications to filesystem metadata and cached file data to be written to the + underlying filesystems. + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics show total number of calls to functions inside kernel." + labels: [] + metrics: + - name: mem.file_sync + description: Monitor calls for <code>fsync(2)</code> and <code>fdatasync(2)</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: fsync + - name: fdatasync + - name: mem.memory_map + description: Monitor calls for <code>msync(2)</code>.
+ unit: "calls/s" + chart_type: line + dimensions: + - name: msync + - name: mem.sync + description: Monitor calls for <code>sync(2)</code> and <code>syncfs(2)</code>. + unit: "calls/s" + chart_type: line + dimensions: + - name: sync + - name: syncfs + - name: mem.file_segment + description: Monitor calls for <code>sync_file_range(2)</code>. + unit: "calls/s" + chart_type: line + dimensions: + - name: sync_file_range + - meta: + plugin_name: ebpf.plugin + module_name: mdflush + monitored_instance: + name: eBPF MDflush + link: "https://kernel.org/" + categories: + - data-collection.ebpf + icon_filename: "ebpf.jpg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - MD + - RAID + - eBPF + most_popular: false + overview: + data_collection: + metrics_description: "Monitor when flush events happen between disks." + method_description: "Attach tracing (kprobe, trampoline) to internal kernel functions according options used to compile kernel." + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "The plugin needs setuid because it loads data inside kernel. Netdata sets necessary permission during installation time." + default_behavior: + auto_detection: + description: "The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) and presence of BTF files to decide which eBPF program will be attached." + limits: + description: "" + performance_impact: + description: "This thread will add overhead every time that `md_flush_request` is called. The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology."
+ setup: + prerequisites: + list: + - title: Compile kernel + description: | + Check if your kernel was compiled with necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside /boot/config file. Some cited names can be different accoring preferences of Linux distributions. + When you do not have options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution, this last is preferred. The kernel compilation has a well definedd pattern, but distributions can deliver their configuration files + with different names. + + Now follow steps: + 1. Copy the configuration file to /usr/src/linux/.config. + 2. Select the necessary options: make oldconfig + 3. Compile your kernel image: make bzImage + 4. Compile your modules: make modules + 5. Copy your new kernel image for boot loader directory + 6. Install the new modules: make modules_install + 7. Generate an initial ramdisk image (`initrd`) if it is necessary. + 8. Update your boot loader + configuration: + file: + name: "ebpf.d/mdflush.conf" + description: "Overwrite default configuration reducing I/O events." + options: + description: | + All options are defined inside section `[global]`. + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency. + default_value: 5 + required: false + - name: ebpf load mode + description: Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). + default_value: entry + required: false + - name: lifetime + description: Set default lifetime for thread when enabled by cloud. 
+ default_value: 300 + required: false + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "Number of times md_flush_request was called since last time." + labels: [] + metrics: + - name: mdstat.mdstat_flush + description: MD flushes + unit: "flushes" + chart_type: stacked + dimensions: + - name: disk + - meta: + plugin_name: ebpf.plugin + module_name: swap + monitored_instance: + name: eBPF SWAP + link: "https://kernel.org/" + categories: + - data-collection.ebpf + icon_filename: "ebpf.jpg" + related_resources: + integrations: + list: + - plugin_name: apps.plugin + module_name: apps + - plugin_name: cgroups.plugin + module_name: cgroups + info_provided_to_referring_integrations: + description: "" + keywords: + - SWAP + - memory + - eBPF + - Hard Disk + most_popular: false + overview: + data_collection: + metrics_description: "Monitors when swap has I/O events and applications executing events." + method_description: "Attach tracing (kprobe, trampoline) to internal kernel functions according options used to compile kernel." + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "The plugin needs setuid because it loads data inside kernel. Netada sets necessary permission during installation time." + default_behavior: + auto_detection: + description: "The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) and presence of BTF files to decide which eBPF program will be attached." + limits: + description: "" + performance_impact: + description: "This thread will add overhead every time that an internal kernel function monitored by this thread is called. 
The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology." + setup: + prerequisites: + list: + - title: Compile kernel + description: | + Check if your kernel was compiled with necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside /boot/config file. Some cited names can be different accoring preferences of Linux distributions. + When you do not have options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution, this last is preferred. The kernel compilation has a well definedd pattern, but distributions can deliver their configuration files + with different names. + + Now follow steps: + 1. Copy the configuration file to /usr/src/linux/.config. + 2. Select the necessary options: make oldconfig + 3. Compile your kernel image: make bzImage + 4. Compile your modules: make modules + 5. Copy your new kernel image for boot loader directory + 6. Install the new modules: make modules_install + 7. Generate an initial ramdisk image (`initrd`) if it is necessary. + 8. Update your boot loader + configuration: + file: + name: "ebpf.d/swap.conf" + description: "Overwrite default configuration helping to reduce memory usage. You can also select charts visible on dashboard." + options: + description: | + All options are defined inside section `[global]`. + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency. + default_value: 5 + required: false + - name: ebpf load mode + description: Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). 
+ default_value: entry + required: false + - name: apps + description: Enable or disable integration with apps.plugin + default_value: no + required: false + - name: cgroups + description: Enable or disable integration with cgroup.plugin + default_value: no + required: false + - name: pid table size + description: Number of elements stored inside hash tables used to monitor calls per PID. + default_value: 32768 + required: false + - name: ebpf type format + description: "Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load)." + default_value: auto + required: false + - name: ebpf co-re tracing + description: "Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code)." + default_value: trampoline + required: false + - name: maps per core + description: Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. + default_value: yes + required: false + - name: lifetime + description: Set default lifetime for thread when enabled by cloud. + default_value: 300 + required: false + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: cgroup + description: "These Metrics show grouped information per cgroup/service." + labels: [] + metrics: + - name: cgroup.swap_read + description: Calls to function <code>swap_readpage</code>. + unit: "calls/s" + chart_type: line + dimensions: + - name: read + - name: cgroup.swap_write + description: Calls to function <code>swap_writepage</code>. 
+ unit: "calls/s" + chart_type: line + dimensions: + - name: write + - name: services.swap_read + description: Calls to <code>swap_readpage</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.swap_write + description: Calls to function <code>swap_writepage</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: apps + description: "These Metrics show grouped information per apps group." + labels: [] + metrics: + - name: apps.swap_read_call + description: Calls to function <code>swap_readpage</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.swap_write_call + description: Calls to function <code>swap_writepage</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: global + description: "These metrics show total number of calls to functions inside kernel." + labels: [] + metrics: + - name: mem.swapcalls + description: Calls to access swap memory + unit: "calls/s" + chart_type: line + dimensions: + - name: write + - name: read + - meta: + plugin_name: ebpf.plugin + module_name: oomkill + monitored_instance: + name: eBPF OOMkill + link: "https://kernel.org/" + categories: + - data-collection.ebpf + icon_filename: "ebpf.jpg" + related_resources: + integrations: + list: + - plugin_name: apps.plugin + module_name: apps + - plugin_name: cgroups.plugin + module_name: cgroups + info_provided_to_referring_integrations: + description: "" + keywords: + - application + - memory + most_popular: false + overview: + data_collection: + metrics_description: "Monitor applications that reach out of memory." + method_description: "Attach tracepoint to internal kernel functions." 
+ supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "The plugin needs setuid because it loads data inside kernel. Netdata sets necessary permission during installation time." + default_behavior: + auto_detection: + description: "The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT), files inside debugfs, and presence of BTF files to decide which eBPF program will be attached." + limits: + description: "" + performance_impact: + description: "This thread will add overhead every time that an internal kernel function monitored by this thread is called." + setup: + prerequisites: + list: + - title: Compile kernel + description: | + Check if your kernel was compiled with necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside /boot/config file. Some cited names can be different according to the preferences of Linux distributions. + When you do not have options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution, the latter is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files + with different names. + + Now follow steps: + 1. Copy the configuration file to /usr/src/linux/.config. + 2. Select the necessary options: make oldconfig + 3. Compile your kernel image: make bzImage + 4. Compile your modules: make modules + 5. Copy your new kernel image for boot loader directory + 6. Install the new modules: make modules_install + 7. Generate an initial ramdisk image (`initrd`) if it is necessary. + 8. Update your boot loader + - title: Debug Filesystem + description: | + This thread needs to attach a tracepoint to monitor when a process schedules an exit event. To allow this specific feature, it is necessary to mount `debugfs` (`mount -t debugfs none /sys/kernel/debug/`).
+ configuration: + file: + name: "ebpf.d/oomkill.conf" + description: "Overwrite default configuration reducing number of I/O events." + options: + description: | + Overwrite default configuration reducing number of I/O events + folding: + title: "Config options" + enabled: true + list: [] + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: + - name: update every + description: Data collection frequency. + default_value: 5 + required: false + - name: ebpf load mode + description: Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). + default_value: entry + required: false + - name: lifetime + description: Set default lifetime for thread when enabled by cloud. + default_value: 300 + required: false + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: cgroup + description: "These metrics show cgroup/service that reached OOM." + labels: [] + metrics: + - name: cgroup.oomkills + description: OOM kills. This chart is provided by eBPF plugin. + unit: "kills" + chart_type: line + dimensions: + - name: cgroup name + - name: services.oomkills + description: OOM kills. This chart is provided by eBPF plugin. + unit: "kills" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: apps + description: "These metrics show cgroup/service that reached OOM." 
+ labels: [] + metrics: + - name: apps.oomkills + description: OOM kills + unit: "kills" + chart_type: stacked + dimensions: + - name: a dimension per app group + - meta: + plugin_name: ebpf.plugin + module_name: socket + monitored_instance: + name: eBPF Socket + link: "https://kernel.org/" + categories: + - data-collection.ebpf + icon_filename: "ebpf.jpg" + related_resources: + integrations: + list: + - plugin_name: apps.plugin + module_name: apps + - plugin_name: cgroups.plugin + module_name: cgroups + info_provided_to_referring_integrations: + description: "" + keywords: + - TCP + - UDP + - bandwidth + - server + - connection + - socket + most_popular: false + overview: + data_collection: + metrics_description: "Monitor bandwidth consumption per application for protocols TCP and UDP." + method_description: "Attach tracing (kprobe, trampoline) to internal kernel functions according options used to compile kernel." + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "The plugin needs setuid because it loads data inside kernel. Netada sets necessary permission during installation time." + default_behavior: + auto_detection: + description: "The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) and presence of BTF files to decide which eBPF program will be attached." + limits: + description: "" + performance_impact: + description: "This thread will add overhead every time that an internal kernel function monitored by this thread is called. The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology." + setup: + prerequisites: + list: + - title: Compile kernel + description: | + Check if your kernel was compiled with necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside /boot/config file. 
Some cited names can be different accoring preferences of Linux distributions. + When you do not have options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution, this last is preferred. The kernel compilation has a well definedd pattern, but distributions can deliver their configuration files + with different names. + + Now follow steps: + 1. Copy the configuration file to /usr/src/linux/.config. + 2. Select the necessary options: make oldconfig + 3. Compile your kernel image: make bzImage + 4. Compile your modules: make modules + 5. Copy your new kernel image for boot loader directory + 6. Install the new modules: make modules_install + 7. Generate an initial ramdisk image (`initrd`) if it is necessary. + 8. Update your boot loader + configuration: + file: + name: "ebpf.d/network.conf" + description: "Overwrite default configuration helping to reduce memory usage. You can also select charts visible on dashboard." + options: + description: | + All options are defined inside section `[global]`. Options inside `network connections` are ignored for while. + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency. + default_value: 5 + required: false + - name: ebpf load mode + description: Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). + default_value: entry + required: false + - name: apps + description: Enable or disable integration with apps.plugin + default_value: no + required: false + - name: cgroups + description: Enable or disable integration with cgroup.plugin + default_value: no + required: false + - name: bandwidth table size + description: Number of elements stored inside hash tables used to monitor calls per PID. 
+ default_value: 16384 + required: false + - name: ipv4 connection table size + description: Number of elements stored inside hash tables used to monitor calls per IPV4 connections. + default_value: 16384 + required: false + - name: ipv6 connection table size + description: Number of elements stored inside hash tables used to monitor calls per IPV6 connections. + default_value: 16384 + required: false + - name: udp connection table size + description: Number of temporary elements stored inside hash tables used to monitor UDP connections. + default_value: 4096 + required: false + - name: ebpf type format + description: "Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load)." + default_value: auto + required: false + - name: ebpf co-re tracing + description: "Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code)." + default_value: trampoline + required: false + - name: maps per core + description: Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. + default_value: yes + required: false + - name: lifetime + description: Set default lifetime for thread when enabled by cloud. + default_value: 300 + required: false + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics show total number of calls to functions inside kernel." + labels: [] + metrics: + - name: ip.inbound_conn + description: Inbound connections. 
+ unit: "connections/s" + chart_type: line + dimensions: + - name: connection_tcp + - name: ip.tcp_outbound_conn + description: TCP outbound connections. + unit: "connections/s" + chart_type: line + dimensions: + - name: received + - name: ip.tcp_functions + description: Calls to internal functions + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - name: send + - name: closed + - name: ip.total_tcp_bandwidth + description: TCP bandwidth + unit: "kilobits/s" + chart_type: line + dimensions: + - name: received + - name: send + - name: ip.tcp_error + description: TCP errors + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - name: send + - name: ip.tcp_retransmit + description: Packages retransmitted + unit: "calls/s" + chart_type: line + dimensions: + - name: retransmited + - name: ip.udp_functions + description: UDP calls + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - name: send + - name: ip.total_udp_bandwidth + description: UDP bandwidth + unit: "kilobits/s" + chart_type: line + dimensions: + - name: received + - name: send + - name: ip.udp_error + description: UDP errors + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - name: send + - name: apps + description: "These metrics show grouped information per apps group." 
+ labels: [] + metrics: + - name: apps.outbound_conn_v4 + description: Calls to tcp_v4_connection + unit: "connections/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.outbound_conn_v6 + description: Calls to tcp_v6_connection + unit: "connections/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.total_bandwidth_sent + description: Bytes sent + unit: "kilobits/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.total_bandwidth_recv + description: bytes received + unit: "kilobits/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.bandwidth_tcp_send + description: Calls for tcp_sendmsg + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.bandwidth_tcp_recv + description: Calls for tcp_cleanup_rbuf + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.bandwidth_tcp_retransmit + description: Calls for tcp_retransmit + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.bandwidth_udp_send + description: Calls for udp_sendmsg + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.bandwidth_udp_recv + description: Calls for udp_recvmsg + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: services.net_conn_ipv4 + description: Calls to tcp_v4_connection + unit: "connections/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.net_conn_ipv4 + description: Calls to tcp_v4_connection + unit: "connections/s" + chart_type: line + dimensions: + - name: connected_v4 + - name: cgroup.net_conn_ipv6 + description: Calls to tcp_v6_connection + unit: "connections/s" + chart_type: 
line + dimensions: + - name: connected_v6 + - name: cgroup.net_bytes_recv + description: Bytes received + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - name: cgroup.net_bytes_sent + description: Bytes sent + unit: "calls/s" + chart_type: line + dimensions: + - name: sent + - name: cgroup.net_tcp_recv + description: Calls to tcp_cleanup_rbuf. + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - name: cgroup.net_tcp_send + description: Calls to tcp_sendmsg. + unit: "calls/s" + chart_type: line + dimensions: + - name: sent + - name: cgroup.net_retransmit + description: Calls to tcp_retransmit. + unit: "calls/s" + chart_type: line + dimensions: + - name: retransmitted + - name: cgroup.net_udp_send + description: Calls to udp_sendmsg + unit: "calls/s" + chart_type: line + dimensions: + - name: sent + - name: cgroup.net_udp_recv + description: Calls to udp_recvmsg + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - name: services.net_conn_ipv6 + description: Calls to tcp_v6_connection + unit: "connections/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_bytes_recv + description: Bytes received + unit: "kilobits/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_bytes_sent + description: Bytes sent + unit: "kilobits/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_tcp_recv + description: Calls to tcp_cleanup_rbuf. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_tcp_send + description: Calls to tcp_sendmsg. 
+ unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_tcp_retransmit + description: Calls to tcp_retransmit + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_udp_send + description: Calls to udp_sendmsg + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_udp_recv + description: Calls to udp_recvmsg + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - meta: + plugin_name: ebpf.plugin + module_name: dcstat + monitored_instance: + name: eBPF DCstat + link: "https://kernel.org/" + categories: + - data-collection.ebpf + icon_filename: "ebpf.jpg" + related_resources: + integrations: + list: + - plugin_name: apps.plugin + module_name: apps + - plugin_name: cgroups.plugin + module_name: cgroups + info_provided_to_referring_integrations: + description: "" + keywords: + - Directory Cache + - File system + - eBPF + most_popular: false + overview: + data_collection: + metrics_description: "Monitor directory cache events per application given an overall vision about files on memory or storage device." + method_description: "Attach tracing (kprobe, trampoline) to internal kernel functions according to options used to compile kernel." + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "The plugin needs setuid because it loads data inside kernel. Netdata sets necessary permission during installation time." + default_behavior: + auto_detection: + description: "The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) and presence of BTF files to decide which eBPF program will be attached."
+ limits: + description: "" + performance_impact: + description: "This thread will add overhead every time that an internal kernel function monitored by this thread is called. The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology." + setup: + prerequisites: + list: + - title: Compile kernel + description: | + Check if your kernel was compiled with necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside /boot/config file. Some cited names can be different according to preferences of Linux distributions. + When you do not have options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution, this last is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files + with different names. + + Now follow steps: + 1. Copy the configuration file to /usr/src/linux/.config. + 2. Select the necessary options: make oldconfig + 3. Compile your kernel image: make bzImage + 4. Compile your modules: make modules + 5. Copy your new kernel image for boot loader directory + 6. Install the new modules: make modules_install + 7. Generate an initial ramdisk image (`initrd`) if it is necessary. + 8. Update your boot loader + configuration: + file: + name: "ebpf.d/dcstat.conf" + description: "Overwrite default configuration helping to reduce memory usage. You can also select charts visible on dashboard." + options: + description: | + All options are defined inside section `[global]`. + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency. + default_value: 5 + required: false + - name: ebpf load mode + description: Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`).
+ default_value: entry + required: false + - name: apps + description: Enable or disable integration with apps.plugin + default_value: no + required: false + - name: cgroups + description: Enable or disable integration with cgroup.plugin + default_value: no + required: false + - name: pid table size + description: Number of elements stored inside hash tables used to monitor calls per PID. + default_value: 32768 + required: false + - name: ebpf type format + description: "Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load)." + default_value: auto + required: false + - name: ebpf co-re tracing + description: "Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code)." + default_value: trampoline + required: false + - name: maps per core + description: Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. + default_value: yes + required: false + - name: lifetime + description: Set default lifetime for thread when enabled by cloud. + default_value: 300 + required: false + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: apps + description: "These Metrics show grouped information per apps group." 
+ labels: [] + metrics: + - name: apps.dc_ratio + description: Percentage of files inside directory cache + unit: "%" + chart_type: line + dimensions: + - name: a dimension per app group + - name: apps.dc_reference + description: Count file access + unit: "files" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.dc_not_cache + description: Files not present inside directory cache + unit: "files" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.dc_not_found + description: Files not found + unit: "files" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: filesystem + description: "These metrics show total number of calls to functions inside kernel." + labels: [] + metrics: + - name: filesystem.dc_reference + description: Variables used to calculate hit ratio. + unit: "files" + chart_type: line + dimensions: + - name: reference + - name: slow + - name: miss + - name: filesystem.dc_hit_ratio + description: Percentage of files inside directory cache + unit: "%" + chart_type: line + dimensions: + - name: ratio + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.dc_ratio + description: Percentage of files inside directory cache + unit: "%" + chart_type: line + dimensions: + - name: ratio + - name: cgroup.dc_reference + description: Count file access + unit: "files" + chart_type: line + dimensions: + - name: reference + - name: cgroup.dc_not_cache + description: Files not present inside directory cache + unit: "files" + chart_type: line + dimensions: + - name: slow + - name: cgroup.dc_not_found + description: Files not found + unit: "files" + chart_type: line + dimensions: + - name: miss + - name: services.dc_ratio + description: Percentage of files inside directory cache + unit: "%" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: services.dc_reference + description: Count file access + unit: "files" + 
chart_type: line + dimensions: + - name: a dimension per systemd service + - name: services.dc_not_cache + description: Files not present inside directory cache + unit: "files" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: services.dc_not_found + description: Files not found + unit: "files" + chart_type: line + dimensions: + - name: a dimension per systemd service + - meta: + plugin_name: ebpf.plugin + module_name: filesystem + monitored_instance: + name: eBPF Filesystem + link: "https://kernel.org/" + categories: + - data-collection.ebpf + icon_filename: "ebpf.jpg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - Filesystem + - ext4 + - btrfs + - nfs + - xfs + - zfs + - eBPF + - latency + - I/O + most_popular: false + overview: + data_collection: + metrics_description: "Monitor latency for main actions on filesystem like I/O events." + method_description: "Attach tracing (kprobe, trampoline) to internal kernel functions according to options used to compile kernel." + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "The plugin needs setuid because it loads data inside kernel. Netdata sets necessary permission during installation time." + default_behavior: + auto_detection: + description: "The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT), files inside debugfs, and presence of BTF files to decide which eBPF program will be attached." + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Compile kernel + description: | + Check if your kernel was compiled with necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside /boot/config file. Some cited names can be different according to preferences of Linux distributions.
+ When you do not have options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution, this last is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files + with different names. + + Now follow steps: + 1. Copy the configuration file to /usr/src/linux/.config. + 2. Select the necessary options: make oldconfig + 3. Compile your kernel image: make bzImage + 4. Compile your modules: make modules + 5. Copy your new kernel image for boot loader directory + 6. Install the new modules: make modules_install + 7. Generate an initial ramdisk image (`initrd`) if it is necessary. + 8. Update your boot loader + configuration: + file: + name: "ebpf.d/filesystem.conf" + description: "Overwrite default configuration and allows user to select charts visible on dashboard." + options: + description: | + This configuration file has two different sections. The `[global]` overwrites default options, while `[filesystem]` allows user to select the filesystems to monitor. + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency. + default_value: 5 + required: false + - name: ebpf load mode + description: Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). + default_value: entry + required: false + - name: lifetime + description: Set default lifetime for thread when enabled by cloud. + default_value: 300 + required: false + - name: btrfsdist + description: Enable or disable latency monitoring for functions associated with btrfs filesystem. + default_value: yes + required: false + - name: ext4dist + description: Enable or disable latency monitoring for functions associated with ext4 filesystem.
+ default_value: yes + required: false + - name: nfsdist + description: Enable or disable latency monitoring for functions associated with nfs filesystem. + default_value: yes + required: false + - name: xfsdist + description: Enable or disable latency monitoring for functions associated with xfs filesystem. + default_value: yes + required: false + - name: zfsdist + description: Enable or disable latency monitoring for functions associated with zfs filesystem. + default_value: yes + required: false + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: filesystem + description: "Latency charts associate with filesystem actions." + labels: [] + metrics: + - name: filesystem.read_latency + description: ext4 latency for each read request. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: latency period + - name: filesystem.open_latency + description: ext4 latency for each open request. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: latency period + - name: filesystem.sync_latency + description: ext4 latency for each sync request. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: latency period + - name: iilesystem + description: "" + labels: [] + metrics: + - name: filesystem.write_latency + description: ext4 latency for each write request. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: latency period + - name: global + description: "" + labels: [] + metrics: + - name: filesystem.attributte_latency + description: nfs latency for each attribute request. 
+ unit: "calls/s" + chart_type: stacked + dimensions: + - name: latency period + - meta: + plugin_name: ebpf.plugin + module_name: shm + monitored_instance: + name: eBPF SHM + link: "https://kernel.org/" + categories: + - data-collection.ebpf + icon_filename: "ebpf.jpg" + related_resources: + integrations: + list: + - plugin_name: apps.plugin + module_name: apps + - plugin_name: cgroups.plugin + module_name: cgroups + info_provided_to_referring_integrations: + description: "" + keywords: + - syscall + - shared memory + - eBPF + most_popular: false + overview: + data_collection: + metrics_description: "Monitor syscall responsible to manipulate shared memory." + method_description: "Attach tracing (kprobe, trampoline) to internal kernel functions according to options used to compile kernel." + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "The plugin needs setuid because it loads data inside kernel. Netdata sets necessary permission during installation time." + default_behavior: + auto_detection: + description: "The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) and presence of BTF files to decide which eBPF program will be attached." + limits: + description: "" + performance_impact: + description: "This thread will add overhead every time that an internal kernel function monitored by this thread is called. The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology." + setup: + prerequisites: + list: + - title: Compile kernel + description: | + Check if your kernel was compiled with necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside /boot/config file. Some cited names can be different according to preferences of Linux distributions.
+ When you do not have options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution, this last is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files + with different names. + + Now follow steps: + 1. Copy the configuration file to /usr/src/linux/.config. + 2. Select the necessary options: make oldconfig + 3. Compile your kernel image: make bzImage + 4. Compile your modules: make modules + 5. Copy your new kernel image for boot loader directory + 6. Install the new modules: make modules_install + 7. Generate an initial ramdisk image (`initrd`) if it is necessary. + 8. Update your boot loader + - title: Debug Filesystem + description: | + This thread needs to attach a tracepoint to monitor when a process schedules an exit event. To allow this specific feature, it is necessary to mount `debugfs` (`mount -t debugfs none /sys/kernel/debug/`). + configuration: + file: + name: "ebpf.d/shm.conf" + description: "Overwrite default configuration and allows user to select charts visible on dashboard." + options: + description: | + This configuration file has two different sections. The `[global]` overwrites all default options, while `[syscalls]` allows user to select the syscall to monitor. + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency. + default_value: 5 + required: false + - name: ebpf load mode + description: Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`).
+ default_value: entry + required: false + - name: apps + description: Enable or disable integration with apps.plugin + default_value: no + required: false + - name: cgroups + description: Enable or disable integration with cgroup.plugin + default_value: no + required: false + - name: pid table size + description: Number of elements stored inside hash tables used to monitor calls per PID. + default_value: 32768 + required: false + - name: ebpf type format + description: "Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load)." + default_value: auto + required: false + - name: ebpf co-re tracing + description: "Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code)." + default_value: trampoline + required: false + - name: maps per core + description: Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. + default_value: yes + required: false + - name: lifetime + description: Set default lifetime for thread when enabled by cloud. 
+ default_value: 300 + required: false + - name: shmget + description: Enable or disable monitoring for syscall `shmget` + default_value: yes + required: false + - name: shmat + description: Enable or disable monitoring for syscall `shmat` + default_value: yes + required: false + - name: shmdt + description: Enable or disable monitoring for syscall `shmdt` + default_value: yes + required: false + - name: shmctl + description: Enable or disable monitoring for syscall `shmctl` + default_value: yes + required: false + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: cgroup + description: "These Metrics show grouped information per cgroup/service." + labels: [] + metrics: + - name: cgroup.shmget + description: Calls to syscall <code>shmget(2)</code>. + unit: "calls/s" + chart_type: line + dimensions: + - name: get + - name: cgroup.shmat + description: Calls to syscall <code>shmat(2)</code>. + unit: "calls/s" + chart_type: line + dimensions: + - name: at + - name: cgroup.shmdt + description: Calls to syscall <code>shmdt(2)</code>. + unit: "calls/s" + chart_type: line + dimensions: + - name: dt + - name: cgroup.shmctl + description: Calls to syscall <code>shmctl(2)</code>. + unit: "calls/s" + chart_type: line + dimensions: + - name: ctl + - name: services.shmget + description: Calls to syscall <code>shmget(2)</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.shmat + description: Calls to syscall <code>shmat(2)</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.shmdt + description: Calls to syscall <code>shmdt(2)</code>. 
+ unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.shmctl + description: Calls to syscall <code>shmctl(2)</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: apps + description: "These Metrics show grouped information per apps group." + labels: [] + metrics: + - name: apps.shmget_call + description: Calls to syscall <code>shmget(2)</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.shmat_call + description: Calls to syscall <code>shmat(2)</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.shmdt_call + description: Calls to syscall <code>shmdt(2)</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.shmctl_call + description: Calls to syscall <code>shmctl(2)</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: global + description: "These Metrics show number of calls for specified syscall." + labels: [] + metrics: + - name: system.shared_memory_calls + description: Calls to shared memory system calls + unit: "calls/s" + chart_type: line + dimensions: + - name: get + - name: at + - name: dt + - name: ctl + - meta: + plugin_name: ebpf.plugin + module_name: softirq + monitored_instance: + name: eBPF SoftIRQ + link: "https://kernel.org/" + categories: + - data-collection.ebpf + icon_filename: "ebpf.jpg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - SoftIRQ + - eBPF + most_popular: false + overview: + data_collection: + metrics_description: "Monitor latency for each SoftIRQ available." + method_description: "Attach kprobe to internal kernel functions." 
+ supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "The plugin needs setuid because it loads data inside kernel. Netdata sets necessary permission during installation time." + default_behavior: + auto_detection: + description: "The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT), files inside debugfs, and presence of BTF files to decide which eBPF program will be attached." + limits: + description: "" + performance_impact: + description: "This thread will add overhead every time that an internal kernel function monitored by this thread is called." + setup: + prerequisites: + list: + - title: Compile kernel + description: | + Check if your kernel was compiled with necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside /boot/config file. Some cited names can be different according to preferences of Linux distributions. + When you do not have options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution, this last is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files + with different names. + + Now follow steps: + 1. Copy the configuration file to /usr/src/linux/.config. + 2. Select the necessary options: make oldconfig + 3. Compile your kernel image: make bzImage + 4. Compile your modules: make modules + 5. Copy your new kernel image for boot loader directory + 6. Install the new modules: make modules_install + 7. Generate an initial ramdisk image (`initrd`) if it is necessary. + 8. Update your boot loader + - title: Debug Filesystem + description: | + This thread needs to attach a tracepoint to monitor when a process schedules an exit event.
To allow this specific feature, it is necessary to mount `debugfs` (`mount -t debugfs none /sys/kernel/debug/`). + configuration: + file: + name: "ebpf.d/softirq.conf" + description: "Overwrite default configuration reducing number of I/O events." + options: + description: | + All options are defined inside section `[global]`. + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency. + default_value: 5 + required: false + - name: ebpf load mode + description: Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). + default_value: entry + required: false + - name: lifetime + description: Set default lifetime for thread when enabled by cloud. + default_value: 300 + required: false + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics show latest timestamp for each softIRQ available on host." + labels: [] + metrics: + - name: system.softirq_latency + description: Soft IRQ latency + unit: "milliseconds" + chart_type: stacked + dimensions: + - name: soft IRQs + - meta: + plugin_name: ebpf.plugin + module_name: mount + monitored_instance: + name: eBPF Mount + link: "https://kernel.org/" + categories: + - data-collection.ebpf + icon_filename: "ebpf.jpg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - mount + - umount + - device + - eBPF + most_popular: false + overview: + data_collection: + metrics_description: "Monitor calls for mount and umount syscall." + method_description: "Attach tracing (kprobe, trampoline) to internal kernel functions according to options used to compile kernel."
+ supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "The plugin needs setuid because it loads data inside kernel. Netdata sets necessary permission during installation time." + default_behavior: + auto_detection: + description: "The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT, CONFIG_HAVE_SYSCALL_TRACEPOINTS), files inside debugfs, and presence of BTF files to decide which eBPF program will be attached." + limits: + description: "" + performance_impact: + description: "This thread will add overhead every time that an internal kernel function monitored by this thread is called. The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology." + setup: + prerequisites: + list: + - title: Compile kernel + description: | + Check if your kernel was compiled with necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside /boot/config file. Some cited names can be different according to preferences of Linux distributions. + When you do not have options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution, this last is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files + with different names. + + Now follow steps: + 1. Copy the configuration file to /usr/src/linux/.config. + 2. Select the necessary options: make oldconfig + 3. Compile your kernel image: make bzImage + 4. Compile your modules: make modules + 5. Copy your new kernel image for boot loader directory + 6. Install the new modules: make modules_install + 7. Generate an initial ramdisk image (`initrd`) if it is necessary. + 8.
Update your boot loader + - title: Debug Filesystem + description: | + This thread needs to attach a tracepoint to monitor when a process schedules an exit event. To allow this specific feature, it is necessary to mount `debugfs` (`mount -t debugfs none /sys/kernel/debug/`). + configuration: + file: + name: "ebpf.d/mount.conf" + description: "Overwrite default configuration." + options: + description: | + All options are defined inside section `[global]`. + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency. + default_value: 5 + required: false + - name: ebpf load mode + description: Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). + default_value: entry + required: false + - name: ebpf type format + description: "Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load)." + default_value: auto + required: false + - name: ebpf co-re tracing + description: "Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code)." + default_value: trampoline + required: false + - name: lifetime + description: Set default lifetime for thread when enabled by cloud. + default_value: 300 + required: false + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "Calls for syscalls mount and umount."
+ labels: [] + metrics: + - name: mount_points.call + description: Calls to mount and umount syscalls + unit: "calls/s" + chart_type: line + dimensions: + - name: mount + - name: umount + - name: mount_points.error + description: Errors to mount and umount file systems + unit: "calls/s" + chart_type: line + dimensions: + - name: mount + - name: umount + - meta: + plugin_name: ebpf.plugin + module_name: vfs + monitored_instance: + name: eBPF VFS + link: "https://kernel.org/" + categories: + - data-collection.ebpf + icon_filename: "ebpf.jpg" + related_resources: + integrations: + list: + - plugin_name: apps.plugin + module_name: apps + - plugin_name: cgroups.plugin + module_name: cgroups + info_provided_to_referring_integrations: + description: "" + keywords: + - virtual + - filesystem + - eBPF + - I/O + - files + most_popular: false + overview: + data_collection: + metrics_description: "Monitor I/O events on Linux Virtual Filesystem." + method_description: "Attach tracing (kprobe, trampoline) to internal kernel functions according to the options used to compile kernel." + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "The plugin needs setuid because it loads data inside kernel. Netdata sets necessary permission during installation time." + default_behavior: + auto_detection: + description: "The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) and presence of BTF files to decide which eBPF program will be attached." + limits: + description: "" + performance_impact: + description: "This thread will add overhead every time that an internal kernel function monitored by this thread is called. The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology." 
+ setup: + prerequisites: + list: + - title: Compile kernel + description: | + Check if your kernel was compiled with necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside /boot/config file. Some cited names can be different according to the preferences of Linux distributions. + When you do not have options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution, this last is preferred. The kernel compilation has a well defined pattern, but distributions can deliver their configuration files + with different names. + + Now follow steps: + 1. Copy the configuration file to /usr/src/linux/.config. + 2. Select the necessary options: make oldconfig + 3. Compile your kernel image: make bzImage + 4. Compile your modules: make modules + 5. Copy your new kernel image for boot loader directory + 6. Install the new modules: make modules_install + 7. Generate an initial ramdisk image (`initrd`) if it is necessary. + 8. Update your boot loader + configuration: + file: + name: "ebpf.d/vfs.conf" + description: "Overwrite default configuration helping to reduce memory usage." + options: + description: | + All options are defined inside section `[global]`. + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency. + default_value: 5 + required: false + - name: ebpf load mode + description: Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). + default_value: entry + required: false + - name: apps + description: Enable or disable integration with apps.plugin + default_value: no + required: false + - name: cgroups + description: Enable or disable integration with cgroup.plugin + default_value: no + required: false + - name: pid table size + description: Number of elements stored inside hash tables used to monitor calls per PID. 
+ default_value: 32768 + required: false + - name: ebpf type format + description: "Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load)." + default_value: auto + required: false + - name: ebpf co-re tracing + description: "Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code)." + default_value: trampoline + required: false + - name: maps per core + description: Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. + default_value: yes + required: false + - name: lifetime + description: Set default lifetime for thread when enabled by cloud. + default_value: 300 + required: false + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: cgroup + description: "These Metrics show grouped information per cgroup/service." 
+ labels: [] + metrics: + - name: cgroup.vfs_unlink + description: Files deleted + unit: "calls/s" + chart_type: line + dimensions: + - name: delete + - name: cgroup.vfs_write + description: Write to disk + unit: "calls/s" + chart_type: line + dimensions: + - name: write + - name: cgroup.vfs_write_error + description: Fails to write + unit: "calls/s" + chart_type: line + dimensions: + - name: write + - name: cgroup.vfs_read + description: Read from disk + unit: "calls/s" + chart_type: line + dimensions: + - name: read + - name: cgroup.vfs_read_error + description: Fails to read + unit: "calls/s" + chart_type: line + dimensions: + - name: read + - name: cgroup.vfs_write_bytes + description: Bytes written on disk + unit: "bytes/s" + chart_type: line + dimensions: + - name: write + - name: cgroup.vfs_read_bytes + description: Bytes read from disk + unit: "bytes/s" + chart_type: line + dimensions: + - name: read + - name: cgroup.vfs_fsync + description: Calls for <code>vfs_fsync</code> + unit: "calls/s" + chart_type: line + dimensions: + - name: fsync + - name: cgroup.vfs_fsync_error + description: Sync error + unit: "calls/s" + chart_type: line + dimensions: + - name: fsync + - name: cgroup.vfs_open + description: Calls for <code>vfs_open</code> + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: cgroup.vfs_open_error + description: Open error + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: cgroup.vfs_create + description: Calls for <code>vfs_create</code> + unit: "calls/s" + chart_type: line + dimensions: + - name: create + - name: cgroup.vfs_create_error + description: Create error + unit: "calls/s" + chart_type: line + dimensions: + - name: create + - name: services.vfs_unlink + description: Files deleted + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_write + description: Write to disk + unit: "calls/s" + chart_type: stacked + dimensions: + - 
name: a dimension per systemd service + - name: services.vfs_write_error + description: Fails to write + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_read + description: Read from disk + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_read_error + description: Fails to read + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_write_bytes + description: Bytes written on disk + unit: "bytes/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_read_bytes + description: Bytes read from disk + unit: "bytes/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_fsync + description: Calls to <code>vfs_fsync</code> + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_fsync_error + description: Sync error + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_open + description: Calls to <code>vfs_open</code> + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_open_error + description: Open error + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_create + description: Calls to <code>vfs_create</code> + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_create_error + description: Create error + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: global + description: "These Metrics show grouped information per cgroup/service." 
+ labels: [] + metrics: + - name: filesystem.vfs_deleted_objects + description: Remove files + unit: "calls/s" + chart_type: line + dimensions: + - name: delete + - name: filesystem.vfs_io + description: Calls to IO + unit: "calls/s" + chart_type: line + dimensions: + - name: read + - name: write + - name: filesystem.vfs_io_bytes + description: Bytes written and read + unit: "bytes/s" + chart_type: line + dimensions: + - name: read + - name: write + - name: filesystem.vfs_io_error + description: Fails to write or read + unit: "calls/s" + chart_type: line + dimensions: + - name: read + - name: write + - name: filesystem.vfs_fsync + description: Calls for <code>vfs_fsync</code> + unit: "calls/s" + chart_type: line + dimensions: + - name: fsync + - name: filesystem.vfs_fsync_error + description: Fails to synchronize + unit: "calls/s" + chart_type: line + dimensions: + - name: fsync + - name: filesystem.vfs_open + description: Calls for <code>vfs_open</code> + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: filesystem.vfs_open_error + description: Fails to open a file + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: filesystem.vfs_create + description: Calls for <code>vfs_create</code> + unit: "calls/s" + chart_type: line + dimensions: + - name: create + - name: filesystem.vfs_create_error + description: Fails to create a file. + unit: "calls/s" + chart_type: line + dimensions: + - name: create + - name: apps + description: "These Metrics show grouped information per apps group." 
+ labels: [] + metrics: + - name: apps.file_deleted + description: Files deleted + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_write_call + description: Write to disk + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_write_error + description: Fails to write + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_read_call + description: Read from disk + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_read_error + description: Fails to read + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_write_bytes + description: Bytes written on disk + unit: "bytes/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_read_bytes + description: Bytes read on disk + unit: "bytes/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_fsync + description: Calls for <code>vfs_fsync</code> + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_fsync_error + description: Sync error + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_open + description: Calls for <code>vfs_open</code> + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_open_error + description: Open error + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_create + description: Calls for <code>vfs_create</code> + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_create_error + description: Create error + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension 
per app group + - meta: + plugin_name: ebpf.plugin + module_name: process + monitored_instance: + name: eBPF Process + link: "https://github.com/netdata/netdata/" + categories: + - data-collection.ebpf + icon_filename: "ebpf.jpg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - Memory + - plugin + - eBPF + most_popular: false + overview: + data_collection: + metrics_description: "Monitor internal memory usage." + method_description: "Uses netdata internal statistic to monitor memory management by plugin." + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Netdata flags. + description: "To have these charts you need to compile netdata with flag `NETDATA_DEV_MODE`." + configuration: + file: + name: "" + description: "" + options: + description: "" + folding: + title: "" + enabled: true + list: [] + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "How plugin is allocating memory." + labels: [] + metrics: + - name: netdata.ebpf_aral_stat_size + description: Bytes allocated for ARAL. + unit: "bytes" + chart_type: stacked + dimensions: + - name: memory + - name: netdata.ebpf_aral_stat_alloc + description: Calls to allocate memory. 
+ unit: "calls" + chart_type: stacked + dimensions: + - name: aral + - name: netdata.ebpf_threads + description: Threads info + unit: "threads" + chart_type: line + dimensions: + - name: total + - name: running + - name: netdata.ebpf_load_methods + description: Load info + unit: "methods" + chart_type: line + dimensions: + - name: legacy + - name: co-re + - name: netdata.ebpf_kernel_memory + description: Memory allocated for hash tables. + unit: "bytes" + chart_type: line + dimensions: + - name: memory_locked + - name: netdata.ebpf_hash_tables_count + description: Number of hash tables loaded + unit: "hash tables" + chart_type: line + dimensions: + - name: hash_table + - name: netdata.ebpf_aral_stat_size + description: Bytes allocated for ARAL + unit: "bytes" + chart_type: stacked + dimensions: + - name: memory + - name: netdata.ebpf_aral_stat_alloc + description: Calls to allocate memory + unit: "calls" + chart_type: stacked + dimensions: + - name: aral + - name: netdata.ebpf_aral_stat_size + description: Bytes allocated for ARAL. + unit: "bytes" + chart_type: stacked + dimensions: + - name: memory + - name: netdata.ebpf_aral_stat_alloc + description: Calls to allocate memory + unit: "calls" + chart_type: stacked + dimensions: + - name: aral + - name: netdata.ebpf_hash_tables_insert_pid_elements + description: Number of times an element was inserted in a hash table. + unit: "rows" + chart_type: line + dimensions: + - name: thread + - name: netdata.ebpf_hash_tables_remove_pid_elements + description: Number of times an element was removed in a hash table. 
+ unit: "rows" + chart_type: line + dimensions: + - name: thread diff --git a/collectors/ebpf.plugin/metrics.csv b/collectors/ebpf.plugin/metrics.csv deleted file mode 100644 index 5714c976..00000000 --- a/collectors/ebpf.plugin/metrics.csv +++ /dev/null @@ -1,197 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -cgroup.fd_open,cgroup,open,calls/s,Number of open files,line,,ebpf.plugin,filedescriptor -cgroup.fd_open_error,cgroup,open,calls/s,Fails to open files,line,,ebpf.plugin,filedescriptor -cgroup.fd_closed,cgroup,close,calls/s,Files closed,line,,ebpf.plugin,filedescriptor -cgroup.fd_close_error,cgroup,close,calls/s,Fails to close files,line,,ebpf.plugin,filedescriptor -services.file_open,,a dimension per systemd service,calls/s,Number of open files,stacked,,ebpf.plugin,filedescriptor -services.file_open_error,,a dimension per systemd service,calls/s,Fails to open files,stacked,,ebpf.plugin,filedescriptor -services.file_closed,,a dimension per systemd service,calls/s,Files closed,stacked,,ebpf.plugin,filedescriptor -services.file_close_error,,a dimension per systemd service,calls/s,Fails to close files,stacked,,ebpf.plugin,filedescriptor -apps.file_open,,a dimension per app group,calls/s,Number of open files,stacked,,ebpf.plugin,filedescriptor -apps.file_open_error,,a dimension per app group,calls/s,Fails to open files,stacked,,ebpf.plugin,filedescriptor -apps.file_closed,,a dimension per app group,calls/s,Files closed,stacked,,ebpf.plugin,filedescriptor -apps.file_close_error,,a dimension per app group,calls/s,Fails to close files,stacked,,ebpf.plugin,filedescriptor -filesystem.file_descriptor,,"open, close",calls/s,Open and close calls,line,,ebpf.plugin,filedescriptor -filesystem.file_error,,"open, close",calls/s,Open fails,line,,ebpf.plugin,filedescriptor -system.process_thread,,process,calls/s,Start process,line,,ebpf.plugin,processes -system.process_status,,"process, zombie",difference,Process not 
closed,line,,ebpf.plugin,processes -system.exit,,process,calls/s,Exit process,line,,ebpf.plugin,processes -system.task_error,,task,calls/s,Fails to create process,line,,ebpf.plugin,processes -apps.process_create,,a dimension per app group,calls/s,Process started,stacked,,ebpf.plugin,processes -apps.thread_create,,a dimension per app group,calls/s,Threads started,stacked,,ebpf.plugin,processes -apps.task_exit,,a dimension per app group,calls/s,Tasks starts exit process,stacked,,ebpf.plugin,processes -apps.task_close,,a dimension per app group,calls/s,Tasks closed,stacked,,ebpf.plugin,processes -apps.task_error,,a dimension per app group,calls/s,Errors to create process or threads,stacked,,ebpf.plugin,processes -cgroup.process_create,cgroup,process,calls/s,Process started,line,,ebpf.plugin,processes -cgroup.thread_create,cgroup,thread,calls/s,Threads started,line,,ebpf.plugin,processes -cgroup.task_exit,cgroup,exit,calls/s,Tasks starts exit process,line,,ebpf.plugin,processes -cgroup.task_close,cgroup,process,calls/s,Tasks closed,line,,ebpf.plugin,processes -cgroup.task_error,cgroup,process,calls/s,Errors to create process or threads,line,,ebpf.plugin,processes -services.process_create,cgroup,a dimension per systemd service,calls/s,Process started,stacked,,ebpf.plugin,processes -services.thread_create,cgroup,a dimension per systemd service,calls/s,Threads started,stacked,,ebpf.plugin,processes -services.task_close,cgroup,a dimension per systemd service,calls/s,Tasks starts exit process,stacked,,ebpf.plugin,processes -services.task_exit,cgroup,a dimension per systemd service,calls/s,Tasks closed,stacked,,ebpf.plugin,processes -services.task_error,cgroup,a dimension per systemd service,calls/s,Errors to create process or threads,stacked,,ebpf.plugin,processes -disk.latency_io,disk,latency,calls/s,Disk latency,stacked,,ebpf.plugin,disk -system.hardirq_latency,,hardirq names,milisecondds,Hardware IRQ latency,stacked,,ebpf.plugin,hardirq -apps.cachestat_ratio,,a dimension 
per app group,%,Hit ratio,line,,ebpf.plugin,cachestat -apps.cachestat_dirties,,a dimension per app group,page/s,Number of dirty pages,stacked,,ebpf.plugin,cachestat -apps.cachestat_hits,,a dimension per app group,hits/s,Number of accessed files,stacked,,ebpf.plugin,cachestat -apps.cachestat_misses,,a dimension per app group,misses/s,Files out of page cache,stacked,,ebpf.plugin,cachestat -services.cachestat_ratio,,a dimension per systemd service,%,Hit ratio,line,,ebpf.plugin,cachestat -services.cachestat_dirties,,a dimension per systemd service,page/s,Number of dirty pages,line,,ebpf.plugin,cachestat -services.cachestat_hits,,a dimension per systemd service,hits/s,Number of accessed files,line,,ebpf.plugin,cachestat -services.cachestat_misses,,a dimension per systemd service,misses/s,Files out of page cache,line,,ebpf.plugin,cachestat -cgroup.cachestat_ratio,cgroup,ratio,%,Hit ratio,line,,ebpf.plugin,cachestat -cgroup.cachestat_dirties,cgroup,dirty,page/s,Number of dirty pages,line,,ebpf.plugin,cachestat -cgroup.cachestat_hits,cgroup,hit,hits/s,Number of accessed files,line,,ebpf.plugin,cachestat -cgroup.cachestat_misses,cgroup,miss,misses/s,Files out of page cache,line,,ebpf.plugin,cachestat -mem.file_sync,,"fsync, fdatasync",calls/s,Monitor calls for <code>fsync(2)</code> and <code>fdatasync(2)</code>.,stacked,,ebpf.plugin,sync -mem.meory_map,,msync,calls/s,Monitor calls for <code>msync(2)</code>.,line,,ebpf.plugin,sync -mem.sync,,"sync, syncfs",calls/s,Monitor calls for <code>sync(2)</code> and <code>syncfs(2)</code>.,line,,ebpf.plugin,sync -mem.file_segment,,sync_file_range,calls/s,Monitor calls for <code>sync_file_range(2)</code>.,line,,ebpf.plugin,sync -mem.cachestat_ratio,,ratio,%,Hit ratio,line,,ebpf.plugin,cachestat -mem.cachestat_dirties,,dirty,page/s,Number of dirty pages,line,,ebpf.plugin,cachestat -mem.cachestat_hits,,hit,hits/s,Number of accessed files,line,,ebpf.plugin,cachestat -mem.cachestat_misses,,miss,misses/s,Files out of page 
cache,line,,ebpf.plugin,cachestat -mdstat.mdstat_flush,,disk,flushes,MD flushes,stacked,,ebpf.plugin,mdflush -cgroup.swap_read,cgroup,read,calls/s,Calls to function <code>swap_readpage</code>.,line,,ebpf.plugin,swap -cgroup.swap_write,cgroup,write,calls/s,Calls to function <code>swap_writepage</code>.,line,,ebpf.plugin,swap -services.swap_read,,a dimension per systemd service,calls/s,Calls to <code>swap_readpage</code>.,stacked,,ebpf.plugin,swap -services.swap_write,,a dimension per systemd service,calls/s,Calls to function <code>swap_writepage</code>.,stacked,,ebpf.plugin,swap -apps.swap_read_call,,a dimension per app group,calls/s,Calls to function <code>swap_readpage</code>.,stacked,,ebpf.plugin,swap -apps.swap_write_call,,a dimension per app group,calls/s,Calls to function <code>swap_writepage</code>.,stacked,,ebpf.plugin,swap -system.swapcalls,,"write, read",calls/s,Calls to access swap memory,line,,ebpf.plugin,swap -cgroup.oomkills,cgroup,cgroup name,kills,OOM kills. This chart is provided by eBPF plugin.,line,,ebpf.plugin,oomkill -services.oomkills,,a dimension per systemd service,kills,OOM kills. 
This chart is provided by eBPF plugin.,line,,ebpf.plugin,oomkill -apps.oomkills,,a dimension per app group,kills,OOM kills,stacked,,ebpf.plugin,oomkill -ip.inbound_conn,,connection_tcp,connections/s,Inbound connections.,line,,ebpf.plugin,socket -ip.tcp_outbound_conn,,received,connections/s,TCP outbound connections.,line,,ebpf.plugin,socket -ip.tcp_functions,,"received, send, closed",calls/s,Calls to internal functions,line,,ebpf.plugin,socket -ip.total_tcp_bandwidth,,"received, send",kilobits/s,TCP bandwidth,line,,ebpf.plugin,socket -ip.tcp_error,,"received, send",calls/s,TCP errors,line,,ebpf.plugin,socket -ip.tcp_retransmit,,retransmited,calls/s,Packages retransmitted,line,,ebpf.plugin,socket -ip.udp_functions,,"received, send",calls/s,UDP calls,line,,ebpf.plugin,socket -ip.total_udp_bandwidth,,"received, send",kilobits/s,UDP bandwidth,line,,ebpf.plugin,socket -ip.udp_error,,"received, send",calls/s,UDP errors,line,,ebpf.plugin,socket -apps.outbound_conn_v4,,a dimension per app group,connections/s,Calls to tcp_v4_connection,stacked,,ebpf.plugin,socket -apps.outbound_conn_v6,,a dimension per app group,connections/s,Calls to tcp_v6_connection,stacked,,ebpf.plugin,socket -apps.total_bandwidth_sent,,a dimension per app group,kilobits/s,Bytes sent,stacked,,ebpf.plugin,socket -apps.total_bandwidth_recv,,a dimension per app group,kilobits/s,bytes received,stacked,,ebpf.plugin,socket -apps.bandwidth_tcp_send,,a dimension per app group,calls/s,Calls for tcp_sendmsg,stacked,,ebpf.plugin,socket -apps.bandwidth_tcp_recv,,a dimension per app group,calls/s,Calls for tcp_cleanup_rbuf,stacked,,ebpf.plugin,socket -apps.bandwidth_tcp_retransmit,,a dimension per app group,calls/s,Calls for tcp_retransmit,stacked,,ebpf.plugin,socket -apps.bandwidth_udp_send,,a dimension per app group,calls/s,Calls for udp_sendmsg,stacked,,ebpf.plugin,socket -apps.bandwidth_udp_recv,,a dimension per app group,calls/s,Calls for udp_recvmsg,stacked,,ebpf.plugin,socket 
-cgroup.net_conn_ipv4,cgroup,connected_v4,connections/s,Calls to tcp_v4_connection,line,,ebpf.plugin,socket -cgroup.net_conn_ipv6,cgroup,connected_v6,connections/s,Calls to tcp_v6_connection,line,,ebpf.plugin,socket -cgroup.net_bytes_recv,cgroup,received,calls/s,Bytes received,line,,ebpf.plugin,socket -cgroup.net_bytes_sent,cgroup,sent,calls/s,Bytes sent,line,,ebpf.plugin,socket -cgroup.net_tcp_recv,cgroup,received,calls/s,Calls to tcp_cleanup_rbuf.,line,,ebpf.plugin,socket -cgroup.net_tcp_send,cgroup,sent,calls/s,Calls to tcp_sendmsg.,line,,ebpf.plugin,socket -cgroup.net_retransmit,cgroup,retransmitted,calls/s,Calls to tcp_retransmit.,line,,ebpf.plugin,socket -cgroup.net_udp_send,cgroup,sent,calls/s,Calls to udp_sendmsg,line,,ebpf.plugin,socket -cgroup.net_udp_recv,cgroup,received,calls/s,Calls to udp_recvmsg,line,,ebpf.plugin,socket -services.net_conn_ipv4,,a dimension per systemd service,connections/s,Calls to tcp_v4_connection,stacked,,ebpf.plugin,socket -services.net_conn_ipv6,,a dimension per systemd service,connections/s,Calls to tcp_v6_connection,stacked,,ebpf.plugin,socket -services.net_bytes_recv,,a dimension per systemd service,kilobits/s,Bytes received,stacked,,ebpf.plugin,socket -services.net_bytes_sent,,a dimension per systemd service,kilobits/s,Bytes sent,stacked,,ebpf.plugin,socket -services.net_tcp_recv,,a dimension per systemd service,calls/s,Calls to tcp_cleanup_rbuf.,stacked,,ebpf.plugin,socket -services.net_tcp_send,,a dimension per systemd service,calls/s,Calls to tcp_sendmsg.,stacked,,ebpf.plugin,socket -services.net_tcp_retransmit,,a dimension per systemd service,calls/s,Calls to tcp_retransmit,stacked,,ebpf.plugin,socket -services.net_udp_send,,a dimension per systemd service,calls/s,Calls to udp_sendmsg,stacked,,ebpf.plugin,socket -services.net_udp_recv,,a dimension per systemd service,calls/s,Calls to udp_recvmsg,stacked,,ebpf.plugin,socket -apps.dc_ratio,,a dimension per app group,%,Percentage of files inside directory 
cache,line,,ebpf.plugin,dcstat -apps.dc_reference,,a dimension per app group,files,Count file access,stacked,,ebpf.plugin,dcstat -apps.dc_not_cache,,a dimension per app group,files,Files not present inside directory cache,stacked,,ebpf.plugin,dcstat -apps.dc_not_found,,a dimension per app group,files,Files not found,stacked,,ebpf.plugin,dcstat -cgroup.dc_ratio,cgroup,ratio,%,Percentage of files inside directory cache,line,,ebpf.plugin,dcstat -cgroup.dc_reference,cgroup,reference,files,Count file access,line,,ebpf.plugin,dcstat -cgroup.dc_not_cache,cgroup,slow,files,Files not present inside directory cache,line,,ebpf.plugin,dcstat -cgroup.dc_not_found,cgroup,miss,files,Files not found,line,,ebpf.plugin,dcstat -services.dc_ratio,,a dimension per systemd service,%,Percentage of files inside directory cache,line,,ebpf.plugin,dcstat -services.dc_reference,,a dimension per systemd service,files,Count file access,line,,ebpf.plugin,dcstat -services.dc_not_cache,,a dimension per systemd service,files,Files not present inside directory cache,line,,ebpf.plugin,dcstat -services.dc_not_found,,a dimension per systemd service,files,Files not found,line,,ebpf.plugin,dcstat -filesystem.dc_hit_ratio,,ratio,%,Percentage of files inside directory cache,line,,ebpf.plugin,dcstat -filesystem.dc_reference,filesystem,"reference, slow, miss",files,Variables used to calculate hit ratio.,line,,ebpf.plugin,dcstat -filesystem.read_latency,filesystem,latency period,calls/s,ext4 latency for each read request.,stacked,,ebpf.plugin,filesystem -filesystem.write_latency,iilesystem,latency period,calls/s,ext4 latency for each write request.,stacked,,ebpf.plugin,filesystem -filesystem.open_latency,filesystem,latency period,calls/s,ext4 latency for each open request.,stacked,,ebpf.plugin,filesystem -filesystem.sync_latency,filesystem,latency period,calls/s,ext4 latency for each sync request.,stacked,,ebpf.plugin,filesystem -filesystem.attributte_latency,,latency period,calls/s,nfs latency for each 
attribute request.,stacked,,ebpf.plugin,filesystem -cgroup.shmget,cgroup,get,calls/s,Calls to syscall <code>shmget(2)</code>.,line,,ebpf.plugin,shm -cgroup.shmat,cgroup,at,calls/s,Calls to syscall <code>shmat(2)</code>.,line,,ebpf.plugin,shm -cgroup.shmdt,cgroup,dt,calls/s,Calls to syscall <code>shmdt(2)</code>.,line,,ebpf.plugin,shm -cgroup.shmctl,cgroup,ctl,calls/s,Calls to syscall <code>shmctl(2)</code>.,line,,ebpf.plugin,shm -services.shmget,,a dimension per systemd service,calls/s,Calls to syscall <code>shmget(2)</code>.,stacked,,ebpf.plugin,shm -services.shmat,,a dimension per systemd service,calls/s,Calls to syscall <code>shmat(2)</code>.,stacked,,ebpf.plugin,shm -services.shmdt,,a dimension per systemd service,calls/s,Calls to syscall <code>shmdt(2)</code>.,stacked,,ebpf.plugin,shm -services.shmctl,,a dimension per systemd service,calls/s,Calls to syscall <code>shmctl(2)</code>.,stacked,,ebpf.plugin,shm -apps.shmget_call,,a dimension per app group,calls/s,Calls to syscall <code>shmget(2)</code>.,stacked,,ebpf.plugin,shm -apps.shmat_call,,a dimension per app group,calls/s,Calls to syscall <code>shmat(2)</code>.,stacked,,ebpf.plugin,shm -apps.shmdt_call,,a dimension per app group,calls/s,Calls to syscall <code>shmdt(2)</code>.,stacked,,ebpf.plugin,shm -apps.shmctl_call,,a dimension per app group,calls/s,Calls to syscall <code>shmctl(2)</code>.,stacked,,ebpf.plugin,shm -system.shared_memory_calls,,"get, at, dt, ctl",calls/s,Calls to shared memory system calls,line,,ebpf.plugin,shm -system.softirq_latency,,soft IRQs,miliseconds,Software IRQ latency,stacked,,ebpf.plugin,softirq -mount_points.call,,"mount, umount",calls/s,Calls to mount and umount syscalls,line,,ebpf.plugin,mount -mount_points.error,,"mount, umount",calls/s,Errors to mount and umount file systems,line,,ebpf.plugin,mount -cgroup.vfs_unlink,cgroup,delete,calls/s,Files deleted,line,,ebpf.plugin,vfs -cgroup.vfs_write,cgroup,write,calls/s,Write to disk,line,,ebpf.plugin,vfs 
-cgroup.vfs_write_error,cgroup,write,calls/s,Fails to write,line,,ebpf.plugin,vfs -cgroup.vfs_read,cgroup,read,calls/s,Read from disk,line,,ebpf.plugin,vfs -cgroup.vfs_read_error,cgroup,read,calls/s,Fails to read,line,,ebpf.plugin,vfs -cgroup.vfs_write_bytes,cgroup,write,bytes/s,Bytes written on disk,line,,ebpf.plugin,vfs -cgroup.vfs_read_bytes,cgroup,read,bytes/s,Bytes read from disk,line,,ebpf.plugin,vfs -cgroup.vfs_fsync,cgroup,fsync,calls/s,Calls for <code>vfs_fsync</code>,line,,ebpf.plugin,vfs -cgroup.vfs_fsync_error,cgroup,fsync,calls/s,Sync error,line,,ebpf.plugin,vfs -cgroup.vfs_open,cgroup,open,calls/s,Calls for <code>vfs_open</code>,line,,ebpf.plugin,vfs -cgroup.vfs_open_error,cgroup,open,calls/s,Open error,line,,ebpf.plugin,vfs -cgroup.vfs_create,cgroup,create,calls/s,Calls for <code>vfs_create</code>,line,,ebpf.plugin,vfs -cgroup.vfs_create_error,cgroup,create,calls/s,Create error,line,,ebpf.plugin,vfs -services.vfs_unlink,,a dimension per systemd service,calls/s,Files deleted,stacked,,ebpf.plugin,vfs -services.vfs_write,,a dimension per systemd service,calls/s,Write to disk,stacked,,ebpf.plugin,vfs -services.vfs_write_error,,a dimension per systemd service,calls/s,Fails to write,stacked,,ebpf.plugin,vfs -services.vfs_read,,a dimension per systemd service,calls/s,Read from disk,stacked,,ebpf.plugin,vfs -services.vfs_read_error,,a dimension per systemd service,calls/s,Fails to read,stacked,,ebpf.plugin,vfs -services.vfs_write_bytes,,a dimension per systemd service,bytes/s,Bytes written on disk,stacked,,ebpf.plugin,vfs -services.vfs_read_bytes,,a dimension per systemd service,bytes/s,Bytes read from disk,stacked,,ebpf.plugin,vfs -services.vfs_fsync,,a dimension per systemd service,calls/s,Calls to <code>vfs_fsync</code>,stacked,,ebpf.plugin,vfs -services.vfs_fsync_error,,a dimension per systemd service,calls/s,Sync error,stacked,,ebpf.plugin,vfs -services.vfs_open,,a dimension per systemd service,calls/s,Calls to 
<code>vfs_open</code>,stacked,,ebpf.plugin,vfs -services.vfs_open_error,,a dimension per systemd service,calls/s,Open error,stacked,,ebpf.plugin,vfs -services.vfs_create,,a dimension per systemd service,calls/s,Calls to <code>vfs_create</code>,stacked,,ebpf.plugin,vfs -services.vfs_create_error,,a dimension per systemd service,calls/s,Create error,stacked,,ebpf.plugin,vfs -filesystem.vfs_deleted_objects,,delete,calls/s,Remove files,line,,ebpf.plugin,vfs -filesystem.vfs_io,,"read, write",calls/s,Calls to IO,line,,ebpf.plugin,vfs -filesystem.vfs_io_bytes,,"read, write",bytes/s,Bytes written and read,line,,ebpf.plugin,vfs -filesystem.vfs_io_error,,"read, write",calls/s,Fails to write or read,line,,ebpf.plugin,vfs -filesystem.vfs_fsync,,fsync,calls/s,Calls for <code>vfs_fsync</code>,line,,ebpf.plugin,vfs -filesystem.vfs_fsync_error,,fsync,calls/s,Fails to synchronize,line,,ebpf.plugin,vfs -filesystem.vfs_open,,open,calls/s,Calls for <code>vfs_open</code>,line,,ebpf.plugin,vfs -filesystem.vfs_open_error,,open,calls/s,Fails to open a file,line,,ebpf.plugin,vfs -filesystem.vfs_create,,create,calls/s,Calls for <code>vfs_create</code>,line,,ebpf.plugin,vfs -filesystem.vfs_create_error,,create,calls/s,Fails to create a file.,line,,ebpf.plugin,vfs -apps.file_deleted,,a dimension per app group,calls/s,Files deleted,stacked,,ebpf.plugin,vfs -apps.vfs_write_call,,a dimension per app group,calls/s,Write to disk,stacked,,ebpf.plugin,vfs -apps.vfs_write_error,,a dimension per app group,calls/s,Fails to write,stacked,,ebpf.plugin,vfs -apps.vfs_read_call,,a dimension per app group,calls/s,Read from disk,stacked,,ebpf.plugin,vfs -apps.vfs_read_error,,a dimension per app group,calls/s,Fails to read,stacked,,ebpf.plugin,vfs -apps.vfs_write_bytes,,a dimension per app group,bytes/s,Bytes written on disk,stacked,,ebpf.plugin,vfs -apps.vfs_read_bytes,,a dimension per app group,bytes/s,Bytes read on disk,stacked,,ebpf.plugin,vfs -apps.vfs_fsync,,a dimension per app group,calls/s,Calls for 
<code>vfs_fsync</code>,stacked,,ebpf.plugin,vfs -apps.vfs_fsync_error,,a dimension per app group,calls/s,Sync error,stacked,,ebpf.plugin,vfs -apps.vfs_open,,a dimension per app group,calls/s,Calls for <code>vfs_open</code>,stacked,,ebpf.plugin,vfs -apps.vfs_open_error,,a dimension per app group,calls/s,Open error,stacked,,ebpf.plugin,vfs -apps.vfs_create,,a dimension per app group,calls/s,Calls for <code>vfs_create</code>,stacked,,ebpf.plugin,vfs -apps.vfs_create_error,,a dimension per app group,calls/s,Create error,stacked,,ebpf.plugin,vfs -netdata.ebpf_aral_stat_size,,memory,bytes,Bytes allocated for ARAL.,stacked,,ebpf.plugin,process -netdata.ebpf_aral_stat_alloc,,aral,calls,Calls to allocate memory.,stacked,,ebpf.plugin,process -netdata.ebpf_threads,,"total, running",threads,Threads info,line,,ebpf.plugin,process -netdata.ebpf_load_methods,,"legacy, co-re",methods,Load info,line,,ebpf.plugin,process -netdata.ebpf_kernel_memory,,memory_locked,bytes,Memory allocated for hash tables.,line,,ebpf.plugin,process -netdata.ebpf_hash_tables_count,,hash_table,hash tables,Number of hash tables loaded,line,,ebpf.plugin,process -netdata.ebpf_aral_stat_size,,memory,bytes,Bytes allocated for ARAL,stacked,,ebpf.plugin,process -netdata.ebpf_aral_stat_alloc,,aral,calls,Calls to allocate memory,stacked,,ebpf.plugin,process -netdata.ebpf_aral_stat_size,,memory,bytes,Bytes allocated for ARAL.,stacked,,ebpf.plugin,process -netdata.ebpf_aral_stat_alloc,,aral,calls,Calls to allocate memory,stacked,,ebpf.plugin,process diff --git a/collectors/ebpf.plugin/multi_metadata.yaml b/collectors/ebpf.plugin/multi_metadata.yaml deleted file mode 100644 index 9a31a403..00000000 --- a/collectors/ebpf.plugin/multi_metadata.yaml +++ /dev/null @@ -1,2360 +0,0 @@ -name: ebpf.plugin -modules: - - meta: - plugin_name: ebpf.plugin - module_name: filedescriptor - monitored_instance: - name: ebpf filedescriptor - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] 
- info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: cgroup - description: "" - labels: [] - metrics: - - name: cgroup.fd_open - description: Number of open files - unit: "calls/s" - chart_type: line - dimensions: - - name: open - - name: cgroup.fd_open_error - description: Fails to open files - unit: "calls/s" - chart_type: line - dimensions: - - name: open - - name: cgroup.fd_closed - description: Files closed - unit: "calls/s" - chart_type: line - dimensions: - - name: close - - name: cgroup.fd_close_error - description: Fails to close files - unit: "calls/s" - chart_type: line - dimensions: - - name: close - - name: global - description: "" - labels: [] - metrics: - - name: services.file_open - description: Number of open files - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.file_open_error - description: Fails to open files - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.file_closed - description: Files closed - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.file_close_error - description: Fails to close files 
- unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: apps.file_open - description: Number of open files - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.file_open_error - description: Fails to open files - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.file_closed - description: Files closed - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.file_close_error - description: Fails to close files - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: filesystem.file_descriptor - description: Open and close calls - unit: "calls/s" - chart_type: line - dimensions: - - name: open - - name: close - - name: filesystem.file_error - description: Open fails - unit: "calls/s" - chart_type: line - dimensions: - - name: open - - name: close - - meta: - plugin_name: ebpf.plugin - module_name: processes - monitored_instance: - name: ebpf processes - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - 
availability: [] - scopes: - - name: global - description: "" - labels: [] - metrics: - - name: system.process_thread - description: Start process - unit: "calls/s" - chart_type: line - dimensions: - - name: process - - name: system.process_status - description: Process not closed - unit: "difference" - chart_type: line - dimensions: - - name: process - - name: zombie - - name: system.exit - description: Exit process - unit: "calls/s" - chart_type: line - dimensions: - - name: process - - name: system.task_error - description: Fails to create process - unit: "calls/s" - chart_type: line - dimensions: - - name: task - - name: apps.process_create - description: Process started - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.thread_create - description: Threads started - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.task_exit - description: Tasks starts exit process - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.task_close - description: Tasks closed - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.task_error - description: Errors to create process or threads - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: cgroup - description: "" - labels: [] - metrics: - - name: cgroup.process_create - description: Process started - unit: "calls/s" - chart_type: line - dimensions: - - name: process - - name: cgroup.thread_create - description: Threads started - unit: "calls/s" - chart_type: line - dimensions: - - name: thread - - name: cgroup.task_exit - description: Tasks starts exit process - unit: "calls/s" - chart_type: line - dimensions: - - name: exit - - name: cgroup.task_close - description: Tasks closed - unit: "calls/s" - chart_type: line - dimensions: - - name: process - - name: cgroup.task_error 
- description: Errors to create process or threads - unit: "calls/s" - chart_type: line - dimensions: - - name: process - - name: services.process_create - description: Process started - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.thread_create - description: Threads started - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.task_close - description: Tasks starts exit process - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.task_exit - description: Tasks closed - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.task_error - description: Errors to create process or threads - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - meta: - plugin_name: ebpf.plugin - module_name: disk - monitored_instance: - name: ebpf disk - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: disk - description: "" - labels: [] - metrics: 
- - name: disk.latency_io - description: Disk latency - unit: "calls/s" - chart_type: stacked - dimensions: - - name: latency - - meta: - plugin_name: ebpf.plugin - module_name: hardirq - monitored_instance: - name: ebpf hardirq - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] - metrics: - - name: system.hardirq_latency - description: Hardware IRQ latency - unit: "milisecondds" - chart_type: stacked - dimensions: - - name: hardirq names - - meta: - plugin_name: ebpf.plugin - module_name: cachestat - monitored_instance: - name: ebpf cachestat - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: 
- list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] - metrics: - - name: apps.cachestat_ratio - description: Hit ratio - unit: "%" - chart_type: line - dimensions: - - name: a dimension per app group - - name: apps.cachestat_dirties - description: Number of dirty pages - unit: "page/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.cachestat_hits - description: Number of accessed files - unit: "hits/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.cachestat_misses - description: Files out of page cache - unit: "misses/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: services.cachestat_ratio - description: Hit ratio - unit: "%" - chart_type: line - dimensions: - - name: a dimension per systemd service - - name: services.cachestat_dirties - description: Number of dirty pages - unit: "page/s" - chart_type: line - dimensions: - - name: a dimension per systemd service - - name: services.cachestat_hits - description: Number of accessed files - unit: "hits/s" - chart_type: line - dimensions: - - name: a dimension per systemd service - - name: services.cachestat_misses - description: Files out of page cache - unit: "misses/s" - chart_type: line - dimensions: - - name: a dimension per systemd service - - name: mem.cachestat_ratio - description: Hit ratio - unit: "%" - chart_type: line - dimensions: - - name: ratio - - name: mem.cachestat_dirties - description: Number of dirty pages - unit: "page/s" - chart_type: line - dimensions: - - name: dirty - - name: mem.cachestat_hits - description: Number of 
accessed files - unit: "hits/s" - chart_type: line - dimensions: - - name: hit - - name: mem.cachestat_misses - description: Files out of page cache - unit: "misses/s" - chart_type: line - dimensions: - - name: miss - - name: cgroup - description: "" - labels: [] - metrics: - - name: cgroup.cachestat_ratio - description: Hit ratio - unit: "%" - chart_type: line - dimensions: - - name: ratio - - name: cgroup.cachestat_dirties - description: Number of dirty pages - unit: "page/s" - chart_type: line - dimensions: - - name: dirty - - name: cgroup.cachestat_hits - description: Number of accessed files - unit: "hits/s" - chart_type: line - dimensions: - - name: hit - - name: cgroup.cachestat_misses - description: Files out of page cache - unit: "misses/s" - chart_type: line - dimensions: - - name: miss - - meta: - plugin_name: ebpf.plugin - module_name: sync - monitored_instance: - name: ebpf sync - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: - - name: sync_freq - link: https://github.com/netdata/netdata/blob/master/health/health.d/synchronization.conf - metric: mem.sync - info: number of sync() system calls. 
Every call causes all pending modifications to filesystem metadata and cached file data to be written to the underlying filesystems. - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] - metrics: - - name: mem.file_sync - description: Monitor calls for <code>fsync(2)</code> and <code>fdatasync(2)</code>. - unit: "calls/s" - chart_type: stacked - dimensions: - - name: fsync - - name: fdatasync - - name: mem.meory_map - description: Monitor calls for <code>msync(2)</code>. - unit: "calls/s" - chart_type: line - dimensions: - - name: msync - - name: mem.sync - description: Monitor calls for <code>sync(2)</code> and <code>syncfs(2)</code>. - unit: "calls/s" - chart_type: line - dimensions: - - name: sync - - name: syncfs - - name: mem.file_segment - description: Monitor calls for <code>sync_file_range(2)</code>. - unit: "calls/s" - chart_type: line - dimensions: - - name: sync_file_range - - meta: - plugin_name: ebpf.plugin - module_name: mdflush - monitored_instance: - name: ebpf mdflush - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - 
scopes: - - name: global - description: "" - labels: [] - metrics: - - name: mdstat.mdstat_flush - description: MD flushes - unit: "flushes" - chart_type: stacked - dimensions: - - name: disk - - meta: - plugin_name: ebpf.plugin - module_name: swap - monitored_instance: - name: ebpf swap - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: cgroup - description: "" - labels: [] - metrics: - - name: cgroup.swap_read - description: Calls to function <code>swap_readpage</code>. - unit: "calls/s" - chart_type: line - dimensions: - - name: read - - name: cgroup.swap_write - description: Calls to function <code>swap_writepage</code>. - unit: "calls/s" - chart_type: line - dimensions: - - name: write - - name: global - description: "" - labels: [] - metrics: - - name: services.swap_read - description: Calls to <code>swap_readpage</code>. - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.swap_write - description: Calls to function <code>swap_writepage</code>. 
- unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: apps.swap_read_call - description: Calls to function <code>swap_readpage</code>. - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.swap_write_call - description: Calls to function <code>swap_writepage</code>. - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: system.swapcalls - description: Calls to access swap memory - unit: "calls/s" - chart_type: line - dimensions: - - name: write - - name: read - - meta: - plugin_name: ebpf.plugin - module_name: oomkill - monitored_instance: - name: ebpf oomkill - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: cgroup - description: "" - labels: [] - metrics: - - name: cgroup.oomkills - description: OOM kills. This chart is provided by eBPF plugin. - unit: "kills" - chart_type: line - dimensions: - - name: cgroup name - - name: global - description: "" - labels: [] - metrics: - - name: services.oomkills - description: OOM kills. This chart is provided by eBPF plugin. 
- unit: "kills" - chart_type: line - dimensions: - - name: a dimension per systemd service - - name: apps.oomkills - description: OOM kills - unit: "kills" - chart_type: stacked - dimensions: - - name: a dimension per app group - - meta: - plugin_name: ebpf.plugin - module_name: socket - monitored_instance: - name: ebpf socket - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] - metrics: - - name: ip.inbound_conn - description: Inbound connections. - unit: "connections/s" - chart_type: line - dimensions: - - name: connection_tcp - - name: ip.tcp_outbound_conn - description: TCP outbound connections. 
- unit: "connections/s" - chart_type: line - dimensions: - - name: received - - name: ip.tcp_functions - description: Calls to internal functions - unit: "calls/s" - chart_type: line - dimensions: - - name: received - - name: send - - name: closed - - name: ip.total_tcp_bandwidth - description: TCP bandwidth - unit: "kilobits/s" - chart_type: line - dimensions: - - name: received - - name: send - - name: ip.tcp_error - description: TCP errors - unit: "calls/s" - chart_type: line - dimensions: - - name: received - - name: send - - name: ip.tcp_retransmit - description: Packages retransmitted - unit: "calls/s" - chart_type: line - dimensions: - - name: retransmited - - name: ip.udp_functions - description: UDP calls - unit: "calls/s" - chart_type: line - dimensions: - - name: received - - name: send - - name: ip.total_udp_bandwidth - description: UDP bandwidth - unit: "kilobits/s" - chart_type: line - dimensions: - - name: received - - name: send - - name: ip.udp_error - description: UDP errors - unit: "calls/s" - chart_type: line - dimensions: - - name: received - - name: send - - name: apps.outbound_conn_v4 - description: Calls to tcp_v4_connection - unit: "connections/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.outbound_conn_v6 - description: Calls to tcp_v6_connection - unit: "connections/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.total_bandwidth_sent - description: Bytes sent - unit: "kilobits/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.total_bandwidth_recv - description: bytes received - unit: "kilobits/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.bandwidth_tcp_send - description: Calls for tcp_sendmsg - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.bandwidth_tcp_recv - description: Calls for tcp_cleanup_rbuf - unit: 
"calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.bandwidth_tcp_retransmit - description: Calls for tcp_retransmit - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.bandwidth_udp_send - description: Calls for udp_sendmsg - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.bandwidth_udp_recv - description: Calls for udp_recvmsg - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: services.net_conn_ipv4 - description: Calls to tcp_v4_connection - unit: "connections/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.net_conn_ipv6 - description: Calls to tcp_v6_connection - unit: "connections/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.net_bytes_recv - description: Bytes received - unit: "kilobits/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.net_bytes_sent - description: Bytes sent - unit: "kilobits/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.net_tcp_recv - description: Calls to tcp_cleanup_rbuf. - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.net_tcp_send - description: Calls to tcp_sendmsg. 
- unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.net_tcp_retransmit - description: Calls to tcp_retransmit - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.net_udp_send - description: Calls to udp_sendmsg - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.net_udp_recv - description: Calls to udp_recvmsg - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: cgroup - description: "" - labels: [] - metrics: - - name: cgroup.net_conn_ipv4 - description: Calls to tcp_v4_connection - unit: "connections/s" - chart_type: line - dimensions: - - name: connected_v4 - - name: cgroup.net_conn_ipv6 - description: Calls to tcp_v6_connection - unit: "connections/s" - chart_type: line - dimensions: - - name: connected_v6 - - name: cgroup.net_bytes_recv - description: Bytes received - unit: "calls/s" - chart_type: line - dimensions: - - name: received - - name: cgroup.net_bytes_sent - description: Bytes sent - unit: "calls/s" - chart_type: line - dimensions: - - name: sent - - name: cgroup.net_tcp_recv - description: Calls to tcp_cleanup_rbuf. - unit: "calls/s" - chart_type: line - dimensions: - - name: received - - name: cgroup.net_tcp_send - description: Calls to tcp_sendmsg. - unit: "calls/s" - chart_type: line - dimensions: - - name: sent - - name: cgroup.net_retransmit - description: Calls to tcp_retransmit. 
- unit: "calls/s" - chart_type: line - dimensions: - - name: retransmitted - - name: cgroup.net_udp_send - description: Calls to udp_sendmsg - unit: "calls/s" - chart_type: line - dimensions: - - name: sent - - name: cgroup.net_udp_recv - description: Calls to udp_recvmsg - unit: "calls/s" - chart_type: line - dimensions: - - name: received - - meta: - plugin_name: ebpf.plugin - module_name: dcstat - monitored_instance: - name: ebpf dcstat - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] - metrics: - - name: apps.dc_ratio - description: Percentage of files inside directory cache - unit: "%" - chart_type: line - dimensions: - - name: a dimension per app group - - name: apps.dc_reference - description: Count file access - unit: "files" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.dc_not_cache - description: Files not present inside directory cache - unit: "files" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.dc_not_found - description: Files not found - unit: "files" - chart_type: stacked - 
dimensions: - - name: a dimension per app group - - name: services.dc_ratio - description: Percentage of files inside directory cache - unit: "%" - chart_type: line - dimensions: - - name: a dimension per systemd service - - name: services.dc_reference - description: Count file access - unit: "files" - chart_type: line - dimensions: - - name: a dimension per systemd service - - name: services.dc_not_cache - description: Files not present inside directory cache - unit: "files" - chart_type: line - dimensions: - - name: a dimension per systemd service - - name: services.dc_not_found - description: Files not found - unit: "files" - chart_type: line - dimensions: - - name: a dimension per systemd service - - name: filesystem.dc_hit_ratio - description: Percentage of files inside directory cache - unit: "%" - chart_type: line - dimensions: - - name: ratio - - name: cgroup - description: "" - labels: [] - metrics: - - name: cgroup.dc_ratio - description: Percentage of files inside directory cache - unit: "%" - chart_type: line - dimensions: - - name: ratio - - name: cgroup.dc_reference - description: Count file access - unit: "files" - chart_type: line - dimensions: - - name: reference - - name: cgroup.dc_not_cache - description: Files not present inside directory cache - unit: "files" - chart_type: line - dimensions: - - name: slow - - name: cgroup.dc_not_found - description: Files not found - unit: "files" - chart_type: line - dimensions: - - name: miss - - name: filesystem - description: "" - labels: [] - metrics: - - name: filesystem.dc_reference - description: Variables used to calculate hit ratio. 
- unit: "files" - chart_type: line - dimensions: - - name: reference - - name: slow - - name: miss - - meta: - plugin_name: ebpf.plugin - module_name: filesystem - monitored_instance: - name: ebpf filesystem - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: filesystem - description: "" - labels: [] - metrics: - - name: filesystem.read_latency - description: ext4 latency for each read request. - unit: "calls/s" - chart_type: stacked - dimensions: - - name: latency period - - name: filesystem.open_latency - description: ext4 latency for each open request. - unit: "calls/s" - chart_type: stacked - dimensions: - - name: latency period - - name: filesystem.sync_latency - description: ext4 latency for each sync request. - unit: "calls/s" - chart_type: stacked - dimensions: - - name: latency period - - name: iilesystem - description: "" - labels: [] - metrics: - - name: filesystem.write_latency - description: ext4 latency for each write request. 
- unit: "calls/s" - chart_type: stacked - dimensions: - - name: latency period - - name: global - description: "" - labels: [] - metrics: - - name: filesystem.attributte_latency - description: nfs latency for each attribute request. - unit: "calls/s" - chart_type: stacked - dimensions: - - name: latency period - - meta: - plugin_name: ebpf.plugin - module_name: shm - monitored_instance: - name: ebpf shm - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: cgroup - description: "" - labels: [] - metrics: - - name: cgroup.shmget - description: Calls to syscall <code>shmget(2)</code>. - unit: "calls/s" - chart_type: line - dimensions: - - name: get - - name: cgroup.shmat - description: Calls to syscall <code>shmat(2)</code>. - unit: "calls/s" - chart_type: line - dimensions: - - name: at - - name: cgroup.shmdt - description: Calls to syscall <code>shmdt(2)</code>. - unit: "calls/s" - chart_type: line - dimensions: - - name: dt - - name: cgroup.shmctl - description: Calls to syscall <code>shmctl(2)</code>. 
- unit: "calls/s" - chart_type: line - dimensions: - - name: ctl - - name: global - description: "" - labels: [] - metrics: - - name: services.shmget - description: Calls to syscall <code>shmget(2)</code>. - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.shmat - description: Calls to syscall <code>shmat(2)</code>. - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.shmdt - description: Calls to syscall <code>shmdt(2)</code>. - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.shmctl - description: Calls to syscall <code>shmctl(2)</code>. - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: apps.shmget_call - description: Calls to syscall <code>shmget(2)</code>. - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.shmat_call - description: Calls to syscall <code>shmat(2)</code>. - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.shmdt_call - description: Calls to syscall <code>shmdt(2)</code>. - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.shmctl_call - description: Calls to syscall <code>shmctl(2)</code>. 
- unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: system.shared_memory_calls - description: Calls to shared memory system calls - unit: "calls/s" - chart_type: line - dimensions: - - name: get - - name: at - - name: dt - - name: ctl - - meta: - plugin_name: ebpf.plugin - module_name: softirq - monitored_instance: - name: ebpf softirq - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] - metrics: - - name: system.softirq_latency - description: Software IRQ latency - unit: "miliseconds" - chart_type: stacked - dimensions: - - name: soft IRQs - - meta: - plugin_name: ebpf.plugin - module_name: mount - monitored_instance: - name: ebpf mount - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - 
default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] - metrics: - - name: mount_points.call - description: Calls to mount and umount syscalls - unit: "calls/s" - chart_type: line - dimensions: - - name: mount - - name: umount - - name: mount_points.error - description: Errors to mount and umount file systems - unit: "calls/s" - chart_type: line - dimensions: - - name: mount - - name: umount - - meta: - plugin_name: ebpf.plugin - module_name: vfs - monitored_instance: - name: ebpf vfs - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: cgroup - description: "" - labels: [] - metrics: - - name: cgroup.vfs_unlink - 
description: Files deleted - unit: "calls/s" - chart_type: line - dimensions: - - name: delete - - name: cgroup.vfs_write - description: Write to disk - unit: "calls/s" - chart_type: line - dimensions: - - name: write - - name: cgroup.vfs_write_error - description: Fails to write - unit: "calls/s" - chart_type: line - dimensions: - - name: write - - name: cgroup.vfs_read - description: Read from disk - unit: "calls/s" - chart_type: line - dimensions: - - name: read - - name: cgroup.vfs_read_error - description: Fails to read - unit: "calls/s" - chart_type: line - dimensions: - - name: read - - name: cgroup.vfs_write_bytes - description: Bytes written on disk - unit: "bytes/s" - chart_type: line - dimensions: - - name: write - - name: cgroup.vfs_read_bytes - description: Bytes read from disk - unit: "bytes/s" - chart_type: line - dimensions: - - name: read - - name: cgroup.vfs_fsync - description: Calls for <code>vfs_fsync</code> - unit: "calls/s" - chart_type: line - dimensions: - - name: fsync - - name: cgroup.vfs_fsync_error - description: Sync error - unit: "calls/s" - chart_type: line - dimensions: - - name: fsync - - name: cgroup.vfs_open - description: Calls for <code>vfs_open</code> - unit: "calls/s" - chart_type: line - dimensions: - - name: open - - name: cgroup.vfs_open_error - description: Open error - unit: "calls/s" - chart_type: line - dimensions: - - name: open - - name: cgroup.vfs_create - description: Calls for <code>vfs_create</code> - unit: "calls/s" - chart_type: line - dimensions: - - name: create - - name: cgroup.vfs_create_error - description: Create error - unit: "calls/s" - chart_type: line - dimensions: - - name: create - - name: global - description: "" - labels: [] - metrics: - - name: services.vfs_unlink - description: Files deleted - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.vfs_write - description: Write to disk - unit: "calls/s" - chart_type: stacked - dimensions: 
- - name: a dimension per systemd service - - name: services.vfs_write_error - description: Fails to write - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.vfs_read - description: Read from disk - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.vfs_read_error - description: Fails to read - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.vfs_write_bytes - description: Bytes written on disk - unit: "bytes/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.vfs_read_bytes - description: Bytes read from disk - unit: "bytes/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.vfs_fsync - description: Calls to <code>vfs_fsync</code> - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.vfs_fsync_error - description: Sync error - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.vfs_open - description: Calls to <code>vfs_open</code> - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.vfs_open_error - description: Open error - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.vfs_create - description: Calls to <code>vfs_create</code> - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: services.vfs_create_error - description: Create error - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service - - name: filesystem.vfs_deleted_objects - description: Remove files - unit: "calls/s" - chart_type: line - dimensions: - - name: delete - - name: 
filesystem.vfs_io - description: Calls to IO - unit: "calls/s" - chart_type: line - dimensions: - - name: read - - name: write - - name: filesystem.vfs_io_bytes - description: Bytes written and read - unit: "bytes/s" - chart_type: line - dimensions: - - name: read - - name: write - - name: filesystem.vfs_io_error - description: Fails to write or read - unit: "calls/s" - chart_type: line - dimensions: - - name: read - - name: write - - name: filesystem.vfs_fsync - description: Calls for <code>vfs_fsync</code> - unit: "calls/s" - chart_type: line - dimensions: - - name: fsync - - name: filesystem.vfs_fsync_error - description: Fails to synchronize - unit: "calls/s" - chart_type: line - dimensions: - - name: fsync - - name: filesystem.vfs_open - description: Calls for <code>vfs_open</code> - unit: "calls/s" - chart_type: line - dimensions: - - name: open - - name: filesystem.vfs_open_error - description: Fails to open a file - unit: "calls/s" - chart_type: line - dimensions: - - name: open - - name: filesystem.vfs_create - description: Calls for <code>vfs_create</code> - unit: "calls/s" - chart_type: line - dimensions: - - name: create - - name: filesystem.vfs_create_error - description: Fails to create a file. 
- unit: "calls/s" - chart_type: line - dimensions: - - name: create - - name: apps.file_deleted - description: Files deleted - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.vfs_write_call - description: Write to disk - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.vfs_write_error - description: Fails to write - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.vfs_read_call - description: Read from disk - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.vfs_read_error - description: Fails to read - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.vfs_write_bytes - description: Bytes written on disk - unit: "bytes/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.vfs_read_bytes - description: Bytes read on disk - unit: "bytes/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.vfs_fsync - description: Calls for <code>vfs_fsync</code> - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.vfs_fsync_error - description: Sync error - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.vfs_open - description: Calls for <code>vfs_open</code> - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.vfs_open_error - description: Open error - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.vfs_create - description: Calls for <code>vfs_create</code> - unit: "calls/s" - chart_type: stacked - dimensions: - - name: a dimension per app group - - name: apps.vfs_create_error - description: Create error - unit: "calls/s" - chart_type: 
stacked - dimensions: - - name: a dimension per app group - - meta: - plugin_name: ebpf.plugin - module_name: process - monitored_instance: - name: ebpf process - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] - metrics: - - name: netdata.ebpf_aral_stat_size - description: Bytes allocated for ARAL. - unit: "bytes" - chart_type: stacked - dimensions: - - name: memory - - name: netdata.ebpf_aral_stat_alloc - description: Calls to allocate memory. - unit: "calls" - chart_type: stacked - dimensions: - - name: aral - - name: netdata.ebpf_threads - description: Threads info - unit: "threads" - chart_type: line - dimensions: - - name: total - - name: running - - name: netdata.ebpf_load_methods - description: Load info - unit: "methods" - chart_type: line - dimensions: - - name: legacy - - name: co-re - - name: netdata.ebpf_kernel_memory - description: Memory allocated for hash tables. 
- unit: "bytes" - chart_type: line - dimensions: - - name: memory_locked - - name: netdata.ebpf_hash_tables_count - description: Number of hash tables loaded - unit: "hash tables" - chart_type: line - dimensions: - - name: hash_table - - name: netdata.ebpf_aral_stat_size - description: Bytes allocated for ARAL - unit: "bytes" - chart_type: stacked - dimensions: - - name: memory - - name: netdata.ebpf_aral_stat_alloc - description: Calls to allocate memory - unit: "calls" - chart_type: stacked - dimensions: - - name: aral - - name: netdata.ebpf_aral_stat_size - description: Bytes allocated for ARAL. - unit: "bytes" - chart_type: stacked - dimensions: - - name: memory - - name: netdata.ebpf_aral_stat_alloc - description: Calls to allocate memory - unit: "calls" - chart_type: stacked - dimensions: - - name: aral diff --git a/collectors/freebsd.plugin/freebsd_sysctl.c b/collectors/freebsd.plugin/freebsd_sysctl.c index c8aa5dad..8a6df509 100644 --- a/collectors/freebsd.plugin/freebsd_sysctl.c +++ b/collectors/freebsd.plugin/freebsd_sysctl.c @@ -834,7 +834,7 @@ int do_vm_swap_info(int update_every, usec_t dt) { static int mib[3] = {0, 0, 0}; if (unlikely(getsysctl_mib("vm.swap_info", mib, 2))) { - collector_error("DISABLED: system.swap chart"); + collector_error("DISABLED: mem.swap chart"); collector_error("DISABLED: vm.swap_info module"); return 1; } else { @@ -853,13 +853,13 @@ int do_vm_swap_info(int update_every, usec_t dt) { if (unlikely(sysctl(mib, 3, &xsw, &size, NULL, 0) == -1 )) { if (unlikely(errno != ENOENT)) { collector_error("FREEBSD: sysctl(%s...) failed: %s", "vm.swap_info", strerror(errno)); - collector_error("DISABLED: system.swap chart"); + collector_error("DISABLED: mem.swap chart"); collector_error("DISABLED: vm.swap_info module"); return 1; } else { if (unlikely(size != sizeof(xsw))) { collector_error("FREEBSD: sysctl(%s...) 
expected %lu, got %lu", "vm.swap_info", (unsigned long)sizeof(xsw), (unsigned long)size); - collector_error("DISABLED: system.swap chart"); + collector_error("DISABLED: mem.swap chart"); collector_error("DISABLED: vm.swap_info module"); return 1; } else break; @@ -874,7 +874,7 @@ int do_vm_swap_info(int update_every, usec_t dt) { if (unlikely(!st)) { st = rrdset_create_localhost( - "system", + "mem", "swap", NULL, "swap", @@ -883,7 +883,7 @@ int do_vm_swap_info(int update_every, usec_t dt) { "MiB", "freebsd.plugin", "vm.swap_info", - NETDATA_CHART_PRIO_SYSTEM_SWAP, + NETDATA_CHART_PRIO_MEM_SWAP, update_every, RRDSET_TYPE_STACKED ); @@ -1026,7 +1026,7 @@ int do_vm_stats_sys_v_swappgs(int update_every, usec_t dt) { if (unlikely(GETSYSCTL_SIMPLE("vm.stats.vm.v_swappgsin", mib_swappgsin, vmmeter_data.v_swappgsin) || GETSYSCTL_SIMPLE("vm.stats.vm.v_swappgsout", mib_swappgsout, vmmeter_data.v_swappgsout))) { - collector_error("DISABLED: system.swapio chart"); + collector_error("DISABLED: mem.swapio chart"); collector_error("DISABLED: vm.stats.vm.v_swappgs module"); return 1; } else { @@ -1035,7 +1035,7 @@ int do_vm_stats_sys_v_swappgs(int update_every, usec_t dt) { if (unlikely(!st)) { st = rrdset_create_localhost( - "system", + "mem", "swapio", NULL, "swap", @@ -1044,7 +1044,7 @@ int do_vm_stats_sys_v_swappgs(int update_every, usec_t dt) { "KiB/s", "freebsd.plugin", "vm.stats.vm.v_swappgs", - NETDATA_CHART_PRIO_SYSTEM_SWAPIO, + NETDATA_CHART_PRIO_MEM_SWAPIO, update_every, RRDSET_TYPE_AREA ); diff --git a/collectors/freebsd.plugin/multi_metadata.yaml b/collectors/freebsd.plugin/metadata.yaml index 6928df64..fca8982f 100644 --- a/collectors/freebsd.plugin/multi_metadata.yaml +++ b/collectors/freebsd.plugin/metadata.yaml @@ -1,54 +1,60 @@ -name: freebsd.plugin +plugin_name: freebsd.plugin modules: - meta: plugin_name: freebsd.plugin module_name: vm.loadavg monitored_instance: - name: freebsd vm.loadavg - link: '' - categories: [] - icon_filename: '' + name: vm.loadavg + 
link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "freebsd.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "System Load Average" + method_description: "The plugin calls `sysctl` function to collect necessary data." supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: vm.loadavg + description: Enable or disable load average metric. + default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -82,7 +88,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "Monitoring for number of threads running or waiting." 
labels: [] metrics: - name: system.load @@ -97,51 +103,65 @@ modules: plugin_name: freebsd.plugin module_name: vm.vmtotal monitored_instance: - name: freebsd vm.vmtotal - link: '' - categories: [] - icon_filename: '' + name: vm.vmtotal + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "memory.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Collect Virtual Memory information from host." + method_description: "The plugin calls function `sysctl` to collect data." supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd:vm.vmtotal]" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "Config Options" enabled: true - list: [] + list: + - name: enable total processes + description: Number of active processes. + default_value: yes + required: false + - name: processes running + description: Show number of processes running or blocked. + default_value: yes + required: false + - name: real memory + description: Memeory used on host. + default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -159,7 +179,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "These metrics show an overall vision about processes running." 
labels: [] metrics: - name: system.active_processes @@ -185,51 +205,56 @@ modules: plugin_name: freebsd.plugin module_name: kern.cp_time monitored_instance: - name: freebsd kern.cp_time - link: '' - categories: [] - icon_filename: '' + name: kern.cp_time + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "freebsd.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Total CPU utilization" + method_description: "The plugin calls `sysctl` function to collect necessary data." supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + description: "[plugin:freebsd]" options: - description: '' + description: "The netdata main configuration file." folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: kern.cp_time + description: Enable or disable Total CPU usage. + default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -263,7 +288,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "These metrics show CPU usage statistics." 
labels: [] metrics: - name: system.cpu @@ -294,51 +319,57 @@ modules: plugin_name: freebsd.plugin module_name: dev.cpu.temperature monitored_instance: - name: freebsd dev.cpu.temperature - link: '' - categories: [] - icon_filename: '' + name: dev.cpu.temperature + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "freebsd.org" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Get current CPU temperature" + method_description: "The plugin calls `sysctl` function to collect necessary data." supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: dev.cpu.temperature + description: Enable or disable CPU temperature metric. + default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -352,7 +383,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "This metric show latest CPU temperature." 
labels: [] metrics: - name: cpu.temperature @@ -365,51 +396,57 @@ modules: plugin_name: freebsd.plugin module_name: dev.cpu.0.freq monitored_instance: - name: freebsd dev.cpu.0.freq - link: '' - categories: [] - icon_filename: '' + name: dev.cpu.0.freq + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "freebsd.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Read current CPU Scaling frequency." + method_description: "Current CPU Scaling Frequency" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "Config options" + section_name: "[plugin:freebsd]" + description: "The netdata main configuration file" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true - list: [] + list: + - name: dev.cpu.0.freq + description: Enable or disable CPU Scaling frequency metric. + default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -423,7 +460,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "The metric shows status of CPU frequency, it is direct affected by system load." 
labels: [] metrics: - name: cpu.scaling_cur_freq @@ -436,51 +473,57 @@ modules: plugin_name: freebsd.plugin module_name: hw.intrcnt monitored_instance: - name: freebsd hw.intrcnt - link: '' - categories: [] - icon_filename: '' + name: hw.intrcnt + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "freebsd.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Get total number of interrupts" + method_description: "The plugin calls `sysctl` function to collect necessary data." supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config option" enabled: true - list: [] + list: + - name: hw.intrcnt + description: Enable or disable Interrupts metric. + default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -494,7 +537,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "These metrics show system interrupts frequency." 
labels: [] metrics: - name: system.intr @@ -513,51 +556,57 @@ modules: plugin_name: freebsd.plugin module_name: vm.stats.sys.v_intr monitored_instance: - name: freebsd vm.stats.sys.v_intr - link: '' - categories: [] - icon_filename: '' + name: vm.stats.sys.v_intr + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "freebsd.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Device interrupts" + method_description: "The plugin calls `sysctl` function to collect necessary data." supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config option" enabled: true - list: [] + list: + - name: vm.stats.sys.v_intr + description: Enable or disable device interrupts metric. + default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -571,7 +620,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "The metric show device interrupt frequency." 
labels: [] metrics: - name: system.dev_intr @@ -584,51 +633,57 @@ modules: plugin_name: freebsd.plugin module_name: vm.stats.sys.v_soft monitored_instance: - name: freebsd vm.stats.sys.v_soft - link: '' - categories: [] - icon_filename: '' + name: vm.stats.sys.v_soft + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "freebsd.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Software Interrupt" + method_description: "vm.stats.sys.v_soft" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config option" enabled: true - list: [] + list: + - name: vm.stats.sys.v_soft + description: Enable or disable software interrupts metric. + default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -642,7 +697,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "This metric shows software interrupt frequency." 
labels: [] metrics: - name: system.soft_intr @@ -655,51 +710,57 @@ modules: plugin_name: freebsd.plugin module_name: vm.stats.sys.v_swtch monitored_instance: - name: freebsd vm.stats.sys.v_swtch - link: '' - categories: [] - icon_filename: '' + name: vm.stats.sys.v_swtch + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "freebsd.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "CPU context switch" + method_description: "The plugin calls `sysctl` function to collect necessary data." supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: vm.stats.sys.v_swtch + description: Enable or disable CPU context switch metric. + default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -713,7 +774,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "The metric count the number of context switches happening on host." 
labels: [] metrics: - name: system.ctxt @@ -732,51 +793,57 @@ modules: plugin_name: freebsd.plugin module_name: vm.swap_info monitored_instance: - name: freebsd vm.swap_info - link: '' - categories: [] - icon_filename: '' + name: vm.swap_info + link: "" + categories: + - data-collection.freebsd + icon_filename: "freebsd.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Collect information about SWAP memory." + method_description: "The plugin calls `sysctlnametomib` function to collect necessary data." supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: vm.swap_info + description: Enable or disable SWAP metrics. + default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -784,7 +851,7 @@ modules: alerts: - name: used_swap link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf - metric: system.swap + metric: mem.swap info: swap memory utilization os: "linux freebsd" metrics: @@ -795,10 +862,10 @@ modules: availability: [] scopes: - name: global - description: "" + description: "This metric shows the SWAP usage." 
labels: [] metrics: - - name: system.swap + - name: mem.swap description: System Swap unit: "MiB" chart_type: stacked @@ -809,51 +876,57 @@ modules: plugin_name: freebsd.plugin module_name: system.ram monitored_instance: - name: freebsd system.ram - link: '' - categories: [] - icon_filename: '' + name: system.ram + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "memory.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Show information about system memory usage." + method_description: "The plugin calls `sysctl` function to collect necessary data." supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: system.ram + description: Enable or disable system RAM metric. + default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -887,7 +960,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "This metric shows RAM usage statistics." 
labels: [] metrics: - name: system.ram @@ -912,51 +985,57 @@ modules: plugin_name: freebsd.plugin module_name: vm.stats.vm.v_swappgs monitored_instance: - name: freebsd vm.stats.vm.v_swappgs - link: '' - categories: [] - icon_filename: '' + name: vm.stats.vm.v_swappgs + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "memory.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "The metric swap amount of data read from and written to SWAP." + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: vm.stats.vm.v_swappgs + description: Enable or disable information about SWAP I/O metric. 
+ default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -964,7 +1043,7 @@ modules: alerts: - name: 30min_ram_swapped_out link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf - metric: system.swapio + metric: mem.swapio info: percentage of the system RAM swapped in the last 30 minutes os: "linux freebsd" metrics: @@ -975,10 +1054,10 @@ modules: availability: [] scopes: - name: global - description: "" + description: "This metric shows events happening on SWAP." labels: [] metrics: - - name: system.swapio + - name: mem.swapio description: Swap I/O unit: "KiB/s" chart_type: area @@ -989,51 +1068,57 @@ modules: plugin_name: freebsd.plugin module_name: vm.stats.vm.v_pgfaults monitored_instance: - name: freebsd vm.stats.vm.v_pgfaults - link: '' - categories: [] - icon_filename: '' + name: vm.stats.vm.v_pgfaults + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "memory.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Collect memory page faults events." + method_description: "The plugin calls `sysctl` function to collect necessary data" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd]" + description: "The netdata main configuration file." 
options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: vm.stats.vm.v_pgfaults + description: Enable or disable Memory page fault metric. + default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -1047,7 +1132,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "The number of page faults happened on host." labels: [] metrics: - name: mem.pgfaults @@ -1064,51 +1149,57 @@ modules: plugin_name: freebsd.plugin module_name: kern.ipc.sem monitored_instance: - name: freebsd kern.ipc.sem - link: '' - categories: [] - icon_filename: '' + name: kern.ipc.sem + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "freebsd.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Collect information about semaphore." + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: kern.ipc.sem + description: Enable or disable semaphore metrics. 
+ default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -1132,7 +1223,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "These metrics show counters for semaphores on the host." labels: [] metrics: - name: system.ipc_semaphores @@ -1151,51 +1242,57 @@ modules: plugin_name: freebsd.plugin module_name: kern.ipc.shm monitored_instance: - name: freebsd kern.ipc.shm - link: '' - categories: [] - icon_filename: '' + name: kern.ipc.shm + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "memory.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Collect shared memory information." + method_description: "The plugin calls `sysctl` function to collect necessary data." supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: kern.ipc.shm + description: Enable or disable shared memory metric. 
+ default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -1209,7 +1306,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "These metrics give status about current shared memory segments." labels: [] metrics: - name: system.ipc_shared_mem_segs @@ -1228,51 +1325,57 @@ modules: plugin_name: freebsd.plugin module_name: kern.ipc.msq monitored_instance: - name: freebsd kern.ipc.msq - link: '' - categories: [] - icon_filename: '' + name: kern.ipc.msq + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "freebsd.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Collect number of IPC message Queues" + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: kern.ipc.msq + description: Enable or disable IPC message queue metric. 
+ default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -1286,7 +1389,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "These metrics show IPC message statistics." labels: [] metrics: - name: system.ipc_msq_queues @@ -1312,51 +1415,57 @@ modules: plugin_name: freebsd.plugin module_name: uptime monitored_instance: - name: freebsd uptime - link: '' - categories: [] - icon_filename: '' + name: uptime + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "freebsd.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Show period of time server is up." + method_description: "The plugin calls `clock_gettime` function to collect necessary data." supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: vm.loadavg + description: Enable or disable load average metric. 
+ default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -1370,7 +1479,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "How long the system is running." labels: [] metrics: - name: system.uptime @@ -1383,51 +1492,61 @@ modules: plugin_name: freebsd.plugin module_name: net.isr monitored_instance: - name: freebsd net.isr - link: '' - categories: [] - icon_filename: '' + name: net.isr + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "freebsd.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Collect information about system softnet stat." + method_description: "The plugin calls `sysctl` function to collect necessary data." supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd:net.isr]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: netisr + description: Enable or disable general vision about softnet stat metrics. + default_value: yes + required: false + - name: netisr per core + description: Enable or disable softnet stat metric per core. 
+ default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -1441,7 +1560,9 @@ modules: - name: 1min_netdev_budget_ran_outs link: https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf metric: system.softnet_stat - info: average number of times ksoftirq ran out of sysctl net.core.netdev_budget or net.core.netdev_budget_usecs with work remaining over the last minute (this can be a cause for dropped packets) + info: + average number of times ksoftirq ran out of sysctl net.core.netdev_budget or net.core.netdev_budget_usecs with work remaining over the last + minute (this can be a cause for dropped packets) os: "linux" - name: 10min_netisr_backlog_exceeded link: https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf @@ -1456,7 +1577,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "These metrics show statistics about softnet stats." labels: [] metrics: - name: system.softnet_stat @@ -1485,51 +1606,101 @@ modules: plugin_name: freebsd.plugin module_name: devstat monitored_instance: - name: freebsd devstat - link: '' - categories: [] - icon_filename: '' + name: devstat + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "hard-drive.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Collect information per hard disk available on host." + method_description: "The plugin calls `sysctl` function to collect necessary data." 
supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd:kern.devstat]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: enable new disks detected at runtime + description: Enable or disable possibility to detect new disks. + default_value: auto + required: false + - name: performance metrics for pass devices + description: Enable or disable metrics for disks with type `PASS`. + default_value: auto + required: false + - name: total bandwidth for all disks + description: Enable or disable total bandwidth metric for all disks. + default_value: yes + required: false + - name: bandwidth for all disks + description: Enable or disable bandwidth for all disks metric. + default_value: auto + required: false + - name: operations for all disks + description: Enable or disable operations for all disks metric. + default_value: auto + required: false + - name: queued operations for all disks + description: Enable or disable queued operations for all disks metric. + default_value: auto + required: false + - name: utilization percentage for all disks + description: Enable or disable utilization percentage for all disks metric. + default_value: auto + required: false + - name: i/o time for all disks + description: Enable or disable I/O time for all disks metric. + default_value: auto + required: false + - name: average completed i/o time for all disks + description: Enable or disable average completed I/O time for all disks metric. 
+ default_value: auto + required: false + - name: average completed i/o bandwidth for all disks + description: Enable or disable average completed I/O bandwidth for all disks metric. + default_value: auto + required: false + - name: average service time for all disks + description: Enable or disable average service time for all disks metric. + default_value: auto + required: false + - name: disable by default disks matching + description: Do not create charts for disks listed. + default_value: "" + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -1548,7 +1719,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "These metrics give a general vision about I/O events on disks." labels: [] metrics: - name: system.io @@ -1627,51 +1798,57 @@ modules: plugin_name: freebsd.plugin module_name: net.inet.tcp.states monitored_instance: - name: freebsd net.inet.tcp.states - link: '' - categories: [] - icon_filename: '' + name: net.inet.tcp.states + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "network.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "" + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd]" + description: "The netdata main configuration file." 
options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: net.inet.tcp.states + description: Enable or disable TCP state metric. + default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -1690,7 +1867,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "A counter for TCP connections." labels: [] metrics: - name: ipv4.tcpsock @@ -1703,51 +1880,85 @@ modules: plugin_name: freebsd.plugin module_name: net.inet.tcp.stats monitored_instance: - name: freebsd net.inet.tcp.stats - link: '' - categories: [] - icon_filename: '' + name: net.inet.tcp.stats + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "network.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Collect overall information about TCP connections." + method_description: "The plugin calls `sysctl` function to collect necessary data." supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd:net.inet.tcp.stats]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: ipv4 TCP packets + description: Enable or disable ipv4 TCP packets metric. 
+ default_value: yes + required: false + - name: ipv4 TCP errors + description: Enable or disable ipv4 TCP errors metric. + default_value: yes + required: false + - name: ipv4 TCP handshake issues + description: Enable or disable ipv4 TCP handshake issue metric. + default_value: yes + required: false + - name: TCP connection aborts + description: Enable or disable TCP connection aborts metric. + default_value: auto + required: false + - name: TCP out-of-order queue + description: Enable or disable TCP out-of-order queue metric. + default_value: auto + required: false + - name: TCP SYN cookies + description: Enable or disable TCP SYN cookies metric. + default_value: auto + required: false + - name: TCP listen issues + description: Enable or disable TCP listen issues metric. + default_value: auto + required: false + - name: ECN packets + description: Enable or disable ECN packets metric. + default_value: auto + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -1761,7 +1972,9 @@ modules: - name: 10s_ipv4_tcp_resets_sent link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf metric: ipv4.tcphandshake - info: average number of sent TCP RESETS over the last 10 seconds. This can indicate a port scan, or that a service running on this host has crashed. Netdata will not send a clear notification for this alarm. + info: + average number of sent TCP RESETS over the last 10 seconds. This can indicate a port scan, or that a service running on this host has + crashed. Netdata will not send a clear notification for this alarm. 
os: "linux" - name: 1m_ipv4_tcp_resets_received link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf @@ -1771,7 +1984,9 @@ modules: - name: 10s_ipv4_tcp_resets_received link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf metric: ipv4.tcphandshake - info: average number of received TCP RESETS over the last 10 seconds. This can be an indication that a service this host needs has crashed. Netdata will not send a clear notification for this alarm. + info: + average number of received TCP RESETS over the last 10 seconds. This can be an indication that a service this host needs has crashed. + Netdata will not send a clear notification for this alarm. os: "linux freebsd" metrics: folding: @@ -1781,7 +1996,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "These metrics show TCP connections statistics." labels: [] metrics: - name: ipv4.tcppackets @@ -1852,51 +2067,61 @@ modules: plugin_name: freebsd.plugin module_name: net.inet.udp.stats monitored_instance: - name: freebsd net.inet.udp.stats - link: '' - categories: [] - icon_filename: '' + name: net.inet.udp.stats + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "network.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Collect information about UDP connections." + method_description: "The plugin calls `sysctl` function to collect necessary data." 
supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd:net.inet.udp.stats]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: ipv4 UDP packets + description: Enable or disable ipv4 UDP packets metric. + default_value: yes + required: false + - name: ipv4 UDP errors + description: Enable or disable ipv4 UDP errors metric. + default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -1920,7 +2145,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "These metrics show UDP connections statistics." labels: [] metrics: - name: ipv4.udppackets @@ -1944,51 +2169,65 @@ modules: plugin_name: freebsd.plugin module_name: net.inet.icmp.stats monitored_instance: - name: freebsd net.inet.icmp.stats - link: '' - categories: [] - icon_filename: '' + name: net.inet.icmp.stats + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "network.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Collect information about ICMP traffic." + method_description: "The plugin calls `sysctl` function to collect necessary data." 
supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd:net.inet.icmp.stats]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: IPv4 ICMP packets + description: Enable or disable IPv4 ICMP packets metric. + default_value: yes + required: false + - name: IPv4 ICMP error + description: Enable or disable IPv4 ICMP error metric. + default_value: yes + required: false + - name: IPv4 ICMP messages + description: Enable or disable IPv4 ICMP messages metric. + default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -2002,7 +2241,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "These metrics show ICMP connections statistics." 
labels: [] metrics: - name: ipv4.icmp @@ -2033,51 +2272,69 @@ modules: plugin_name: freebsd.plugin module_name: net.inet.ip.stats monitored_instance: - name: freebsd net.inet.ip.stats - link: '' - categories: [] - icon_filename: '' + name: net.inet.ip.stats + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "network.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Collect IP stats" + method_description: "The plugin calls `sysctl` function to collect necessary data." supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd:net.inet.ip.stats]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: ipv4 packets + description: Enable or disable IPv4 packets metric. + default_value: yes + required: false + - name: ipv4 fragments sent + description: Enable or disable IPv4 fragments sent metric. + default_value: yes + required: false + - name: ipv4 fragments assembly + description: Enable or disable IPv4 fragments assembly metric. + default_value: yes + required: false + - name: ipv4 errors + description: Enable or disable IPv4 errors metric. 
+ default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -2091,7 +2348,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "These metrics show IPv4 connections statistics." labels: [] metrics: - name: ipv4.packets @@ -2134,51 +2391,69 @@ modules: plugin_name: freebsd.plugin module_name: net.inet6.ip6.stats monitored_instance: - name: freebsd net.inet6.ip6.stats - link: '' - categories: [] - icon_filename: '' + name: net.inet6.ip6.stats + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "network.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Collect information about IPv6 stats." + method_description: "The plugin calls `sysctl` function to collect necessary data." supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd:net.inet6.ip6.stats]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: ipv6 packets + description: Enable or disable ipv6 packet metric. + default_value: auto + required: false + - name: ipv6 fragments sent + description: Enable or disable ipv6 fragments sent metric.
+ default_value: auto + required: false + - name: ipv6 fragments assembly + description: Enable or disable ipv6 fragments assembly metric. + default_value: auto + required: false + - name: ipv6 errors + description: Enable or disable ipv6 errors metric. + default_value: auto + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -2192,7 +2467,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "These metrics show general information about IPv6 connections." labels: [] metrics: - name: ipv6.packets @@ -2237,51 +2512,81 @@ modules: plugin_name: freebsd.plugin module_name: net.inet6.icmp6.stats monitored_instance: - name: freebsd net.inet6.icmp6.stats - link: '' - categories: [] - icon_filename: '' + name: net.inet6.icmp6.stats + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "network.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Collect information about IPv6 ICMP" + method_description: "The plugin calls `sysctl` function to collect necessary data." supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd:net.inet6.icmp6.stats]" + description: "The netdata main configuration file."
options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: icmp + description: Enable or disable ICMP metric. + default_value: auto + required: false + - name: icmp redirects + description: Enable or disable ICMP redirects metric. + default_value: auto + required: false + - name: icmp errors + description: Enable or disable ICMP errors metric. + default_value: auto + required: false + - name: icmp echos + description: Enable or disable ICMP echos metric. + default_value: auto + required: false + - name: icmp router + description: Enable or disable ICMP router metric. + default_value: auto + required: false + - name: icmp neighbor + description: Enable or disable ICMP neighbor metric. + default_value: auto + required: false + - name: icmp types + description: Enable or disable ICMP types metric. + default_value: auto + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -2295,7 +2600,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "Collect IPv6 ICMP traffic statistics." labels: [] metrics: - name: ipv6.icmp @@ -2373,51 +2678,65 @@ modules: plugin_name: freebsd.plugin module_name: ipfw monitored_instance: - name: freebsd ipfw - link: '' - categories: [] - icon_filename: '' + name: ipfw + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "firewall.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Collect information about FreeBSD firewall." + method_description: "The plugin uses RAW socket to communicate with kernel and collect data." 
supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd:ipfw]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: counters for static rules + description: Enable or disable counters for static rules metric. + default_value: yes + required: false + - name: number of dynamic rules + description: Enable or disable number of dynamic rules metric. + default_value: yes + required: false + - name: allocated memory + description: Enable or disable allocated memory metric. + default_value: yes + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -2431,7 +2750,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "These metrics show FreeBSD firewall statistics." labels: [] metrics: - name: ipfw.mem @@ -2469,51 +2788,101 @@ modules: plugin_name: freebsd.plugin module_name: getifaddrs monitored_instance: - name: freebsd getifaddrs - link: '' - categories: [] - icon_filename: '' + name: getifaddrs + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "network.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Collect traffic per network interface."
+ method_description: "The plugin calls `getifaddrs` function to collect necessary data." supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd:getifaddrs]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: enable new interfaces detected at runtime + description: Enable or disable possibility to discover new interface after plugin starts. + default_value: auto + required: false + - name: total bandwidth for physical interfaces + description: Enable or disable total bandwidth for physical interfaces metric. + default_value: auto + required: false + - name: total packets for physical interfaces + description: Enable or disable total packets for physical interfaces metric. + default_value: auto + required: false + - name: total bandwidth for ipv4 interface + description: Enable or disable total bandwidth for IPv4 interface metric. + default_value: auto + required: false + - name: total bandwidth for ipv6 interfaces + description: Enable or disable total bandwidth for ipv6 interfaces metric. + default_value: auto + required: false + - name: bandwidth for all interfaces + description: Enable or disable bandwidth for all interfaces metric. + default_value: auto + required: false + - name: packets for all interfaces + description: Enable or disable packets for all interfaces metric. + default_value: auto + required: false + - name: errors for all interfaces + description: Enable or disable errors for all interfaces metric. 
+ default_value: auto + required: false + - name: drops for all interfaces + description: Enable or disable drops for all interfaces metric. + default_value: auto + required: false + - name: collisions for all interface + description: Enable or disable collisions for all interface metric. + default_value: auto + required: false + - name: disable by default interfaces matching + description: Do not display data for interfaces listed. + default_value: lo* + required: false + - name: set physical interfaces for system.net + description: Do not show network traffic for listed interfaces. + default_value: igb* ix* cxl* em* ixl* ixlv* bge* ixgbe* vtnet* vmx* re* igc* dwc* + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -2562,7 +2931,9 @@ modules: - name: 10s_received_packets_storm link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf metric: net.packets - info: ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute + info: + ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over + the last minute os: "linux freebsd" - name: interface_inbound_errors link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf @@ -2592,7 +2963,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "General overview about network traffic."
labels: [] metrics: - name: system.net @@ -2669,51 +3040,73 @@ modules: plugin_name: freebsd.plugin module_name: getmntinfo monitored_instance: - name: freebsd getmntinfo - link: '' - categories: [] - icon_filename: '' + name: getmntinfo + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "hard-drive.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Collect information per mount point." + method_description: "The plugin calls `getmntinfo` function to collect necessary data." supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd:getmntinfo]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: enable new mount points detected at runtime + description: Check new mount points during runtime. + default_value: auto + required: false + - name: space usage for all disks + description: Enable or disable space usage for all disks metric. + default_value: auto + required: false + - name: inodes usage for all disks + description: Enable or disable inodes usage for all disks metric. + default_value: auto + required: false + - name: exclude space metrics on paths + description: Do not show metrics for listed paths.
+ default_value: /proc/* + required: false + - name: exclude space metrics on filesystems + description: Do not monitor listed filesystems. + default_value: autofs procfs subfs devfs none + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -2737,7 +3130,7 @@ modules: availability: [] scopes: - name: mount point - description: "" + description: "These metrics show details about mount point usages." labels: [] metrics: - name: disk.space @@ -2760,51 +3153,57 @@ modules: plugin_name: freebsd.plugin module_name: zfs monitored_instance: - name: freebsd zfs - link: '' - categories: [] - icon_filename: '' + name: zfs + link: "https://www.freebsd.org/" + categories: + - data-collection.freebsd + icon_filename: "filesystem.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' + description: "" keywords: [] most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Collect metrics for ZFS filesystem" + method_description: "The plugin uses `sysctl` function to collect necessary data." supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: "[plugin:freebsd:zfs_arcstats]" + description: "The netdata main configuration file." options: - description: '' + description: "" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: show zero charts + description: Do not show charts with zero metrics.
+ default_value: no + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -2822,7 +3221,7 @@ modules: availability: [] scopes: - name: global - description: "" + description: "These metrics show detailed information about ZFS filesystem." labels: [] metrics: - name: zfs.arc_size diff --git a/collectors/freebsd.plugin/metrics.csv b/collectors/freebsd.plugin/metrics.csv deleted file mode 100644 index 3c02a4c2..00000000 --- a/collectors/freebsd.plugin/metrics.csv +++ /dev/null @@ -1,112 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -system.load,,"load1, load5, load15",load,"System Load Average",line,,freebsd.plugin,vm.loadavg -system.active_processes,,active,processes,"System Active Processes",line,,freebsd.plugin,vm.vmtotal -system.processes,,"running, blocked",processes,"System Processes",line,,freebsd.plugin,vm.vmtotal -mem.real,,used,MiB,"Total Real Memory In Use",area,,freebsd.plugin,vm.vmtotal -system.cpu,,"nice, system, user, interrupt, idle",percentage,"Total CPU utilization",stacked,,freebsd.plugin,kern.cp_time -cpu.cpu,core,"nice, system, user, interrupt, idle",percentage,"Core utilization",stacked,,freebsd.plugin,kern.cp_time -cpu.temperature,,a dimension per core,Celsius,"Core temperature",line,,freebsd.plugin,dev.cpu.temperature -cpu.scaling_cur_freq,,frequency,MHz,"Current CPU Scaling Frequency",line,,freebsd.plugin,dev.cpu.0.freq -system.intr,,interrupts,interrupts/s,"Total Hardware Interrupts",line,,freebsd.plugin,hw.intrcnt -system.interrupts,,a dimension per interrupt,interrupts/s,"System interrupts",stacked,,freebsd.plugin,hw.intrcnt -system.dev_intr,,interrupts,interrupts/s,"Device Interrupts",line,,freebsd.plugin,vm.stats.sys.v_intr -system.soft_intr,,interrupts,interrupts/s,"Software Interrupts",line,,freebsd.plugin,vm.stats.sys.v_soft -system.ctxt,,switches,context switches/s,"CPU Context Switches",line,,freebsd.plugin,vm.stats.sys.v_swtch 
-system.forks,,started,processes/s,"Started Processes",line,,freebsd.plugin,vm.stats.sys.v_swtch -system.swap,,"free, used",MiB,"System Swap",stacked,,freebsd.plugin,vm.swap_info -system.ram,,"free, active, inactive, wired, cache, laundry, buffers",MiB,"System RAM",stacked,,freebsd.plugin,system.ram -mem.available,,avail,MiB,"Available RAM for applications",line,,freebsd.plugin,system.ram -system.swapio,,"io, out",KiB/s,"Swap I/O",area,,freebsd.plugin,vm.stats.vm.v_swappgs -mem.pgfaults,,"memory, io_requiring, cow, cow_optimized, in_transit",page faults/s,"Memory Page Faults",line,,freebsd.plugin,vm.stats.vm.v_pgfaults -system.ipc_semaphores,,semaphores,semaphores,"IPC Semaphores",area,,freebsd.plugin,kern.ipc.sem -system.ipc_semaphore_arrays,,arrays,arrays,"IPC Semaphore Arrays",area,,freebsd.plugin,kern.ipc.sem -system.ipc_shared_mem_segs,,segments,segments,"IPC Shared Memory Segments",area,,freebsd.plugin,kern.ipc.shm -system.ipc_shared_mem_size,,allocated,KiB,"IPC Shared Memory Segments Size",area,,freebsd.plugin,kern.ipc.shm -system.ipc_msq_queues,,queues,queues,"Number of IPC Message Queues",area,,freebsd.plugin,kern.ipc.msq -system.ipc_msq_messages,,messages,messages,"Number of Messages in IPC Message Queues",area,,freebsd.plugin,kern.ipc.msq -system.ipc_msq_size,,"allocated, used",bytes,"Size of IPC Message Queues",line,,freebsd.plugin,kern.ipc.msq -system.uptime,,uptime,seconds,"System Uptime",line,,freebsd.plugin,uptime -system.softnet_stat,,"dispatched, hybrid_dispatched, qdrops, queued",events/s,"System softnet_stat",line,,freebsd.plugin,net.isr -cpu.softnet_stat,core,"dispatched, hybrid_dispatched, qdrops, queued",events/s,"Per CPU netisr statistics",line,,freebsd.plugin,net.isr -system.io,,"io, out",KiB/s,"Disk I/O",area,,freebsd.plugin,devstat -disk.io,disk,"reads, writes, frees",KiB/s,"Disk I/O Bandwidth",area,,freebsd.plugin,devstat -disk.ops,disk,"reads, writes, other, frees",operations/s,"Disk Completed I/O 
Operations",line,,freebsd.plugin,devstat -disk.qops,disk,operations,operations,"Disk Current I/O Operations",line,,freebsd.plugin,devstat -disk.util,disk,utilization,% of time working,"Disk Utilization Time",line,,freebsd.plugin,devstat -disk.iotime,disk,"reads, writes, other, frees",milliseconds/s,"Disk Total I/O Time",line,,freebsd.plugin,devstat -disk.await,disk,"reads, writes, other, frees",milliseconds/operation,"Average Completed I/O Operation Time",line,,freebsd.plugin,devstat -disk.avgsz,disk,"reads, writes, frees",KiB/operation,"Average Completed I/O Operation Bandwidth",area,,freebsd.plugin,devstat -disk.svctm,disk,svctm,milliseconds/operation,"Average Service Time",line,,freebsd.plugin,devstat -ipv4.tcpsock,,connections,active connections,"IPv4 TCP Connections",line,,freebsd.plugin,net.inet.tcp.states -ipv4.tcppackets,,"received, sent",packets/s,"IPv4 TCP Packets",line,,freebsd.plugin,net.inet.tcp.stats -ipv4.tcperrors,,"InErrs, InCsumErrors, RetransSegs",packets/s,"IPv4 TCP Errors",line,,freebsd.plugin,net.inet.tcp.stats -ipv4.tcphandshake,,"EstabResets, ActiveOpens, PassiveOpens, AttemptFails",events/s,"IPv4 TCP Handshake Issues",line,,freebsd.plugin,net.inet.tcp.stats -ipv4.tcpconnaborts,,"baddata, userclosed, nomemory, timeout, linger",connections/s,"TCP Connection Aborts",line,,freebsd.plugin,net.inet.tcp.stats -ipv4.tcpofo,,inqueue,packets/s,"TCP Out-Of-Order Queue",line,,freebsd.plugin,net.inet.tcp.stats -ipv4.tcpsyncookies,,"received, sent, failed",packets/s,"TCP SYN Cookies",line,,freebsd.plugin,net.inet.tcp.stats -ipv4.tcplistenissues,,overflows,packets/s,"TCP Listen Socket Issues",line,,freebsd.plugin,net.inet.tcp.stats -ipv4.ecnpkts,,"InCEPkts, InECT0Pkts, InECT1Pkts, OutECT0Pkts, OutECT1Pkts",packets/s,"IPv4 ECN Statistics",line,,freebsd.plugin,net.inet.tcp.stats -ipv4.udppackets,,"received, sent",packets/s,"IPv4 UDP Packets",line,,freebsd.plugin,net.inet.udp.stats -ipv4.udperrors,,"InErrors, NoPorts, RcvbufErrors, InCsumErrors, 
IgnoredMulti",events/s,"IPv4 UDP Errors",line,,freebsd.plugin,net.inet.udp.stats -ipv4.icmp,,"received, sent",packets/s,"IPv4 ICMP Packets",line,,freebsd.plugin,net.inet.icmp.stats -ipv4.icmp_errors,,"InErrors, OutErrors, InCsumErrors",packets/s,"IPv4 ICMP Errors",line,,freebsd.plugin,net.inet.icmp.stats -ipv4.icmpmsg,,"InEchoReps, OutEchoReps, InEchos, OutEchos",packets/s,"IPv4 ICMP Messages",line,,freebsd.plugin,net.inet.icmp.stats -ipv4.packets,,"received, sent, forwarded, delivered",packets/s,"IPv4 Packets",line,,freebsd.plugin,net.inet.ip.stats -ipv4.fragsout,,"ok, failed, created",packets/s,"IPv4 Fragments Sent",line,,freebsd.plugin,net.inet.ip.stats -ipv4.fragsin,,"ok, failed, all",packets/s,"IPv4 Fragments Reassembly",line,,freebsd.plugin,net.inet.ip.stats -ipv4.errors,,"InDiscards, OutDiscards, InHdrErrors, OutNoRoutes, InAddrErrors, InUnknownProtos",packets/s,"IPv4 Errors",line,,freebsd.plugin,net.inet.ip.stats -ipv6.packets,,"received, sent, forwarded, delivers",packets/s,"IPv6 Packets",line,,freebsd.plugin,net.inet6.ip6.stats -ipv6.fragsout,,"ok, failed, all",packets/s,"IPv6 Fragments Sent",line,,freebsd.plugin,net.inet6.ip6.stats -ipv6.fragsin,,"ok, failed, timeout, all",packets/s,"IPv6 Fragments Reassembly",line,,freebsd.plugin,net.inet6.ip6.stats -ipv6.errors,,"InDiscards, OutDiscards, InHdrErrors, InAddrErrors, InTruncatedPkts, InNoRoutes, OutNoRoutes",packets/s,"IPv6 Errors",line,,freebsd.plugin,net.inet6.ip6.stats -ipv6.icmp,,"received, sent",messages/s,"IPv6 ICMP Messages",line,,freebsd.plugin,net.inet6.icmp6.stats -ipv6.icmpredir,,"received, sent",redirects/s,"IPv6 ICMP Redirects",line,,freebsd.plugin,net.inet6.icmp6.stats -ipv6.icmperrors,,"InErrors, OutErrors, InCsumErrors, InDestUnreachs, InPktTooBigs, InTimeExcds, InParmProblems, OutDestUnreachs, OutTimeExcds, OutParmProblems",errors/s,"IPv6 ICMP Errors",line,,freebsd.plugin,net.inet6.icmp6.stats -ipv6.icmpechos,,"InEchos, OutEchos, InEchoReplies, OutEchoReplies",messages/s,"IPv6 ICMP 
Echo",line,,freebsd.plugin,net.inet6.icmp6.stats -ipv6.icmprouter,,"InSolicits, OutSolicits, InAdvertisements, OutAdvertisements",messages/s,"IPv6 Router Messages",line,,freebsd.plugin,net.inet6.icmp6.stats -ipv6.icmpneighbor,,"InSolicits, OutSolicits, InAdvertisements, OutAdvertisements",messages/s,"IPv6 Neighbor Messages",line,,freebsd.plugin,net.inet6.icmp6.stats -ipv6.icmptypes,,"InType1, InType128, InType129, InType136, OutType1, OutType128, OutType129, OutType133, OutType135, OutType143",messages/s,"IPv6 ICMP Types",line,,freebsd.plugin,net.inet6.icmp6.stats -ipfw.mem,,"dynamic, static",bytes,"Memory allocated by rules",stacked,,freebsd.plugin,ipfw -ipfw.packets,,a dimension per static rule,packets/s,"Packets",stacked,,freebsd.plugin,ipfw -ipfw.bytes,,a dimension per static rule,bytes/s,"Bytes",stacked,,freebsd.plugin,ipfw -ipfw.active,,a dimension per dynamic rule,rules,"Active rules",stacked,,freebsd.plugin,ipfw -ipfw.expired,,a dimension per dynamic rule,rules,"Expired rules",stacked,,freebsd.plugin,ipfw -system.net,,"received, sent",kilobits/s,"Network Traffic",area,,freebsd.plugin,getifaddrs -system.packets,,"received, sent, multicast_received, multicast_sent",packets/s,"Network Packets",line,,freebsd.plugin,getifaddrs -system.ipv4,,"received, sent",kilobits/s,"IPv4 Bandwidth",area,,freebsd.plugin,getifaddrs -system.ipv6,,"received, sent",kilobits/s,"IPv6 Bandwidth",area,,freebsd.plugin,getifaddrs -net.net,network device,"received, sent",kilobits/s,"Bandwidth",area,,freebsd.plugin,getifaddrs -net.packets,network device,"received, sent, multicast_received, multicast_sent",packets/s,"Packets",line,,freebsd.plugin,getifaddrs -net.errors,network device,"inbound, outbound",errors/s,"Interface Errors",line,,freebsd.plugin,getifaddrs -net.drops,network device,"inbound, outbound",drops/s,"Interface Drops",line,,freebsd.plugin,getifaddrs -net.events,network device,collisions,events/s,"Network Interface Events",line,,freebsd.plugin,getifaddrs -disk.space,mount 
point,"avail, used, reserved_for_root",GiB,"Disk Space Usage for {mounted dir} [{mounted filesystem}]",stacked,,freebsd.plugin,getmntinfo -disk.inodes,mount point,"avail, used, reserved_for_root",inodes,"Disk Files (inodes) Usage for {mounted dir} [{mounted filesystem}]",stacked,,freebsd.plugin,getmntinfo -zfs.arc_size,,"arcsz, target, min, max",MiB,"ZFS ARC Size",area,,freebsd.plugin,zfs -zfs.l2_size,,"actual, size",MiB,"ZFS L2 ARC Size",area,,freebsd.plugin,zfs -zfs.reads,,"arc, demand, prefetch, metadata, l2",reads/s,"ZFS Reads",area,,freebsd.plugin,zfs -zfs.bytes,,"read, write",KiB/s,"ZFS ARC L2 Read/Write Rate",area,,freebsd.plugin,zfs -zfs.hits,,"hits, misses",percentage,"ZFS ARC Hits",stacked,,freebsd.plugin,zfs -zfs.hits_rate,,"hits, misses",events/s,"ZFS ARC Hits Rate",stacked,,freebsd.plugin,zfs -zfs.dhits,,"hits, misses",percentage,"ZFS Demand Hits",stacked,,freebsd.plugin,zfs -zfs.dhits_rate,,"hits, misses",events/s,"ZFS Demand Hits Rate",stacked,,freebsd.plugin,zfs -zfs.phits,,"hits, misses",percentage,"ZFS Prefetch Hits",stacked,,freebsd.plugin,zfs -zfs.phits_rate,,"hits, misses",events/s,"ZFS Prefetch Hits Rate",stacked,,freebsd.plugin,zfs -zfs.mhits,,"hits, misses",percentage,"ZFS Metadata Hits",stacked,,freebsd.plugin,zfs -zfs.mhits_rate,,"hits, misses",events/s,"ZFS Metadata Hits Rate",stacked,,freebsd.plugin,zfs -zfs.l2hits,,"hits, misses",percentage,"ZFS L2 Hits",stacked,,freebsd.plugin,zfs -zfs.l2hits_rate,,"hits, misses",events/s,"ZFS L2 Hits Rate",stacked,,freebsd.plugin,zfs -zfs.list_hits,,"mfu, mfu_ghost, mru, mru_ghost",hits/s,"ZFS List Hits",area,,freebsd.plugin,zfs -zfs.arc_size_breakdown,,"recent, frequent",percentage,"ZFS ARC Size Breakdown",stacked,,freebsd.plugin,zfs -zfs.memory_ops,,throttled,operations/s,"ZFS Memory Operations",line,,freebsd.plugin,zfs -zfs.important_ops,,"evict_skip, deleted, mutex_miss, hash_collisions",operations/s,"ZFS Important Operations",line,,freebsd.plugin,zfs -zfs.actual_hits,,"hits, 
misses",percentage,"ZFS Actual Cache Hits",stacked,,freebsd.plugin,zfs -zfs.actual_hits_rate,,"hits, misses",events/s,"ZFS Actual Cache Hits Rate",stacked,,freebsd.plugin,zfs -zfs.demand_data_hits,,"hits, misses",percentage,"ZFS Data Demand Efficiency",stacked,,freebsd.plugin,zfs -zfs.demand_data_hits_rate,,"hits, misses",events/s,"ZFS Data Demand Efficiency Rate",stacked,,freebsd.plugin,zfs -zfs.prefetch_data_hits,,"hits, misses",percentage,"ZFS Data Prefetch Efficiency",stacked,,freebsd.plugin,zfs -zfs.prefetch_data_hits_rate,,"hits, misses",events/s,"ZFS Data Prefetch Efficiency Rate",stacked,,freebsd.plugin,zfs -zfs.hash_elements,,"current, max",elements,"ZFS ARC Hash Elements",line,,freebsd.plugin,zfs -zfs.hash_chains,,"current, max",chains,"ZFS ARC Hash Chains",line,,freebsd.plugin,zfs -zfs.trim_bytes,,TRIMmed,bytes,"Successfully TRIMmed bytes",line,,freebsd.plugin,zfs -zfs.trim_requests,,"successful, failed, unsupported",requests,"TRIM requests",line,,freebsd.plugin,zfs diff --git a/collectors/freeipmi.plugin/freeipmi_plugin.c b/collectors/freeipmi.plugin/freeipmi_plugin.c index bfd867cc..94b4fed8 100644 --- a/collectors/freeipmi.plugin/freeipmi_plugin.c +++ b/collectors/freeipmi.plugin/freeipmi_plugin.c @@ -1799,8 +1799,10 @@ int main (int argc, char **argv) { for(iteration = 0; 1 ; iteration++) { usec_t dt = heartbeat_next(&hb, step); - if(!tty) + if (!tty) { fprintf(stdout, "\n"); // keepalive to avoid parser read timeout (2 minutes) during ipmi_detect_speed_secs() + fflush(stdout); + } struct netdata_ipmi_state state = {0 }; @@ -1890,7 +1892,7 @@ int main (int argc, char **argv) { // restart check (14400 seconds) if (now_monotonic_sec() - started_t > IPMI_RESTART_EVERY_SECONDS) { - collector_error("%s(): reached my lifetime expectancy. Exiting to restart.", __FUNCTION__); + collector_info("%s(): reached my lifetime expectancy. 
Exiting to restart.", __FUNCTION__); fprintf(stdout, "EXIT\n"); fflush(stdout); exit(0); diff --git a/collectors/freeipmi.plugin/multi_metadata.yaml b/collectors/freeipmi.plugin/metadata.yaml index 21333278..9540410b 100644 --- a/collectors/freeipmi.plugin/multi_metadata.yaml +++ b/collectors/freeipmi.plugin/metadata.yaml @@ -1,125 +1,71 @@ -name: freeipmi.plugin +plugin_name: freeipmi.plugin modules: - meta: plugin_name: freeipmi.plugin - module_name: sel - monitored_instance: - name: freeipmi sel - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] - metrics: - - name: ipmi.sel - description: IPMI Events - unit: "events" - chart_type: area - dimensions: - - name: events - - meta: - plugin_name: freeipmi.plugin module_name: sensors monitored_instance: - name: freeipmi sensors - link: '' - categories: [] - icon_filename: '' + name: Intelligent Platform Management Interface (IPMI) + link: "https://en.wikipedia.org/wiki/Intelligent_Platform_Management_Interface" + categories: + - data-collection.hardware-devices-and-sensors + icon_filename: "netdata.png" related_resources: 
integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false + description: "" + keywords: + - sensors + - ipmi + - freeipmi + - ipmimonitoring + most_popular: true overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: | + "Monitor enterprise server sensor readings, event log entries, and hardware statuses to ensure reliable server operations." + method_description: | + The plugin uses open source library IPMImonitoring to communicate with sensors. supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "The plugin needs setuid." default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "Linux kernel module for IPMI can create big overhead." setup: prerequisites: - list: [] + list: + - title: Preliminary actions + description: | + If you have not previously used IPMI on your system, you will probably need to run the ipmimonitoring command as root to initialize IPMI settings so that the Netdata plugin works correctly. It should return information about available sensors on the system. + + In some distributions libipmimonitoring.pc is located in a non-standard directory, which can cause building the plugin to fail when building Netdata from source. In that case you should find the file and link it to the standard pkg-config directory. Usually, running sudo ln -s /usr/lib/$(uname -m)-linux-gnu/pkgconfig/libipmimonitoring.pc/libipmimonitoring.pc /usr/lib/pkgconfig/libipmimonitoring.pc resolves this issue. 
configuration: file: - name: '' - description: '' + name: "netdata.conf" + section_name: '[plugin:freeipmi]' + description: "This is netdata main configuration file" options: - description: '' + description: "This tool receives command line options that are visible when user run: `./usr/libexec/netdata/plugins.d/freeipmi.plugin --help`" folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: command options + description: Variable used to pass arguments for the plugin. + default_value: 1 + required: false examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -140,12 +86,18 @@ modules: description: "" labels: - name: sensor - description: TBD + description: The sensor name - name: type - description: TBD + description: One of 45 recognized sensor types (Battery, Voltage...) - name: component - description: TBD + description: One of 25 recognized components (Processor, Peripheral). metrics: + - name: ipmi.sel + description: IPMI Events + unit: "events" + chart_type: area + dimensions: + - name: events - name: ipmi.sensor_state description: IPMI Sensors State unit: "state" diff --git a/collectors/freeipmi.plugin/metrics.csv b/collectors/freeipmi.plugin/metrics.csv deleted file mode 100644 index 4c90d5c1..00000000 --- a/collectors/freeipmi.plugin/metrics.csv +++ /dev/null @@ -1,10 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -ipmi.sel,,events,events,"IPMI Events",area,,freeipmi.plugin,sel -ipmi.sensor_state,sensor,"nominal, critical, warning, unknown",state,"IPMI Sensors State",line,"sensor, type, component",freeipmi.plugin,sensors -ipmi.sensor_temperature_c,sensor,temperature,Celsius,"IPMI Sensor Temperature Celsius",line,"sensor, type, component",freeipmi.plugin,sensors -ipmi.sensor_temperature_f,sensor,temperature,Fahrenheit,"IPMI Sensor Temperature Fahrenheit",line,"sensor, type, component",freeipmi.plugin,sensors 
-ipmi.sensor_voltage,sensor,voltage,Volts,"IPMI Sensor Voltage",line,"sensor, type, component",freeipmi.plugin,sensors -ipmi.sensor_ampere,sensor,ampere,Amps,"IPMI Sensor Current",line,"sensor, type, component",freeipmi.plugin,sensors -ipmi.sensor_fan_speed,sensor,rotations,RPM,"IPMI Sensor Fans Speed",line,"sensor, type, component",freeipmi.plugin,sensors -ipmi.sensor_power,sensor,power,Watts,"IPMI Sensor Power",line,"sensor, type, component",freeipmi.plugin,sensors -ipmi.sensor_reading_percent,sensor,percentage,%,"IPMI Sensor Reading Percentage",line,"sensor, type, component",freeipmi.plugin,sensors diff --git a/collectors/idlejitter.plugin/metadata.yaml b/collectors/idlejitter.plugin/metadata.yaml index cd1737d3..0ad94699 100644 --- a/collectors/idlejitter.plugin/metadata.yaml +++ b/collectors/idlejitter.plugin/metadata.yaml @@ -1,74 +1,90 @@ -meta: - plugin_name: idlejitter.plugin - module_name: idlejitter.plugin - monitored_instance: - name: IdleJitter - link: '' - categories: - - data-collection.synthetic-checks - icon_filename: 'syslog.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor IdleJitter performance for optimal system idle process operations. Monitor CPU idle times, wake-ups, and power states to optimize system resource usage.' 
- method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: idlejitter.plugin +modules: + - meta: + plugin_name: idlejitter.plugin + module_name: idlejitter.plugin + monitored_instance: + name: Idle OS Jitter + link: '' + categories: + - data-collection.synthetic-checks + icon_filename: 'syslog.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: + - latency + - jitter + most_popular: false + overview: + data_collection: + metrics_description: > + Monitor delays in timing for user processes caused by scheduling limitations to optimize the system to run + latency sensitive applications with minimal jitter, improving consistency and quality of service. + method_description: > + A thread is spawned that requests to sleep for fixed amount of time. When the system wakes it up, it + measures how many microseconds have passed. The difference between the requested and the actual duration of + the sleep, is the idle jitter. This is done dozens of times per second to ensure we have a representative sample. + supported_platforms: + include: [] + exclude: [] + multi_instance: false + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: 'This integration will run by default on all supported systems.' 
+ limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: 'netdata.conf' + section: 'plugin:idlejitter' + description: '' + options: + description: > + This integration only supports a single configuration option, and most users will not need to change it. + folding: + title: '' + enabled: false + list: + - name: loop time in ms + description: > + Specifies the target time for the data collection thread to sleep, measured in milliseconds. + default_value: 20 + required: false + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: system.idlejitter - description: CPU Idle Jitter - unit: "microseconds lost/s" - chart_type: line - dimensions: - - name: min - - name: max - - name: average + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.idlejitter + description: CPU Idle Jitter + unit: "microseconds lost/s" + chart_type: line + dimensions: + - name: min + - name: max + - name: average diff --git a/collectors/idlejitter.plugin/metrics.csv b/collectors/idlejitter.plugin/metrics.csv deleted file mode 100644 index 05cc1233..00000000 --- a/collectors/idlejitter.plugin/metrics.csv +++ /dev/null @@ -1,2 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -system.idlejitter,,"min, max, average","microseconds lost/s","CPU Idle Jitter",line,,idlejitter.plugin,
\ No newline at end of file diff --git a/collectors/ioping.plugin/metadata.yaml b/collectors/ioping.plugin/metadata.yaml index 52264f17..e3ec9616 100644 --- a/collectors/ioping.plugin/metadata.yaml +++ b/collectors/ioping.plugin/metadata.yaml @@ -1,76 +1,101 @@ -meta: - plugin_name: ioping.plugin - module_name: ioping.plugin - monitored_instance: - name: IOPing - link: '' - categories: - - data-collection.synthetic-checks - icon_filename: 'syslog.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor IOPing metrics for efficient disk I/O latency tracking. Keep track of read/write speeds, latency, and error rates for optimized disk operations.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: -- name: ioping_disk_latency - link: https://github.com/netdata/netdata/blob/master/health/health.d/ioping.conf - metric: ioping.latency - info: average I/O latency over the last 10 seconds -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: disk - description: "" - labels: [] +plugin_name: ioping.plugin +modules: + - meta: + plugin_name: ioping.plugin + module_name: ioping.plugin + monitored_instance: + name: IOPing + link: "https://github.com/koct9i/ioping" + categories: + - data-collection.synthetic-checks + icon_filename: "syslog.png" + related_resources: + integrations: 
+ list: [] + info_provided_to_referring_integrations: + description: "" + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: "Monitor IOPing metrics for efficient disk I/O latency tracking. Keep track of read/write speeds, latency, and error rates for optimized disk operations." + method_description: "Plugin uses `ioping` command." + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Install ioping + description: | + You can install the command by passing the argument `install` to the plugin (`/usr/libexec/netdata/plugins.d/ioping.plugin install`). + configuration: + file: + name: "ioping.conf" + description: "File with options to specify hardware to monitor and arguments for ioping command." + options: + description: "" + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Data collection frequency. + default_value: 1s + required: false + - name: destination + description: The directory/file/device to ioping. + default_value: "" + required: true + - name: request_size + description: The request size in bytes to ioping the destination (symbolic modifiers are supported) + default_value: 4k + required: false + - name: ioping_opts + description: Options passed to `ioping` commands. + default_value: -T 1000000 + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Basic Configuration + description: This example has the minimum configuration necessary to have the plugin running. 
+ config: | + destination="/dev/sda" + troubleshooting: + problems: + list: [] + alerts: + - name: ioping_disk_latency + link: https://github.com/netdata/netdata/blob/master/health/health.d/ioping.conf + metric: ioping.latency + info: average I/O latency over the last 10 seconds metrics: - - name: ioping.latency - description: Read Latency - unit: "microseconds" - chart_type: line - dimensions: - - name: latency + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: disk + description: "" + labels: [] + metrics: + - name: ioping.latency + description: Read Latency + unit: "microseconds" + chart_type: line + dimensions: + - name: latency diff --git a/collectors/ioping.plugin/metrics.csv b/collectors/ioping.plugin/metrics.csv deleted file mode 100644 index 040ea856..00000000 --- a/collectors/ioping.plugin/metrics.csv +++ /dev/null @@ -1,2 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -ioping.latency,disk,latency,microseconds,"Read Latency",line,,ioping.plugin,
\ No newline at end of file diff --git a/collectors/macos.plugin/macos_mach_smi.c b/collectors/macos.plugin/macos_mach_smi.c index f21a56af..30c95718 100644 --- a/collectors/macos.plugin/macos_mach_smi.c +++ b/collectors/macos.plugin/macos_mach_smi.c @@ -99,7 +99,7 @@ int do_macos_mach_smi(int update_every, usec_t dt) { do_ram = 0; collector_error("DISABLED: system.ram"); do_swapio = 0; - collector_error("DISABLED: system.swapio"); + collector_error("DISABLED: mem.swapio"); do_pgfaults = 0; collector_error("DISABLED: mem.pgfaults"); } else { @@ -148,10 +148,10 @@ int do_macos_mach_smi(int update_every, usec_t dt) { #if (defined __MAC_OS_X_VERSION_MIN_REQUIRED && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1090) if (likely(do_swapio)) { - st = rrdset_find_active_localhost("system.swapio"); + st = rrdset_find_active_localhost("mem.swapio"); if (unlikely(!st)) { st = rrdset_create_localhost( - "system" + "mem" , "swapio" , NULL , "swap" diff --git a/collectors/macos.plugin/macos_sysctl.c b/collectors/macos.plugin/macos_sysctl.c index 42f01d85..520d2f93 100644 --- a/collectors/macos.plugin/macos_sysctl.c +++ b/collectors/macos.plugin/macos_sysctl.c @@ -260,12 +260,12 @@ int do_macos_sysctl(int update_every, usec_t dt) { if (likely(do_swap)) { if (unlikely(GETSYSCTL_BY_NAME("vm.swapusage", swap_usage))) { do_swap = 0; - collector_error("DISABLED: system.swap"); + collector_error("DISABLED: mem.swap"); } else { - st = rrdset_find_active_localhost("system.swap"); + st = rrdset_find_active_localhost("mem.swap"); if (unlikely(!st)) { st = rrdset_create_localhost( - "system" + "mem" , "swap" , NULL , "swap" diff --git a/collectors/macos.plugin/multi_metadata.yaml b/collectors/macos.plugin/metadata.yaml index 38668fdc..cc159ad1 100644 --- a/collectors/macos.plugin/multi_metadata.yaml +++ b/collectors/macos.plugin/metadata.yaml @@ -1,94 +1,288 @@ -name: macos.plugin +plugin_name: macos.plugin modules: - meta: plugin_name: macos.plugin module_name: mach_smi monitored_instance: - name: 
macos mach_smi - link: '' - categories: [] - icon_filename: '' + name: macOS + link: "https://www.apple.com/macos" + categories: + - data-collection.macos-systems + icon_filename: "macos.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - macos + - apple + - darwin most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Monitor macOS metrics for efficient operating system performance." + method_description: | + The plugin uses three different methods to collect data: + - The function `sysctlbyname` is called to collect network, swap, loadavg, and boot time. + - The function `host_statistic` is called to collect CPU and Virtual memory data; + - The function `IOServiceGetMatchingServices` to collect storage information. supported_platforms: - include: [] + include: + - macOS exclude: [] - multi-instance: true + multi_instance: false additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "netdata.conf" + description: "The netdata main configuration file." options: - description: '' + description: | + There are three sections in the file which you can configure: + + - `[plugin:macos:sysctl]` - Enable or disable monitoring for network, swap, loadavg, and boot time. + - `[plugin:macos:mach_smi]` - Enable or disable monitoring for CPU and Virtual memory. + - `[plugin:macos:iokit]` - Enable or disable monitoring for storage device. 
folding: - title: '' + title: "Config options" enabled: true - list: [] + list: + - name: enable load average + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of load average metrics (load1, load5, load15). + default_value: yes + required: false + - name: system swap + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of system swap metrics (free, used). + default_value: yes + required: false + - name: bandwidth + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of network bandwidth metrics (received, sent). + default_value: yes + required: false + - name: ipv4 TCP packets + description: Enable or disable monitoring of IPv4 TCP total packets metrics (received, sent). + section_name: plugin:macos:sysctl + default_value: yes + required: false + - name: ipv4 TCP errors + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of IPv4 TCP packets metrics (Input Errors, Checksum, Retransmission segments). + default_value: yes + required: false + - name: ipv4 TCP handshake issues + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of IPv4 TCP handshake metrics (Established Resets, Active Opens, Passive Opens, Attempt Fails). + default_value: yes + required: false + - name: ECN packets + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of ECN statistics metrics (InCEPkts, InNoECTPkts). + default_value: auto + required: false + - name: TCP SYN cookies + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of TCP SYN cookies metrics (received, sent, failed). + default_value: auto + required: false + - name: TCP out-of-order queue + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of TCP out-of-order queue metrics (inqueue). 
+ default_value: auto + required: false + - name: TCP connection aborts + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of TCP connection aborts metrics (Bad Data, User closed, No memory, Timeout). + default_value: auto + required: false + - name: ipv4 UDP packets + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of ipv4 UDP packets metrics (sent, received.). + default_value: yes + required: false + - name: ipv4 UDP errors + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of ipv4 UDP errors metrics (Received Buffer error, Input Errors, No Ports, IN Checksum Errors, Ignore Multi). + default_value: yes + required: false + - name: ipv4 icmp packets + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of IPv4 ICMP packets metrics (sent, received, in error, OUT error, IN Checksum error). + default_value: yes + required: false + - name: ipv4 icmp messages + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of ipv4 ICMP messages metrics (I/O messages, I/O Errors, In Checksum). + default_value: yes + required: false + - name: ipv4 packets + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of ipv4 packets metrics (received, sent, forwarded, delivered). + default_value: yes + required: false + - name: ipv4 fragments sent + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of IPv4 fragments sent metrics (ok, fails, creates). + default_value: yes + required: false + - name: ipv4 fragments assembly + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of IPv4 fragments assembly metrics (ok, failed, all). + default_value: yes + required: false + - name: ipv4 errors + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of IPv4 errors metrics (I/O discard, I/O HDR errors, In Addr errors, In Unknown protos, OUT No Routes). 
+ default_value: yes + required: false + - name: ipv6 packets + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of IPv6 packets metrics (received, sent, forwarded, delivered). + default_value: auto + required: false + - name: ipv6 fragments sent + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of IPv6 fragments sent metrics (ok, failed, all). + default_value: auto + required: false + - name: ipv6 fragments assembly + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of IPv6 fragments assembly metrics (ok, failed, timeout, all). + default_value: auto + required: false + - name: ipv6 errors + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of IPv6 errors metrics (I/O Discards, In Hdr Errors, In Addr Errors, In Truncated Packets, I/O No Routes). + default_value: auto + required: false + - name: icmp + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of ICMP metrics (sent, received). + default_value: auto + required: false + - name: icmp redirects + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of ICMP redirects metrics (received, sent). + default_value: auto + required: false + - name: icmp errors + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of ICMP metrics (I/O Errors, In Checksums, In Destination Unreachable, In Packet too big, In Time Exceeds, In Parm Problem, Out Dest Unreachable, Out Time Exceeds, Out Parm Problems.). + default_value: auto + required: false + - name: icmp echos + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of ICMP echos metrics (I/O Echos, I/O Echo Reply). + default_value: auto + required: false + - name: icmp router + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of ICMP router metrics (I/O Solicits, I/O Advertisements). 
+ default_value: auto + required: false + - name: icmp neighbor + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of ICMP neighbor metrics (I/O Solicits, I/O Advertisements). + default_value: auto + required: false + - name: icmp types + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of ICMP types metrics (I/O Type1, I/O Type128, I/O Type129, Out Type133, Out Type135, In Type136, Out Type145). + default_value: auto + required: false + - name: space usage for all disks + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of space usage for all disks metrics (available, used, reserved for root). + default_value: yes + required: false + - name: inodes usage for all disks + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of inodes usage for all disks metrics (available, used, reserved for root). + default_value: yes + required: false + - name: bandwidth + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of bandwidth metrics (received, sent). + default_value: yes + required: false + - name: system uptime + section_name: plugin:macos:sysctl + description: Enable or disable monitoring of system uptime metrics (uptime). + default_value: yes + required: false + - name: cpu utilization + section_name: plugin:macos:mach_smi + description: Enable or disable monitoring of CPU utilization metrics (user, nice, system, idle). + default_value: yes + required: false + - name: system ram + section_name: plugin:macos:mach_smi + description: Enable or disable monitoring of system RAM metrics (Active, Wired, throttled, compressor, inactive, purgeable, speculative, free). + default_value: yes + required: false + - name: swap i/o + section_name: plugin:macos:mach_smi + description: Enable or disable monitoring of SWAP I/O metrics (I/O Swap). 
+ default_value: yes + required: false + - name: memory page faults + section_name: plugin:macos:mach_smi + description: Enable or disable monitoring of memory page faults metrics (memory, cow, I/O page, compress, decompress, zero fill, reactivate, purge). + default_value: yes + required: false + - name: disk i/o + section_name: plugin:macos:iokit + description: Enable or disable monitoring of disk I/O metrics (In, Out). + default_value: yes + required: false examples: folding: - enabled: true - title: '' - list: [] + enabled: false + title: "Config" + list: + - name: Disable swap monitoring. + folding: + enabled: true + description: A basic example that discards swap monitoring + config: | + [plugin:macos:sysctl] + system swap = no + [plugin:macos:mach_smi] + swap i/o = no + - name: Disable complete Machine SMI section. + folding: + enabled: true + description: A basic example that discards swap monitoring + config: | + [plugin:macos:mach_smi] + cpu utilization = no + system ram = no + swap i/o = no + memory page faults = no + disk i/o = no troubleshooting: problems: list: [] alerts: - - name: 10min_cpu_usage - link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf - metric: system.cpu - info: average CPU utilization over the last 10 minutes (excluding iowait, nice and steal) - os: "linux" - - name: 10min_cpu_iowait - link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf - metric: system.cpu - info: average CPU iowait time over the last 10 minutes - os: "linux" - - name: 20min_steal_cpu - link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf - metric: system.cpu - info: average CPU steal time over the last 20 minutes - os: "linux" - - name: 10min_cpu_usage - link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf - metric: system.cpu - info: average CPU utilization over the last 10 minutes (excluding nice) - os: "freebsd" - - name: ram_in_use - link: 
https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf - metric: system.ram - info: system memory utilization - os: "linux" - - name: ram_in_use - link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf - metric: system.ram - info: system memory utilization - os: "freebsd" - - name: 30min_ram_swapped_out - link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf - metric: system.swapio - info: percentage of the system RAM swapped in the last 30 minutes - os: "linux freebsd" + - name: interface_speed + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.net + info: network interface ${label:device} current speed + os: "*" metrics: folding: title: Metrics @@ -97,7 +291,8 @@ modules: availability: [] scopes: - name: global - description: "" + description: | + These metrics refer to hardware and network monitoring. labels: [] metrics: - name: system.cpu @@ -122,7 +317,7 @@ modules: - name: purgeable - name: speculative - name: free - - name: system.swapio + - name: mem.swapio description: Swap I/O unit: "KiB/s" chart_type: area @@ -143,126 +338,6 @@ modules: - name: zero_fill - name: reactivate - name: purge - - meta: - plugin_name: macos.plugin - module_name: sysctl - monitored_instance: - name: macos sysctl - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: 
true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: - - name: load_cpu_number - link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf - metric: system.load - info: number of active CPU cores in the system - os: "linux" - - name: load_average_15 - link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf - metric: system.load - info: system fifteen-minute load average - os: "linux" - - name: load_average_5 - link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf - metric: system.load - info: system five-minute load average - os: "linux" - - name: load_average_1 - link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf - metric: system.load - info: system one-minute load average - os: "linux" - - name: used_swap - link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf - metric: system.swap - info: swap memory utilization - os: "linux freebsd" - - name: 1m_ipv4_tcp_resets_sent - link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf - metric: ipv4.tcphandshake - info: average number of sent TCP RESETS over the last minute - os: "linux" - - name: 10s_ipv4_tcp_resets_sent - link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf - metric: ipv4.tcphandshake - info: average number of sent TCP RESETS over the last 10 seconds. This can indicate a port scan, or that a service running on this host has crashed. Netdata will not send a clear notification for this alarm. 
- os: "linux" - - name: 1m_ipv4_tcp_resets_received - link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf - metric: ipv4.tcphandshake - info: average number of received TCP RESETS over the last minute - os: "linux freebsd" - - name: 10s_ipv4_tcp_resets_received - link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf - metric: ipv4.tcphandshake - info: average number of received TCP RESETS over the last 10 seconds. This can be an indication that a service this host needs has crashed. Netdata will not send a clear notification for this alarm. - os: "linux freebsd" - - name: 1m_ipv4_udp_receive_buffer_errors - link: https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf - metric: ipv4.udperrors - info: average number of UDP receive buffer errors over the last minute - os: "linux freebsd" - - name: 1m_ipv4_udp_send_buffer_errors - link: https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf - metric: ipv4.udperrors - info: average number of UDP send buffer errors over the last minute - os: "linux" - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] - metrics: - name: system.load description: System Load Average unit: "load" @@ -271,7 +346,7 @@ modules: - name: load1 - name: load5 - name: load15 - - name: system.swap + - name: mem.swap description: System Swap unit: "MiB" chart_type: stacked @@ -531,147 +606,13 @@ modules: chart_type: line dimensions: - name: uptime - - meta: - plugin_name: macos.plugin - module_name: iokit - monitored_instance: - name: macos iokit - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - 
include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] - troubleshooting: - problems: - list: [] - alerts: - - name: 10min_disk_utilization - link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf - metric: disk.util - info: average percentage of time ${label:device} disk was busy over the last 10 minutes - os: "linux freebsd" - - name: disk_space_usage - link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf - metric: disk.space - info: disk ${label:mount_point} space utilization - os: "linux freebsd" - - name: disk_inode_usage - link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf - metric: disk.inodes - info: disk ${label:mount_point} inode utilization - os: "linux freebsd" - - name: interface_speed - link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf - metric: net.net - info: network interface ${label:device} current speed - os: "*" - - name: 1m_received_traffic_overflow - link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf - metric: net.net - info: average inbound utilization for the network interface ${label:device} over the last minute - os: "linux" - - name: 1m_sent_traffic_overflow - link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf - metric: net.net - info: average outbound utilization for the network interface ${label:device} over the last minute - os: "linux" - - name: inbound_packets_dropped_ratio - link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf - metric: net.packets - info: ratio 
of inbound dropped packets for the network interface ${label:device} over the last 10 minutes - os: "linux" - - name: outbound_packets_dropped_ratio - link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf - metric: net.packets - info: ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes - os: "linux" - - name: wifi_inbound_packets_dropped_ratio - link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf - metric: net.packets - info: ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes - os: "linux" - - name: wifi_outbound_packets_dropped_ratio - link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf - metric: net.packets - info: ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes - os: "linux" - - name: 1m_received_packets_rate - link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf - metric: net.packets - info: average number of packets received by the network interface ${label:device} over the last minute - os: "linux freebsd" - - name: 10s_received_packets_storm - link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf - metric: net.packets - info: ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute - os: "linux freebsd" - - name: interface_inbound_errors - link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf - metric: net.errors - info: number of inbound errors for the network interface ${label:device} in the last 10 minutes - os: "freebsd" - - name: interface_outbound_errors - link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf - metric: net.errors - info: number of outbound errors for the network interface ${label:device} in the last 10 minutes - os: 
"freebsd" - - name: inbound_packets_dropped - link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf - metric: net.drops - info: number of inbound dropped packets for the network interface ${label:device} in the last 10 minutes - os: "linux" - - name: outbound_packets_dropped - link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf - metric: net.drops - info: number of outbound dropped packets for the network interface ${label:device} in the last 10 minutes - os: "linux" - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: + - name: system.io + description: Disk I/O + unit: "KiB/s" + chart_type: area + dimensions: + - name: in + - name: out - name: disk description: "" labels: [] @@ -723,17 +664,6 @@ modules: chart_type: line dimensions: - name: svctm - - name: global - description: "" - labels: [] - metrics: - - name: system.io - description: Disk I/O - unit: "KiB/s" - chart_type: area - dimensions: - - name: in - - name: out - name: mount point description: "" labels: [] diff --git a/collectors/macos.plugin/metrics.csv b/collectors/macos.plugin/metrics.csv deleted file mode 100644 index 4fee1706..00000000 --- a/collectors/macos.plugin/metrics.csv +++ /dev/null @@ -1,51 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -system.cpu,,"user, nice, system, idle",percentage,"Total CPU utilization",stacked,,macos.plugin,mach_smi -system.ram,,"active, wired, throttled, compressor, inactive, purgeable, speculative, free",MiB,"System RAM",stacked,,macos.plugin,mach_smi -system.swapio,,"io, out",KiB/s,"Swap I/O",area,,macos.plugin,mach_smi -mem.pgfaults,,"memory, cow, pagein, pageout, compress, decompress, zero_fill, reactivate, purge",faults/s,"Memory Page Faults",line,,macos.plugin,mach_smi -system.load,,"load1, load5, load15",load,"System Load Average",line,,macos.plugin,sysctl -system.swap,,"free, used",MiB,"System 
Swap",stacked,,macos.plugin,sysctl -system.ipv4,,"received, sent",kilobits/s,"IPv4 Bandwidth",area,,macos.plugin,sysctl -ipv4.tcppackets,,"received, sent",packets/s,"IPv4 TCP Packets",line,,macos.plugin,sysctl -ipv4.tcperrors,,"InErrs, InCsumErrors, RetransSegs",packets/s,"IPv4 TCP Errors",line,,macos.plugin,sysctl -ipv4.tcphandshake,,"EstabResets, ActiveOpens, PassiveOpens, AttemptFails",events/s,"IPv4 TCP Handshake Issues",line,,macos.plugin,sysctl -ipv4.tcpconnaborts,,"baddata, userclosed, nomemory, timeout",connections/s,"TCP Connection Aborts",line,,macos.plugin,sysctl -ipv4.tcpofo,,inqueue,packets/s,"TCP Out-Of-Order Queue",line,,macos.plugin,sysctl -ipv4.tcpsyncookies,,"received, sent, failed",packets/s,"TCP SYN Cookies",line,,macos.plugin,sysctl -ipv4.ecnpkts,,"CEP, NoECTP",packets/s,"IPv4 ECN Statistics",line,,macos.plugin,sysctl -ipv4.udppackets,,"received, sent",packets/s,"IPv4 UDP Packets",line,,macos.plugin,sysctl -ipv4.udperrors,,"RcvbufErrors, InErrors, NoPorts, InCsumErrors, IgnoredMulti",events/s,"IPv4 UDP Errors",line,,macos.plugin,sysctl -ipv4.icmp,,"received, sent",packets/s,"IPv4 ICMP Packets",line,,macos.plugin,sysctl -ipv4.icmp_errors,,"InErrors, OutErrors, InCsumErrors",packets/s,"IPv4 ICMP Errors",line,,macos.plugin,sysctl -ipv4.icmpmsg,,"InEchoReps, OutEchoReps, InEchos, OutEchos",packets/s,"IPv4 ICMP Messages",line,,macos.plugin,sysctl -ipv4.packets,,"received, sent, forwarded, delivered",packets/s,"IPv4 Packets",line,,macos.plugin,sysctl -ipv4.fragsout,,"ok, failed, created",packets/s,"IPv4 Fragments Sent",line,,macos.plugin,sysctl -ipv4.fragsin,,"ok, failed, all",packets/s,"IPv4 Fragments Reassembly",line,,macos.plugin,sysctl -ipv4.errors,,"InDiscards, OutDiscards, InHdrErrors, OutNoRoutes, InAddrErrors, InUnknownProtos",packets/s,"IPv4 Errors",line,,macos.plugin,sysctl -ipv6.packets,,"received, sent, forwarded, delivers",packets/s,"IPv6 Packets",line,,macos.plugin,sysctl -ipv6.fragsout,,"ok, failed, all",packets/s,"IPv6 Fragments 
Sent",line,,macos.plugin,sysctl -ipv6.fragsin,,"ok, failed, timeout, all",packets/s,"IPv6 Fragments Reassembly",line,,macos.plugin,sysctl -ipv6.errors,,"InDiscards, OutDiscards, InHdrErrors, InAddrErrors, InTruncatedPkts, InNoRoutes, OutNoRoutes",packets/s,"IPv6 Errors",line,,macos.plugin,sysctl -ipv6.icmp,,"received, sent",messages/s,"IPv6 ICMP Messages",line,,macos.plugin,sysctl -ipv6.icmpredir,,"received, sent",redirects/s,"IPv6 ICMP Redirects",line,,macos.plugin,sysctl -ipv6.icmperrors,,"InErrors, OutErrors, InCsumErrors, InDestUnreachs, InPktTooBigs, InTimeExcds, InParmProblems, OutDestUnreachs, OutTimeExcds, OutParmProblems",errors/s,"IPv6 ICMP Errors",line,,macos.plugin,sysctl -ipv6.icmpechos,,"InEchos, OutEchos, InEchoReplies, OutEchoReplies",messages/s,"IPv6 ICMP Echo",line,,macos.plugin,sysctl -ipv6.icmprouter,,"InSolicits, OutSolicits, InAdvertisements, OutAdvertisements",messages/s,"IPv6 Router Messages",line,,macos.plugin,sysctl -ipv6.icmpneighbor,,"InSolicits, OutSolicits, InAdvertisements, OutAdvertisements",messages/s,"IPv6 Neighbor Messages",line,,macos.plugin,sysctl -ipv6.icmptypes,,"InType1, InType128, InType129, InType136, OutType1, OutType128, OutType129, OutType133, OutType135, OutType143",messages/s,"IPv6 ICMP Types",line,,macos.plugin,sysctl -system.uptime,,uptime,seconds,"System Uptime",line,,macos.plugin,sysctl -disk.io,disk,"read, writes",KiB/s,"Disk I/O Bandwidth",area,,macos.plugin,iokit -disk.ops,disk,"read, writes",operations/s,"Disk Completed I/O Operations",line,,macos.plugin,iokit -disk.util,disk,utilization,% of time working,"Disk Utilization Time",area,,macos.plugin,iokit -disk.iotime,disk,"reads, writes",milliseconds/s,"Disk Total I/O Time",line,,macos.plugin,iokit -disk.await,disk,"reads, writes",milliseconds/operation,"Average Completed I/O Operation Time",line,,macos.plugin,iokit -disk.avgsz,disk,"reads, writes",KiB/operation,"Average Completed I/O Operation Bandwidth",line,,macos.plugin,iokit 
-disk.svctm,disk,svctm,milliseconds/operation,"Average Service Time",line,,macos.plugin,iokit -system.io,,"in, out",KiB/s,"Disk I/O",area,,macos.plugin,iokit -disk.space,mount point,"avail, used, reserved_for_root",GiB,"Disk Space Usage for {mounted dir} [{mounted filesystem}]",stacked,,macos.plugin,iokit -disk.inodes,mount point,"avail, used, reserved_for_root",inodes,"Disk Files (inodes) Usage for {mounted dir} [{mounted filesystem}]",stacked,,macos.plugin,iokit -net.net,network device,"received, sent",kilobits/s,"Bandwidth",area,,macos.plugin,iokit -net.packets,network device,"received, sent, multicast_received, multicast_sent",packets/s,"Packets",line,,macos.plugin,iokit -net.errors,network device,"inbound, outbound",errors/s,"Interface Errors",line,,macos.plugin,iokit -net.drops,network device,inbound,drops/s,"Interface Drops",line,,macos.plugin,iokit -net.events,network device,"frames, collisions, carrier",events/s,"Network Interface Events",line,,macos.plugin,iokit diff --git a/collectors/metadata/schemas/multi-module.json b/collectors/metadata/schemas/multi-module.json deleted file mode 100644 index 6c332b81..00000000 --- a/collectors/metadata/schemas/multi-module.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "plugin_name": { - "type": "string" - }, - "modules": { - "type": "array", - "description": "A list of single module templates", - "items": { - "$ref": "./single-module.json" - } - } - } -}
\ No newline at end of file diff --git a/collectors/metadata/schemas/single-module.json b/collectors/metadata/schemas/single-module.json deleted file mode 100644 index 75052b07..00000000 --- a/collectors/metadata/schemas/single-module.json +++ /dev/null @@ -1,662 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "title": "Netdata collector single module meta", - "properties": { - "meta": { - "type": "object", - "properties": { - "plugin_name": { - "type": "string", - "description": "Plugin name (e.g. apps.plugin, proc.plugin, go.d.plugin). It must match the name of the executable file in the plugins.d directory." - }, - "module_name": { - "type": "string", - "description": "Module name (e.g. apache, /proc/stat, httpcheck). It usually has the same name as the module configuration file (external plugin) or the section name in netdata.conf (internal plugin)." - }, - "monitored_instance": { - "type": "object", - "description": "Information about the monitored instance (metrics source).", - "properties": { - "name": { - "description": "Metrics source name (e.g. VerneMQ, Network interfaces, Files and directories). Use official spelling for applications.", - "type": "string" - }, - "link": { - "description": "Link to the monitored instance official website if any.", - "type": "string" - }, - "categories": { - "type": "array", - "description": "Category IDs that this integration falls into. IDs can be found in integrations/categories.yaml", - "items": { - "type": "string", - "description": "String defining integration category" - } - }, - "icon_filename": { - "type": "string", - "description": "The filename of the integration's icon, as sourced from https://github.com/netdata/website/tree/master/themes/tailwind/static/img." 
- } - }, - "required": [ - "name", - "link", - "categories", - "icon_filename" - ] - }, - "keywords": { - "type": "array", - "description": "An array of terms related to the integration.", - "items": { - "type": "string" - } - }, - "related_resources": { - "type": "object", - "description": "Available related resources for the monitored instance.", - "properties": { - "integrations": { - "type": "object", - "description": "All collectors that provide additional metrics for the monitored instance. This may include generic collectors, e.g. 'httpcheck' for web servers to monitor specific endpoints.", - "properties": { - "list": { - "type": "array", - "description": "List of related integrations.", - "items": { - "type": "object", - "properties": { - "plugin_name": { - "type": "string", - "description": "Related integration plugin name." - }, - "module_name": { - "type": "string", - "description": "Related integration module name." - } - }, - "required": [ - "plugin_name", - "module_name" - ] - } - } - }, - "required": [ - "list" - ] - } - }, - "required": [ - "integrations" - ] - }, - "info_provided_to_referring_integrations": { - "type": "object", - "description": "Information that this collector can provide about itself when other integrations mention it. This text will not be appear on this collector's page.", - "properties": { - "description": { - "type": "string", - "description": "TODO" - } - }, - "required": [ - "description" - ] - }, - "most_popular": { - "type": "boolean", - "description": "Whether or not the integration is to be flagged as most-popular, meaning it will show up at the top of the menu." 
- } - }, - "required": [ - "plugin_name", - "module_name", - "monitored_instance", - "keywords", - "related_resources", - "info_provided_to_referring_integrations", - "most_popular" - ] - }, - "overview": { - "type": "object", - "properties": { - "data_collection": { - "type": "object", - "description": "An overview of the collected metrics and a detailed description of the data collection method.", - "properties": { - "metrics_description": { - "type": "string", - "description": "Brief description of what metrics are collected. A suggested approach here is to talk about the areas covered (e.g. health, performance, errors) and the metric scopes." - }, - "method_description": { - "type": "string", - "description": "Description of how metrics are collected (e.g. HTTP requests, establish a TCP connection and send a command, a specific binary execution). A suggested approach here is to provide a detailed description of how the collector gathers metrics: how many connections are established, exact requests/commands executed, exact endpoints used." - } - }, - "required": [ - "metrics_description", - "method_description" - ] - }, - "supported_platforms": { - "type": "object", - "description": "Supported OS/platforms. By default, all platforms supported by Netdata are considered supported. See https://learn.netdata.cloud/docs/installing/platform-support-policy#currently-supported-platforms.", - "properties": { - "include": { - "type": "array", - "description": "Only supported OS/platforms. Platforms supported by Netdata will be ignored, only those listed are considered supported.", - "items": { - "type": "string", - "minLength": 2 - } - }, - "exclude": { - "type": "array", - "description": "Unsupported OS/platforms. 
The result set is all platforms supported by Netdata except for those excluded.", - "items": { - "type": "string", - "minLength": 2 - } - } - }, - "required": [ - "include", - "exclude" - ] - }, - "multi-instance": { - "type": "boolean", - "description": "Whether this collector supports collecting metrics from multiple (for example, local and remote) instances." - }, - "additional_permissions": { - "type": "object", - "description": "Information about additional permissions other than those required by the Netdata process (e.g. setuid, specific Linux capabilities).", - "properties": { - "description": { - "type": "string" - } - }, - "required": [ - "description" - ] - }, - "default_behavior": { - "type": "object", - "description": "Descriptions of how the data collector works with the default configuration.", - "properties": { - "auto_detection": { - "type": "object", - "description": "Information about detecting (discovering) monitored instances with default configuration. Example: tries to connect to Apache running on localhost on ports 80, 443 and 8080.", - "properties": { - "description": { - "type": "string" - } - }, - "required": [ - "description" - ] - }, - "limits": { - "type": "object", - "description": "Information about limiting data collection, taking into account the default values of any configuration settings that restrict data collection (including filtering metrics).", - "properties": { - "description": { - "type": "string" - } - }, - "required": [ - "description" - ] - }, - "performance_impact": { - "type": "object", - "description": "Information about the known impact on the performance of the monitored application or system.", - "properties": { - "description": { - "type": "string" - } - }, - "required": [ - "description" - ] - } - }, - "required": [ - "auto_detection", - "limits", - "performance_impact" - ] - } - }, - "required": [ - "data_collection", - "supported_platforms", - "multi-instance", - "additional_permissions", - "default_behavior" 
- ] - }, - "setup": { - "type": "object", - "description": "Complete information that is needed to enable and configure the data collector.", - "properties": { - "prerequisites": { - "type": "object", - "description": "Actions the user must take to make the collector work, if any. It includes both configuring Netdata (e.g. if the collector is disabled by default) and configuring the monitored instance (e.g. enabling Apache mod_stats).", - "properties": { - "list": { - "type": "array", - "description": "List of prerequisites.", - "items": { - "type": "object", - "properties": { - "title": { - "type": "string", - "description": "Title should reflect the description, be short and in the form of a command (e.g. Create netdata user, Enable build-in web server)." - }, - "description": { - "type": "string", - "description": "Steps to follow to meet this prerequisite." - } - }, - "required": [ - "title", - "description" - ] - } - } - }, - "required": [ - "list" - ] - }, - "configuration": { - "description": "Information on how to configure the collector and available configuration options.", - "type": "object", - "properties": { - "file": { - "type": "object", - "description": "Configuration file.", - "properties": { - "name": { - "type": "string", - "description": "Configuration file name." - }, - "section_name": { - "type": "string", - "description": "The name of the section in the configuration file. Only for data collectors whose configuration is in netdata.conf (e.g. proc.plugin modules)." - } - }, - "required": [ - "name" - ] - }, - "options": { - "type": "object", - "description": "All information about the available configuration options.", - "properties": { - "description": { - "type": "string", - "description": "Optional common information about options." 
- }, - "folding": { - "$ref": "#/$defs/_folding" - }, - "list": { - "type": "array", - "description": "List of configuration options.", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "Option name." - }, - "description": { - "type": "string", - "description": "Option description. Must be short. Use 'detailed_description' for a long description." - }, - "detailed_description": { - "type": "string", - "description": "Option detailed description. Use it to describe in details complex options." - }, - "default_value": { - "type": [ - "string", - "number", - "boolean" - ], - "description": "Default value. Leave empty if none." - }, - "required": { - "type": "boolean", - "description": "Indicates whether this option is required or not. The option is required if the collector does not work, if it is not set." - } - }, - "required": [ - "name", - "description", - "default_value", - "required" - ] - } - } - }, - "required": [ - "description", - "folding", - "list" - ] - }, - "examples": { - "type": "object", - "description": "Configuration examples. The more examples the better!", - "properties": { - "folding": { - "$ref": "#/$defs/_folding" - }, - "list": { - "type": "array", - "description": "List of configuration examples.", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "Example name." - }, - "folding": { - "$ref": "#/$defs/_folding_relaxed" - }, - "description": { - "type": "string", - "description": "Example description." - }, - "config": { - "type": "string", - "description": "Example configuration." 
- } - }, - "required": [ - "name", - "description", - "config" - ] - } - } - }, - "required": [ - "folding", - "list" - ] - } - }, - "required": [ - "file", - "options", - "examples" - ] - } - }, - "required": [ - "prerequisites", - "configuration" - ] - }, - "troubleshooting": { - "type": "object", - "description": "Information needed to troubleshoot issues with this collector.", - "properties": { - "problems": { - "type": "object", - "description": "Common problems that users face again and again... and their solutions.", - "properties": { - "list": { - "type": "array", - "description": "List of common problems.", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "Problem name." - }, - "description": { - "type": "string", - "description": "Explanation of the problem and its solution." - } - } - }, - "required": [ - "name", - "description" - ] - } - }, - "required": [ - "list" - ] - } - }, - "required": [ - "problems" - ] - }, - "alerts": { - "type": "array", - "description": "The list of configured alerts shipped with Netdata for this collector.", - "items": { - "type": "object", - "description": "Information about the configured alert.", - "properties": { - "name": { - "type": "string", - "description": "Alert's 'alarm' or 'template' value (https://learn.netdata.cloud/docs/alerting/health-configuration-reference#alarm-line-alarm-or-template)." - }, - "link": { - "type": "string", - "description": "Link to github .conf file that this alert originates from" - }, - "metric": { - "type": "string", - "description": "Alert's 'on' value (https://learn.netdata.cloud/docs/alerting/health-configuration-reference#alarm-line-on)." - }, - "info": { - "type": "string", - "description": "Alert's 'info' value (https://learn.netdata.cloud/docs/alerting/health-configuration-reference#alarm-line-info)." 
- }, - "os": { - "type": "string", - "description": "Alert's 'os' value (https://learn.netdata.cloud/docs/alerting/health-configuration-reference#alarm-line-os)." - } - }, - "required": [ - "name", - "link", - "metric", - "info" - ] - } - }, - "metrics": { - "type": "object", - "description": "Collected metrics grouped by scope. The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.", - "properties": { - "folding": { - "$ref": "#/$defs/_folding" - }, - "description": { - "type": "string", - "description": "General description of collected metrics/scopes." - }, - "availability": { - "type": "array", - "description": "Metrics collection availability conditions. Some metrics are only available when certain conditions are met. For example, Apache exposes additional metrics when Extended status is configured, Consul exposes different set of metrics depends on its mode. This field should list the available conditions that will later be matched for each of the metrics.", - "items": { - "type": "string", - "description": "Availability condition name." - } - }, - "scopes": { - "type": "array", - "description": "List of scopes and their metrics.", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "Scope name." - }, - "description": { - "type": "string", - "description": "Scope description." - }, - "labels": { - "type": "array", - "description": "Label set of the scope.", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "Label name." - }, - "description": { - "type": "string", - "description": "Label description." - } - }, - "required": [ - "name", - "description" - ] - } - }, - "metrics": { - "type": "array", - "description": "List of collected metrics (chart contexts) in the scope.", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "Metric name (chart context)." 
- }, - "availability": { - "type": "array", - "description": "Metric collection availability conditions. An empty list means that it is available for all conditions defined in 'metrics.availability'.", - "items": { - "type": "string", - "description": "Availability condition name." - } - }, - "description": { - "type": "string", - "description": "Metric description (chart title)." - }, - "unit": { - "type": "string", - "description": "Metric description (chart unit)." - }, - "chart_type": { - "type": "string", - "description": "Metric description (chart type).", - "enum": [ - "line", - "area", - "stacked" - ] - }, - "dimensions": { - "type": "array", - "description": "", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "Dimension name." - } - }, - "required": [ - "name" - ] - } - } - }, - "required": [ - "name", - "description", - "unit", - "chart_type", - "dimensions" - ] - } - } - }, - "required": [ - "name", - "description", - "labels", - "metrics" - ] - } - } - }, - "required": [ - "folding", - "description", - "availability", - "scopes" - ] - } - }, - "required": [ - "meta", - "overview", - "setup", - "troubleshooting", - "alerts", - "metrics" - ], - "$defs": { - "_folding": { - "type": "object", - "description": "Content folding settings.", - "properties": { - "title": { - "description": "Folded content summary title.", - "type": "string" - }, - "enabled": { - "description": "Determines if this content should be folded.", - "type": "boolean" - } - }, - "required": [ - "title", - "enabled" - ] - }, - "_folding_relaxed": { - "type": "object", - "description": "Content folding settings with optional title.", - "properties": { - "title": { - "description": "Folded content summary title.", - "type": "string" - }, - "enabled": { - "description": "Determines if this content should be folded.", - "type": "boolean" - } - }, - "required": [ - "enabled" - ] - } - } -} diff --git 
a/collectors/metadata/single-module-template.yaml b/collectors/metadata/single-module-template.yaml deleted file mode 100644 index 7f040350..00000000 --- a/collectors/metadata/single-module-template.yaml +++ /dev/null @@ -1,97 +0,0 @@ -meta: - plugin_name: "" - module_name: "" - alternative_monitored_instances: [] - monitored_instance: - name: "" - link: "" - categories: [] - icon_filename: "" - related_resources: - integrations: - list: - - plugin_name: "" - module_name: "" - info_provided_to_referring_integrations: - description: "" - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: "" - method_description: "" - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: "" - limits: - description: "" - performance_impact: - description: "" -setup: - prerequisites: - list: - - title: "" - description: "" - configuration: - file: - name: "" - description: "" - options: - description: "" - folding: - title: "" - enabled: true - list: - - name: "" - default_value: "" - description: "" - required: false - examples: - folding: - enabled: true - title: "" - list: - - name: "" - folding: - enabled: false - description: "" - config: "" -troubleshooting: - problems: - list: - - name: "" - description: "" -alerts: - - info: "" - link: "" - metric: "" - name: "" - os: "" -metrics: - folding: - enabled: false - title: "" - description: "" - availability: - - "" - scopes: - - name: "" - description: "" - labels: - - name: "" - description: "" - metrics: - - name: "" - availability: - - "" - description: "" - unit: "" - chart_type: "" - dimensions: - - name: "" diff --git a/collectors/nfacct.plugin/metadata.yaml b/collectors/nfacct.plugin/metadata.yaml index 2dbd31ec..943471a3 100644 --- a/collectors/nfacct.plugin/metadata.yaml +++ b/collectors/nfacct.plugin/metadata.yaml @@ -1,119 +1,133 @@ -meta: - plugin_name: nfacct.plugin - 
module_name: nfacct.plugin - monitored_instance: - name: Netfilter - link: '' - categories: - - data-collection.networking-stack-and-network-interfaces - icon_filename: 'netfilter.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor Netfilter metrics for optimal packet filtering and manipulation. Keep tabs on packet counts, dropped packets, and error rates to secure network operations.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: nfacct.plugin +modules: + - meta: + plugin_name: nfacct.plugin + module_name: nfacct.plugin + monitored_instance: + name: Netfilter + link: 'https://www.netfilter.org/' + categories: + - data-collection.linux-systems.firewall-metrics + icon_filename: 'netfilter.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: 'Monitor Netfilter metrics for optimal packet filtering and manipulation. Keep tabs on packet counts, dropped packets, and error rates to secure network operations.' 
+ method_description: 'Netdata uses libmnl (https://www.netfilter.org/projects/libmnl/index.html) to collect information.' + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: 'This plugin needs setuid.' + default_behavior: + auto_detection: + description: 'This plugin uses socket to connect with netfilter to collect data' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: + - title: Install required packages + description: | + Install `libmnl-dev` and `libnetfilter-acct-dev` using the package manager of your system. + configuration: + file: + name: 'netdata.conf' + section_name: '[plugin:nfacct]' + description: 'This is netdata main configuration file' + options: + description: '' + folding: + title: 'Config options' + enabled: true + list: + - name: update every + description: Data collection frequency. + default_value: 1 + required: false + - name: command options + description: Additional parameters for collector + default_value: "" + required: false + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: netfilter.netlink_new - description: Connection Tracker New Connections - unit: "connections/s" - chart_type: line - dimensions: - - name: new - - name: ignore - - name: invalid - - name: netfilter.netlink_changes - description: Connection Tracker Changes - unit: "changes/s" - chart_type: line - dimensions: - - name: insert - - name: delete - - name: delete_list - - name: netfilter.netlink_search - description: Connection Tracker Searches - unit: "searches/s" - chart_type: line - dimensions: - - name: searched - - name: search_restart - - name: found - - name: netfilter.netlink_errors - description: Connection Tracker Errors - unit: "events/s" - chart_type: line - dimensions: - - name: icmp_error - - name: insert_failed - - name: drop - - name: early_drop - - name:
netfilter.netlink_expect - description: Connection Tracker Expectations - unit: "expectations/s" - chart_type: line - dimensions: - - name: created - - name: deleted - - name: new - - name: netfilter.nfacct_packets - description: Netfilter Accounting Packets - unit: "packets/s" - chart_type: line - dimensions: - - name: a dimension per nfacct object - - name: netfilter.nfacct_bytes - description: Netfilter Accounting Bandwidth - unit: "kilobytes/s" - chart_type: line - dimensions: - - name: a dimension per nfacct object + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: netfilter.netlink_new + description: Connection Tracker New Connections + unit: "connections/s" + chart_type: line + dimensions: + - name: new + - name: ignore + - name: invalid + - name: netfilter.netlink_changes + description: Connection Tracker Changes + unit: "changes/s" + chart_type: line + dimensions: + - name: insert + - name: delete + - name: delete_list + - name: netfilter.netlink_search + description: Connection Tracker Searches + unit: "searches/s" + chart_type: line + dimensions: + - name: searched + - name: search_restart + - name: found + - name: netfilter.netlink_errors + description: Connection Tracker Errors + unit: "events/s" + chart_type: line + dimensions: + - name: icmp_error + - name: insert_failed + - name: drop + - name: early_drop + - name: netfilter.netlink_expect + description: Connection Tracker Expectations + unit: "expectations/s" + chart_type: line + dimensions: + - name: created + - name: deleted + - name: new + - name: netfilter.nfacct_packets + description: Netfilter Accounting Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: a dimension per nfacct object + - name: netfilter.nfacct_bytes + description: Netfilter Accounting Bandwidth + unit: "kilobytes/s" + chart_type: line + dimensions: + - name: a dimension per nfacct object diff --git 
a/collectors/nfacct.plugin/metrics.csv b/collectors/nfacct.plugin/metrics.csv deleted file mode 100644 index 7bd00d3f..00000000 --- a/collectors/nfacct.plugin/metrics.csv +++ /dev/null @@ -1,8 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -netfilter.netlink_new,,"new, ignore, invalid",connections/s,"Connection Tracker New Connections",line,,nfacct.plugin, -netfilter.netlink_changes,,"insert, delete, delete_list",changes/s,"Connection Tracker Changes",line,,nfacct.plugin, -netfilter.netlink_search,,"searched, search_restart, found",searches/s,"Connection Tracker Searches",line,,nfacct.plugin, -netfilter.netlink_errors,,"icmp_error, insert_failed, drop, early_drop",events/s,"Connection Tracker Errors",line,,nfacct.plugin, -netfilter.netlink_expect,,"created, deleted, new",expectations/s,"Connection Tracker Expectations",line,,nfacct.plugin, -netfilter.nfacct_packets,,a dimension per nfacct object,packets/s,"Netfilter Accounting Packets",line,,nfacct.plugin, -netfilter.nfacct_bytes,,a dimension per nfacct object,kilobytes/s,"Netfilter Accounting Bandwidth",line,,nfacct.plugin,
\ No newline at end of file diff --git a/collectors/perf.plugin/metadata.yaml b/collectors/perf.plugin/metadata.yaml index a93970e5..d7539b50 100644 --- a/collectors/perf.plugin/metadata.yaml +++ b/collectors/perf.plugin/metadata.yaml @@ -1,183 +1,216 @@ -meta: - plugin_name: perf.plugin - module_name: perf.plugin - monitored_instance: - name: CPU performance - link: '' - categories: - - data-collection.linux-systems - icon_filename: 'bolt.svg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor CPU performance to ensure optimal computational operations. Monitor core usage, load averages, and thermal throttling for seamless computation tasks.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: perf.plugin +modules: + - meta: + plugin_name: perf.plugin + module_name: perf.plugin + monitored_instance: + name: CPU performance + link: "https://kernel.org/" + categories: + - data-collection.linux-systems + icon_filename: "bolt.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - linux + - cpu performance + - cpu cache + - perf.plugin + most_popular: false + overview: + data_collection: + 
metrics_description: "This collector monitors CPU performance metrics about cycles, instructions, migrations, cache operations and more." + method_description: "It uses syscall (2) to open a file descriptor to monitor the perf events." + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "It needs setuid to use necessary syscall to collect perf events. Netdata sets the permission during installation time." + default_behavior: + auto_detection: + description: "" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: "netdata.conf" + section_name: "[plugin:perf]" + description: "The main netdata configuration file." + options: + description: | + You can get the available options running: + + ```bash + /usr/libexec/netdata/plugins.d/perf.plugin --help + ``` + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency. + default_value: 1 + required: false + - name: command options + description: Command options that specify charts shown by plugin. + default_value: 1 + required: true + examples: + folding: + enabled: true + title: "Config" + list: + - name: All metrics + folding: + enabled: false + description: Monitor all metrics available. + config: | + [plugin:perf] + command options = all + - name: CPU cycles + description: Monitor CPU cycles.
+ config: | + [plugin:perf] + command options = cycles + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: perf.cpu_cycles - description: CPU cycles - unit: "cycles/s" - chart_type: line - dimensions: - - name: cpu - - name: ref_cpu - - name: perf.instructions - description: Instructions - unit: "instructions/s" - chart_type: line - dimensions: - - name: instructions - - name: perf.instructions_per_cycle - description: Instructions per Cycle(IPC) - unit: "instructions/cycle" - chart_type: line - dimensions: - - name: ipc - - name: perf.branch_instructions - description: Branch instructions - unit: "instructions/s" - chart_type: line - dimensions: - - name: instructions - - name: misses - - name: perf.cache - description: Cache operations - unit: "operations/s" - chart_type: line - dimensions: - - name: references - - name: misses - - name: perf.bus_cycles - description: Bus cycles - unit: "cycles/s" - chart_type: line - dimensions: - - name: bus - - name: perf.stalled_cycles - description: Stalled frontend and backend cycles - unit: "cycles/s" - chart_type: line - dimensions: - - name: frontend - - name: backend - - name: perf.migrations - description: CPU migrations - unit: "migrations" - chart_type: line - dimensions: - - name: migrations - - name: perf.alignment_faults - description: Alignment faults - unit: "faults" - chart_type: line - dimensions: - - name: faults - - name: perf.emulation_faults - description: Emulation faults - unit: "faults" - chart_type: line - dimensions: - - name: faults - - name: perf.l1d_cache - description: L1D cache operations - unit: "events/s" - chart_type: line - dimensions: - - name: read_access - - name: read_misses - - name: write_access - - name: write_misses - - name: perf.l1d_cache_prefetch - description: L1D prefetch cache operations - unit: "prefetches/s" - chart_type: line - dimensions: - - name: prefetches - - name: perf.l1i_cache - description: L1I cache operations - unit: "events/s" - chart_type: 
line - dimensions: - - name: read_access - - name: read_misses - - name: perf.ll_cache - description: LL cache operations - unit: "events/s" - chart_type: line - dimensions: - - name: read_access - - name: read_misses - - name: write_access - - name: write_misses - - name: perf.dtlb_cache - description: DTLB cache operations - unit: "events/s" - chart_type: line - dimensions: - - name: read_access - - name: read_misses - - name: write_access - - name: write_misses - - name: perf.itlb_cache - description: ITLB cache operations - unit: "events/s" - chart_type: line - dimensions: - - name: read_access - - name: read_misses - - name: perf.pbu_cache - description: PBU cache operations - unit: "events/s" - chart_type: line - dimensions: - - name: read_access + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: perf.cpu_cycles + description: CPU cycles + unit: "cycles/s" + chart_type: line + dimensions: + - name: cpu + - name: ref_cpu + - name: perf.instructions + description: Instructions + unit: "instructions/s" + chart_type: line + dimensions: + - name: instructions + - name: perf.instructions_per_cycle + description: Instructions per Cycle(IPC) + unit: "instructions/cycle" + chart_type: line + dimensions: + - name: ipc + - name: perf.branch_instructions + description: Branch instructions + unit: "instructions/s" + chart_type: line + dimensions: + - name: instructions + - name: misses + - name: perf.cache + description: Cache operations + unit: "operations/s" + chart_type: line + dimensions: + - name: references + - name: misses + - name: perf.bus_cycles + description: Bus cycles + unit: "cycles/s" + chart_type: line + dimensions: + - name: bus + - name: perf.stalled_cycles + description: Stalled frontend and backend cycles + unit: "cycles/s" + chart_type: line + dimensions: + - name: frontend + - name: 
backend + - name: perf.migrations + description: CPU migrations + unit: "migrations" + chart_type: line + dimensions: + - name: migrations + - name: perf.alignment_faults + description: Alignment faults + unit: "faults" + chart_type: line + dimensions: + - name: faults + - name: perf.emulation_faults + description: Emulation faults + unit: "faults" + chart_type: line + dimensions: + - name: faults + - name: perf.l1d_cache + description: L1D cache operations + unit: "events/s" + chart_type: line + dimensions: + - name: read_access + - name: read_misses + - name: write_access + - name: write_misses + - name: perf.l1d_cache_prefetch + description: L1D prefetch cache operations + unit: "prefetches/s" + chart_type: line + dimensions: + - name: prefetches + - name: perf.l1i_cache + description: L1I cache operations + unit: "events/s" + chart_type: line + dimensions: + - name: read_access + - name: read_misses + - name: perf.ll_cache + description: LL cache operations + unit: "events/s" + chart_type: line + dimensions: + - name: read_access + - name: read_misses + - name: write_access + - name: write_misses + - name: perf.dtlb_cache + description: DTLB cache operations + unit: "events/s" + chart_type: line + dimensions: + - name: read_access + - name: read_misses + - name: write_access + - name: write_misses + - name: perf.itlb_cache + description: ITLB cache operations + unit: "events/s" + chart_type: line + dimensions: + - name: read_access + - name: read_misses + - name: perf.pbu_cache + description: PBU cache operations + unit: "events/s" + chart_type: line + dimensions: + - name: read_access diff --git a/collectors/perf.plugin/metrics.csv b/collectors/perf.plugin/metrics.csv deleted file mode 100644 index 786e0743..00000000 --- a/collectors/perf.plugin/metrics.csv +++ /dev/null @@ -1,18 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -perf.cpu_cycles,,"cpu, ref_cpu",cycles/s,"CPU cycles",line,,perf.plugin, 
-perf.instructions,,instructions,instructions/s,"Instructions",line,,perf.plugin, -perf.instructions_per_cycle,,ipc,instructions/cycle,"Instructions per Cycle(IPC)",line,,perf.plugin, -perf.branch_instructions,,"instructions, misses",instructions/s,"Branch instructions",line,,perf.plugin, -perf.cache,,"references, misses",operations/s,"Cache operations",line,,perf.plugin, -perf.bus_cycles,,bus,cycles/s,"Bus cycles",line,,perf.plugin, -perf.stalled_cycles,,"frontend, backend",cycles/s,"Stalled frontend and backend cycles",line,,perf.plugin, -perf.migrations,,migrations,migrations,"CPU migrations",line,,perf.plugin, -perf.alignment_faults,,faults,faults,"Alignment faults",line,,perf.plugin, -perf.emulation_faults,,faults,faults,"Emulation faults",line,,perf.plugin, -perf.l1d_cache,,"read_access, read_misses, write_access, write_misses",events/s,"L1D cache operations",line,,perf.plugin, -perf.l1d_cache_prefetch,,prefetches,prefetches/s,"L1D prefetch cache operations",line,,perf.plugin, -perf.l1i_cache,,"read_access, read_misses",events/s,"L1I cache operations",line,,perf.plugin, -perf.ll_cache,,"read_access, read_misses, write_access, write_misses",events/s,"LL cache operations",line,,perf.plugin, -perf.dtlb_cache,,"read_access, read_misses, write_access, write_misses",events/s,"DTLB cache operations",line,,perf.plugin, -perf.itlb_cache,,"read_access, read_misses",events/s,"ITLB cache operations",line,,perf.plugin, -perf.pbu_cache,,read_access,events/s,"PBU cache operations",line,,perf.plugin,
\ No newline at end of file diff --git a/collectors/plugins.d/gperf-config.txt b/collectors/plugins.d/gperf-config.txt index 43be129e..b8140e66 100644 --- a/collectors/plugins.d/gperf-config.txt +++ b/collectors/plugins.d/gperf-config.txt @@ -12,41 +12,44 @@ PARSER_KEYWORD; # # Plugins Only Keywords # -FLUSH, 97, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 1 -DISABLE, 98, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 2 -EXIT, 99, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 3 -HOST, 71, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 4 -HOST_DEFINE, 72, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 5 -HOST_DEFINE_END, 73, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 6 -HOST_LABEL, 74, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 7 +FLUSH, 97, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 1 +DISABLE, 98, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 2 +EXIT, 99, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 3 +HOST, 71, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 4 +HOST_DEFINE, 72, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 5 +HOST_DEFINE_END, 73, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 6 +HOST_LABEL, 74, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 7 # # Common keywords # -BEGIN, 12, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 8 -CHART, 32, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 9 -CLABEL, 34, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 10 -CLABEL_COMMIT, 35, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 11 -DIMENSION, 31, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 12 -END, 13, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 13 -FUNCTION, 41, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 14 -FUNCTION_RESULT_BEGIN, 42, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 15 -LABEL, 51, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, 
WORKER_PARSER_FIRST_JOB + 16 -OVERWRITE, 52, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 17 -SET, 11, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 18 -VARIABLE, 53, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 19 +BEGIN, 12, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 8 +CHART, 32, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 9 +CLABEL, 34, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 10 +CLABEL_COMMIT, 35, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 11 +DIMENSION, 31, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 12 +END, 13, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 13 +FUNCTION, 41, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 14 +FUNCTION_RESULT_BEGIN, 42, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 15 +LABEL, 51, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 16 +OVERWRITE, 52, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 17 +SET, 11, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 18 +VARIABLE, 53, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 19 +DYNCFG_ENABLE, 101, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 20 +DYNCFG_REGISTER_MODULE, 102, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 21 +REPORT_JOB_STATUS, 110, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 22 # # Streaming only keywords # -CLAIMED_ID, 61, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 20 -BEGIN2, 2, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 21 -SET2, 1, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 22 -END2, 3, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 23 +CLAIMED_ID, 61, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 23 
+BEGIN2, 2, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 24 +SET2, 1, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 25 +END2, 3, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 26 # # Streaming Replication keywords # -CHART_DEFINITION_END, 33, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 24 -RBEGIN, 22, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 25 -RDSTATE, 23, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 26 -REND, 25, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 27 -RSET, 21, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 28 -RSSTATE, 24, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 29 +CHART_DEFINITION_END, 33, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 27 +RBEGIN, 22, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 28 +RDSTATE, 23, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 29 +REND, 25, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 30 +RSET, 21, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 31 +RSSTATE, 24, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 32 diff --git a/collectors/plugins.d/gperf-hashtable.h b/collectors/plugins.d/gperf-hashtable.h index b9e58975..e7d20126 100644 --- a/collectors/plugins.d/gperf-hashtable.h +++ b/collectors/plugins.d/gperf-hashtable.h @@ -30,12 +30,12 @@ #endif -#define GPERF_PARSER_TOTAL_KEYWORDS 29 +#define GPERF_PARSER_TOTAL_KEYWORDS 32 #define GPERF_PARSER_MIN_WORD_LENGTH 3 -#define GPERF_PARSER_MAX_WORD_LENGTH 21 -#define GPERF_PARSER_MIN_HASH_VALUE 4 -#define GPERF_PARSER_MAX_HASH_VALUE 36 -/* maximum key range = 33, duplicates = 0 */ +#define GPERF_PARSER_MAX_WORD_LENGTH 22 +#define GPERF_PARSER_MIN_HASH_VALUE 3 +#define GPERF_PARSER_MAX_HASH_VALUE 41 +/* maximum key range = 39, duplicates = 0 */ #ifdef __GNUC__ __inline @@ -49,99 +49,105 @@ gperf_keyword_hash_function (register const char *str, register size_t len) { static unsigned char asso_values[] = { - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 
37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 15, 10, 1, 1, 9, - 4, 37, 0, 20, 37, 37, 9, 37, 14, 0, - 37, 37, 1, 0, 37, 7, 13, 37, 18, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37 + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 16, 7, 2, 11, 0, + 8, 42, 3, 9, 42, 42, 9, 42, 0, 2, + 42, 42, 1, 3, 42, 7, 17, 42, 27, 2, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 
42, 42, 42, 42 }; return len + asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]]; } static PARSER_KEYWORD gperf_keywords[] = { - {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, + {(char*)0}, {(char*)0}, {(char*)0}, +#line 30 "gperf-config.txt" + {"END", 13, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 13}, +#line 46 "gperf-config.txt" + {"END2", 3, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 26}, +#line 53 "gperf-config.txt" + {"REND", 25, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 30}, +#line 35 "gperf-config.txt" + {"SET", 11, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 18}, +#line 45 "gperf-config.txt" + {"SET2", 1, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 25}, +#line 54 "gperf-config.txt" + {"RSET", 21, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 31}, #line 18 "gperf-config.txt" - {"HOST", 71, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 4}, -#line 51 "gperf-config.txt" - {"RSET", 21, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 28}, + {"HOST", 71, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 4}, #line 26 "gperf-config.txt" - {"CHART", 32, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 9}, - {(char*)0}, -#line 52 "gperf-config.txt" - {"RSSTATE", 24, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 29}, -#line 49 "gperf-config.txt" - {"RDSTATE", 23, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 26}, + {"CHART", 32, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 9}, +#line 55 "gperf-config.txt" + {"RSSTATE", 24, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 32}, +#line 25 "gperf-config.txt" + {"BEGIN", 12, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 8}, +#line 44 "gperf-config.txt" + {"BEGIN2", 2, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 24}, +#line 51 "gperf-config.txt" + {"RBEGIN", 22, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 28}, #line 21 "gperf-config.txt" - 
{"HOST_LABEL", 74, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 7}, + {"HOST_LABEL", 74, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 7}, #line 19 "gperf-config.txt" - {"HOST_DEFINE", 72, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 5}, -#line 35 "gperf-config.txt" - {"SET", 11, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 18}, -#line 42 "gperf-config.txt" - {"SET2", 1, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 22}, -#line 50 "gperf-config.txt" - {"REND", 25, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 27}, -#line 20 "gperf-config.txt" - {"HOST_DEFINE_END", 73, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 6}, + {"HOST_DEFINE", 72, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 5}, #line 27 "gperf-config.txt" - {"CLABEL", 34, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 10}, -#line 48 "gperf-config.txt" - {"RBEGIN", 22, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 25}, + {"CLABEL", 34, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 10}, +#line 39 "gperf-config.txt" + {"REPORT_JOB_STATUS", 110, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 22}, +#line 52 "gperf-config.txt" + {"RDSTATE", 23, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 29}, +#line 20 "gperf-config.txt" + {"HOST_DEFINE_END", 73, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 6}, +#line 43 "gperf-config.txt" + {"CLAIMED_ID", 61, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 23}, #line 15 "gperf-config.txt" - {"FLUSH", 97, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 1}, + {"FLUSH", 97, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 1}, #line 31 "gperf-config.txt" - {"FUNCTION", 41, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 14}, -#line 40 "gperf-config.txt" - {"CLAIMED_ID", 61, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 20}, -#line 47 "gperf-config.txt" - {"CHART_DEFINITION_END", 33, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 
24}, -#line 34 "gperf-config.txt" - {"OVERWRITE", 52, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 17}, + {"FUNCTION", 41, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 14}, #line 28 "gperf-config.txt" - {"CLABEL_COMMIT", 35, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 11}, -#line 25 "gperf-config.txt" - {"BEGIN", 12, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 8}, -#line 41 "gperf-config.txt" - {"BEGIN2", 2, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 21}, -#line 30 "gperf-config.txt" - {"END", 13, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 13}, -#line 43 "gperf-config.txt" - {"END2", 3, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 23}, + {"CLABEL_COMMIT", 35, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 11}, +#line 50 "gperf-config.txt" + {"CHART_DEFINITION_END", 33, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 27}, +#line 37 "gperf-config.txt" + {"DYNCFG_ENABLE", 101, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 20}, #line 16 "gperf-config.txt" - {"DISABLE", 98, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 2}, -#line 33 "gperf-config.txt" - {"LABEL", 51, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 16}, + {"DISABLE", 98, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 2}, +#line 34 "gperf-config.txt" + {"OVERWRITE", 52, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 17}, #line 29 "gperf-config.txt" - {"DIMENSION", 31, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 12}, + {"DIMENSION", 31, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 12}, +#line 33 "gperf-config.txt" + {"LABEL", 51, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 16}, #line 17 "gperf-config.txt" - {"EXIT", 99, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 3}, -#line 32 "gperf-config.txt" 
- {"FUNCTION_RESULT_BEGIN", 42, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 15}, + {"EXIT", 99, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 3}, {(char*)0}, {(char*)0}, {(char*)0}, +#line 38 "gperf-config.txt" + {"DYNCFG_REGISTER_MODULE", 102, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 21}, +#line 32 "gperf-config.txt" + {"FUNCTION_RESULT_BEGIN", 42, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 15}, + {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, #line 36 "gperf-config.txt" - {"VARIABLE", 53, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 19} + {"VARIABLE", 53, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 19} }; PARSER_KEYWORD * diff --git a/collectors/plugins.d/plugins_d.c b/collectors/plugins.d/plugins_d.c index 6a235b4e..08c26a19 100644 --- a/collectors/plugins.d/plugins_d.c +++ b/collectors/plugins.d/plugins_d.c @@ -217,6 +217,11 @@ void *pluginsd_main(void *ptr) // disable some plugins by default config_get_boolean(CONFIG_SECTION_PLUGINS, "slabinfo", CONFIG_BOOLEAN_NO); + // it crashes (both threads) on Alpine after we made it multi-threaded + // works with "--device /dev/ipmi0", but this is not default + // see https://github.com/netdata/netdata/pull/15564 for details + if (getenv("NETDATA_LISTENER_PORT")) + config_get_boolean(CONFIG_SECTION_PLUGINS, "freeipmi", CONFIG_BOOLEAN_NO); // store the errno for each plugins directory // so that we don't log broken directories on each loop diff --git a/collectors/plugins.d/plugins_d.h b/collectors/plugins.d/plugins_d.h index fe43a19f..4988b507 100644 --- a/collectors/plugins.d/plugins_d.h +++ b/collectors/plugins.d/plugins_d.h @@ -43,6 +43,11 @@ #define PLUGINSD_KEYWORD_HOST_LABEL "HOST_LABEL" #define PLUGINSD_KEYWORD_HOST "HOST" +#define PLUGINSD_KEYWORD_DYNCFG_ENABLE "DYNCFG_ENABLE" +#define PLUGINSD_KEYWORD_DYNCFG_REGISTER_MODULE "DYNCFG_REGISTER_MODULE" + +#define 
PLUGINSD_KEYWORD_REPORT_JOB_STATUS "REPORT_JOB_STATUS" + #define PLUGINSD_KEYWORD_EXIT "EXIT" #define PLUGINS_FUNCTIONS_TIMEOUT_DEFAULT 10 // seconds @@ -80,6 +85,9 @@ struct plugind { time_t started_t; + const DICTIONARY_ITEM *cfg_dict_item; + struct configurable_plugin *configuration; + struct plugind *prev; struct plugind *next; }; @@ -91,8 +99,6 @@ void pluginsd_process_thread_cleanup(void *ptr); size_t pluginsd_initialize_plugin_directories(); - - #define pluginsd_function_result_begin_to_buffer(wb, transaction, code, content_type, expires) \ buffer_sprintf(wb \ , PLUGINSD_KEYWORD_FUNCTION_RESULT_BEGIN " \"%s\" %d \"%s\" %ld\n" \ @@ -117,4 +123,13 @@ size_t pluginsd_initialize_plugin_directories(); #define pluginsd_function_result_end_to_stdout() \ fprintf(stdout, "\n" PLUGINSD_KEYWORD_FUNCTION_RESULT_END "\n") +static inline void pluginsd_function_json_error(const char *transaction, int code, const char *msg) { + char buffer[PLUGINSD_LINE_MAX + 1]; + json_escape_string(buffer, msg, PLUGINSD_LINE_MAX); + + pluginsd_function_result_begin_to_stdout(transaction, code, "application/json", now_realtime_sec()); + fprintf(stdout, "{\"status\":%d,\"error_message\":\"%s\"}", code, buffer); + pluginsd_function_result_end_to_stdout(); +} + #endif /* NETDATA_PLUGINS_D_H */ diff --git a/collectors/plugins.d/pluginsd_parser.c b/collectors/plugins.d/pluginsd_parser.c index cda17710..19aa4544 100644 --- a/collectors/plugins.d/pluginsd_parser.c +++ b/collectors/plugins.d/pluginsd_parser.c @@ -699,6 +699,7 @@ struct inflight_function { usec_t timeout_ut; usec_t started_ut; usec_t sent_ut; + const char *payload; }; static void inflight_functions_insert_callback(const DICTIONARY_ITEM *item, void *func, void *parser_ptr) { @@ -710,7 +711,8 @@ static void inflight_functions_insert_callback(const DICTIONARY_ITEM *item, void pf->code = HTTP_RESP_GATEWAY_TIMEOUT; char buffer[2048 + 1]; - snprintfz(buffer, 2048, "FUNCTION %s %d \"%s\"\n", + snprintfz(buffer, 2048, "%s %s %d \"%s\"\n", 
+ pf->payload ? "FUNCTION_PAYLOAD" : "FUNCTION", dictionary_acquired_item_name(item), pf->timeout, string2str(pf->function)); @@ -730,6 +732,25 @@ static void inflight_functions_insert_callback(const DICTIONARY_ITEM *item, void string2str(pf->function), dictionary_acquired_item_name(item), ret, pf->sent_ut - pf->started_ut); } + + if (!pf->payload) + return; + + // send the payload to the plugin + ret = send_to_plugin(pf->payload, parser); + + if(ret < 0) { + netdata_log_error("FUNCTION_PAYLOAD: failed to send function to plugin, error %d", ret); + rrd_call_function_error(pf->destination_wb, "Failed to communicate with collector", HTTP_RESP_BACKEND_FETCH_FAILED); + } + else { + internal_error(LOG_FUNCTIONS, + "FUNCTION_PAYLOAD '%s' with transaction '%s' sent to collector (%d bytes, in %llu usec)", + string2str(pf->function), dictionary_acquired_item_name(item), ret, + pf->sent_ut - pf->started_ut); + } + + send_to_plugin("\nFUNCTION_PAYLOAD_END\n", parser); } static bool inflight_functions_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *func __maybe_unused, void *new_func, void *parser_ptr __maybe_unused) { @@ -800,6 +821,7 @@ static int pluginsd_execute_function_callback(BUFFER *destination_wb, int timeou .function = string_strdupz(function), .callback = callback, .callback_data = callback_data, + .payload = NULL }; uuid_t uuid; @@ -1807,6 +1829,264 @@ static inline PARSER_RC pluginsd_exit(char **words __maybe_unused, size_t num_wo return PARSER_RC_STOP; } +struct mutex_cond { + pthread_mutex_t lock; + pthread_cond_t cond; + int rc; +}; + +static void virt_fnc_got_data_cb(BUFFER *wb, int code, void *callback_data) +{ + struct mutex_cond *ctx = callback_data; + pthread_mutex_lock(&ctx->lock); + ctx->rc = code; + pthread_cond_broadcast(&ctx->cond); + pthread_mutex_unlock(&ctx->lock); +} + +#define VIRT_FNC_TIMEOUT 1 +dyncfg_config_t call_virtual_function_blocking(PARSER *parser, const char *name, int *rc, const char *payload) { + usec_t now = 
now_realtime_usec(); + BUFFER *wb = buffer_create(4096, NULL); + + struct mutex_cond cond = { + .lock = PTHREAD_MUTEX_INITIALIZER, + .cond = PTHREAD_COND_INITIALIZER + }; + + struct inflight_function tmp = { + .started_ut = now, + .timeout_ut = now + VIRT_FNC_TIMEOUT + USEC_PER_SEC, + .destination_wb = wb, + .timeout = VIRT_FNC_TIMEOUT, + .function = string_strdupz(name), + .callback = virt_fnc_got_data_cb, + .callback_data = &cond, + .payload = payload, + }; + + uuid_t uuid; + uuid_generate_time(uuid); + + char key[UUID_STR_LEN]; + uuid_unparse_lower(uuid, key); + + dictionary_write_lock(parser->inflight.functions); + + // if there is any error, our dictionary callbacks will call the caller callback to notify + // the caller about the error - no need for error handling here. + dictionary_set(parser->inflight.functions, key, &tmp, sizeof(struct inflight_function)); + + if(!parser->inflight.smaller_timeout || tmp.timeout_ut < parser->inflight.smaller_timeout) + parser->inflight.smaller_timeout = tmp.timeout_ut; + + // garbage collect stale inflight functions + if(parser->inflight.smaller_timeout < now) + inflight_functions_garbage_collect(parser, now); + + dictionary_write_unlock(parser->inflight.functions); + + struct timespec tp; + clock_gettime(CLOCK_REALTIME, &tp); + tp.tv_sec += (time_t)VIRT_FNC_TIMEOUT; + + pthread_mutex_lock(&cond.lock); + + int ret = pthread_cond_timedwait(&cond.cond, &cond.lock, &tp); + if (ret == ETIMEDOUT) + netdata_log_error("PLUGINSD: DYNCFG virtual function %s timed out", name); + + pthread_mutex_unlock(&cond.lock); + + dyncfg_config_t cfg; + cfg.data = strdupz(buffer_tostring(wb)); + cfg.data_size = buffer_strlen(wb); + + if (rc != NULL) + *rc = cond.rc; + + buffer_free(wb); + return cfg; +} + +static dyncfg_config_t get_plugin_config_cb(void *usr_ctx) +{ + PARSER *parser = usr_ctx; + return call_virtual_function_blocking(parser, "get_plugin_config", NULL, NULL); +} + +static dyncfg_config_t get_plugin_config_schema_cb(void *usr_ctx) 
+{ + PARSER *parser = usr_ctx; + return call_virtual_function_blocking(parser, "get_plugin_config_schema", NULL, NULL); +} + +static dyncfg_config_t get_module_config_cb(void *usr_ctx, const char *module_name) +{ + PARSER *parser = usr_ctx; + char buf[1024]; + snprintfz(buf, sizeof(buf), "get_module_config %s", module_name); + return call_virtual_function_blocking(parser, buf, NULL, NULL); +} + +static dyncfg_config_t get_module_config_schema_cb(void *usr_ctx, const char *module_name) +{ + PARSER *parser = usr_ctx; + char buf[1024]; + snprintfz(buf, sizeof(buf), "get_module_config_schema %s", module_name); + return call_virtual_function_blocking(parser, buf, NULL, NULL); +} + +static dyncfg_config_t get_job_config_schema_cb(void *usr_ctx, const char *module_name) +{ + PARSER *parser = usr_ctx; + char buf[1024]; + snprintfz(buf, sizeof(buf), "get_job_config_schema %s", module_name); + return call_virtual_function_blocking(parser, buf, NULL, NULL); +} + +static dyncfg_config_t get_job_config_cb(void *usr_ctx, const char *module_name, const char* job_name) +{ + PARSER *parser = usr_ctx; + char buf[1024]; + snprintfz(buf, sizeof(buf), "get_job_config %s %s", module_name, job_name); + return call_virtual_function_blocking(parser, buf, NULL, NULL); +} + +enum set_config_result set_plugin_config_cb(void *usr_ctx, dyncfg_config_t *cfg) +{ + PARSER *parser = usr_ctx; + int rc; + call_virtual_function_blocking(parser, "set_plugin_config", &rc, cfg->data); + if(rc != 1) + return SET_CONFIG_REJECTED; + return SET_CONFIG_ACCEPTED; +} + +enum set_config_result set_module_config_cb(void *usr_ctx, const char *module_name, dyncfg_config_t *cfg) +{ + PARSER *parser = usr_ctx; + int rc; + + char buf[1024]; + snprintfz(buf, sizeof(buf), "set_module_config %s", module_name); + call_virtual_function_blocking(parser, buf, &rc, cfg->data); + + if(rc != 1) + return SET_CONFIG_REJECTED; + return SET_CONFIG_ACCEPTED; +} + +enum set_config_result set_job_config_cb(void *usr_ctx, const char 
*module_name, const char *job_name, dyncfg_config_t *cfg) +{ + PARSER *parser = usr_ctx; + int rc; + + char buf[1024]; + snprintfz(buf, sizeof(buf), "set_job_config %s %s", module_name, job_name); + call_virtual_function_blocking(parser, buf, &rc, cfg->data); + + if(rc != 1) + return SET_CONFIG_REJECTED; + return SET_CONFIG_ACCEPTED; +} + +enum set_config_result delete_job_cb(void *usr_ctx, const char *module_name, const char *job_name) +{ + PARSER *parser = usr_ctx; + int rc; + + char buf[1024]; + snprintfz(buf, sizeof(buf), "delete_job %s %s", module_name, job_name); + call_virtual_function_blocking(parser, buf, &rc, NULL); + + if(rc != 1) + return SET_CONFIG_REJECTED; + return SET_CONFIG_ACCEPTED; +} + + +static inline PARSER_RC pluginsd_register_plugin(char **words __maybe_unused, size_t num_words __maybe_unused, PARSER *parser __maybe_unused) { + netdata_log_info("PLUGINSD: DYNCFG_ENABLE"); + + if (unlikely (num_words != 2)) + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_ENABLE, "missing name parameter"); + + struct configurable_plugin *cfg = callocz(1, sizeof(struct configurable_plugin)); + + cfg->name = strdupz(words[1]); + cfg->set_config_cb = set_plugin_config_cb; + cfg->get_config_cb = get_plugin_config_cb; + cfg->get_config_schema_cb = get_plugin_config_schema_cb; + cfg->cb_usr_ctx = parser; + + parser->user.cd->cfg_dict_item = register_plugin(cfg); + + if (unlikely(parser->user.cd->cfg_dict_item == NULL)) { + freez(cfg->name); + freez(cfg); + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_ENABLE, "error registering plugin"); + } + + parser->user.cd->configuration = cfg; + return PARSER_RC_OK; +} + +static inline PARSER_RC pluginsd_register_module(char **words __maybe_unused, size_t num_words __maybe_unused, PARSER *parser __maybe_unused) { + netdata_log_info("PLUGINSD: DYNCFG_REG_MODULE"); + + struct configurable_plugin *plug_cfg = parser->user.cd->configuration; + if (unlikely(plug_cfg == NULL)) + return 
PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_MODULE, "you have to enable dynamic configuration first using " PLUGINSD_KEYWORD_DYNCFG_ENABLE); + + if (unlikely(num_words != 3)) + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_MODULE, "expected 2 parameters module_name followed by module_type"); + + struct module *mod = callocz(1, sizeof(struct module)); + + mod->type = str2_module_type(words[2]); + if (unlikely(mod->type == MOD_TYPE_UNKNOWN)) { + freez(mod); + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_MODULE, "unknown module type (allowed: job_array, single)"); + } + + mod->name = strdupz(words[1]); + + mod->set_config_cb = set_module_config_cb; + mod->get_config_cb = get_module_config_cb; + mod->get_config_schema_cb = get_module_config_schema_cb; + mod->config_cb_usr_ctx = parser; + + mod->get_job_config_cb = get_job_config_cb; + mod->get_job_config_schema_cb = get_job_config_schema_cb; + mod->set_job_config_cb = set_job_config_cb; + mod->delete_job_cb = delete_job_cb; + mod->job_config_cb_usr_ctx = parser; + + register_module(plug_cfg, mod); + return PARSER_RC_OK; +} + +// job_status <module_name> <job_name> <status_code> <state> <message> +static inline PARSER_RC pluginsd_job_status(char **words, size_t num_words, PARSER *parser) +{ + if (unlikely(num_words != 6 && num_words != 5)) + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_REPORT_JOB_STATUS, "expected 4 or 5 parameters: module_name, job_name, status_code, state, [optional: message]"); + + int state = atoi(words[4]); + + enum job_status job_status = str2job_state(words[3]); + if (unlikely(job_status == JOB_STATUS_UNKNOWN)) + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_REPORT_JOB_STATUS, "unknown job state"); + + char *message = NULL; + if (num_words == 6) + message = strdupz(words[5]); + + report_job_status(parser->user.cd->configuration, words[1], words[2], job_status, state, message); + return PARSER_RC_OK; +} 
+ static inline PARSER_RC streaming_claimed_id(char **words, size_t num_words, PARSER *parser) { const char *host_uuid_str = get_word(words, num_words, 1); @@ -2111,6 +2391,12 @@ PARSER_RC parser_execute(PARSER *parser, PARSER_KEYWORD *keyword, char **words, case 99: return pluginsd_exit(words, num_words, parser); + case 101: + return pluginsd_register_plugin(words, num_words, parser); + + case 102: + return pluginsd_register_module(words, num_words, parser); + default: fatal("Unknown keyword '%s' with id %zu", keyword->keyword, keyword->id); } @@ -2131,6 +2417,11 @@ void parser_destroy(PARSER *parser) { if (unlikely(!parser)) return; + if (parser->user.cd != NULL && parser->user.cd->configuration != NULL) { + unregister_plugin(parser->user.cd->cfg_dict_item); + parser->user.cd->configuration = NULL; + } + dictionary_destroy(parser->inflight.functions); freez(parser); } diff --git a/collectors/proc.plugin/README.md b/collectors/proc.plugin/README.md index 6c1335a7..16ae6f41 100644 --- a/collectors/proc.plugin/README.md +++ b/collectors/proc.plugin/README.md @@ -31,6 +31,7 @@ In detail, it collects metrics from: - `/proc/spl/kstat/zfs/pool/state` (state of ZFS pools) - `/sys/class/power_supply` (power supply properties) - `/sys/class/infiniband` (infiniband interconnect) +- `/sys/class/drm` (AMD GPUs) - `ipc` (IPC semaphores and message queues) - `ksm` Kernel Same-Page Merging performance (several files under `/sys/kernel/mm/ksm`). - `netdata` (internal Netdata resources utilization) @@ -579,6 +580,36 @@ Default configuration will monitor only enabled infiniband ports, and refresh ne # refresh ports state every seconds = 30 ``` +## AMD GPUs + +This module monitors every AMD GPU card discovered at agent startup. 
+ +### Monitored GPU metrics + +The following charts will be provided: + +- **GPU utilization** +- **GPU memory utilization** +- **GPU clock frequency** +- **GPU memory clock frequency** +- **VRAM memory usage percentage** +- **VRAM memory usage** +- **visible VRAM memory usage percentage** +- **visible VRAM memory usage** +- **GTT memory usage percentage** +- **GTT memory usage** + +### configuration + +The `drm` path can be configured if it differs from the default: + +``` +[plugin:proc:/sys/class/drm] + # directory to monitor = /sys/class/drm +``` + +> [!NOTE] +> Temperature, fan speed, voltage and power metrics for AMD GPUs can be monitored using the [Sensors](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/sensors/README.md) plugin. ## IPC diff --git a/collectors/proc.plugin/multi_metadata.yaml b/collectors/proc.plugin/metadata.yaml index e78ec795..81d83f50 100644 --- a/collectors/proc.plugin/multi_metadata.yaml +++ b/collectors/proc.plugin/metadata.yaml @@ -1,54 +1,80 @@ -name: proc.plugin +plugin_name: proc.plugin modules: - meta: plugin_name: proc.plugin module_name: /proc/stat monitored_instance: - name: proc /proc/stat - link: '' - categories: [] - icon_filename: '' + name: System statistics + link: "" + categories: + - data-collection.linux-systems.system-metrics + icon_filename: "linuxserver.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - cpu utilization + - process counts most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: | + CPU utilization, states and frequencies and key Linux system performance metrics. 
+ + The `/proc/stat` file provides various types of system statistics: + + - The overall system CPU usage statistics + - Per CPU core statistics + - The total context switching of the system + - The total number of processes running + - The total CPU interrupts + - The total CPU softirqs + + The collector also reads: + + - `/proc/schedstat` for statistics about the process scheduler in the Linux kernel. + - `/sys/devices/system/cpu/[X]/thermal_throttle/core_throttle_count` to get the count of thermal throttling events for a specific CPU core on Linux systems. + - `/sys/devices/system/cpu/[X]/thermal_throttle/package_throttle_count` to get the count of thermal throttling events for a specific CPU package on a Linux system. + - `/sys/devices/system/cpu/[X]/cpufreq/scaling_cur_freq` to get the current operating frequency of a specific CPU core. + - `/sys/devices/system/cpu/[X]/cpufreq/stats/time_in_state` to get the amount of time the CPU has spent in each of its available frequency states. + - `/sys/devices/system/cpu/[X]/cpuidle/state[X]/name` to get the names of the idle states for each CPU core in a Linux system. + - `/sys/devices/system/cpu/[X]/cpuidle/state[X]/time` to get the total time each specific CPU core has spent in each idle state since the system was started. + method_description: "" supported_platforms: - include: [] + include: ["linux"] exclude: [] - multi-instance: true + multi_instance: false additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: | + The collector auto-detects all metrics. No configuration is needed. limits: - description: '' + description: "" performance_impact: - description: '' + description: | + The collector disables cpu frequency and idle state monitoring when there are more than 128 CPU cores available. 
setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + section_name: "plugin:proc:/proc/stat" + name: "netdata.conf" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -69,11 +95,6 @@ modules: metric: system.cpu info: average CPU steal time over the last 20 minutes os: "linux" - - name: 10min_cpu_usage - link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf - metric: system.cpu - info: average CPU utilization over the last 10 minutes (excluding nice) - os: "freebsd" metrics: folding: title: Metrics @@ -174,51 +195,84 @@ modules: plugin_name: proc.plugin module_name: /proc/sys/kernel/random/entropy_avail monitored_instance: - name: proc /proc/sys/kernel/random/entropy_avail - link: '' - categories: [] - icon_filename: '' + name: Entropy + link: "" + categories: + - data-collection.linux-systems.system-metrics + icon_filename: "syslog.png" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - entropy most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: | + Entropy, a measure of the randomness or unpredictability of data. + + In the context of cryptography, entropy is used to generate random numbers or keys that are essential for + secure communication and encryption. Without a good source of entropy, cryptographic protocols can become + vulnerable to attacks that exploit the predictability of the generated keys. + + In most operating systems, entropy is generated by collecting random events from various sources, such as + hardware interrupts, mouse movements, keyboard presses, and disk activity. 
These events are fed into a pool + of entropy, which is then used to generate random numbers when needed. + + The `/dev/random` device in Linux is one such source of entropy, and it provides an interface for programs + to access the pool of entropy. When a program requests random numbers, it reads from the `/dev/random` device, + which blocks until enough entropy is available to generate the requested numbers. This ensures that the + generated numbers are truly random and not predictable. + + However, if the pool of entropy gets depleted, the `/dev/random` device may block indefinitely, causing + programs that rely on random numbers to slow down or even freeze. This is especially problematic for + cryptographic protocols that require a continuous stream of random numbers, such as SSL/TLS and SSH. + + To avoid this issue, some systems use a hardware random number generator (RNG) to generate high-quality + entropy. A hardware RNG generates random numbers by measuring physical phenomena, such as thermal noise or + radioactive decay. These sources of randomness are considered to be more reliable and unpredictable than + software-based sources. + + One such hardware RNG is the Trusted Platform Module (TPM), which is a dedicated hardware chip that is used + for cryptographic operations and secure boot. The TPM contains a built-in hardware RNG that generates + high-quality entropy, which can be used to seed the pool of entropy in the operating system. + + Alternatively, software-based solutions such as `Haveged` can be used to generate additional entropy by + exploiting sources of randomness in the system, such as CPU utilization and network traffic. These solutions + can help to mitigate the risk of entropy depletion, but they may not be as reliable as hardware-based solutions. 
+ method_description: "" supported_platforms: - include: [] + include: ["linux"] exclude: [] - multi-instance: true + multi_instance: false additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -227,8 +281,7 @@ modules: - name: lowest_entropy link: https://github.com/netdata/netdata/blob/master/health/health.d/entropy.conf metric: system.entropy - info: minimum number of entries in the random numbers pool in the last 5 minutes - os: "linux" + info: minimum number of bits of entropy available for the kernel’s random number generator metrics: folding: title: Metrics @@ -250,51 +303,64 @@ modules: plugin_name: proc.plugin module_name: /proc/uptime monitored_instance: - name: proc /proc/uptime - link: '' - categories: [] - icon_filename: '' + name: System Uptime + link: "" + categories: + - data-collection.linux-systems.system-metrics + icon_filename: "linuxserver.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - uptime most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: | + The amount of time the system has been up (running). + + Uptime is a critical aspect of overall system performance: + + - **Availability**: Uptime monitoring can show whether a server is consistently available or experiences frequent downtimes. 
+ - **Performance Monitoring**: While server uptime alone doesn't provide detailed performance data, analyzing the duration and frequency of downtimes can help identify patterns or trends. + - **Proactive problem detection**: If server uptime monitoring reveals unexpected downtimes or a decreasing uptime trend, it can serve as an early warning sign of potential problems. + - **Root cause analysis**: When investigating server downtime, the uptime metric alone may not provide enough information to pinpoint the exact cause. + - **Load balancing**: Uptime data can indirectly indicate load balancing issues if certain servers have significantly lower uptimes than others. + - **Optimize maintenance efforts**: Servers with consistently low uptimes or frequent downtimes may require more attention. + - **Compliance requirements**: Server uptime data can be used to demonstrate compliance with regulatory requirements or SLAs that mandate a minimum level of server availability. + method_description: "" supported_platforms: - include: [] + include: ["linux"] exclude: [] - multi-instance: true + multi_instance: false additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -321,51 +387,77 @@ modules: plugin_name: proc.plugin module_name: /proc/vmstat monitored_instance: - name: proc /proc/vmstat - link: '' - categories: [] - icon_filename: '' + name: Memory Statistics + link: "" + categories: + - data-collection.linux-systems.memory-metrics + icon_filename: "linuxserver.svg" related_resources: integrations: 
list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - swap + - page faults + - oom + - numa most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: | + Linux Virtual memory subsystem. + + Information about memory management, indicating how effectively the kernel allocates and frees + memory resources in response to system demands. + + Monitors page faults, which occur when a process requests a portion of its memory that isn't + immediately available. Monitoring these events can help diagnose inefficiencies in memory management and + provide insights into application behavior. + + Tracks swapping activity — a vital aspect of memory management where the kernel moves data from RAM to + swap space, and vice versa, based on memory demand and usage. It also monitors the utilization of zswap, + a compressed cache for swap pages, and provides insights into its usage and performance implications. + + In the context of virtualized environments, it tracks the ballooning mechanism which is used to balance + memory resources between host and guest systems. + + For systems using NUMA architecture, it provides insights into the local and remote memory accesses, which + can impact the performance based on the memory access times. + + The collector also watches for 'Out of Memory' kills, a drastic measure taken by the system when it runs out + of memory resources. 
+ method_description: "" supported_platforms: - include: [] + include: ["linux"] exclude: [] - multi-instance: true + multi_instance: false additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -373,7 +465,7 @@ modules: alerts: - name: 30min_ram_swapped_out link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf - metric: system.swapio + metric: mem.swapio info: percentage of the system RAM swapped in the last 30 minutes os: "linux freebsd" - name: oom_kill @@ -392,7 +484,7 @@ modules: description: "" labels: [] metrics: - - name: system.swapio + - name: mem.swapio description: Swap I/O unit: "KiB/s" chart_type: area @@ -514,51 +606,83 @@ modules: plugin_name: proc.plugin module_name: /proc/interrupts monitored_instance: - name: proc /proc/interrupts - link: '' - categories: [] - icon_filename: '' + name: Interrupts + link: "" + categories: + - data-collection.linux-systems.cpu-metrics + icon_filename: "linuxserver.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - interrupts most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: | + Monitors `/proc/interrupts`, a file organized by CPU and then by the type of interrupt. + The numbers reported are the counts of the interrupts that have occurred of each type. 
+ + An interrupt is a signal to the processor emitted by hardware or software indicating an event that needs + immediate attention. The processor then interrupts its current activities and executes the interrupt handler + to deal with the event. This is part of the way a computer multitasks and handles concurrent processing. + + The types of interrupts include: + + - **I/O interrupts**: These are caused by I/O devices like the keyboard, mouse, printer, etc. For example, when + you type something on the keyboard, an interrupt is triggered so the processor can handle the new input. + + - **Timer interrupts**: These are generated at regular intervals by the system's timer circuit. It's primarily + used to switch the CPU among different tasks. + + - **Software interrupts**: These are generated by a program requiring disk I/O operations, or other system resources. + + - **Hardware interrupts**: These are caused by hardware conditions such as power failure, overheating, etc. + + Monitoring `/proc/interrupts` can be used for: + + - **Performance tuning**: If an interrupt is happening very frequently, it could be a sign that a device is not + configured correctly, or there is a software bug causing unnecessary interrupts. This could lead to system + performance degradation. + + - **System troubleshooting**: If you're seeing a lot of unexpected interrupts, it could be a sign of a hardware problem. + + - **Understanding system behavior**: More generally, keeping an eye on what interrupts are occurring can help you + understand what your system is doing. It can provide insights into the system's interaction with hardware, + drivers, and other parts of the kernel. 
+ method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -597,51 +721,77 @@ modules: plugin_name: proc.plugin module_name: /proc/loadavg monitored_instance: - name: proc /proc/loadavg - link: '' - categories: [] - icon_filename: '' + name: System Load Average + link: "" + categories: + - data-collection.linux-systems.system-metrics + icon_filename: "linuxserver.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - load + - load average most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: | + The `/proc/loadavg` file provides information about the system load average. + + The load average is a measure of the amount of computational work that a system performs. It is a + representation of the average system load over a period of time. + + This file contains three numbers representing the system load averages for the last 1, 5, and 15 minutes, + respectively. It also includes the currently running processes and the total number of processes. + + Monitoring the load average can be used for: + + - **System performance**: If the load average is too high, it may indicate that your system is overloaded. + On a system with a single CPU, if the load average is 1, it means the single CPU is fully utilized. 
If the + load averages are consistently higher than the number of CPUs/cores, it may indicate that your system is + overloaded and tasks are waiting for CPU time. + + - **Troubleshooting**: If the load average is unexpectedly high, it can be a sign of a problem. This could be + due to a runaway process, a software bug, or a hardware issue. + + - **Capacity planning**: By monitoring the load average over time, you can understand the trends in your + system's workload. This can help with capacity planning and scaling decisions. + + Remember that load average not only considers CPU usage, but also includes processes waiting for disk I/O. + Therefore, high load averages could be due to I/O contention as well as CPU contention. + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: false additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -700,51 +850,76 @@ modules: plugin_name: proc.plugin module_name: /proc/pressure monitored_instance: - name: proc /proc/pressure - link: '' - categories: [] - icon_filename: '' + name: Pressure Stall Information + link: "" + categories: + - data-collection.linux-systems.pressure-metrics + icon_filename: "linuxserver.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - pressure most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: | + 
Introduced in Linux kernel 4.20, `/proc/pressure` provides system pressure stall information + (PSI). PSI is a feature that allows the system to track the amount of time the system is stalled due to + resource contention, such as CPU, memory, or I/O. + + The collector monitors 4 separate files for CPU, memory, I/O, and IRQ: + + - **cpu**: Tracks the amount of time tasks are stalled due to CPU contention. + - **memory**: Tracks the amount of time tasks are stalled due to memory contention. + - **io**: Tracks the amount of time tasks are stalled due to I/O contention. + - **irq**: Tracks the amount of time tasks are stalled due to IRQ contention. + + Each of them provides metrics for stall time over the last 10 seconds, 1 minute, and 5 minutes. + + Monitoring the `/proc/pressure` files can provide important insights into system performance and capacity planning: + + - **Identifying resource contention**: If these metrics are consistently high, it indicates that tasks are + frequently being stalled due to lack of resources, which can significantly degrade system performance. + + - **Troubleshooting performance issues**: If a system is experiencing performance issues, these metrics can + help identify whether resource contention is the cause. + + - **Capacity planning**: By monitoring these metrics over time, you can understand trends in resource + utilization and make informed decisions about when to add more resources to your system.
+ method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: false additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -849,51 +1024,76 @@ modules: plugin_name: proc.plugin module_name: /proc/softirqs monitored_instance: - name: proc /proc/softirqs - link: '' - categories: [] - icon_filename: '' + name: SoftIRQ statistics + link: "" + categories: + - data-collection.linux-systems.cpu-metrics + icon_filename: "linuxserver.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - softirqs + - interrupts most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: | + In the Linux kernel, handling of hardware interrupts is split into two halves: the top half and the bottom half. + The top half is the routine that responds immediately to an interrupt, while the bottom half is deferred to be processed later. + + Softirqs are a mechanism in the Linux kernel used to handle the bottom halves of interrupts, which can be + deferred and processed later in a context where it's safe to enable interrupts. + + The actual work of handling the interrupt is offloaded to a softirq and executed later when the system + decides it's a good time to process them. This helps to keep the system responsive by not blocking the top + half for too long, which could lead to missed interrupts. 
+ + Monitoring `/proc/softirqs` is useful for: + + - **Performance tuning**: A high rate of softirqs could indicate a performance issue. For instance, a high + rate of network softirqs (`NET_RX` and `NET_TX`) could indicate a network performance issue. + + - **Troubleshooting**: If a system is behaving unexpectedly, checking the softirqs could provide clues about + what is going on. For example, a sudden increase in block device softirqs (BLOCK) might indicate a problem + with a disk. + + - **Understanding system behavior**: Knowing what types of softirqs are happening can help you understand what + your system is doing, particularly in terms of how it's interacting with hardware and how it's handling + interrupts. + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -932,51 +1132,75 @@ modules: plugin_name: proc.plugin module_name: /proc/net/softnet_stat monitored_instance: - name: proc /proc/net/softnet_stat - link: '' - categories: [] - icon_filename: '' + name: Softnet Statistics + link: "" + categories: + - data-collection.linux-systems.network-metrics + icon_filename: "linuxserver.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - softnet most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: | + 
`/proc/net/softnet_stat` provides statistics that relate to the handling of network packets by softirq. + + It provides information about: + + - Total number of processed packets (`processed`). + - Number of dropped packets (`dropped`). + - Times ksoftirq ran out of quota (`squeezed`). + - Times net_rx_action was rescheduled. + - Number of times all lists were processed before the quota was reached. + - Number of times not all lists were processed due to the quota. + - Number of times net_rx_action was rescheduled for GRO (Generic Receive Offload) cells. + - Number of times GRO cells were processed. + + Monitoring the `/proc/net/softnet_stat` file can be useful for: + + - **Network performance monitoring**: By tracking the total number of processed packets and how many packets + were dropped, you can gain insights into your system's network performance. + + - **Troubleshooting**: If you're experiencing network-related issues, this collector can provide valuable clues. + For instance, a high number of dropped packets may indicate a network problem. + + - **Capacity planning**: If your system is consistently processing near its maximum capacity of network + packets, it might be time to consider upgrading your network infrastructure.
+ method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -990,13 +1214,10 @@ modules: - name: 1min_netdev_budget_ran_outs link: https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf metric: system.softnet_stat - info: average number of times ksoftirq ran out of sysctl net.core.netdev_budget or net.core.netdev_budget_usecs with work remaining over the last minute (this can be a cause for dropped packets) + info: + average number of times ksoftirq ran out of sysctl net.core.netdev_budget or net.core.netdev_budget_usecs with work remaining over the last + minute (this can be a cause for dropped packets) os: "linux" - - name: 10min_netisr_backlog_exceeded - link: https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf - metric: system.softnet_stat - info: average number of drops in the last minute due to exceeded sysctl net.route.netisr_maxqlen (this can be a cause for dropped packets) - os: "freebsd" metrics: folding: title: Metrics @@ -1036,51 +1257,72 @@ modules: plugin_name: proc.plugin module_name: /proc/meminfo monitored_instance: - name: proc /proc/meminfo - link: '' - categories: [] - icon_filename: '' + name: Memory Usage + link: "" + categories: + - data-collection.linux-systems.memory-metrics + icon_filename: "linuxserver.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - 
keywords: [] + description: "" + keywords: + - memory + - ram + - available + - committed most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: | + `/proc/meminfo` provides detailed information about the system's current memory usage. It includes information + about different types of memory, RAM, Swap, ZSwap, HugePages, Transparent HugePages (THP), Kernel memory, + SLAB memory, memory mappings, and more. + + Monitoring /proc/meminfo can be useful for: + + - **Performance Tuning**: Understanding your system's memory usage can help you make decisions about system + tuning and optimization. For example, if your system is frequently low on free memory, it might benefit + from more RAM. + + - **Troubleshooting**: If your system is experiencing problems, `/proc/meminfo` can provide clues about + whether memory usage is a factor. For example, if your system is slow and cached swap is high, it could + mean that your system is swapping out a lot of memory to disk, which can degrade performance. + + - **Capacity Planning**: By monitoring memory usage over time, you can understand trends and make informed + decisions about future capacity needs. 
+ method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: false additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -1091,24 +1333,14 @@ modules: metric: system.ram info: system memory utilization os: "linux" - - name: ram_in_use - link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf - metric: system.ram - info: system memory utilization - os: "freebsd" - name: ram_available link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf metric: mem.available info: percentage of estimated amount of RAM available for userspace processes, without causing swapping os: "linux" - - name: ram_available - link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf - metric: mem.available - info: percentage of estimated amount of RAM available for userspace processes, without causing swapping - os: "freebsd" - name: used_swap link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf - metric: system.swap + metric: mem.swap info: swap memory utilization os: "linux freebsd" - name: 1hour_memory_hw_corrupted @@ -1142,13 +1374,26 @@ modules: chart_type: area dimensions: - name: avail - - name: system.swap + - name: mem.swap description: System Swap unit: "MiB" chart_type: stacked dimensions: - name: free - name: used + - name: mem.swap_cached + description: Swap Memory Cached in RAM + unit: "MiB" + chart_type: stacked + dimensions: + - name: cached + - name: mem.zswap + 
description: Zswap Usage + unit: "MiB" + chart_type: stacked + dimensions: + - name: in-ram + - name: on-disk - name: mem.hwcorrupt description: Corrupted Memory detected by ECC unit: "MiB" @@ -1188,7 +1433,7 @@ modules: dimensions: - name: reclaimable - name: unreclaimable - - name: mem.hugepage + - name: mem.hugepages description: Dedicated HugePages Memory unit: "MiB" chart_type: stacked @@ -1197,62 +1442,110 @@ modules: - name: used - name: surplus - name: reserved - - name: mem.transparent_hugepages + - name: mem.thp description: Transparent HugePages Memory unit: "MiB" chart_type: stacked dimensions: - name: anonymous - name: shmem + - name: mem.thp_details + description: Details of Transparent HugePages Usage + unit: "MiB" + chart_type: line + dimensions: + - name: ShmemPmdMapped + - name: FileHugePages + - name: FilePmdMapped + - name: mem.reclaiming + description: Memory Reclaiming + unit: "MiB" + chart_type: line + dimensions: + - name: Active + - name: Inactive + - name: Active(anon) + - name: Inactive(anon) + - name: Active(file) + - name: Inactive(file) + - name: Unevictable + - name: Mlocked + - name: mem.high_low + description: High and Low Used and Free Memory Areas + unit: "MiB" + chart_type: stacked + dimensions: + - name: high_used + - name: low_used + - name: high_free + - name: low_free + - name: mem.cma + description: Contiguous Memory Allocator (CMA) Memory + unit: "MiB" + chart_type: stacked + dimensions: + - name: used + - name: free + - name: mem.directmaps + description: Direct Memory Mappings + unit: "MiB" + chart_type: stacked + dimensions: + - name: 4k + - name: 2m + - name: 4m + - name: 1g - meta: plugin_name: proc.plugin module_name: /proc/pagetypeinfo monitored_instance: - name: proc /proc/pagetypeinfo - link: '' - categories: [] - icon_filename: '' + name: Page types + link: "" + categories: + - data-collection.linux-systems.memory-metrics + icon_filename: "microchip.svg" related_resources: integrations: list: [] 
info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - memory page types most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "This integration provides metrics about the system's memory page types" + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: false additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -1295,66 +1588,91 @@ modules: plugin_name: proc.plugin module_name: /sys/devices/system/edac/mc monitored_instance: - name: proc /sys/devices/system/edac/mc - link: '' - categories: [] - icon_filename: '' + name: Memory modules (DIMMs) + link: "" + categories: + - data-collection.linux-systems.memory-metrics + icon_filename: "microchip.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - edac + - ecc + - dimm + - ram + - hardware most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: | + The Error Detection and Correction (EDAC) subsystem is detecting and reporting errors in the system's memory, + primarily ECC (Error-Correcting Code) memory errors. + + The collector provides data for: + + - Per memory controller (MC): correctable and uncorrectable errors. 
These can be of 2 kinds: + - errors related to a DIMM + - errors that cannot be associated with a DIMM + + - Per memory DIMM: correctable and uncorrectable errors. There are 2 kinds: + - memory controllers that can identify the physical DIMMS and report errors directly for them, + - memory controllers that report errors for memory address ranges that can be linked to dimms. + In this case the DIMMS reported may be more than the physical DIMMS installed. + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: list: [] alerts: - - name: 1hour_ecc_memory_correctable + - name: ecc_memory_mc_noinfo_correctable + metric: mem.edac_mc + info: memory controller ${label:controller} ECC correctable errors (unknown DIMM slot) in the last 10 minutes link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf - metric: mem.ecc_ce - info: number of ECC correctable errors in the last 10 minutes - os: "linux" - - name: 1hour_ecc_memory_uncorrectable + - name: ecc_memory_mc_noinfo_uncorrectable + metric: mem.edac_mc + info: memory controller ${label:controller} ECC uncorrectable errors (unknown DIMM slot) in the last 10 minutes + link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf + - name: ecc_memory_dimm_correctable + metric: mem.edac_mc_dimm + info: DIMM ${label:dimm} controller ${label:controller} (location ${label:dimm_location}) ECC correctable 
errors in the last 10 minutes + link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf + - name: ecc_memory_dimm_uncorrectable + metric: mem.edac_mc_dimm + info: DIMM ${label:dimm} controller ${label:controller} (location ${label:dimm_location}) ECC uncorrectable errors in the last 10 minutes link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf - metric: mem.ecc_ue - info: number of ECC uncorrectable errors in the last 10 minutes - os: "linux" metrics: folding: title: Metrics @@ -1362,71 +1680,117 @@ modules: description: "" availability: [] scopes: - - name: global - description: "" - labels: [] + - name: memory controller + description: These metrics refer to the memory controller. + labels: + - name: controller + description: "[mcX](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#mcx-directories) directory name of this memory controller." + - name: mc_name + description: Memory controller type. + - name: size_mb + description: The amount of memory in megabytes that this memory controller manages. + - name: max_location + description: Last available memory slot in this memory controller. metrics: - - name: mem.ecc_ce - description: ECC Memory Correctable Errors - unit: "errors" + - name: mem.edac_mc + description: Memory Controller (MC) Error Detection And Correction (EDAC) Errors + unit: errors/s chart_type: line dimensions: - - name: a dimension per mem controller - - name: mem.ecc_ue - description: ECC Memory Uncorrectable Errors - unit: "errors" + - name: correctable + - name: uncorrectable + - name: correctable_noinfo + - name: uncorrectable_noinfo + - name: memory module + description: These metrics refer to the memory module (or rank, [depends on the memory controller](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#f5)). 
+ labels: + - name: controller + description: "[mcX](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#mcx-directories) directory name of this memory controller." + - name: dimm + description: "[dimmX or rankX](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#dimmx-or-rankx-directories) directory name of this memory module." + - name: dimm_dev_type + description: Type of DRAM device used in this memory module. For example, x1, x2, x4, x8. + - name: dimm_edac_mode + description: Used type of error detection and correction. For example, S4ECD4ED would mean a Chipkill with x4 DRAM. + - name: dimm_label + description: Label assigned to this memory module. + - name: dimm_location + description: Location of the memory module. + - name: dimm_mem_type + description: Type of the memory module. + - name: size + description: The amount of memory in megabytes that this memory module manages. + metrics: + - name: mem.edac_mc_dimm + description: DIMM Error Detection And Correction (EDAC) Errors + unit: errors/s chart_type: line dimensions: - - name: a dimension per mem controller + - name: correctable + - name: uncorrectable - meta: plugin_name: proc.plugin module_name: /sys/devices/system/node monitored_instance: - name: proc /sys/devices/system/node - link: '' - categories: [] - icon_filename: '' + name: Non-Uniform Memory Access + link: "" + categories: + - data-collection.linux-systems.memory-metrics + icon_filename: "linuxserver.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - numa most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: | + Information about NUMA (Non-Uniform Memory Access) nodes on the system.
+ + NUMA is a method of configuring a cluster of microprocessors in a multiprocessing system so that they can + share memory locally, improving performance and the ability of the system to be expanded. NUMA is used in a + symmetric multiprocessing (SMP) system. + + In a NUMA system, processors, memory, and I/O devices are grouped together into cells, also known as nodes. + Each node has its own memory and set of I/O devices, and one or more processors. While a processor can access + memory in any of the nodes, it does so faster when accessing memory within its own node. + + The collector provides statistics on memory allocations for processes running on the NUMA nodes, revealing the + efficiency of memory allocations in multi-node systems. + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -1460,51 +1824,61 @@ modules: plugin_name: proc.plugin module_name: /sys/kernel/mm/ksm monitored_instance: - name: proc /sys/kernel/mm/ksm - link: '' - categories: [] - icon_filename: '' + name: Kernel Same-Page Merging + link: "" + categories: + - data-collection.linux-systems.memory-metrics + icon_filename: "microchip.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - ksm + - samepage + - merging most_popular: false overview: data_collection: - metrics_description: '' -
method_description: '' + metrics_description: | + Kernel Samepage Merging (KSM) is a memory-saving feature in Linux that enables the kernel to examine the + memory of different processes and identify identical pages. It then merges these identical pages into a + single page that the processes share. This is particularly useful for virtualization, where multiple virtual + machines might be running the same operating system or applications and have many identical pages. + + The collector provides information about the operation and effectiveness of KSM on your system. + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: false additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -1547,51 +1921,57 @@ modules: plugin_name: proc.plugin module_name: /sys/block/zram monitored_instance: - name: proc /sys/block/zram - link: '' - categories: [] - icon_filename: '' + name: ZRAM + link: "" + categories: + - data-collection.linux-systems.memory-metrics + icon_filename: "microchip.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - zram most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: | + zRAM, or compressed RAM, is a block device that uses a portion of your system's RAM as a block device. + The data written to this block device is compressed and stored in memory. 
+ + The collector provides information about the operation and the effectiveness of zRAM on your system. + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -1640,51 +2020,69 @@ modules: plugin_name: proc.plugin module_name: ipc monitored_instance: - name: proc ipc - link: '' - categories: [] - icon_filename: '' + name: Inter Process Communication + link: "" + categories: + - data-collection.linux-systems.ipc-metrics + icon_filename: "network-wired.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - ipc + - semaphores + - shared memory most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: | + IPC stands for Inter-Process Communication. It is a mechanism which allows processes to communicate with each + other and synchronize their actions. + + This collector exposes information about: + + - Message Queues: This allows messages to be exchanged between processes. It's a more flexible method that + allows messages to be placed onto a queue and read at a later time. + + - Shared Memory: This method allows for the fastest form of IPC because processes can exchange data by + reading/writing into shared memory segments. + + - Semaphores: They are used to synchronize the operations performed by independent processes.
So, if multiple + processes are trying to access a single shared resource, semaphores can ensure that only one process + accesses the resource at a given time. + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: false additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -1751,51 +2149,61 @@ modules: plugin_name: proc.plugin module_name: /proc/diskstats monitored_instance: - name: proc /proc/diskstats - link: '' - categories: [] - icon_filename: '' + name: Disk Statistics + link: "" + categories: + - data-collection.linux-systems.disk-metrics + icon_filename: "hard-drive.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - disk + - disks + - io + - bcache + - block devices most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: | + Detailed statistics for each of your system's disk devices and partitions. + The data is reported by the kernel and can be used to monitor disk activity on a Linux system. + + Get valuable insight into how your disks are performing and where potential bottlenecks might be. 
+ method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -1818,7 +2226,9 @@ modules: - name: bcache_cache_errors link: https://github.com/netdata/netdata/blob/master/health/health.d/bcache.conf metric: disk.bcache_cache_read_races - info: number of times data was read from the cache, the bucket was reused and invalidated in the last 10 minutes (when this occurs the data is reread from the backing device) + info: + number of times data was read from the cache, the bucket was reused and invalidated in the last 10 minutes (when this occurs the data is + reread from the backing device) metrics: folding: title: Metrics @@ -2023,51 +2433,56 @@ modules: plugin_name: proc.plugin module_name: /proc/mdstat monitored_instance: - name: proc /proc/mdstat - link: '' - categories: [] - icon_filename: '' + name: MD RAID + link: "" + categories: + - data-collection.linux-systems.disk-metrics + icon_filename: "hard-drive.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - raid + - mdadm + - mdstat + - raid most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "This integration monitors the status of MD RAID devices." 
+ method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -2080,7 +2495,8 @@ modules: - name: mdstat_disks link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf metric: md.disks - info: number of devices in the down state for the ${label:device} ${label:raid_level} array. Any number > 0 indicates that the array is degraded. + info: + number of devices in the down state for the ${label:device} ${label:raid_level} array. Any number > 0 indicates that the array is degraded. - name: mdstat_mismatch_cnt link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf metric: md.mismatch_cnt @@ -2158,51 +2574,53 @@ modules: plugin_name: proc.plugin module_name: /proc/net/dev monitored_instance: - name: proc /proc/net/dev - link: '' - categories: [] - icon_filename: '' + name: Network interfaces + link: "" + categories: + - data-collection.linux-systems.network-metrics + icon_filename: "network-wired.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - network interfaces most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Monitor network interface metrics about bandwidth, state, errors and more." 
+ method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -2251,18 +2669,10 @@ modules: - name: 10s_received_packets_storm link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf metric: net.packets - info: ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute + info: + ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over + the last minute os: "linux freebsd" - - name: interface_inbound_errors - link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf - metric: net.errors - info: number of inbound errors for the network interface ${label:device} in the last 10 minutes - os: "freebsd" - - name: interface_outbound_errors - link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf - metric: net.errors - info: number of outbound errors for the network interface ${label:device} in the last 10 minutes - os: "freebsd" - name: inbound_packets_dropped link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf metric: net.drops @@ -2398,51 +2808,53 @@ modules: plugin_name: proc.plugin module_name: /proc/net/wireless monitored_instance: - name: proc /proc/net/wireless - link: '' - categories: [] - icon_filename: '' + name: 
Wireless network interfaces + link: "" + categories: + - data-collection.linux-systems.network-metrics + icon_filename: "network-wired.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - wireless devices most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "Monitor wireless devices with metrics about status, link quality, signal level, noise level and more." + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -2472,13 +2884,16 @@ modules: dimensions: - name: link_quality - name: wireless.signal_level - description: The signal level is the wireless signal power level received by the wireless client. The closer the value is to 0, the stronger the signal. + description: + The signal level is the wireless signal power level received by the wireless client. The closer the value is to 0, the stronger the + signal. unit: "dBm" chart_type: line dimensions: - name: signal_level - name: wireless.noise_level - description: The noise level indicates the amount of background noise in your environment. The closer the value to 0, the greater the noise level. + description: + The noise level indicates the amount of background noise in your environment. The closer the value to 0, the greater the noise level. 
unit: "dBm" chart_type: line dimensions: @@ -2503,51 +2918,54 @@ modules: plugin_name: proc.plugin module_name: /sys/class/infiniband monitored_instance: - name: proc /sys/class/infiniband - link: '' - categories: [] - icon_filename: '' + name: InfiniBand + link: "" + categories: + - data-collection.linux-systems.network-metrics + icon_filename: "network-wired.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - infiniband + - rdma most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "This integration monitors InfiniBand network interface statistics." + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -2643,51 +3061,58 @@ modules: plugin_name: proc.plugin module_name: /proc/net/netstat monitored_instance: - name: proc /proc/net/netstat - link: '' - categories: [] - icon_filename: '' + name: Network statistics + link: "" + categories: + - data-collection.linux-systems.network-metrics + icon_filename: "network-wired.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - ip + - udp + - udplite + - icmp + - netstat + - snmp most_popular: false overview: data_collection: - metrics_description: '' -
method_description: '' + metrics_description: "This integration provides metrics from the `netstat`, `snmp` and `snmp6` modules." + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -2726,7 +3151,9 @@ modules: - name: 10s_ipv4_tcp_resets_sent link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf metric: ipv4.tcphandshake - info: average number of sent TCP RESETS over the last 10 seconds. This can indicate a port scan, or that a service running on this host has crashed. Netdata will not send a clear notification for this alarm. + info: + average number of sent TCP RESETS over the last 10 seconds. This can indicate a port scan, or that a service running on this host has + crashed. Netdata will not send a clear notification for this alarm. os: "linux" - name: 1m_ipv4_tcp_resets_received link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf @@ -2736,7 +3163,9 @@ modules: - name: 10s_ipv4_tcp_resets_received link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf metric: ipv4.tcphandshake - info: average number of received TCP RESETS over the last 10 seconds. This can be an indication that a service this host needs has crashed. Netdata will not send a clear notification for this alarm. + info: + average number of received TCP RESETS over the last 10 seconds. 
This can be an indication that a service this host needs has crashed. + Netdata will not send a clear notification for this alarm. os: "linux freebsd" - name: 1m_ipv4_udp_receive_buffer_errors link: https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf @@ -3232,51 +3661,53 @@ modules: plugin_name: proc.plugin module_name: /proc/net/sockstat monitored_instance: - name: proc /proc/net/sockstat - link: '' - categories: [] - icon_filename: '' + name: Socket statistics + link: "" + categories: + - data-collection.linux-systems.network-metrics + icon_filename: "network-wired.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - sockets most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "This integration provides socket statistics." + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -3364,51 +3795,53 @@ modules: plugin_name: proc.plugin module_name: /proc/net/sockstat6 monitored_instance: - name: proc /proc/net/sockstat6 - link: '' - categories: [] - icon_filename: '' + name: IPv6 Socket Statistics + link: "" + categories: + - data-collection.linux-systems.network-metrics + icon_filename: "network-wired.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: 
- description: '' - keywords: [] + description: "" + keywords: + - ipv6 sockets most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "This integration provides IPv6 socket statistics." + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -3459,51 +3892,53 @@ modules: plugin_name: proc.plugin module_name: /proc/net/ip_vs_stats monitored_instance: - name: proc /proc/net/ip_vs_stats - link: '' - categories: [] - icon_filename: '' + name: IP Virtual Server + link: "" + categories: + - data-collection.linux-systems.network-metrics + icon_filename: "network-wired.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - ip virtual server most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "This integration monitors IP Virtual Server statistics" + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - 
description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -3544,51 +3979,54 @@ modules: plugin_name: proc.plugin module_name: /proc/net/rpc/nfs monitored_instance: - name: proc /proc/net/rpc/nfs - link: '' - categories: [] - icon_filename: '' + name: NFS Client + link: "" + categories: + - data-collection.linux-systems.filesystem-metrics.nfs + icon_filename: "nfs.png" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - nfs client + - filesystem most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "This integration provides statistics from the Linux kernel's NFS Client." + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -3642,51 +4080,54 @@ modules: plugin_name: proc.plugin module_name: /proc/net/rpc/nfsd monitored_instance: - name: proc /proc/net/rpc/nfsd - link: '' - categories: [] - icon_filename: '' + name: NFS Server + link: "" + categories: + - data-collection.linux-systems.filesystem-metrics.nfs + icon_filename: "nfs.png" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - 
keywords: [] + description: "" + keywords: + - nfs server + - filesystem most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "This integration provides statistics from the Linux kernel's NFS Server." + method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -3773,51 +4214,54 @@ modules: plugin_name: proc.plugin module_name: /proc/net/sctp/snmp monitored_instance: - name: proc /proc/net/sctp/snmp - link: '' - categories: [] - icon_filename: '' + name: SCTP Statistics + link: "" + categories: + - data-collection.linux-systems.network-metrics + icon_filename: "network-wired.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - sctp + - stream control transmission protocol most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "This integration provides statistics about the Stream Control Transmission Protocol (SCTP)." 
+ method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -3874,51 +4318,55 @@ modules: plugin_name: proc.plugin module_name: /proc/net/stat/nf_conntrack monitored_instance: - name: proc /proc/net/stat/nf_conntrack - link: '' - categories: [] - icon_filename: '' + name: Conntrack + link: "" + categories: + - data-collection.linux-systems.firewall-metrics + icon_filename: "firewall.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - connection tracking mechanism + - netfilter + - conntrack most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "This integration monitors the connection tracking mechanism of Netfilter in the Linux Kernel." 
+ method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -3991,51 +4439,53 @@ modules: plugin_name: proc.plugin module_name: /proc/net/stat/synproxy monitored_instance: - name: proc /proc/net/stat/synproxy - link: '' - categories: [] - icon_filename: '' + name: Synproxy + link: "" + categories: + - data-collection.linux-systems.firewall-metrics + icon_filename: "firewall.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - synproxy most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "This integration provides statistics about the Synproxy netfilter module." 
+ method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -4076,51 +4526,56 @@ modules: plugin_name: proc.plugin module_name: /proc/spl/kstat/zfs monitored_instance: - name: proc /proc/spl/kstat/zfs - link: '' - categories: [] - icon_filename: '' + name: ZFS Pools + link: "" + categories: + - data-collection.linux-systems.filesystem-metrics.zfs + icon_filename: "filesystem.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - zfs pools + - pools + - zfs + - filesystem most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "This integration provides metrics about the state of ZFS pools." 
+ method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -4163,51 +4618,56 @@ modules: plugin_name: proc.plugin module_name: /proc/spl/kstat/zfs/arcstats monitored_instance: - name: proc /proc/spl/kstat/zfs/arcstats - link: '' - categories: [] - icon_filename: '' + name: ZFS Adaptive Replacement Cache + link: "" + categories: + - data-collection.linux-systems.filesystem-metrics.zfs + icon_filename: "filesystem.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - zfs arc + - arc + - zfs + - filesystem most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "This integration monitors ZFS Adaptive Replacement Cache (ARC) statistics."
+ method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -4424,51 +4884,54 @@ modules: plugin_name: proc.plugin module_name: /sys/fs/btrfs monitored_instance: - name: proc /sys/fs/btrfs - link: '' - categories: [] - icon_filename: '' + name: BTRFS + link: "" + categories: + - data-collection.linux-systems.filesystem-metrics.btrfs + icon_filename: "filesystem.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - btrfs + - filesystem most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "This integration provides usage and error statistics from the BTRFS filesystem." 
+ method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -4611,51 +5074,54 @@ modules: plugin_name: proc.plugin module_name: /sys/class/power_supply monitored_instance: - name: proc /sys/class/power_supply - link: '' - categories: [] - icon_filename: '' + name: Power Supply + link: "" + categories: + - data-collection.linux-systems.power-supply-metrics + icon_filename: "powersupply.svg" related_resources: integrations: list: [] info_provided_to_referring_integrations: - description: '' - keywords: [] + description: "" + keywords: + - psu + - power supply most_popular: false overview: data_collection: - metrics_description: '' - method_description: '' + metrics_description: "This integration monitors Power supply metrics, such as battery status, AC power status and more." 
+ method_description: "" supported_platforms: include: [] exclude: [] - multi-instance: true + multi_instance: true additional_permissions: - description: '' + description: "" default_behavior: auto_detection: - description: '' + description: "" limits: - description: '' + description: "" performance_impact: - description: '' + description: "" setup: prerequisites: list: [] configuration: file: - name: '' - description: '' + name: "" + description: "" options: - description: '' + description: "" folding: - title: '' + title: "" enabled: true list: [] examples: folding: enabled: true - title: '' + title: "" list: [] troubleshooting: problems: @@ -4714,3 +5180,138 @@ modules: - name: now - name: max - name: max_design + - meta: + plugin_name: proc.plugin + module_name: /sys/class/drm + monitored_instance: + name: AMD GPU + link: "https://www.amd.com" + categories: + - data-collection.hardware-devices-and-sensors + icon_filename: amd.svg + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - amd + - gpu + - hardware + most_popular: false + overview: + data_collection: + metrics_description: "This integration monitors AMD GPU metrics, such as utilization, clock frequency and memory usage." + method_description: "It reads `/sys/class/drm` to collect metrics for every AMD GPU card instance it encounters." 
+ supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: "" + description: "" + options: + description: "" + folding: + title: "" + enabled: true + list: [] + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: gpu + description: "These metrics refer to the GPU." + labels: + - name: product_name + description: GPU product name (e.g. AMD RX 6600) + metrics: + - name: amdgpu.gpu_utilization + description: GPU utilization + unit: "percentage" + chart_type: line + dimensions: + - name: utilization + - name: amdgpu.gpu_mem_utilization + description: GPU memory utilization + unit: "percentage" + chart_type: line + dimensions: + - name: utilization + - name: amdgpu.gpu_clk_frequency + description: GPU clock frequency + unit: "MHz" + chart_type: line + dimensions: + - name: frequency + - name: amdgpu.gpu_mem_clk_frequency + description: GPU memory clock frequency + unit: "MHz" + chart_type: line + dimensions: + - name: frequency + - name: amdgpu.gpu_mem_vram_usage_perc + description: VRAM memory usage percentage + unit: "percentage" + chart_type: line + dimensions: + - name: usage + - name: amdgpu.gpu_mem_vram_usage + description: VRAM memory usage + unit: "bytes" + chart_type: area + dimensions: + - name: free + - name: used + - name: amdgpu.gpu_mem_vis_vram_usage_perc + description: visible VRAM memory usage percentage + unit: "percentage" + chart_type: line + dimensions: + - name: usage + - name: amdgpu.gpu_mem_vis_vram_usage + description: visible VRAM memory usage + unit: "bytes" + chart_type: area + dimensions: + - name: free + - name: 
used + - name: amdgpu.gpu_mem_gtt_usage_perc + description: GTT memory usage percentage + unit: "percentage" + chart_type: line + dimensions: + - name: usage + - name: amdgpu.gpu_mem_gtt_usage + description: GTT memory usage + unit: "bytes" + chart_type: area + dimensions: + - name: free + - name: used diff --git a/collectors/proc.plugin/metrics.csv b/collectors/proc.plugin/metrics.csv deleted file mode 100644 index ea0d1b36..00000000 --- a/collectors/proc.plugin/metrics.csv +++ /dev/null @@ -1,271 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -system.cpu,,"guest_nice, guest, steal, softirq, irq, user, system, nice, iowait, idle",percentage,Total CPU utilization,stacked,,proc.plugin,/proc/stat -cpu.cpu,cpu core,"guest_nice, guest, steal, softirq, irq, user, system, nice, iowait, idle",percentage,Core utilization,stacked,cpu,proc.plugin,/proc/stat -system.intr,,interrupts,interrupts/s,CPU Interrupts,line,,proc.plugin,/proc/stat -system.ctxt,,switches,context switches/s,CPU Context Switches,line,,proc.plugin,/proc/stat -system.forks,,started,processes/s,Started Processes,line,,proc.plugin,/proc/stat -system.processes,,"running, blocked",processes,System Processes,line,,proc.plugin,/proc/stat -cpu.core_throttling,,a dimension per cpu core,events/s,Core Thermal Throttling Events,line,,proc.plugin,/proc/stat -cpu.package_throttling,,a dimension per package,events/s,Package Thermal Throttling Events,line,,proc.plugin,/proc/stat -cpu.cpufreq,,a dimension per cpu core,MHz,Current CPU Frequency,line,,proc.plugin,/proc/stat -cpuidle.cpu_cstate_residency_time,cpu core,a dimension per c-state,percentage,C-state residency time,stacked,cpu,proc.plugin,/proc/stat -system.entropy,,entropy,entropy,Available Entropy,line,,proc.plugin,/proc/sys/kernel/random/entropy_avail -system.uptime,,uptime,seconds,System Uptime,line,,proc.plugin,/proc/uptime -system.swapio,,"in, out",KiB/s,Swap I/O,area,,proc.plugin,/proc/vmstat -system.pgpgio,,"in, 
out",KiB/s,Memory Paged from/to disk,area,,proc.plugin,/proc/vmstat -system.pgfaults,,"minor, major",faults/s,Memory Page Faults,line,,proc.plugin,/proc/vmstat -system.interrupts,,a dimension per device,interrupts/s,System interrupts,stacked,,proc.plugin,/proc/interrupts -cpu.interrupts,cpu core,a dimension per device,interrupts/s,CPU interrupts,stacked,cpu,proc.plugin,/proc/interrupts -system.load,,"load1, load5, load15",load,System Load Average,line,,proc.plugin,/proc/loadavg -system.active_processes,,active,processes,System Active Processes,line,,proc.plugin,/proc/loadavg -system.cpu_some_pressure,,"some10, some60, some300",percentage,"CPU some pressure",line,,proc.plugin,/proc/pressure -system.cpu_some_pressure_stall_time,,time,ms,"CPU some pressure stall time",line,,proc.plugin,/proc/pressure -system.cpu_full_pressure,,"some10, some60, some300",percentage,"CPU full pressure",line,,proc.plugin,/proc/pressure -system.cpu_full_pressure_stall_time,,time,ms,"CPU full pressure stall time",line,,proc.plugin,/proc/pressure -system.memory_some_pressure,,"some10, some60, some300",percentage,"Memory some pressure",line,,proc.plugin,/proc/pressure -system.memory_some_pressure_stall_time,,time,ms,"Memory some pressure stall time",line,,proc.plugin,/proc/pressure -system.memory_full_pressure,,"some10, some60, some300",percentage,"Memory full pressure",line,,proc.plugin,/proc/pressure -system.memory_full_pressure_stall_time,,time,ms,"Memory full pressure stall time",line,,proc.plugin,/proc/pressure -system.io_some_pressure,,"some10, some60, some300",percentage,"I/O some pressure",line,,proc.plugin,/proc/pressure -system.io_some_pressure_stall_time,,time,ms,"I/O some pressure stall time",line,,proc.plugin,/proc/pressure -system.io_full_pressure,,"some10, some60, some300",percentage,"I/O some pressure",line,,proc.plugin,/proc/pressure -system.io_full_pressure_stall_time,,time,ms,"I/O some pressure stall time",line,,proc.plugin,/proc/pressure -system.softirqs,,a dimension per 
softirq,softirqs/s,System softirqs,stacked,,proc.plugin,/proc/softirqs -cpu.softirqs,cpu core,a dimension per softirq,softirqs/s,CPU softirqs,stacked,cpu,proc.plugin,/proc/softirqs -system.softnet_stat,,"processed, dropped, squeezed, received_rps, flow_limit_count",events/s,System softnet_stat,line,,proc.plugin,/proc/net/softnet_stat -cpu.softnet_stat,cpu core,"processed, dropped, squeezed, received_rps, flow_limit_count",events/s,CPU softnet_stat,line,,proc.plugin,/proc/net/softnet_stat -system.ram,,"free, used, cached, buffers",MiB,System RAM,stacked,,proc.plugin,/proc/meminfo -mem.available,,avail,MiB,Available RAM for applications,area,,proc.plugin,/proc/meminfo -system.swap,,"free, used",MiB,System Swap,stacked,,proc.plugin,/proc/meminfo -mem.hwcorrupt,,HardwareCorrupted,MiB,Corrupted Memory detected by ECC,line,,proc.plugin,/proc/meminfo -mem.commited,,Commited_AS,MiB,Committed (Allocated) Memory,area,,proc.plugin,/proc/meminfo -mem.writeback,,"Dirty, Writeback, FuseWriteback, NfsWriteback, Bounce",MiB,Writeback Memory,line,,proc.plugin,/proc/meminfo -mem.kernel,,"Slab, KernelStack, PageTables, VmallocUsed, Percpu",MiB,Memory Used by Kernel,stacked,,proc.plugin,/proc/meminfo -mem.slab,,"reclaimable, unreclaimable",MiB,Reclaimable Kernel Memory,stacked,,proc.plugin,/proc/meminfo -mem.hugepage,,"free, used, surplus, reserved",MiB,Dedicated HugePages Memory,stacked,,proc.plugin,/proc/meminfo -mem.transparent_hugepages,,"anonymous, shmem",MiB,Transparent HugePages Memory,stacked,,proc.plugin,/proc/meminfo -mem.balloon,,"inflate, deflate, migrate",KiB/s,Memory Ballooning Operations,line,,proc.plugin,/proc/vmstat -mem.zswapio,,"in, out",KiB/s,ZSwap I/O,area,,proc.plugin,/proc/vmstat -mem.ksm_cow,,"swapin, write",KiB/s,KSM Copy On Write Operations,line,,proc.plugin,/proc/vmstat -mem.thp_faults,,"alloc, fallback, fallback_charge",events/s,Transparent Huge Page Fault Allocations,line,,proc.plugin,/proc/vmstat -mem.thp_file,,"alloc, fallback, mapped, 
fallback_charge",events/s,Transparent Huge Page File Allocations,line,,proc.plugin,/proc/vmstat -mem.thp_zero,,"alloc, failed",events/s,Transparent Huge Zero Page Allocations,line,,proc.plugin,/proc/vmstat -mem.thp_collapse,,"alloc, failed",events/s,Transparent Huge Pages Collapsed by khugepaged,line,,proc.plugin,/proc/vmstat -mem.thp_split,,"split, failed, split_pmd, split_deferred",events/s,Transparent Huge Page Splits,line,,proc.plugin,/proc/vmstat -mem.thp_swapout,,"swapout, fallback",events/s,Transparent Huge Pages Swap Out,line,,proc.plugin,/proc/vmstat -mem.thp_compact,,"success, fail, stall",events/s,Transparent Huge Pages Compaction,line,,proc.plugin,/proc/vmstat -mem.pagetype_global,,a dimension per pagesize,B,System orders available,stacked,,proc.plugin,/proc/pagetypeinfo -mem.pagetype,"node, zone, type",a dimension per pagesize,B,"pagetype_Node{node}_{zone}_{type}",stacked,"node_id, node_zone, node_type",proc.plugin,/proc/pagetypeinfo -mem.oom_kill,,kills,kills/s,Out of Memory Kills,line,,proc.plugin,/proc/vmstat -mem.numa,,"local, foreign, interleave, other, pte_updates, huge_pte_updates, hint_faults, hint_faults_local, pages_migrated",events/s,NUMA events,line,,proc.plugin,/proc/vmstat -mem.ecc_ce,,a dimension per mem controller,errors,ECC Memory Correctable Errors,line,,proc.plugin,/sys/devices/system/edac/mc -mem.ecc_ue,,a dimension per mem controller,errors,ECC Memory Uncorrectable Errors,line,,proc.plugin,/sys/devices/system/edac/mc -mem.numa_nodes,numa node,"hit, miss, local, foreign, interleave, other",events/s,NUMA events,line,numa_node,proc.plugin,/sys/devices/system/node -mem.ksm,,"shared, unshared, sharing, volatile",MiB,Kernel Same Page Merging,stacked,,proc.plugin,/sys/kernel/mm/ksm -mem.ksm_savings,,"savings, offered",MiB,Kernel Same Page Merging Savings,area,,proc.plugin,/sys/kernel/mm/ksm -mem.ksm_ratios,,savings,percentage,Kernel Same Page Merging Effectiveness,line,,proc.plugin,/sys/kernel/mm/ksm -mem.zram_usage,zram 
device,"compressed, metadata",MiB,ZRAM Memory Usage,area,device,proc.plugin,/sys/block/zram -mem.zram_savings,zram device,"savings, original",MiB,ZRAM Memory Savings,area,device,proc.plugin,/sys/block/zram -mem.zram_ratio,zram device,ratio,ratio,ZRAM Compression Ratio (original to compressed),line,device,proc.plugin,/sys/block/zram -mem.zram_efficiency,zram device,percent,percentage,ZRAM Efficiency,line,device,proc.plugin,/sys/block/zram -system.ipc_semaphores,,semaphores,semaphores,IPC Semaphores,area,,proc.plugin,ipc -system.ipc_semaphore_arrays,,arrays,arrays,IPC Semaphore Arrays,area,,proc.plugin,ipc -system.message_queue_message,,a dimension per queue,messages,IPC Message Queue Number of Messages,stacked,,proc.plugin,ipc -system.message_queue_bytes,,a dimension per queue,bytes,IPC Message Queue Used Bytes,stacked,,proc.plugin,ipc -system.shared_memory_segments,,segments,segments,IPC Shared Memory Number of Segments,stacked,,proc.plugin,ipc -system.shared_memory_bytes,,bytes,bytes,IPC Shared Memory Used Bytes,stacked,,proc.plugin,ipc -system.io,,"in, out",KiB/s,Disk I/O,area,,proc.plugin,/proc/diskstats -disk.io,disk,"reads, writes",KiB/s,Disk I/O Bandwidth,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk_ext.io,disk,discards,KiB/s,Amount of Discarded Data,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk.ops,disk,"reads, writes",operations/s,Disk Completed I/O Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk_ext.ops,disk,"discards, flushes",operations/s,Disk Completed Extended I/O Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk.qops,disk,operations,operations,Disk Current I/O Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk.backlog,disk,backlog,milliseconds,Disk Backlog,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk.busy,disk,busy,milliseconds,Disk Busy Time,area,"device, 
mount_point, device_type",proc.plugin,/proc/diskstats -disk.util,disk,utilization,% of time working,Disk Utilization Time,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk.mops,disk,"reads, writes",merged operations/s,Disk Merged Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk_ext.mops,disk,discards,merged operations/s,Disk Merged Discard Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk.iotime,disk,"reads, writes",milliseconds/s,Disk Total I/O Time,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk_ext.iotime,disk,"discards, flushes",milliseconds/s,Disk Total I/O Time for Extended Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk.await,disk,"reads, writes",milliseconds/operation,Average Completed I/O Operation Time,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk_ext.await,disk,"discards, flushes",milliseconds/operation,Average Completed Extended I/O Operation Time,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk.avgsz,disk,"reads, writes",KiB/operation,Average Completed I/O Operation Bandwidth,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk_ext.avgsz,disk,discards,KiB/operation,Average Amount of Discarded Data,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk.svctm,disk,svctm,milliseconds/operation,Average Service Time,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk.bcache_cache_alloc,disk,"ununsed, dirty, clean, metadata, undefined",percentage,BCache Cache Allocations,stacked,,proc.plugin,/proc/diskstats -disk.bcache_hit_ratio,disk,"5min, 1hour, 1day, ever",percentage,BCache Cache Hit Ratio,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk.bcache_rates,disk,"congested, writeback",KiB/s,BCache Rates,area,"device, mount_point, 
device_type",proc.plugin,/proc/diskstats -disk.bcache_size,disk,dirty,MiB,BCache Cache Sizes,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk.bcache_usage,disk,avail,percentage,BCache Cache Usage,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk.bcache_cache_read_races,disk,"races, errors",operations/s,BCache Cache Read Races,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk.bcache,disk,"hits, misses, collisions, readaheads",operations/s,BCache Cache I/O Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats -disk.bcache_bypass,disk,"hits, misses",operations/s,BCache Cache Bypass I/O Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats -md.health,,a dimension per md array,failed disks,Faulty Devices In MD,line,,proc.plugin,/proc/mdstat -md.disks,md array,"inuse, down",disks,Disks Stats,stacked,"device, raid_level",proc.plugin,/proc/mdstat -md.mismatch_cnt,md array,count,unsynchronized blocks,Mismatch Count,line,"device, raid_level",proc.plugin,/proc/mdstat -md.status,md array,"check, resync, recovery, reshape",percent,Current Status,line,"device, raid_level",proc.plugin,/proc/mdstat -md.expected_time_until_operation_finish,md array,finish_in,seconds,Approximate Time Until Finish,line,"device, raid_level",proc.plugin,/proc/mdstat -md.operation_speed,md array,speed,KiB/s,Operation Speed,line,"device, raid_level",proc.plugin,/proc/mdstat -md.nonredundant,md array,available,boolean,Nonredundant Array Availability,line,"device, raid_level",proc.plugin,/proc/mdstat -system.net,,"received, sent",kilobits/s,Physical Network Interfaces Aggregated Bandwidth,area,,proc.plugin,/proc/net/dev -net.net,network device,"received, sent",kilobits/s,Bandwidth,area,"interface_type, device",proc.plugin,/proc/net/dev -net.speed,network device,speed,kilobits/s,Interface Speed,line,"interface_type, device",proc.plugin,/proc/net/dev -net.duplex,network device,"full, 
half, unknown",state,Interface Duplex State,line,"interface_type, device",proc.plugin,/proc/net/dev -net.operstate,network device,"up, down, notpresent, lowerlayerdown, testing, dormant, unknown",state,Interface Operational State,line,"interface_type, device",proc.plugin,/proc/net/dev -net.carrier,network device,"up, down",state,Interface Physical Link State,line,"interface_type, device",proc.plugin,/proc/net/dev -net.mtu,network device,mtu,octets,Interface MTU,line,"interface_type, device",proc.plugin,/proc/net/dev -net.packets,network device,"received, sent, multicast",packets/s,Packets,line,"interface_type, device",proc.plugin,/proc/net/dev -net.errors,network device,"inbound, outbound",errors/s,Interface Errors,line,"interface_type, device",proc.plugin,/proc/net/dev -net.drops,network device,"inbound, outbound",drops/s,Interface Drops,line,"interface_type, device",proc.plugin,/proc/net/dev -net.fifo,network device,"receive, transmit",errors,Interface FIFO Buffer Errors,line,"interface_type, device",proc.plugin,/proc/net/dev -net.compressed,network device,"received, sent",packets/s,Compressed Packets,line,"interface_type, device",proc.plugin,/proc/net/dev -net.events,network device,"frames, collisions, carrier",events/s,Network Interface Events,line,"interface_type, device",proc.plugin,/proc/net/dev -wireless.status,wireless device,status,status,Internal status reported by interface.,line,,proc.plugin,/proc/net/wireless -wireless.link_quality,wireless device,link_quality,value,"Overall quality of the link. This is an aggregate value, and depends on the driver and hardware.",line,,proc.plugin,/proc/net/wireless -wireless.signal_level,wireless device,signal_level,dBm,"The signal level is the wireless signal power level received by the wireless client. 
The closer the value is to 0, the stronger the signal.",line,,proc.plugin,/proc/net/wireless -wireless.noise_level,wireless device,noise_level,dBm,"The noise level indicates the amount of background noise in your environment. The closer the value to 0, the greater the noise level.",line,,proc.plugin,/proc/net/wireless -wireless.discarded_packets,wireless device,"nwid, crypt, frag, retry, misc",packets/s,"Packet discarded in the wireless adapter due to wireless specific problems.",line,,proc.plugin,/proc/net/wireless -wireless.missed_beacons,wireless device,missed_beacons,frames/s,Number of missed beacons.,line,,proc.plugin,/proc/net/wireless -ib.bytes,infiniband port,"Received, Sent",kilobits/s,Bandwidth usage,area,,proc.plugin,/sys/class/infiniband -ib.packets,infiniband port,"Received, Sent, Mcast_rcvd, Mcast_sent, Ucast_rcvd, Ucast_sent",packets/s,Packets Statistics,area,,proc.plugin,/sys/class/infiniband -ib.errors,infiniband port,"Pkts_malformated, Pkts_rcvd_discarded, Pkts_sent_discarded, Tick_Wait_to_send, Pkts_missed_resource, Buffer_overrun, Link_Downed, Link_recovered, Link_integrity_err, Link_minor_errors, Pkts_rcvd_with_EBP, Pkts_rcvd_discarded_by_switch, Pkts_sent_discarded_by_switch",errors/s,Error Counters,line,,proc.plugin,/sys/class/infiniband -ib.hwerrors,infiniband port,"Duplicated_packets, Pkt_Seq_Num_gap, Ack_timer_expired, Drop_missing_buffer, Drop_out_of_sequence, NAK_sequence_rcvd, CQE_err_Req, CQE_err_Resp, CQE_Flushed_err_Req, CQE_Flushed_err_Resp, Remote_access_err_Req, Remote_access_err_Resp, Remote_invalid_req, Local_length_err_Resp, RNR_NAK_Packets, CNP_Pkts_ignored, RoCE_ICRC_Errors",errors/s,Hardware Errors,line,,proc.plugin,/sys/class/infiniband -ib.hwpackets,infiniband port,"RoCEv2_Congestion_sent, RoCEv2_Congestion_rcvd, IB_Congestion_handled, ATOMIC_req_rcvd, Connection_req_rcvd, Read_req_rcvd, Write_req_rcvd, RoCE_retrans_adaptive, RoCE_retrans_timeout, RoCE_slow_restart, RoCE_slow_restart_congestion, 
RoCE_slow_restart_count",packets/s,Hardware Packets Statistics,line,,proc.plugin,/sys/class/infiniband -system.ip,,"received, sent",kilobits/s,IP Bandwidth,area,,proc.plugin,/proc/net/netstat -ip.inerrors,,"noroutes, truncated, checksum",packets/s,IP Input Errors,line,,proc.plugin,/proc/net/netstat -ip.mcast,,"received, sent",kilobits/s,IP Multicast Bandwidth,area,,proc.plugin,/proc/net/netstat -ip.bcast,,"received, sent",kilobits/s,IP Broadcast Bandwidth,area,,proc.plugin,/proc/net/netstat -ip.mcastpkts,,"received, sent",packets/s,IP Multicast Packets,line,,proc.plugin,/proc/net/netstat -ip.bcastpkts,,"received, sent",packets/s,IP Broadcast Packets,line,,proc.plugin,/proc/net/netstat -ip.ecnpkts,,"CEP, NoECTP, ECTP0, ECTP1",packets/s,IP ECN Statistics,line,,proc.plugin,/proc/net/netstat -ip.tcpmemorypressures,,pressures,events/s,TCP Memory Pressures,line,,proc.plugin,/proc/net/netstat -ip.tcpconnaborts,,"baddata, userclosed, nomemory, timeout, linger, failed",connections/s,TCP Connection Aborts,line,,proc.plugin,/proc/net/netstat -ip.tcpreorders,,"timestamp, sack, fack, reno",packets/s,TCP Reordered Packets by Detection Method,line,,proc.plugin,/proc/net/netstat -ip.tcpofo,,"inqueue, dropped, merged, pruned",packets/s,TCP Out-Of-Order Queue,line,,proc.plugin,/proc/net/netstat -ip.tcpsyncookies,,"received, sent, failed",packets/s,TCP SYN Cookies,line,,proc.plugin,/proc/net/netstat -ip.tcp_syn_queue,,"drops, cookies",packets/s,TCP SYN Queue Issues,line,,proc.plugin,/proc/net/netstat -ip.tcp_accept_queue,,"overflows, drops",packets/s,TCP Accept Queue Issues,line,,proc.plugin,/proc/net/netstat -ipv4.packets,,"received, sent, forwarded, delivered",packets/s,IPv4 Packets,line,,proc.plugin,/proc/net/netstat -ipv4.fragsout,,"ok, failed, created",packets/s,IPv4 Fragments Sent,line,,proc.plugin,/proc/net/netstat -ipv4.fragsin,,"ok, failed, all",packets/s,IPv4 Fragments Reassembly,line,,proc.plugin,/proc/net/netstat -ipv4.errors,,"InDiscards, OutDiscards, InHdrErrors, 
OutNoRoutes, InAddrErrors, InUnknownProtos",packets/s,IPv4 Errors,line,,proc.plugin,/proc/net/netstat -ipv4.icmp,,"received, sent",packets/s,IPv4 ICMP Packets,line,,proc.plugin,/proc/net/netstat -ipv4.icmp_errors,,"InErrors, OutErrors, InCsumErrors",packets/s,IPv4 ICMP Errors,line,,proc.plugin,/proc/net/netstat -ipv4.icmpmsg,,"InEchoReps, OutEchoReps, InDestUnreachs, OutDestUnreachs, InRedirects, OutRedirects, InEchos, OutEchos, InRouterAdvert, OutRouterAdvert, InRouterSelect, OutRouterSelect, InTimeExcds, OutTimeExcds, InParmProbs, OutParmProbs, InTimestamps, OutTimestamps, InTimestampReps, OutTimestampReps",packets/s,IPv4 ICMP Messages,line,,proc.plugin,/proc/net/netstat -ipv4.tcpsock,,connections,active connections,IPv4 TCP Connections,line,,proc.plugin,/proc/net/netstat -ipv4.tcppackets,,"received, sent",packets/s,IPv4 TCP Packets,line,,proc.plugin,/proc/net/netstat -ipv4.tcperrors,,"InErrs, InCsumErrors, RetransSegs",packets/s,IPv4 TCP Errors,line,,proc.plugin,/proc/net/netstat -ipv4.tcpopens,,"active, passive",connections/s,IPv4 TCP Opens,line,,proc.plugin,/proc/net/netstat -ipv4.tcphandshake,,"EstabResets, OutRsts, AttemptFails, SynRetrans",events/s,IPv4 TCP Handshake Issues,line,,proc.plugin,/proc/net/netstat -ipv4.udppackets,,"received, sent",packets/s,IPv4 UDP Packets,line,,proc.plugin,/proc/net/netstat -ipv4.udperrors,,"RcvbufErrors, SndbufErrors, InErrors, NoPorts, InCsumErrors, IgnoredMulti",events/s,IPv4 UDP Errors,line,,proc.plugin,/proc/net/netstat -ipv4.udplite,,"received, sent",packets/s,IPv4 UDPLite Packets,line,,proc.plugin,/proc/net/netstat -ipv4.udplite_errors,,"RcvbufErrors, SndbufErrors, InErrors, NoPorts, InCsumErrors, IgnoredMulti",packets/s,IPv4 UDPLite Errors,line,,proc.plugin,/proc/net/netstat -system.ipv6,,"received, sent",kilobits/s,IPv6 Bandwidth,area,,proc.plugin,/proc/net/netstat -system.ipv6,,"received, sent, forwarded, delivers",packets/s,IPv6 Packets,line,,proc.plugin,/proc/net/netstat -ipv6.fragsout,,"ok, failed, 
all",packets/s,IPv6 Fragments Sent,line,,proc.plugin,/proc/net/netstat -ipv6.fragsin,,"ok, failed, timeout, all",packets/s,IPv6 Fragments Reassembly,line,,proc.plugin,/proc/net/netstat -ipv6.errors,,"InDiscards, OutDiscards, InHdrErrors, InAddrErrors, InUnknownProtos, InTooBigErrors, InTruncatedPkts, InNoRoutes, OutNoRoutes",packets/s,IPv6 Errors,line,,proc.plugin,/proc/net/netstat -ipv6.udppackets,,"received, sent",packets/s,IPv6 UDP Packets,line,,proc.plugin,/proc/net/netstat -ipv6.udperrors,,"RcvbufErrors, SndbufErrors, InErrors, NoPorts, InCsumErrors, IgnoredMulti",events/s,IPv6 UDP Errors,line,,proc.plugin,/proc/net/netstat -ipv6.udplitepackets,,"received, sent",packets/s,IPv6 UDPlite Packets,line,,proc.plugin,/proc/net/netstat -ipv6.udpliteerrors,,"RcvbufErrors, SndbufErrors, InErrors, NoPorts, InCsumErrors",events/s,IPv6 UDP Lite Errors,line,,proc.plugin,/proc/net/netstat -ipv6.mcast,,"received, sent",kilobits/s,IPv6 Multicast Bandwidth,area,,proc.plugin,/proc/net/netstat -ipv6.bcast,,"received, sent",kilobits/s,IPv6 Broadcast Bandwidth,area,,proc.plugin,/proc/net/netstat -ipv6.mcastpkts,,"received, sent",packets/s,IPv6 Multicast Packets,line,,proc.plugin,/proc/net/netstat -ipv6.icmp,,"received, sent",messages/s,IPv6 ICMP Messages,line,,proc.plugin,/proc/net/netstat -ipv6.icmpredir,,"received, sent",redirects/s,IPv6 ICMP Redirects,line,,proc.plugin,/proc/net/netstat -ipv6.icmperrors,,"InErrors, OutErrors, InCsumErrors, InDestUnreachs, InPktTooBigs, InTimeExcds, InParmProblems, OutDestUnreachs, OutPktTooBigs, OutTimeExcds, OutParmProblems",errors/s,IPv6 ICMP Errors,line,,proc.plugin,/proc/net/netstat -ipv6.icmpechos,,"InEchos, OutEchos, InEchoReplies, OutEchoReplies",messages/s,IPv6 ICMP Echo,line,,proc.plugin,/proc/net/netstat -ipv6.groupmemb,,"InQueries, OutQueries, InResponses, OutResponses, InReductions, OutReductions",messages/s,IPv6 ICMP Group Membership,line,,proc.plugin,/proc/net/netstat -ipv6.icmprouter,,"InSolicits, OutSolicits, InAdvertisements, 
OutAdvertisements",messages/s,IPv6 Router Messages,line,,proc.plugin,/proc/net/netstat -ipv6.icmpneighbor,,"InSolicits, OutSolicits, InAdvertisements, OutAdvertisements",messages/s,IPv6 Neighbor Messages,line,,proc.plugin,/proc/net/netstat -ipv6.icmpmldv2,,"received, sent",reports/s,IPv6 ICMP MLDv2 Reports,line,,proc.plugin,/proc/net/netstat -ipv6.icmptypes,,"InType1, InType128, InType129, InType136, OutType1, OutType128, OutType129, OutType133, OutType135, OutType143",messages/s,IPv6 ICMP Types,line,,proc.plugin,/proc/net/netstat -ipv6.ect,,"InNoECTPkts, InECT1Pkts, InECT0Pkts, InCEPkts",packets/s,IPv6 ECT Packets,line,,proc.plugin,/proc/net/netstat -ipv6.ect,,"InNoECTPkts, InECT1Pkts, InECT0Pkts, InCEPkts",packets/s,IPv6 ECT Packets,line,,proc.plugin,/proc/net/netstat -ipv4.sockstat_sockets,,used,sockets,IPv4 Sockets Used,line,,proc.plugin,/proc/net/sockstat -ipv4.sockstat_tcp_sockets,,"alloc, orphan, inuse, timewait",sockets,IPv4 TCP Sockets,line,,proc.plugin,/proc/net/sockstat -ipv4.sockstat_tcp_mem,,mem,KiB,IPv4 TCP Sockets Memory,area,,proc.plugin,/proc/net/sockstat -ipv4.sockstat_udp_sockets,,inuse,sockets,IPv4 UDP Sockets,line,,proc.plugin,/proc/net/sockstat -ipv4.sockstat_udp_mem,,mem,sockets,IPv4 UDP Sockets Memory,line,,proc.plugin,/proc/net/sockstat -ipv4.sockstat_udplite_sockets,,inuse,sockets,IPv4 UDPLITE Sockets,line,,proc.plugin,/proc/net/sockstat -ipv4.sockstat_raw_sockets,,inuse,sockets,IPv4 RAW Sockets,line,,proc.plugin,/proc/net/sockstat -ipv4.sockstat_frag_sockets,,inuse,fragments,IPv4 FRAG Sockets,line,,proc.plugin,/proc/net/sockstat -ipv4.sockstat_frag_mem,,mem,KiB,IPv4 FRAG Sockets Memory,area,,proc.plugin,/proc/net/sockstat -ipv6.sockstat6_tcp_sockets,,inuse,sockets,IPv6 TCP Sockets,line,,proc.plugin,/proc/net/sockstat6 -ipv6.sockstat6_udp_sockets,,inuse,sockets,IPv6 UDP Sockets,line,,proc.plugin,/proc/net/sockstat6 -ipv6.sockstat6_udplite_sockets,,inuse,sockets,IPv6 UDPLITE Sockets,line,,proc.plugin,/proc/net/sockstat6 
-ipv6.sockstat6_raw_sockets,,inuse,sockets,IPv6 RAW Sockets,line,,proc.plugin,/proc/net/sockstat6 -ipv6.sockstat6_frag_sockets,,inuse,fragments,IPv6 FRAG Sockets,line,,proc.plugin,/proc/net/sockstat6 -ipvs.sockets,,connections,connections/s,IPVS New Connections,line,,proc.plugin,/proc/net/ip_vs_stats -ipvs.packets,,"received, sent",packets/s,IPVS Packets,line,,proc.plugin,/proc/net/ip_vs_stats -ipvs.net,,"received, sent",kilobits/s,IPVS Bandwidth,area,,proc.plugin,/proc/net/ip_vs_stats -nfs.net,,"udp, tcp",operations/s,NFS Client Network,stacked,,proc.plugin,/proc/net/rpc/nfs -nfs.rpc,,"calls, retransmits, auth_refresh",calls/s,NFS Client Remote Procedure Calls Statistics,line,,proc.plugin,/proc/net/rpc/nfs -nfs.proc2,,a dimension per proc2 call,calls/s,NFS v2 Client Remote Procedure Calls,stacked,,proc.plugin,/proc/net/rpc/nfs -nfs.proc3,,a dimension per proc3 call,calls/s,NFS v3 Client Remote Procedure Calls,stacked,,proc.plugin,/proc/net/rpc/nfs -nfs.proc4,,a dimension per proc4 call,calls/s,NFS v4 Client Remote Procedure Calls,stacked,,proc.plugin,/proc/net/rpc/nfs -nfsd.readcache,,"hits, misses, nocache",reads/s,NFS Server Read Cache,stacked,,proc.plugin,/proc/net/rpc/nfsd -nfsd.filehandles,,stale,handles/s,NFS Server File Handles,line,,proc.plugin,/proc/net/rpc/nfsd -nfsd.io,,"read, write",kilobytes/s,NFS Server I/O,area,,proc.plugin,/proc/net/rpc/nfsd -nfsd.threads,,threads,threads,NFS Server Threads,line,,proc.plugin,/proc/net/rpc/nfsd -nfsd.net,,"udp, tcp",packets/s,NFS Server Network Statistics,line,,proc.plugin,/proc/net/rpc/nfsd -nfsd.rpc,,"calls, bad_format, bad_auth",calls/s,NFS Server Remote Procedure Calls Statistics,line,,proc.plugin,/proc/net/rpc/nfsd -nfsd.proc2,,a dimension per proc2 call,calls/s,NFS v2 Server Remote Procedure Calls,stacked,,proc.plugin,/proc/net/rpc/nfsd -nfsd.proc3,,a dimension per proc3 call,calls/s,NFS v3 Server Remote Procedure Calls,stacked,,proc.plugin,/proc/net/rpc/nfsd -nfsd.proc4,,a dimension per proc4 call,calls/s,NFS 
v4 Server Remote Procedure Calls,stacked,,proc.plugin,/proc/net/rpc/nfsd -nfsd.proc4ops,,a dimension per proc4 operation,operations/s,NFS v4 Server Operations,stacked,,proc.plugin,/proc/net/rpc/nfsd -sctp.established,,established,associations,SCTP current total number of established associations,line,,proc.plugin,/proc/net/sctp/snmp -sctp.transitions,,"active, passive, aborted, shutdown",transitions/s,SCTP Association Transitions,line,,proc.plugin,/proc/net/sctp/snmp -sctp.packets,,"received, sent",packets/s,SCTP Packets,line,,proc.plugin,/proc/net/sctp/snmp -sctp.packet_errors,,"invalid, checksum",packets/s,SCTP Packet Errors,line,,proc.plugin,/proc/net/sctp/snmp -sctp.fragmentation,,"reassembled, fragmented",packets/s,SCTP Fragmentation,line,,proc.plugin,/proc/net/sctp/snmp -netfilter.conntrack_sockets,,connections,active connections,Connection Tracker Connections,line,,proc.plugin,/proc/net/stat/nf_conntrack -netfilter.conntrack_new,,"new, ignore, invalid",connections/s,Connection Tracker New Connections,line,,proc.plugin,/proc/net/stat/nf_conntrack -netfilter.conntrack_changes,,"inserted, deleted, delete_list",changes/s,Connection Tracker Changes,line,,proc.plugin,/proc/net/stat/nf_conntrack -netfilter.conntrack_expect,,"created, deleted, new",expectations/s,Connection Tracker Expectations,line,,proc.plugin,/proc/net/stat/nf_conntrack -netfilter.conntrack_search,,"searched, restarted, found",searches/s,Connection Tracker Searches,line,,proc.plugin,/proc/net/stat/nf_conntrack -netfilter.conntrack_errors,,"icmp_error, error_failed, drop, early_drop",events/s,Connection Tracker Errors,line,,proc.plugin,/proc/net/stat/nf_conntrack -netfilter.synproxy_syn_received,,received,packets/s,SYNPROXY SYN Packets received,line,,proc.plugin,/proc/net/stat/synproxy -netfilter.synproxy_conn_reopened,,reopened,connections/s,SYNPROXY Connections Reopened,line,,proc.plugin,/proc/net/stat/synproxy -netfilter.synproxy_cookies,,"valid, invalid, retransmits",cookies/s,SYNPROXY TCP 
Cookies,line,,proc.plugin,/proc/net/stat/synproxy -zfspool.state,zfs pool,"online, degraded, faulted, offline, removed, unavail, suspended",boolean,"ZFS pool state",line,pool,proc.plugin,/proc/spl/kstat/zfs -zfs.arc_size,,"arcsz, target, min, max",MiB,"ZFS ARC Size",area,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.l2_size,,"actual, size",MiB,"ZFS L2 ARC Size",area,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.reads,,"arc, demand, prefetch, metadata, l2",reads/s,"ZFS Reads",area,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.bytes,,"read, write",KiB/s,"ZFS ARC L2 Read/Write Rate",area,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.hits,,"hits, misses",percentage,"ZFS ARC Hits",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.hits_rate,,"hits, misses",events/s,"ZFS ARC Hits Rate",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.dhits,,"hits, misses",percentage,"ZFS Demand Hits",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.dhits_rate,,"hits, misses",events/s,"ZFS Demand Hits Rate",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.phits,,"hits, misses",percentage,"ZFS Prefetch Hits",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.phits_rate,,"hits, misses",events/s,"ZFS Prefetch Hits Rate",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.mhits,,"hits, misses",percentage,"ZFS Metadata Hits",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.mhits_rate,,"hits, misses",events/s,"ZFS Metadata Hits Rate",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.l2hits,,"hits, misses",percentage,"ZFS L2 Hits",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.l2hits_rate,,"hits, misses",events/s,"ZFS L2 Hits Rate",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.list_hits,,"mfu, mfu_ghost, mru, mru_ghost",hits/s,"ZFS List Hits",area,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.arc_size_breakdown,,"recent, frequent",percentage,"ZFS ARC Size Breakdown",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats 
-zfs.memory_ops,,"direct, throttled, indirect",operations/s,"ZFS Memory Operations",line,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.important_ops,,"evict_skip, deleted, mutex_miss, hash_collisions",operations/s,"ZFS Important Operations",line,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.actual_hits,,"hits, misses",percentage,"ZFS Actual Cache Hits",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.actual_hits_rate,,"hits, misses",events/s,"ZFS Actual Cache Hits Rate",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.demand_data_hits,,"hits, misses",percentage,"ZFS Data Demand Efficiency",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.demand_data_hits_rate,,"hits, misses",events/s,"ZFS Data Demand Efficiency Rate",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.prefetch_data_hits,,"hits, misses",percentage,"ZFS Data Prefetch Efficiency",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.prefetch_data_hits_rate,,"hits, misses",events/s,"ZFS Data Prefetch Efficiency Rate",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.hash_elements,,"current, max",elements,"ZFS ARC Hash Elements",line,,proc.plugin,/proc/spl/kstat/zfs/arcstats -zfs.hash_chains,,"current, max",chains,"ZFS ARC Hash Chains",line,,proc.plugin,/proc/spl/kstat/zfs/arcstats -btrfs.disk,btrfs filesystem,"unallocated, data_free, data_used, meta_free, meta_used, sys_free, sys_used",MiB,"BTRFS Physical Disk Allocation",stacked,"filesystem_uuid, filesystem_label",proc.plugin,/sys/fs/btrfs -btrfs.data,btrfs filesystem,"free, used",MiB,"BTRFS Data Allocation",stacked,"filesystem_uuid, filesystem_label",proc.plugin,/sys/fs/btrfs -btrfs.metadata,btrfs filesystem,"free, used, reserved",MiB,"BTRFS Metadata Allocation",stacked,"filesystem_uuid, filesystem_label",proc.plugin,/sys/fs/btrfs -btrfs.system,btrfs filesystem,"free, used",MiB,"BTRFS System Allocation",stacked,"filesystem_uuid, filesystem_label",proc.plugin,/sys/fs/btrfs -btrfs.commits,btrfs 
filesystem,commits,commits,"BTRFS Commits",line,"filesystem_uuid, filesystem_label",proc.plugin,/sys/fs/btrfs -btrfs.commits_perc_time,btrfs filesystem,commits,percentage,"BTRFS Commits Time Share",line,"filesystem_uuid, filesystem_label",proc.plugin,/sys/fs/btrfs -btrfs.commit_timings,btrfs filesystem,"last, max",ms,"BTRFS Commit Timings",line,"filesystem_uuid, filesystem_label",proc.plugin,/sys/fs/btrfs -btrfs.device_errors,btrfs device,"write_errs, read_errs, flush_errs, corruption_errs, generation_errs",errors,"BTRFS Device Errors",line,"device_id, filesystem_uuid, filesystem_label",proc.plugin,/sys/fs/btrfs -powersupply.capacity,power device,capacity,percentage,Battery capacity,line,device,proc.plugin,/sys/class/power_supply -powersupply.charge,power device,"empty_design, empty, now, full, full_design",Ah,Battery charge,line,device,proc.plugin,/sys/class/power_supply -powersupply.energy,power device,"empty_design, empty, now, full, full_design",Wh,Battery energy,line,device,proc.plugin,/sys/class/power_supply -powersupply.voltage,power device,"min_design, min, now, max, max_design",V,Power supply voltage,line,device,proc.plugin,/sys/class/power_supply
\ No newline at end of file diff --git a/collectors/proc.plugin/plugin_proc.c b/collectors/proc.plugin/plugin_proc.c index c1a3293f..fbcaa614 100644 --- a/collectors/proc.plugin/plugin_proc.c +++ b/collectors/proc.plugin/plugin_proc.c @@ -33,7 +33,8 @@ static struct proc_module { {.name = "/proc/meminfo", .dim = "meminfo", .func = do_proc_meminfo}, {.name = "/sys/kernel/mm/ksm", .dim = "ksm", .func = do_sys_kernel_mm_ksm}, {.name = "/sys/block/zram", .dim = "zram", .func = do_sys_block_zram}, - {.name = "/sys/devices/system/edac/mc", .dim = "ecc", .func = do_proc_sys_devices_system_edac_mc}, + {.name = "/sys/devices/system/edac/mc", .dim = "edac", .func = do_proc_sys_devices_system_edac_mc}, + {.name = "/sys/devices/pci/aer", .dim = "pci_aer", .func = do_proc_sys_devices_pci_aer}, {.name = "/sys/devices/system/node", .dim = "numa", .func = do_proc_sys_devices_system_node}, {.name = "/proc/pagetypeinfo", .dim = "pagetypeinfo", .func = do_proc_pagetypeinfo}, @@ -69,8 +70,11 @@ static struct proc_module { // IPC metrics {.name = "ipc", .dim = "ipc", .func = do_ipc}, - {.name = "/sys/class/power_supply", .dim = "power_supply", .func = do_sys_class_power_supply}, // linux power supply metrics + {.name = "/sys/class/power_supply", .dim = "power_supply", .func = do_sys_class_power_supply}, + + // GPU metrics + {.name = "/sys/class/drm", .dim = "drm", .func = do_sys_class_drm}, // the terminator of this array {.name = NULL, .dim = NULL, .func = NULL} diff --git a/collectors/proc.plugin/plugin_proc.h b/collectors/proc.plugin/plugin_proc.h index 2b2cabca..a90f4838 100644 --- a/collectors/proc.plugin/plugin_proc.h +++ b/collectors/proc.plugin/plugin_proc.h @@ -34,6 +34,7 @@ int do_proc_net_stat_synproxy(int update_every, usec_t dt); int do_proc_net_softnet_stat(int update_every, usec_t dt); int do_proc_uptime(int update_every, usec_t dt); int do_proc_sys_devices_system_edac_mc(int update_every, usec_t dt); +int do_proc_sys_devices_pci_aer(int update_every, usec_t dt); int 
do_proc_sys_devices_system_node(int update_every, usec_t dt); int do_proc_spl_kstat_zfs_arcstats(int update_every, usec_t dt); int do_proc_spl_kstat_zfs_pool_state(int update_every, usec_t dt); @@ -45,6 +46,7 @@ int do_ipc(int update_every, usec_t dt); int do_sys_class_power_supply(int update_every, usec_t dt); int do_proc_pagetypeinfo(int update_every, usec_t dt); int do_sys_class_infiniband(int update_every, usec_t dt); +int do_sys_class_drm(int update_every, usec_t dt); int get_numa_node_count(void); // metrics that need to be shared among data collectors diff --git a/collectors/proc.plugin/proc_meminfo.c b/collectors/proc.plugin/proc_meminfo.c index 6988c70e..cd1ba872 100644 --- a/collectors/proc.plugin/proc_meminfo.c +++ b/collectors/proc.plugin/proc_meminfo.c @@ -9,58 +9,92 @@ int do_proc_meminfo(int update_every, usec_t dt) { (void)dt; static procfile *ff = NULL; - static int do_ram = -1, do_swap = -1, do_hwcorrupt = -1, do_committed = -1, do_writeback = -1, do_kernel = -1, do_slab = -1, do_hugepages = -1, do_transparent_hugepages = -1; - static int do_percpu = 0; + static int do_ram = -1 + , do_swap = -1 + , do_hwcorrupt = -1 + , do_committed = -1 + , do_writeback = -1 + , do_kernel = -1 + , do_slab = -1 + , do_hugepages = -1 + , do_transparent_hugepages = -1 + , do_reclaiming = -1 + , do_high_low = -1 + , do_cma = -1 + , do_directmap = -1; static ARL_BASE *arl_base = NULL; - static ARL_ENTRY *arl_hwcorrupted = NULL, *arl_memavailable = NULL; + static ARL_ENTRY *arl_hwcorrupted = NULL, *arl_memavailable = NULL, *arl_hugepages_total = NULL, + *arl_zswapped = NULL, *arl_high_low = NULL, *arl_cma_total = NULL, + *arl_directmap4k = NULL, *arl_directmap2m = NULL, *arl_directmap4m = NULL, *arl_directmap1g = NULL; static unsigned long long - MemTotal = 0, - MemFree = 0, - MemAvailable = 0, - Buffers = 0, - Cached = 0, - //SwapCached = 0, - //Active = 0, - //Inactive = 0, - //ActiveAnon = 0, - //InactiveAnon = 0, - //ActiveFile = 0, - //InactiveFile = 0, - 
//Unevictable = 0, - //Mlocked = 0, - SwapTotal = 0, - SwapFree = 0, - Dirty = 0, - Writeback = 0, - //AnonPages = 0, - //Mapped = 0, - Shmem = 0, - Slab = 0, - SReclaimable = 0, - SUnreclaim = 0, - KernelStack = 0, - PageTables = 0, - NFS_Unstable = 0, - Bounce = 0, - WritebackTmp = 0, - //CommitLimit = 0, - Committed_AS = 0, - //VmallocTotal = 0, - VmallocUsed = 0, - //VmallocChunk = 0, - Percpu = 0, - AnonHugePages = 0, - ShmemHugePages = 0, - HugePages_Total = 0, - HugePages_Free = 0, - HugePages_Rsvd = 0, - HugePages_Surp = 0, - Hugepagesize = 0, - //DirectMap4k = 0, - //DirectMap2M = 0, - HardwareCorrupted = 0; + MemTotal = 0 + , MemFree = 0 + , MemAvailable = 0 + , Buffers = 0 + , Cached = 0 + , SwapCached = 0 + , Active = 0 + , Inactive = 0 + , ActiveAnon = 0 + , InactiveAnon = 0 + , ActiveFile = 0 + , InactiveFile = 0 + , Unevictable = 0 + , Mlocked = 0 + , HighTotal = 0 + , HighFree = 0 + , LowTotal = 0 + , LowFree = 0 + , MmapCopy = 0 + , SwapTotal = 0 + , SwapFree = 0 + , Zswap = 0 + , Zswapped = 0 + , Dirty = 0 + , Writeback = 0 + , AnonPages = 0 + , Mapped = 0 + , Shmem = 0 + , KReclaimable = 0 + , Slab = 0 + , SReclaimable = 0 + , SUnreclaim = 0 + , KernelStack = 0 + , ShadowCallStack = 0 + , PageTables = 0 + , SecPageTables = 0 + , NFS_Unstable = 0 + , Bounce = 0 + , WritebackTmp = 0 + , CommitLimit = 0 + , Committed_AS = 0 + , VmallocTotal = 0 + , VmallocUsed = 0 + , VmallocChunk = 0 + , Percpu = 0 + //, EarlyMemtestBad = 0 + , HardwareCorrupted = 0 + , AnonHugePages = 0 + , ShmemHugePages = 0 + , ShmemPmdMapped = 0 + , FileHugePages = 0 + , FilePmdMapped = 0 + , CmaTotal = 0 + , CmaFree = 0 + //, Unaccepted = 0 + , HugePages_Total = 0 + , HugePages_Free = 0 + , HugePages_Rsvd = 0 + , HugePages_Surp = 0 + , Hugepagesize = 0 + //, Hugetlb = 0 + , DirectMap4k = 0 + , DirectMap2M = 0 + , DirectMap4M = 0 + , DirectMap1G = 0 + ; if(unlikely(!arl_base)) { do_ram = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "system ram", 1); @@ -72,6 +106,12 
@@ int do_proc_meminfo(int update_every, usec_t dt) { do_slab = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "slab memory", 1); do_hugepages = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "hugepages", CONFIG_BOOLEAN_AUTO); do_transparent_hugepages = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "transparent hugepages", CONFIG_BOOLEAN_AUTO); + do_reclaiming = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "memory reclaiming", CONFIG_BOOLEAN_AUTO); + do_high_low = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "high low memory", CONFIG_BOOLEAN_AUTO); + do_cma = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "cma memory", CONFIG_BOOLEAN_AUTO); + do_directmap = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "direct maps", CONFIG_BOOLEAN_AUTO); + + // https://github.com/torvalds/linux/blob/master/fs/proc/meminfo.c arl_base = arl_create("meminfo", NULL, 60); arl_expect(arl_base, "MemTotal", &MemTotal); @@ -79,46 +119,90 @@ int do_proc_meminfo(int update_every, usec_t dt) { arl_memavailable = arl_expect(arl_base, "MemAvailable", &MemAvailable); arl_expect(arl_base, "Buffers", &Buffers); arl_expect(arl_base, "Cached", &Cached); - //arl_expect(arl_base, "SwapCached", &SwapCached); - //arl_expect(arl_base, "Active", &Active); - //arl_expect(arl_base, "Inactive", &Inactive); - //arl_expect(arl_base, "ActiveAnon", &ActiveAnon); - //arl_expect(arl_base, "InactiveAnon", &InactiveAnon); - //arl_expect(arl_base, "ActiveFile", &ActiveFile); - //arl_expect(arl_base, "InactiveFile", &InactiveFile); - //arl_expect(arl_base, "Unevictable", &Unevictable); - //arl_expect(arl_base, "Mlocked", &Mlocked); + arl_expect(arl_base, "SwapCached", &SwapCached); + arl_expect(arl_base, "Active", &Active); + arl_expect(arl_base, "Inactive", &Inactive); + arl_expect(arl_base, "Active(anon)", &ActiveAnon); + arl_expect(arl_base, "Inactive(anon)", &InactiveAnon); + arl_expect(arl_base, "Active(file)", &ActiveFile); + 
arl_expect(arl_base, "Inactive(file)", &InactiveFile); + arl_expect(arl_base, "Unevictable", &Unevictable); + arl_expect(arl_base, "Mlocked", &Mlocked); + + // CONFIG_HIGHMEM + arl_high_low = arl_expect(arl_base, "HighTotal", &HighTotal); + arl_expect(arl_base, "HighFree", &HighFree); + arl_expect(arl_base, "LowTotal", &LowTotal); + arl_expect(arl_base, "LowFree", &LowFree); + + // CONFIG_MMU + arl_expect(arl_base, "MmapCopy", &MmapCopy); + arl_expect(arl_base, "SwapTotal", &SwapTotal); arl_expect(arl_base, "SwapFree", &SwapFree); + + // CONFIG_ZSWAP + arl_zswapped = arl_expect(arl_base, "Zswap", &Zswap); + arl_expect(arl_base, "Zswapped", &Zswapped); + arl_expect(arl_base, "Dirty", &Dirty); arl_expect(arl_base, "Writeback", &Writeback); - //arl_expect(arl_base, "AnonPages", &AnonPages); - //arl_expect(arl_base, "Mapped", &Mapped); + arl_expect(arl_base, "AnonPages", &AnonPages); + arl_expect(arl_base, "Mapped", &Mapped); arl_expect(arl_base, "Shmem", &Shmem); + arl_expect(arl_base, "KReclaimable", &KReclaimable); arl_expect(arl_base, "Slab", &Slab); arl_expect(arl_base, "SReclaimable", &SReclaimable); arl_expect(arl_base, "SUnreclaim", &SUnreclaim); arl_expect(arl_base, "KernelStack", &KernelStack); + + // CONFIG_SHADOW_CALL_STACK + arl_expect(arl_base, "ShadowCallStack", &ShadowCallStack); + arl_expect(arl_base, "PageTables", &PageTables); + arl_expect(arl_base, "SecPageTables", &SecPageTables); arl_expect(arl_base, "NFS_Unstable", &NFS_Unstable); arl_expect(arl_base, "Bounce", &Bounce); arl_expect(arl_base, "WritebackTmp", &WritebackTmp); - //arl_expect(arl_base, "CommitLimit", &CommitLimit); + arl_expect(arl_base, "CommitLimit", &CommitLimit); arl_expect(arl_base, "Committed_AS", &Committed_AS); - //arl_expect(arl_base, "VmallocTotal", &VmallocTotal); + arl_expect(arl_base, "VmallocTotal", &VmallocTotal); arl_expect(arl_base, "VmallocUsed", &VmallocUsed); - //arl_expect(arl_base, "VmallocChunk", &VmallocChunk); + arl_expect(arl_base, "VmallocChunk", 
&VmallocChunk); arl_expect(arl_base, "Percpu", &Percpu); + + // CONFIG_MEMTEST + //arl_expect(arl_base, "EarlyMemtestBad", &EarlyMemtestBad); + + // CONFIG_MEMORY_FAILURE arl_hwcorrupted = arl_expect(arl_base, "HardwareCorrupted", &HardwareCorrupted); + + // CONFIG_TRANSPARENT_HUGEPAGE arl_expect(arl_base, "AnonHugePages", &AnonHugePages); arl_expect(arl_base, "ShmemHugePages", &ShmemHugePages); - arl_expect(arl_base, "HugePages_Total", &HugePages_Total); + arl_expect(arl_base, "ShmemPmdMapped", &ShmemPmdMapped); + arl_expect(arl_base, "FileHugePages", &FileHugePages); + arl_expect(arl_base, "FilePmdMapped", &FilePmdMapped); + + // CONFIG_CMA + arl_cma_total = arl_expect(arl_base, "CmaTotal", &CmaTotal); + arl_expect(arl_base, "CmaFree", &CmaFree); + + // CONFIG_UNACCEPTED_MEMORY + //arl_expect(arl_base, "Unaccepted", &Unaccepted); + + // these appear only when hugepages are supported + arl_hugepages_total = arl_expect(arl_base, "HugePages_Total", &HugePages_Total); arl_expect(arl_base, "HugePages_Free", &HugePages_Free); arl_expect(arl_base, "HugePages_Rsvd", &HugePages_Rsvd); arl_expect(arl_base, "HugePages_Surp", &HugePages_Surp); arl_expect(arl_base, "Hugepagesize", &Hugepagesize); - //arl_expect(arl_base, "DirectMap4k", &DirectMap4k); - //arl_expect(arl_base, "DirectMap2M", &DirectMap2M); + //arl_expect(arl_base, "Hugetlb", &Hugetlb); + + arl_directmap4k = arl_expect(arl_base, "DirectMap4k", &DirectMap4k); + arl_directmap2m = arl_expect(arl_base, "DirectMap2M", &DirectMap2M); + arl_directmap4m = arl_expect(arl_base, "DirectMap4M", &DirectMap4M); + arl_directmap1g = arl_expect(arl_base, "DirectMap1G", &DirectMap1G); } if(unlikely(!ff)) { @@ -136,26 +220,17 @@ int do_proc_meminfo(int update_every, usec_t dt) { size_t lines = procfile_lines(ff), l; arl_begin(arl_base); - - static int first_ff_read = 1; - for(l = 0; l < lines ;l++) { size_t words = procfile_linewords(ff, l); if(unlikely(words < 2)) continue; - if (first_ff_read && !strcmp(procfile_lineword(ff, l, 
0), "Percpu")) - do_percpu = 1; - if(unlikely(arl_check(arl_base, procfile_lineword(ff, l, 0), procfile_lineword(ff, l, 1)))) break; } - if (first_ff_read) - first_ff_read = 0; - // http://calimeroteknik.free.fr/blag/?article20/really-used-memory-on-gnu-linux - unsigned long long MemCached = Cached + SReclaimable - Shmem; + unsigned long long MemCached = Cached + SReclaimable + KReclaimable - Shmem; unsigned long long MemUsed = MemTotal - MemFree - MemCached - Buffers; // The Linux kernel doesn't report ZFS ARC usage as cache memory (the ARC is included in the total used system memory) if (!inside_lxc_container) { @@ -207,7 +282,7 @@ int do_proc_meminfo(int update_every, usec_t dt) { "mem" , "available" , NULL - , "system" + , "overview" , NULL , "Available RAM for applications" , "MiB" @@ -238,7 +313,7 @@ int do_proc_meminfo(int update_every, usec_t dt) { if(unlikely(!st_system_swap)) { st_system_swap = rrdset_create_localhost( - "system" + "mem" , "swap" , NULL , "swap" @@ -247,7 +322,7 @@ int do_proc_meminfo(int update_every, usec_t dt) { , "MiB" , PLUGIN_PROC_NAME , PLUGIN_PROC_MODULE_MEMINFO_NAME - , NETDATA_CHART_PRIO_SYSTEM_SWAP + , NETDATA_CHART_PRIO_MEM_SWAP , update_every , RRDSET_TYPE_STACKED ); @@ -261,6 +336,62 @@ int do_proc_meminfo(int update_every, usec_t dt) { rrddim_set_by_pointer(st_system_swap, rd_used, SwapUsed); rrddim_set_by_pointer(st_system_swap, rd_free, SwapFree); rrdset_done(st_system_swap); + + { + static RRDSET *st_mem_swap_cached = NULL; + static RRDDIM *rd_cached = NULL; + + if (unlikely(!st_mem_swap_cached)) { + st_mem_swap_cached = rrdset_create_localhost( + "mem" + , "swap_cached" + , NULL + , "swap" + , NULL + , "Swap Memory Cached in RAM" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_MEMINFO_NAME + , NETDATA_CHART_PRIO_MEM_SWAP + 1 + , update_every + , RRDSET_TYPE_AREA + ); + + rd_cached = rrddim_add(st_mem_swap_cached, "cached", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem_swap_cached, 
rd_cached, SwapCached); + rrdset_done(st_mem_swap_cached); + } + + if(arl_zswapped->flags & ARL_ENTRY_FLAG_FOUND) { + static RRDSET *st_mem_zswap = NULL; + static RRDDIM *rd_zswap = NULL, *rd_zswapped = NULL; + + if (unlikely(!st_mem_zswap)) { + st_mem_zswap = rrdset_create_localhost( + "mem" + , "zswap" + , NULL + , "zswap" + , NULL + , "Zswap Usage" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_MEMINFO_NAME + , NETDATA_CHART_PRIO_MEM_ZSWAP + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_zswap = rrddim_add(st_mem_zswap, "zswap", "in-ram", 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_zswapped = rrddim_add(st_mem_zswap, "zswapped", "on-disk", 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem_zswap, rd_zswap, Zswap); + rrddim_set_by_pointer(st_mem_zswap, rd_zswapped, Zswapped); + rrdset_done(st_mem_zswap); + } } if(arl_hwcorrupted->flags & ARL_ENTRY_FLAG_FOUND && @@ -306,7 +437,7 @@ int do_proc_meminfo(int update_every, usec_t dt) { "mem" , "committed" , NULL - , "system" + , "overview" , NULL , "Committed (Allocated) Memory" , "MiB" @@ -335,7 +466,7 @@ int do_proc_meminfo(int update_every, usec_t dt) { "mem" , "writeback" , NULL - , "kernel" + , "writeback" , NULL , "Writeback Memory" , "MiB" @@ -367,7 +498,7 @@ int do_proc_meminfo(int update_every, usec_t dt) { if(do_kernel) { static RRDSET *st_mem_kernel = NULL; static RRDDIM *rd_slab = NULL, *rd_kernelstack = NULL, *rd_pagetables = NULL, *rd_vmallocused = NULL, - *rd_percpu = NULL; + *rd_percpu = NULL, *rd_kreclaimable = NULL; if(unlikely(!st_mem_kernel)) { st_mem_kernel = rrdset_create_localhost( @@ -391,16 +522,16 @@ int do_proc_meminfo(int update_every, usec_t dt) { rd_kernelstack = rrddim_add(st_mem_kernel, "KernelStack", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); rd_pagetables = rrddim_add(st_mem_kernel, "PageTables", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); rd_vmallocused = rrddim_add(st_mem_kernel, "VmallocUsed", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); - if (do_percpu) - rd_percpu = 
rrddim_add(st_mem_kernel, "Percpu", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_percpu = rrddim_add(st_mem_kernel, "Percpu", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_kreclaimable = rrddim_add(st_mem_kernel, "KReclaimable", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); } - rrddim_set_by_pointer(st_mem_kernel, rd_slab, Slab); - rrddim_set_by_pointer(st_mem_kernel, rd_kernelstack, KernelStack); - rrddim_set_by_pointer(st_mem_kernel, rd_pagetables, PageTables); - rrddim_set_by_pointer(st_mem_kernel, rd_vmallocused, VmallocUsed); - if (do_percpu) - rrddim_set_by_pointer(st_mem_kernel, rd_percpu, Percpu); + rrddim_set_by_pointer(st_mem_kernel, rd_slab, Slab); + rrddim_set_by_pointer(st_mem_kernel, rd_kernelstack, KernelStack); + rrddim_set_by_pointer(st_mem_kernel, rd_pagetables, PageTables); + rrddim_set_by_pointer(st_mem_kernel, rd_vmallocused, VmallocUsed); + rrddim_set_by_pointer(st_mem_kernel, rd_percpu, Percpu); + rrddim_set_by_pointer(st_mem_kernel, rd_kreclaimable, KReclaimable); rrdset_done(st_mem_kernel); } @@ -436,9 +567,10 @@ int do_proc_meminfo(int update_every, usec_t dt) { rrdset_done(st_mem_slab); } - if(do_hugepages == CONFIG_BOOLEAN_YES || (do_hugepages == CONFIG_BOOLEAN_AUTO && + if(arl_hugepages_total->flags & ARL_ENTRY_FLAG_FOUND && + (do_hugepages == CONFIG_BOOLEAN_YES || (do_hugepages == CONFIG_BOOLEAN_AUTO && ((Hugepagesize && HugePages_Total) || - netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)))) { do_hugepages = CONFIG_BOOLEAN_YES; static RRDSET *st_mem_hugepages = NULL; @@ -455,7 +587,7 @@ int do_proc_meminfo(int update_every, usec_t dt) { , "MiB" , PLUGIN_PROC_NAME , PLUGIN_PROC_MODULE_MEMINFO_NAME - , NETDATA_CHART_PRIO_MEM_HUGEPAGES + 1 + , NETDATA_CHART_PRIO_MEM_HUGEPAGES , update_every , RRDSET_TYPE_STACKED ); @@ -487,7 +619,7 @@ int do_proc_meminfo(int update_every, usec_t dt) { if(unlikely(!st_mem_transparent_hugepages)) { st_mem_transparent_hugepages = 
rrdset_create_localhost( "mem" - , "transparent_hugepages" + , "thp" , NULL , "hugepages" , NULL @@ -495,7 +627,7 @@ int do_proc_meminfo(int update_every, usec_t dt) { , "MiB" , PLUGIN_PROC_NAME , PLUGIN_PROC_MODULE_MEMINFO_NAME - , NETDATA_CHART_PRIO_MEM_HUGEPAGES + , NETDATA_CHART_PRIO_MEM_HUGEPAGES + 1 , update_every , RRDSET_TYPE_STACKED ); @@ -509,6 +641,206 @@ int do_proc_meminfo(int update_every, usec_t dt) { rrddim_set_by_pointer(st_mem_transparent_hugepages, rd_anonymous, AnonHugePages); rrddim_set_by_pointer(st_mem_transparent_hugepages, rd_shared, ShmemHugePages); rrdset_done(st_mem_transparent_hugepages); + + { + static RRDSET *st_mem_thp_details = NULL; + static RRDDIM *rd_shmem_pmd_mapped = NULL, *rd_file_huge_pages = NULL, *rd_file_pmd_mapped = NULL; + + if(unlikely(!st_mem_thp_details)) { + st_mem_thp_details = rrdset_create_localhost( + "mem" + , "thp_details" + , NULL + , "hugepages" + , NULL + , "Details of Transparent HugePages Usage" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_MEMINFO_NAME + , NETDATA_CHART_PRIO_MEM_HUGEPAGES_DETAILS + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st_mem_thp_details, RRDSET_FLAG_DETAIL); + + rd_shmem_pmd_mapped = rrddim_add(st_mem_thp_details, "shmem_pmd", "ShmemPmdMapped", 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_file_huge_pages = rrddim_add(st_mem_thp_details, "file", "FileHugePages", 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_file_pmd_mapped = rrddim_add(st_mem_thp_details, "file_pmd", "FilePmdMapped", 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem_thp_details, rd_shmem_pmd_mapped, ShmemPmdMapped); + rrddim_set_by_pointer(st_mem_thp_details, rd_file_huge_pages, FileHugePages); + rrddim_set_by_pointer(st_mem_thp_details, rd_file_pmd_mapped, FilePmdMapped); + rrdset_done(st_mem_thp_details); + } + } + + if(do_reclaiming != CONFIG_BOOLEAN_NO) { + static RRDSET *st_mem_reclaiming = NULL; + static RRDDIM *rd_active = NULL, *rd_inactive = NULL, + *rd_active_anon = NULL, 
*rd_inactive_anon = NULL, + *rd_active_file = NULL, *rd_inactive_file = NULL, + *rd_unevictable = NULL, *rd_mlocked = NULL; + + if(unlikely(!st_mem_reclaiming)) { + st_mem_reclaiming = rrdset_create_localhost( + "mem" + , "reclaiming" + , NULL + , "reclaiming" + , NULL + , "Memory Reclaiming" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_MEMINFO_NAME + , NETDATA_CHART_PRIO_MEM_RECLAIMING + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_flag_set(st_mem_reclaiming, RRDSET_FLAG_DETAIL); + + rd_active = rrddim_add(st_mem_reclaiming, "active", "Active", 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_inactive = rrddim_add(st_mem_reclaiming, "inactive", "Inactive", 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_active_anon = rrddim_add(st_mem_reclaiming, "active_anon", "Active(anon)", 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_inactive_anon = rrddim_add(st_mem_reclaiming, "inactive_anon", "Inactive(anon)", 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_active_file = rrddim_add(st_mem_reclaiming, "active_file", "Active(file)", 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_inactive_file = rrddim_add(st_mem_reclaiming, "inactive_file", "Inactive(file)", 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_unevictable = rrddim_add(st_mem_reclaiming, "unevictable", "Unevictable", 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_mlocked = rrddim_add(st_mem_reclaiming, "mlocked", "Mlocked", 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem_reclaiming, rd_active, Active); + rrddim_set_by_pointer(st_mem_reclaiming, rd_inactive, Inactive); + rrddim_set_by_pointer(st_mem_reclaiming, rd_active_anon, ActiveAnon); + rrddim_set_by_pointer(st_mem_reclaiming, rd_inactive_anon, InactiveAnon); + rrddim_set_by_pointer(st_mem_reclaiming, rd_active_file, ActiveFile); + rrddim_set_by_pointer(st_mem_reclaiming, rd_inactive_file, InactiveFile); + rrddim_set_by_pointer(st_mem_reclaiming, rd_unevictable, Unevictable); + rrddim_set_by_pointer(st_mem_reclaiming, rd_mlocked, Mlocked); + + rrdset_done(st_mem_reclaiming); + } + 
+ if(do_high_low != CONFIG_BOOLEAN_NO && (arl_high_low->flags & ARL_ENTRY_FLAG_FOUND)) { + static RRDSET *st_mem_high_low = NULL; + static RRDDIM *rd_high_used = NULL, *rd_low_used = NULL; + static RRDDIM *rd_high_free = NULL, *rd_low_free = NULL; + + if(unlikely(!st_mem_high_low)) { + st_mem_high_low = rrdset_create_localhost( + "mem" + , "high_low" + , NULL + , "high_low" + , NULL + , "High and Low Used and Free Memory Areas" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_MEMINFO_NAME + , NETDATA_CHART_PRIO_MEM_HIGH_LOW + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdset_flag_set(st_mem_high_low, RRDSET_FLAG_DETAIL); + + rd_high_used = rrddim_add(st_mem_high_low, "high_used", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_low_used = rrddim_add(st_mem_high_low, "low_used", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_high_free = rrddim_add(st_mem_high_low, "high_free", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rd_low_free = rrddim_add(st_mem_high_low, "low_free", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem_high_low, rd_high_used, HighTotal - HighFree); + rrddim_set_by_pointer(st_mem_high_low, rd_low_used, LowTotal - LowFree); + rrddim_set_by_pointer(st_mem_high_low, rd_high_free, HighFree); + rrddim_set_by_pointer(st_mem_high_low, rd_low_free, LowFree); + rrdset_done(st_mem_high_low); + } + + if(do_cma == CONFIG_BOOLEAN_YES || (do_cma == CONFIG_BOOLEAN_AUTO && (arl_cma_total->flags & ARL_ENTRY_FLAG_FOUND) && CmaTotal)) { + do_cma = CONFIG_BOOLEAN_YES; + + static RRDSET *st_mem_cma = NULL; + static RRDDIM *rd_used = NULL, *rd_free = NULL; + + if(unlikely(!st_mem_cma)) { + st_mem_cma = rrdset_create_localhost( + "mem" + , "cma" + , NULL + , "cma" + , NULL + , "Contiguous Memory Allocator (CMA) Memory" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_MEMINFO_NAME + , NETDATA_CHART_PRIO_MEM_CMA + , update_every + , RRDSET_TYPE_STACKED + ); + + rd_used = rrddim_add(st_mem_cma, "used", NULL, 1, 1024, 
RRD_ALGORITHM_ABSOLUTE); + rd_free = rrddim_add(st_mem_cma, "free", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem_cma, rd_used, CmaTotal - CmaFree); + rrddim_set_by_pointer(st_mem_cma, rd_free, CmaFree); + rrdset_done(st_mem_cma); + } + + if(do_directmap != CONFIG_BOOLEAN_NO && + ((arl_directmap4k->flags & ARL_ENTRY_FLAG_FOUND) || + (arl_directmap2m->flags & ARL_ENTRY_FLAG_FOUND) || + (arl_directmap4m->flags & ARL_ENTRY_FLAG_FOUND) || + (arl_directmap1g->flags & ARL_ENTRY_FLAG_FOUND))) + { + static RRDSET *st_mem_directmap = NULL; + static RRDDIM *rd_4k = NULL, *rd_2m = NULL, *rd_1g = NULL, *rd_4m = NULL; + + if(unlikely(!st_mem_directmap)) { + st_mem_directmap = rrdset_create_localhost( + "mem" + , "directmaps" + , NULL + , "overview" + , NULL + , "Direct Memory Mappings" + , "MiB" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_MEMINFO_NAME + , NETDATA_CHART_PRIO_MEM_DIRECTMAP + , update_every + , RRDSET_TYPE_STACKED + ); + + if(arl_directmap4k->flags & ARL_ENTRY_FLAG_FOUND) + rd_4k = rrddim_add(st_mem_directmap, "4k", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + + if(arl_directmap2m->flags & ARL_ENTRY_FLAG_FOUND) + rd_2m = rrddim_add(st_mem_directmap, "2m", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + + if(arl_directmap4m->flags & ARL_ENTRY_FLAG_FOUND) + rd_4m = rrddim_add(st_mem_directmap, "4m", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + + if(arl_directmap1g->flags & ARL_ENTRY_FLAG_FOUND) + rd_1g = rrddim_add(st_mem_directmap, "1g", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + if(rd_4k) + rrddim_set_by_pointer(st_mem_directmap, rd_4k, DirectMap4k); + + if(rd_2m) + rrddim_set_by_pointer(st_mem_directmap, rd_2m, DirectMap2M); + + if(rd_4m) + rrddim_set_by_pointer(st_mem_directmap, rd_4m, DirectMap4M); + + if(rd_1g) + rrddim_set_by_pointer(st_mem_directmap, rd_1g, DirectMap1G); + + rrdset_done(st_mem_directmap); } return 0; diff --git a/collectors/proc.plugin/proc_pressure.c b/collectors/proc.plugin/proc_pressure.c index 28e4c592..4037e60a 
100644 --- a/collectors/proc.plugin/proc_pressure.c +++ b/collectors/proc.plugin/proc_pressure.c @@ -12,28 +12,55 @@ static int pressure_update_every = 0; static struct pressure resources[PRESSURE_NUM_RESOURCES] = { { - .some = - {.share_time = {.id = "cpu_some_pressure", .title = "CPU some pressure"}, - .total_time = {.id = "cpu_some_pressure_stall_time", .title = "CPU some pressure stall time"}}, - .full = - {.share_time = {.id = "cpu_full_pressure", .title = "CPU full pressure"}, - .total_time = {.id = "cpu_full_pressure_stall_time", .title = "CPU full pressure stall time"}}, + .some = { + .available = true, + .share_time = {.id = "cpu_some_pressure", .title = "CPU some pressure"}, + .total_time = {.id = "cpu_some_pressure_stall_time", .title = "CPU some pressure stall time"} + }, + .full = { + // Disable CPU full pressure. + // See https://github.com/torvalds/linux/commit/890d550d7dbac7a31ecaa78732aa22be282bb6b8 + .available = false, + .share_time = {.id = "cpu_full_pressure", .title = "CPU full pressure"}, + .total_time = {.id = "cpu_full_pressure_stall_time", .title = "CPU full pressure stall time"} + }, }, { - .some = - {.share_time = {.id = "memory_some_pressure", .title = "Memory some pressure"}, - .total_time = {.id = "memory_some_pressure_stall_time", .title = "Memory some pressure stall time"}}, - .full = - {.share_time = {.id = "memory_full_pressure", .title = "Memory full pressure"}, - .total_time = {.id = "memory_full_pressure_stall_time", .title = "Memory full pressure stall time"}}, + .some = { + .available = true, + .share_time = {.id = "memory_some_pressure", .title = "Memory some pressure"}, + .total_time = {.id = "memory_some_pressure_stall_time", .title = "Memory some pressure stall time"} + }, + .full = { + .available = true, + .share_time = {.id = "memory_full_pressure", .title = "Memory full pressure"}, + .total_time = {.id = "memory_full_pressure_stall_time", .title = "Memory full pressure stall time"} + }, }, { - .some = - {.share_time = 
{.id = "io_some_pressure", .title = "I/O some pressure"}, - .total_time = {.id = "io_some_pressure_stall_time", .title = "I/O some pressure stall time"}}, - .full = - {.share_time = {.id = "io_full_pressure", .title = "I/O full pressure"}, - .total_time = {.id = "io_full_pressure_stall_time", .title = "I/O full pressure stall time"}}, + .some = { + .available = true, + .share_time = {.id = "io_some_pressure", .title = "I/O some pressure"}, + .total_time = {.id = "io_some_pressure_stall_time", .title = "I/O some pressure stall time"} + }, + .full = { + .available = true, + .share_time = {.id = "io_full_pressure", .title = "I/O full pressure"}, + .total_time = {.id = "io_full_pressure_stall_time", .title = "I/O full pressure stall time"} + }, + }, + { + .some = { + // this is not available + .available = false, + .share_time = {.id = "irq_some_pressure", .title = "IRQ some pressure"}, + .total_time = {.id = "irq_some_pressure_stall_time", .title = "IRQ some pressure stall time"} + }, + .full = { + .available = true, + .share_time = {.id = "irq_full_pressure", .title = "IRQ full pressure"}, + .total_time = {.id = "irq_full_pressure_stall_time", .title = "IRQ full pressure stall time"} + }, }, }; @@ -46,6 +73,7 @@ static struct resource_info { { .name = "cpu", .family = "cpu", .section_priority = NETDATA_CHART_PRIO_SYSTEM_CPU }, { .name = "memory", .family = "ram", .section_priority = NETDATA_CHART_PRIO_SYSTEM_RAM }, { .name = "io", .family = "disk", .section_priority = NETDATA_CHART_PRIO_SYSTEM_IO }, + { .name = "irq", .family = "interrupts", .section_priority = NETDATA_CHART_PRIO_SYSTEM_INTERRUPTS }, }; void update_pressure_charts(struct pressure_charts *pcs) { @@ -65,7 +93,7 @@ void update_pressure_charts(struct pressure_charts *pcs) { } } -static void proc_pressure_do_resource(procfile *ff, int res_idx, int some) { +static void proc_pressure_do_resource(procfile *ff, int res_idx, size_t line, bool some) { struct pressure_charts *pcs; struct resource_info ri; pcs = 
some ? &resources[res_idx].some : &resources[res_idx].full; @@ -93,9 +121,9 @@ static void proc_pressure_do_resource(procfile *ff, int res_idx, int some) { rrddim_add(pcs->share_time.st, some ? "some 300" : "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); } - pcs->share_time.value10 = strtod(procfile_lineword(ff, some ? 0 : 1, 2), NULL); - pcs->share_time.value60 = strtod(procfile_lineword(ff, some ? 0 : 1, 4), NULL); - pcs->share_time.value300 = strtod(procfile_lineword(ff, some ? 0 : 1, 6), NULL); + pcs->share_time.value10 = strtod(procfile_lineword(ff, line, 2), NULL); + pcs->share_time.value60 = strtod(procfile_lineword(ff, line, 4), NULL); + pcs->share_time.value300 = strtod(procfile_lineword(ff, line, 6), NULL); if (unlikely(!pcs->total_time.st)) { pcs->total_time.st = rrdset_create_localhost( @@ -114,19 +142,19 @@ static void proc_pressure_do_resource(procfile *ff, int res_idx, int some) { pcs->total_time.rdtotal = rrddim_add(pcs->total_time.st, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } - pcs->total_time.value_total = str2ull(procfile_lineword(ff, some ? 
0 : 1, 8), NULL) / 1000; + pcs->total_time.value_total = str2ull(procfile_lineword(ff, line, 8), NULL) / 1000; } -static void proc_pressure_do_resource_some(procfile *ff, int res_idx) { - proc_pressure_do_resource(ff, res_idx, 1); +static void proc_pressure_do_resource_some(procfile *ff, int res_idx, size_t line) { + proc_pressure_do_resource(ff, res_idx, line, true); } -static void proc_pressure_do_resource_full(procfile *ff, int res_idx) { - proc_pressure_do_resource(ff, res_idx, 0); +static void proc_pressure_do_resource_full(procfile *ff, int res_idx, size_t line) { + proc_pressure_do_resource(ff, res_idx, line, false); } int do_proc_pressure(int update_every, usec_t dt) { - int fail_count = 0; + int ok_count = 0; int i; static usec_t next_pressure_dt = 0; @@ -150,6 +178,9 @@ int do_proc_pressure(int update_every, usec_t dt) { procfile *ff = resource_info[i].pf; int do_some = resources[i].some.enabled, do_full = resources[i].full.enabled; + if (!resources[i].some.available && !resources[i].full.available) + continue; + if (unlikely(!ff)) { char filename[FILENAME_MAX + 1]; char config_key[CONFIG_MAX_NAME + 1]; @@ -161,56 +192,66 @@ int do_proc_pressure(int update_every, usec_t dt) { , base_path , resource_info[i].name); + do_some = resources[i].some.available ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO; + do_full = resources[i].full.available ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO; + snprintfz(config_key, CONFIG_MAX_NAME, "enable %s some pressure", resource_info[i].name); - do_some = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES); + do_some = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, do_some); resources[i].some.enabled = do_some; - // Disable CPU full pressure. 
- // See https://github.com/torvalds/linux/commit/890d550d7dbac7a31ecaa78732aa22be282bb6b8 - if (i == 0) { - do_full = CONFIG_BOOLEAN_NO; - resources[i].full.enabled = do_full; - } else { - snprintfz(config_key, CONFIG_MAX_NAME, "enable %s full pressure", resource_info[i].name); - do_full = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES); - resources[i].full.enabled = do_full; + snprintfz(config_key, CONFIG_MAX_NAME, "enable %s full pressure", resource_info[i].name); + do_full = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, do_full); + resources[i].full.enabled = do_full; + + if (!do_full && !do_some) { + resources[i].some.available = false; + resources[i].full.available = false; + continue; } - ff = procfile_open(filename, " =", PROCFILE_FLAG_DEFAULT); + ff = procfile_open(filename, " =", PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); if (unlikely(!ff)) { - collector_error("Cannot read pressure information from %s.", filename); - fail_count++; + // PSI IRQ was added recently (https://github.com/torvalds/linux/commit/52b1364ba0b105122d6de0e719b36db705011ac1) + if (strcmp(resource_info[i].name, "irq") != 0) + collector_error("Cannot read pressure information from %s.", filename); + resources[i].some.available = false; + resources[i].full.available = false; continue; } } ff = procfile_readall(ff); resource_info[i].pf = ff; - if (unlikely(!ff)) { - fail_count++; + if (unlikely(!ff)) continue; - } size_t lines = procfile_lines(ff); if (unlikely(lines < 1)) { collector_error("%s has no lines.", procfile_filename(ff)); - fail_count++; continue; } - if (do_some) { - proc_pressure_do_resource_some(ff, i); - update_pressure_charts(&resources[i].some); - } - if (do_full && lines > 2) { - proc_pressure_do_resource_full(ff, i); - update_pressure_charts(&resources[i].full); + for(size_t l = 0; l < lines ;l++) { + const char *key = procfile_lineword(ff, l, 0); + if(strcmp(key, "some") == 0) { + if(do_some) { + 
proc_pressure_do_resource_some(ff, i, l); + update_pressure_charts(&resources[i].some); + ok_count++; + } + } + else if(strcmp(key, "full") == 0) { + if(do_full) { + proc_pressure_do_resource_full(ff, i, l); + update_pressure_charts(&resources[i].full); + ok_count++; + } + } } } - if (PRESSURE_NUM_RESOURCES == fail_count) { + if(!ok_count) return 1; - } return 0; } diff --git a/collectors/proc.plugin/proc_pressure.h b/collectors/proc.plugin/proc_pressure.h index 0cb23315..2e5cab2c 100644 --- a/collectors/proc.plugin/proc_pressure.h +++ b/collectors/proc.plugin/proc_pressure.h @@ -3,13 +3,14 @@ #ifndef NETDATA_PROC_PRESSURE_H #define NETDATA_PROC_PRESSURE_H -#define PRESSURE_NUM_RESOURCES 3 +#define PRESSURE_NUM_RESOURCES 4 struct pressure { int updated; char *filename; struct pressure_charts { + bool available; int enabled; struct pressure_share_time_chart { diff --git a/collectors/proc.plugin/proc_stat.c b/collectors/proc.plugin/proc_stat.c index f0f31935..a4f76796 100644 --- a/collectors/proc.plugin/proc_stat.c +++ b/collectors/proc.plugin/proc_stat.c @@ -494,7 +494,7 @@ int do_proc_stat(int update_every, usec_t dt) { do_processes = config_get_boolean("plugin:proc:/proc/stat", "processes running", CONFIG_BOOLEAN_YES); // give sane defaults based on the number of processors - if(unlikely(get_system_cpus() > 50)) { + if(unlikely(get_system_cpus() > 128)) { // the system has too many processors keep_per_core_fds_open = CONFIG_BOOLEAN_NO; do_core_throttle_count = CONFIG_BOOLEAN_NO; diff --git a/collectors/proc.plugin/proc_vmstat.c b/collectors/proc.plugin/proc_vmstat.c index ca56e900..b44733b6 100644 --- a/collectors/proc.plugin/proc_vmstat.c +++ b/collectors/proc.plugin/proc_vmstat.c @@ -271,7 +271,7 @@ int do_proc_vmstat(int update_every, usec_t dt) { if(unlikely(!st_swapio)) { st_swapio = rrdset_create_localhost( - "system" + "mem" , "swapio" , NULL , "swap" @@ -280,7 +280,7 @@ int do_proc_vmstat(int update_every, usec_t dt) { , "KiB/s" , PLUGIN_PROC_NAME , 
PLUGIN_PROC_MODULE_VMSTAT_NAME - , NETDATA_CHART_PRIO_SYSTEM_SWAPIO + , NETDATA_CHART_PRIO_MEM_SWAPIO , update_every , RRDSET_TYPE_AREA ); @@ -336,7 +336,7 @@ int do_proc_vmstat(int update_every, usec_t dt) { "mem" , "pgfaults" , NULL - , "system" + , "page faults" , NULL , "Memory Page Faults" , "faults/s" @@ -372,7 +372,7 @@ int do_proc_vmstat(int update_every, usec_t dt) { "mem" , "oom_kill" , NULL - , "system" + , "OOM kills" , NULL , "Out of Memory Kills" , "kills/s" @@ -505,7 +505,7 @@ int do_proc_vmstat(int update_every, usec_t dt) { if(unlikely(!st_zswapio)) { st_zswapio = rrdset_create_localhost( - "system" + "mem" , "zswapio" , NULL , "zswap" @@ -514,7 +514,7 @@ int do_proc_vmstat(int update_every, usec_t dt) { , "KiB/s" , PLUGIN_PROC_NAME , PLUGIN_PROC_MODULE_VMSTAT_NAME - , NETDATA_CHART_PRIO_SYSTEM_ZSWAPIO + , NETDATA_CHART_PRIO_MEM_ZSWAPIO , update_every , RRDSET_TYPE_AREA ); diff --git a/collectors/proc.plugin/sys_class_drm.c b/collectors/proc.plugin/sys_class_drm.c new file mode 100644 index 00000000..284662cf --- /dev/null +++ b/collectors/proc.plugin/sys_class_drm.c @@ -0,0 +1,1179 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_DRM_NAME "/sys/class/drm" +#define CONFIG_SECTION_PLUGIN_PROC_DRM "plugin:proc:/sys/class/drm" +#define AMDGPU_CHART_TYPE "amdgpu" + +struct amdgpu_id_struct { + unsigned long long asic_id; + unsigned long long pci_rev_id; + const char *marketing_name; +}; + +/* + * About amdgpu_ids list: + * ------------------------------------------------------------------------ + * Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * The list is copied from: + * https://raw.githubusercontent.com/Syllo/nvtop/master/src/amdgpu_ids.h + * + * which is modified from libdrm (MIT License): + * + * URL: https://gitlab.freedesktop.org/mesa/drm/-/blob/main/data/amdgpu.ids + * ------------------------------------------------------------------------ + * **IMPORTANT**: The amdgpu_ids has to be modified after new GPU releases. + * ------------------------------------------------------------------------*/ + +static const struct amdgpu_id_struct amdgpu_ids[] = { + {0x1309, 0x00, "AMD Radeon R7 Graphics"}, + {0x130A, 0x00, "AMD Radeon R6 Graphics"}, + {0x130B, 0x00, "AMD Radeon R4 Graphics"}, + {0x130C, 0x00, "AMD Radeon R7 Graphics"}, + {0x130D, 0x00, "AMD Radeon R6 Graphics"}, + {0x130E, 0x00, "AMD Radeon R5 Graphics"}, + {0x130F, 0x00, "AMD Radeon R7 Graphics"}, + {0x130F, 0xD4, "AMD Radeon R7 Graphics"}, + {0x130F, 0xD5, "AMD Radeon R7 Graphics"}, + {0x130F, 0xD6, "AMD Radeon R7 Graphics"}, + {0x130F, 0xD7, "AMD Radeon R7 Graphics"}, + {0x1313, 0x00, "AMD Radeon R7 Graphics"}, + {0x1313, 0xD4, "AMD Radeon R7 Graphics"}, + {0x1313, 0xD5, "AMD Radeon R7 Graphics"}, + {0x1313, 0xD6, "AMD Radeon R7 Graphics"}, + {0x1315, 0x00, "AMD Radeon R5 Graphics"}, + {0x1315, 0xD4, "AMD Radeon R5 Graphics"}, + {0x1315, 0xD5, "AMD Radeon R5 Graphics"}, + {0x1315, 0xD6, "AMD Radeon R5 Graphics"}, + {0x1315, 0xD7, "AMD Radeon R5 Graphics"}, + {0x1316, 0x00, "AMD Radeon R5 Graphics"}, + {0x1318, 0x00, "AMD Radeon R5 Graphics"}, + {0x131B, 0x00, "AMD Radeon R4 Graphics"}, + {0x131C, 0x00, "AMD Radeon R7 Graphics"}, + {0x131D, 0x00, "AMD Radeon R6 Graphics"}, + {0x15D8, 0x00, "AMD Radeon RX Vega 8 Graphics WS"}, + {0x15D8, 0x91, "AMD Radeon Vega 3 Graphics"}, + {0x15D8, 0x91, "AMD Ryzen Embedded R1606G with Radeon Vega Gfx"}, + {0x15D8, 0x92, "AMD Radeon Vega 3 Graphics"}, + {0x15D8, 0x92, "AMD Ryzen Embedded R1505G with Radeon Vega Gfx"}, + {0x15D8, 0x93, "AMD Radeon Vega 1 Graphics"}, + {0x15D8, 0xA1, "AMD Radeon Vega 
10 Graphics"}, + {0x15D8, 0xA2, "AMD Radeon Vega 8 Graphics"}, + {0x15D8, 0xA3, "AMD Radeon Vega 6 Graphics"}, + {0x15D8, 0xA4, "AMD Radeon Vega 3 Graphics"}, + {0x15D8, 0xB1, "AMD Radeon Vega 10 Graphics"}, + {0x15D8, 0xB2, "AMD Radeon Vega 8 Graphics"}, + {0x15D8, 0xB3, "AMD Radeon Vega 6 Graphics"}, + {0x15D8, 0xB4, "AMD Radeon Vega 3 Graphics"}, + {0x15D8, 0xC1, "AMD Radeon Vega 10 Graphics"}, + {0x15D8, 0xC2, "AMD Radeon Vega 8 Graphics"}, + {0x15D8, 0xC3, "AMD Radeon Vega 6 Graphics"}, + {0x15D8, 0xC4, "AMD Radeon Vega 3 Graphics"}, + {0x15D8, 0xC5, "AMD Radeon Vega 3 Graphics"}, + {0x15D8, 0xC8, "AMD Radeon Vega 11 Graphics"}, + {0x15D8, 0xC9, "AMD Radeon Vega 8 Graphics"}, + {0x15D8, 0xCA, "AMD Radeon Vega 11 Graphics"}, + {0x15D8, 0xCB, "AMD Radeon Vega 8 Graphics"}, + {0x15D8, 0xCC, "AMD Radeon Vega 3 Graphics"}, + {0x15D8, 0xCE, "AMD Radeon Vega 3 Graphics"}, + {0x15D8, 0xCF, "AMD Ryzen Embedded R1305G with Radeon Vega Gfx"}, + {0x15D8, 0xD1, "AMD Radeon Vega 10 Graphics"}, + {0x15D8, 0xD2, "AMD Radeon Vega 8 Graphics"}, + {0x15D8, 0xD3, "AMD Radeon Vega 6 Graphics"}, + {0x15D8, 0xD4, "AMD Radeon Vega 3 Graphics"}, + {0x15D8, 0xD8, "AMD Radeon Vega 11 Graphics"}, + {0x15D8, 0xD9, "AMD Radeon Vega 8 Graphics"}, + {0x15D8, 0xDA, "AMD Radeon Vega 11 Graphics"}, + {0x15D8, 0xDB, "AMD Radeon Vega 3 Graphics"}, + {0x15D8, 0xDB, "AMD Radeon Vega 8 Graphics"}, + {0x15D8, 0xDC, "AMD Radeon Vega 3 Graphics"}, + {0x15D8, 0xDD, "AMD Radeon Vega 3 Graphics"}, + {0x15D8, 0xDE, "AMD Radeon Vega 3 Graphics"}, + {0x15D8, 0xDF, "AMD Radeon Vega 3 Graphics"}, + {0x15D8, 0xE3, "AMD Radeon Vega 3 Graphics"}, + {0x15D8, 0xE4, "AMD Ryzen Embedded R1102G with Radeon Vega Gfx"}, + {0x15DD, 0x81, "AMD Ryzen Embedded V1807B with Radeon Vega Gfx"}, + {0x15DD, 0x82, "AMD Ryzen Embedded V1756B with Radeon Vega Gfx"}, + {0x15DD, 0x83, "AMD Ryzen Embedded V1605B with Radeon Vega Gfx"}, + {0x15DD, 0x84, "AMD Radeon Vega 6 Graphics"}, + {0x15DD, 0x85, "AMD Ryzen Embedded V1202B with 
Radeon Vega Gfx"}, + {0x15DD, 0x86, "AMD Radeon Vega 11 Graphics"}, + {0x15DD, 0x88, "AMD Radeon Vega 8 Graphics"}, + {0x15DD, 0xC1, "AMD Radeon Vega 11 Graphics"}, + {0x15DD, 0xC2, "AMD Radeon Vega 8 Graphics"}, + {0x15DD, 0xC3, "AMD Radeon Vega 3 / 10 Graphics"}, + {0x15DD, 0xC4, "AMD Radeon Vega 8 Graphics"}, + {0x15DD, 0xC5, "AMD Radeon Vega 3 Graphics"}, + {0x15DD, 0xC6, "AMD Radeon Vega 11 Graphics"}, + {0x15DD, 0xC8, "AMD Radeon Vega 8 Graphics"}, + {0x15DD, 0xC9, "AMD Radeon Vega 11 Graphics"}, + {0x15DD, 0xCA, "AMD Radeon Vega 8 Graphics"}, + {0x15DD, 0xCB, "AMD Radeon Vega 3 Graphics"}, + {0x15DD, 0xCC, "AMD Radeon Vega 6 Graphics"}, + {0x15DD, 0xCE, "AMD Radeon Vega 3 Graphics"}, + {0x15DD, 0xCF, "AMD Radeon Vega 3 Graphics"}, + {0x15DD, 0xD0, "AMD Radeon Vega 10 Graphics"}, + {0x15DD, 0xD1, "AMD Radeon Vega 8 Graphics"}, + {0x15DD, 0xD3, "AMD Radeon Vega 11 Graphics"}, + {0x15DD, 0xD5, "AMD Radeon Vega 8 Graphics"}, + {0x15DD, 0xD6, "AMD Radeon Vega 11 Graphics"}, + {0x15DD, 0xD7, "AMD Radeon Vega 8 Graphics"}, + {0x15DD, 0xD8, "AMD Radeon Vega 3 Graphics"}, + {0x15DD, 0xD9, "AMD Radeon Vega 6 Graphics"}, + {0x15DD, 0xE1, "AMD Radeon Vega 3 Graphics"}, + {0x15DD, 0xE2, "AMD Radeon Vega 3 Graphics"}, + {0x163F, 0xAE, "AMD Custom GPU 0405"}, + {0x6600, 0x00, "AMD Radeon HD 8600 / 8700M"}, + {0x6600, 0x81, "AMD Radeon R7 M370"}, + {0x6601, 0x00, "AMD Radeon HD 8500M / 8700M"}, + {0x6604, 0x00, "AMD Radeon R7 M265 Series"}, + {0x6604, 0x81, "AMD Radeon R7 M350"}, + {0x6605, 0x00, "AMD Radeon R7 M260 Series"}, + {0x6605, 0x81, "AMD Radeon R7 M340"}, + {0x6606, 0x00, "AMD Radeon HD 8790M"}, + {0x6607, 0x00, "AMD Radeon R5 M240"}, + {0x6608, 0x00, "AMD FirePro W2100"}, + {0x6610, 0x00, "AMD Radeon R7 200 Series"}, + {0x6610, 0x81, "AMD Radeon R7 350"}, + {0x6610, 0x83, "AMD Radeon R5 340"}, + {0x6610, 0x87, "AMD Radeon R7 200 Series"}, + {0x6611, 0x00, "AMD Radeon R7 200 Series"}, + {0x6611, 0x87, "AMD Radeon R7 200 Series"}, + {0x6613, 0x00, "AMD Radeon R7 
200 Series"}, + {0x6617, 0x00, "AMD Radeon R7 240 Series"}, + {0x6617, 0x87, "AMD Radeon R7 200 Series"}, + {0x6617, 0xC7, "AMD Radeon R7 240 Series"}, + {0x6640, 0x00, "AMD Radeon HD 8950"}, + {0x6640, 0x80, "AMD Radeon R9 M380"}, + {0x6646, 0x00, "AMD Radeon R9 M280X"}, + {0x6646, 0x80, "AMD Radeon R9 M385"}, + {0x6646, 0x80, "AMD Radeon R9 M470X"}, + {0x6647, 0x00, "AMD Radeon R9 M200X Series"}, + {0x6647, 0x80, "AMD Radeon R9 M380"}, + {0x6649, 0x00, "AMD FirePro W5100"}, + {0x6658, 0x00, "AMD Radeon R7 200 Series"}, + {0x665C, 0x00, "AMD Radeon HD 7700 Series"}, + {0x665D, 0x00, "AMD Radeon R7 200 Series"}, + {0x665F, 0x81, "AMD Radeon R7 360 Series"}, + {0x6660, 0x00, "AMD Radeon HD 8600M Series"}, + {0x6660, 0x81, "AMD Radeon R5 M335"}, + {0x6660, 0x83, "AMD Radeon R5 M330"}, + {0x6663, 0x00, "AMD Radeon HD 8500M Series"}, + {0x6663, 0x83, "AMD Radeon R5 M320"}, + {0x6664, 0x00, "AMD Radeon R5 M200 Series"}, + {0x6665, 0x00, "AMD Radeon R5 M230 Series"}, + {0x6665, 0x83, "AMD Radeon R5 M320"}, + {0x6665, 0xC3, "AMD Radeon R5 M435"}, + {0x6666, 0x00, "AMD Radeon R5 M200 Series"}, + {0x6667, 0x00, "AMD Radeon R5 M200 Series"}, + {0x666F, 0x00, "AMD Radeon HD 8500M"}, + {0x66A1, 0x02, "AMD Instinct MI60 / MI50"}, + {0x66A1, 0x06, "AMD Radeon Pro VII"}, + {0x66AF, 0xC1, "AMD Radeon VII"}, + {0x6780, 0x00, "AMD FirePro W9000"}, + {0x6784, 0x00, "ATI FirePro V (FireGL V) Graphics Adapter"}, + {0x6788, 0x00, "ATI FirePro V (FireGL V) Graphics Adapter"}, + {0x678A, 0x00, "AMD FirePro W8000"}, + {0x6798, 0x00, "AMD Radeon R9 200 / HD 7900 Series"}, + {0x6799, 0x00, "AMD Radeon HD 7900 Series"}, + {0x679A, 0x00, "AMD Radeon HD 7900 Series"}, + {0x679B, 0x00, "AMD Radeon HD 7900 Series"}, + {0x679E, 0x00, "AMD Radeon HD 7800 Series"}, + {0x67A0, 0x00, "AMD Radeon FirePro W9100"}, + {0x67A1, 0x00, "AMD Radeon FirePro W8100"}, + {0x67B0, 0x00, "AMD Radeon R9 200 Series"}, + {0x67B0, 0x80, "AMD Radeon R9 390 Series"}, + {0x67B1, 0x00, "AMD Radeon R9 200 Series"}, + 
{0x67B1, 0x80, "AMD Radeon R9 390 Series"}, + {0x67B9, 0x00, "AMD Radeon R9 200 Series"}, + {0x67C0, 0x00, "AMD Radeon Pro WX 7100 Graphics"}, + {0x67C0, 0x80, "AMD Radeon E9550"}, + {0x67C2, 0x01, "AMD Radeon Pro V7350x2"}, + {0x67C2, 0x02, "AMD Radeon Pro V7300X"}, + {0x67C4, 0x00, "AMD Radeon Pro WX 7100 Graphics"}, + {0x67C4, 0x80, "AMD Radeon E9560 / E9565 Graphics"}, + {0x67C7, 0x00, "AMD Radeon Pro WX 5100 Graphics"}, + {0x67C7, 0x80, "AMD Radeon E9390 Graphics"}, + {0x67D0, 0x01, "AMD Radeon Pro V7350x2"}, + {0x67D0, 0x02, "AMD Radeon Pro V7300X"}, + {0x67DF, 0xC0, "AMD Radeon Pro 580X"}, + {0x67DF, 0xC1, "AMD Radeon RX 580 Series"}, + {0x67DF, 0xC2, "AMD Radeon RX 570 Series"}, + {0x67DF, 0xC3, "AMD Radeon RX 580 Series"}, + {0x67DF, 0xC4, "AMD Radeon RX 480 Graphics"}, + {0x67DF, 0xC5, "AMD Radeon RX 470 Graphics"}, + {0x67DF, 0xC6, "AMD Radeon RX 570 Series"}, + {0x67DF, 0xC7, "AMD Radeon RX 480 Graphics"}, + {0x67DF, 0xCF, "AMD Radeon RX 470 Graphics"}, + {0x67DF, 0xD7, "AMD Radeon RX 470 Graphics"}, + {0x67DF, 0xE0, "AMD Radeon RX 470 Series"}, + {0x67DF, 0xE1, "AMD Radeon RX 590 Series"}, + {0x67DF, 0xE3, "AMD Radeon RX Series"}, + {0x67DF, 0xE7, "AMD Radeon RX 580 Series"}, + {0x67DF, 0xEB, "AMD Radeon Pro 580X"}, + {0x67DF, 0xEF, "AMD Radeon RX 570 Series"}, + {0x67DF, 0xF7, "AMD Radeon RX P30PH"}, + {0x67DF, 0xFF, "AMD Radeon RX 470 Series"}, + {0x67E0, 0x00, "AMD Radeon Pro WX Series"}, + {0x67E3, 0x00, "AMD Radeon Pro WX 4100"}, + {0x67E8, 0x00, "AMD Radeon Pro WX Series"}, + {0x67E8, 0x01, "AMD Radeon Pro WX Series"}, + {0x67E8, 0x80, "AMD Radeon E9260 Graphics"}, + {0x67EB, 0x00, "AMD Radeon Pro V5300X"}, + {0x67EF, 0xC0, "AMD Radeon RX Graphics"}, + {0x67EF, 0xC1, "AMD Radeon RX 460 Graphics"}, + {0x67EF, 0xC2, "AMD Radeon Pro Series"}, + {0x67EF, 0xC3, "AMD Radeon RX Series"}, + {0x67EF, 0xC5, "AMD Radeon RX 460 Graphics"}, + {0x67EF, 0xC7, "AMD Radeon RX Graphics"}, + {0x67EF, 0xCF, "AMD Radeon RX 460 Graphics"}, + {0x67EF, 0xE0, "AMD Radeon 
RX 560 Series"}, + {0x67EF, 0xE1, "AMD Radeon RX Series"}, + {0x67EF, 0xE2, "AMD Radeon RX 560X"}, + {0x67EF, 0xE3, "AMD Radeon RX Series"}, + {0x67EF, 0xE5, "AMD Radeon RX 560 Series"}, + {0x67EF, 0xE7, "AMD Radeon RX 560 Series"}, + {0x67EF, 0xEF, "AMD Radeon 550 Series"}, + {0x67EF, 0xFF, "AMD Radeon RX 460 Graphics"}, + {0x67FF, 0xC0, "AMD Radeon Pro 465"}, + {0x67FF, 0xC1, "AMD Radeon RX 560 Series"}, + {0x67FF, 0xCF, "AMD Radeon RX 560 Series"}, + {0x67FF, 0xEF, "AMD Radeon RX 560 Series"}, + {0x67FF, 0xFF, "AMD Radeon RX 550 Series"}, + {0x6800, 0x00, "AMD Radeon HD 7970M"}, + {0x6801, 0x00, "AMD Radeon HD 8970M"}, + {0x6806, 0x00, "AMD Radeon R9 M290X"}, + {0x6808, 0x00, "AMD FirePro W7000"}, + {0x6808, 0x00, "ATI FirePro V (FireGL V) Graphics Adapter"}, + {0x6809, 0x00, "ATI FirePro W5000"}, + {0x6810, 0x00, "AMD Radeon R9 200 Series"}, + {0x6810, 0x81, "AMD Radeon R9 370 Series"}, + {0x6811, 0x00, "AMD Radeon R9 200 Series"}, + {0x6811, 0x81, "AMD Radeon R7 370 Series"}, + {0x6818, 0x00, "AMD Radeon HD 7800 Series"}, + {0x6819, 0x00, "AMD Radeon HD 7800 Series"}, + {0x6820, 0x00, "AMD Radeon R9 M275X"}, + {0x6820, 0x81, "AMD Radeon R9 M375"}, + {0x6820, 0x83, "AMD Radeon R9 M375X"}, + {0x6821, 0x00, "AMD Radeon R9 M200X Series"}, + {0x6821, 0x83, "AMD Radeon R9 M370X"}, + {0x6821, 0x87, "AMD Radeon R7 M380"}, + {0x6822, 0x00, "AMD Radeon E8860"}, + {0x6823, 0x00, "AMD Radeon R9 M200X Series"}, + {0x6825, 0x00, "AMD Radeon HD 7800M Series"}, + {0x6826, 0x00, "AMD Radeon HD 7700M Series"}, + {0x6827, 0x00, "AMD Radeon HD 7800M Series"}, + {0x6828, 0x00, "AMD FirePro W600"}, + {0x682B, 0x00, "AMD Radeon HD 8800M Series"}, + {0x682B, 0x87, "AMD Radeon R9 M360"}, + {0x682C, 0x00, "AMD FirePro W4100"}, + {0x682D, 0x00, "AMD Radeon HD 7700M Series"}, + {0x682F, 0x00, "AMD Radeon HD 7700M Series"}, + {0x6830, 0x00, "AMD Radeon 7800M Series"}, + {0x6831, 0x00, "AMD Radeon 7700M Series"}, + {0x6835, 0x00, "AMD Radeon R7 Series / HD 9000 Series"}, + {0x6837, 0x00, 
"AMD Radeon HD 7700 Series"}, + {0x683D, 0x00, "AMD Radeon HD 7700 Series"}, + {0x683F, 0x00, "AMD Radeon HD 7700 Series"}, + {0x684C, 0x00, "ATI FirePro V (FireGL V) Graphics Adapter"}, + {0x6860, 0x00, "AMD Radeon Instinct MI25"}, + {0x6860, 0x01, "AMD Radeon Instinct MI25"}, + {0x6860, 0x02, "AMD Radeon Instinct MI25"}, + {0x6860, 0x03, "AMD Radeon Pro V340"}, + {0x6860, 0x04, "AMD Radeon Instinct MI25x2"}, + {0x6860, 0x07, "AMD Radeon Pro V320"}, + {0x6861, 0x00, "AMD Radeon Pro WX 9100"}, + {0x6862, 0x00, "AMD Radeon Pro SSG"}, + {0x6863, 0x00, "AMD Radeon Vega Frontier Edition"}, + {0x6864, 0x03, "AMD Radeon Pro V340"}, + {0x6864, 0x04, "AMD Radeon Instinct MI25x2"}, + {0x6864, 0x05, "AMD Radeon Pro V340"}, + {0x6868, 0x00, "AMD Radeon Pro WX 8200"}, + {0x686C, 0x00, "AMD Radeon Instinct MI25 MxGPU"}, + {0x686C, 0x01, "AMD Radeon Instinct MI25 MxGPU"}, + {0x686C, 0x02, "AMD Radeon Instinct MI25 MxGPU"}, + {0x686C, 0x03, "AMD Radeon Pro V340 MxGPU"}, + {0x686C, 0x04, "AMD Radeon Instinct MI25x2 MxGPU"}, + {0x686C, 0x05, "AMD Radeon Pro V340L MxGPU"}, + {0x686C, 0x06, "AMD Radeon Instinct MI25 MxGPU"}, + {0x687F, 0x01, "AMD Radeon RX Vega"}, + {0x687F, 0xC0, "AMD Radeon RX Vega"}, + {0x687F, 0xC1, "AMD Radeon RX Vega"}, + {0x687F, 0xC3, "AMD Radeon RX Vega"}, + {0x687F, 0xC7, "AMD Radeon RX Vega"}, + {0x6900, 0x00, "AMD Radeon R7 M260"}, + {0x6900, 0x81, "AMD Radeon R7 M360"}, + {0x6900, 0x83, "AMD Radeon R7 M340"}, + {0x6900, 0xC1, "AMD Radeon R5 M465 Series"}, + {0x6900, 0xC3, "AMD Radeon R5 M445 Series"}, + {0x6900, 0xD1, "AMD Radeon 530 Series"}, + {0x6900, 0xD3, "AMD Radeon 530 Series"}, + {0x6901, 0x00, "AMD Radeon R5 M255"}, + {0x6902, 0x00, "AMD Radeon Series"}, + {0x6907, 0x00, "AMD Radeon R5 M255"}, + {0x6907, 0x87, "AMD Radeon R5 M315"}, + {0x6920, 0x00, "AMD Radeon R9 M395X"}, + {0x6920, 0x01, "AMD Radeon R9 M390X"}, + {0x6921, 0x00, "AMD Radeon R9 M390X"}, + {0x6929, 0x00, "AMD FirePro S7150"}, + {0x6929, 0x01, "AMD FirePro S7100X"}, + {0x692B, 
0x00, "AMD FirePro W7100"}, + {0x6938, 0x00, "AMD Radeon R9 200 Series"}, + {0x6938, 0xF0, "AMD Radeon R9 200 Series"}, + {0x6938, 0xF1, "AMD Radeon R9 380 Series"}, + {0x6939, 0x00, "AMD Radeon R9 200 Series"}, + {0x6939, 0xF0, "AMD Radeon R9 200 Series"}, + {0x6939, 0xF1, "AMD Radeon R9 380 Series"}, + {0x694C, 0xC0, "AMD Radeon RX Vega M GH Graphics"}, + {0x694E, 0xC0, "AMD Radeon RX Vega M GL Graphics"}, + {0x6980, 0x00, "AMD Radeon Pro WX 3100"}, + {0x6981, 0x00, "AMD Radeon Pro WX 3200 Series"}, + {0x6981, 0x01, "AMD Radeon Pro WX 3200 Series"}, + {0x6981, 0x10, "AMD Radeon Pro WX 3200 Series"}, + {0x6985, 0x00, "AMD Radeon Pro WX 3100"}, + {0x6986, 0x00, "AMD Radeon Pro WX 2100"}, + {0x6987, 0x80, "AMD Embedded Radeon E9171"}, + {0x6987, 0xC0, "AMD Radeon 550X Series"}, + {0x6987, 0xC1, "AMD Radeon RX 640"}, + {0x6987, 0xC3, "AMD Radeon 540X Series"}, + {0x6987, 0xC7, "AMD Radeon 540"}, + {0x6995, 0x00, "AMD Radeon Pro WX 2100"}, + {0x6997, 0x00, "AMD Radeon Pro WX 2100"}, + {0x699F, 0x81, "AMD Embedded Radeon E9170 Series"}, + {0x699F, 0xC0, "AMD Radeon 500 Series"}, + {0x699F, 0xC1, "AMD Radeon 540 Series"}, + {0x699F, 0xC3, "AMD Radeon 500 Series"}, + {0x699F, 0xC7, "AMD Radeon RX 550 / 550 Series"}, + {0x699F, 0xC9, "AMD Radeon 540"}, + {0x6FDF, 0xE7, "AMD Radeon RX 590 GME"}, + {0x6FDF, 0xEF, "AMD Radeon RX 580 2048SP"}, + {0x7300, 0xC1, "AMD FirePro S9300 x2"}, + {0x7300, 0xC8, "AMD Radeon R9 Fury Series"}, + {0x7300, 0xC9, "AMD Radeon Pro Duo"}, + {0x7300, 0xCA, "AMD Radeon R9 Fury Series"}, + {0x7300, 0xCB, "AMD Radeon R9 Fury Series"}, + {0x7312, 0x00, "AMD Radeon Pro W5700"}, + {0x731E, 0xC6, "AMD Radeon RX 5700XTB"}, + {0x731E, 0xC7, "AMD Radeon RX 5700B"}, + {0x731F, 0xC0, "AMD Radeon RX 5700 XT 50th Anniversary"}, + {0x731F, 0xC1, "AMD Radeon RX 5700 XT"}, + {0x731F, 0xC2, "AMD Radeon RX 5600M"}, + {0x731F, 0xC3, "AMD Radeon RX 5700M"}, + {0x731F, 0xC4, "AMD Radeon RX 5700"}, + {0x731F, 0xC5, "AMD Radeon RX 5700 XT"}, + {0x731F, 0xCA, "AMD 
Radeon RX 5600 XT"}, + {0x731F, 0xCB, "AMD Radeon RX 5600 OEM"}, + {0x7340, 0xC1, "AMD Radeon RX 5500M"}, + {0x7340, 0xC3, "AMD Radeon RX 5300M"}, + {0x7340, 0xC5, "AMD Radeon RX 5500 XT"}, + {0x7340, 0xC7, "AMD Radeon RX 5500"}, + {0x7340, 0xC9, "AMD Radeon RX 5500XTB"}, + {0x7340, 0xCF, "AMD Radeon RX 5300"}, + {0x7341, 0x00, "AMD Radeon Pro W5500"}, + {0x7347, 0x00, "AMD Radeon Pro W5500M"}, + {0x7360, 0x41, "AMD Radeon Pro 5600M"}, + {0x7360, 0xC3, "AMD Radeon Pro V520"}, + {0x738C, 0x01, "AMD Instinct MI100"}, + {0x73A3, 0x00, "AMD Radeon Pro W6800"}, + {0x73A5, 0xC0, "AMD Radeon RX 6950 XT"}, + {0x73AF, 0xC0, "AMD Radeon RX 6900 XT"}, + {0x73BF, 0xC0, "AMD Radeon RX 6900 XT"}, + {0x73BF, 0xC1, "AMD Radeon RX 6800 XT"}, + {0x73BF, 0xC3, "AMD Radeon RX 6800"}, + {0x73DF, 0xC0, "AMD Radeon RX 6750 XT"}, + {0x73DF, 0xC1, "AMD Radeon RX 6700 XT"}, + {0x73DF, 0xC2, "AMD Radeon RX 6800M"}, + {0x73DF, 0xC3, "AMD Radeon RX 6800M"}, + {0x73DF, 0xC5, "AMD Radeon RX 6700 XT"}, + {0x73DF, 0xCF, "AMD Radeon RX 6700M"}, + {0x73DF, 0xD7, "AMD TDC-235"}, + {0x73E1, 0x00, "AMD Radeon Pro W6600M"}, + {0x73E3, 0x00, "AMD Radeon Pro W6600"}, + {0x73EF, 0xC0, "AMD Radeon RX 6800S"}, + {0x73EF, 0xC1, "AMD Radeon RX 6650 XT"}, + {0x73EF, 0xC2, "AMD Radeon RX 6700S"}, + {0x73EF, 0xC3, "AMD Radeon RX 6650M"}, + {0x73EF, 0xC4, "AMD Radeon RX 6650M XT"}, + {0x73FF, 0xC1, "AMD Radeon RX 6600 XT"}, + {0x73FF, 0xC3, "AMD Radeon RX 6600M"}, + {0x73FF, 0xC7, "AMD Radeon RX 6600"}, + {0x73FF, 0xCB, "AMD Radeon RX 6600S"}, + {0x7408, 0x00, "AMD Instinct MI250X"}, + {0x740C, 0x01, "AMD Instinct MI250X / MI250"}, + {0x740F, 0x02, "AMD Instinct MI210"}, + {0x7421, 0x00, "AMD Radeon Pro W6500M"}, + {0x7422, 0x00, "AMD Radeon Pro W6400"}, + {0x7423, 0x00, "AMD Radeon Pro W6300M"}, + {0x7423, 0x01, "AMD Radeon Pro W6300"}, + {0x7424, 0x00, "AMD Radeon RX 6300"}, + {0x743F, 0xC1, "AMD Radeon RX 6500 XT"}, + {0x743F, 0xC3, "AMD Radeon RX 6500"}, + {0x743F, 0xC3, "AMD Radeon RX 6500M"}, + {0x743F, 
0xC7, "AMD Radeon RX 6400"}, + {0x743F, 0xCF, "AMD Radeon RX 6300M"}, + {0x744C, 0xC8, "AMD Radeon RX 7900 XTX"}, + {0x744C, 0xCC, "AMD Radeon RX 7900 XT"}, + {0x7480, 0xC1, "AMD Radeon RX 7700S"}, + {0x7480, 0xC3, "AMD Radeon RX 7600S"}, + {0x7480, 0xC7, "AMD Radeon RX 7600M XT"}, + {0x7483, 0xCF, "AMD Radeon RX 7600M"}, + {0x9830, 0x00, "AMD Radeon HD 8400 / R3 Series"}, + {0x9831, 0x00, "AMD Radeon HD 8400E"}, + {0x9832, 0x00, "AMD Radeon HD 8330"}, + {0x9833, 0x00, "AMD Radeon HD 8330E"}, + {0x9834, 0x00, "AMD Radeon HD 8210"}, + {0x9835, 0x00, "AMD Radeon HD 8210E"}, + {0x9836, 0x00, "AMD Radeon HD 8200 / R3 Series"}, + {0x9837, 0x00, "AMD Radeon HD 8280E"}, + {0x9838, 0x00, "AMD Radeon HD 8200 / R3 series"}, + {0x9839, 0x00, "AMD Radeon HD 8180"}, + {0x983D, 0x00, "AMD Radeon HD 8250"}, + {0x9850, 0x00, "AMD Radeon R3 Graphics"}, + {0x9850, 0x03, "AMD Radeon R3 Graphics"}, + {0x9850, 0x40, "AMD Radeon R2 Graphics"}, + {0x9850, 0x45, "AMD Radeon R3 Graphics"}, + {0x9851, 0x00, "AMD Radeon R4 Graphics"}, + {0x9851, 0x01, "AMD Radeon R5E Graphics"}, + {0x9851, 0x05, "AMD Radeon R5 Graphics"}, + {0x9851, 0x06, "AMD Radeon R5E Graphics"}, + {0x9851, 0x40, "AMD Radeon R4 Graphics"}, + {0x9851, 0x45, "AMD Radeon R5 Graphics"}, + {0x9852, 0x00, "AMD Radeon R2 Graphics"}, + {0x9852, 0x40, "AMD Radeon E1 Graphics"}, + {0x9853, 0x00, "AMD Radeon R2 Graphics"}, + {0x9853, 0x01, "AMD Radeon R4E Graphics"}, + {0x9853, 0x03, "AMD Radeon R2 Graphics"}, + {0x9853, 0x05, "AMD Radeon R1E Graphics"}, + {0x9853, 0x06, "AMD Radeon R1E Graphics"}, + {0x9853, 0x07, "AMD Radeon R1E Graphics"}, + {0x9853, 0x08, "AMD Radeon R1E Graphics"}, + {0x9853, 0x40, "AMD Radeon R2 Graphics"}, + {0x9854, 0x00, "AMD Radeon R3 Graphics"}, + {0x9854, 0x01, "AMD Radeon R3E Graphics"}, + {0x9854, 0x02, "AMD Radeon R3 Graphics"}, + {0x9854, 0x05, "AMD Radeon R2 Graphics"}, + {0x9854, 0x06, "AMD Radeon R4 Graphics"}, + {0x9854, 0x07, "AMD Radeon R3 Graphics"}, + {0x9855, 0x02, "AMD Radeon R6 Graphics"}, 
+ {0x9855, 0x05, "AMD Radeon R4 Graphics"}, + {0x9856, 0x00, "AMD Radeon R2 Graphics"}, + {0x9856, 0x01, "AMD Radeon R2E Graphics"}, + {0x9856, 0x02, "AMD Radeon R2 Graphics"}, + {0x9856, 0x05, "AMD Radeon R1E Graphics"}, + {0x9856, 0x06, "AMD Radeon R2 Graphics"}, + {0x9856, 0x07, "AMD Radeon R1E Graphics"}, + {0x9856, 0x08, "AMD Radeon R1E Graphics"}, + {0x9856, 0x13, "AMD Radeon R1E Graphics"}, + {0x9874, 0x81, "AMD Radeon R6 Graphics"}, + {0x9874, 0x84, "AMD Radeon R7 Graphics"}, + {0x9874, 0x85, "AMD Radeon R6 Graphics"}, + {0x9874, 0x87, "AMD Radeon R5 Graphics"}, + {0x9874, 0x88, "AMD Radeon R7E Graphics"}, + {0x9874, 0x89, "AMD Radeon R6E Graphics"}, + {0x9874, 0xC4, "AMD Radeon R7 Graphics"}, + {0x9874, 0xC5, "AMD Radeon R6 Graphics"}, + {0x9874, 0xC6, "AMD Radeon R6 Graphics"}, + {0x9874, 0xC7, "AMD Radeon R5 Graphics"}, + {0x9874, 0xC8, "AMD Radeon R7 Graphics"}, + {0x9874, 0xC9, "AMD Radeon R7 Graphics"}, + {0x9874, 0xCA, "AMD Radeon R5 Graphics"}, + {0x9874, 0xCB, "AMD Radeon R5 Graphics"}, + {0x9874, 0xCC, "AMD Radeon R7 Graphics"}, + {0x9874, 0xCD, "AMD Radeon R7 Graphics"}, + {0x9874, 0xCE, "AMD Radeon R5 Graphics"}, + {0x9874, 0xE1, "AMD Radeon R7 Graphics"}, + {0x9874, 0xE2, "AMD Radeon R7 Graphics"}, + {0x9874, 0xE3, "AMD Radeon R7 Graphics"}, + {0x9874, 0xE4, "AMD Radeon R7 Graphics"}, + {0x9874, 0xE5, "AMD Radeon R5 Graphics"}, + {0x9874, 0xE6, "AMD Radeon R5 Graphics"}, + {0x98E4, 0x80, "AMD Radeon R5E Graphics"}, + {0x98E4, 0x81, "AMD Radeon R4E Graphics"}, + {0x98E4, 0x83, "AMD Radeon R2E Graphics"}, + {0x98E4, 0x84, "AMD Radeon R2E Graphics"}, + {0x98E4, 0x86, "AMD Radeon R1E Graphics"}, + {0x98E4, 0xC0, "AMD Radeon R4 Graphics"}, + {0x98E4, 0xC1, "AMD Radeon R5 Graphics"}, + {0x98E4, 0xC2, "AMD Radeon R4 Graphics"}, + {0x98E4, 0xC4, "AMD Radeon R5 Graphics"}, + {0x98E4, 0xC6, "AMD Radeon R5 Graphics"}, + {0x98E4, 0xC8, "AMD Radeon R4 Graphics"}, + {0x98E4, 0xC9, "AMD Radeon R4 Graphics"}, + {0x98E4, 0xCA, "AMD Radeon R5 Graphics"}, + 
{0x98E4, 0xD0, "AMD Radeon R2 Graphics"}, + {0x98E4, 0xD1, "AMD Radeon R2 Graphics"}, + {0x98E4, 0xD2, "AMD Radeon R2 Graphics"}, + {0x98E4, 0xD4, "AMD Radeon R2 Graphics"}, + {0x98E4, 0xD9, "AMD Radeon R5 Graphics"}, + {0x98E4, 0xDA, "AMD Radeon R5 Graphics"}, + {0x98E4, 0xDB, "AMD Radeon R3 Graphics"}, + {0x98E4, 0xE1, "AMD Radeon R3 Graphics"}, + {0x98E4, 0xE2, "AMD Radeon R3 Graphics"}, + {0x98E4, 0xE9, "AMD Radeon R4 Graphics"}, + {0x98E4, 0xEA, "AMD Radeon R4 Graphics"}, + {0x98E4, 0xEB, "AMD Radeon R3 Graphics"}, + {0x98E4, 0xEC, "AMD Radeon R4 Graphics"}, + {0x0000, 0x00, "unknown AMD GPU"} // this must always be the last item +}; + +struct card { + const char *pathname; + struct amdgpu_id_struct id; + + /* GPU and VRAM utilizations */ + + const char *pathname_util_gpu; + RRDSET *st_util_gpu; + RRDDIM *rd_util_gpu; + collected_number util_gpu; + + const char *pathname_util_mem; + RRDSET *st_util_mem; + RRDDIM *rd_util_mem; + collected_number util_mem; + + + /* GPU and VRAM clock frequencies */ + + const char *pathname_clk_gpu; + procfile *ff_clk_gpu; + RRDSET *st_clk_gpu; + RRDDIM *rd_clk_gpu; + collected_number clk_gpu; + + const char *pathname_clk_mem; + procfile *ff_clk_mem; + RRDSET *st_clk_mem; + RRDDIM *rd_clk_mem; + collected_number clk_mem; + + + /* GPU memory usage */ + + const char *pathname_mem_used_vram; + const char *pathname_mem_total_vram; + + RRDSET *st_mem_usage_perc_vram; + RRDDIM *rd_mem_used_perc_vram; + + RRDSET *st_mem_usage_vram; + RRDDIM *rd_mem_used_vram; + RRDDIM *rd_mem_free_vram; + + collected_number used_vram; + collected_number total_vram; + + + const char *pathname_mem_used_vis_vram; + const char *pathname_mem_total_vis_vram; + + RRDSET *st_mem_usage_perc_vis_vram; + RRDDIM *rd_mem_used_perc_vis_vram; + + RRDSET *st_mem_usage_vis_vram; + RRDDIM *rd_mem_used_vis_vram; + RRDDIM *rd_mem_free_vis_vram; + + collected_number used_vis_vram; + collected_number total_vis_vram; + + + const char *pathname_mem_used_gtt; + const char 
*pathname_mem_total_gtt;
+
+    RRDSET *st_mem_usage_perc_gtt;
+    RRDDIM *rd_mem_used_perc_gtt;
+
+    RRDSET *st_mem_usage_gtt;
+    RRDDIM *rd_mem_used_gtt;
+    RRDDIM *rd_mem_free_gtt;
+
+    collected_number used_gtt;
+    collected_number total_gtt;
+
+    struct do_rrd_x *do_rrd_x_root;
+
+    struct card *next;
+};
+static struct card *card_root = NULL;
+
+/*
+ * Release a card: free its pathname and marketing-name strings, unlink it
+ * from the list rooted at card_root (if it is linked there) and free the
+ * struct itself. No other member of the card is released here.
+ */
+static void card_free(struct card *c){
+    if(c->pathname) freez((void *) c->pathname);
+    if(c->id.marketing_name) freez((void *) c->id.marketing_name);
+
+    /* remove card from linked list */
+    if(c == card_root) card_root = c->next;
+    else {
+        struct card *last;
+        for(last = card_root; last && last->next != c; last = last->next);
+        if(last) last->next = c->next;
+    }
+
+    freez(c);
+}
+
+/*
+ * Check whether the file at pathname contains a line whose first
+ * space-separated word is exactly "DRIVER=amdgpu".
+ *
+ * Returns 0 on a match, -1 if the file cannot be opened, -2 if it cannot be
+ * read or has no words on its first line, -3 if no line matches.
+ */
+static int check_card_is_amdgpu(const char *const pathname){
+    int rc = -1;
+
+    procfile *ff = procfile_open(pathname, " ", PROCFILE_FLAG_NO_ERROR_ON_FILE_IO);
+    if(unlikely(!ff)){
+        rc = -1;
+        goto cleanup;
+    }
+
+    ff = procfile_readall(ff);
+    if(unlikely(!ff || procfile_lines(ff) < 1 || procfile_linewords(ff, 0) < 1)){
+        rc = -2;
+        goto cleanup;
+    }
+
+    for(size_t l = 0; l < procfile_lines(ff); l++) {
+        if(!strcmp(procfile_lineword(ff, l, 0), "DRIVER=amdgpu")){
+            rc = 0;
+            goto cleanup;
+        }
+    }
+
+    rc = -3; // no match
+
+cleanup:
+    procfile_close(ff);
+    return rc;
+}
+
+/*
+ * Read the active clock level from a clock-frequency file.
+ *
+ * The active line is the first one with at least three words whose third
+ * word is "*"; the text of its second word up to the first 'M' is parsed as
+ * the value and stored in *num.
+ *
+ * *p_ff caches the procfile between calls: it is opened on first use (with
+ * NULL separators) and re-read on every call.
+ *
+ * Returns 0 on success, -2 if the file cannot be opened, -3 if it cannot be
+ * read, -4 if no usable active line is found. On -4 the procfile is closed
+ * and *p_ff is reset to NULL so a later call reopens the file instead of
+ * touching a closed handle.
+ */
+static int read_clk_freq_file(procfile **p_ff, const char *const pathname, collected_number *num){
+    if(unlikely(!*p_ff)){
+        *p_ff = procfile_open(pathname, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO);
+        if(unlikely(!*p_ff)) return -2;
+    }
+
+    if(unlikely(NULL == (*p_ff = procfile_readall(*p_ff)))) return -3;
+
+    for(size_t l = 0; l < procfile_lines(*p_ff) ; l++) {
+        if((*p_ff)->lines->lines[l].words < 3 || strcmp(procfile_lineword((*p_ff), l, 2), "*"))
+            continue;
+
+        const char *str_with_units = procfile_lineword((*p_ff), l, 1);
+        const char *delim = strchr(str_with_units, 'M');
+        char str_without_units[10];
+
+        /* no unit marker, or a value too long for the buffer: treat the line as unusable */
+        if(unlikely(!delim || (size_t)(delim - str_with_units) >= sizeof(str_without_units)))
+            continue;
+
+        size_t len = (size_t)(delim - str_with_units);
+        memcpy(str_without_units, str_with_units, len);
+        str_without_units[len] = '\0'; /* memcpy() does not terminate the copy */
+        *num = str2ll(str_without_units, NULL);
+        return 0;
+    }
+
+    procfile_close((*p_ff));
+    *p_ff = NULL; /* do not leave a dangling handle behind */
+    return -4;
+}
+
+/*
+ * Build "<suf_1>_<suf_2>_<suf_3>" in a function-local static buffer and
+ * return it. Every call overwrites the previous result, so the returned
+ * string must be consumed (or copied) before the next call.
+ */
+static char *set_id(const char *const suf_1, const char *const suf_2, const char *const suf_3){
+    static char id[RRD_ID_LENGTH_MAX + 1];
+    snprintfz(id, RRD_ID_LENGTH_MAX, "%s_%s_%s", suf_1, suf_2, suf_3);
+    return id;
+}
+
+/* Per-card collection callback; the callbacks in this file return 0 on success and 1 on failure. */
+typedef int (*do_rrd_x_func)(struct card *const c);
+
+/* Node of the singly linked list of collection callbacks attached to a card. */
+struct do_rrd_x {
+    do_rrd_x_func func;
+    struct do_rrd_x *next;
+};
+
+/* Allocate a callback node for func and push it at the head of the card's list. */
+static void add_do_rrd_x(struct card *const c, const do_rrd_x_func func){
+    struct do_rrd_x *const drrd = callocz(1, sizeof(struct do_rrd_x));
+    drrd->func = func;
+    drrd->next = c->do_rrd_x_root;
+    c->do_rrd_x_root = drrd;
+}
+
+/* Unlink drrd from the card's callback list (if it is linked there) and free it. */
+static void rm_do_rrd_x(struct card *const c, struct do_rrd_x *const drrd){
+    if(drrd == c->do_rrd_x_root) c->do_rrd_x_root = drrd->next;
+    else {
+        struct do_rrd_x *last;
+        for(last = c->do_rrd_x_root; last && last->next != drrd; last = last->next);
+        if(last) last->next = drrd->next;
+    }
+
+    freez(drrd);
+}
+
+/*
+ * Collect the GPU utilization of a card and push it to its chart.
+ * Returns 0 on success. On a read failure it logs the error, frees the
+ * pathname string (the pointer itself is not reset), flags the chart with
+ * rrdset_is_obsolete() and returns 1.
+ */
+static int do_rrd_util_gpu(struct card *const c){
+    if(likely(!read_single_number_file(c->pathname_util_gpu, (unsigned long long *) &c->util_gpu))){
+        rrddim_set_by_pointer(c->st_util_gpu, c->rd_util_gpu, c->util_gpu);
+        rrdset_done(c->st_util_gpu);
+        return 0;
+    }
+    else {
+        collector_error("Cannot read util_gpu for %s: [%s]", c->pathname, c->id.marketing_name);
+        freez((void *) c->pathname_util_gpu);
+        rrdset_is_obsolete(c->st_util_gpu);
+        return 1;
+    }
+}
+
+/*
+ * Collect the memory utilization of a card and push it to its chart.
+ * Same contract as do_rrd_util_gpu(): 0 on success, 1 after logging,
+ * freeing the pathname string and flagging the chart obsolete.
+ */
+static int do_rrd_util_mem(struct card *const c){
+    if(likely(!read_single_number_file(c->pathname_util_mem, (unsigned long long *) &c->util_mem))){
+        rrddim_set_by_pointer(c->st_util_mem, c->rd_util_mem, c->util_mem);
+        rrdset_done(c->st_util_mem);
+        return 0;
+    }
+    else {
+        collector_error("Cannot read util_mem for %s: [%s]", c->pathname, c->id.marketing_name);
+        freez((void *) c->pathname_util_mem);
+        rrdset_is_obsolete(c->st_util_mem);
+        return 1;
+    }
+}
+
+static int do_rrd_clk_gpu(struct card *const c){ +
if(unlikely(read_clk_freq_file(&c->ff_clk_gpu, (char *) c->pathname_clk_gpu, &c->clk_gpu))){
+        /* failure path: log, drop the path string, flag the chart obsolete */
+        collector_error("Cannot read clk_gpu for %s: [%s]", c->pathname, c->id.marketing_name);
+        freez((void *) c->pathname_clk_gpu);
+        rrdset_is_obsolete(c->st_clk_gpu);
+        return 1;
+    }
+
+    rrddim_set_by_pointer(c->st_clk_gpu, c->rd_clk_gpu, c->clk_gpu);
+    rrdset_done(c->st_clk_gpu);
+    return 0;
+}
+
+/*
+ * Collect the memory clock of a card through read_clk_freq_file() and push
+ * it to its chart. Returns 0 on success; on failure logs the error, frees
+ * the pathname string, flags the chart obsolete and returns 1.
+ */
+static int do_rrd_clk_mem(struct card *const c){
+    if(unlikely(read_clk_freq_file(&c->ff_clk_mem, (char *) c->pathname_clk_mem, &c->clk_mem))){
+        collector_error("Cannot read clk_mem for %s: [%s]", c->pathname, c->id.marketing_name);
+        freez((void *) c->pathname_clk_mem);
+        rrdset_is_obsolete(c->st_clk_mem);
+        return 1;
+    }
+
+    rrddim_set_by_pointer(c->st_clk_mem, c->rd_clk_mem, c->clk_mem);
+    rrdset_done(c->st_clk_mem);
+    return 0;
+}
+
+/*
+ * Collect VRAM usage of a card and update both VRAM charts: the used share
+ * (used * 10000 / total) and the used/free amounts.
+ * Returns 0 on success. If the used value cannot be read, or total_vram is
+ * zero, it logs the error, frees both pathname strings, flags both charts
+ * obsolete and returns 1.
+ */
+static int do_rrd_vram(struct card *const c){
+    if(unlikely(read_single_number_file(c->pathname_mem_used_vram, (unsigned long long *) &c->used_vram) ||
+                !c->total_vram)){
+        collector_error("Cannot read used_vram for %s: [%s]", c->pathname, c->id.marketing_name);
+        freez((void *) c->pathname_mem_used_vram);
+        freez((void *) c->pathname_mem_total_vram);
+        rrdset_is_obsolete(c->st_mem_usage_perc_vram);
+        rrdset_is_obsolete(c->st_mem_usage_vram);
+        return 1;
+    }
+
+    const collected_number used_share = c->used_vram * 10000 / c->total_vram;
+    const collected_number free_amount = c->total_vram - c->used_vram;
+
+    rrddim_set_by_pointer(c->st_mem_usage_perc_vram, c->rd_mem_used_perc_vram, used_share);
+    rrdset_done(c->st_mem_usage_perc_vram);
+
+    rrddim_set_by_pointer(c->st_mem_usage_vram, c->rd_mem_used_vram, c->used_vram);
+    rrddim_set_by_pointer(c->st_mem_usage_vram, c->rd_mem_free_vram, free_amount);
+    rrdset_done(c->st_mem_usage_vram);
+    return 0;
+}
+
+/*
+ * Collect visible-VRAM usage of a card and update both visible-VRAM charts.
+ * Same contract as do_rrd_vram(), using the *_vis_vram members.
+ */
+static int do_rrd_vis_vram(struct card *const c){
+    if(unlikely(read_single_number_file(c->pathname_mem_used_vis_vram, (unsigned long long *) &c->used_vis_vram) ||
+                !c->total_vis_vram)){
+        collector_error("Cannot read used_vis_vram for %s: [%s]", c->pathname, c->id.marketing_name);
+        freez((void *) c->pathname_mem_used_vis_vram);
+        freez((void *) c->pathname_mem_total_vis_vram);
+        rrdset_is_obsolete(c->st_mem_usage_perc_vis_vram);
+        rrdset_is_obsolete(c->st_mem_usage_vis_vram);
+        return 1;
+    }
+
+    const collected_number used_share = c->used_vis_vram * 10000 / c->total_vis_vram;
+    const collected_number free_amount = c->total_vis_vram - c->used_vis_vram;
+
+    rrddim_set_by_pointer(c->st_mem_usage_perc_vis_vram, c->rd_mem_used_perc_vis_vram, used_share);
+    rrdset_done(c->st_mem_usage_perc_vis_vram);
+
+    rrddim_set_by_pointer(c->st_mem_usage_vis_vram, c->rd_mem_used_vis_vram, c->used_vis_vram);
+    rrddim_set_by_pointer(c->st_mem_usage_vis_vram, c->rd_mem_free_vis_vram, free_amount);
+    rrdset_done(c->st_mem_usage_vis_vram);
+    return 0;
+}
+
+/*
+ * Collect GTT usage of a card and update both GTT charts.
+ * Same contract as do_rrd_vram(), using the *_gtt members.
+ */
+static int do_rrd_gtt(struct card *const c){
+    if(unlikely(read_single_number_file(c->pathname_mem_used_gtt, (unsigned long long *) &c->used_gtt) ||
+                !c->total_gtt)){
+        collector_error("Cannot read used_gtt for %s: [%s]", c->pathname, c->id.marketing_name);
+        freez((void *) c->pathname_mem_used_gtt);
+        freez((void *) c->pathname_mem_total_gtt);
+        rrdset_is_obsolete(c->st_mem_usage_perc_gtt);
+        rrdset_is_obsolete(c->st_mem_usage_gtt);
+        return 1;
+    }
+
+    const collected_number used_share = c->used_gtt * 10000 / c->total_gtt;
+    const collected_number free_amount = c->total_gtt - c->used_gtt;
+
+    rrddim_set_by_pointer(c->st_mem_usage_perc_gtt, c->rd_mem_used_perc_gtt, used_share);
+    rrdset_done(c->st_mem_usage_perc_gtt);
+
+    rrddim_set_by_pointer(c->st_mem_usage_gtt, c->rd_mem_used_gtt, c->used_gtt);
+    rrddim_set_by_pointer(c->st_mem_usage_gtt, c->rd_mem_free_gtt, free_amount);
+    rrdset_done(c->st_mem_usage_gtt);
+    return 0;
+}
+
+int do_sys_class_drm(int update_every, usec_t dt) {
+    (void)dt;
+
+    static DIR *drm_dir = NULL;
+
+    int chart_prio = NETDATA_CHART_PRIO_DRM_AMDGPU;
+
+    if(unlikely(!drm_dir)) {
+        char filename[FILENAME_MAX + 1];
+        snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/class/drm");
+        char *drm_dir_name = config_get(CONFIG_SECTION_PLUGIN_PROC_DRM, "directory to monitor", filename);
+        if(unlikely(NULL
== (drm_dir = opendir(drm_dir_name)))){ + collector_error("Cannot read directory '%s'", drm_dir_name); + return 1; + } + + struct dirent *de = NULL; + while(likely(de = readdir(drm_dir))) { + if( de->d_type == DT_DIR && ((de->d_name[0] == '.' && de->d_name[1] == '\0') || + (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0'))) continue; + + if(de->d_type == DT_LNK && !strncmp(de->d_name, "card", 4) && !strchr(de->d_name, '-')) { + char filename[FILENAME_MAX + 1]; + + snprintfz(filename, FILENAME_MAX, "%s/%s/%s", drm_dir_name, de->d_name, "device/uevent"); + if(check_card_is_amdgpu(filename)) continue; + + /* Get static info */ + + struct card *const c = callocz(1, sizeof(struct card)); + snprintfz(filename, FILENAME_MAX, "%s/%s", drm_dir_name, de->d_name); + c->pathname = strdupz(filename); + + snprintfz(filename, FILENAME_MAX, "%s/%s", c->pathname, "device/device"); + if(read_single_base64_or_hex_number_file(filename, &c->id.asic_id)){ + collector_error("Cannot read asic_id from '%s'", filename); + card_free(c); + continue; + } + + snprintfz(filename, FILENAME_MAX, "%s/%s", c->pathname, "device/revision"); + if(read_single_base64_or_hex_number_file(filename, &c->id.pci_rev_id)){ + collector_error("Cannot read pci_rev_id from '%s'", filename); + card_free(c); + continue; + } + + for(int i = 0; amdgpu_ids[i].asic_id; i++){ + if(c->id.asic_id == amdgpu_ids[i].asic_id && c->id.pci_rev_id == amdgpu_ids[i].pci_rev_id){ + c->id.marketing_name = strdupz(amdgpu_ids[i].marketing_name); + break; + } + } + if(!c->id.marketing_name) + c->id.marketing_name = strdupz(amdgpu_ids[sizeof(amdgpu_ids)/sizeof(amdgpu_ids[0]) - 1].marketing_name); + + + collected_number tmp_val; + #define set_prop_pathname(prop_filename, prop_pathname, p_ff){ \ + snprintfz(filename, FILENAME_MAX, "%s/%s", c->pathname, prop_filename); \ + if((p_ff && !read_clk_freq_file(p_ff, filename, &tmp_val)) || \ + !read_single_number_file(filename, (unsigned long long *) &tmp_val)) \ + 
prop_pathname = strdupz(filename); \ + else \ + collector_info("Cannot read file '%s'", filename); \ + } + + /* Initialize GPU and VRAM utilization metrics */ + + set_prop_pathname("device/gpu_busy_percent", c->pathname_util_gpu, NULL); + + if(c->pathname_util_gpu){ + c->st_util_gpu = rrdset_create_localhost( + AMDGPU_CHART_TYPE + , set_id("gpu_utilization", c->id.marketing_name, de->d_name) + , NULL + , "utilization" + , AMDGPU_CHART_TYPE ".gpu_utilization" + , "GPU utilization" + , "percentage" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DRM_NAME + , chart_prio++ + , update_every + , RRDSET_TYPE_LINE + ); + + rrdlabels_add(c->st_util_gpu->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO); + + c->rd_util_gpu = rrddim_add(c->st_util_gpu, "utilization", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_do_rrd_x(c, do_rrd_util_gpu); + } + + set_prop_pathname("device/mem_busy_percent", c->pathname_util_mem, NULL); + + if(c->pathname_util_mem){ + c->st_util_mem = rrdset_create_localhost( + AMDGPU_CHART_TYPE + , set_id("gpu_mem_utilization", c->id.marketing_name, de->d_name) + , NULL + , "utilization" + , AMDGPU_CHART_TYPE ".gpu_mem_utilization" + , "GPU memory utilization" + , "percentage" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DRM_NAME + , chart_prio++ + , update_every + , RRDSET_TYPE_LINE + ); + + rrdlabels_add(c->st_util_mem->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO); + + c->rd_util_mem = rrddim_add(c->st_util_mem, "utilization", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_do_rrd_x(c, do_rrd_util_mem); + } + + + /* Initialize GPU and VRAM clock frequency metrics */ + + set_prop_pathname("device/pp_dpm_sclk", c->pathname_clk_gpu, &c->ff_clk_gpu); + + if(c->pathname_clk_gpu){ + c->st_clk_gpu = rrdset_create_localhost( + AMDGPU_CHART_TYPE + , set_id("gpu_clk_frequency", c->id.marketing_name, de->d_name) + , NULL + , "frequency" + , AMDGPU_CHART_TYPE ".gpu_clk_frequency" + , "GPU clock frequency" + , "MHz" + , 
PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DRM_NAME + , chart_prio++ + , update_every + , RRDSET_TYPE_LINE + ); + + rrdlabels_add(c->st_clk_gpu->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO); + + c->rd_clk_gpu = rrddim_add(c->st_clk_gpu, "frequency", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_do_rrd_x(c, do_rrd_clk_gpu); + + } + + set_prop_pathname("device/pp_dpm_mclk", c->pathname_clk_mem, &c->ff_clk_mem); + + if(c->pathname_clk_mem){ + c->st_clk_mem = rrdset_create_localhost( + AMDGPU_CHART_TYPE + , set_id("gpu_mem_clk_frequency", c->id.marketing_name, de->d_name) + , NULL + , "frequency" + , AMDGPU_CHART_TYPE ".gpu_mem_clk_frequency" + , "GPU memory clock frequency" + , "MHz" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DRM_NAME + , chart_prio++ + , update_every + , RRDSET_TYPE_LINE + ); + + rrdlabels_add(c->st_clk_mem->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO); + + c->rd_clk_mem = rrddim_add(c->st_clk_mem, "frequency", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_do_rrd_x(c, do_rrd_clk_mem); + } + + + /* Initialize GPU memory usage metrics */ + + set_prop_pathname("device/mem_info_vram_used", c->pathname_mem_used_vram, NULL); + set_prop_pathname("device/mem_info_vram_total", c->pathname_mem_total_vram, NULL); + if(c->pathname_mem_total_vram) c->total_vram = tmp_val; + + if(c->pathname_mem_used_vram && c->pathname_mem_total_vram){ + c->st_mem_usage_perc_vram = rrdset_create_localhost( + AMDGPU_CHART_TYPE + , set_id("gpu_mem_vram_usage_perc", c->id.marketing_name, de->d_name) + , NULL + , "memory_usage" + , AMDGPU_CHART_TYPE ".gpu_mem_vram_usage_perc" + , "VRAM memory usage percentage" + , "percentage" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DRM_NAME + , chart_prio++ + , update_every + , RRDSET_TYPE_LINE + ); + + rrdlabels_add(c->st_mem_usage_perc_vram->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO); + + c->rd_mem_used_perc_vram = rrddim_add(c->st_mem_usage_perc_vram, "usage", NULL, 1, 100, 
RRD_ALGORITHM_ABSOLUTE); + + + c->st_mem_usage_vram = rrdset_create_localhost( + AMDGPU_CHART_TYPE + , set_id("gpu_mem_vram_usage", c->id.marketing_name, de->d_name) + , NULL + , "memory_usage" + , AMDGPU_CHART_TYPE ".gpu_mem_vram_usage" + , "VRAM memory usage" + , "bytes" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DRM_NAME + , chart_prio++ + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdlabels_add(c->st_mem_usage_vram->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO); + + c->rd_mem_free_vram = rrddim_add(c->st_mem_usage_vram, "free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_mem_used_vram = rrddim_add(c->st_mem_usage_vram, "used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + + add_do_rrd_x(c, do_rrd_vram); + } + + set_prop_pathname("device/mem_info_vis_vram_used", c->pathname_mem_used_vis_vram, NULL); + set_prop_pathname("device/mem_info_vis_vram_total", c->pathname_mem_total_vis_vram, NULL); + if(c->pathname_mem_total_vis_vram) c->total_vis_vram = tmp_val; + + if(c->pathname_mem_used_vis_vram && c->pathname_mem_total_vis_vram){ + c->st_mem_usage_perc_vis_vram = rrdset_create_localhost( + AMDGPU_CHART_TYPE + , set_id("gpu_mem_vis_vram_usage_perc", c->id.marketing_name, de->d_name) + , NULL + , "memory_usage" + , AMDGPU_CHART_TYPE ".gpu_mem_vis_vram_usage_perc" + , "visible VRAM memory usage percentage" + , "percentage" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DRM_NAME + , chart_prio++ + , update_every + , RRDSET_TYPE_LINE + ); + + rrdlabels_add(c->st_mem_usage_perc_vis_vram->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO); + + c->rd_mem_used_perc_vis_vram = rrddim_add(c->st_mem_usage_perc_vis_vram, "usage", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + + + c->st_mem_usage_vis_vram = rrdset_create_localhost( + AMDGPU_CHART_TYPE + , set_id("gpu_mem_vis_vram_usage", c->id.marketing_name, de->d_name) + , NULL + , "memory_usage" + , AMDGPU_CHART_TYPE ".gpu_mem_vis_vram_usage" + , "visible VRAM memory usage" + , "bytes" + , 
PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DRM_NAME + , chart_prio++ + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdlabels_add(c->st_mem_usage_vis_vram->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO); + + c->rd_mem_free_vis_vram = rrddim_add(c->st_mem_usage_vis_vram, "free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_mem_used_vis_vram = rrddim_add(c->st_mem_usage_vis_vram, "used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + + add_do_rrd_x(c, do_rrd_vis_vram); + } + + set_prop_pathname("device/mem_info_gtt_used", c->pathname_mem_used_gtt, NULL); + set_prop_pathname("device/mem_info_gtt_total", c->pathname_mem_total_gtt, NULL); + if(c->pathname_mem_total_gtt) c->total_gtt = tmp_val; + + if(c->pathname_mem_used_gtt && c->pathname_mem_total_gtt){ + c->st_mem_usage_perc_gtt = rrdset_create_localhost( + AMDGPU_CHART_TYPE + , set_id("gpu_mem_gtt_usage_perc", c->id.marketing_name, de->d_name) + , NULL + , "memory_usage" + , AMDGPU_CHART_TYPE ".gpu_mem_gtt_usage_perc" + , "GTT memory usage percentage" + , "percentage" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DRM_NAME + , chart_prio++ + , update_every + , RRDSET_TYPE_LINE + ); + + rrdlabels_add(c->st_mem_usage_perc_gtt->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO); + + c->rd_mem_used_perc_gtt = rrddim_add(c->st_mem_usage_perc_gtt, "usage", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + + c->st_mem_usage_gtt = rrdset_create_localhost( + AMDGPU_CHART_TYPE + , set_id("gpu_mem_gtt_usage", c->id.marketing_name, de->d_name) + , NULL + , "memory_usage" + , AMDGPU_CHART_TYPE ".gpu_mem_gtt_usage" + , "GTT memory usage" + , "bytes" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_DRM_NAME + , chart_prio++ + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdlabels_add(c->st_mem_usage_gtt->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO); + + c->rd_mem_free_gtt = rrddim_add(c->st_mem_usage_gtt, "free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_mem_used_gtt = 
rrddim_add(c->st_mem_usage_gtt, "used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + + add_do_rrd_x(c, do_rrd_gtt); + } + + c->next = card_root; + card_root = c; + } + } + } + + + struct card *card_cur = card_root, + *card_next; + while(card_cur){ + + struct do_rrd_x *do_rrd_x_cur = card_cur->do_rrd_x_root, + *do_rrd_x_next; + while(do_rrd_x_cur){ + if(unlikely(do_rrd_x_cur->func(card_cur))) { + do_rrd_x_next = do_rrd_x_cur->next; + rm_do_rrd_x(card_cur, do_rrd_x_cur); + do_rrd_x_cur = do_rrd_x_next; + } + else do_rrd_x_cur = do_rrd_x_cur->next; + } + + if(unlikely(!card_cur->do_rrd_x_root)){ + card_next = card_cur->next; + card_free(card_cur); + card_cur = card_next; + } + else card_cur = card_cur->next; + } + + return card_root ? 0 : 1; +} diff --git a/collectors/proc.plugin/sys_devices_pci_aer.c b/collectors/proc.plugin/sys_devices_pci_aer.c new file mode 100644 index 00000000..13442623 --- /dev/null +++ b/collectors/proc.plugin/sys_devices_pci_aer.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +static char *pci_aer_dirname = NULL; + +typedef enum __attribute__((packed)) { + AER_DEV_NONFATAL = (1 << 0), + AER_DEV_CORRECTABLE = (1 << 1), + AER_DEV_FATAL = (1 << 2), + AER_ROOTPORT_TOTAL_ERR_COR = (1 << 3), + AER_ROOTPORT_TOTAL_ERR_FATAL = (1 << 4), +} AER_TYPE; + +struct aer_value { + kernel_uint_t count; + RRDDIM *rd; +}; + +struct aer_entry { + bool updated; + + STRING *name; + AER_TYPE type; + + procfile *ff; + DICTIONARY *values; + + RRDSET *st; +}; + +DICTIONARY *aer_root = NULL; + +static bool aer_value_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *data __maybe_unused) { + struct aer_value *v = old_value; + struct aer_value *nv = new_value; + + v->count = nv->count; + + return false; +} + +static void aer_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + struct aer_entry *a = value; + a->values = 
dictionary_create(DICT_OPTION_SINGLE_THREADED|DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_register_conflict_callback(a->values, aer_value_conflict_callback, NULL); +} + +static void add_pci_aer(const char *base_dir, const char *d_name, AER_TYPE type) { + char buffer[FILENAME_MAX + 1]; + snprintfz(buffer, FILENAME_MAX, "%s/%s", base_dir, d_name); + struct aer_entry *a = dictionary_set(aer_root, buffer, NULL, sizeof(struct aer_entry)); + + if(!a->name) + a->name = string_strdupz(d_name); + + a->type = type; +} + +static bool recursively_find_pci_aer(AER_TYPE types, const char *base_dir, const char *d_name, int depth) { + if(depth > 100) + return false; + + char buffer[FILENAME_MAX + 1]; + snprintfz(buffer, FILENAME_MAX, "%s/%s", base_dir, d_name); + DIR *dir = opendir(buffer); + if(unlikely(!dir)) { + collector_error("Cannot read PCI_AER directory '%s'", buffer); + return true; + } + + struct dirent *de = NULL; + while((de = readdir(dir))) { + if(de->d_type == DT_DIR) { + if(de->d_name[0] == '.') + continue; + + recursively_find_pci_aer(types, buffer, de->d_name, depth + 1); + } + else if(de->d_type == DT_REG) { + if((types & AER_DEV_NONFATAL) && strcmp(de->d_name, "aer_dev_nonfatal") == 0) { + add_pci_aer(buffer, de->d_name, AER_DEV_NONFATAL); + } + else if((types & AER_DEV_CORRECTABLE) && strcmp(de->d_name, "aer_dev_correctable") == 0) { + add_pci_aer(buffer, de->d_name, AER_DEV_CORRECTABLE); + } + else if((types & AER_DEV_FATAL) && strcmp(de->d_name, "aer_dev_fatal") == 0) { + add_pci_aer(buffer, de->d_name, AER_DEV_FATAL); + } + else if((types & AER_ROOTPORT_TOTAL_ERR_COR) && strcmp(de->d_name, "aer_rootport_total_err_cor") == 0) { + add_pci_aer(buffer, de->d_name, AER_ROOTPORT_TOTAL_ERR_COR); + } + else if((types & AER_ROOTPORT_TOTAL_ERR_FATAL) && strcmp(de->d_name, "aer_rootport_total_err_fatal") == 0) { + add_pci_aer(buffer, de->d_name, AER_ROOTPORT_TOTAL_ERR_FATAL); + } + } + } + closedir(dir); + return true; +} + +static void find_all_pci_aer(AER_TYPE 
types) { + char name[FILENAME_MAX + 1]; + snprintfz(name, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices"); + pci_aer_dirname = config_get("plugin:proc:/sys/devices/pci/aer", "directory to monitor", name); + + DIR *dir = opendir(pci_aer_dirname); + if(unlikely(!dir)) { + collector_error("Cannot read PCI_AER directory '%s'", pci_aer_dirname); + return; + } + + struct dirent *de = NULL; + while((de = readdir(dir))) { + if(de->d_type == DT_DIR && de->d_name[0] == 'p' && de->d_name[1] == 'c' && de->d_name[2] == 'i' && isdigit(de->d_name[3])) + recursively_find_pci_aer(types, pci_aer_dirname, de->d_name, 1); + } + closedir(dir); +} + +static void read_pci_aer_values(const char *filename, struct aer_entry *t) { + t->updated = false; + + if(unlikely(!t->ff)) { + t->ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!t->ff)) + return; + } + + t->ff = procfile_readall(t->ff); + if(unlikely(!t->ff || procfile_lines(t->ff) < 1 || procfile_linewords(t->ff, 0) < 1)) + return; + + size_t lines = procfile_lines(t->ff); + for(size_t l = 0; l < lines ; l++) { + if(procfile_linewords(t->ff, l) != 2) + continue; + + struct aer_value v = { + .count = str2ull(procfile_lineword(t->ff, l, 1), NULL) + }; + + char *key = procfile_lineword(t->ff, l, 0); + if(!key || !*key || (key[0] == 'T' && key[1] == 'O' && key[2] == 'T' && key[3] == 'A' && key[4] == 'L' && key[5] == '_')) + continue; + + dictionary_set(t->values, key, &v, sizeof(v)); + } + + t->updated = true; +} + +static void read_pci_aer_count(const char *filename, struct aer_entry *t) { + t->updated = false; + + if(unlikely(!t->ff)) { + t->ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!t->ff)) + return; + } + + t->ff = procfile_readall(t->ff); + if(unlikely(!t->ff || procfile_lines(t->ff) < 1 || procfile_linewords(t->ff, 0) < 1)) + return; + + struct aer_value v = { + .count = str2ull(procfile_lineword(t->ff, 0, 0), NULL) + }; + dictionary_set(t->values, "count", 
&v, sizeof(v)); + t->updated = true; +} + +static void add_label_from_link(struct aer_entry *a, const char *path, const char *link) { + char name[FILENAME_MAX + 1]; + strncpyz(name, path, FILENAME_MAX); + char *slash = strrchr(name, '/'); + if(slash) + *slash = '\0'; + + char name2[FILENAME_MAX + 1]; + snprintfz(name2, FILENAME_MAX, "%s/%s", name, link); + + ssize_t len = readlink(name2, name, FILENAME_MAX); + if(len != -1) { + name[len] = '\0'; // Null-terminate the string + slash = strrchr(name, '/'); + if(slash) slash++; + else slash = name; + rrdlabels_add(a->st->rrdlabels, link, slash, RRDLABEL_SRC_AUTO); + } +} + +int do_proc_sys_devices_pci_aer(int update_every, usec_t dt __maybe_unused) { + if(unlikely(!aer_root)) { + int do_root_ports = CONFIG_BOOLEAN_AUTO; + int do_pci_slots = CONFIG_BOOLEAN_NO; + + char buffer[100 + 1] = ""; + rrdlabels_get_value_strcpyz(localhost->rrdlabels, buffer, 100, "_virtualization"); + if(strcmp(buffer, "none") != 0) { + // no need to run on virtualized environments + do_root_ports = CONFIG_BOOLEAN_NO; + do_pci_slots = CONFIG_BOOLEAN_NO; + } + + do_root_ports = config_get_boolean("plugin:proc:/sys/class/pci/aer", "enable root ports", do_root_ports); + do_pci_slots = config_get_boolean("plugin:proc:/sys/class/pci/aer", "enable pci slots", do_pci_slots); + + if(!do_root_ports && !do_pci_slots) + return 1; + + aer_root = dictionary_create(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_register_insert_callback(aer_root, aer_insert_callback, NULL); + + AER_TYPE types = ((do_root_ports) ? (AER_ROOTPORT_TOTAL_ERR_COR|AER_ROOTPORT_TOTAL_ERR_FATAL) : 0) | + ((do_pci_slots) ? 
(AER_DEV_FATAL|AER_DEV_NONFATAL|AER_DEV_CORRECTABLE) : 0); + + find_all_pci_aer(types); + + if(!dictionary_entries(aer_root)) + return 1; + } + + struct aer_entry *a; + dfe_start_read(aer_root, a) { + switch(a->type) { + case AER_DEV_NONFATAL: + case AER_DEV_FATAL: + case AER_DEV_CORRECTABLE: + read_pci_aer_values(a_dfe.name, a); + break; + + case AER_ROOTPORT_TOTAL_ERR_COR: + case AER_ROOTPORT_TOTAL_ERR_FATAL: + read_pci_aer_count(a_dfe.name, a); + break; + } + + if(!a->updated) + continue; + + if(!a->st) { + const char *title; + const char *context; + + switch(a->type) { + case AER_DEV_NONFATAL: + title = "PCI Advanced Error Reporting (AER) Non-Fatal Errors"; + context = "pci.aer_nonfatal"; + break; + + case AER_DEV_FATAL: + title = "PCI Advanced Error Reporting (AER) Fatal Errors"; + context = "pci.aer_fatal"; + break; + + case AER_DEV_CORRECTABLE: + title = "PCI Advanced Error Reporting (AER) Correctable Errors"; + context = "pci.aer_correctable"; + break; + + case AER_ROOTPORT_TOTAL_ERR_COR: + title = "PCI Root-Port Advanced Error Reporting (AER) Correctable Errors"; + context = "pci.rootport_aer_correctable"; + break; + + case AER_ROOTPORT_TOTAL_ERR_FATAL: + title = "PCI Root-Port Advanced Error Reporting (AER) Fatal Errors"; + context = "pci.rootport_aer_fatal"; + break; + } + + char id[RRD_ID_LENGTH_MAX + 1]; + char nm[RRD_ID_LENGTH_MAX + 1]; + size_t len = strlen(pci_aer_dirname); + + const char *fname = a_dfe.name; + if(strncmp(a_dfe.name, pci_aer_dirname, len) == 0) + fname = &a_dfe.name[len]; + + if(*fname == '/') + fname++; + + snprintfz(id, RRD_ID_LENGTH_MAX, "%s_%s", &context[4], fname); + char *slash = strrchr(id, '/'); + if(slash) + *slash = '\0'; + + netdata_fix_chart_id(id); + + snprintfz(nm, RRD_ID_LENGTH_MAX, "%s", fname); + slash = strrchr(nm, '/'); + if(slash) + *slash = '\0'; + + a->st = rrdset_create_localhost( + "pci" + , id + , NULL + , "aer" + , context + , title + , "errors/s" + , PLUGIN_PROC_NAME + , "/sys/devices/pci/aer" + , 
NETDATA_CHART_PRIO_PCI_AER + , update_every + , RRDSET_TYPE_LINE + ); + + rrdlabels_add(a->st->rrdlabels, "device", nm, RRDLABEL_SRC_AUTO); + add_label_from_link(a, a_dfe.name, "driver"); + + struct aer_value *v; + dfe_start_read(a->values, v) { + v->rd = rrddim_add(a->st, v_dfe.name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + dfe_done(v); + } + + struct aer_value *v; + dfe_start_read(a->values, v) { + if(unlikely(!v->rd)) + v->rd = rrddim_add(a->st, v_dfe.name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(a->st, v->rd, (collected_number)v->count); + } + dfe_done(v); + + rrdset_done(a->st); + } + dfe_done(a); + + return 0; +} diff --git a/collectors/proc.plugin/sys_devices_system_edac_mc.c b/collectors/proc.plugin/sys_devices_system_edac_mc.c index fdb6b51e..0947f61f 100644 --- a/collectors/proc.plugin/sys_devices_system_edac_mc.c +++ b/collectors/proc.plugin/sys_devices_system_edac_mc.c @@ -2,35 +2,51 @@ #include "plugin_proc.h" +struct edac_count { + bool updated; + char *filename; + procfile *ff; + kernel_uint_t count; + RRDDIM *rd; +}; + +struct edac_dimm { + char *name; + + struct edac_count ce; + struct edac_count ue; + + RRDSET *st; + + struct edac_dimm *prev, *next; +}; + struct mc { char *name; - char ce_updated; - char ue_updated; - char *ce_count_filename; - char *ue_count_filename; + struct edac_count ce; + struct edac_count ue; + struct edac_count ce_noinfo; + struct edac_count ue_noinfo; - procfile *ce_ff; - procfile *ue_ff; + RRDSET *st; - collected_number ce_count; - collected_number ue_count; + struct edac_dimm *dimms; - RRDDIM *ce_rd; - RRDDIM *ue_rd; - - struct mc *next; + struct mc *prev, *next; }; + static struct mc *mc_root = NULL; +static char *mc_dirname = NULL; static void find_all_mc() { char name[FILENAME_MAX + 1]; snprintfz(name, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/edac/mc"); - char *dirname = config_get("plugin:proc:/sys/devices/system/edac/mc", "directory to monitor", 
name); + mc_dirname = config_get("plugin:proc:/sys/devices/system/edac/mc", "directory to monitor", name); - DIR *dir = opendir(dirname); + DIR *dir = opendir(mc_dirname); if(unlikely(!dir)) { - collector_error("Cannot read ECC memory errors directory '%s'", dirname); + collector_error("Cannot read EDAC memory errors directory '%s'", mc_dirname); return; } @@ -42,162 +58,240 @@ static void find_all_mc() { struct stat st; - snprintfz(name, FILENAME_MAX, "%s/%s/ce_count", dirname, de->d_name); + snprintfz(name, FILENAME_MAX, "%s/%s/ce_count", mc_dirname, de->d_name); if(stat(name, &st) != -1) - m->ce_count_filename = strdupz(name); + m->ce.filename = strdupz(name); - snprintfz(name, FILENAME_MAX, "%s/%s/ue_count", dirname, de->d_name); + snprintfz(name, FILENAME_MAX, "%s/%s/ue_count", mc_dirname, de->d_name); if(stat(name, &st) != -1) - m->ue_count_filename = strdupz(name); + m->ue.filename = strdupz(name); - if(!m->ce_count_filename && !m->ue_count_filename) { + snprintfz(name, FILENAME_MAX, "%s/%s/ce_noinfo_count", mc_dirname, de->d_name); + if(stat(name, &st) != -1) + m->ce_noinfo.filename = strdupz(name); + + snprintfz(name, FILENAME_MAX, "%s/%s/ue_noinfo_count", mc_dirname, de->d_name); + if(stat(name, &st) != -1) + m->ue_noinfo.filename = strdupz(name); + + if(!m->ce.filename && !m->ue.filename && !m->ce_noinfo.filename && !m->ue_noinfo.filename) { freez(m->name); freez(m); } - else { - m->next = mc_root; - mc_root = m; - } + else + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(mc_root, m, prev, next); } } - closedir(dir); -} -int do_proc_sys_devices_system_edac_mc(int update_every, usec_t dt) { - (void)dt; + for(struct mc *m = mc_root; m ;m = m->next) { + snprintfz(name, FILENAME_MAX, "%s/%s", mc_dirname, m->name); + dir = opendir(name); + if(!dir) { + collector_error("Cannot read EDAC memory errors directory '%s'", name); + continue; + } - if(unlikely(mc_root == NULL)) { - find_all_mc(); - if(unlikely(mc_root == NULL)) - return 1; - } + while((de = readdir(dir))) { + 
// it can be dimmX or rankX directory + // https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#f5 - static int do_ce = -1, do_ue = -1; - NETDATA_DOUBLE ce_sum = 0, ue_sum = 0; - struct mc *m; + if (de->d_type == DT_DIR && + ((strncmp(de->d_name, "rank", 4) == 0 || strncmp(de->d_name, "dimm", 4) == 0)) && + isdigit(de->d_name[4])) { - if(unlikely(do_ce == -1)) { - do_ce = config_get_boolean_ondemand("plugin:proc:/sys/devices/system/edac/mc", "enable ECC memory correctable errors", CONFIG_BOOLEAN_YES); - do_ue = config_get_boolean_ondemand("plugin:proc:/sys/devices/system/edac/mc", "enable ECC memory uncorrectable errors", CONFIG_BOOLEAN_YES); - } + struct edac_dimm *d = callocz(1, sizeof(struct edac_dimm)); + d->name = strdupz(de->d_name); - if(do_ce != CONFIG_BOOLEAN_NO) { - for(m = mc_root; m; m = m->next) { - if(m->ce_count_filename) { - m->ce_updated = 0; + struct stat st; - if(unlikely(!m->ce_ff)) { - m->ce_ff = procfile_open(m->ce_count_filename, " \t", PROCFILE_FLAG_DEFAULT); - if(unlikely(!m->ce_ff)) - continue; - } + snprintfz(name, FILENAME_MAX, "%s/%s/%s/dimm_ce_count", mc_dirname, m->name, de->d_name); + if(stat(name, &st) != -1) + d->ce.filename = strdupz(name); - m->ce_ff = procfile_readall(m->ce_ff); - if(unlikely(!m->ce_ff || procfile_lines(m->ce_ff) < 1 || procfile_linewords(m->ce_ff, 0) < 1)) - continue; + snprintfz(name, FILENAME_MAX, "%s/%s/%s/dimm_ue_count", mc_dirname, m->name, de->d_name); + if(stat(name, &st) != -1) + d->ue.filename = strdupz(name); - m->ce_count = str2ull(procfile_lineword(m->ce_ff, 0, 0), NULL); - ce_sum += m->ce_count; - m->ce_updated = 1; + if(!d->ce.filename && !d->ue.filename) { + freez(d->name); + freez(d); + } + else + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(m->dimms, d, prev, next); } } + closedir(dir); } +} - if(do_ue != CONFIG_BOOLEAN_NO) { - for(m = mc_root; m; m = m->next) { - if(m->ue_count_filename) { - m->ue_updated = 0; +static kernel_uint_t read_edac_count(struct edac_count *t) { + t->updated = false; + 
t->count = 0; - if(unlikely(!m->ue_ff)) { - m->ue_ff = procfile_open(m->ue_count_filename, " \t", PROCFILE_FLAG_DEFAULT); - if(unlikely(!m->ue_ff)) - continue; - } + if(t->filename) { + if(unlikely(!t->ff)) { + t->ff = procfile_open(t->filename, " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!t->ff)) + return 0; + } - m->ue_ff = procfile_readall(m->ue_ff); - if(unlikely(!m->ue_ff || procfile_lines(m->ue_ff) < 1 || procfile_linewords(m->ue_ff, 0) < 1)) - continue; + t->ff = procfile_readall(t->ff); + if(unlikely(!t->ff || procfile_lines(t->ff) < 1 || procfile_linewords(t->ff, 0) < 1)) + return 0; - m->ue_count = str2ull(procfile_lineword(m->ue_ff, 0, 0), NULL); - ue_sum += m->ue_count; - m->ue_updated = 1; - } + t->count = str2ull(procfile_lineword(t->ff, 0, 0), NULL); + t->updated = true; + } + + return t->count; +} + +static bool read_edac_mc_file(const char *mc, const char *filename, char *out, size_t out_size) { + char f[FILENAME_MAX + 1]; + snprintfz(f, FILENAME_MAX, "%s/%s/%s", mc_dirname, mc, filename); + if(read_file(f, out, out_size) != 0) { + collector_error("EDAC: cannot read file '%s'", f); + return false; + } + return true; +} + +static bool read_edac_mc_rank_file(const char *mc, const char *rank, const char *filename, char *out, size_t out_size) { + char f[FILENAME_MAX + 1]; + snprintfz(f, FILENAME_MAX, "%s/%s/%s/%s", mc_dirname, mc, rank, filename); + if(read_file(f, out, out_size) != 0) { + collector_error("EDAC: cannot read file '%s'", f); + return false; + } + return true; +} + +int do_proc_sys_devices_system_edac_mc(int update_every, usec_t dt __maybe_unused) { + if(unlikely(!mc_root)) { + find_all_mc(); + + if(!mc_root) + // don't call this again + return 1; + } + + for(struct mc *m = mc_root; m; m = m->next) { + read_edac_count(&m->ce); + read_edac_count(&m->ce_noinfo); + read_edac_count(&m->ue); + read_edac_count(&m->ue_noinfo); + + for(struct edac_dimm *d = m->dimms; d ;d = d->next) { + read_edac_count(&d->ce); + read_edac_count(&d->ue); } } // 
-------------------------------------------------------------------- - if(do_ce == CONFIG_BOOLEAN_YES || (do_ce == CONFIG_BOOLEAN_AUTO && - (ce_sum > 0 || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { - do_ce = CONFIG_BOOLEAN_YES; + for(struct mc *m = mc_root; m ; m = m->next) { + if(unlikely(!m->ce.updated && !m->ue.updated && !m->ce_noinfo.updated && !m->ue_noinfo.updated)) + continue; - static RRDSET *ce_st = NULL; - - if(unlikely(!ce_st)) { - ce_st = rrdset_create_localhost( + if(unlikely(!m->st)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "edac_%s", m->name); + m->st = rrdset_create_localhost( "mem" - , "ecc_ce" - , NULL - , "ecc" + , id , NULL - , "ECC Memory Correctable Errors" - , "errors" + , "edac" + , "mem.edac_mc" + , "Memory Controller (MC) Error Detection And Correction (EDAC) Errors" + , "errors/s" , PLUGIN_PROC_NAME , "/sys/devices/system/edac/mc" , NETDATA_CHART_PRIO_MEM_HW_ECC_CE , update_every , RRDSET_TYPE_LINE ); - } - for(m = mc_root; m; m = m->next) { - if (m->ce_count_filename && m->ce_updated) { - if(unlikely(!m->ce_rd)) - m->ce_rd = rrddim_add(ce_st, m->name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrdlabels_add(m->st->rrdlabels, "controller", m->name, RRDLABEL_SRC_AUTO); - rrddim_set_by_pointer(ce_st, m->ce_rd, m->ce_count); - } + char buffer[1024 + 1]; + + if(read_edac_mc_file(m->name, "mc_name", buffer, 1024)) + rrdlabels_add(m->st->rrdlabels, "mc_name", buffer, RRDLABEL_SRC_AUTO); + + if(read_edac_mc_file(m->name, "size_mb", buffer, 1024)) + rrdlabels_add(m->st->rrdlabels, "size_mb", buffer, RRDLABEL_SRC_AUTO); + + if(read_edac_mc_file(m->name, "max_location", buffer, 1024)) + rrdlabels_add(m->st->rrdlabels, "max_location", buffer, RRDLABEL_SRC_AUTO); + + m->ce.rd = rrddim_add(m->st, "correctable", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + m->ue.rd = rrddim_add(m->st, "uncorrectable", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + m->ce_noinfo.rd = rrddim_add(m->st, "correctable_noinfo", NULL, 1, 1, 
RRD_ALGORITHM_INCREMENTAL); + m->ue_noinfo.rd = rrddim_add(m->st, "uncorrectable_noinfo", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } - rrdset_done(ce_st); - } + rrddim_set_by_pointer(m->st, m->ce.rd, (collected_number)m->ce.count); + rrddim_set_by_pointer(m->st, m->ue.rd, (collected_number)m->ue.count); + rrddim_set_by_pointer(m->st, m->ce_noinfo.rd, (collected_number)m->ce_noinfo.count); + rrddim_set_by_pointer(m->st, m->ue_noinfo.rd, (collected_number)m->ue_noinfo.count); - // -------------------------------------------------------------------- + rrdset_done(m->st); - if(do_ue == CONFIG_BOOLEAN_YES || (do_ue == CONFIG_BOOLEAN_AUTO && - (ue_sum > 0 || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { - do_ue = CONFIG_BOOLEAN_YES; + for(struct edac_dimm *d = m->dimms; d ;d = d->next) { + if(unlikely(!d->ce.updated && !d->ue.updated)) + continue; - static RRDSET *ue_st = NULL; + if(unlikely(!d->st)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "edac_%s_%s", m->name, d->name); + d->st = rrdset_create_localhost( + "mem" + , id + , NULL + , "edac" + , "mem.edac_mc_dimm" + , "DIMM Error Detection And Correction (EDAC) Errors" + , "errors/s" + , PLUGIN_PROC_NAME + , "/sys/devices/system/edac/mc" + , NETDATA_CHART_PRIO_MEM_HW_ECC_CE + 1 + , update_every + , RRDSET_TYPE_LINE + ); - if(unlikely(!ue_st)) { - ue_st = rrdset_create_localhost( - "mem" - , "ecc_ue" - , NULL - , "ecc" - , NULL - , "ECC Memory Uncorrectable Errors" - , "errors" - , PLUGIN_PROC_NAME - , "/sys/devices/system/edac/mc" - , NETDATA_CHART_PRIO_MEM_HW_ECC_UE - , update_every - , RRDSET_TYPE_LINE - ); - } + rrdlabels_add(d->st->rrdlabels, "controller", m->name, RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st->rrdlabels, "dimm", d->name, RRDLABEL_SRC_AUTO); + + char buffer[1024 + 1]; - for(m = mc_root; m; m = m->next) { - if (m->ue_count_filename && m->ue_updated) { - if(unlikely(!m->ue_rd)) - m->ue_rd = rrddim_add(ue_st, m->name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + 
if(read_edac_mc_rank_file(m->name, d->name, "dimm_dev_type", buffer, 1024)) + rrdlabels_add(d->st->rrdlabels, "dimm_dev_type", buffer, RRDLABEL_SRC_AUTO); - rrddim_set_by_pointer(ue_st, m->ue_rd, m->ue_count); + if(read_edac_mc_rank_file(m->name, d->name, "dimm_edac_mode", buffer, 1024)) + rrdlabels_add(d->st->rrdlabels, "dimm_edac_mode", buffer, RRDLABEL_SRC_AUTO); + + if(read_edac_mc_rank_file(m->name, d->name, "dimm_label", buffer, 1024)) + rrdlabels_add(d->st->rrdlabels, "dimm_label", buffer, RRDLABEL_SRC_AUTO); + + if(read_edac_mc_rank_file(m->name, d->name, "dimm_location", buffer, 1024)) + rrdlabels_add(d->st->rrdlabels, "dimm_location", buffer, RRDLABEL_SRC_AUTO); + + if(read_edac_mc_rank_file(m->name, d->name, "dimm_mem_type", buffer, 1024)) + rrdlabels_add(d->st->rrdlabels, "dimm_mem_type", buffer, RRDLABEL_SRC_AUTO); + + if(read_edac_mc_rank_file(m->name, d->name, "size", buffer, 1024)) + rrdlabels_add(d->st->rrdlabels, "size", buffer, RRDLABEL_SRC_AUTO); + + d->ce.rd = rrddim_add(d->st, "correctable", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + d->ue.rd = rrddim_add(d->st, "uncorrectable", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } - } - rrdset_done(ue_st); + rrddim_set_by_pointer(d->st, d->ce.rd, (collected_number)d->ce.count); + rrddim_set_by_pointer(d->st, d->ue.rd, (collected_number)d->ue.count); + + rrdset_done(d->st); + } } return 0; diff --git a/collectors/python.d.plugin/adaptec_raid/metadata.yaml b/collectors/python.d.plugin/adaptec_raid/metadata.yaml index 5986aed6..7ee4ce7c 100644 --- a/collectors/python.d.plugin/adaptec_raid/metadata.yaml +++ b/collectors/python.d.plugin/adaptec_raid/metadata.yaml @@ -1,98 +1,167 @@ -meta: - plugin_name: python.d.plugin - module_name: adaptec_raid - monitored_instance: - name: AdaptecRAID - link: '' - categories: - - data-collection.storage-mount-points-and-filesystems - icon_filename: 'adaptec.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - 
keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Assess Adaptec RAID hardware storage controllers with Netdata for RAID controller performance and operational metrics. Improve your RAID controller performance with comprehensive dashboards and anomaly detection.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: -- name: adaptec_raid_ld_status - link: https://github.com/netdata/netdata/blob/master/health/health.d/adaptec_raid.conf - metric: adaptec_raid.ld_status - info: logical device status is failed or degraded -- name: adaptec_raid_pd_state - link: https://github.com/netdata/netdata/blob/master/health/health.d/adaptec_raid.conf - metric: adaptec_raid.pd_state - info: physical device state is not online -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: adaptec_raid + monitored_instance: + name: AdaptecRAID + link: "https://www.microchip.com/en-us/products/storage" + categories: + - data-collection.storage-mount-points-and-filesystems + icon_filename: "adaptec.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - storage + - raid-controller + - manage-disks + most_popular: false + overview: + data_collection: + metrics_description: | + This collector monitors Adaptec 
RAID hardware storage controller metrics about both physical and logical drives. + method_description: | + It uses the arcconf command line utility (from adaptec) to monitor your raid controller. + + Executed commands: + - sudo -n arcconf GETCONFIG 1 LD + - sudo -n arcconf GETCONFIG 1 PD + supported_platforms: + include: [] + exclude: [] + multi_instance: false + additional_permissions: + description: "The module uses arcconf, which can only be executed by root. It uses sudo and assumes that it is configured such that the netdata user can execute arcconf as root without a password." + default_behavior: + auto_detection: + description: "After all the permissions are satisfied, netdata should be able to execute commands via the arcconf command line utility" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Grant permissions for netdata, to run arcconf as sudoer + description: | + The module uses arcconf, which can only be executed by root. It uses sudo and assumes that it is configured such that the netdata user can execute arcconf as root without a password. + + Add to your /etc/sudoers file: + which arcconf shows the full path to the binary. + + ```bash + netdata ALL=(root) NOPASSWD: /path/to/arcconf + ``` + - title: Reset Netdata's systemd unit CapabilityBoundingSet (Linux distributions with systemd) + description: | + The default CapabilityBoundingSet doesn't allow using sudo, and is quite strict in general. Resetting is not optimal, but a next-best solution given the inability to execute arcconf using sudo. 
+ + As root user, do the following: + + ```bash + mkdir /etc/systemd/system/netdata.service.d + echo -e '[Service]\nCapabilityBoundingSet=~' | tee /etc/systemd/system/netdata.service.d/unset-capability-bounding-set.conf + systemctl daemon-reload + systemctl restart netdata.service + ``` + configuration: + file: + name: "python.d/adaptec_raid.conf" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. 
+ default_value: yes + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Basic + folding: + enabled: false + description: A basic example configuration per job + config: | + job_name: + name: my_job_name + update_every: 1 # the JOB's data collection frequency + priority: 60000 # the JOB's order on the dashboard + penalty: yes # the JOB's penalty + autodetection_retry: 0 # the JOB's re-check interval in seconds + troubleshooting: + problems: + list: [] + alerts: + - name: adaptec_raid_ld_status + link: https://github.com/netdata/netdata/blob/master/health/health.d/adaptec_raid.conf + metric: adaptec_raid.ld_status + info: logical device status is failed or degraded + - name: adaptec_raid_pd_state + link: https://github.com/netdata/netdata/blob/master/health/health.d/adaptec_raid.conf + metric: adaptec_raid.pd_state + info: physical device state is not online metrics: - - name: adaptec_raid.ld_status - description: 'Status of logical devices (1: Failed or Degraded)' - unit: "bool" - chart_type: line - dimensions: - - name: a dimension per logical device - - name: adaptec_raid.pd_state - description: 'State of physical devices (1: not Online)' - unit: "bool" - chart_type: line - dimensions: - - name: a dimension per physical device - - name: adaptec_raid.smart_warnings - description: S.M.A.R.T warnings - unit: "count" - chart_type: line - dimensions: - - name: a dimension per physical device - - name: adaptec_raid.temperature - description: Temperature - unit: "celsius" - chart_type: line - dimensions: - - name: a dimension per physical device + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." 
+ labels: [] + metrics: + - name: adaptec_raid.ld_status + description: "Status of logical devices (1: Failed or Degraded)" + unit: "bool" + chart_type: line + dimensions: + - name: a dimension per logical device + - name: adaptec_raid.pd_state + description: "State of physical devices (1: not Online)" + unit: "bool" + chart_type: line + dimensions: + - name: a dimension per physical device + - name: adaptec_raid.smart_warnings + description: S.M.A.R.T warnings + unit: "count" + chart_type: line + dimensions: + - name: a dimension per physical device + - name: adaptec_raid.temperature + description: Temperature + unit: "celsius" + chart_type: line + dimensions: + - name: a dimension per physical device diff --git a/collectors/python.d.plugin/adaptec_raid/metrics.csv b/collectors/python.d.plugin/adaptec_raid/metrics.csv deleted file mode 100644 index 1462940c..00000000 --- a/collectors/python.d.plugin/adaptec_raid/metrics.csv +++ /dev/null @@ -1,5 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -adaptec_raid.ld_status,,a dimension per logical device,bool,Status of logical devices (1: Failed or Degraded),line,,python.d.plugin,adaptec_raid -adaptec_raid.pd_state,,a dimension per physical device,bool,State of physical devices (1: not Online),line,,python.d.plugin,adaptec_raid -adaptec_raid.smart_warnings,,a dimension per physical device,count,S.M.A.R.T warnings,line,,python.d.plugin,adaptec_raid -adaptec_raid.temperature,,a dimension per physical device,celsius,Temperature,line,,python.d.plugin,adaptec_raid diff --git a/collectors/python.d.plugin/alarms/metadata.yaml b/collectors/python.d.plugin/alarms/metadata.yaml index 81afd5cc..30a89778 100644 --- a/collectors/python.d.plugin/alarms/metadata.yaml +++ b/collectors/python.d.plugin/alarms/metadata.yaml @@ -1,77 +1,177 @@ -meta: - plugin_name: python.d.plugin - module_name: alarms - monitored_instance: - name: python.d alarms - link: '' - categories: [] - icon_filename: '' - 
related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: alarms + monitored_instance: + name: Netdata Agent alarms + link: https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/alarms/README.md + categories: + - data-collection.other + icon_filename: "" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - alarms + - netdata + most_popular: false + overview: + data_collection: + metrics_description: | + This collector creates an 'Alarms' menu with one line plot of `alarms.status`. + method_description: | + Alarm status is read from the Netdata agent rest api [`/api/v1/alarms?all`](https://learn.netdata.cloud/api#/alerts/alerts1). + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: | + It discovers instances of Netdata running on localhost, and gathers metrics from `http://127.0.0.1:19999/api/v1/alarms?all`. 
`CLEAR` status is mapped to `0`, `WARNING` to `1` and `CRITICAL` to `2`. Also, by default all alarms produced will be monitored. + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: python.d/alarms.conf + description: "" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: Config options + enabled: true + list: + - name: url + description: Netdata agent alarms endpoint to collect from. Can be local or remote so long as reachable by agent. + default_value: http://127.0.0.1:19999/api/v1/alarms?all + required: true + - name: status_map + description: Mapping of alarm status to integer number that will be the metric value collected. + default_value: '{"CLEAR": 0, "WARNING": 1, "CRITICAL": 2}' + required: true + - name: collect_alarm_values + description: set to true to include a chart with calculated alarm values over time. + default_value: false + required: true + - name: alarm_status_chart_type + description: define the type of chart for plotting status over time e.g. 'line' or 'stacked'. + default_value: "line" + required: true + - name: alarm_contains_words + description: > + A "," separated list of words you want to filter alarm names for. For example 'cpu,load' would filter for only alarms with "cpu" or "load" in alarm name. Default includes all. 
+ default_value: "" + required: true + - name: alarm_excludes_words + description: > + A "," separated list of words you want to exclude based on alarm name. For example 'cpu,load' would exclude all alarms with "cpu" or "load" in alarm name. Default excludes None. + default_value: "" + required: true + - name: update_every + description: Sets the default data collection frequency. + default_value: 10 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: "" + required: false + examples: + folding: + enabled: true + title: Config + list: + - name: Basic + folding: + enabled: false + description: A basic example configuration. + config: | + jobs: + url: 'http://127.0.0.1:19999/api/v1/alarms?all' + - name: Advanced + folding: + enabled: true + description: | + An advanced example configuration with multiple jobs collecting different subsets of alarms for plotting on different charts. + "ML" job will collect status and values for all alarms with "ml_" in the name. Default job will collect status for all other alarms. 
+ config: | + ML: + update_every: 5 + url: 'http://127.0.0.1:19999/api/v1/alarms?all' + status_map: + CLEAR: 0 + WARNING: 1 + CRITICAL: 2 + collect_alarm_values: true + alarm_status_chart_type: 'stacked' + alarm_contains_words: 'ml_' + + Default: + update_every: 5 + url: 'http://127.0.0.1:19999/api/v1/alarms?all' + status_map: + CLEAR: 0 + WARNING: 1 + CRITICAL: 2 + collect_alarm_values: false + alarm_status_chart_type: 'stacked' + alarm_excludes_words: 'ml_' + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: alarms.status - description: Alarms ({status mapping}) - unit: "status" - chart_type: line - dimensions: - - name: a dimension per alarm - - name: alarms.status - description: Alarm Values - unit: "value" - chart_type: line - dimensions: - - name: a dimension per alarm + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: | + These metrics refer to the entire monitored application. + labels: [] + metrics: + - name: alarms.status + description: Alarms ({status mapping}) + unit: "status" + chart_type: line + dimensions: + - name: a dimension per alarm representing the latest status of the alarm. + - name: alarms.values + description: Alarm Values + unit: "value" + chart_type: line + dimensions: + - name: a dimension per alarm representing the latest collected value of the alarm. 
diff --git a/collectors/python.d.plugin/alarms/metrics.csv b/collectors/python.d.plugin/alarms/metrics.csv deleted file mode 100644 index 1c28a836..00000000 --- a/collectors/python.d.plugin/alarms/metrics.csv +++ /dev/null @@ -1,3 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -alarms.status,,a dimension per alarm,status,Alarms ({status mapping}),line,,python.d.plugin,alarms -alarms.status,,a dimension per alarm,value,Alarm Values,line,,python.d.plugin,alarms diff --git a/collectors/python.d.plugin/am2320/metadata.yaml b/collectors/python.d.plugin/am2320/metadata.yaml index 88e86fb2..c85cd5f2 100644 --- a/collectors/python.d.plugin/am2320/metadata.yaml +++ b/collectors/python.d.plugin/am2320/metadata.yaml @@ -1,78 +1,135 @@ -meta: - plugin_name: python.d.plugin - module_name: am2320 - monitored_instance: - name: AM2320 - link: '' - categories: - - data-collection.hardware-devices-and-sensors - icon_filename: 'microchip.svg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor AM2320 metrics with Netdata for optimal temperature and humidity sensor performance. Improve your sensor performance with comprehensive dashboards and anomaly detection.' 
- method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: am2320 + monitored_instance: + name: AM2320 + link: 'https://learn.adafruit.com/adafruit-am2320-temperature-humidity-i2c-sensor/overview' + categories: + - data-collection.hardware-devices-and-sensors + icon_filename: 'microchip.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: + - temperature + - am2320 + - sensor + - humidity + most_popular: false + overview: + data_collection: + metrics_description: 'This collector monitors AM2320 sensor metrics about temperature and humidity.' + method_description: 'It retrieves temperature and humidity values by contacting an AM2320 sensor over i2c.' 
+ supported_platforms: + include: [] + exclude: [] + multi_instance: false + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: 'Assuming prerequisites are met, the collector will try to connect to the sensor via i2c' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: + - title: 'Sensor connection to a Raspberry Pi' + description: | + Connect the am2320 to the Raspberry Pi I2C pins + + Raspberry Pi 3B/4 Pins: + + - Board 3.3V (pin 1) to sensor VIN (pin 1) + - Board SDA (pin 3) to sensor SDA (pin 2) + - Board GND (pin 6) to sensor GND (pin 3) + - Board SCL (pin 5) to sensor SCL (pin 4) + + You may also need to add two I2C pullup resistors if your board does not already have them. The Raspberry Pi does have internal pullup resistors but it doesn't hurt to add them anyway. You can use 2.2K - 10K but we will just use 10K. The resistors go from VDD to SCL and SDA each. + - title: 'Software requirements' + description: | + Install the Adafruit Circuit Python AM2320 library: + + `sudo pip3 install adafruit-circuitpython-am2320` + configuration: + file: + name: python.d/am2320.conf + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. 
+ default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: '' + required: false + examples: + folding: + enabled: false + title: "Config" + list: + - name: Local sensor + description: A basic JOB configuration + config: | + local_sensor: + name: 'Local AM2320' + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: am2320.temperature - description: Temperature - unit: "celsius" - chart_type: line - dimensions: - - name: temperature - - name: am2320.humidity - description: Relative Humidity - unit: "percentage" - chart_type: line - dimensions: - - name: humidity + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." 
+ labels: [] + metrics: + - name: am2320.temperature + description: Temperature + unit: "celsius" + chart_type: line + dimensions: + - name: temperature + - name: am2320.humidity + description: Relative Humidity + unit: "percentage" + chart_type: line + dimensions: + - name: humidity diff --git a/collectors/python.d.plugin/am2320/metrics.csv b/collectors/python.d.plugin/am2320/metrics.csv deleted file mode 100644 index 0f3b79f2..00000000 --- a/collectors/python.d.plugin/am2320/metrics.csv +++ /dev/null @@ -1,3 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -am2320.temperature,,temperature,celsius,Temperature,line,,python.d.plugin,am2320 -am2320.humidity,,humidity,percentage,Relative Humidity,line,,python.d.plugin,am2320 diff --git a/collectors/python.d.plugin/anomalies/metadata.yaml b/collectors/python.d.plugin/anomalies/metadata.yaml index 7bcac646..d138cf5d 100644 --- a/collectors/python.d.plugin/anomalies/metadata.yaml +++ b/collectors/python.d.plugin/anomalies/metadata.yaml @@ -1,85 +1,87 @@ -meta: - plugin_name: python.d.plugin - module_name: anomalies - monitored_instance: - name: python.d anomalies - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: -- name: anomalies_anomaly_probabilities - 
link: https://github.com/netdata/netdata/blob/master/health/health.d/anomalies.conf - metric: anomalies.probability - info: average anomaly probability over the last 2 minutes -- name: anomalies_anomaly_flags - link: https://github.com/netdata/netdata/blob/master/health/health.d/anomalies.conf - metric: anomalies.anomaly - info: number of anomalies in the last 2 minutes -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] - metrics: - - name: anomalies.probability - description: Anomaly Probability - unit: "probability" - chart_type: line - dimensions: - - name: a dimension per probability - - name: anomalies.anomaly - description: Anomaly - unit: "count" - chart_type: stacked - dimensions: - - name: a dimension per anomaly +# NOTE: this file is commented out as users are recommended to use the +# native anomaly detection capabilities on the agent instead. +# meta: +# plugin_name: python.d.plugin +# module_name: anomalies +# monitored_instance: +# name: python.d anomalies +# link: "" +# categories: [] +# icon_filename: "" +# related_resources: +# integrations: +# list: [] +# info_provided_to_referring_integrations: +# description: "" +# keywords: [] +# most_popular: false +# overview: +# data_collection: +# metrics_description: "" +# method_description: "" +# supported_platforms: +# include: [] +# exclude: [] +# multi_instance: true +# additional_permissions: +# description: "" +# default_behavior: +# auto_detection: +# description: "" +# limits: +# description: "" +# performance_impact: +# description: "" +# setup: +# prerequisites: +# list: [] +# configuration: +# file: +# name: "" +# description: "" +# options: +# description: "" +# folding: +# title: "" +# enabled: true +# list: [] +# examples: +# folding: +# enabled: true +# title: "" +# list: [] +# troubleshooting: +# problems: +# list: [] +# alerts: +# - name: anomalies_anomaly_probabilities +# link: 
https://github.com/netdata/netdata/blob/master/health/health.d/anomalies.conf +# metric: anomalies.probability +# info: average anomaly probability over the last 2 minutes +# - name: anomalies_anomaly_flags +# link: https://github.com/netdata/netdata/blob/master/health/health.d/anomalies.conf +# metric: anomalies.anomaly +# info: number of anomalies in the last 2 minutes +# metrics: +# folding: +# title: Metrics +# enabled: false +# description: "" +# availability: [] +# scopes: +# - name: global +# description: "" +# labels: [] +# metrics: +# - name: anomalies.probability +# description: Anomaly Probability +# unit: "probability" +# chart_type: line +# dimensions: +# - name: a dimension per probability +# - name: anomalies.anomaly +# description: Anomaly +# unit: "count" +# chart_type: stacked +# dimensions: +# - name: a dimension per anomaly diff --git a/collectors/python.d.plugin/anomalies/metrics.csv b/collectors/python.d.plugin/anomalies/metrics.csv deleted file mode 100644 index 847d9d1d..00000000 --- a/collectors/python.d.plugin/anomalies/metrics.csv +++ /dev/null @@ -1,3 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -anomalies.probability,,a dimension per probability,probability,Anomaly Probability,line,,python.d.plugin,anomalies -anomalies.anomaly,,a dimension per anomaly,count,Anomaly,stacked,,python.d.plugin,anomalies diff --git a/collectors/python.d.plugin/beanstalk/metadata.yaml b/collectors/python.d.plugin/beanstalk/metadata.yaml index d8730bb8..b6ff2f11 100644 --- a/collectors/python.d.plugin/beanstalk/metadata.yaml +++ b/collectors/python.d.plugin/beanstalk/metadata.yaml @@ -1,194 +1,263 @@ -meta: - plugin_name: python.d.plugin - module_name: beanstalk - monitored_instance: - name: Beanstalk - link: '' - categories: - - data-collection.message-brokers - icon_filename: 'beanstalk.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - 
most_popular: false -overview: - data_collection: - metrics_description: 'Monitor Beanstalk metrics to enhance job queueing and processing efficiency. Track job rates, processing times, and queue lengths for better task management.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: -- name: beanstalk_server_buried_jobs - link: https://github.com/netdata/netdata/blob/master/health/health.d/beanstalkd.conf - metric: beanstalk.current_jobs - info: number of buried jobs across all tubes. You need to manually kick them so they can be processed. Presence of buried jobs in a tube does not affect new jobs. -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: beanstalk + monitored_instance: + name: Beanstalk + link: "https://beanstalkd.github.io/" + categories: + - data-collection.message-brokers + - data-collection.task-queues + icon_filename: "beanstalk.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - beanstalk + - beanstalkd + - message + most_popular: false + overview: + data_collection: + metrics_description: "Monitor Beanstalk metrics to enhance job queueing and processing efficiency. Track job rates, processing times, and queue lengths for better task management." 
+ method_description: "The collector uses the `beanstalkc` python module to connect to a `beanstalkd` service and gather metrics." + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "If no configuration is given, module will attempt to connect to beanstalkd on 127.0.0.1:11300 address." + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: "beanstalkc python module" + description: The collector requires the `beanstalkc` python module to be installed. + configuration: + file: + name: python.d/beanstalk.conf + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. 
Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: "" + required: false + - name: host + description: IP or URL to a beanstalk service. + default_value: "127.0.0.1" + required: false + - name: port + description: Port to the IP or URL to a beanstalk service. + default_value: "11300" + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Remote beanstalk server + description: A basic remote beanstalk server + folding: + enabled: false + config: | + remote: + name: 'beanstalk' + host: '1.2.3.4' + port: 11300 + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collecting metrics from local and remote instances. + config: | + localhost: + name: 'local_beanstalk' + host: '127.0.0.1' + port: 11300 + + remote_job: + name: 'remote_beanstalk' + host: '192.0.2.1' + port: 11300 + troubleshooting: + problems: + list: [] + alerts: + - name: beanstalk_server_buried_jobs + link: https://github.com/netdata/netdata/blob/master/health/health.d/beanstalkd.conf + metric: beanstalk.current_jobs + info: number of buried jobs across all tubes. You need to manually kick them so they can be processed. Presence of buried jobs in a tube does not affect new jobs. 
metrics: - - name: beanstalk.cpu_usage - description: Cpu Usage - unit: "cpu time" - chart_type: area - dimensions: - - name: user - - name: system - - name: beanstalk.jobs_rate - description: Jobs Rate - unit: "jobs/s" - chart_type: line - dimensions: - - name: total - - name: timeouts - - name: beanstalk.connections_rate - description: Connections Rate - unit: "connections/s" - chart_type: area - dimensions: - - name: connections - - name: beanstalk.commands_rate - description: Commands Rate - unit: "commands/s" - chart_type: stacked - dimensions: - - name: put - - name: peek - - name: peek-ready - - name: peek-delayed - - name: peek-buried - - name: reserve - - name: use - - name: watch - - name: ignore - - name: delete - - name: bury - - name: kick - - name: stats - - name: stats-job - - name: stats-tube - - name: list-tubes - - name: list-tube-used - - name: list-tubes-watched - - name: pause-tube - - name: beanstalk.connections_rate - description: Current Tubes - unit: "tubes" - chart_type: area - dimensions: - - name: tubes - - name: beanstalk.current_jobs - description: Current Jobs - unit: "jobs" - chart_type: stacked - dimensions: - - name: urgent - - name: ready - - name: reserved - - name: delayed - - name: buried - - name: beanstalk.current_connections - description: Current Connections - unit: "connections" - chart_type: line - dimensions: - - name: written - - name: producers - - name: workers - - name: waiting - - name: beanstalk.binlog - description: Binlog - unit: "records/s" - chart_type: line - dimensions: - - name: written - - name: migrated - - name: beanstalk.uptime - description: seconds - unit: "seconds" - chart_type: line - dimensions: - - name: uptime - - name: tube - description: "" - labels: [] - metrics: - - name: beanstalk.jobs_rate - description: Jobs Rate - unit: "jobs/s" - chart_type: area - dimensions: - - name: jobs - - name: beanstalk.jobs - description: Jobs - unit: "jobs" - chart_type: stacked - dimensions: - - name: urgent - 
- name: ready - - name: reserved - - name: delayed - - name: buried - - name: beanstalk.connections - description: Connections - unit: "connections" - chart_type: stacked - dimensions: - - name: using - - name: waiting - - name: watching - - name: beanstalk.commands - description: Commands - unit: "commands/s" - chart_type: stacked - dimensions: - - name: deletes - - name: pauses - - name: beanstalk.pause - description: Pause - unit: "seconds" - chart_type: stacked - dimensions: - - name: since - - name: left + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: beanstalk.cpu_usage + description: Cpu Usage + unit: "cpu time" + chart_type: area + dimensions: + - name: user + - name: system + - name: beanstalk.jobs_rate + description: Jobs Rate + unit: "jobs/s" + chart_type: line + dimensions: + - name: total + - name: timeouts + - name: beanstalk.connections_rate + description: Connections Rate + unit: "connections/s" + chart_type: area + dimensions: + - name: connections + - name: beanstalk.commands_rate + description: Commands Rate + unit: "commands/s" + chart_type: stacked + dimensions: + - name: put + - name: peek + - name: peek-ready + - name: peek-delayed + - name: peek-buried + - name: reserve + - name: use + - name: watch + - name: ignore + - name: delete + - name: bury + - name: kick + - name: stats + - name: stats-job + - name: stats-tube + - name: list-tubes + - name: list-tube-used + - name: list-tubes-watched + - name: pause-tube + - name: beanstalk.connections_rate + description: Current Tubes + unit: "tubes" + chart_type: area + dimensions: + - name: tubes + - name: beanstalk.current_jobs + description: Current Jobs + unit: "jobs" + chart_type: stacked + dimensions: + - name: urgent + - name: ready + - name: reserved + - name: delayed + - name: buried + - name: beanstalk.current_connections 
+ description: Current Connections + unit: "connections" + chart_type: line + dimensions: + - name: written + - name: producers + - name: workers + - name: waiting + - name: beanstalk.binlog + description: Binlog + unit: "records/s" + chart_type: line + dimensions: + - name: written + - name: migrated + - name: beanstalk.uptime + description: seconds + unit: "seconds" + chart_type: line + dimensions: + - name: uptime + - name: tube + description: "Metrics related to Beanstalk tubes. Each tube produces its own set of the following metrics." + labels: [] + metrics: + - name: beanstalk.jobs_rate + description: Jobs Rate + unit: "jobs/s" + chart_type: area + dimensions: + - name: jobs + - name: beanstalk.jobs + description: Jobs + unit: "jobs" + chart_type: stacked + dimensions: + - name: urgent + - name: ready + - name: reserved + - name: delayed + - name: buried + - name: beanstalk.connections + description: Connections + unit: "connections" + chart_type: stacked + dimensions: + - name: using + - name: waiting + - name: watching + - name: beanstalk.commands + description: Commands + unit: "commands/s" + chart_type: stacked + dimensions: + - name: deletes + - name: pauses + - name: beanstalk.pause + description: Pause + unit: "seconds" + chart_type: stacked + dimensions: + - name: since + - name: left diff --git a/collectors/python.d.plugin/beanstalk/metrics.csv b/collectors/python.d.plugin/beanstalk/metrics.csv deleted file mode 100644 index fe0219d1..00000000 --- a/collectors/python.d.plugin/beanstalk/metrics.csv +++ /dev/null @@ -1,15 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -beanstalk.cpu_usage,,"user, system",cpu time,Cpu Usage,area,,python.d.plugin,beanstalk -beanstalk.jobs_rate,,"total, timeouts",jobs/s,Jobs Rate,line,,python.d.plugin,beanstalk -beanstalk.connections_rate,,connections,connections/s,Connections Rate,area,,python.d.plugin,beanstalk -beanstalk.commands_rate,,"put, peek, peek-ready, peek-delayed, 
peek-buried, reserve, use, watch, ignore, delete, bury, kick, stats, stats-job, stats-tube, list-tubes, list-tube-used, list-tubes-watched, pause-tube",commands/s,Commands Rate,stacked,,python.d.plugin,beanstalk -beanstalk.connections_rate,,tubes,tubes,Current Tubes,area,,python.d.plugin,beanstalk -beanstalk.current_jobs,,"urgent, ready, reserved, delayed, buried",jobs,Current Jobs,stacked,,python.d.plugin,beanstalk -beanstalk.current_connections,,"written, producers, workers, waiting",connections,Current Connections,line,,python.d.plugin,beanstalk -beanstalk.binlog,,"written, migrated",records/s,Binlog,line,,python.d.plugin,beanstalk -beanstalk.uptime,,uptime,seconds,seconds,line,,python.d.plugin,beanstalk -beanstalk.jobs_rate,tube,jobs,jobs/s,Jobs Rate,area,,python.d.plugin,beanstalk -beanstalk.jobs,tube,"urgent, ready, reserved, delayed, buried",jobs,Jobs,stacked,,python.d.plugin,beanstalk -beanstalk.connections,tube,"using, waiting, watching",connections,Connections,stacked,,python.d.plugin,beanstalk -beanstalk.commands,tube,"deletes, pauses",commands/s,Commands,stacked,,python.d.plugin,beanstalk -beanstalk.pause,tube,"since, left",seconds,Pause,stacked,,python.d.plugin,beanstalk diff --git a/collectors/python.d.plugin/bind_rndc/bind_rndc.conf b/collectors/python.d.plugin/bind_rndc/bind_rndc.conf index 3b7e9a21..84eaf059 100644 --- a/collectors/python.d.plugin/bind_rndc/bind_rndc.conf +++ b/collectors/python.d.plugin/bind_rndc/bind_rndc.conf @@ -63,14 +63,14 @@ # # named_stats_path: 'path to named.stats' # Default: '/var/log/bind/named.stats' #------------------------------------------------------------------------------------------------------------------ -# IMPORTANT Information +# Important Information # -# BIND APPEND logs at EVERY RUN. Its NOT RECOMMENDED to set update_every below 30 sec. -# STRONGLY RECOMMENDED to create a bind-rndc conf file for logrotate +# BIND appends logs at EVERY RUN. It is NOT RECOMMENDED to set update_every below 30 sec. 
+# It is STRONGLY RECOMMENDED to create a bind-rndc.conf file for logrotate. # # To set up your BIND to dump stats do the following: # -# 1. add to 'named.conf.options' options {}: +# 1. Add to 'named.conf.options' options {}: # statistics-file "/var/log/bind/named.stats"; # # 2. Create bind/ directory in /var/log @@ -83,13 +83,12 @@ # systemctl reload bind9.service # # 5. Run as a root 'rndc stats' to dump (BIND will create named.stats in new directory) -# # -# To ALLOW NETDATA TO RUN 'rndc stats' change '/etc/bind/rndc.key' group to netdata +# To allow Netdata to run 'rndc stats' change '/etc/bind/rndc.key' group to netdata # chown :netdata rndc.key # -# The last BUT NOT least is to create bind-rndc.conf in logrotate.d/ -# The working one +# Last, BUT NOT least, is to create bind-rndc.conf in logrotate.d/: +# # /var/log/bind/named.stats { # # daily @@ -104,7 +103,6 @@ # } # # To test your logrotate conf file run as root: -# # logrotate /etc/logrotate.d/bind-rndc -d (debug dry-run mode) # # ---------------------------------------------------------------------- diff --git a/collectors/python.d.plugin/bind_rndc/metadata.yaml b/collectors/python.d.plugin/bind_rndc/metadata.yaml index 72c3acff..1e9fb24f 100644 --- a/collectors/python.d.plugin/bind_rndc/metadata.yaml +++ b/collectors/python.d.plugin/bind_rndc/metadata.yaml @@ -1,105 +1,191 @@ -meta: - plugin_name: python.d.plugin - module_name: bind_rndc - monitored_instance: - name: ISCBind (RNDC) - link: '' - categories: - - data-collection.dns-and-dhcp-servers - icon_filename: 'isc.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor ISCBind (RNDC) performance for optimal DNS server operations. Monitor query rates, response times, and error rates to ensure reliable DNS service delivery.' 
- method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: -- name: bind_rndc_stats_file_size - link: https://github.com/netdata/netdata/blob/master/health/health.d/bind_rndc.conf - metric: bind_rndc.stats_size - info: BIND statistics-file size -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: bind_rndc + monitored_instance: + name: ISCBind (RNDC) + link: "https://www.isc.org/bind/" + categories: + - data-collection.dns-and-dhcp-servers + icon_filename: "isc.png" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - dns + - bind + - server + most_popular: false + overview: + data_collection: + metrics_description: "Monitor ISCBind (RNDC) performance for optimal DNS server operations. Monitor query rates, response times, and error rates to ensure reliable DNS service delivery." + method_description: "This collector uses the `rndc` tool to dump (named.stats) statistics then read them to gather Bind Name Server summary performance metrics." 
+ supported_platforms: + include: [] + exclude: [] + multi_instance: false + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "If no configuration is given, the collector will attempt to read named.stats file at `/var/log/bind/named.stats`" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: "Minimum bind version and permissions" + description: "Version of bind must be >=9.6 and the Netdata user must have permissions to run `rndc stats`" + - title: "Setup log rotate for bind stats" + description: | + BIND appends logs at EVERY RUN. It is NOT RECOMMENDED to set `update_every` below 30 sec. + It is STRONGLY RECOMMENDED to create a `bind-rndc.conf` file for logrotate. + + To set up BIND to dump stats do the following: + + 1. Add to 'named.conf.options' options {}: + `statistics-file "/var/log/bind/named.stats";` + + 2. Create bind/ directory in /var/log: + `cd /var/log/ && mkdir bind` + + 3. Change owner of directory to 'bind' user: + `chown bind bind/` + + 4. RELOAD (NOT restart) BIND: + `systemctl reload bind9.service` + + 5. Run as a root 'rndc stats' to dump (BIND will create named.stats in new directory) + + To allow Netdata to run 'rndc stats' change '/etc/bind/rndc.key' group to netdata: + `chown :netdata rndc.key` + + Last, BUT NOT least, is to create bind-rndc.conf in logrotate.d/: + ``` + /var/log/bind/named.stats { + + daily + rotate 4 + compress + delaycompress + create 0644 bind bind + missingok + postrotate + rndc reload > /dev/null + endscript + } + ``` + To test your logrotate conf file run as root: + `logrotate /etc/logrotate.d/bind-rndc -d (debug dry-run mode)` + configuration: + file: + name: python.d/bind_rndc.conf + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. 
+ + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. 
+ default_value: "" + required: false + - name: named_stats_path + description: Path to the named stats, after being dumped by `rndc` + default_value: "/var/log/bind/named.stats" + required: false + examples: + folding: + enabled: false + title: "Config" + list: + - name: Local bind stats + description: Define a local path to bind stats file + config: | + local: + named_stats_path: '/var/log/bind/named.stats' + troubleshooting: + problems: + list: [] + alerts: + - name: bind_rndc_stats_file_size + link: https://github.com/netdata/netdata/blob/master/health/health.d/bind_rndc.conf + metric: bind_rndc.stats_size + info: BIND statistics-file size metrics: - - name: bind_rndc.name_server_statistics - description: Name Server Statistics - unit: "stats" - chart_type: line - dimensions: - - name: requests - - name: rejected_queries - - name: success - - name: failure - - name: responses - - name: duplicate - - name: recursion - - name: nxrrset - - name: nxdomain - - name: non_auth_answer - - name: auth_answer - - name: dropped_queries - - name: bind_rndc.incoming_queries - description: Incoming queries - unit: "queries" - chart_type: line - dimensions: - - name: a dimension per incoming query type - - name: bind_rndc.outgoing_queries - description: Outgoing queries - unit: "queries" - chart_type: line - dimensions: - - name: a dimension per outgoing query type - - name: bind_rndc.stats_size - description: Named Stats File Size - unit: "MiB" - chart_type: line - dimensions: - - name: stats_size + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." 
+ labels: [] + metrics: + - name: bind_rndc.name_server_statistics + description: Name Server Statistics + unit: "stats" + chart_type: line + dimensions: + - name: requests + - name: rejected_queries + - name: success + - name: failure + - name: responses + - name: duplicate + - name: recursion + - name: nxrrset + - name: nxdomain + - name: non_auth_answer + - name: auth_answer + - name: dropped_queries + - name: bind_rndc.incoming_queries + description: Incoming queries + unit: "queries" + chart_type: line + dimensions: + - name: a dimension per incoming query type + - name: bind_rndc.outgoing_queries + description: Outgoing queries + unit: "queries" + chart_type: line + dimensions: + - name: a dimension per outgoing query type + - name: bind_rndc.stats_size + description: Named Stats File Size + unit: "MiB" + chart_type: line + dimensions: + - name: stats_size diff --git a/collectors/python.d.plugin/bind_rndc/metrics.csv b/collectors/python.d.plugin/bind_rndc/metrics.csv deleted file mode 100644 index 3b073309..00000000 --- a/collectors/python.d.plugin/bind_rndc/metrics.csv +++ /dev/null @@ -1,5 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -bind_rndc.name_server_statistics,,"requests, rejected_queries, success, failure, responses, duplicate, recursion, nxrrset, nxdomain, non_auth_answer, auth_answer, dropped_queries",stats,Name Server Statistics,line,,python.d.plugin,bind_rndc -bind_rndc.incoming_queries,,a dimension per incoming query type,queries,Incoming queries,line,,python.d.plugin,bind_rndc -bind_rndc.outgoing_queries,,a dimension per outgoing query type,queries,Outgoing queries,line,,python.d.plugin,bind_rndc -bind_rndc.stats_size,,stats_size,MiB,Named Stats File Size,line,,python.d.plugin,bind_rndc diff --git a/collectors/python.d.plugin/boinc/metadata.yaml b/collectors/python.d.plugin/boinc/metadata.yaml index a0a7bb32..33a67ac3 100644 --- a/collectors/python.d.plugin/boinc/metadata.yaml +++ 
b/collectors/python.d.plugin/boinc/metadata.yaml @@ -1,125 +1,198 @@ -meta: - plugin_name: python.d.plugin - module_name: boinc - monitored_instance: - name: BOINC - link: '' - categories: - - data-collection.distributed-computing-systems - icon_filename: 'bolt.svg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Study BOINC metrics to gain insights into volunteer computing projects. Examine computation times, task completion rates, and project statuses to enhance volunteer computing efforts.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: -- name: boinc_total_tasks - link: https://github.com/netdata/netdata/blob/master/health/health.d/boinc.conf - metric: boinc.tasks - info: average number of total tasks over the last 10 minutes - os: "*" -- name: boinc_active_tasks - link: https://github.com/netdata/netdata/blob/master/health/health.d/boinc.conf - metric: boinc.tasks - info: average number of active tasks over the last 10 minutes - os: "*" -- name: boinc_compute_errors - link: https://github.com/netdata/netdata/blob/master/health/health.d/boinc.conf - metric: boinc.states - info: average number of compute errors over the last 10 minutes - os: "*" -- name: boinc_upload_errors - link: https://github.com/netdata/netdata/blob/master/health/health.d/boinc.conf - metric: boinc.states - info: average number of failed uploads over 
the last 10 minutes - os: "*" -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: boinc + monitored_instance: + name: BOINC + link: "https://boinc.berkeley.edu/" + categories: + - data-collection.distributed-computing-systems + icon_filename: "bolt.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - boinc + - distributed + most_popular: false + overview: + data_collection: + metrics_description: "This collector monitors task counts for the Berkeley Open Infrastructure for Network Computing (BOINC) distributed computing client." + method_description: "It uses the same RPC interface that the BOINC monitoring GUI does." + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "By default, the module will try to auto-detect the password to the RPC interface by looking in `/var/lib/boinc` for the `gui_rpc_auth.cfg` file (this is the location most Linux distributions use for a system-wide BOINC installation), so things may just work without needing configuration for a local system." + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: "Boinc RPC interface" + description: BOINC requires use of a password to access its RPC interface. You can find this password in the `gui_rpc_auth.cfg` file in your BOINC directory. + configuration: + file: + name: python.d/boinc.conf + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. 
+ + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: "" + required: false + - name: hostname + description: Define a hostname where boinc is running. + default_value: "localhost" + required: false + - name: port + description: The port of boinc RPC interface. + default_value: "" + required: false + - name: password + description: Provide a password to connect to a boinc RPC interface. 
+ default_value: "" + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Configuration of a remote boinc instance + description: A basic JOB configuration for a remote boinc instance + folding: + enabled: false + config: | + remote: + hostname: '1.2.3.4' + port: 1234 + password: 'some-password' + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collecting metrics from local and remote instances. + config: | + localhost: + name: 'local' + hostname: '127.0.0.1' + port: 1234 + password: 'some-password' + + remote_job: + name: 'remote' + hostname: '192.0.2.1' + port: 1234 + password: some-other-password + troubleshooting: + problems: + list: [] + alerts: + - name: boinc_total_tasks + link: https://github.com/netdata/netdata/blob/master/health/health.d/boinc.conf + metric: boinc.tasks + info: average number of total tasks over the last 10 minutes + os: "*" + - name: boinc_active_tasks + link: https://github.com/netdata/netdata/blob/master/health/health.d/boinc.conf + metric: boinc.tasks + info: average number of active tasks over the last 10 minutes + os: "*" + - name: boinc_compute_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/boinc.conf + metric: boinc.states + info: average number of compute errors over the last 10 minutes + os: "*" + - name: boinc_upload_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/boinc.conf + metric: boinc.states + info: average number of failed uploads over the last 10 minutes + os: "*" metrics: - - name: boinc.tasks - description: Overall Tasks - unit: "tasks" - chart_type: line - dimensions: - - name: Total - - name: Active - - name: boinc.states - description: Tasks per State - unit: "tasks" - chart_type: line - dimensions: - - name: New - - name: Downloading - - name: Ready to Run - - name: Compute Errors - - name: Uploading - - name: Uploaded - - name: Aborted - - name: Failed 
Uploads - - name: boinc.sched - description: Tasks per Scheduler State - unit: "tasks" - chart_type: line - dimensions: - - name: Uninitialized - - name: Preempted - - name: Scheduled - - name: boinc.process - description: Tasks per Process State - unit: "tasks" - chart_type: line - dimensions: - - name: Uninitialized - - name: Executing - - name: Suspended - - name: Aborted - - name: Quit - - name: Copy Pending + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: boinc.tasks + description: Overall Tasks + unit: "tasks" + chart_type: line + dimensions: + - name: Total + - name: Active + - name: boinc.states + description: Tasks per State + unit: "tasks" + chart_type: line + dimensions: + - name: New + - name: Downloading + - name: Ready to Run + - name: Compute Errors + - name: Uploading + - name: Uploaded + - name: Aborted + - name: Failed Uploads + - name: boinc.sched + description: Tasks per Scheduler State + unit: "tasks" + chart_type: line + dimensions: + - name: Uninitialized + - name: Preempted + - name: Scheduled + - name: boinc.process + description: Tasks per Process State + unit: "tasks" + chart_type: line + dimensions: + - name: Uninitialized + - name: Executing + - name: Suspended + - name: Aborted + - name: Quit + - name: Copy Pending diff --git a/collectors/python.d.plugin/boinc/metrics.csv b/collectors/python.d.plugin/boinc/metrics.csv deleted file mode 100644 index 98c6e866..00000000 --- a/collectors/python.d.plugin/boinc/metrics.csv +++ /dev/null @@ -1,5 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -boinc.tasks,,"Total, Active",tasks,Overall Tasks,line,,python.d.plugin,boinc -boinc.states,,"New, Downloading, Ready to Run, Compute Errors, Uploading, Uploaded, Aborted, Failed Uploads",tasks,Tasks per State,line,,python.d.plugin,boinc 
-boinc.sched,,"Uninitialized, Preempted, Scheduled",tasks,Tasks per Scheduler State,line,,python.d.plugin,boinc -boinc.process,,"Uninitialized, Executing, Suspended, Aborted, Quit, Copy Pending",tasks,Tasks per Process State,line,,python.d.plugin,boinc diff --git a/collectors/python.d.plugin/ceph/metadata.yaml b/collectors/python.d.plugin/ceph/metadata.yaml index eabf6b26..0f06470b 100644 --- a/collectors/python.d.plugin/ceph/metadata.yaml +++ b/collectors/python.d.plugin/ceph/metadata.yaml @@ -1,164 +1,223 @@ -meta: - plugin_name: python.d.plugin - module_name: ceph - monitored_instance: - name: Ceph - link: '' - categories: - - data-collection.storage-mount-points-and-filesystems - icon_filename: 'ceph.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor Ceph metrics for efficient distributed storage system performance. Keep tabs on cluster health, data redundancy, and latency to ensure reliable storage operations.' 
- method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: -- name: ceph_cluster_space_usage - link: https://github.com/netdata/netdata/blob/master/health/health.d/ceph.conf - metric: ceph.general_usage - info: cluster disk space utilization -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: ceph + monitored_instance: + name: Ceph + link: 'https://ceph.io/' + categories: + - data-collection.storage-mount-points-and-filesystems + icon_filename: 'ceph.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: + - ceph + - storage + most_popular: false + overview: + data_collection: + metrics_description: 'This collector monitors Ceph metrics about Cluster statistics, OSD usage, latency and Pool statistics.' + method_description: 'Uses the `rados` python module to connect to a Ceph cluster.' 
+ supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: + - title: '`rados` python module' + description: 'Make sure the `rados` python module is installed' + - title: 'Granting read permissions to ceph group from keyring file' + description: 'Execute: `chmod 640 /etc/ceph/ceph.client.admin.keyring`' + - title: 'Create a specific rados_id' + description: 'You can optionally create a rados_id to use instead of admin' + configuration: + file: + name: python.d/ceph.conf + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. 
Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: '' + required: false + - name: config_file + description: Ceph config file + default_value: '' + required: true + - name: keyring_file + description: Ceph keyring file. The netdata user must be added to the ceph group, and the keyring file must be readable by that group. + default_value: '' + required: true + - name: rados_id + description: A rados user id to use for connecting to the Ceph cluster. + default_value: 'admin' + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Basic local Ceph cluster + description: A basic configuration to connect to a local Ceph cluster. + folding: + enabled: false + config: | + local: + config_file: '/etc/ceph/ceph.conf' + keyring_file: '/etc/ceph/ceph.client.admin.keyring' + troubleshooting: + problems: + list: [] + alerts: + - name: ceph_cluster_space_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/ceph.conf + metric: ceph.general_usage + info: cluster disk space utilization metrics: - - name: ceph.general_usage - description: Ceph General Space - unit: "KiB" - chart_type: stacked - dimensions: - - name: avail - - name: used - - name: ceph.general_objects - description: Ceph General Objects - unit: "objects" - chart_type: area - dimensions: - - name: cluster - - name: ceph.general_bytes - description: Ceph General Read/Write Data/s - unit: "KiB/s" - chart_type: area - dimensions: - - name: read - - name: write - - name: ceph.general_operations - description: Ceph General Read/Write Operations/s - unit: "operations" - chart_type: area - dimensions: - - name: read - - name: write - - name: ceph.general_latency - description: Ceph General Apply/Commit latency - unit: "milliseconds" - chart_type: area - dimensions: - - name: apply - - name: commit - - name: ceph.pool_usage - description: Ceph Pools - unit: "KiB" - chart_type: 
line - dimensions: - - name: a dimension per Ceph Pool - - name: ceph.pool_objects - description: Ceph Pools - unit: "objects" - chart_type: line - dimensions: - - name: a dimension per Ceph Pool - - name: ceph.pool_read_bytes - description: Ceph Read Pool Data/s - unit: "KiB/s" - chart_type: area - dimensions: - - name: a dimension per Ceph Pool - - name: ceph.pool_write_bytes - description: Ceph Write Pool Data/s - unit: "KiB/s" - chart_type: area - dimensions: - - name: a dimension per Ceph Pool - - name: ceph.pool_read_operations - description: Ceph Read Pool Operations/s - unit: "operations" - chart_type: area - dimensions: - - name: a dimension per Ceph Pool - - name: ceph.pool_write_operations - description: Ceph Write Pool Operations/s - unit: "operations" - chart_type: area - dimensions: - - name: a dimension per Ceph Pool - - name: ceph.osd_usage - description: Ceph OSDs - unit: "KiB" - chart_type: line - dimensions: - - name: a dimension per Ceph OSD - - name: ceph.osd_size - description: Ceph OSDs size - unit: "KiB" - chart_type: line - dimensions: - - name: a dimension per Ceph OSD - - name: ceph.apply_latency - description: Ceph OSDs apply latency - unit: "milliseconds" - chart_type: line - dimensions: - - name: a dimension per Ceph OSD - - name: ceph.commit_latency - description: Ceph OSDs commit latency - unit: "milliseconds" - chart_type: line - dimensions: - - name: a dimension per Ceph OSD + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." 
+ labels: [] + metrics: + - name: ceph.general_usage + description: Ceph General Space + unit: "KiB" + chart_type: stacked + dimensions: + - name: avail + - name: used + - name: ceph.general_objects + description: Ceph General Objects + unit: "objects" + chart_type: area + dimensions: + - name: cluster + - name: ceph.general_bytes + description: Ceph General Read/Write Data/s + unit: "KiB/s" + chart_type: area + dimensions: + - name: read + - name: write + - name: ceph.general_operations + description: Ceph General Read/Write Operations/s + unit: "operations" + chart_type: area + dimensions: + - name: read + - name: write + - name: ceph.general_latency + description: Ceph General Apply/Commit latency + unit: "milliseconds" + chart_type: area + dimensions: + - name: apply + - name: commit + - name: ceph.pool_usage + description: Ceph Pools + unit: "KiB" + chart_type: line + dimensions: + - name: a dimension per Ceph Pool + - name: ceph.pool_objects + description: Ceph Pools + unit: "objects" + chart_type: line + dimensions: + - name: a dimension per Ceph Pool + - name: ceph.pool_read_bytes + description: Ceph Read Pool Data/s + unit: "KiB/s" + chart_type: area + dimensions: + - name: a dimension per Ceph Pool + - name: ceph.pool_write_bytes + description: Ceph Write Pool Data/s + unit: "KiB/s" + chart_type: area + dimensions: + - name: a dimension per Ceph Pool + - name: ceph.pool_read_operations + description: Ceph Read Pool Operations/s + unit: "operations" + chart_type: area + dimensions: + - name: a dimension per Ceph Pool + - name: ceph.pool_write_operations + description: Ceph Write Pool Operations/s + unit: "operations" + chart_type: area + dimensions: + - name: a dimension per Ceph Pool + - name: ceph.osd_usage + description: Ceph OSDs + unit: "KiB" + chart_type: line + dimensions: + - name: a dimension per Ceph OSD + - name: ceph.osd_size + description: Ceph OSDs size + unit: "KiB" + chart_type: line + dimensions: + - name: a dimension per Ceph OSD + - 
name: ceph.apply_latency + description: Ceph OSDs apply latency + unit: "milliseconds" + chart_type: line + dimensions: + - name: a dimension per Ceph OSD + - name: ceph.commit_latency + description: Ceph OSDs commit latency + unit: "milliseconds" + chart_type: line + dimensions: + - name: a dimension per Ceph OSD diff --git a/collectors/python.d.plugin/ceph/metrics.csv b/collectors/python.d.plugin/ceph/metrics.csv deleted file mode 100644 index e64f2cf5..00000000 --- a/collectors/python.d.plugin/ceph/metrics.csv +++ /dev/null @@ -1,16 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -ceph.general_usage,,"avail, used",KiB,Ceph General Space,stacked,,python.d.plugin,ceph -ceph.general_objects,,cluster,objects,Ceph General Objects,area,,python.d.plugin,ceph -ceph.general_bytes,,"read, write",KiB/s,Ceph General Read/Write Data/s,area,,python.d.plugin,ceph -ceph.general_operations,,"read, write",operations,Ceph General Read/Write Operations/s,area,,python.d.plugin,ceph -ceph.general_latency,,"apply, commit",milliseconds,Ceph General Apply/Commit latency,area,,python.d.plugin,ceph -ceph.pool_usage,,a dimension per Ceph Pool,KiB,Ceph Pools,line,,python.d.plugin,ceph -ceph.pool_objects,,a dimension per Ceph Pool,objects,Ceph Pools,line,,python.d.plugin,ceph -ceph.pool_read_bytes,,a dimension per Ceph Pool,KiB/s,Ceph Read Pool Data/s,area,,python.d.plugin,ceph -ceph.pool_write_bytes,,a dimension per Ceph Pool,KiB/s,Ceph Write Pool Data/s,area,,python.d.plugin,ceph -ceph.pool_read_operations,,a dimension per Ceph Pool,operations,Ceph Read Pool Operations/s,area,,python.d.plugin,ceph -ceph.pool_write_operations,,a dimension per Ceph Pool,operations,Ceph Write Pool Operations/s,area,,python.d.plugin,ceph -ceph.osd_usage,,a dimension per Ceph OSD,KiB,Ceph OSDs,line,,python.d.plugin,ceph -ceph.osd_size,,a dimension per Ceph OSD,KiB,Ceph OSDs size,line,,python.d.plugin,ceph -ceph.apply_latency,,a dimension per Ceph OSD,milliseconds,Ceph OSDs 
apply latency,line,,python.d.plugin,ceph -ceph.commit_latency,,a dimension per Ceph OSD,milliseconds,Ceph OSDs commit latency,line,,python.d.plugin,ceph diff --git a/collectors/python.d.plugin/changefinder/metadata.yaml b/collectors/python.d.plugin/changefinder/metadata.yaml index 99b28e48..6dcd903e 100644 --- a/collectors/python.d.plugin/changefinder/metadata.yaml +++ b/collectors/python.d.plugin/changefinder/metadata.yaml @@ -1,77 +1,80 @@ -meta: - plugin_name: python.d.plugin - module_name: changefinder - monitored_instance: - name: python.d changefinder - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: changefinder + monitored_instance: + name: python.d changefinder + link: '' + categories: + - data-collection.other + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + 
include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: changefinder.scores - description: ChangeFinder - unit: "score" - chart_type: line - dimensions: - - name: a dimension per chart - - name: changefinder.flags - description: ChangeFinder - unit: "flag" - chart_type: stacked - dimensions: - - name: a dimension per chart + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: changefinder.scores + description: ChangeFinder + unit: "score" + chart_type: line + dimensions: + - name: a dimension per chart + - name: changefinder.flags + description: ChangeFinder + unit: "flag" + chart_type: stacked + dimensions: + - name: a dimension per chart diff --git a/collectors/python.d.plugin/changefinder/metrics.csv b/collectors/python.d.plugin/changefinder/metrics.csv deleted file mode 100644 index ecad582b..00000000 --- a/collectors/python.d.plugin/changefinder/metrics.csv +++ /dev/null @@ -1,3 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -changefinder.scores,,a dimension per chart,score,ChangeFinder,line,,python.d.plugin,changefinder -changefinder.flags,,a dimension per chart,flag,ChangeFinder,stacked,,python.d.plugin,changefinder diff --git a/collectors/python.d.plugin/dovecot/metadata.yaml b/collectors/python.d.plugin/dovecot/metadata.yaml index 35d820ef..b247da84 100644 --- a/collectors/python.d.plugin/dovecot/metadata.yaml +++ 
b/collectors/python.d.plugin/dovecot/metadata.yaml @@ -1,146 +1,207 @@ -meta: - plugin_name: python.d.plugin - module_name: dovecot - monitored_instance: - name: Dovecot - link: '' - categories: - - data-collection.mail-servers - icon_filename: 'dovecot.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Examine Dovecot metrics for insights into IMAP and POP3 server operations. Analyze connection statuses, mailbox operations, and error rates for efficient mail server operations.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: dovecot + monitored_instance: + name: Dovecot + link: 'https://www.dovecot.org/' + categories: + - data-collection.mail-servers + icon_filename: 'dovecot.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: + - dovecot + - imap + - mail + most_popular: false + overview: + data_collection: + metrics_description: 'This collector monitors Dovecot metrics about sessions, logins, commands, page faults and more.' 
+ method_description: 'It uses the dovecot socket and executes the `EXPORT global` command to get the statistics.' + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: 'If no configuration is given, the collector will attempt to connect to dovecot using the UNIX socket located at `/var/run/dovecot/stats`' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: + - title: 'Dovecot configuration' + description: The Dovecot UNIX socket should have R/W permissions for user netdata, or Dovecot should be configured with a TCP/IP socket. + configuration: + file: + name: python.d/dovecot.conf + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. 
JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: '' + required: false + - name: socket + description: Use this socket to communicate with Dovecot + default_value: /var/run/dovecot/stats + required: false + - name: host + description: Instead of using a socket, you can point the collector to an IP address for Dovecot statistics. + default_value: '' + required: false + - name: port + description: Used in combination with host, configures the port Dovecot listens on. + default_value: '' + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Local TCP + description: A basic TCP configuration. + config: | + localtcpip: + name: 'local' + host: '127.0.0.1' + port: 24242 + - name: Local socket + description: A basic local socket configuration + config: | + localsocket: + name: 'local' + socket: '/var/run/dovecot/stats' + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: dovecot.sessions - description: Dovecot Active Sessions - unit: "number" - chart_type: line - dimensions: - - name: active sessions - - name: dovecot.logins - description: Dovecot Logins - unit: "number" - chart_type: line - dimensions: - - name: logins - - name: dovecot.commands - description: Dovecot Commands - unit: "commands" - chart_type: line - dimensions: - - name: commands - - name: dovecot.faults - description: Dovecot Page Faults - unit: "faults" - chart_type: line - dimensions: - - name: minor - - name: major - - name: dovecot.context_switches - description: Dovecot Context Switches - unit: "switches" - chart_type: line - dimensions: - - name: voluntary - - name: involuntary - - name: dovecot.io - description: Dovecot Disk I/O - unit: "KiB/s" - chart_type: area - dimensions: - - name: read - - name: write - - name: dovecot.net - description: Dovecot Network Bandwidth - unit: "kilobits/s" - 
chart_type: area - dimensions: - - name: read - - name: write - - name: dovecot.syscalls - description: Dovecot Number of SysCalls - unit: "syscalls/s" - chart_type: line - dimensions: - - name: read - - name: write - - name: dovecot.lookup - description: Dovecot Lookups - unit: "number/s" - chart_type: stacked - dimensions: - - name: path - - name: attr - - name: dovecot.cache - description: Dovecot Cache Hits - unit: "hits/s" - chart_type: line - dimensions: - - name: hits - - name: dovecot.auth - description: Dovecot Authentications - unit: "attempts" - chart_type: stacked - dimensions: - - name: ok - - name: failed - - name: dovecot.auth_cache - description: Dovecot Authentication Cache - unit: "number" - chart_type: stacked - dimensions: - - name: hit - - name: miss + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: dovecot.sessions + description: Dovecot Active Sessions + unit: "number" + chart_type: line + dimensions: + - name: active sessions + - name: dovecot.logins + description: Dovecot Logins + unit: "number" + chart_type: line + dimensions: + - name: logins + - name: dovecot.commands + description: Dovecot Commands + unit: "commands" + chart_type: line + dimensions: + - name: commands + - name: dovecot.faults + description: Dovecot Page Faults + unit: "faults" + chart_type: line + dimensions: + - name: minor + - name: major + - name: dovecot.context_switches + description: Dovecot Context Switches + unit: "switches" + chart_type: line + dimensions: + - name: voluntary + - name: involuntary + - name: dovecot.io + description: Dovecot Disk I/O + unit: "KiB/s" + chart_type: area + dimensions: + - name: read + - name: write + - name: dovecot.net + description: Dovecot Network Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: read + - name: write + - name: 
dovecot.syscalls + description: Dovecot Number of SysCalls + unit: "syscalls/s" + chart_type: line + dimensions: + - name: read + - name: write + - name: dovecot.lookup + description: Dovecot Lookups + unit: "number/s" + chart_type: stacked + dimensions: + - name: path + - name: attr + - name: dovecot.cache + description: Dovecot Cache Hits + unit: "hits/s" + chart_type: line + dimensions: + - name: hits + - name: dovecot.auth + description: Dovecot Authentications + unit: "attempts" + chart_type: stacked + dimensions: + - name: ok + - name: failed + - name: dovecot.auth_cache + description: Dovecot Authentication Cache + unit: "number" + chart_type: stacked + dimensions: + - name: hit + - name: miss diff --git a/collectors/python.d.plugin/dovecot/metrics.csv b/collectors/python.d.plugin/dovecot/metrics.csv deleted file mode 100644 index dbffd0b3..00000000 --- a/collectors/python.d.plugin/dovecot/metrics.csv +++ /dev/null @@ -1,13 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -dovecot.sessions,,active sessions,number,Dovecot Active Sessions,line,,python.d.plugin,dovecot -dovecot.logins,,logins,number,Dovecot Logins,line,,python.d.plugin,dovecot -dovecot.commands,,commands,commands,Dovecot Commands,line,,python.d.plugin,dovecot -dovecot.faults,,"minor, major",faults,Dovecot Page Faults,line,,python.d.plugin,dovecot -dovecot.context_switches,,"voluntary, involuntary",switches,Dovecot Context Switches,line,,python.d.plugin,dovecot -dovecot.io,,"read, write",KiB/s,Dovecot Disk I/O,area,,python.d.plugin,dovecot -dovecot.net,,"read, write",kilobits/s,Dovecot Network Bandwidth,area,,python.d.plugin,dovecot -dovecot.syscalls,,"read, write",syscalls/s,Dovecot Number of SysCalls,line,,python.d.plugin,dovecot -dovecot.lookup,,"path, attr",number/s,Dovecot Lookups,stacked,,python.d.plugin,dovecot -dovecot.cache,,hits,hits/s,Dovecot Cache Hits,line,,python.d.plugin,dovecot -dovecot.auth,,"ok, failed",attempts,Dovecot 
Authentications,stacked,,python.d.plugin,dovecot -dovecot.auth_cache,,"hit, miss",number,Dovecot Authentication Cache,stacked,,python.d.plugin,dovecot diff --git a/collectors/python.d.plugin/example/metadata.yaml b/collectors/python.d.plugin/example/metadata.yaml new file mode 100644 index 00000000..eae84d9e --- /dev/null +++ b/collectors/python.d.plugin/example/metadata.yaml @@ -0,0 +1,138 @@ +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: example + monitored_instance: + name: Example collector + link: https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/example/README.md + categories: + - data-collection.other + icon_filename: "" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - example + - netdata + - python + most_popular: false + overview: + data_collection: + metrics_description: | + Example collector that generates some random numbers as metrics. + + If you want to write your own collector, read our [writing a new Python module](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/README.md#how-to-write-a-new-module) tutorial. + method_description: | + The `get_data()` function uses `random.randint()` to generate a random number which will be collected as a metric. + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: python.d/example.conf + description: "" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. 
+ + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: Config options + enabled: true + list: + - name: num_lines + description: The number of lines to create. + default_value: 4 + required: false + - name: lower + description: The lower bound of numbers to randomly sample from. + default_value: 0 + required: false + - name: upper + description: The upper bound of numbers to randomly sample from. + default_value: 100 + required: false + - name: update_every + description: Sets the default data collection frequency. + default_value: 1 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: "" + required: false + examples: + folding: + enabled: true + title: Config + list: + - name: Basic + folding: + enabled: false + description: A basic example configuration. 
+ config: | + four_lines: + name: "Four Lines" + update_every: 1 + priority: 60000 + penalty: yes + autodetection_retry: 0 + num_lines: 4 + lower: 0 + upper: 100 + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: | + These metrics refer to the entire monitored application. + labels: [] + metrics: + - name: example.random + description: A random number + unit: number + chart_type: line + dimensions: + - name: random diff --git a/collectors/python.d.plugin/exim/metadata.yaml b/collectors/python.d.plugin/exim/metadata.yaml index 092479a0..a8be02d9 100644 --- a/collectors/python.d.plugin/exim/metadata.yaml +++ b/collectors/python.d.plugin/exim/metadata.yaml @@ -1,72 +1,132 @@ -meta: - plugin_name: python.d.plugin - module_name: exim - monitored_instance: - name: Exim - link: '' - categories: - - data-collection.mail-servers - icon_filename: 'exim.jpg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor Exim metrics for efficient mail transfer' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: 
+ plugin_name: python.d.plugin + module_name: exim + monitored_instance: + name: Exim + link: "https://www.exim.org/" + categories: + - data-collection.mail-servers + icon_filename: "exim.jpg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - exim + - mail + - server + most_popular: false + overview: + data_collection: + metrics_description: "This collector monitors Exim mail queue." + method_description: "It uses the `exim` command line binary to get the statistics." + supported_platforms: + include: [] + exclude: [] + multi_instance: false + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "Assuming setup prerequisites are met, the collector will try to gather statistics using the method described above, even without any configuration." + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: "Exim configuration - local installation" + description: | + The module uses the `exim` binary, which can only be executed as root by default. We need to allow other users to run the `exim` binary. We solve that by adding the `queue_list_requires_admin` statement to the exim configuration and setting it to `false`, because it is `true` by default. On many Linux distributions, the default location of the `exim` configuration is `/etc/exim.conf`. + + 1. Edit the `exim` configuration with your preferred editor and add: + `queue_list_requires_admin = false` + 2. Restart `exim` and Netdata + - title: "Exim configuration - WHM (CPanel) server" + description: | + On a WHM server, you can reconfigure `exim` over the WHM interface with the following steps. + + 1. Login to WHM + 2. Navigate to Service Configuration --> Exim Configuration Manager --> tab Advanced Editor + 3. Scroll down to the button **Add additional configuration setting** and click on it. + 4. 
In the new dropdown which will appear above we need to find and choose: + `queue_list_requires_admin` and set to `false` + 5. Scroll to the end and click the **Save** button. + configuration: + file: + name: python.d/exim.conf + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. 
+ default_value: "" + required: false + - name: command + description: Path and command to the `exim` binary + default_value: "exim -bpc" + required: false + examples: + folding: + enabled: false + title: "Config" + list: + - name: Local exim install + description: A basic local exim install + config: | + local: + command: 'exim -bpc' + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: exim.qemails - description: Exim Queue Emails - unit: "emails" - chart_type: line - dimensions: - - name: emails + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: exim.qemails + description: Exim Queue Emails + unit: "emails" + chart_type: line + dimensions: + - name: emails diff --git a/collectors/python.d.plugin/exim/metrics.csv b/collectors/python.d.plugin/exim/metrics.csv deleted file mode 100644 index 8e6cc0c2..00000000 --- a/collectors/python.d.plugin/exim/metrics.csv +++ /dev/null @@ -1,2 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -exim.qemails,,emails,emails,Exim Queue Emails,line,,python.d.plugin,exim diff --git a/collectors/python.d.plugin/fail2ban/metadata.yaml b/collectors/python.d.plugin/fail2ban/metadata.yaml index 1c906c67..80aa68b6 100644 --- a/collectors/python.d.plugin/fail2ban/metadata.yaml +++ b/collectors/python.d.plugin/fail2ban/metadata.yaml @@ -1,84 +1,180 @@ -meta: - plugin_name: python.d.plugin - module_name: fail2ban - monitored_instance: - name: Fail2ban - link: '' - categories: - - data-collection.authentication-and-authorization - icon_filename: 'fail2ban.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor Fail2ban performance for prime intrusion prevention operations. 
Monitor ban counts, jail statuses, and failed login attempts to ensure robust network security.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: fail2ban + monitored_instance: + name: Fail2ban + link: https://www.fail2ban.org/ + categories: + - data-collection.authentication-and-authorization + icon_filename: "fail2ban.png" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - fail2ban + - security + - authentication + - authorization + most_popular: false + overview: + data_collection: + metrics_description: | + Monitor Fail2ban performance for prime intrusion prevention operations. Monitor ban counts, jail statuses, and failed login attempts to ensure robust network security. + method_description: | + It collects metrics through reading the default log and configuration files of fail2ban. + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: | + The `fail2ban.log` file must be readable by the user `netdata`. + - change the file ownership and access permissions. + - update `/etc/logrotate.d/fail2ban` to persist the changes after rotating the log file. 
+ + To change the file ownership and access permissions, execute the following: + + ```shell + sudo chown root:netdata /var/log/fail2ban.log + sudo chmod 640 /var/log/fail2ban.log + ``` + + To persist the changes after rotating the log file, add `create 640 root netdata` to the `/etc/logrotate.d/fail2ban`: + + ```shell + /var/log/fail2ban.log { + + weekly + rotate 4 + compress + + delaycompress + missingok + postrotate + fail2ban-client flushlogs 1>/dev/null + endscript + + # If fail2ban runs as non-root it still needs to have write access + # to logfiles. + # create 640 fail2ban adm + create 640 root netdata + } + ``` + default_behavior: + auto_detection: + description: | + By default the collector will attempt to read log file at /var/log/fail2ban.log and conf file at /etc/fail2ban/jail.local. If conf file is not found default jail is ssh. + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: "" + description: "" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: Config options + enabled: true + list: + - name: log_path + description: path to fail2ban.log. + default_value: /var/log/fail2ban.log + required: false + - name: conf_path + description: path to jail.local/jail.conf. + default_value: /etc/fail2ban/jail.local + required: false + - name: conf_dir + description: path to jail.d/. 
+ default_value: /etc/fail2ban/jail.d/ + required: false + - name: exclude + description: jails you want to exclude from autodetection. + default_value: "" + required: false + - name: update_every + description: Sets the default data collection frequency. + default_value: 1 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: "" + required: false + examples: + folding: + enabled: true + title: Config + list: + - name: Basic + folding: + enabled: false + description: A basic example configuration. + config: | + local: + log_path: '/var/log/fail2ban.log' + conf_path: '/etc/fail2ban/jail.local' + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: fail2ban.faile_attempts - description: Failed attempts - unit: "attempts/s" - chart_type: line - dimensions: - - name: a dimension per jail - - name: fail2ban.bans - description: Bans - unit: "bans/s" - chart_type: line - dimensions: - - name: a dimension per jail - - name: fail2ban.banned_ips - description: Banned IP addresses (since the last restart of netdata) - unit: "ips" - chart_type: line - dimensions: - - name: a dimension per jail + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: | + These metrics refer to the entire monitored application. 
+ labels: [] + metrics: + - name: fail2ban.failed_attempts + description: Failed attempts + unit: "attempts/s" + chart_type: line + dimensions: + - name: a dimension per jail + - name: fail2ban.bans + description: Bans + unit: "bans/s" + chart_type: line + dimensions: + - name: a dimension per jail + - name: fail2ban.banned_ips + description: Banned IP addresses (since the last restart of netdata) + unit: "ips" + chart_type: line + dimensions: + - name: a dimension per jail diff --git a/collectors/python.d.plugin/fail2ban/metrics.csv b/collectors/python.d.plugin/fail2ban/metrics.csv deleted file mode 100644 index 13ef80f4..00000000 --- a/collectors/python.d.plugin/fail2ban/metrics.csv +++ /dev/null @@ -1,4 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -fail2ban.faile_attempts,,a dimension per jail,attempts/s,Failed attempts,line,,python.d.plugin,fail2ban -fail2ban.bans,,a dimension per jail,bans/s,Bans,line,,python.d.plugin,fail2ban -fail2ban.banned_ips,,a dimension per jail,ips,Banned IP addresses (since the last restart of netdata),line,,python.d.plugin,fail2ban diff --git a/collectors/python.d.plugin/gearman/gearman.conf b/collectors/python.d.plugin/gearman/gearman.conf index c41fd9ff..635e893e 100644 --- a/collectors/python.d.plugin/gearman/gearman.conf +++ b/collectors/python.d.plugin/gearman/gearman.conf @@ -61,8 +61,11 @@ # # Additionally to the above, gearman also supports the following: # -# hostname: localhost # The host running the Gearman server +# host: localhost # The host running the Gearman server # port: 4730 # Port of the Gearman server +# tls: no # Whether to use TLS or not +# cert: /path/to/cert # Path to cert if using TLS +# key: /path/to/key # Path to key if using TLS # ---------------------------------------------------------------------- # AUTO-DETECTION JOB diff --git a/collectors/python.d.plugin/gearman/metadata.yaml b/collectors/python.d.plugin/gearman/metadata.yaml index 49d4e27b..f1760568 100644 --- 
a/collectors/python.d.plugin/gearman/metadata.yaml +++ b/collectors/python.d.plugin/gearman/metadata.yaml @@ -1,89 +1,168 @@ -meta: - plugin_name: python.d.plugin - module_name: gearman - monitored_instance: - name: Gearman - link: '' - categories: - - data-collection.distributed-computing-systems - icon_filename: 'gearman.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor Gearman metrics for proficient system task distribution. Track job counts, worker statuses, and queue lengths for effective distributed task management.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: -- name: gearman_workers_queued - link: https://github.com/netdata/netdata/blob/master/health/health.d/gearman.conf - metric: gearman.single_job - info: average number of queued jobs over the last 10 minutes -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: gearman + monitored_instance: + name: Gearman + link: "http://gearman.org/" + categories: + - data-collection.distributed-computing-systems + icon_filename: "gearman.png" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - gearman + 
- gearman job server + most_popular: false + overview: + data_collection: + metrics_description: "Monitor Gearman metrics for proficient system task distribution. Track job counts, worker statuses, and queue lengths for effective distributed task management." + method_description: "This collector connects to a Gearman instance via either TCP or unix socket." + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "When no configuration file is found, the collector tries to connect to TCP/IP socket: localhost:4730." + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: "Socket permissions" + description: The gearman UNIX socket should have read permission for user netdata. + configuration: + file: + name: python.d/gearman.conf + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. 
+ default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: "" + required: false + - name: host + description: URL or IP where gearman is running. + default_value: "localhost" + required: false + - name: port + description: Port of URL or IP where gearman is running. + default_value: "4730" + required: false + - name: tls + description: Use tls to connect to gearman. + default_value: "false" + required: false + - name: cert + description: Provide a certificate file if needed to connect to a TLS gearman instance. + default_value: "" + required: false + - name: key + description: Provide a key file if needed to connect to a TLS gearman instance. + default_value: "" + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Local gearman service + description: A basic host and port gearman configuration for localhost. + folding: + enabled: false + config: | + localhost: + name: 'local' + host: 'localhost' + port: 4730 + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collecting metrics from local and remote instances. 
+ config: | + localhost: + name: 'local' + host: 'localhost' + port: 4730 + + remote: + name: 'remote' + host: '192.0.2.1' + port: 4730 + troubleshooting: + problems: + list: [] + alerts: + - name: gearman_workers_queued + link: https://github.com/netdata/netdata/blob/master/health/health.d/gearman.conf + metric: gearman.single_job + info: average number of queued jobs over the last 10 minutes metrics: - - name: gearman.total_jobs - description: Total Jobs - unit: "Jobs" - chart_type: line - dimensions: - - name: Pending - - name: Running - - name: gearman job - description: "" - labels: [] - metrics: - - name: gearman.single_job - description: '{job_name}' - unit: "Jobs" - chart_type: stacked - dimensions: - - name: Pending - - name: Idle - - name: Runnning + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: gearman.total_jobs + description: Total Jobs + unit: "Jobs" + chart_type: line + dimensions: + - name: Pending + - name: Running + - name: gearman job + description: "Metrics related to Gearman jobs. Each job produces its own set of the following metrics." 
+ labels: [] + metrics: + - name: gearman.single_job + description: "{job_name}" + unit: "Jobs" + chart_type: stacked + dimensions: + - name: Pending + - name: Idle + - name: Runnning diff --git a/collectors/python.d.plugin/gearman/metrics.csv b/collectors/python.d.plugin/gearman/metrics.csv deleted file mode 100644 index 0592e75d..00000000 --- a/collectors/python.d.plugin/gearman/metrics.csv +++ /dev/null @@ -1,3 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -gearman.total_jobs,,"Pending, Running",Jobs,Total Jobs,line,,python.d.plugin,gearman -gearman.single_job,gearman job,"Pending, Idle, Runnning",Jobs,{job_name},stacked,,python.d.plugin,gearman diff --git a/collectors/python.d.plugin/go_expvar/metadata.yaml b/collectors/python.d.plugin/go_expvar/metadata.yaml index 31b85fa1..92669dd9 100644 --- a/collectors/python.d.plugin/go_expvar/metadata.yaml +++ b/collectors/python.d.plugin/go_expvar/metadata.yaml @@ -1,109 +1,319 @@ -meta: - plugin_name: python.d.plugin - module_name: go_expvar - monitored_instance: - name: Go applications - link: '' - categories: - - data-collection.apm - icon_filename: 'go.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor Go applications performance for optimal Go language software operations. Monitor runtime statistics, garbage collection, and memory usage to enhance Go application performance.' 
- method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: go_expvar + monitored_instance: + name: Go applications + link: "https://pkg.go.dev/expvar" + categories: + - data-collection.apm + icon_filename: "go.png" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - go + - expvar + - application + most_popular: false + overview: + data_collection: + metrics_description: "This collector monitors Go applications that expose their metrics with the use of the `expvar` package from the Go standard library. It produces charts for Go runtime memory statistics and optionally any number of custom charts." + method_description: "It connects via http to gather the metrics exposed via the `expvar` package." + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: "Sample `expvar` usage in a Go application" + description: | + The `expvar` package exposes metrics over HTTP and is very easy to use. 
+ Consider this minimal sample below: + + ```go + package main + + import ( + _ "expvar" + "net/http" + ) + + func main() { + http.ListenAndServe("127.0.0.1:8080", nil) + } + ``` + + When imported this way, the `expvar` package registers a HTTP handler at `/debug/vars` that + exposes Go runtime's memory statistics in JSON format. You can inspect the output by opening + the URL in your browser (or by using `wget` or `curl`). + + Sample output: + + ```json + { + "cmdline": ["./expvar-demo-binary"], + "memstats": {"Alloc":630856,"TotalAlloc":630856,"Sys":3346432,"Lookups":27, <omitted for brevity>} + } + ``` + + You can of course expose and monitor your own variables as well. + Here is a sample Go application that exposes a few custom variables: + + ```go + package main + + import ( + "expvar" + "net/http" + "runtime" + "time" + ) + + func main() { + + tick := time.NewTicker(1 * time.Second) + num_go := expvar.NewInt("runtime.goroutines") + counters := expvar.NewMap("counters") + counters.Set("cnt1", new(expvar.Int)) + counters.Set("cnt2", new(expvar.Float)) + + go http.ListenAndServe(":8080", nil) + + for { + select { + case <- tick.C: + num_go.Set(int64(runtime.NumGoroutine())) + counters.Add("cnt1", 1) + counters.AddFloat("cnt2", 1.452) + } + } + } + ``` + + Apart from the runtime memory stats, this application publishes two counters and the + number of currently running Goroutines and updates these stats every second. + configuration: + file: + name: python.d/go_expvar.conf + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. 
+ + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. Each JOB can be used to monitor a different Go application. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: "" + required: false + - name: url + description: the URL and port of the expvar endpoint. Please include the whole path of the endpoint, as the expvar handler can be installed in a non-standard location. + default_value: "" + required: true + - name: user + description: If the URL is password protected, this is the username to use. + default_value: "" + required: false + - name: pass + description: If the URL is password protected, this is the password to use. + default_value: "" + required: false + - name: collect_memstats + description: Enables charts for Go runtime's memory statistics. + default_value: "" + required: false + - name: extra_charts + description: Defines extra data/charts to monitor, please see the example below. 
+ default_value: "" + required: false + examples: + folding: + enabled: false + title: "Config" + list: + - name: Monitor a Go app1 application + description: | + The example below sets a configuration for a Go application, called `app1`. Besides the `memstats`, the application also exposes two counters and the number of currently running Goroutines and updates these stats every second. + + The `go_expvar` collector can monitor these as well with the use of the `extra_charts` configuration variable. + + The `extra_charts` variable is a YAML list of Netdata chart definitions. + Each chart definition has the following keys: + + ``` + id: Netdata chart ID + options: a key-value mapping of chart options + lines: a list of line definitions + ``` + + **Note: please do not use dots in the chart or line ID field. + See [this issue](https://github.com/netdata/netdata/pull/1902#issuecomment-284494195) for explanation.** + + Please see these two links to the official Netdata documentation for more information about the values: + + - [External plugins - charts](https://github.com/netdata/netdata/blob/master/collectors/plugins.d/README.md#chart) + - [Chart variables](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/README.md#global-variables-order-and-chart) + + **Line definitions** + + Each chart can define multiple lines (dimensions). + A line definition is a key-value mapping of line options. 
+ Each line can have the following options: + + ``` + # mandatory + expvar_key: the name of the expvar as present in the JSON output of /debug/vars endpoint + expvar_type: value type; supported are "float" or "int" + id: the id of this line/dimension in Netdata + + # optional - Netdata defaults are used if these options are not defined + name: '' + algorithm: absolute + multiplier: 1 + divisor: 100 if expvar_type == float, 1 if expvar_type == int + hidden: False + ``` + + Please see the following link for more information about the options and their default values: + [External plugins - dimensions](https://github.com/netdata/netdata/blob/master/collectors/plugins.d/README.md#dimension) + + Apart from top-level expvars, this plugin can also parse expvars stored in a multi-level map; + All dicts in the resulting JSON document are then flattened to one level. + Expvar names are joined together with '.' when flattening. + + Example: + + ``` + { + "counters": {"cnt1": 1042, "cnt2": 1512.9839999999983}, + "runtime.goroutines": 5 + } + ``` + + In the above case, the exported variables will be available under `runtime.goroutines`, + `counters.cnt1` and `counters.cnt2` expvar_keys. If the flattening results in a key collision, + the first defined key wins and all subsequent keys with the same name are ignored. 
+ config: | + app1: + name : 'app1' + url : 'http://127.0.0.1:8080/debug/vars' + collect_memstats: true + extra_charts: + - id: "runtime_goroutines" + options: + name: num_goroutines + title: "runtime: number of goroutines" + units: goroutines + family: runtime + context: expvar.runtime.goroutines + chart_type: line + lines: + - {expvar_key: 'runtime.goroutines', expvar_type: int, id: runtime_goroutines} + - id: "foo_counters" + options: + name: counters + title: "some random counters" + units: awesomeness + family: counters + context: expvar.foo.counters + chart_type: line + lines: + - {expvar_key: 'counters.cnt1', expvar_type: int, id: counters_cnt1} + - {expvar_key: 'counters.cnt2', expvar_type: float, id: counters_cnt2} + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: expvar.memstats.heap - description: 'memory: size of heap memory structures' - unit: "KiB" - chart_type: line - dimensions: - - name: alloc - - name: inuse - - name: expvar.memstats.stack - description: 'memory: size of stack memory structures' - unit: "KiB" - chart_type: line - dimensions: - - name: inuse - - name: expvar.memstats.mspan - description: 'memory: size of mspan memory structures' - unit: "KiB" - chart_type: line - dimensions: - - name: inuse - - name: expvar.memstats.mcache - description: 'memory: size of mcache memory structures' - unit: "KiB" - chart_type: line - dimensions: - - name: inuse - - name: expvar.memstats.live_objects - description: 'memory: number of live objects' - unit: "objects" - chart_type: line - dimensions: - - name: live - - name: expvar.memstats.sys - description: 'memory: size of reserved virtual address space' - unit: "KiB" - chart_type: line - dimensions: - - name: sys - - name: expvar.memstats.gc_pauses - description: 'memory: average duration of GC pauses' - unit: "ns" - chart_type: line - dimensions: - - name: avg + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + 
description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: expvar.memstats.heap + description: "memory: size of heap memory structures" + unit: "KiB" + chart_type: line + dimensions: + - name: alloc + - name: inuse + - name: expvar.memstats.stack + description: "memory: size of stack memory structures" + unit: "KiB" + chart_type: line + dimensions: + - name: inuse + - name: expvar.memstats.mspan + description: "memory: size of mspan memory structures" + unit: "KiB" + chart_type: line + dimensions: + - name: inuse + - name: expvar.memstats.mcache + description: "memory: size of mcache memory structures" + unit: "KiB" + chart_type: line + dimensions: + - name: inuse + - name: expvar.memstats.live_objects + description: "memory: number of live objects" + unit: "objects" + chart_type: line + dimensions: + - name: live + - name: expvar.memstats.sys + description: "memory: size of reserved virtual address space" + unit: "KiB" + chart_type: line + dimensions: + - name: sys + - name: expvar.memstats.gc_pauses + description: "memory: average duration of GC pauses" + unit: "ns" + chart_type: line + dimensions: + - name: avg diff --git a/collectors/python.d.plugin/go_expvar/metrics.csv b/collectors/python.d.plugin/go_expvar/metrics.csv deleted file mode 100644 index 5d96ff75..00000000 --- a/collectors/python.d.plugin/go_expvar/metrics.csv +++ /dev/null @@ -1,8 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -expvar.memstats.heap,,"alloc, inuse",KiB,memory: size of heap memory structures,line,,python.d.plugin,go_expvar -expvar.memstats.stack,,inuse,KiB,memory: size of stack memory structures,line,,python.d.plugin,go_expvar -expvar.memstats.mspan,,inuse,KiB,memory: size of mspan memory structures,line,,python.d.plugin,go_expvar -expvar.memstats.mcache,,inuse,KiB,memory: size of mcache memory structures,line,,python.d.plugin,go_expvar -expvar.memstats.live_objects,,live,objects,memory: number of 
live objects,line,,python.d.plugin,go_expvar -expvar.memstats.sys,,sys,KiB,memory: size of reserved virtual address space,line,,python.d.plugin,go_expvar -expvar.memstats.gc_pauses,,avg,ns,memory: average duration of GC pauses,line,,python.d.plugin,go_expvar diff --git a/collectors/python.d.plugin/haproxy/metadata.yaml b/collectors/python.d.plugin/haproxy/metadata.yaml index 401313e9..82ab37d2 100644 --- a/collectors/python.d.plugin/haproxy/metadata.yaml +++ b/collectors/python.d.plugin/haproxy/metadata.yaml @@ -1,254 +1,322 @@ -meta: - plugin_name: python.d.plugin - module_name: haproxy - monitored_instance: - name: HAProxy - link: '' - categories: - - data-collection.web-servers-and-web-proxies - icon_filename: 'haproxy.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor HAProxy performance for ideal load balancing operations. Monitor session rates, queue lengths, and error rates to maintain balanced network traffic.' 
- method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: -- name: haproxy_backend_server_status - link: https://github.com/netdata/netdata/blob/master/health/health.d/haproxy.conf - metric: haproxy_hs.down - info: average number of failed haproxy backend servers over the last 10 seconds -- name: haproxy_backend_status - link: https://github.com/netdata/netdata/blob/master/health/health.d/haproxy.conf - metric: haproxy_hb.down - info: average number of failed haproxy backends over the last 10 seconds -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] - metrics: - - name: haproxy_f.bin - description: Kilobytes In - unit: "KiB/s" - chart_type: line - dimensions: - - name: a dimension per frontend server - - name: haproxy_f.bout - description: Kilobytes Out - unit: "KiB/s" - chart_type: line - dimensions: - - name: a dimension per frontend server - - name: haproxy_f.scur - description: Sessions Active - unit: "sessions" - chart_type: line - dimensions: - - name: a dimension per frontend server - - name: haproxy_f.qcur - description: Session In Queue - unit: "sessions" - chart_type: line - dimensions: - - name: a dimension per frontend server - - name: haproxy_f.hrsp_1xx - description: HTTP responses with 1xx code - unit: "responses/s" - chart_type: line - dimensions: - - name: a dimension per frontend server - - name: haproxy_f.hrsp_2xx - description: HTTP responses with 2xx code - unit: 
"responses/s" - chart_type: line - dimensions: - - name: a dimension per frontend server - - name: haproxy_f.hrsp_3xx - description: HTTP responses with 3xx code - unit: "responses/s" - chart_type: line - dimensions: - - name: a dimension per frontend server - - name: haproxy_f.hrsp_4xx - description: HTTP responses with 4xx code - unit: "responses/s" - chart_type: line - dimensions: - - name: a dimension per frontend server - - name: haproxy_f.hrsp_5xx - description: HTTP responses with 5xx code - unit: "responses/s" - chart_type: line - dimensions: - - name: a dimension per frontend server - - name: haproxy_f.hrsp_other - description: HTTP responses with other codes (protocol error) - unit: "responses/s" - chart_type: line - dimensions: - - name: a dimension per frontend server - - name: haproxy_f.hrsp_total - description: HTTP responses - unit: "responses" - chart_type: line - dimensions: - - name: a dimension per frontend server - - name: haproxy_b.bin - description: Kilobytes In - unit: "KiB/s" - chart_type: line - dimensions: - - name: a dimension per backend server - - name: haproxy_b.bout - description: Kilobytes Out - unit: "KiB/s" - chart_type: line - dimensions: - - name: a dimension per backend server - - name: haproxy_b.scur - description: Sessions Active - unit: "sessions" - chart_type: line - dimensions: - - name: a dimension per backend server - - name: haproxy_b.qcur - description: Sessions In Queue - unit: "sessions" - chart_type: line - dimensions: - - name: a dimension per backend server - - name: haproxy_b.hrsp_1xx - description: HTTP responses with 1xx code - unit: "responses/s" - chart_type: line - dimensions: - - name: a dimension per backend server - - name: haproxy_b.hrsp_2xx - description: HTTP responses with 2xx code - unit: "responses/s" - chart_type: line - dimensions: - - name: a dimension per backend server - - name: haproxy_b.hrsp_3xx - description: HTTP responses with 3xx code - unit: "responses/s" - chart_type: line - dimensions: 
- - name: a dimension per backend server - - name: haproxy_b.hrsp_4xx - description: HTTP responses with 4xx code - unit: "responses/s" - chart_type: line - dimensions: - - name: a dimension per backend server - - name: haproxy_b.hrsp_5xx - description: HTTP responses with 5xx code - unit: "responses/s" - chart_type: line - dimensions: - - name: a dimension per backend server - - name: haproxy_b.hrsp_other - description: HTTP responses with other codes (protocol error) - unit: "responses/s" - chart_type: line - dimensions: - - name: a dimension per backend server - - name: haproxy_b.hrsp_total - description: HTTP responses (total) - unit: "responses/s" - chart_type: line - dimensions: - - name: a dimension per backend server - - name: haproxy_b.qtime - description: The average queue time over the 1024 last requests - unit: "milliseconds" - chart_type: line - dimensions: - - name: a dimension per backend server - - name: haproxy_b.ctime - description: The average connect time over the 1024 last requests - unit: "milliseconds" - chart_type: line - dimensions: - - name: a dimension per backend server - - name: haproxy_b.rtime - description: The average response time over the 1024 last requests - unit: "milliseconds" - chart_type: line - dimensions: - - name: a dimension per backend server - - name: haproxy_b.ttime - description: The average total session time over the 1024 last requests - unit: "milliseconds" - chart_type: line - dimensions: - - name: a dimension per backend server - - name: haproxy_hs.down - description: Backend Servers In DOWN State - unit: "failed servers" - chart_type: line - dimensions: - - name: a dimension per backend server - - name: haproxy_hs.up - description: Backend Servers In UP State - unit: "health servers" - chart_type: line - dimensions: - - name: a dimension per backend server - - name: haproxy_hb.down - description: Is Backend Failed? 
- unit: "boolean" - chart_type: line - dimensions: - - name: a dimension per backend server - - name: haproxy.idle - description: The Ratio Of Polling Time Vs Total Time - unit: "percentage" - chart_type: line - dimensions: - - name: idle +# This collector will not appear in documentation, as the go version is preferred, +# https://github.com/netdata/go.d.plugin/blob/master/modules/haproxy/README.md +# +# +# meta: +# plugin_name: python.d.plugin +# module_name: haproxy +# monitored_instance: +# name: HAProxy +# link: 'https://www.haproxy.org/' +# categories: +# - data-collection.web-servers-and-web-proxies +# icon_filename: 'haproxy.png' +# related_resources: +# integrations: +# list: [] +# info_provided_to_referring_integrations: +# description: '' +# keywords: +# - haproxy +# - tcp +# - balancer +# most_popular: false +# overview: +# data_collection: +# metrics_description: 'This collector monitors HAProxy metrics about frontend servers, backend servers, responses and more.' +# method_description: 'It connects to the HAProxy instance via URL or UNIX socket.' +# supported_platforms: +# include: [] +# exclude: [] +# multi_instance: true +# additional_permissions: +# description: '' +# default_behavior: +# auto_detection: +# description: '' +# limits: +# description: '' +# performance_impact: +# description: '' +# setup: +# prerequisites: +# list: +# - title: 'HAProxy setup for socket' +# description: 'Socket must be readable and writable by the netdata user.' +# - title: 'HAProxy setup for URL' +# description: 'URL must have `stats uri <path>` present in the haproxy config, otherwise you will get HTTP 503 in the haproxy logs.' +# configuration: +# file: +# name: python.d/haproxy.conf +# options: +# description: | +# There are 2 sections: + +# * Global variables +# * One or more JOBS that can define multiple different instances to monitor. 
+ +# The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + +# Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + +# Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. +# folding: +# title: "Config options" +# enabled: true +# list: +# - name: update_every +# description: Sets the default data collection frequency. +# default_value: 5 +# required: false +# - name: priority +# description: Controls the order of charts at the netdata dashboard. +# default_value: 60000 +# required: false +# - name: autodetection_retry +# description: Sets the job re-check interval in seconds. +# default_value: 0 +# required: false +# - name: penalty +# description: Indicates whether to apply penalty to update_every in case of failures. +# default_value: yes +# required: false +# - name: name +# description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. +# default_value: '' +# required: false +# - name: user +# description: Username if stats auth is used. +# default_value: '' +# required: false +# - name: pass +# description: Password if stats auth is used. +# default_value: '' +# required: false +# - name: url +# description: URL to the haproxy_stats endpoint. Also make sure the parameters `csv` and `norefresh` are provided. +# default_value: '' +# required: false +# - name: socket +# description: Unix socket path to the haproxy sock file. 
+# default_value: '' +# required: false +# examples: +# folding: +# enabled: true +# title: "Config" +# list: +# - name: URL method +# description: Use a URL to specify the endpoint to check for haproxy statistics. +# config: | +# via_url: +# user: 'username' # ONLY IF stats auth is used +# pass: 'password' # # ONLY IF stats auth is used +# url: 'http://ip.address:port/url;csv;norefresh' +# - name: Local socket +# description: Use a local socket to check for haproxy statistics. +# config: | +# via_socket: +# socket: 'path/to/haproxy/sock' +# troubleshooting: +# problems: +# list: [] +# alerts: +# - name: haproxy_backend_server_status +# link: https://github.com/netdata/netdata/blob/master/health/health.d/haproxy.conf +# metric: haproxy_hs.down +# info: average number of failed haproxy backend servers over the last 10 seconds +# - name: haproxy_backend_status +# link: https://github.com/netdata/netdata/blob/master/health/health.d/haproxy.conf +# metric: haproxy_hb.down +# info: average number of failed haproxy backends over the last 10 seconds +# metrics: +# folding: +# title: Metrics +# enabled: false +# description: "" +# availability: [] +# scopes: +# - name: global +# description: 'These metrics refer to the entire monitored application.' 
+# labels: [] +# metrics: +# - name: haproxy_f.bin +# description: Kilobytes In +# unit: "KiB/s" +# chart_type: line +# dimensions: +# - name: a dimension per frontend server +# - name: haproxy_f.bout +# description: Kilobytes Out +# unit: "KiB/s" +# chart_type: line +# dimensions: +# - name: a dimension per frontend server +# - name: haproxy_f.scur +# description: Sessions Active +# unit: "sessions" +# chart_type: line +# dimensions: +# - name: a dimension per frontend server +# - name: haproxy_f.qcur +# description: Session In Queue +# unit: "sessions" +# chart_type: line +# dimensions: +# - name: a dimension per frontend server +# - name: haproxy_f.hrsp_1xx +# description: HTTP responses with 1xx code +# unit: "responses/s" +# chart_type: line +# dimensions: +# - name: a dimension per frontend server +# - name: haproxy_f.hrsp_2xx +# description: HTTP responses with 2xx code +# unit: "responses/s" +# chart_type: line +# dimensions: +# - name: a dimension per frontend server +# - name: haproxy_f.hrsp_3xx +# description: HTTP responses with 3xx code +# unit: "responses/s" +# chart_type: line +# dimensions: +# - name: a dimension per frontend server +# - name: haproxy_f.hrsp_4xx +# description: HTTP responses with 4xx code +# unit: "responses/s" +# chart_type: line +# dimensions: +# - name: a dimension per frontend server +# - name: haproxy_f.hrsp_5xx +# description: HTTP responses with 5xx code +# unit: "responses/s" +# chart_type: line +# dimensions: +# - name: a dimension per frontend server +# - name: haproxy_f.hrsp_other +# description: HTTP responses with other codes (protocol error) +# unit: "responses/s" +# chart_type: line +# dimensions: +# - name: a dimension per frontend server +# - name: haproxy_f.hrsp_total +# description: HTTP responses +# unit: "responses" +# chart_type: line +# dimensions: +# - name: a dimension per frontend server +# - name: haproxy_b.bin +# description: Kilobytes In +# unit: "KiB/s" +# chart_type: line +# dimensions: +# - name: a 
dimension per backend server +# - name: haproxy_b.bout +# description: Kilobytes Out +# unit: "KiB/s" +# chart_type: line +# dimensions: +# - name: a dimension per backend server +# - name: haproxy_b.scur +# description: Sessions Active +# unit: "sessions" +# chart_type: line +# dimensions: +# - name: a dimension per backend server +# - name: haproxy_b.qcur +# description: Sessions In Queue +# unit: "sessions" +# chart_type: line +# dimensions: +# - name: a dimension per backend server +# - name: haproxy_b.hrsp_1xx +# description: HTTP responses with 1xx code +# unit: "responses/s" +# chart_type: line +# dimensions: +# - name: a dimension per backend server +# - name: haproxy_b.hrsp_2xx +# description: HTTP responses with 2xx code +# unit: "responses/s" +# chart_type: line +# dimensions: +# - name: a dimension per backend server +# - name: haproxy_b.hrsp_3xx +# description: HTTP responses with 3xx code +# unit: "responses/s" +# chart_type: line +# dimensions: +# - name: a dimension per backend server +# - name: haproxy_b.hrsp_4xx +# description: HTTP responses with 4xx code +# unit: "responses/s" +# chart_type: line +# dimensions: +# - name: a dimension per backend server +# - name: haproxy_b.hrsp_5xx +# description: HTTP responses with 5xx code +# unit: "responses/s" +# chart_type: line +# dimensions: +# - name: a dimension per backend server +# - name: haproxy_b.hrsp_other +# description: HTTP responses with other codes (protocol error) +# unit: "responses/s" +# chart_type: line +# dimensions: +# - name: a dimension per backend server +# - name: haproxy_b.hrsp_total +# description: HTTP responses (total) +# unit: "responses/s" +# chart_type: line +# dimensions: +# - name: a dimension per backend server +# - name: haproxy_b.qtime +# description: The average queue time over the 1024 last requests +# unit: "milliseconds" +# chart_type: line +# dimensions: +# - name: a dimension per backend server +# - name: haproxy_b.ctime +# description: The average connect time 
over the 1024 last requests +# unit: "milliseconds" +# chart_type: line +# dimensions: +# - name: a dimension per backend server +# - name: haproxy_b.rtime +# description: The average response time over the 1024 last requests +# unit: "milliseconds" +# chart_type: line +# dimensions: +# - name: a dimension per backend server +# - name: haproxy_b.ttime +# description: The average total session time over the 1024 last requests +# unit: "milliseconds" +# chart_type: line +# dimensions: +# - name: a dimension per backend server +# - name: haproxy_hs.down +# description: Backend Servers In DOWN State +# unit: "failed servers" +# chart_type: line +# dimensions: +# - name: a dimension per backend server +# - name: haproxy_hs.up +# description: Backend Servers In UP State +# unit: "health servers" +# chart_type: line +# dimensions: +# - name: a dimension per backend server +# - name: haproxy_hb.down +# description: Is Backend Failed? +# unit: "boolean" +# chart_type: line +# dimensions: +# - name: a dimension per backend server +# - name: haproxy.idle +# description: The Ratio Of Polling Time Vs Total Time +# unit: "percentage" +# chart_type: line +# dimensions: +# - name: idle diff --git a/collectors/python.d.plugin/haproxy/metrics.csv b/collectors/python.d.plugin/haproxy/metrics.csv deleted file mode 100644 index 7c92c566..00000000 --- a/collectors/python.d.plugin/haproxy/metrics.csv +++ /dev/null @@ -1,31 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -haproxy_f.bin,,a dimension per frontend server,KiB/s,Kilobytes In,line,,python.d.plugin,haproxy -haproxy_f.bout,,a dimension per frontend server,KiB/s,Kilobytes Out,line,,python.d.plugin,haproxy -haproxy_f.scur,,a dimension per frontend server,sessions,Sessions Active,line,,python.d.plugin,haproxy -haproxy_f.qcur,,a dimension per frontend server,sessions,Session In Queue,line,,python.d.plugin,haproxy -haproxy_f.hrsp_1xx,,a dimension per frontend server,responses/s,HTTP responses with 1xx 
code,line,,python.d.plugin,haproxy -haproxy_f.hrsp_2xx,,a dimension per frontend server,responses/s,HTTP responses with 2xx code,line,,python.d.plugin,haproxy -haproxy_f.hrsp_3xx,,a dimension per frontend server,responses/s,HTTP responses with 3xx code,line,,python.d.plugin,haproxy -haproxy_f.hrsp_4xx,,a dimension per frontend server,responses/s,HTTP responses with 4xx code,line,,python.d.plugin,haproxy -haproxy_f.hrsp_5xx,,a dimension per frontend server,responses/s,HTTP responses with 5xx code,line,,python.d.plugin,haproxy -haproxy_f.hrsp_other,,a dimension per frontend server,responses/s,HTTP responses with other codes (protocol error),line,,python.d.plugin,haproxy -haproxy_f.hrsp_total,,a dimension per frontend server,responses,HTTP responses,line,,python.d.plugin,haproxy -haproxy_b.bin,,a dimension per backend server,KiB/s,Kilobytes In,line,,python.d.plugin,haproxy -haproxy_b.bout,,a dimension per backend server,KiB/s,Kilobytes Out,line,,python.d.plugin,haproxy -haproxy_b.scur,,a dimension per backend server,sessions,Sessions Active,line,,python.d.plugin,haproxy -haproxy_b.qcur,,a dimension per backend server,sessions,Sessions In Queue,line,,python.d.plugin,haproxy -haproxy_b.hrsp_1xx,,a dimension per backend server,responses/s,HTTP responses with 1xx code,line,,python.d.plugin,haproxy -haproxy_b.hrsp_2xx,,a dimension per backend server,responses/s,HTTP responses with 2xx code,line,,python.d.plugin,haproxy -haproxy_b.hrsp_3xx,,a dimension per backend server,responses/s,HTTP responses with 3xx code,line,,python.d.plugin,haproxy -haproxy_b.hrsp_4xx,,a dimension per backend server,responses/s,HTTP responses with 4xx code,line,,python.d.plugin,haproxy -haproxy_b.hrsp_5xx,,a dimension per backend server,responses/s,HTTP responses with 5xx code,line,,python.d.plugin,haproxy -haproxy_b.hrsp_other,,a dimension per backend server,responses/s,HTTP responses with other codes (protocol error),line,,python.d.plugin,haproxy -haproxy_b.hrsp_total,,a dimension per backend 
server,responses/s,HTTP responses (total),line,,python.d.plugin,haproxy -haproxy_b.qtime,,a dimension per backend server,milliseconds,The average queue time over the 1024 last requests,line,,python.d.plugin,haproxy -haproxy_b.ctime,,a dimension per backend server,milliseconds,The average connect time over the 1024 last requests,line,,python.d.plugin,haproxy -haproxy_b.rtime,,a dimension per backend server,milliseconds,The average response time over the 1024 last requests,line,,python.d.plugin,haproxy -haproxy_b.ttime,,a dimension per backend server,milliseconds,The average total session time over the 1024 last requests,line,,python.d.plugin,haproxy -haproxy_hs.down,,a dimension per backend server,failed servers,Backend Servers In DOWN State,line,,python.d.plugin,haproxy -haproxy_hs.up,,a dimension per backend server,health servers,Backend Servers In UP State,line,,python.d.plugin,haproxy -haproxy_hb.down,,a dimension per backend server,boolean,Is Backend Failed?,line,,python.d.plugin,haproxy -haproxy.idle,,idle,percentage,The Ratio Of Polling Time Vs Total Time,line,,python.d.plugin,haproxy diff --git a/collectors/python.d.plugin/hddtemp/metadata.yaml b/collectors/python.d.plugin/hddtemp/metadata.yaml index 7c78a752..ee62dc96 100644 --- a/collectors/python.d.plugin/hddtemp/metadata.yaml +++ b/collectors/python.d.plugin/hddtemp/metadata.yaml @@ -1,72 +1,163 @@ -meta: - plugin_name: python.d.plugin - module_name: hddtemp - monitored_instance: - name: HDD temperature - link: '' - categories: - - data-collection.hardware-devices-and-sensors - icon_filename: 'hard-drive.svg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor HDD temperature metrics for maintaining optimal hard drive health. Keep tabs on drive temperatures, read/write speeds, and error rates to ensure hard drive longevity.' 
- method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: hddtemp + monitored_instance: + name: HDD temperature + link: https://linux.die.net/man/8/hddtemp + categories: + - data-collection.hardware-devices-and-sensors + icon_filename: "hard-drive.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - hardware + - hdd temperature + - disk temperature + - temperature + most_popular: false + overview: + data_collection: + metrics_description: | + This collector monitors disk temperatures. + method_description: | + It uses the `hddtemp` daemon to gather the metrics. + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: By default, this collector will attempt to connect to the `hddtemp` daemon on `127.0.0.1:7634` + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Run `hddtemp` in daemon mode + description: | + You can execute `hddtemp` in TCP/IP daemon mode by using the `-d` argument. + + So running `hddtemp -d` would run the daemon, by default on port 7634. 
+ configuration: + file: + name: "python.d/hddtemp.conf" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + + By default this collector will try to autodetect disks (autodetection works only for disks whose names start with "sd"). However this can be overridden by setting the option `devices` to an array of desired disks. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 1 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: > + Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: "local" + required: false + - name: devices + description: Array of desired disks to detect, in case their name doesn't start with `sd`. + default_value: "" + required: false + - name: host + description: The IP or HOSTNAME to connect to. 
+ default_value: "localhost" + required: true + - name: port + description: The port to connect to. + default_value: 7634 + required: false + examples: + folding: + enabled: true + title: "" + list: + - name: Basic + description: A basic example configuration. + folding: + enabled: false + config: | + localhost: + name: 'local' + host: '127.0.0.1' + port: 7634 + - name: Custom disk names + description: An example defining the disk names to detect. + config: | + localhost: + name: 'local' + host: '127.0.0.1' + port: 7634 + devices: + - customdisk1 + - customdisk2 + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collecting metrics from local and remote instances. + config: | + localhost: + name: 'local' + host: '127.0.0.1' + port: 7634 + + remote_job: + name : 'remote' + host : '192.0.2.1' + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: hddtemp.temperatures - description: Disk Temperatures - unit: "Celsius" - chart_type: line - dimensions: - - name: a dimension per disk + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." 
+ labels: [] + metrics: + - name: hddtemp.temperatures + description: Disk Temperatures + unit: "Celsius" + chart_type: line + dimensions: + - name: a dimension per disk diff --git a/collectors/python.d.plugin/hddtemp/metrics.csv b/collectors/python.d.plugin/hddtemp/metrics.csv deleted file mode 100644 index c3a858db..00000000 --- a/collectors/python.d.plugin/hddtemp/metrics.csv +++ /dev/null @@ -1,2 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -hddtemp.temperatures,,a dimension per disk,Celsius,Disk Temperatures,line,,python.d.plugin,hddtemp diff --git a/collectors/python.d.plugin/hpssa/metadata.yaml b/collectors/python.d.plugin/hpssa/metadata.yaml index cc340780..dc91f05e 100644 --- a/collectors/python.d.plugin/hpssa/metadata.yaml +++ b/collectors/python.d.plugin/hpssa/metadata.yaml @@ -1,99 +1,175 @@ -meta: - plugin_name: python.d.plugin - module_name: hpssa - monitored_instance: - name: HP Smart Storage Arrays - link: '' - categories: - - data-collection.storage-mount-points-and-filesystems - icon_filename: 'hp.jpeg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Examine HP Smart Storage Arrays metrics with Netdata for efficient storage management. Improve your storage efficiency with real-time performance insights.' 
- method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: hpssa + monitored_instance: + name: HP Smart Storage Arrays + link: 'https://buy.hpe.com/us/en/software/server-management-software/server-management-software/smart-array-management-software/hpe-smart-storage-administrator/p/5409020' + categories: + - data-collection.storage-mount-points-and-filesystems + icon_filename: 'hp.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: + - storage + - hp + - hpssa + - array + most_popular: false + overview: + data_collection: + metrics_description: 'This collector monitors HP Smart Storage Arrays metrics about operational statuses and temperatures.' + method_description: 'It uses the command line tool `ssacli`. The exact command used is `sudo -n ssacli ctrl all show config detail`' + supported_platforms: + include: [] + exclude: [] + multi_instance: false + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: 'If no configuration is provided, the collector will try to execute the `ssacli` binary.' 
+ limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: + - title: 'Allow user netdata to execute `ssacli` as root.' + description: | + This module uses `ssacli`, which can only be executed by root. It uses `sudo` and assumes that it is configured such that the `netdata` user can execute `ssacli` as root without a password. + + - Add to your `/etc/sudoers` file: + + `which ssacli` shows the full path to the binary. + + ```bash + netdata ALL=(root) NOPASSWD: /path/to/ssacli + ``` + + - Reset Netdata's systemd + unit [CapabilityBoundingSet](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Capabilities) (Linux + distributions with systemd) + + The default CapabilityBoundingSet doesn't allow using `sudo`, and is quite strict in general. Resetting is not optimal, but a next-best solution given the inability to execute `ssacli` using `sudo`. + + As the `root` user, do the following: + + ```cmd + mkdir /etc/systemd/system/netdata.service.d + echo -e '[Service]\nCapabilityBoundingSet=~' | tee /etc/systemd/system/netdata.service.d/unset-capability-bounding-set.conf + systemctl daemon-reload + systemctl restart netdata.service + ``` + configuration: + file: + name: python.d/hpssa.conf + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. 
+ default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: '' + required: false + - name: ssacli_path + description: Path to the `ssacli` command line utility. Configure this if `ssacli` is not in the $PATH + default_value: '' + required: false + - name: use_sudo + description: Whether or not to use `sudo` to execute `ssacli` + default_value: 'True' + required: false + examples: + folding: + enabled: false + title: "Config" + list: + - name: Local simple config + description: A basic configuration, specifying the path to `ssacli` + folding: + enabled: false + config: | + local: + ssacli_path: /usr/sbin/ssacli + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: hpssa.ctrl_status - description: Status 1 is OK, Status 0 is not OK - unit: "Status" - chart_type: line - dimensions: - - name: ctrl_{adapter slot}_status - - name: cache_{adapter slot}_status - - name: battery_{adapter slot}_status per adapter - - name: hpssa.ctrl_temperature - description: Temperature - unit: "Celsius" - chart_type: line - dimensions: - - name: ctrl_{adapter slot}_temperature - - name: cache_{adapter slot}_temperature per adapter - - name: hpssa.ld_status - description: Status 1 is OK, Status 0 is not OK - unit: "Status" - chart_type: line - dimensions: - - name: a dimension per logical drive - - name: 
hpssa.pd_status - description: Status 1 is OK, Status 0 is not OK - unit: "Status" - chart_type: line - dimensions: - - name: a dimension per physical drive - - name: hpssa.pd_temperature - description: Temperature - unit: "Celsius" - chart_type: line - dimensions: - - name: a dimension per physical drive + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: hpssa.ctrl_status + description: Status 1 is OK, Status 0 is not OK + unit: "Status" + chart_type: line + dimensions: + - name: ctrl_{adapter slot}_status + - name: cache_{adapter slot}_status + - name: battery_{adapter slot}_status per adapter + - name: hpssa.ctrl_temperature + description: Temperature + unit: "Celsius" + chart_type: line + dimensions: + - name: ctrl_{adapter slot}_temperature + - name: cache_{adapter slot}_temperature per adapter + - name: hpssa.ld_status + description: Status 1 is OK, Status 0 is not OK + unit: "Status" + chart_type: line + dimensions: + - name: a dimension per logical drive + - name: hpssa.pd_status + description: Status 1 is OK, Status 0 is not OK + unit: "Status" + chart_type: line + dimensions: + - name: a dimension per physical drive + - name: hpssa.pd_temperature + description: Temperature + unit: "Celsius" + chart_type: line + dimensions: + - name: a dimension per physical drive diff --git a/collectors/python.d.plugin/hpssa/metrics.csv b/collectors/python.d.plugin/hpssa/metrics.csv deleted file mode 100644 index 126ba5da..00000000 --- a/collectors/python.d.plugin/hpssa/metrics.csv +++ /dev/null @@ -1,6 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -hpssa.ctrl_status,,"ctrl_{adapter slot}_status, cache_{adapter slot}_status, battery_{adapter slot}_status per adapter",Status,"Status 1 is OK, Status 0 is not OK",line,,python.d.plugin,hpssa 
-hpssa.ctrl_temperature,,"ctrl_{adapter slot}_temperature, cache_{adapter slot}_temperature per adapter",Celsius,Temperature,line,,python.d.plugin,hpssa -hpssa.ld_status,,a dimension per logical drive,Status,"Status 1 is OK, Status 0 is not OK",line,,python.d.plugin,hpssa -hpssa.pd_status,,a dimension per physical drive,Status,"Status 1 is OK, Status 0 is not OK",line,,python.d.plugin,hpssa -hpssa.pd_temperature,,a dimension per physical drive,Celsius,Temperature,line,,python.d.plugin,hpssa diff --git a/collectors/python.d.plugin/icecast/metadata.yaml b/collectors/python.d.plugin/icecast/metadata.yaml index 7b71360d..4bcf5e39 100644 --- a/collectors/python.d.plugin/icecast/metadata.yaml +++ b/collectors/python.d.plugin/icecast/metadata.yaml @@ -1,72 +1,127 @@ -meta: - plugin_name: python.d.plugin - module_name: icecast - monitored_instance: - name: Icecast - link: '' - categories: - - data-collection.media-streaming-servers - icon_filename: 'icecast.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Examine Icecast metrics for insights into media streaming server operations. Study listener counts, bitrate, and connection statuses for smooth streaming services.' 
- method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: icecast + monitored_instance: + name: Icecast + link: 'https://icecast.org/' + categories: + - data-collection.media-streaming-servers + icon_filename: 'icecast.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: + - icecast + - streaming + - media + most_popular: false + overview: + data_collection: + metrics_description: 'This collector monitors Icecast listener counts.' + method_description: 'It connects to an icecast URL and uses the `status-json.xsl` endpoint to retrieve statistics.' 
+ supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: 'Without configuration, the collector attempts to connect to http://localhost:8443/status-json.xsl' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: + - title: 'Icecast minimum version' + description: 'Needs at least icecast version >= 2.4.0' + configuration: + file: + name: python.d/icecast.conf + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. 
+ default_value: '' + required: false + - name: url + description: The URL (and port) to the icecast server. Needs to also include `/status-json.xsl` + default_value: 'http://localhost:8443/status-json.xsl' + required: false + - name: user + description: Username to use to connect to `url` if it's password protected. + default_value: '' + required: false + - name: pass + description: Password to use to connect to `url` if it's password protected. + default_value: '' + required: false + examples: + folding: + enabled: false + title: "Config" + list: + - name: Remote Icecast server + description: Configure a remote icecast server + folding: + enabled: false + config: | + remote: + url: 'http://1.2.3.4:8443/status-json.xsl' + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: icecast.listeners - description: Number Of Listeners - unit: "listeners" - chart_type: line - dimensions: - - name: a dimension for each active source + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." 
+ labels: [] + metrics: + - name: icecast.listeners + description: Number Of Listeners + unit: "listeners" + chart_type: line + dimensions: + - name: a dimension for each active source diff --git a/collectors/python.d.plugin/icecast/metrics.csv b/collectors/python.d.plugin/icecast/metrics.csv deleted file mode 100644 index e05c0504..00000000 --- a/collectors/python.d.plugin/icecast/metrics.csv +++ /dev/null @@ -1,2 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -icecast.listeners,,a dimension for each active source,listeners,Number Of Listeners,line,,python.d.plugin,icecast diff --git a/collectors/python.d.plugin/ipfs/metadata.yaml b/collectors/python.d.plugin/ipfs/metadata.yaml index 51e50e2a..dbc421c9 100644 --- a/collectors/python.d.plugin/ipfs/metadata.yaml +++ b/collectors/python.d.plugin/ipfs/metadata.yaml @@ -1,98 +1,172 @@ -meta: - plugin_name: python.d.plugin - module_name: ipfs - monitored_instance: - name: IPFS - link: '' - categories: - - data-collection.storage-mount-points-and-filesystems - icon_filename: 'ipfs.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Examine IPFS metrics for insights into distributed file system operations. Analyze node connectivity, data replication, and retrieval times for efficient distributed file handling.' 
- method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: -- name: ipfs_datastore_usage - link: https://github.com/netdata/netdata/blob/master/health/health.d/ipfs.conf - metric: ipfs.repo_size - info: IPFS datastore utilization -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: ipfs + monitored_instance: + name: IPFS + link: "https://ipfs.tech/" + categories: + - data-collection.storage-mount-points-and-filesystems + icon_filename: "ipfs.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: "This collector monitors IPFS server metrics about its quality and performance." 
+ method_description: "It connects to an http endpoint of the IPFS server to collect the metrics" + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "If the endpoint is accessible by the Agent, netdata will autodetect it" + limits: + description: | + Calls to the following endpoints are disabled due to IPFS bugs: + + /api/v0/stats/repo (https://github.com/ipfs/go-ipfs/issues/3874) + /api/v0/pin/ls (https://github.com/ipfs/go-ipfs/issues/7528) + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: "python.d/ipfs.conf" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. 
+ default_value: yes + required: false + - name: name + description: The JOB's name as it will appear at the dashboard (by default is the job_name) + default_value: job_name + required: false + - name: url + description: URL to the IPFS API + default_value: no + required: true + - name: repoapi + description: Collect repo metrics. + default_value: no + required: false + - name: pinapi + description: Set status of IPFS pinned object polling. + default_value: no + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Basic (default out-of-the-box) + description: A basic example configuration, one job will run at a time. Autodetect mechanism uses it by default. + folding: + enabled: false + config: | + localhost: + name: 'local' + url: 'http://localhost:5001' + repoapi: no + pinapi: no + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collecting metrics from local and remote instances. 
+ config: | + localhost: + name: 'local' + url: 'http://localhost:5001' + repoapi: no + pinapi: no + + remote_host: + name: 'remote' + url: 'http://192.0.2.1:5001' + repoapi: no + pinapi: no + troubleshooting: + problems: + list: [] + alerts: + - name: ipfs_datastore_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/ipfs.conf + metric: ipfs.repo_size + info: IPFS datastore utilization metrics: - - name: ipfs.bandwidth - description: IPFS Bandwidth - unit: "kilobits/s" - chart_type: line - dimensions: - - name: in - - name: out - - name: ipfs.peers - description: IPFS Peers - unit: "peers" - chart_type: line - dimensions: - - name: peers - - name: ipfs.repo_size - description: IPFS Repo Size - unit: "GiB" - chart_type: area - dimensions: - - name: avail - - name: size - - name: ipfs.repo_objects - description: IPFS Repo Objects - unit: "objects" - chart_type: line - dimensions: - - name: objects - - name: pinned - - name: recursive_pins + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." 
+ labels: [] + metrics: + - name: ipfs.bandwidth + description: IPFS Bandwidth + unit: "kilobits/s" + chart_type: line + dimensions: + - name: in + - name: out + - name: ipfs.peers + description: IPFS Peers + unit: "peers" + chart_type: line + dimensions: + - name: peers + - name: ipfs.repo_size + description: IPFS Repo Size + unit: "GiB" + chart_type: area + dimensions: + - name: avail + - name: size + - name: ipfs.repo_objects + description: IPFS Repo Objects + unit: "objects" + chart_type: line + dimensions: + - name: objects + - name: pinned + - name: recursive_pins diff --git a/collectors/python.d.plugin/ipfs/metrics.csv b/collectors/python.d.plugin/ipfs/metrics.csv deleted file mode 100644 index 33dd43c9..00000000 --- a/collectors/python.d.plugin/ipfs/metrics.csv +++ /dev/null @@ -1,5 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -ipfs.bandwidth,,"in, out",kilobits/s,IPFS Bandwidth,line,,python.d.plugin,ipfs -ipfs.peers,,peers,peers,IPFS Peers,line,,python.d.plugin,ipfs -ipfs.repo_size,,"avail, size",GiB,IPFS Repo Size,area,,python.d.plugin,ipfs -ipfs.repo_objects,,"objects, pinned, recursive_pins",objects,IPFS Repo Objects,line,,python.d.plugin,ipfs diff --git a/collectors/python.d.plugin/litespeed/metadata.yaml b/collectors/python.d.plugin/litespeed/metadata.yaml index 43a26775..400f3a7f 100644 --- a/collectors/python.d.plugin/litespeed/metadata.yaml +++ b/collectors/python.d.plugin/litespeed/metadata.yaml @@ -1,124 +1,168 @@ -meta: - plugin_name: python.d.plugin - module_name: litespeed - monitored_instance: - name: Litespeed - link: '' - categories: - - data-collection.web-servers-and-web-proxies - icon_filename: 'litespeed.svg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Examine Litespeed metrics for insights into web server operations. 
Analyze request rates, response times, and error rates for efficient web service delivery.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: litespeed + monitored_instance: + name: Litespeed + link: "https://www.litespeedtech.com/products/litespeed-web-server" + categories: + - data-collection.web-servers-and-web-proxies + icon_filename: "litespeed.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - litespeed + - web + - server + most_popular: false + overview: + data_collection: + metrics_description: "Examine Litespeed metrics for insights into web server operations. Analyze request rates, response times, and error rates for efficient web service delivery." + method_description: "The collector uses the statistics under /tmp/lshttpd to gather the metrics." + supported_platforms: + include: [] + exclude: [] + multi_instance: false + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "If no configuration is present, the collector will attempt to read files under /tmp/lshttpd/." 
+ limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: python.d/litespeed.conf + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: "" + required: false + - name: path + description: Use a different path than the default, where the litespeed stats files reside. 
+ default_value: "/tmp/lshttpd/" + required: false + examples: + folding: + enabled: false + title: "Config" + list: + - name: Set the path to statistics + description: Change the path for the litespeed stats files + config: | + localhost: + name: 'local' + path: '/tmp/lshttpd' + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: litespeed.net_throughput - description: Network Throughput HTTP - unit: "kilobits/s" - chart_type: area - dimensions: - - name: in - - name: out - - name: litespeed.net_throughput - description: Network Throughput HTTPS - unit: "kilobits/s" - chart_type: area - dimensions: - - name: in - - name: out - - name: litespeed.connections - description: Connections HTTP - unit: "conns" - chart_type: stacked - dimensions: - - name: free - - name: used - - name: litespeed.connections - description: Connections HTTPS - unit: "conns" - chart_type: stacked - dimensions: - - name: free - - name: used - - name: litespeed.requests - description: Requests - unit: "requests/s" - chart_type: line - dimensions: - - name: requests - - name: litespeed.requests_processing - description: Requests In Processing - unit: "requests" - chart_type: line - dimensions: - - name: processing - - name: litespeed.cache - description: Public Cache Hits - unit: "hits/s" - chart_type: line - dimensions: - - name: hits - - name: litespeed.cache - description: Private Cache Hits - unit: "hits/s" - chart_type: line - dimensions: - - name: hits - - name: litespeed.static - description: Static Hits - unit: "hits/s" - chart_type: line - dimensions: - - name: hits + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." 
+ labels: [] + metrics: + - name: litespeed.net_throughput + description: Network Throughput HTTP + unit: "kilobits/s" + chart_type: area + dimensions: + - name: in + - name: out + - name: litespeed.net_throughput + description: Network Throughput HTTPS + unit: "kilobits/s" + chart_type: area + dimensions: + - name: in + - name: out + - name: litespeed.connections + description: Connections HTTP + unit: "conns" + chart_type: stacked + dimensions: + - name: free + - name: used + - name: litespeed.connections + description: Connections HTTPS + unit: "conns" + chart_type: stacked + dimensions: + - name: free + - name: used + - name: litespeed.requests + description: Requests + unit: "requests/s" + chart_type: line + dimensions: + - name: requests + - name: litespeed.requests_processing + description: Requests In Processing + unit: "requests" + chart_type: line + dimensions: + - name: processing + - name: litespeed.cache + description: Public Cache Hits + unit: "hits/s" + chart_type: line + dimensions: + - name: hits + - name: litespeed.cache + description: Private Cache Hits + unit: "hits/s" + chart_type: line + dimensions: + - name: hits + - name: litespeed.static + description: Static Hits + unit: "hits/s" + chart_type: line + dimensions: + - name: hits diff --git a/collectors/python.d.plugin/litespeed/metrics.csv b/collectors/python.d.plugin/litespeed/metrics.csv deleted file mode 100644 index 56e50e42..00000000 --- a/collectors/python.d.plugin/litespeed/metrics.csv +++ /dev/null @@ -1,10 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -litespeed.net_throughput,,"in, out",kilobits/s,Network Throughput HTTP,area,,python.d.plugin,litespeed -litespeed.net_throughput,,"in, out",kilobits/s,Network Throughput HTTPS,area,,python.d.plugin,litespeed -litespeed.connections,,"free, used",conns,Connections HTTP,stacked,,python.d.plugin,litespeed -litespeed.connections,,"free, used",conns,Connections HTTPS,stacked,,python.d.plugin,litespeed 
-litespeed.requests,,requests,requests/s,Requests,line,,python.d.plugin,litespeed -litespeed.requests_processing,,processing,requests,Requests In Processing,line,,python.d.plugin,litespeed -litespeed.cache,,hits,hits/s,Public Cache Hits,line,,python.d.plugin,litespeed -litespeed.cache,,hits,hits/s,Private Cache Hits,line,,python.d.plugin,litespeed -litespeed.static,,hits,hits/s,Static Hits,line,,python.d.plugin,litespeed diff --git a/collectors/python.d.plugin/megacli/metadata.yaml b/collectors/python.d.plugin/megacli/metadata.yaml index 75238dc8..f75a8d2a 100644 --- a/collectors/python.d.plugin/megacli/metadata.yaml +++ b/collectors/python.d.plugin/megacli/metadata.yaml @@ -1,120 +1,193 @@ -meta: - plugin_name: python.d.plugin - module_name: megacli - monitored_instance: - name: MegaCLI - link: '' - categories: - - data-collection.storage-mount-points-and-filesystems - icon_filename: 'hard-drive.svg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Examine MegaCLI metrics with Netdata for insights into RAID controller performance. Improve your RAID controller efficiency with real-time MegaCLI metrics.' 
- method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: -- name: megacli_adapter_state - link: https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf - metric: megacli.adapter_degraded - info: 'adapter is in the degraded state (0: false, 1: true)' -- name: megacli_pd_media_errors - link: https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf - metric: megacli.pd_media_error - info: number of physical drive media errors -- name: megacli_pd_predictive_failures - link: https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf - metric: megacli.pd_predictive_failure - info: number of physical drive predictive failures -- name: megacli_bbu_relative_charge - link: https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf - metric: megacli.bbu_relative_charge - info: average battery backup unit (BBU) relative state of charge over the last 10 seconds -- name: megacli_bbu_cycle_count - link: https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf - metric: megacli.bbu_cycle_count - info: average battery backup unit (BBU) charge cycles count over the last 10 seconds -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: megacli + monitored_instance: + name: MegaCLI + link: 
"https://wikitech.wikimedia.org/wiki/MegaCli" + categories: + - data-collection.storage-mount-points-and-filesystems + icon_filename: "hard-drive.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - storage + - raid-controller + - manage-disks + most_popular: false + overview: + data_collection: + metrics_description: "Examine MegaCLI metrics with Netdata for insights into RAID controller performance. Improve your RAID controller efficiency with real-time MegaCLI metrics." + method_description: | + Collects adapter, physical drives and battery stats using megacli command-line tool + + Executed commands: + + sudo -n megacli -LDPDInfo -aAll + sudo -n megacli -AdpBbuCmd -a0 + supported_platforms: + include: [] + exclude: [] + multi_instance: false + additional_permissions: + description: "The module uses megacli, which can only be executed by root. It uses sudo and assumes that it is configured such that the netdata user can execute megacli as root without a password." + default_behavior: + auto_detection: + description: "After all the permissions are satisfied, netdata should be able to execute commands via the megacli command line utility" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Grant permissions for netdata, to run megacli as sudoer + description: | + The module uses megacli, which can only be executed by root. It uses sudo and assumes that it is configured such that the netdata user can execute megacli as root without a password. + + Add to your /etc/sudoers file: + which megacli shows the full path to the binary. + + ```bash + netdata ALL=(root) NOPASSWD: /path/to/megacli + ``` + - title: "Reset Netdata's systemd unit CapabilityBoundingSet (Linux distributions with systemd)" + description: | + The default CapabilityBoundingSet doesn't allow using sudo, and is quite strict in general. 
Resetting is not optimal, but a next-best solution given the inability to execute megacli using sudo. + + As root user, do the following: + + ```bash + mkdir /etc/systemd/system/netdata.service.d + echo -e '[Service]\nCapabilityBoundingSet=~' | tee /etc/systemd/system/netdata.service.d/unset-capability-bounding-set.conf + systemctl daemon-reload + systemctl restart netdata.service + ``` + configuration: + file: + name: "python.d/megacli.conf" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: do_battery + description: default is no. Battery stats (adds additional call to megacli `megacli -AdpBbuCmd -a0`). 
+ default_value: no + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Basic + folding: + enabled: false + description: A basic example configuration per job + config: | + job_name: + name: myname + update_every: 1 + priority: 60000 + penalty: yes + autodetection_retry: 0 + troubleshooting: + problems: + list: [] + alerts: + - name: megacli_adapter_state + link: https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf + metric: megacli.adapter_degraded + info: "adapter is in the degraded state (0: false, 1: true)" + - name: megacli_pd_media_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf + metric: megacli.pd_media_error + info: number of physical drive media errors + - name: megacli_pd_predictive_failures + link: https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf + metric: megacli.pd_predictive_failure + info: number of physical drive predictive failures + - name: megacli_bbu_relative_charge + link: https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf + metric: megacli.bbu_relative_charge + info: average battery backup unit (BBU) relative state of charge over the last 10 seconds + - name: megacli_bbu_cycle_count + link: https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf + metric: megacli.bbu_cycle_count + info: average battery backup unit (BBU) charge cycles count over the last 10 seconds metrics: - - name: megacli.adapter_degraded - description: Adapter State - unit: "is degraded" - chart_type: line - dimensions: - - name: a dimension per adapter - - name: megacli.pd_media_error - description: Physical Drives Media Errors - unit: "errors/s" - chart_type: line - dimensions: - - name: a dimension per physical drive - - name: megacli.pd_predictive_failure - description: Physical Drives Predictive Failures - unit: "failures/s" - chart_type: line - dimensions: - - name: a dimension per physical 
drive - - name: battery - description: "" - labels: [] - metrics: - - name: megacli.bbu_relative_charge - description: Relative State of Charge - unit: "percentage" - chart_type: line - dimensions: - - name: adapter {battery id} - - name: megacli.bbu_cycle_count - description: Cycle Count - unit: "cycle count" - chart_type: line - dimensions: - - name: adapter {battery id} + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: megacli.adapter_degraded + description: Adapter State + unit: "is degraded" + chart_type: line + dimensions: + - name: a dimension per adapter + - name: megacli.pd_media_error + description: Physical Drives Media Errors + unit: "errors/s" + chart_type: line + dimensions: + - name: a dimension per physical drive + - name: megacli.pd_predictive_failure + description: Physical Drives Predictive Failures + unit: "failures/s" + chart_type: line + dimensions: + - name: a dimension per physical drive + - name: battery + description: "Metrics related to Battery Backup Units, each BBU provides its own set of the following metrics." 
+ labels: [] + metrics: + - name: megacli.bbu_relative_charge + description: Relative State of Charge + unit: "percentage" + chart_type: line + dimensions: + - name: adapter {battery id} + - name: megacli.bbu_cycle_count + description: Cycle Count + unit: "cycle count" + chart_type: line + dimensions: + - name: adapter {battery id} diff --git a/collectors/python.d.plugin/megacli/metrics.csv b/collectors/python.d.plugin/megacli/metrics.csv deleted file mode 100644 index 6d7b00bf..00000000 --- a/collectors/python.d.plugin/megacli/metrics.csv +++ /dev/null @@ -1,6 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -megacli.adapter_degraded,,a dimension per adapter,is degraded,Adapter State,line,,python.d.plugin,megacli -megacli.pd_media_error,,a dimension per physical drive,errors/s,Physical Drives Media Errors,line,,python.d.plugin,megacli -megacli.pd_predictive_failure,,a dimension per physical drive,failures/s,Physical Drives Predictive Failures,line,,python.d.plugin,megacli -megacli.bbu_relative_charge,battery,adapter {battery id},percentage,Relative State of Charge,line,,python.d.plugin,megacli -megacli.bbu_cycle_count,battery,adapter {battery id},cycle count,Cycle Count,line,,python.d.plugin,megacli diff --git a/collectors/python.d.plugin/memcached/metadata.yaml b/collectors/python.d.plugin/memcached/metadata.yaml index 46195a46..38c9f685 100644 --- a/collectors/python.d.plugin/memcached/metadata.yaml +++ b/collectors/python.d.plugin/memcached/metadata.yaml @@ -1,175 +1,247 @@ -meta: - plugin_name: python.d.plugin - module_name: memcached - monitored_instance: - name: Memcached - link: '' - categories: - - data-collection.database-servers - icon_filename: 'memcached.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor Memcached metrics for proficient in-memory key-value 
store operations. Track cache hits, misses, and memory usage for efficient data caching.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: -- name: memcached_cache_memory_usage - link: https://github.com/netdata/netdata/blob/master/health/health.d/memcached.conf - metric: memcached.cache - info: cache memory utilization -- name: memcached_cache_fill_rate - link: https://github.com/netdata/netdata/blob/master/health/health.d/memcached.conf - metric: memcached.cache - info: average rate the cache fills up (positive), or frees up (negative) space over the last hour -- name: memcached_out_of_cache_space_time - link: https://github.com/netdata/netdata/blob/master/health/health.d/memcached.conf - metric: memcached.cache - info: estimated time the cache will run out of space if the system continues to add data at the same rate as the past hour -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: memcached + monitored_instance: + name: Memcached + link: https://memcached.org/ + categories: + - data-collection.database-servers + icon_filename: "memcached.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - memcached + - memcache + - cache + - database + most_popular: false + overview: + data_collection: + 
metrics_description: "Monitor Memcached metrics for proficient in-memory key-value store operations. Track cache hits, misses, and memory usage for efficient data caching." + method_description: "It reads server response to stats command ([stats interface](https://github.com/memcached/memcached/wiki/Commands#stats))." + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: | + If no configuration is given, collector will attempt to connect to memcached instance on `127.0.0.1:11211` address. + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: python.d/memcached.conf + description: "" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: Config options + enabled: true + list: + - name: host + description: the host to connect to. + default_value: "127.0.0.1" + required: false + - name: port + description: the port to connect to. + default_value: "11211" + required: false + - name: update_every + description: Sets the default data collection frequency. + default_value: 10 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. 
+ default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: "" + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: localhost + description: An example configuration for localhost. + folding: + enabled: false + config: | + localhost: + name: 'local' + host: 'localhost' + port: 11211 + - name: localipv4 + description: An example configuration for localipv4. + folding: + enabled: true + config: | + localhost: + name: 'local' + host: '127.0.0.1' + port: 11211 + - name: localipv6 + description: An example configuration for localipv6. + folding: + enabled: true + config: | + localhost: + name: 'local' + host: '::1' + port: 11211 + troubleshooting: + problems: + list: [] + alerts: + - name: memcached_cache_memory_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/memcached.conf + metric: memcached.cache + info: cache memory utilization + - name: memcached_cache_fill_rate + link: https://github.com/netdata/netdata/blob/master/health/health.d/memcached.conf + metric: memcached.cache + info: average rate the cache fills up (positive), or frees up (negative) space over the last hour + - name: memcached_out_of_cache_space_time + link: https://github.com/netdata/netdata/blob/master/health/health.d/memcached.conf + metric: memcached.cache + info: estimated time the cache will run out of space if the system continues to add data at the same rate as the past hour metrics: - - name: memcached.cache - description: Cache Size - unit: "MiB" - chart_type: stacked - dimensions: - - name: available - - name: used - - 
name: memcached.net - description: Network - unit: "kilobits/s" - chart_type: area - dimensions: - - name: in - - name: out - - name: memcached.connections - description: Connections - unit: "connections/s" - chart_type: line - dimensions: - - name: current - - name: rejected - - name: total - - name: memcached.items - description: Items - unit: "items" - chart_type: line - dimensions: - - name: current - - name: total - - name: memcached.evicted_reclaimed - description: Evicted and Reclaimed Items - unit: "items" - chart_type: line - dimensions: - - name: reclaimed - - name: evicted - - name: memcached.get - description: Get Requests - unit: "requests" - chart_type: stacked - dimensions: - - name: hints - - name: misses - - name: memcached.get_rate - description: Get Request Rate - unit: "requests/s" - chart_type: line - dimensions: - - name: rate - - name: memcached.set_rate - description: Set Request Rate - unit: "requests/s" - chart_type: line - dimensions: - - name: rate - - name: memcached.delete - description: Delete Requests - unit: "requests" - chart_type: stacked - dimensions: - - name: hits - - name: misses - - name: memcached.cas - description: Check and Set Requests - unit: "requests" - chart_type: stacked - dimensions: - - name: hits - - name: misses - - name: bad value - - name: memcached.increment - description: Increment Requests - unit: "requests" - chart_type: stacked - dimensions: - - name: hits - - name: misses - - name: memcached.decrement - description: Decrement Requests - unit: "requests" - chart_type: stacked - dimensions: - - name: hits - - name: misses - - name: memcached.touch - description: Touch Requests - unit: "requests" - chart_type: stacked - dimensions: - - name: hits - - name: misses - - name: memcached.touch_rate - description: Touch Request Rate - unit: "requests/s" - chart_type: line - dimensions: - - name: rate + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + 
description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: memcached.cache + description: Cache Size + unit: "MiB" + chart_type: stacked + dimensions: + - name: available + - name: used + - name: memcached.net + description: Network + unit: "kilobits/s" + chart_type: area + dimensions: + - name: in + - name: out + - name: memcached.connections + description: Connections + unit: "connections/s" + chart_type: line + dimensions: + - name: current + - name: rejected + - name: total + - name: memcached.items + description: Items + unit: "items" + chart_type: line + dimensions: + - name: current + - name: total + - name: memcached.evicted_reclaimed + description: Evicted and Reclaimed Items + unit: "items" + chart_type: line + dimensions: + - name: reclaimed + - name: evicted + - name: memcached.get + description: Get Requests + unit: "requests" + chart_type: stacked + dimensions: + - name: hints + - name: misses + - name: memcached.get_rate + description: Get Request Rate + unit: "requests/s" + chart_type: line + dimensions: + - name: rate + - name: memcached.set_rate + description: Set Request Rate + unit: "requests/s" + chart_type: line + dimensions: + - name: rate + - name: memcached.delete + description: Delete Requests + unit: "requests" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: memcached.cas + description: Check and Set Requests + unit: "requests" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: bad value + - name: memcached.increment + description: Increment Requests + unit: "requests" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: memcached.decrement + description: Decrement Requests + unit: "requests" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: memcached.touch + description: Touch Requests + unit: "requests" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: 
memcached.touch_rate + description: Touch Request Rate + unit: "requests/s" + chart_type: line + dimensions: + - name: rate diff --git a/collectors/python.d.plugin/memcached/metrics.csv b/collectors/python.d.plugin/memcached/metrics.csv deleted file mode 100644 index 8016a9d6..00000000 --- a/collectors/python.d.plugin/memcached/metrics.csv +++ /dev/null @@ -1,15 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -memcached.cache,,"available, used",MiB,Cache Size,stacked,,python.d.plugin,memcached -memcached.net,,"in, out",kilobits/s,Network,area,,python.d.plugin,memcached -memcached.connections,,"current, rejected, total",connections/s,Connections,line,,python.d.plugin,memcached -memcached.items,,"current,total",items,Items,line,,python.d.plugin,memcached -memcached.evicted_reclaimed,,"reclaimed, evicted",items,Evicted and Reclaimed Items,line,,python.d.plugin,memcached -memcached.get,,"hints, misses",requests,Get Requests,stacked,,python.d.plugin,memcached -memcached.get_rate,,rate,requests/s,Get Request Rate,line,,python.d.plugin,memcached -memcached.set_rate,,rate,requests/s,Set Request Rate,line,,python.d.plugin,memcached -memcached.delete,,"hits, misses",requests,Delete Requests,stacked,,python.d.plugin,memcached -memcached.cas,,"hits, misses, bad value",requests,Check and Set Requests,stacked,,python.d.plugin,memcached -memcached.increment,,"hits, misses",requests,Increment Requests,stacked,,python.d.plugin,memcached -memcached.decrement,,"hits, misses",requests,Decrement Requests,stacked,,python.d.plugin,memcached -memcached.touch,,"hits, misses",requests,Touch Requests,stacked,,python.d.plugin,memcached -memcached.touch_rate,,rate,requests/s,Touch Request Rate,line,,python.d.plugin,memcached diff --git a/collectors/python.d.plugin/monit/metadata.yaml b/collectors/python.d.plugin/monit/metadata.yaml index bfa3e621..b5127318 100644 --- a/collectors/python.d.plugin/monit/metadata.yaml +++ 
b/collectors/python.d.plugin/monit/metadata.yaml @@ -1,138 +1,217 @@ -meta: - plugin_name: python.d.plugin - module_name: monit - monitored_instance: - name: Monit - link: '' - categories: - - data-collection.synthetic-checks - icon_filename: 'monit.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor Monit performance for optimal system monitoring operations. Monitor system status, process health, and error rates to maintain system stability.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: monit + monitored_instance: + name: Monit + link: https://mmonit.com/monit/ + categories: + - data-collection.synthetic-checks + icon_filename: "monit.png" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - monit + - mmonit + - supervision tool + - monitrc + most_popular: false + overview: + data_collection: + metrics_description: | + This collector monitors Monit targets such as filesystems, directories, files, FIFO pipes and more. + method_description: | + It gathers data from Monit's XML interface. 
+ supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: By default, this collector will attempt to connect to Monit at `http://localhost:2812` + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: "python.d/monit.conf" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 1 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: > + Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: "local" + required: false + - name: url + description: The URL to fetch Monit's metrics. 
+ default_value: http://localhost:2812 + required: true + - name: user + description: Username in case the URL is password protected. + default_value: "" + required: false + - name: pass + description: Password in case the URL is password protected. + default_value: "" + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Basic + description: A basic configuration example. + folding: + enabled: false + config: | + localhost: + name : 'local' + url : 'http://localhost:2812' + - name: Basic Authentication + description: Example using basic username and password in order to authenticate. + config: | + localhost: + name : 'local' + url : 'http://localhost:2812' + user: 'foo' + pass: 'bar' + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collecting metrics from local and remote instances. + config: | + localhost: + name: 'local' + url: 'http://localhost:2812' + + remote_job: + name: 'remote' + url: 'http://192.0.2.1:2812' + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: monit.filesystems - description: Filesystems - unit: "filesystems" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.directories - description: Directories - unit: "directories" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.files - description: Files - unit: "files" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.fifos - description: Pipes (fifo) - unit: "pipes" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.programs - description: Programs statuses - unit: "programs" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.services - description: Processes statuses - unit: "processes" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.process_uptime - description: Processes uptime - 
unit: "seconds" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.process_threads - description: Processes threads - unit: "threads" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.process_childrens - description: Child processes - unit: "children" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.hosts - description: Hosts - unit: "hosts" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.host_latency - description: Hosts latency - unit: "milliseconds" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.networks - description: Network interfaces and addresses - unit: "interfaces" - chart_type: line - dimensions: - - name: a dimension per target + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: monit.filesystems + description: Filesystems + unit: "filesystems" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.directories + description: Directories + unit: "directories" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.files + description: Files + unit: "files" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.fifos + description: Pipes (fifo) + unit: "pipes" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.programs + description: Programs statuses + unit: "programs" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.services + description: Processes statuses + unit: "processes" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.process_uptime + description: Processes uptime + unit: "seconds" + chart_type: line + dimensions: + - name: a dimension per target + 
- name: monit.process_threads + description: Processes threads + unit: "threads" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.process_childrens + description: Child processes + unit: "children" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.hosts + description: Hosts + unit: "hosts" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.host_latency + description: Hosts latency + unit: "milliseconds" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.networks + description: Network interfaces and addresses + unit: "interfaces" + chart_type: line + dimensions: + - name: a dimension per target diff --git a/collectors/python.d.plugin/monit/metrics.csv b/collectors/python.d.plugin/monit/metrics.csv deleted file mode 100644 index 1981a07e..00000000 --- a/collectors/python.d.plugin/monit/metrics.csv +++ /dev/null @@ -1,13 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -monit.filesystems,,a dimension per target,filesystems,Filesystems,line,,python.d.plugin,monit -monit.directories,,a dimension per target,directories,Directories,line,,python.d.plugin,monit -monit.files,,a dimension per target,files,Files,line,,python.d.plugin,monit -monit.fifos,,a dimension per target,pipes,Pipes (fifo),line,,python.d.plugin,monit -monit.programs,,a dimension per target,programs,Programs statuses,line,,python.d.plugin,monit -monit.services,,a dimension per target,processes,Processes statuses,line,,python.d.plugin,monit -monit.process_uptime,,a dimension per target,seconds,Processes uptime,line,,python.d.plugin,monit -monit.process_threads,,a dimension per target,threads,Processes threads,line,,python.d.plugin,monit -monit.process_childrens,,a dimension per target,children,Child processes,line,,python.d.plugin,monit -monit.hosts,,a dimension per target,hosts,Hosts,line,,python.d.plugin,monit -monit.host_latency,,a dimension per 
target,milliseconds,Hosts latency,line,,python.d.plugin,monit -monit.networks,,a dimension per target,interfaces,Network interfaces and addresses,line,,python.d.plugin,monit diff --git a/collectors/python.d.plugin/nsd/metadata.yaml b/collectors/python.d.plugin/nsd/metadata.yaml index ce4ce35b..bd0a256f 100644 --- a/collectors/python.d.plugin/nsd/metadata.yaml +++ b/collectors/python.d.plugin/nsd/metadata.yaml @@ -1,124 +1,198 @@ -meta: - plugin_name: python.d.plugin - module_name: nsd - monitored_instance: - name: NSD - link: '' - categories: - - data-collection.dns-and-dhcp-servers - icon_filename: 'nsd.svg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor NSD performance for optimal authoritative DNS server operations. Monitor query rates, response times, and error rates to ensure reliable DNS service delivery.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: nsd + monitored_instance: + name: Name Server Daemon + link: https://nsd.docs.nlnetlabs.nl/en/latest/# + categories: + - data-collection.dns-and-dhcp-servers + icon_filename: "nsd.svg" + related_resources: + 
integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - nsd + - name server daemon + most_popular: false + overview: + data_collection: + metrics_description: | + This collector monitors NSD statistics like queries, zones, protocols, query types and more. + method_description: | + It uses the `nsd-control stats_noreset` command to gather metrics. + supported_platforms: + include: [] + exclude: [] + multi_instance: false + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: If permissions are satisfied, the collector will be able to run `nsd-control stats_noreset`, thus collecting metrics. + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Provide Netdata the permissions to run the command + description: | + Netdata must have permissions to run the `nsd-control stats_noreset` command. + + You can: + + - Add "netdata" user to "nsd" group: + ``` + usermod -aG nsd netdata + ``` + - Add Netdata to sudoers + 1. Edit the sudoers file: + ``` + visudo -f /etc/sudoers.d/netdata + ``` + 2. Add the entry: + ``` + Defaults:netdata !requiretty + netdata ALL=(ALL) NOPASSWD: /usr/sbin/nsd-control stats_noreset + ``` + + > Note that you will need to set the `command` option to `sudo /usr/sbin/nsd-control stats_noreset` if you use this method. + + configuration: + file: + name: "python.d/nsd.conf" + options: + description: | + This particular collector does not need further configuration to work if permissions are satisfied, but you can always customize it's data collection behavior. + + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. 
+ + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 30 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: > + Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed + running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: "" + required: false + - name: command + description: The command to run + default_value: "nsd-control stats_noreset" + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Basic + description: A basic configuration example. 
+ folding: + enabled: false + config: | + local: + name: 'nsd_local' + command: 'nsd-control stats_noreset' + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: nsd.queries - description: queries - unit: "queries/s" - chart_type: line - dimensions: - - name: queries - - name: nsd.zones - description: zones - unit: "zones" - chart_type: stacked - dimensions: - - name: master - - name: slave - - name: nsd.protocols - description: protocol - unit: "queries/s" - chart_type: stacked - dimensions: - - name: udp - - name: udp6 - - name: tcp - - name: tcp6 - - name: nsd.type - description: query type - unit: "queries/s" - chart_type: stacked - dimensions: - - name: A - - name: NS - - name: CNAME - - name: SOA - - name: PTR - - name: HINFO - - name: MX - - name: NAPTR - - name: TXT - - name: AAAA - - name: SRV - - name: ANY - - name: nsd.transfer - description: transfer - unit: "queries/s" - chart_type: stacked - dimensions: - - name: NOTIFY - - name: AXFR - - name: nsd.rcode - description: return code - unit: "queries/s" - chart_type: stacked - dimensions: - - name: NOERROR - - name: FORMERR - - name: SERVFAIL - - name: NXDOMAIN - - name: NOTIMP - - name: REFUSED - - name: YXDOMAIN + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." 
+ labels: [] + metrics: + - name: nsd.queries + description: queries + unit: "queries/s" + chart_type: line + dimensions: + - name: queries + - name: nsd.zones + description: zones + unit: "zones" + chart_type: stacked + dimensions: + - name: master + - name: slave + - name: nsd.protocols + description: protocol + unit: "queries/s" + chart_type: stacked + dimensions: + - name: udp + - name: udp6 + - name: tcp + - name: tcp6 + - name: nsd.type + description: query type + unit: "queries/s" + chart_type: stacked + dimensions: + - name: A + - name: NS + - name: CNAME + - name: SOA + - name: PTR + - name: HINFO + - name: MX + - name: NAPTR + - name: TXT + - name: AAAA + - name: SRV + - name: ANY + - name: nsd.transfer + description: transfer + unit: "queries/s" + chart_type: stacked + dimensions: + - name: NOTIFY + - name: AXFR + - name: nsd.rcode + description: return code + unit: "queries/s" + chart_type: stacked + dimensions: + - name: NOERROR + - name: FORMERR + - name: SERVFAIL + - name: NXDOMAIN + - name: NOTIMP + - name: REFUSED + - name: YXDOMAIN diff --git a/collectors/python.d.plugin/nsd/metrics.csv b/collectors/python.d.plugin/nsd/metrics.csv deleted file mode 100644 index b82812bf..00000000 --- a/collectors/python.d.plugin/nsd/metrics.csv +++ /dev/null @@ -1,7 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -nsd.queries,,queries,queries/s,queries,line,,python.d.plugin,nsd -nsd.zones,,"master, slave",zones,zones,stacked,,python.d.plugin,nsd -nsd.protocols,,"udp, udp6, tcp, tcp6",queries/s,protocol,stacked,,python.d.plugin,nsd -nsd.type,,"A, NS, CNAME, SOA, PTR, HINFO, MX, NAPTR, TXT, AAAA, SRV, ANY",queries/s,query type,stacked,,python.d.plugin,nsd -nsd.transfer,,"NOTIFY, AXFR",queries/s,transfer,stacked,,python.d.plugin,nsd -nsd.rcode,,"NOERROR, FORMERR, SERVFAIL, NXDOMAIN, NOTIMP, REFUSED, YXDOMAIN",queries/s,return code,stacked,,python.d.plugin,nsd diff --git a/collectors/python.d.plugin/nvidia_smi/metadata.yaml 
b/collectors/python.d.plugin/nvidia_smi/metadata.yaml index fc0c90d5..9bf1e6ca 100644 --- a/collectors/python.d.plugin/nvidia_smi/metadata.yaml +++ b/collectors/python.d.plugin/nvidia_smi/metadata.yaml @@ -1,163 +1,166 @@ -meta: - plugin_name: python.d.plugin - module_name: nvidia_smi - monitored_instance: - name: python.d nvidia_smi - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: GPU - description: "" - labels: [] - metrics: - - name: nvidia_smi.pci_bandwidth - description: PCI Express Bandwidth Utilization - unit: "KiB/s" - chart_type: area - dimensions: - - name: rx - - name: tx - - name: nvidia_smi.pci_bandwidth_percent - description: PCI Express Bandwidth Percent - unit: "percentage" - chart_type: area - dimensions: - - name: rx_percent - - name: tx_percent - - name: nvidia_smi.fan_speed - description: Fan Speed - unit: "percentage" - chart_type: line - dimensions: - - name: speed - - name: nvidia_smi.gpu_utilization - description: GPU Utilization - unit: "percentage" - chart_type: line - dimensions: - - name: utilization - - name: nvidia_smi.mem_utilization - description: Memory Bandwidth Utilization - unit: 
"percentage" - chart_type: line - dimensions: - - name: utilization - - name: nvidia_smi.encoder_utilization - description: Encoder/Decoder Utilization - unit: "percentage" - chart_type: line - dimensions: - - name: encoder - - name: decoder - - name: nvidia_smi.memory_allocated - description: Memory Usage - unit: "MiB" - chart_type: stacked - dimensions: - - name: free - - name: used - - name: nvidia_smi.bar1_memory_usage - description: Bar1 Memory Usage - unit: "MiB" - chart_type: stacked - dimensions: - - name: free - - name: used - - name: nvidia_smi.temperature - description: Temperature - unit: "celsius" - chart_type: line - dimensions: - - name: temp - - name: nvidia_smi.clocks - description: Clock Frequencies - unit: "MHz" - chart_type: line - dimensions: - - name: graphics - - name: video - - name: sm - - name: mem - - name: nvidia_smi.power - description: Power Utilization - unit: "Watts" - chart_type: line - dimensions: - - name: power - - name: nvidia_smi.power_state - description: Power State - unit: "state" - chart_type: line - dimensions: - - name: a dimension per {power_state} - - name: nvidia_smi.processes_mem - description: Memory Used by Each Process - unit: "MiB" - chart_type: stacked - dimensions: - - name: a dimension per process - - name: nvidia_smi.user_mem - description: Memory Used by Each User - unit: "MiB" - chart_type: stacked - dimensions: - - name: a dimension per user - - name: nvidia_smi.user_num - description: Number of User on GPU - unit: "num" - chart_type: line - dimensions: - - name: users +# This collector will not appear in documentation, as the go version is preferred, +# https://github.com/netdata/go.d.plugin/blob/master/modules/nvidia_smi/README.md +# +# meta: +# plugin_name: python.d.plugin +# module_name: nvidia_smi +# monitored_instance: +# name: python.d nvidia_smi +# link: '' +# categories: [] +# icon_filename: '' +# related_resources: +# integrations: +# list: [] +# info_provided_to_referring_integrations: +# 
description: '' +# keywords: [] +# most_popular: false +# overview: +# data_collection: +# metrics_description: '' +# method_description: '' +# supported_platforms: +# include: [] +# exclude: [] +# multi_instance: true +# additional_permissions: +# description: '' +# default_behavior: +# auto_detection: +# description: '' +# limits: +# description: '' +# performance_impact: +# description: '' +# setup: +# prerequisites: +# list: [] +# configuration: +# file: +# name: '' +# description: '' +# options: +# description: '' +# folding: +# title: '' +# enabled: true +# list: [] +# examples: +# folding: +# enabled: true +# title: '' +# list: [] +# troubleshooting: +# problems: +# list: [] +# alerts: [] +# metrics: +# folding: +# title: Metrics +# enabled: false +# description: "" +# availability: [] +# scopes: +# - name: GPU +# description: "" +# labels: [] +# metrics: +# - name: nvidia_smi.pci_bandwidth +# description: PCI Express Bandwidth Utilization +# unit: "KiB/s" +# chart_type: area +# dimensions: +# - name: rx +# - name: tx +# - name: nvidia_smi.pci_bandwidth_percent +# description: PCI Express Bandwidth Percent +# unit: "percentage" +# chart_type: area +# dimensions: +# - name: rx_percent +# - name: tx_percent +# - name: nvidia_smi.fan_speed +# description: Fan Speed +# unit: "percentage" +# chart_type: line +# dimensions: +# - name: speed +# - name: nvidia_smi.gpu_utilization +# description: GPU Utilization +# unit: "percentage" +# chart_type: line +# dimensions: +# - name: utilization +# - name: nvidia_smi.mem_utilization +# description: Memory Bandwidth Utilization +# unit: "percentage" +# chart_type: line +# dimensions: +# - name: utilization +# - name: nvidia_smi.encoder_utilization +# description: Encoder/Decoder Utilization +# unit: "percentage" +# chart_type: line +# dimensions: +# - name: encoder +# - name: decoder +# - name: nvidia_smi.memory_allocated +# description: Memory Usage +# unit: "MiB" +# chart_type: stacked +# dimensions: +# - name: free +# - 
name: used +# - name: nvidia_smi.bar1_memory_usage +# description: Bar1 Memory Usage +# unit: "MiB" +# chart_type: stacked +# dimensions: +# - name: free +# - name: used +# - name: nvidia_smi.temperature +# description: Temperature +# unit: "celsius" +# chart_type: line +# dimensions: +# - name: temp +# - name: nvidia_smi.clocks +# description: Clock Frequencies +# unit: "MHz" +# chart_type: line +# dimensions: +# - name: graphics +# - name: video +# - name: sm +# - name: mem +# - name: nvidia_smi.power +# description: Power Utilization +# unit: "Watts" +# chart_type: line +# dimensions: +# - name: power +# - name: nvidia_smi.power_state +# description: Power State +# unit: "state" +# chart_type: line +# dimensions: +# - name: a dimension per {power_state} +# - name: nvidia_smi.processes_mem +# description: Memory Used by Each Process +# unit: "MiB" +# chart_type: stacked +# dimensions: +# - name: a dimension per process +# - name: nvidia_smi.user_mem +# description: Memory Used by Each User +# unit: "MiB" +# chart_type: stacked +# dimensions: +# - name: a dimension per user +# - name: nvidia_smi.user_num +# description: Number of User on GPU +# unit: "num" +# chart_type: line +# dimensions: +# - name: users diff --git a/collectors/python.d.plugin/nvidia_smi/metrics.csv b/collectors/python.d.plugin/nvidia_smi/metrics.csv deleted file mode 100644 index 683ea565..00000000 --- a/collectors/python.d.plugin/nvidia_smi/metrics.csv +++ /dev/null @@ -1,16 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -nvidia_smi.pci_bandwidth,GPU,"rx, tx",KiB/s,PCI Express Bandwidth Utilization,area,,python.d.plugin,nvidia_smi -nvidia_smi.pci_bandwidth_percent,GPU,"rx_percent, tx_percent",percentage,PCI Express Bandwidth Percent,area,,python.d.plugin,nvidia_smi -nvidia_smi.fan_speed,GPU,speed,percentage,Fan Speed,line,,python.d.plugin,nvidia_smi -nvidia_smi.gpu_utilization,GPU,utilization,percentage,GPU Utilization,line,,python.d.plugin,nvidia_smi 
-nvidia_smi.mem_utilization,GPU,utilization,percentage,Memory Bandwidth Utilization,line,,python.d.plugin,nvidia_smi -nvidia_smi.encoder_utilization,GPU,"encoder, decoder",percentage,Encoder/Decoder Utilization,line,,python.d.plugin,nvidia_smi -nvidia_smi.memory_allocated,GPU,"free, used",MiB,Memory Usage,stacked,,python.d.plugin,nvidia_smi -nvidia_smi.bar1_memory_usage,GPU,"free, used",MiB,Bar1 Memory Usage,stacked,,python.d.plugin,nvidia_smi -nvidia_smi.temperature,GPU,temp,celsius,Temperature,line,,python.d.plugin,nvidia_smi -nvidia_smi.clocks,GPU,"graphics, video, sm, mem",MHz,Clock Frequencies,line,,python.d.plugin,nvidia_smi -nvidia_smi.power,GPU,power,Watts,Power Utilization,line,,python.d.plugin,nvidia_smi -nvidia_smi.power_state,GPU,a dimension per {power_state},state,Power State,line,,python.d.plugin,nvidia_smi -nvidia_smi.processes_mem,GPU,a dimension per process,MiB,Memory Used by Each Process,stacked,,python.d.plugin,nvidia_smi -nvidia_smi.user_mem,GPU,a dimension per user,MiB,Memory Used by Each User,stacked,,python.d.plugin,nvidia_smi -nvidia_smi.user_num,GPU,users,num,Number of User on GPU,line,,python.d.plugin,nvidia_smi diff --git a/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py b/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py index 271c9963..556a6143 100644 --- a/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py +++ b/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py @@ -62,20 +62,22 @@ POWER_STATES = ['P' + str(i) for i in range(0, 16)] # PCI Transfer data rate in gigabits per second (Gb/s) per generation PCI_SPEED = { - "1": 2.5, - "2": 5, - "3": 8, - "4": 16, - "5": 32 + "1": 2.5, + "2": 5, + "3": 8, + "4": 16, + "5": 32 } # PCI encoding per generation PCI_ENCODING = { - "1": 2/10, - "2": 2/10, - "3": 2/130, - "4": 2/130, - "5": 2/130 + "1": 2 / 10, + "2": 2 / 10, + "3": 2 / 130, + "4": 2 / 130, + "5": 2 / 130 } + + def gpu_charts(gpu): fam = gpu.full_name() @@ -88,7 +90,8 @@ def gpu_charts(gpu): ] }, 
PCI_BANDWIDTH_PERCENT: { - 'options': [None, 'PCI Express Bandwidth Percent', 'percentage', fam, 'nvidia_smi.pci_bandwidth_percent', 'area'], + 'options': [None, 'PCI Express Bandwidth Percent', 'percentage', fam, 'nvidia_smi.pci_bandwidth_percent', + 'area'], 'lines': [ ['rx_util_percent', 'rx_percent'], ['tx_util_percent', 'tx_percent'], @@ -358,7 +361,8 @@ class GPU: @handle_attr_error def pci_link_width(self): - return self.root.find('pci').find('pci_gpu_link_info').find('link_widths').find('max_link_width').text.split('x')[0] + info = self.root.find('pci').find('pci_gpu_link_info') + return info.find('link_widths').find('max_link_width').text.split('x')[0] def pci_bw_max(self): link_gen = self.pci_link_gen() @@ -368,7 +372,7 @@ class GPU: # Maximum PCIe Bandwidth = SPEED * WIDTH * (1 - ENCODING) - 1Gb/s. # see details https://enterprise-support.nvidia.com/s/article/understanding-pcie-configuration-for-maximum-performance # return max bandwidth in kilobytes per second (kB/s) - return (PCI_SPEED[link_gen] * link_width * (1- PCI_ENCODING[link_gen]) - 1) * 1000 * 1000 / 8 + return (PCI_SPEED[link_gen] * link_width * (1 - PCI_ENCODING[link_gen]) - 1) * 1000 * 1000 / 8 @handle_attr_error def rx_util(self): @@ -435,13 +439,18 @@ class GPU: return self.root.find('clocks').find('mem_clock').text.split()[0] @handle_attr_error + def power_readings(self): + elem = self.root.find('power_readings') + return elem if elem else self.root.find('gpu_power_readings') + + @handle_attr_error def power_state(self): - return str(self.root.find('power_readings').find('power_state').text.split()[0]) + return str(self.power_readings().find('power_state').text.split()[0]) @handle_value_error @handle_attr_error def power_draw(self): - return float(self.root.find('power_readings').find('power_draw').text.split()[0]) * 100 + return float(self.power_readings().find('power_draw').text.split()[0]) * 100 @handle_attr_error def processes(self): @@ -492,7 +501,6 @@ class GPU: 
data['rx_util_percent'] = str(int(int(self.rx_util()) * 100 / self.pci_bw_max())) data['tx_util_percent'] = str(int(int(self.tx_util()) * 100 / self.pci_bw_max())) - for v in POWER_STATES: data['power_state_' + v.lower()] = 0 p_state = self.power_state() diff --git a/collectors/python.d.plugin/openldap/metadata.yaml b/collectors/python.d.plugin/openldap/metadata.yaml index 413aaf1d..3826b22c 100644 --- a/collectors/python.d.plugin/openldap/metadata.yaml +++ b/collectors/python.d.plugin/openldap/metadata.yaml @@ -1,116 +1,225 @@ -meta: - plugin_name: python.d.plugin - module_name: openldap - monitored_instance: - name: OpenLDAP - link: '' - categories: - - data-collection.authentication-and-authorization - icon_filename: 'statsd.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Examine OpenLDAP metrics for insights into directory service operations. Analyze query rates, response times, and error rates for efficient directory services.' 
- method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: openldap + monitored_instance: + name: OpenLDAP + link: "https://www.openldap.org/" + categories: + - data-collection.authentication-and-authorization + icon_filename: "statsd.png" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - openldap + - RBAC + - Directory access + most_popular: false + overview: + data_collection: + metrics_description: "This collector monitors OpenLDAP metrics about connections, operations, referrals and more." + method_description: | + Statistics are taken from the monitoring interface of an OpenLDAP (slapd) server. + supported_platforms: + include: [] + exclude: [] + multi_instance: false + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: | + This collector doesn't work until all the prerequisites are met. + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Configure the OpenLDAP server to expose metrics to monitor it. + description: | + Follow the instructions at https://www.openldap.org/doc/admin24/monitoringslapd.html to activate the monitoring interface. 
+ - title: Install python-ldap module + description: | + Install the python-ldap module + + 1. From pip package manager + + ```bash + pip install python-ldap + ``` + + 2. With apt package manager (in most deb based distros) + + + ```bash + apt-get install python-ldap + ``` + + + 3. With yum package manager (in most rpm based distros) + + + ```bash + yum install python-ldap + ``` + - title: Insert credentials for Netdata to access openLDAP server + description: | + Use the `ldappasswd` utility to set a password for the username you will use. + configuration: + file: + name: "python.d/openldap.conf" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. 
This allows autodetection to try several alternatives and pick the one that works. + default_value: "" + required: false + - name: username + description: The bind user with rights to access the monitor statistics + default_value: "" + required: true + - name: password + description: The password for the bound user + default_value: "" + required: true + - name: server + description: The listening address of the LDAP server. In case of TLS, use the hostname which the certificate is published for. + default_value: "" + required: true + - name: port + description: The listening port of the LDAP server. Change to port 636 in case of TLS connection. + default_value: "389" + required: true + - name: use_tls + description: Make True if a TLS connection is used over ldaps:// + default_value: False + required: false + - name: use_start_tls + description: Make True if a TLS connection is used over ldap:// + default_value: False + required: false + - name: cert_check + description: False if you want to ignore certificate check + default_value: "True" + required: true + - name: timeout + description: Seconds to wait before timing out if no connection exists + default_value: "" + required: true + examples: + folding: + enabled: true + title: "Config" + list: + - name: Basic + description: A basic example configuration. 
+ folding: + enabled: false + config: | + username: "cn=admin" + password: "pass" + server: "localhost" + port: "389" + cert_check: True + timeout: 1 + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: openldap.total_connections - description: Total Connections - unit: "connections/s" - chart_type: line - dimensions: - - name: connections - - name: openldap.traffic_stats - description: Traffic - unit: "KiB/s" - chart_type: line - dimensions: - - name: sent - - name: openldap.operations_status - description: Operations Status - unit: "ops/s" - chart_type: line - dimensions: - - name: completed - - name: initiated - - name: openldap.referrals - description: Referrals - unit: "referrals/s" - chart_type: line - dimensions: - - name: sent - - name: openldap.entries - description: Entries - unit: "entries/s" - chart_type: line - dimensions: - - name: sent - - name: openldap.ldap_operations - description: Operations - unit: "ops/s" - chart_type: line - dimensions: - - name: bind - - name: search - - name: unbind - - name: add - - name: delete - - name: modify - - name: compare - - name: openldap.waiters - description: Waiters - unit: "waiters/s" - chart_type: line - dimensions: - - name: write - - name: read + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." 
+ labels: [] + metrics: + - name: openldap.total_connections + description: Total Connections + unit: "connections/s" + chart_type: line + dimensions: + - name: connections + - name: openldap.traffic_stats + description: Traffic + unit: "KiB/s" + chart_type: line + dimensions: + - name: sent + - name: openldap.operations_status + description: Operations Status + unit: "ops/s" + chart_type: line + dimensions: + - name: completed + - name: initiated + - name: openldap.referrals + description: Referrals + unit: "referrals/s" + chart_type: line + dimensions: + - name: sent + - name: openldap.entries + description: Entries + unit: "entries/s" + chart_type: line + dimensions: + - name: sent + - name: openldap.ldap_operations + description: Operations + unit: "ops/s" + chart_type: line + dimensions: + - name: bind + - name: search + - name: unbind + - name: add + - name: delete + - name: modify + - name: compare + - name: openldap.waiters + description: Waiters + unit: "waiters/s" + chart_type: line + dimensions: + - name: write + - name: read diff --git a/collectors/python.d.plugin/openldap/metrics.csv b/collectors/python.d.plugin/openldap/metrics.csv deleted file mode 100644 index 0386b889..00000000 --- a/collectors/python.d.plugin/openldap/metrics.csv +++ /dev/null @@ -1,8 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -openldap.total_connections,,connections,connections/s,Total Connections,line,,python.d.plugin,openldap -openldap.traffic_stats,,sent,KiB/s,Traffic,line,,python.d.plugin,openldap -openldap.operations_status,,"completed, initiated",ops/s,Operations Status,line,,python.d.plugin,openldap -openldap.referrals,,sent,referrals/s,Referrals,line,,python.d.plugin,openldap -openldap.entries,,sent,entries/s,Entries,line,,python.d.plugin,openldap -openldap.ldap_operations,,"bind, search, unbind, add, delete, modify, compare",ops/s,Operations,line,,python.d.plugin,openldap -openldap.waiters,,"write, 
read",waiters/s,Waiters,line,,python.d.plugin,openldap diff --git a/collectors/python.d.plugin/oracledb/metadata.yaml b/collectors/python.d.plugin/oracledb/metadata.yaml index 7c530aa5..f2ab8312 100644 --- a/collectors/python.d.plugin/oracledb/metadata.yaml +++ b/collectors/python.d.plugin/oracledb/metadata.yaml @@ -1,216 +1,309 @@ -meta: - plugin_name: python.d.plugin - module_name: oracledb - monitored_instance: - name: OracleDB - link: '' - categories: - - data-collection.database-servers - icon_filename: 'oracle.jpeg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor OracleDB performance for efficient database operations and resource management. Netdata provides real-time insights and alerts for optimal database management.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: oracledb + monitored_instance: + name: Oracle DB + link: "https://docs.oracle.com/en/database/oracle/oracle-database/" + categories: + - data-collection.database-servers + icon_filename: "oracle.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: 
"" + keywords: + - database + - oracle + - data warehouse + - SQL + most_popular: false + overview: + data_collection: + metrics_description: "This collector monitors OracleDB database metrics about sessions, tables, memory and more." + method_description: "It collects the metrics via the supported database client library" + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: | + In order for this collector to work, it needs a read-only user `netdata` in the RDBMS. + default_behavior: + auto_detection: + description: "When the requirements are met, databases on the local host on port 1521 will be auto-detected" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Install the python-oracledb package + description: | + You can follow the official guide below to install the required package: + + Source: https://python-oracledb.readthedocs.io/en/latest/user_guide/installation.html + - title: Create a read only user for netdata + description: | + Follow the official instructions for your oracle RDBMS to create a read-only user for netdata. The operation may follow this approach: + + Connect to your Oracle database with an administrative user and execute: + + ```sql + CREATE USER netdata IDENTIFIED BY <PASSWORD>; + + GRANT CONNECT TO netdata; + GRANT SELECT_CATALOG_ROLE TO netdata; + ``` + - title: Edit the configuration + description: | + Edit the configuration file to: + + 1. Provide a valid user for the netdata collector to access the database + 2. Specify the network target this database is listening on. + configuration: + file: + name: "python.d/oracledb.conf" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. 
+ + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: user + description: The username for the user account. + default_value: no + required: true + - name: password + description: The password for the user account. + default_value: no + required: true + - name: server + description: The IP address or hostname (and port) of the Oracle Database Server. + default_value: no + required: true + - name: service + description: The Oracle Database service name. To view the services available on your server run this query, `select SERVICE_NAME from gv$session where sid in (select sid from V$MYSTAT)`. + default_value: no + required: true + - name: protocol + description: one of the strings "tcp" or "tcps" indicating whether to use unencrypted network traffic or encrypted network traffic + default_value: no + required: true + examples: + folding: + enabled: true + title: "Config" + list: + - name: Basic + folding: + enabled: false + description: A basic example configuration, two jobs described for two databases. 
+ config: | + local: + user: 'netdata' + password: 'secret' + server: 'localhost:1521' + service: 'XE' + protocol: 'tcps' + + remote: + user: 'netdata' + password: 'secret' + server: '10.0.0.1:1521' + service: 'XE' + protocol: 'tcps' + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: oracledb.session_count - description: Session Count - unit: "sessions" - chart_type: line - dimensions: - - name: total - - name: active - - name: oracledb.session_limit_usage - description: Session Limit Usage - unit: "%" - chart_type: area - dimensions: - - name: usage - - name: oracledb.logons - description: Logons - unit: "events/s" - chart_type: area - dimensions: - - name: logons - - name: oracledb.physical_disk_read_writes - description: Physical Disk Reads/Writes - unit: "events/s" - chart_type: area - dimensions: - - name: reads - - name: writes - - name: oracledb.sorts_on_disks - description: Sorts On Disk - unit: "events/s" - chart_type: line - dimensions: - - name: sorts - - name: oracledb.full_table_scans - description: Full Table Scans - unit: "events/s" - chart_type: line - dimensions: - - name: full table scans - - name: oracledb.database_wait_time_ratio - description: Database Wait Time Ratio - unit: "%" - chart_type: line - dimensions: - - name: wait time ratio - - name: oracledb.shared_pool_free_memory - description: Shared Pool Free Memory - unit: "%" - chart_type: line - dimensions: - - name: free memory - - name: oracledb.in_memory_sorts_ratio - description: In-Memory Sorts Ratio - unit: "%" - chart_type: line - dimensions: - - name: in-memory sorts - - name: oracledb.sql_service_response_time - description: SQL Service Response Time - unit: "seconds" - chart_type: line - dimensions: - - name: time - - name: oracledb.user_rollbacks - description: User Rollbacks - unit: "events/s" - chart_type: line - dimensions: - - name: rollbacks - - name: oracledb.enqueue_timeouts - description: Enqueue Timeouts - unit: "events/s" - chart_type: line - 
dimensions: - - name: enqueue timeouts - - name: oracledb.cache_hit_ration - description: Cache Hit Ratio - unit: "%" - chart_type: stacked - dimensions: - - name: buffer - - name: cursor - - name: library - - name: row - - name: oracledb.global_cache_blocks - description: Global Cache Blocks Events - unit: "events/s" - chart_type: area - dimensions: - - name: corrupted - - name: lost - - name: oracledb.activity - description: Activities - unit: "events/s" - chart_type: stacked - dimensions: - - name: parse count - - name: execute count - - name: user commits - - name: user rollbacks - - name: oracledb.wait_time - description: Wait Time - unit: "ms" - chart_type: stacked - dimensions: - - name: application - - name: configuration - - name: administrative - - name: concurrency - - name: commit - - name: network - - name: user I/O - - name: system I/O - - name: scheduler - - name: other - - name: oracledb.tablespace_size - description: Size - unit: "KiB" - chart_type: line - dimensions: - - name: a dimension per active tablespace - - name: oracledb.tablespace_usage - description: Usage - unit: "KiB" - chart_type: line - dimensions: - - name: a dimension per active tablespace - - name: oracledb.tablespace_usage_in_percent - description: Usage - unit: "%" - chart_type: line - dimensions: - - name: a dimension per active tablespace - - name: oracledb.allocated_size - description: Size - unit: "B" - chart_type: line - dimensions: - - name: a dimension per active tablespace - - name: oracledb.allocated_usage - description: Usage - unit: "B" - chart_type: line - dimensions: - - name: a dimension per active tablespace - - name: oracledb.allocated_usage_in_percent - description: Usage - unit: "%" - chart_type: line - dimensions: - - name: a dimension per active tablespace + folding: + title: Metrics + enabled: false + description: "These metrics refer to the entire monitored application." 
+ availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: oracledb.session_count + description: Session Count + unit: "sessions" + chart_type: line + dimensions: + - name: total + - name: active + - name: oracledb.session_limit_usage + description: Session Limit Usage + unit: "%" + chart_type: area + dimensions: + - name: usage + - name: oracledb.logons + description: Logons + unit: "events/s" + chart_type: area + dimensions: + - name: logons + - name: oracledb.physical_disk_read_writes + description: Physical Disk Reads/Writes + unit: "events/s" + chart_type: area + dimensions: + - name: reads + - name: writes + - name: oracledb.sorts_on_disks + description: Sorts On Disk + unit: "events/s" + chart_type: line + dimensions: + - name: sorts + - name: oracledb.full_table_scans + description: Full Table Scans + unit: "events/s" + chart_type: line + dimensions: + - name: full table scans + - name: oracledb.database_wait_time_ratio + description: Database Wait Time Ratio + unit: "%" + chart_type: line + dimensions: + - name: wait time ratio + - name: oracledb.shared_pool_free_memory + description: Shared Pool Free Memory + unit: "%" + chart_type: line + dimensions: + - name: free memory + - name: oracledb.in_memory_sorts_ratio + description: In-Memory Sorts Ratio + unit: "%" + chart_type: line + dimensions: + - name: in-memory sorts + - name: oracledb.sql_service_response_time + description: SQL Service Response Time + unit: "seconds" + chart_type: line + dimensions: + - name: time + - name: oracledb.user_rollbacks + description: User Rollbacks + unit: "events/s" + chart_type: line + dimensions: + - name: rollbacks + - name: oracledb.enqueue_timeouts + description: Enqueue Timeouts + unit: "events/s" + chart_type: line + dimensions: + - name: enqueue timeouts + - name: oracledb.cache_hit_ration + description: Cache Hit Ratio + unit: "%" + chart_type: stacked + dimensions: + - name: buffer + - name: cursor + - name: library + - 
name: row + - name: oracledb.global_cache_blocks + description: Global Cache Blocks Events + unit: "events/s" + chart_type: area + dimensions: + - name: corrupted + - name: lost + - name: oracledb.activity + description: Activities + unit: "events/s" + chart_type: stacked + dimensions: + - name: parse count + - name: execute count + - name: user commits + - name: user rollbacks + - name: oracledb.wait_time + description: Wait Time + unit: "ms" + chart_type: stacked + dimensions: + - name: application + - name: configuration + - name: administrative + - name: concurrency + - name: commit + - name: network + - name: user I/O + - name: system I/O + - name: scheduler + - name: other + - name: oracledb.tablespace_size + description: Size + unit: "KiB" + chart_type: line + dimensions: + - name: a dimension per active tablespace + - name: oracledb.tablespace_usage + description: Usage + unit: "KiB" + chart_type: line + dimensions: + - name: a dimension per active tablespace + - name: oracledb.tablespace_usage_in_percent + description: Usage + unit: "%" + chart_type: line + dimensions: + - name: a dimension per active tablespace + - name: oracledb.allocated_size + description: Size + unit: "B" + chart_type: line + dimensions: + - name: a dimension per active tablespace + - name: oracledb.allocated_usage + description: Usage + unit: "B" + chart_type: line + dimensions: + - name: a dimension per active tablespace + - name: oracledb.allocated_usage_in_percent + description: Usage + unit: "%" + chart_type: line + dimensions: + - name: a dimension per active tablespace diff --git a/collectors/python.d.plugin/oracledb/metrics.csv b/collectors/python.d.plugin/oracledb/metrics.csv deleted file mode 100644 index 126c5c4c..00000000 --- a/collectors/python.d.plugin/oracledb/metrics.csv +++ /dev/null @@ -1,23 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -oracledb.session_count,,"total, active",sessions,Session Count,line,,python.d.plugin,oracledb 
-oracledb.session_limit_usage,,usage,%,Session Limit Usage,area,,python.d.plugin,oracledb -oracledb.logons,,logons,events/s,Logons,area,,python.d.plugin,oracledb -oracledb.physical_disk_read_writes,,"reads, writes",events/s,Physical Disk Reads/Writes,area,,python.d.plugin,oracledb -oracledb.sorts_on_disks,,sorts,events/s,Sorts On Disk,line,,python.d.plugin,oracledb -oracledb.full_table_scans,,full table scans,events/s,Full Table Scans,line,,python.d.plugin,oracledb -oracledb.database_wait_time_ratio,,wait time ratio,%,Database Wait Time Ratio,line,,python.d.plugin,oracledb -oracledb.shared_pool_free_memory,,free memory,%,Shared Pool Free Memory,line,,python.d.plugin,oracledb -oracledb.in_memory_sorts_ratio,,in-memory sorts,%,In-Memory Sorts Ratio,line,,python.d.plugin,oracledb -oracledb.sql_service_response_time,,time,seconds,SQL Service Response Time,line,,python.d.plugin,oracledb -oracledb.user_rollbacks,,rollbacks,events/s,User Rollbacks,line,,python.d.plugin,oracledb -oracledb.enqueue_timeouts,,enqueue timeouts,events/s,Enqueue Timeouts,line,,python.d.plugin,oracledb -oracledb.cache_hit_ration,,"buffer, cursor, library, row",%,Cache Hit Ratio,stacked,,python.d.plugin,oracledb -oracledb.global_cache_blocks,,"corrupted, lost",events/s,Global Cache Blocks Events,area,,python.d.plugin,oracledb -oracledb.activity,,"parse count, execute count, user commits, user rollbacks",events/s,Activities,stacked,,python.d.plugin,oracledb -oracledb.wait_time,,"application, configuration, administrative, concurrency, commit, network, user I/O, system I/O, scheduler, other",ms,Wait Time,stacked,,python.d.plugin,oracledb -oracledb.tablespace_size,,a dimension per active tablespace,KiB,Size,line,,python.d.plugin,oracledb -oracledb.tablespace_usage,,a dimension per active tablespace,KiB,Usage,line,,python.d.plugin,oracledb -oracledb.tablespace_usage_in_percent,,a dimension per active tablespace,%,Usage,line,,python.d.plugin,oracledb -oracledb.allocated_size,,a dimension per active 
tablespace,B,Size,line,,python.d.plugin,oracledb -oracledb.allocated_usage,,a dimension per active tablespace,B,Usage,line,,python.d.plugin,oracledb -oracledb.allocated_usage_in_percent,,a dimension per active tablespace,%,Usage,line,,python.d.plugin,oracledb diff --git a/collectors/python.d.plugin/pandas/metadata.yaml b/collectors/python.d.plugin/pandas/metadata.yaml new file mode 100644 index 00000000..28a1d3b2 --- /dev/null +++ b/collectors/python.d.plugin/pandas/metadata.yaml @@ -0,0 +1,310 @@ +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: pandas + monitored_instance: + name: Pandas + link: https://learn.netdata.cloud/docs/data-collection/generic-data-collection/structured-data-pandas + categories: + - data-collection.generic-data-collection + icon_filename: pandas.png + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - pandas + - python + most_popular: false + overview: + data_collection: + metrics_description: | + [Pandas](https://pandas.pydata.org/) is a de-facto standard in reading and processing most types of structured data in Python. + If you have metrics appearing in a CSV, JSON, XML, HTML, or [other supported format](https://pandas.pydata.org/docs/user_guide/io.html), + either locally or via some HTTP endpoint, you can easily ingest and present those metrics in Netdata, by leveraging the Pandas collector. + + This collector can be used to collect pretty much anything that can be read by Pandas, and then processed by Pandas. + + More detailed information can be found in the Netdata documentation [here](https://learn.netdata.cloud/docs/data-collection/generic-data-collection/structured-data-pandas). + method_description: | + The collector uses [pandas](https://pandas.pydata.org/) to pull data and do pandas-based preprocessing, before feeding to Netdata. 
+ supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Python Requirements + description: | + This collector depends on some Python (Python 3 only) packages that can usually be installed via `pip` or `pip3`. + + ```bash + sudo pip install pandas requests + ``` + + Note: If you would like to use [`pandas.read_sql`](https://pandas.pydata.org/docs/reference/api/pandas.read_sql.html) to query a database, you will need to install the below packages as well. + + ```bash + sudo pip install 'sqlalchemy<2.0' psycopg2-binary + ``` + configuration: + file: + name: python.d/pandas.conf + description: "" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: Config options + enabled: true + list: + - name: chart_configs + description: an array of chart configuration dictionaries + default_value: "[]" + required: true + - name: chart_configs.name + description: name of the chart to be displayed in the dashboard. + default_value: None + required: true + - name: chart_configs.title + description: title of the chart to be displayed in the dashboard. 
+ default_value: None + required: true + - name: chart_configs.family + description: "[family](https://learn.netdata.cloud/docs/data-collection/chart-dimensions-contexts-and-families#family) of the chart to be displayed in the dashboard." + default_value: None + required: true + - name: chart_configs.context + description: "[context](https://learn.netdata.cloud/docs/data-collection/chart-dimensions-contexts-and-families#context) of the chart to be displayed in the dashboard." + default_value: None + required: true + - name: chart_configs.type + description: the type of the chart to be displayed in the dashboard. + default_value: None + required: true + - name: chart_configs.units + description: the units of the chart to be displayed in the dashboard. + default_value: None + required: true + - name: chart_configs.df_steps + description: a series of pandas operations (one per line) that each returns a dataframe. + default_value: None + required: true + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. 
+ default_value: "" + required: false + examples: + folding: + enabled: true + title: Config + list: + - name: Temperature API Example + folding: + enabled: true + description: example pulling some hourly temperature data, a chart for today forecast (mean,min,max) and another chart for current. + config: | + temperature: + name: "temperature" + update_every: 5 + chart_configs: + - name: "temperature_forecast_by_city" + title: "Temperature By City - Today Forecast" + family: "temperature.today" + context: "pandas.temperature" + type: "line" + units: "Celsius" + df_steps: > + pd.DataFrame.from_dict( + {city: requests.get(f'https://api.open-meteo.com/v1/forecast?latitude={lat}&longitude={lng}&hourly=temperature_2m').json()['hourly']['temperature_2m'] + for (city,lat,lng) + in [ + ('dublin', 53.3441, -6.2675), + ('athens', 37.9792, 23.7166), + ('london', 51.5002, -0.1262), + ('berlin', 52.5235, 13.4115), + ('paris', 48.8567, 2.3510), + ('madrid', 40.4167, -3.7033), + ('new_york', 40.71, -74.01), + ('los_angeles', 34.05, -118.24), + ] + } + ); + df.describe(); # get aggregate stats for each city; + df.transpose()[['mean', 'max', 'min']].reset_index(); # just take mean, min, max; + df.rename(columns={'index':'city'}); # some column renaming; + df.pivot(columns='city').mean().to_frame().reset_index(); # force to be one row per city; + df.rename(columns={0:'degrees'}); # some column renaming; + pd.concat([df, df['city']+'_'+df['level_0']], axis=1); # add new column combining city and summary measurement label; + df.rename(columns={0:'measurement'}); # some column renaming; + df[['measurement', 'degrees']].set_index('measurement'); # just take two columns we want; + df.sort_index(); # sort by city name; + df.transpose(); # transpose so its just one wide row; + - name: "temperature_current_by_city" + title: "Temperature By City - Current" + family: "temperature.current" + context: "pandas.temperature" + type: "line" + units: "Celsius" + df_steps: > + pd.DataFrame.from_dict( 
+ {city: requests.get(f'https://api.open-meteo.com/v1/forecast?latitude={lat}&longitude={lng}&current_weather=true').json()['current_weather'] + for (city,lat,lng) + in [ + ('dublin', 53.3441, -6.2675), + ('athens', 37.9792, 23.7166), + ('london', 51.5002, -0.1262), + ('berlin', 52.5235, 13.4115), + ('paris', 48.8567, 2.3510), + ('madrid', 40.4167, -3.7033), + ('new_york', 40.71, -74.01), + ('los_angeles', 34.05, -118.24), + ] + } + ); + df.transpose(); + df[['temperature']]; + df.transpose(); + - name: API CSV Example + folding: + enabled: true + description: example showing a read_csv from a url and some light pandas data wrangling. + config: | + example_csv: + name: "example_csv" + update_every: 2 + chart_configs: + - name: "london_system_cpu" + title: "London System CPU - Ratios" + family: "london_system_cpu" + context: "pandas" + type: "line" + units: "n" + df_steps: > + pd.read_csv('https://london.my-netdata.io/api/v1/data?chart=system.cpu&format=csv&after=-60', storage_options={'User-Agent': 'netdata'}); + df.drop('time', axis=1); + df.mean().to_frame().transpose(); + df.apply(lambda row: (row.user / row.system), axis = 1).to_frame(); + df.rename(columns={0:'average_user_system_ratio'}); + df*100; + - name: API JSON Example + folding: + enabled: true + description: example showing a read_json from a url and some light pandas data wrangling. 
+ config: | + example_json: + name: "example_json" + update_every: 2 + chart_configs: + - name: "london_system_net" + title: "London System Net - Total Bandwidth" + family: "london_system_net" + context: "pandas" + type: "area" + units: "kilobits/s" + df_steps: > + pd.DataFrame(requests.get('https://london.my-netdata.io/api/v1/data?chart=system.net&format=json&after=-1').json()['data'], columns=requests.get('https://london.my-netdata.io/api/v1/data?chart=system.net&format=json&after=-1').json()['labels']); + df.drop('time', axis=1); + abs(df); + df.sum(axis=1).to_frame(); + df.rename(columns={0:'total_bandwidth'}); + - name: XML Example + folding: + enabled: true + description: example showing a read_xml from a url and some light pandas data wrangling. + config: | + example_xml: + name: "example_xml" + update_every: 2 + line_sep: "|" + chart_configs: + - name: "temperature_forcast" + title: "Temperature Forecast" + family: "temp" + context: "pandas.temp" + type: "line" + units: "celsius" + df_steps: > + pd.read_xml('http://metwdb-openaccess.ichec.ie/metno-wdb2ts/locationforecast?lat=54.7210798611;long=-8.7237392806', xpath='./product/time[1]/location/temperature', parser='etree')| + df.rename(columns={'value': 'dublin'})| + df[['dublin']]| + - name: SQL Example + folding: + enabled: true + description: example showing a read_sql from a postgres database using sqlalchemy. + config: | + sql: + name: "sql" + update_every: 5 + chart_configs: + - name: "sql" + title: "SQL Example" + family: "sql.example" + context: "example" + type: "line" + units: "percent" + df_steps: > + pd.read_sql_query( + sql='\ + select \ + random()*100 as metric_1, \ + random()*100 as metric_2 \ + ', + con=create_engine('postgresql://localhost/postgres?user=netdata&password=netdata') + ); + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: | + This collector is expecting one row in the final pandas DataFrame. 
It is that first row that will be taken + as the most recent values for each dimension on each chart using (`df.to_dict(orient='records')[0]`). + See [pd.to_dict()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_dict.html). + availability: [] + scopes: + - name: global + description: | + These metrics refer to the entire monitored application. + labels: [] + metrics: [] diff --git a/collectors/python.d.plugin/pandas/pandas.conf b/collectors/python.d.plugin/pandas/pandas.conf index ca523ed3..74a7da3e 100644 --- a/collectors/python.d.plugin/pandas/pandas.conf +++ b/collectors/python.d.plugin/pandas/pandas.conf @@ -61,9 +61,7 @@ update_every: 5 # # Additionally to the above, example also supports the following: # -# num_lines: 4 # the number of lines to create -# lower: 0 # the lower bound of numbers to randomly sample from -# upper: 100 # the upper bound of numbers to randomly sample from +# chart_configs: [<dictionary>] # an array for chart config dictionaries. # # ---------------------------------------------------------------------- # AUTO-DETECTION JOBS diff --git a/collectors/python.d.plugin/postfix/metadata.yaml b/collectors/python.d.plugin/postfix/metadata.yaml index 3c1275ed..1bbb6116 100644 --- a/collectors/python.d.plugin/postfix/metadata.yaml +++ b/collectors/python.d.plugin/postfix/metadata.yaml @@ -1,78 +1,124 @@ -meta: - plugin_name: python.d.plugin - module_name: postfix - monitored_instance: - name: Postfix - link: '' - categories: - - data-collection.mail-servers - icon_filename: 'postfix.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Keep an eye on Postfix metrics for efficient mail server operations. Improve your mail server performance with Netdata''s real-time metrics and built-in alerts.' 
- method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: postfix + monitored_instance: + name: Postfix + link: https://www.postfix.org/ + categories: + - data-collection.mail-servers + icon_filename: "postfix.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - postfix + - mail + - mail server + most_popular: false + overview: + data_collection: + metrics_description: > + Keep an eye on Postfix metrics for efficient mail server operations. + + Improve your mail server performance with Netdata's real-time metrics and built-in alerts. + method_description: > + Monitors MTA email queue statistics using [postqueue](http://www.postfix.org/postqueue.1.html) tool. + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: > + Postfix has internal access controls that limit activities on the mail queue. By default, all users are allowed to view + the queue. If your system is configured with stricter access controls, you need to grant the `netdata` user access to + view the mail queue. In order to do it, add `netdata` to `authorized_mailq_users` in the `/etc/postfix/main.cf` file. 
+ + See the `authorized_mailq_users` setting in + the [Postfix documentation](https://www.postfix.org/postconf.5.html) for more details. + default_behavior: + auto_detection: + description: "The collector executes `postqueue -p` to get Postfix queue statistics." + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: "" + description: "" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: Config options + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 1 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. 
+ default_value: "" + required: false + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: postfix.qemails - description: Postfix Queue Emails - unit: "emails" - chart_type: line - dimensions: - - name: emails - - name: postfix.qsize - description: Postfix Queue Emails Size - unit: "KiB" - chart_type: area - dimensions: - - name: size + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: | + These metrics refer to the entire monitored application. + labels: [] + metrics: + - name: postfix.qemails + description: Postfix Queue Emails + unit: "emails" + chart_type: line + dimensions: + - name: emails + - name: postfix.qsize + description: Postfix Queue Emails Size + unit: "KiB" + chart_type: area + dimensions: + - name: size diff --git a/collectors/python.d.plugin/postfix/metrics.csv b/collectors/python.d.plugin/postfix/metrics.csv deleted file mode 100644 index 696f6ad3..00000000 --- a/collectors/python.d.plugin/postfix/metrics.csv +++ /dev/null @@ -1,3 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -postfix.qemails,,emails,emails,Postfix Queue Emails,line,,python.d.plugin,postfix -postfix.qsize,,size,KiB,Postfix Queue Emails Size,area,,python.d.plugin,postfix diff --git a/collectors/python.d.plugin/puppet/metadata.yaml b/collectors/python.d.plugin/puppet/metadata.yaml index fc162746..781519b6 100644 --- a/collectors/python.d.plugin/puppet/metadata.yaml +++ b/collectors/python.d.plugin/puppet/metadata.yaml @@ -1,93 +1,185 @@ -meta: - plugin_name: python.d.plugin - module_name: puppet - monitored_instance: - name: Puppet - link: '' - categories: - - data-collection.provisioning-systems - icon_filename: 'puppet.jpeg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - 
data_collection: - metrics_description: 'Monitor Puppet configurations with Netdata for changes, errors, and performance metrics. Enhance configuration management and troubleshoot faster with real-time insights and built-in alerts.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: puppet + monitored_instance: + name: Puppet + link: "https://www.puppet.com/" + categories: + - data-collection.ci-cd-systems + icon_filename: "puppet.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - puppet + - jvm heap + most_popular: false + overview: + data_collection: + metrics_description: | + This collector monitors Puppet metrics about JVM Heap, Non-Heap, CPU usage and file descriptors. + method_description: | + It uses Puppet's metrics API endpoint to gather the metrics. + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: By default, this collector will use `https://fqdn.example.com:8140` as the URL to look for metrics. 
+ limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: "python.d/puppet.conf" + options: + description: | + This particular collector does not need further configuration to work if permissions are satisfied, but you can always customize its data collection behavior. + + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + + > Notes: + > - Exact Fully Qualified Domain Name of the node should be used. + > - Usually Puppet Server/DB startup time is VERY long. So, there should be a reasonably high retry count. + > - A secured PuppetDB config may require a client certificate. This does not apply to the default PuppetDB configuration though. + folding: + title: "Config options" + enabled: true + list: + - name: url + description: HTTP or HTTPS URL, exact Fully Qualified Domain Name of the node should be used. + default_value: https://fqdn.example.com:8081 + required: true + - name: tls_verify + description: Control HTTPS server certificate verification. + default_value: "False" + required: false + - name: tls_ca_file + description: Optional CA (bundle) file to use + default_value: "" + required: false + - name: tls_cert_file + description: Optional client certificate file + default_value: "" + required: false + - name: tls_key_file + description: Optional client key file + default_value: "" + required: false + - name: update_every + description: Sets the default data collection frequency. 
+ default_value: 30 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: > + Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: "" + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Basic + description: A basic example configuration + folding: + enabled: false + config: | + puppetserver: + url: 'https://fqdn.example.com:8140' + autodetection_retry: 1 + - name: TLS Certificate + description: An example using a TLS certificate + config: | + puppetdb: + url: 'https://fqdn.example.com:8081' + tls_cert_file: /path/to/client.crt + tls_key_file: /path/to/client.key + autodetection_retry: 1 + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collecting metrics from local and remote instances. 
+ config: | + puppetserver1: + url: 'https://fqdn.example.com:8140' + autodetection_retry: 1 + + puppetserver2: + url: 'https://fqdn.example2.com:8140' + autodetection_retry: 1 + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: puppet.jvm - description: JVM Heap - unit: "MiB" - chart_type: area - dimensions: - - name: committed - - name: used - - name: puppet.jvm - description: JVM Non-Heap - unit: "MiB" - chart_type: area - dimensions: - - name: committed - - name: used - - name: puppet.cpu - description: CPU usage - unit: "percentage" - chart_type: stacked - dimensions: - - name: execution - - name: GC - - name: puppet.fdopen - description: File Descriptors - unit: "descriptors" - chart_type: line - dimensions: - - name: used + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: puppet.jvm + description: JVM Heap + unit: "MiB" + chart_type: area + dimensions: + - name: committed + - name: used + - name: puppet.jvm + description: JVM Non-Heap + unit: "MiB" + chart_type: area + dimensions: + - name: committed + - name: used + - name: puppet.cpu + description: CPU usage + unit: "percentage" + chart_type: stacked + dimensions: + - name: execution + - name: GC + - name: puppet.fdopen + description: File Descriptors + unit: "descriptors" + chart_type: line + dimensions: + - name: used diff --git a/collectors/python.d.plugin/puppet/metrics.csv b/collectors/python.d.plugin/puppet/metrics.csv deleted file mode 100644 index 1ec99e10..00000000 --- a/collectors/python.d.plugin/puppet/metrics.csv +++ /dev/null @@ -1,5 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -puppet.jvm,,"committed, used",MiB,JVM Heap,area,,python.d.plugin,puppet -puppet.jvm,,"committed, used",MiB,JVM Non-Heap,area,,python.d.plugin,puppet -puppet.cpu,,"execution, GC",percentage,CPU 
usage,stacked,,python.d.plugin,puppet -puppet.fdopen,,used,descriptors,File Descriptors,line,,python.d.plugin,puppet diff --git a/collectors/python.d.plugin/rethinkdbs/metadata.yaml b/collectors/python.d.plugin/rethinkdbs/metadata.yaml index 53959b89..bbc50eac 100644 --- a/collectors/python.d.plugin/rethinkdbs/metadata.yaml +++ b/collectors/python.d.plugin/rethinkdbs/metadata.yaml @@ -1,121 +1,188 @@ -meta: - plugin_name: python.d.plugin - module_name: rethinkdbs - monitored_instance: - name: RethinkDB - link: '' - categories: - - data-collection.database-servers - icon_filename: 'rethinkdb.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor RethinkDB performance for real-time database operations and performance. Improve your database operations with Netdata''s real-time metrics and built-in alerts.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: rethinkdbs + monitored_instance: + name: RethinkDB + link: 'https://rethinkdb.com/' + categories: + - data-collection.database-servers + icon_filename: 'rethinkdb.png' + related_resources: + integrations: + list: [] + 
info_provided_to_referring_integrations: + description: '' + keywords: + - rethinkdb + - database + - db + most_popular: false + overview: + data_collection: + metrics_description: 'This collector monitors metrics about RethinkDB clusters and database servers.' + method_description: 'It uses the `rethinkdb` python module to connect to a RethinkDB server instance and gather statistics.' + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: 'When no configuration file is found, the collector tries to connect to 127.0.0.1:28015.' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: + - title: 'Required python module' + description: 'The collector requires the `rethinkdb` python module to be installed.' + configuration: + file: + name: python.d/rethinkdbs.conf + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. 
+ default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: '' + required: false + - name: host + description: Hostname or ip of the RethinkDB server. + default_value: 'localhost' + required: false + - name: port + description: Port to connect to the RethinkDB server. + default_value: '28015' + required: false + - name: user + description: The username to use to connect to the RethinkDB server. + default_value: 'admin' + required: false + - name: password + description: The password to use to connect to the RethinkDB server. + default_value: '' + required: false + - name: timeout + description: Set a connect timeout to the RethinkDB server. 
+ default_value: '2' + required: false + examples: + folding: + enabled: false + title: "Config" + list: + - name: Local RethinkDB server + description: An example of a configuration for a local RethinkDB server + folding: + enabled: false + config: | + localhost: + name: 'local' + host: '127.0.0.1' + port: 28015 + user: "user" + password: "pass" + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: rethinkdb.cluster_connected_servers - description: Connected Servers - unit: "servers" - chart_type: stacked - dimensions: - - name: connected - - name: missing - - name: rethinkdb.cluster_clients_active - description: Active Clients - unit: "clients" - chart_type: line - dimensions: - - name: active - - name: rethinkdb.cluster_queries - description: Queries - unit: "queries/s" - chart_type: line - dimensions: - - name: queries - - name: rethinkdb.cluster_documents - description: Documents - unit: "documents/s" - chart_type: line - dimensions: - - name: reads - - name: writes - - name: database server - description: "" - labels: [] - metrics: - - name: rethinkdb.client_connections - description: Client Connections - unit: "connections" - chart_type: line - dimensions: - - name: connections - - name: rethinkdb.clients_active - description: Active Clients - unit: "clients" - chart_type: line - dimensions: - - name: active - - name: rethinkdb.queries - description: Queries - unit: "queries/s" - chart_type: line - dimensions: - - name: queries - - name: rethinkdb.documents - description: Documents - unit: "documents/s" - chart_type: line - dimensions: - - name: reads - - name: writes + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." 
+ labels: [] + metrics: + - name: rethinkdb.cluster_connected_servers + description: Connected Servers + unit: "servers" + chart_type: stacked + dimensions: + - name: connected + - name: missing + - name: rethinkdb.cluster_clients_active + description: Active Clients + unit: "clients" + chart_type: line + dimensions: + - name: active + - name: rethinkdb.cluster_queries + description: Queries + unit: "queries/s" + chart_type: line + dimensions: + - name: queries + - name: rethinkdb.cluster_documents + description: Documents + unit: "documents/s" + chart_type: line + dimensions: + - name: reads + - name: writes + - name: database server + description: "" + labels: [] + metrics: + - name: rethinkdb.client_connections + description: Client Connections + unit: "connections" + chart_type: line + dimensions: + - name: connections + - name: rethinkdb.clients_active + description: Active Clients + unit: "clients" + chart_type: line + dimensions: + - name: active + - name: rethinkdb.queries + description: Queries + unit: "queries/s" + chart_type: line + dimensions: + - name: queries + - name: rethinkdb.documents + description: Documents + unit: "documents/s" + chart_type: line + dimensions: + - name: reads + - name: writes diff --git a/collectors/python.d.plugin/rethinkdbs/metrics.csv b/collectors/python.d.plugin/rethinkdbs/metrics.csv deleted file mode 100644 index 2eb1eb7a..00000000 --- a/collectors/python.d.plugin/rethinkdbs/metrics.csv +++ /dev/null @@ -1,9 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -rethinkdb.cluster_connected_servers,,"connected, missing",servers,Connected Servers,stacked,,python.d.plugin,rethinkdbs -rethinkdb.cluster_clients_active,,active,clients,Active Clients,line,,python.d.plugin,rethinkdbs -rethinkdb.cluster_queries,,queries,queries/s,Queries,line,,python.d.plugin,rethinkdbs -rethinkdb.cluster_documents,,"reads, writes",documents/s,Documents,line,,python.d.plugin,rethinkdbs 
-rethinkdb.client_connections,database server,connections,connections,Client Connections,line,,python.d.plugin,rethinkdbs -rethinkdb.clients_active,database server,active,clients,Active Clients,line,,python.d.plugin,rethinkdbs -rethinkdb.queries,database server,queries,queries/s,Queries,line,,python.d.plugin,rethinkdbs -rethinkdb.documents,database server,"reads, writes",documents/s,Documents,line,,python.d.plugin,rethinkdbs diff --git a/collectors/python.d.plugin/retroshare/metadata.yaml b/collectors/python.d.plugin/retroshare/metadata.yaml index b847bb6f..0a769616 100644 --- a/collectors/python.d.plugin/retroshare/metadata.yaml +++ b/collectors/python.d.plugin/retroshare/metadata.yaml @@ -1,91 +1,144 @@ -meta: - plugin_name: python.d.plugin - module_name: retroshare - monitored_instance: - name: RetroShare - link: '' - categories: - - data-collection.p2p - icon_filename: 'retroshare.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor RetroShare metrics for efficient peer-to-peer communication and file sharing. Enhance your communication and file sharing performance with real-time insights and alerts from Netdata.' 
- method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: -- name: retroshare_dht_working - link: https://github.com/netdata/netdata/blob/master/health/health.d/retroshare.conf - metric: retroshare.dht - info: number of DHT peers -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: retroshare + monitored_instance: + name: RetroShare + link: "https://retroshare.cc/" + categories: + - data-collection.media-streaming-servers + icon_filename: "retroshare.png" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - retroshare + - p2p + most_popular: false + overview: + data_collection: + metrics_description: "This collector monitors RetroShare statistics such as application bandwidth, peers, and DHT metrics." + method_description: "It connects to the RetroShare web interface to gather metrics." + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "The collector will attempt to connect and detect a RetroShare web interface through http://localhost:9090, even without any configuration." 
+ limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: "RetroShare web interface" + description: | + RetroShare needs to be configured to enable the RetroShare WEB Interface and allow access from the Netdata host. + configuration: + file: + name: python.d/retroshare.conf + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: "" + required: false + - name: url + description: The URL to the RetroShare Web UI. 
+ default_value: "http://localhost:9090" + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Local RetroShare Web UI + description: A basic configuration for a RetroShare server running on localhost. + config: | + localhost: + name: 'local retroshare' + url: 'http://localhost:9090' + - name: Remote RetroShare Web UI + description: A basic configuration for a remote RetroShare server. + config: | + remote: + name: 'remote retroshare' + url: 'http://1.2.3.4:9090' + + troubleshooting: + problems: + list: [] + alerts: + - name: retroshare_dht_working + link: https://github.com/netdata/netdata/blob/master/health/health.d/retroshare.conf + metric: retroshare.dht + info: number of DHT peers metrics: - - name: retroshare.bandwidth - description: RetroShare Bandwidth - unit: "kilobits/s" - chart_type: area - dimensions: - - name: Upload - - name: Download - - name: retroshare.peers - description: RetroShare Peers - unit: "peers" - chart_type: line - dimensions: - - name: All friends - - name: Connected friends - - name: retroshare.dht - description: Retroshare DHT - unit: "peers" - chart_type: line - dimensions: - - name: DHT nodes estimated - - name: RS nodes estimated + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." 
+ labels: [] + metrics: + - name: retroshare.bandwidth + description: RetroShare Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: Upload + - name: Download + - name: retroshare.peers + description: RetroShare Peers + unit: "peers" + chart_type: line + dimensions: + - name: All friends + - name: Connected friends + - name: retroshare.dht + description: Retroshare DHT + unit: "peers" + chart_type: line + dimensions: + - name: DHT nodes estimated + - name: RS nodes estimated diff --git a/collectors/python.d.plugin/retroshare/metrics.csv b/collectors/python.d.plugin/retroshare/metrics.csv deleted file mode 100644 index 35a0a48c..00000000 --- a/collectors/python.d.plugin/retroshare/metrics.csv +++ /dev/null @@ -1,4 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -retroshare.bandwidth,,"Upload, Download",kilobits/s,RetroShare Bandwidth,area,,python.d.plugin,retroshare -retroshare.peers,,"All friends, Connected friends",peers,RetroShare Peers,line,,python.d.plugin,retroshare -retroshare.dht,,"DHT nodes estimated, RS nodes estimated",peers,Retroshare DHT,line,,python.d.plugin,retroshare diff --git a/collectors/python.d.plugin/riakkv/metadata.yaml b/collectors/python.d.plugin/riakkv/metadata.yaml index 795763eb..441937f8 100644 --- a/collectors/python.d.plugin/riakkv/metadata.yaml +++ b/collectors/python.d.plugin/riakkv/metadata.yaml @@ -1,300 +1,358 @@ -meta: - plugin_name: python.d.plugin - module_name: riakkv - monitored_instance: - name: RiakKV - link: '' - categories: - - data-collection.database-servers - icon_filename: 'riak.svg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Examine RiakKV metrics for optimal key-value database performance. Netdata provides comprehensive dashboards and anomaly detection for efficient database operations.' 
- method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: -- name: riakkv_1h_kv_get_mean_latency - link: https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf - metric: riak.kv.latency.get - info: average time between reception of client GET request and subsequent response to client over the last hour -- name: riakkv_kv_get_slow - link: https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf - metric: riak.kv.latency.get - info: average time between reception of client GET request and subsequent response to the client over the last 3 minutes, compared to the average over the last hour -- name: riakkv_1h_kv_put_mean_latency - link: https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf - metric: riak.kv.latency.put - info: average time between reception of client PUT request and subsequent response to the client over the last hour -- name: riakkv_kv_put_slow - link: https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf - metric: riak.kv.latency.put - info: average time between reception of client PUT request and subsequent response to the client over the last 3 minutes, compared to the average over the last hour -- name: riakkv_vm_high_process_count - link: https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf - metric: riak.vm - info: number of processes running in the Erlang VM -- name: riakkv_list_keys_active - link: 
https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf - metric: riak.core.fsm_active - info: number of currently running list keys finite state machines -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: riakkv + monitored_instance: + name: RiakKV + link: "https://riak.com/products/riak-kv/index.html" + categories: + - data-collection.database-servers + icon_filename: "riak.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - database + - nosql + - big data + most_popular: false + overview: + data_collection: + metrics_description: | + This collector monitors RiakKV metrics about throughput, latency, resources and more.' + method_description: "This collector reads the database stats from the `/stats` endpoint." + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "If the /stats endpoint is accessible, RiakKV instances on the local host running on port 8098 will be autodetected." + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Configure RiakKV to enable /stats endpoint + description: | + You can follow the RiakKV configuration reference documentation for how to enable this. + + Source : https://docs.riak.com/riak/kv/2.2.3/configuring/reference/#client-interfaces + configuration: + file: + name: "python.d/riakkv.conf" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. 
+ + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: url + description: The url of the server + default_value: no + required: true + examples: + folding: + enabled: true + title: "Config" + list: + - name: Basic (default) + folding: + enabled: false + description: A basic example configuration per job + config: | + local: + url: 'http://localhost:8098/stats' + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collecting metrics from local and remote instances. 
+ config: | + local: + url: 'http://localhost:8098/stats' + + remote: + url: 'http://192.0.2.1:8098/stats' + troubleshooting: + problems: + list: [] + alerts: + - name: riakkv_1h_kv_get_mean_latency + link: https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf + metric: riak.kv.latency.get + info: average time between reception of client GET request and subsequent response to client over the last hour + - name: riakkv_kv_get_slow + link: https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf + metric: riak.kv.latency.get + info: average time between reception of client GET request and subsequent response to the client over the last 3 minutes, compared to the average over the last hour + - name: riakkv_1h_kv_put_mean_latency + link: https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf + metric: riak.kv.latency.put + info: average time between reception of client PUT request and subsequent response to the client over the last hour + - name: riakkv_kv_put_slow + link: https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf + metric: riak.kv.latency.put + info: average time between reception of client PUT request and subsequent response to the client over the last 3 minutes, compared to the average over the last hour + - name: riakkv_vm_high_process_count + link: https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf + metric: riak.vm + info: number of processes running in the Erlang VM + - name: riakkv_list_keys_active + link: https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf + metric: riak.core.fsm_active + info: number of currently running list keys finite state machines metrics: - - name: riak.kv.throughput - description: Reads & writes coordinated by this node - unit: "operations/s" - chart_type: line - dimensions: - - name: gets - - name: puts - - name: riak.dt.vnode_updates - description: Update operations coordinated by local vnodes by 
data type - unit: "operations/s" - chart_type: line - dimensions: - - name: counters - - name: sets - - name: maps - - name: riak.search - description: Search queries on the node - unit: "queries/s" - chart_type: line - dimensions: - - name: queries - - name: riak.search.documents - description: Documents indexed by search - unit: "documents/s" - chart_type: line - dimensions: - - name: indexed - - name: riak.consistent.operations - description: Consistent node operations - unit: "operations/s" - chart_type: line - dimensions: - - name: gets - - name: puts - - name: riak.kv.latency.get - description: Time between reception of a client GET request and subsequent response to client - unit: "ms" - chart_type: line - dimensions: - - name: mean - - name: median - - name: '95' - - name: '99' - - name: '100' - - name: riak.kv.latency.put - description: Time between reception of a client PUT request and subsequent response to client - unit: "ms" - chart_type: line - dimensions: - - name: mean - - name: median - - name: '95' - - name: '99' - - name: '100' - - name: riak.dt.latency.counter_merge - description: Time it takes to perform an Update Counter operation - unit: "ms" - chart_type: line - dimensions: - - name: mean - - name: median - - name: '95' - - name: '99' - - name: '100' - - name: riak.dt.latency.set_merge - description: Time it takes to perform an Update Set operation - unit: "ms" - chart_type: line - dimensions: - - name: mean - - name: median - - name: '95' - - name: '99' - - name: '100' - - name: riak.dt.latency.map_merge - description: Time it takes to perform an Update Map operation - unit: "ms" - chart_type: line - dimensions: - - name: mean - - name: median - - name: '95' - - name: '99' - - name: '100' - - name: riak.search.latency.query - description: Search query latency - unit: "ms" - chart_type: line - dimensions: - - name: median - - name: min - - name: '95' - - name: '99' - - name: '999' - - name: max - - name: riak.search.latency.index - 
description: Time it takes Search to index a new document - unit: "ms" - chart_type: line - dimensions: - - name: median - - name: min - - name: '95' - - name: '99' - - name: '999' - - name: max - - name: riak.consistent.latency.get - description: Strongly consistent read latency - unit: "ms" - chart_type: line - dimensions: - - name: mean - - name: median - - name: '95' - - name: '99' - - name: '100' - - name: riak.consistent.latency.put - description: Strongly consistent write latency - unit: "ms" - chart_type: line - dimensions: - - name: mean - - name: median - - name: '95' - - name: '99' - - name: '100' - - name: riak.vm - description: Total processes running in the Erlang VM - unit: "total" - chart_type: line - dimensions: - - name: processes - - name: riak.vm.memory.processes - description: Memory allocated & used by Erlang processes - unit: "MB" - chart_type: line - dimensions: - - name: allocated - - name: used - - name: riak.kv.siblings_encountered.get - description: Number of siblings encountered during GET operations by this node during the past minute - unit: "siblings" - chart_type: line - dimensions: - - name: mean - - name: median - - name: '95' - - name: '99' - - name: '100' - - name: riak.kv.objsize.get - description: Object size encountered by this node during the past minute - unit: "KB" - chart_type: line - dimensions: - - name: mean - - name: median - - name: '95' - - name: '99' - - name: '100' - - name: riak.search.vnodeq_size - description: Number of unprocessed messages in the vnode message queues of Search on this node in the past minute - unit: "messages" - chart_type: line - dimensions: - - name: mean - - name: median - - name: '95' - - name: '99' - - name: '100' - - name: riak.search.index - description: Number of document index errors encountered by Search - unit: "errors" - chart_type: line - dimensions: - - name: errors - - name: riak.core.protobuf_connections - description: Protocol buffer connections by status - unit: "connections" 
- chart_type: line - dimensions: - - name: active - - name: riak.core.repairs - description: Number of repair operations this node has coordinated - unit: "repairs" - chart_type: line - dimensions: - - name: read - - name: riak.core.fsm_active - description: Active finite state machines by kind - unit: "fsms" - chart_type: line - dimensions: - - name: get - - name: put - - name: secondary index - - name: list keys - - name: riak.core.fsm_rejected - description: Finite state machines being rejected by Sidejobs overload protection - unit: "fsms" - chart_type: line - dimensions: - - name: get - - name: put - - name: riak.search.index - description: Number of writes to Search failed due to bad data format by reason - unit: "writes" - chart_type: line - dimensions: - - name: bad_entry - - name: extract_fail + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: riak.kv.throughput + description: Reads & writes coordinated by this node + unit: "operations/s" + chart_type: line + dimensions: + - name: gets + - name: puts + - name: riak.dt.vnode_updates + description: Update operations coordinated by local vnodes by data type + unit: "operations/s" + chart_type: line + dimensions: + - name: counters + - name: sets + - name: maps + - name: riak.search + description: Search queries on the node + unit: "queries/s" + chart_type: line + dimensions: + - name: queries + - name: riak.search.documents + description: Documents indexed by search + unit: "documents/s" + chart_type: line + dimensions: + - name: indexed + - name: riak.consistent.operations + description: Consistent node operations + unit: "operations/s" + chart_type: line + dimensions: + - name: gets + - name: puts + - name: riak.kv.latency.get + description: Time between reception of a client GET request and subsequent response to client + unit: "ms" + 
chart_type: line + dimensions: + - name: mean + - name: median + - name: "95" + - name: "99" + - name: "100" + - name: riak.kv.latency.put + description: Time between reception of a client PUT request and subsequent response to client + unit: "ms" + chart_type: line + dimensions: + - name: mean + - name: median + - name: "95" + - name: "99" + - name: "100" + - name: riak.dt.latency.counter_merge + description: Time it takes to perform an Update Counter operation + unit: "ms" + chart_type: line + dimensions: + - name: mean + - name: median + - name: "95" + - name: "99" + - name: "100" + - name: riak.dt.latency.set_merge + description: Time it takes to perform an Update Set operation + unit: "ms" + chart_type: line + dimensions: + - name: mean + - name: median + - name: "95" + - name: "99" + - name: "100" + - name: riak.dt.latency.map_merge + description: Time it takes to perform an Update Map operation + unit: "ms" + chart_type: line + dimensions: + - name: mean + - name: median + - name: "95" + - name: "99" + - name: "100" + - name: riak.search.latency.query + description: Search query latency + unit: "ms" + chart_type: line + dimensions: + - name: median + - name: min + - name: "95" + - name: "99" + - name: "999" + - name: max + - name: riak.search.latency.index + description: Time it takes Search to index a new document + unit: "ms" + chart_type: line + dimensions: + - name: median + - name: min + - name: "95" + - name: "99" + - name: "999" + - name: max + - name: riak.consistent.latency.get + description: Strongly consistent read latency + unit: "ms" + chart_type: line + dimensions: + - name: mean + - name: median + - name: "95" + - name: "99" + - name: "100" + - name: riak.consistent.latency.put + description: Strongly consistent write latency + unit: "ms" + chart_type: line + dimensions: + - name: mean + - name: median + - name: "95" + - name: "99" + - name: "100" + - name: riak.vm + description: Total processes running in the Erlang VM + unit: "total" + 
chart_type: line + dimensions: + - name: processes + - name: riak.vm.memory.processes + description: Memory allocated & used by Erlang processes + unit: "MB" + chart_type: line + dimensions: + - name: allocated + - name: used + - name: riak.kv.siblings_encountered.get + description: Number of siblings encountered during GET operations by this node during the past minute + unit: "siblings" + chart_type: line + dimensions: + - name: mean + - name: median + - name: "95" + - name: "99" + - name: "100" + - name: riak.kv.objsize.get + description: Object size encountered by this node during the past minute + unit: "KB" + chart_type: line + dimensions: + - name: mean + - name: median + - name: "95" + - name: "99" + - name: "100" + - name: riak.search.vnodeq_size + description: Number of unprocessed messages in the vnode message queues of Search on this node in the past minute + unit: "messages" + chart_type: line + dimensions: + - name: mean + - name: median + - name: "95" + - name: "99" + - name: "100" + - name: riak.search.index + description: Number of document index errors encountered by Search + unit: "errors" + chart_type: line + dimensions: + - name: errors + - name: riak.core.protobuf_connections + description: Protocol buffer connections by status + unit: "connections" + chart_type: line + dimensions: + - name: active + - name: riak.core.repairs + description: Number of repair operations this node has coordinated + unit: "repairs" + chart_type: line + dimensions: + - name: read + - name: riak.core.fsm_active + description: Active finite state machines by kind + unit: "fsms" + chart_type: line + dimensions: + - name: get + - name: put + - name: secondary index + - name: list keys + - name: riak.core.fsm_rejected + description: Finite state machines being rejected by Sidejobs overload protection + unit: "fsms" + chart_type: line + dimensions: + - name: get + - name: put + - name: riak.search.index + description: Number of writes to Search failed due to bad data 
format by reason + unit: "writes" + chart_type: line + dimensions: + - name: bad_entry + - name: extract_fail diff --git a/collectors/python.d.plugin/riakkv/metrics.csv b/collectors/python.d.plugin/riakkv/metrics.csv deleted file mode 100644 index fbac7603..00000000 --- a/collectors/python.d.plugin/riakkv/metrics.csv +++ /dev/null @@ -1,26 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -riak.kv.throughput,,"gets, puts",operations/s,Reads & writes coordinated by this node,line,,python.d.plugin,riakkv -riak.dt.vnode_updates,,"counters, sets, maps",operations/s,Update operations coordinated by local vnodes by data type,line,,python.d.plugin,riakkv -riak.search,,queries,queries/s,Search queries on the node,line,,python.d.plugin,riakkv -riak.search.documents,,indexed,documents/s,Documents indexed by search,line,,python.d.plugin,riakkv -riak.consistent.operations,,"gets, puts",operations/s,Consistent node operations,line,,python.d.plugin,riakkv -riak.kv.latency.get,,"mean, median, 95, 99, 100",ms,Time between reception of a client GET request and subsequent response to client,line,,python.d.plugin,riakkv -riak.kv.latency.put,,"mean, median, 95, 99, 100",ms,Time between reception of a client PUT request and subsequent response to client,line,,python.d.plugin,riakkv -riak.dt.latency.counter_merge,,"mean, median, 95, 99, 100",ms,Time it takes to perform an Update Counter operation,line,,python.d.plugin,riakkv -riak.dt.latency.set_merge,,"mean, median, 95, 99, 100",ms,Time it takes to perform an Update Set operation,line,,python.d.plugin,riakkv -riak.dt.latency.map_merge,,"mean, median, 95, 99, 100",ms,Time it takes to perform an Update Map operation,line,,python.d.plugin,riakkv -riak.search.latency.query,,"median, min, 95, 99, 999, max",ms,Search query latency,line,,python.d.plugin,riakkv -riak.search.latency.index,,"median, min, 95, 99, 999, max",ms,Time it takes Search to index a new document,line,,python.d.plugin,riakkv 
-riak.consistent.latency.get,,"mean, median, 95, 99, 100",ms,Strongly consistent read latency,line,,python.d.plugin,riakkv -riak.consistent.latency.put,,"mean, median, 95, 99, 100",ms,Strongly consistent write latency,line,,python.d.plugin,riakkv -riak.vm,,processes,total,Total processes running in the Erlang VM,line,,python.d.plugin,riakkv -riak.vm.memory.processes,,"allocated, used",MB,Memory allocated & used by Erlang processes,line,,python.d.plugin,riakkv -riak.kv.siblings_encountered.get,,"mean, median, 95, 99, 100",siblings,Number of siblings encountered during GET operations by this node during the past minute,line,,python.d.plugin,riakkv -riak.kv.objsize.get,,"mean, median, 95, 99, 100",KB,Object size encountered by this node during the past minute,line,,python.d.plugin,riakkv -riak.search.vnodeq_size,,"mean, median, 95, 99, 100",messages,Number of unprocessed messages in the vnode message queues of Search on this node in the past minute,line,,python.d.plugin,riakkv -riak.search.index,,errors,errors,Number of document index errors encountered by Search,line,,python.d.plugin,riakkv -riak.core.protobuf_connections,,active,connections,Protocol buffer connections by status,line,,python.d.plugin,riakkv -riak.core.repairs,,read,repairs,Number of repair operations this node has coordinated,line,,python.d.plugin,riakkv -riak.core.fsm_active,,"get, put, secondary index, list keys",fsms,Active finite state machines by kind,line,,python.d.plugin,riakkv -riak.core.fsm_rejected,,"get, put",fsms,Finite state machines being rejected by Sidejobs overload protection,line,,python.d.plugin,riakkv -riak.search.index,,"bad_entry, extract_fail",writes,Number of writes to Search failed due to bad data format by reason,line,,python.d.plugin,riakkv diff --git a/collectors/python.d.plugin/samba/metadata.yaml b/collectors/python.d.plugin/samba/metadata.yaml index d9231a1f..43bca208 100644 --- a/collectors/python.d.plugin/samba/metadata.yaml +++ 
b/collectors/python.d.plugin/samba/metadata.yaml @@ -1,123 +1,196 @@ -meta: - plugin_name: python.d.plugin - module_name: samba - monitored_instance: - name: Samba - link: '' - categories: - - data-collection.storage-mount-points-and-filesystems - icon_filename: 'samba.jpg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor Samba performance for optimal network file sharing operations. Netdata provides real-time insights and alerts for efficient file sharing.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: samba + monitored_instance: + name: Samba + link: https://www.samba.org/samba/ + categories: + - data-collection.storage-mount-points-and-filesystems + icon_filename: "samba.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - samba + - file sharing + most_popular: false + overview: + data_collection: + metrics_description: "This collector monitors the performance metrics of Samba file sharing." + method_description: | + It is using the `smbstatus` command-line tool. 
+ + Executed commands: + + - `sudo -n smbstatus -P` + supported_platforms: + include: [] + exclude: [] + multi_instance: false + additional_permissions: + description: | + `smbstatus` is used, which can only be executed by `root`. It uses `sudo` and assumes that it is configured such that the `netdata` user can execute `smbstatus` as root without a password. + default_behavior: + auto_detection: + description: "After all the permissions are satisfied, the `smbstatus -P` binary is executed." + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Permissions and programs + description: | + To run the collector you need: + + - `smbstatus` program + - `sudo` program + - `smbd` must be compiled with profiling enabled + - `smbd` must be started either with the `-P 1` option or inside `smb.conf` using `smbd profiling level` + + The module uses `smbstatus`, which can only be executed by `root`. It uses `sudo` and assumes that it is configured such that the `netdata` user can execute `smbstatus` as root without a password. + + - add to your `/etc/sudoers` file: + + `which smbstatus` shows the full path to the binary. + + ```bash + netdata ALL=(root) NOPASSWD: /path/to/smbstatus + ``` + + - Reset Netdata's systemd unit [CapabilityBoundingSet](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Capabilities) (Linux distributions with systemd) + + The default CapabilityBoundingSet doesn't allow using `sudo`, and is quite strict in general. Resetting is not optimal, but a next-best solution given the inability to execute `smbstatus` using `sudo`. 
+ + + As the `root` user, do the following: + + ```cmd + mkdir /etc/systemd/system/netdata.service.d + echo -e '[Service]\nCapabilityBoundingSet=~' | tee /etc/systemd/system/netdata.service.d/unset-capability-bounding-set.conf + systemctl daemon-reload + systemctl restart netdata.service + ``` + configuration: + file: + name: python.d/samba.conf + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Basic + description: A basic example configuration. 
+ config: | + my_job_name: + name: my_name + update_every: 1 + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: syscall.rw - description: R/Ws - unit: "KiB/s" - chart_type: area - dimensions: - - name: sendfile - - name: recvfile - - name: smb2.rw - description: R/Ws - unit: "KiB/s" - chart_type: area - dimensions: - - name: readout - - name: writein - - name: readin - - name: writeout - - name: smb2.create_close - description: Create/Close - unit: "operations/s" - chart_type: line - dimensions: - - name: create - - name: close - - name: smb2.get_set_info - description: Info - unit: "operations/s" - chart_type: line - dimensions: - - name: getinfo - - name: setinfo - - name: smb2.find - description: Find - unit: "operations/s" - chart_type: line - dimensions: - - name: find - - name: smb2.notify - description: Notify - unit: "operations/s" - chart_type: line - dimensions: - - name: notify - - name: smb2.sm_counters - description: Lesser Ops - unit: "count" - chart_type: stacked - dimensions: - - name: tcon - - name: negprot - - name: tdis - - name: cancel - - name: logoff - - name: flush - - name: lock - - name: keepalive - - name: break - - name: sessetup + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." 
+ labels: [] + metrics: + - name: syscall.rw + description: R/Ws + unit: "KiB/s" + chart_type: area + dimensions: + - name: sendfile + - name: recvfile + - name: smb2.rw + description: R/Ws + unit: "KiB/s" + chart_type: area + dimensions: + - name: readout + - name: writein + - name: readin + - name: writeout + - name: smb2.create_close + description: Create/Close + unit: "operations/s" + chart_type: line + dimensions: + - name: create + - name: close + - name: smb2.get_set_info + description: Info + unit: "operations/s" + chart_type: line + dimensions: + - name: getinfo + - name: setinfo + - name: smb2.find + description: Find + unit: "operations/s" + chart_type: line + dimensions: + - name: find + - name: smb2.notify + description: Notify + unit: "operations/s" + chart_type: line + dimensions: + - name: notify + - name: smb2.sm_counters + description: Lesser Ops + unit: "count" + chart_type: stacked + dimensions: + - name: tcon + - name: negprot + - name: tdis + - name: cancel + - name: logoff + - name: flush + - name: lock + - name: keepalive + - name: break + - name: sessetup diff --git a/collectors/python.d.plugin/samba/metrics.csv b/collectors/python.d.plugin/samba/metrics.csv deleted file mode 100644 index 600181f6..00000000 --- a/collectors/python.d.plugin/samba/metrics.csv +++ /dev/null @@ -1,8 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -syscall.rw,,"sendfile, recvfile",KiB/s,R/Ws,area,,python.d.plugin,samba -smb2.rw,,"readout, writein, readin, writeout",KiB/s,R/Ws,area,,python.d.plugin,samba -smb2.create_close,,"create, close",operations/s,Create/Close,line,,python.d.plugin,samba -smb2.get_set_info,,"getinfo, setinfo",operations/s,Info,line,,python.d.plugin,samba -smb2.find,,find,operations/s,Find,line,,python.d.plugin,samba -smb2.notify,,notify,operations/s,Notify,line,,python.d.plugin,samba -smb2.sm_counters,,"tcon, negprot, tdis, cancel, logoff, flush, lock, keepalive, break, sessetup",count,Lesser 
Ops,stacked,,python.d.plugin,samba diff --git a/collectors/python.d.plugin/sensors/metadata.yaml b/collectors/python.d.plugin/sensors/metadata.yaml index 1c01554a..c3f68191 100644 --- a/collectors/python.d.plugin/sensors/metadata.yaml +++ b/collectors/python.d.plugin/sensors/metadata.yaml @@ -1,108 +1,175 @@ -meta: - plugin_name: python.d.plugin - module_name: sensors - monitored_instance: - name: Linux Sensors (lm-sensors) - link: '' - categories: - - data-collection.hardware-devices-and-sensors - icon_filename: 'microchip.svg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Examine Linux Sensors metrics with Netdata for insights into hardware health and performance. Enhance your system''s reliability with real-time hardware health insights.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: chip - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: sensors + monitored_instance: + name: Linux Sensors (lm-sensors) + link: https://hwmon.wiki.kernel.org/lm_sensors + categories: + - data-collection.hardware-devices-and-sensors + icon_filename: "microchip.svg" + related_resources: + integrations: + list: [] + 
+ info_provided_to_referring_integrations: + description: "" + keywords: + - sensors + - temperature + - voltage + - current + - power + - fan + - energy + - humidity + most_popular: false + overview: + data_collection: + metrics_description: | + Examine Linux Sensors metrics with Netdata for insights into hardware health and performance. + + Enhance your system's reliability with real-time hardware health insights. + method_description: > + Reads system sensors information (temperature, voltage, electric current, power, etc.) via [lm-sensors](https://hwmon.wiki.kernel.org/lm_sensors). + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: > + The following types of sensors are auto-detected: + + - temperature + - fan + - voltage + - current + - power + - energy + - humidity + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: python.d/sensors.conf + description: "" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: Config options + enabled: true + list: + - name: types + description: The types of sensors to collect. + default_value: "temperature, fan, voltage, current, power, energy, humidity" + required: true + - name: update_every + description: Sets the default data collection frequency. 
+ default_value: 1 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + examples: + folding: + enabled: true + title: Config + list: + - name: Default + folding: + enabled: false + description: Default configuration. + config: | + types: + - temperature + - fan + - voltage + - current + - power + - energy + - humidity + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: sensors.temperature - description: Temperature - unit: "Celsius" - chart_type: line - dimensions: - - name: a dimension per sensor - - name: sensors.voltage - description: Voltage - unit: "Volts" - chart_type: line - dimensions: - - name: a dimension per sensor - - name: sensors.current - description: Current - unit: "Ampere" - chart_type: line - dimensions: - - name: a dimension per sensor - - name: sensors.power - description: Power - unit: "Watt" - chart_type: line - dimensions: - - name: a dimension per sensor - - name: sensors.fan - description: Fans speed - unit: "Rotations/min" - chart_type: line - dimensions: - - name: a dimension per sensor - - name: sensors.energy - description: Energy - unit: "Joule" - chart_type: line - dimensions: - - name: a dimension per sensor - - name: sensors.humidity - description: Humidity - unit: "Percent" - chart_type: line - dimensions: - - name: a dimension per sensor + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: chip + description: > + Metrics related to chips. Each chip provides a set of the following metrics, each having the chip name in the metric name as reported by `sensors -u`. 
+ labels: [] + metrics: + - name: sensors.temperature + description: Temperature + unit: "Celsius" + chart_type: line + dimensions: + - name: a dimension per sensor + - name: sensors.voltage + description: Voltage + unit: "Volts" + chart_type: line + dimensions: + - name: a dimension per sensor + - name: sensors.current + description: Current + unit: "Ampere" + chart_type: line + dimensions: + - name: a dimension per sensor + - name: sensors.power + description: Power + unit: "Watt" + chart_type: line + dimensions: + - name: a dimension per sensor + - name: sensors.fan + description: Fans speed + unit: "Rotations/min" + chart_type: line + dimensions: + - name: a dimension per sensor + - name: sensors.energy + description: Energy + unit: "Joule" + chart_type: line + dimensions: + - name: a dimension per sensor + - name: sensors.humidity + description: Humidity + unit: "Percent" + chart_type: line + dimensions: + - name: a dimension per sensor diff --git a/collectors/python.d.plugin/sensors/metrics.csv b/collectors/python.d.plugin/sensors/metrics.csv deleted file mode 100644 index d49e1938..00000000 --- a/collectors/python.d.plugin/sensors/metrics.csv +++ /dev/null @@ -1,8 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -sensors.temperature,chip,a dimension per sensor,Celsius,Temperature,line,,python.d.plugin,sensors -sensors.voltage,chip,a dimension per sensor,Volts,Voltage,line,,python.d.plugin,sensors -sensors.current,chip,a dimension per sensor,Ampere,Current,line,,python.d.plugin,sensors -sensors.power,chip,a dimension per sensor,Watt,Power,line,,python.d.plugin,sensors -sensors.fan,chip,a dimension per sensor,Rotations/min,Fans speed,line,,python.d.plugin,sensors -sensors.energy,chip,a dimension per sensor,Joule,Energy,line,,python.d.plugin,sensors -sensors.humidity,chip,a dimension per sensor,Percent,Humidity,line,,python.d.plugin,sensors diff --git a/collectors/python.d.plugin/smartd_log/metadata.yaml 
b/collectors/python.d.plugin/smartd_log/metadata.yaml index 334fb90c..d1194969 100644 --- a/collectors/python.d.plugin/smartd_log/metadata.yaml +++ b/collectors/python.d.plugin/smartd_log/metadata.yaml @@ -1,276 +1,429 @@ -meta: - plugin_name: python.d.plugin - module_name: smartd_log - monitored_instance: - name: S.M.A.R.T. - link: '' - categories: - - data-collection.hardware-devices-and-sensors - icon_filename: 'smart.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor S.M.A.R.T. metrics for insights into your hard drive health and performance. Enhance your hard drive performance and reliability with real-time insights and alerts from Netdata.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: smartd_log + monitored_instance: + name: S.M.A.R.T. + link: "https://linux.die.net/man/8/smartd" + categories: + - data-collection.hardware-devices-and-sensors + icon_filename: "smart.png" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - smart + - S.M.A.R.T. 
+ - SCSI devices + - ATA devices + most_popular: false + overview: + data_collection: + metrics_description: | + This collector monitors HDD/SSD S.M.A.R.T. metrics about drive health and performance. + method_description: | + It reads `smartd` log files to collect the metrics. + supported_platforms: + include: [] + exclude: [] + multi_instance: false + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: Upon satisfying the prerequisites, the collector will auto-detect metrics if written in either `/var/log/smartd/` or `/var/lib/smartmontools/`. + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Configure `smartd` to write attribute information to files. + description: | + `smartd` must be running with `-A` option to write `smartd` attribute information to files. + + For this you need to set `smartd_opts` (or `SMARTD_ARGS`, check _smartd.service_ content) in `/etc/default/smartmontools`: + + ``` + # dump smartd attrs info every 600 seconds + smartd_opts="-A /var/log/smartd/ -i 600" + ``` + + You may need to create the smartd directory before smartd will write to it: + + ```sh + mkdir -p /var/log/smartd + ``` + + Otherwise, all the smartd `.csv` files may get written to `/var/lib/smartmontools` (default location). See also <https://linux.die.net/man/8/smartd> for more info on the `-A --attributelog=PREFIX` command. + + `smartd` appends logs at every run. It's strongly recommended to use `logrotate` for smartd files. + configuration: + file: + name: "python.d/smartd_log.conf" + options: + description: | + This particular collector does not need further configuration to work if permissions are satisfied, but you can always customize its data collection behavior. + + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. 
+ + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: log_path + description: path to smartd log files. + default_value: /var/log/smartd + required: true + - name: exclude_disks + description: Space-separated patterns. If the pattern is in the drive name, the module will not collect data for it. + default_value: "" + required: false + - name: age + description: Time in minutes since the last dump to file. + default_value: 30 + required: false + - name: update_every + description: Sets the default data collection frequency. + default_value: 1 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: > + Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: "" + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Basic + description: A basic configuration example. 
+ folding: + enabled: false + config: | + custom: + name: smartd_log + log_path: '/var/log/smartd/' + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: smartd_log.read_error_rate - description: Read Error Rate - unit: "value" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.seek_error_rate - description: Seek Error Rate - unit: "value" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.soft_read_error_rate - description: Soft Read Error Rate - unit: "errors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.write_error_rate - description: Write Error Rate - unit: "value" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.read_total_err_corrected - description: Read Error Corrected - unit: "errors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.read_total_unc_errors - description: Read Error Uncorrected - unit: "errors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.write_total_err_corrected - description: Write Error Corrected - unit: "errors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.write_total_unc_errors - description: Write Error Uncorrected - unit: "errors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.verify_total_err_corrected - description: Verify Error Corrected - unit: "errors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.verify_total_unc_errors - description: Verify Error Uncorrected - unit: "errors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.sata_interface_downshift - description: SATA Interface Downshift - unit: "events" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.udma_crc_error_count - 
description: UDMA CRC Error Count - unit: "errors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.throughput_performance - description: Throughput Performance - unit: "value" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.seek_time_performance - description: Seek Time Performance - unit: "value" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.start_stop_count - description: Start/Stop Count - unit: "events" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.power_on_hours_count - description: Power-On Hours Count - unit: "hours" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.power_cycle_count - description: Power Cycle Count - unit: "events" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.unexpected_power_loss - description: Unexpected Power Loss - unit: "events" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.spin_up_time - description: Spin-Up Time - unit: "ms" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.spin_up_retries - description: Spin-up Retries - unit: "retries" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.calibration_retries - description: Calibration Retries - unit: "retries" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.airflow_temperature_celsius - description: Airflow Temperature Celsius - unit: "celsius" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.temperature_celsius - description: Temperature - unit: "celsius" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.reallocated_sectors_count - description: Reallocated Sectors Count - unit: "sectors" - chart_type: line - dimensions: - - 
name: a dimension per device - - name: smartd_log.reserved_block_count - description: Reserved Block Count - unit: "percentage" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.program_fail_count - description: Program Fail Count - unit: "errors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.erase_fail_count - description: Erase Fail Count - unit: "failures" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.wear_leveller_worst_case_erase_count - description: Wear Leveller Worst Case Erase Count - unit: "erases" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.unused_reserved_nand_blocks - description: Unused Reserved NAND Blocks - unit: "blocks" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.reallocation_event_count - description: Reallocation Event Count - unit: "events" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.current_pending_sector_count - description: Current Pending Sector Count - unit: "sectors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.offline_uncorrectable_sector_count - description: Offline Uncorrectable Sector Count - unit: "sectors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.percent_lifetime_used - description: Percent Lifetime Used - unit: "percentage" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.media_wearout_indicator - description: Media Wearout Indicator - unit: "percentage" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.nand_writes_1gib - description: NAND Writes - unit: "GiB" - chart_type: line - dimensions: - - name: a dimension per device + folding: + title: Metrics + enabled: false + description: "The metrics listed below are split in terms 
of availability on device type, SCSI or ATA." + availability: + - "SCSI" + - "ATA" + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: smartd_log.read_error_rate + description: Read Error Rate + availability: + - ATA + unit: "value" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.seek_error_rate + description: Seek Error Rate + availability: + - ATA + unit: "value" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.soft_read_error_rate + description: Soft Read Error Rate + availability: + - ATA + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.write_error_rate + description: Write Error Rate + availability: + - ATA + unit: "value" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.read_total_err_corrected + description: Read Error Corrected + availability: + - SCSI + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.read_total_unc_errors + description: Read Error Uncorrected + availability: + - SCSI + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.write_total_err_corrected + description: Write Error Corrected + availability: + - SCSI + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.write_total_unc_errors + description: Write Error Uncorrected + availability: + - SCSI + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.verify_total_err_corrected + description: Verify Error Corrected + availability: + - SCSI + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.verify_total_unc_errors + description: Verify Error Uncorrected + availability: + - SCSI + unit: "errors" 
+ chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.sata_interface_downshift + description: SATA Interface Downshift + availability: + - ATA + unit: "events" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.udma_crc_error_count + description: UDMA CRC Error Count + availability: + - ATA + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.throughput_performance + description: Throughput Performance + availability: + - ATA + unit: "value" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.seek_time_performance + description: Seek Time Performance + availability: + - ATA + unit: "value" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.start_stop_count + description: Start/Stop Count + availability: + - ATA + unit: "events" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.power_on_hours_count + description: Power-On Hours Count + availability: + - ATA + unit: "hours" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.power_cycle_count + description: Power Cycle Count + availability: + - ATA + unit: "events" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.unexpected_power_loss + description: Unexpected Power Loss + availability: + - ATA + unit: "events" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.spin_up_time + description: Spin-Up Time + availability: + - ATA + unit: "ms" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.spin_up_retries + description: Spin-up Retries + availability: + - ATA + unit: "retries" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.calibration_retries + description: Calibration Retries + availability: + - ATA + unit: 
"retries" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.airflow_temperature_celsius + description: Airflow Temperature Celsius + availability: + - ATA + unit: "celsius" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.temperature_celsius + description: Temperature + availability: + - SCSI + - ATA + unit: "celsius" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.reallocated_sectors_count + description: Reallocated Sectors Count + availability: + - ATA + unit: "sectors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.reserved_block_count + description: Reserved Block Count + availability: + - ATA + unit: "percentage" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.program_fail_count + description: Program Fail Count + availability: + - ATA + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.erase_fail_count + description: Erase Fail Count + availability: + - ATA + unit: "failures" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.wear_leveller_worst_case_erase_count + description: Wear Leveller Worst Case Erase Count + availability: + - ATA + unit: "erases" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.unused_reserved_nand_blocks + description: Unused Reserved NAND Blocks + availability: + - ATA + unit: "blocks" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.reallocation_event_count + description: Reallocation Event Count + availability: + - ATA + unit: "events" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.current_pending_sector_count + description: Current Pending Sector Count + availability: + - ATA + unit: "sectors" + chart_type: line + dimensions: + - name: a 
dimension per device + - name: smartd_log.offline_uncorrectable_sector_count + description: Offline Uncorrectable Sector Count + availability: + - ATA + unit: "sectors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.percent_lifetime_used + description: Percent Lifetime Used + availability: + - ATA + unit: "percentage" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.media_wearout_indicator + description: Media Wearout Indicator + availability: + - ATA + unit: "percentage" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.nand_writes_1gib + description: NAND Writes + availability: + - ATA + unit: "GiB" + chart_type: line + dimensions: + - name: a dimension per device diff --git a/collectors/python.d.plugin/smartd_log/metrics.csv b/collectors/python.d.plugin/smartd_log/metrics.csv deleted file mode 100644 index 7dcc703c..00000000 --- a/collectors/python.d.plugin/smartd_log/metrics.csv +++ /dev/null @@ -1,36 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -smartd_log.read_error_rate,,a dimension per device,value,Read Error Rate,line,,python.d.plugin,smartd_log -smartd_log.seek_error_rate,,a dimension per device,value,Seek Error Rate,line,,python.d.plugin,smartd_log -smartd_log.soft_read_error_rate,,a dimension per device,errors,Soft Read Error Rate,line,,python.d.plugin,smartd_log -smartd_log.write_error_rate,,a dimension per device,value,Write Error Rate,line,,python.d.plugin,smartd_log -smartd_log.read_total_err_corrected,,a dimension per device,errors,Read Error Corrected,line,,python.d.plugin,smartd_log -smartd_log.read_total_unc_errors,,a dimension per device,errors,Read Error Uncorrected,line,,python.d.plugin,smartd_log -smartd_log.write_total_err_corrected,,a dimension per device,errors,Write Error Corrected,line,,python.d.plugin,smartd_log -smartd_log.write_total_unc_errors,,a dimension per device,errors,Write 
Error Uncorrected,line,,python.d.plugin,smartd_log -smartd_log.verify_total_err_corrected,,a dimension per device,errors,Verify Error Corrected,line,,python.d.plugin,smartd_log -smartd_log.verify_total_unc_errors,,a dimension per device,errors,Verify Error Uncorrected,line,,python.d.plugin,smartd_log -smartd_log.sata_interface_downshift,,a dimension per device,events,SATA Interface Downshift,line,,python.d.plugin,smartd_log -smartd_log.udma_crc_error_count,,a dimension per device,errors,UDMA CRC Error Count,line,,python.d.plugin,smartd_log -smartd_log.throughput_performance,,a dimension per device,value,Throughput Performance,line,,python.d.plugin,smartd_log -smartd_log.seek_time_performance,,a dimension per device,value,Seek Time Performance,line,,python.d.plugin,smartd_log -smartd_log.start_stop_count,,a dimension per device,events,Start/Stop Count,line,,python.d.plugin,smartd_log -smartd_log.power_on_hours_count,,a dimension per device,hours,Power-On Hours Count,line,,python.d.plugin,smartd_log -smartd_log.power_cycle_count,,a dimension per device,events,Power Cycle Count,line,,python.d.plugin,smartd_log -smartd_log.unexpected_power_loss,,a dimension per device,events,Unexpected Power Loss,line,,python.d.plugin,smartd_log -smartd_log.spin_up_time,,a dimension per device,ms,Spin-Up Time,line,,python.d.plugin,smartd_log -smartd_log.spin_up_retries,,a dimension per device,retries,Spin-up Retries,line,,python.d.plugin,smartd_log -smartd_log.calibration_retries,,a dimension per device,retries,Calibration Retries,line,,python.d.plugin,smartd_log -smartd_log.airflow_temperature_celsius,,a dimension per device,celsius,Airflow Temperature Celsius,line,,python.d.plugin,smartd_log -smartd_log.temperature_celsius,,"a dimension per device",celsius,Temperature,line,,python.d.plugin,smartd_log -smartd_log.reallocated_sectors_count,,a dimension per device,sectors,Reallocated Sectors Count,line,,python.d.plugin,smartd_log -smartd_log.reserved_block_count,,a dimension per 
device,percentage,Reserved Block Count,line,,python.d.plugin,smartd_log -smartd_log.program_fail_count,,a dimension per device,errors,Program Fail Count,line,,python.d.plugin,smartd_log -smartd_log.erase_fail_count,,a dimension per device,failures,Erase Fail Count,line,,python.d.plugin,smartd_log -smartd_log.wear_leveller_worst_case_erase_count,,a dimension per device,erases,Wear Leveller Worst Case Erase Count,line,,python.d.plugin,smartd_log -smartd_log.unused_reserved_nand_blocks,,a dimension per device,blocks,Unused Reserved NAND Blocks,line,,python.d.plugin,smartd_log -smartd_log.reallocation_event_count,,a dimension per device,events,Reallocation Event Count,line,,python.d.plugin,smartd_log -smartd_log.current_pending_sector_count,,a dimension per device,sectors,Current Pending Sector Count,line,,python.d.plugin,smartd_log -smartd_log.offline_uncorrectable_sector_count,,a dimension per device,sectors,Offline Uncorrectable Sector Count,line,,python.d.plugin,smartd_log -smartd_log.percent_lifetime_used,,a dimension per device,percentage,Percent Lifetime Used,line,,python.d.plugin,smartd_log -smartd_log.media_wearout_indicator,,a dimension per device,percentage,Media Wearout Indicator,line,,python.d.plugin,smartd_log -smartd_log.nand_writes_1gib,,a dimension per device,GiB,NAND Writes,line,,python.d.plugin,smartd_log diff --git a/collectors/python.d.plugin/spigotmc/metadata.yaml b/collectors/python.d.plugin/spigotmc/metadata.yaml index 5446bb30..5dea9f0c 100644 --- a/collectors/python.d.plugin/spigotmc/metadata.yaml +++ b/collectors/python.d.plugin/spigotmc/metadata.yaml @@ -1,88 +1,176 @@ -meta: - plugin_name: python.d.plugin - module_name: spigotmc - monitored_instance: - name: SpigotMC - link: '' - categories: - - data-collection.gaming - icon_filename: 'spigot.jfif' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - 
metrics_description: 'Monitor SpigotMC performance with Netdata for optimal Minecraft server operations. Improve your gaming experience with real-time server performance insights.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: spigotmc + monitored_instance: + name: SpigotMC + link: "" + categories: + - data-collection.gaming + icon_filename: "spigot.jfif" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - minecraft server + - spigotmc server + - spigot + most_popular: false + overview: + data_collection: + metrics_description: | + This collector monitors SpigotMC server performance, in the form of ticks per second average, memory utilization, and active users. + method_description: | + It sends the `tps`, `list` and `online` commands to the Server, and gathers the metrics from the responses. + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: By default, this collector will attempt to connect to a Spigot server running on the local host on port `25575`. 
+ limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Enable the Remote Console Protocol + description: | + Under your SpigotMC server's `server.properties` configuration file, you should set `enable-rcon` to `true`. + + This will allow the Server to listen and respond to queries over the rcon protocol. + configuration: + file: + name: "python.d/spigotmc.conf" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 1 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: > + Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed + running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: "" + required: false + - name: host + description: The host's IP to connect to. 
+ default_value: localhost + required: true + - name: port + description: The port the remote console is listening on. + default_value: 25575 + required: true + - name: password + description: Remote console password if any. + default_value: "" + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Basic + description: A basic configuration example. + folding: + enabled: false + config: | + local: + name: local_server + host: 127.0.0.1 + port: 25575 + - name: Basic Authentication + description: An example using basic password for authentication with the remote console. + config: | + local: + name: local_server_pass + host: 127.0.0.1 + port: 25575 + password: 'foobar' + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collecting metrics from local and remote instances. + config: | + local_server: + name : my_local_server + host : 127.0.0.1 + port: 25575 + + remote_server: + name : another_remote_server + host : 192.0.2.1 + port: 25575 + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: spigotmc.tps - description: Spigot Ticks Per Second - unit: "ticks" - chart_type: line - dimensions: - - name: 1 Minute Average - - name: 5 Minute Average - - name: 15 Minute Average - - name: spigotmc.users - description: Minecraft Users - unit: "users" - chart_type: area - dimensions: - - name: Users - - name: spigotmc.mem - description: Minecraft Memory Usage - unit: "MiB" - chart_type: line - dimensions: - - name: used - - name: allocated - - name: max + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." 
+ labels: [] + metrics: + - name: spigotmc.tps + description: Spigot Ticks Per Second + unit: "ticks" + chart_type: line + dimensions: + - name: 1 Minute Average + - name: 5 Minute Average + - name: 15 Minute Average + - name: spigotmc.users + description: Minecraft Users + unit: "users" + chart_type: area + dimensions: + - name: Users + - name: spigotmc.mem + description: Minecraft Memory Usage + unit: "MiB" + chart_type: line + dimensions: + - name: used + - name: allocated + - name: max diff --git a/collectors/python.d.plugin/spigotmc/metrics.csv b/collectors/python.d.plugin/spigotmc/metrics.csv deleted file mode 100644 index 8d040b95..00000000 --- a/collectors/python.d.plugin/spigotmc/metrics.csv +++ /dev/null @@ -1,4 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -spigotmc.tps,,"1 Minute Average, 5 Minute Average, 15 Minute Average",ticks,Spigot Ticks Per Second,line,,python.d.plugin,spigotmc -spigotmc.users,,Users,users,Minecraft Users,area,,python.d.plugin,spigotmc -spigotmc.mem,,"used, allocated, max",MiB,Minecraft Memory Usage,line,,python.d.plugin,spigotmc diff --git a/collectors/python.d.plugin/squid/metadata.yaml b/collectors/python.d.plugin/squid/metadata.yaml index 736a2204..d0c5b3ec 100644 --- a/collectors/python.d.plugin/squid/metadata.yaml +++ b/collectors/python.d.plugin/squid/metadata.yaml @@ -1,96 +1,174 @@ -meta: - plugin_name: python.d.plugin - module_name: squid - monitored_instance: - name: Squid - link: '' - categories: - - data-collection.web-servers-and-web-proxies - icon_filename: 'squid.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor Squid metrics with Netdata for efficient caching proxy for the Web performance. Improve your web caching efficiency with real-time Squid metrics.' 
- method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: squid instance - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: squid + monitored_instance: + name: Squid + link: "http://www.squid-cache.org/" + categories: + - data-collection.web-servers-and-web-proxies + icon_filename: "squid.png" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - squid + - web delivery + - squid caching proxy + most_popular: false + overview: + data_collection: + metrics_description: | + This collector monitors statistics about the Squid Clients and Servers, like bandwidth and requests. + method_description: "It collects metrics from the endpoint where Squid exposes its `counters` data." + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "By default, this collector will try to autodetect where Squid presents its `counters` data, by trying various configurations." 
+ limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Configure Squid's Cache Manager + description: | + Take a look at [Squid's official documentation](https://wiki.squid-cache.org/Features/CacheManager/Index#controlling-access-to-the-cache-manager) on how to configure access to the Cache Manager. + configuration: + file: + name: "python.d/squid.conf" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 1 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: > + Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: "local" + required: false + - name: host + description: The host to connect to. 
+ default_value: "" + required: true + - name: port + description: The port to connect to. + default_value: "" + required: true + - name: request + description: The URL to request from Squid. + default_value: "" + required: true + examples: + folding: + enabled: true + title: "Config" + list: + - name: Basic + description: A basic configuration example. + folding: + enabled: false + config: | + example_job_name: + name: 'local' + host: 'localhost' + port: 3128 + request: 'cache_object://localhost:3128/counters' + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collecting metrics from local and remote instances. + config: | + local_job: + name: 'local' + host: '127.0.0.1' + port: 3128 + request: 'cache_object://127.0.0.1:3128/counters' + + remote_job: + name: 'remote' + host: '192.0.2.1' + port: 3128 + request: 'cache_object://192.0.2.1:3128/counters' + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: squid.clients_net - description: Squid Client Bandwidth - unit: "kilobits/s" - chart_type: area - dimensions: - - name: in - - name: out - - name: hits - - name: squid.clients_requests - description: Squid Client Requests - unit: "requests/s" - chart_type: line - dimensions: - - name: requests - - name: hits - - name: errors - - name: squid.servers_net - description: Squid Server Bandwidth - unit: "kilobits/s" - chart_type: area - dimensions: - - name: in - - name: out - - name: squid.servers_requests - description: Squid Server Requests - unit: "requests/s" - chart_type: line - dimensions: - - name: requests - - name: errors + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: Squid instance + description: "These metrics refer to each monitored Squid instance." 
+ labels: [] + metrics: + - name: squid.clients_net + description: Squid Client Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: in + - name: out + - name: hits + - name: squid.clients_requests + description: Squid Client Requests + unit: "requests/s" + chart_type: line + dimensions: + - name: requests + - name: hits + - name: errors + - name: squid.servers_net + description: Squid Server Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: in + - name: out + - name: squid.servers_requests + description: Squid Server Requests + unit: "requests/s" + chart_type: line + dimensions: + - name: requests + - name: errors diff --git a/collectors/python.d.plugin/squid/metrics.csv b/collectors/python.d.plugin/squid/metrics.csv deleted file mode 100644 index c2899f2e..00000000 --- a/collectors/python.d.plugin/squid/metrics.csv +++ /dev/null @@ -1,5 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -squid.clients_net,squid instance,"in, out, hits",kilobits/s,Squid Client Bandwidth,area,,python.d.plugin,squid -squid.clients_requests,squid instance,"requests, hits, errors",requests/s,Squid Client Requests,line,,python.d.plugin,squid -squid.servers_net,squid instance,"in, out",kilobits/s,Squid Server Bandwidth,area,,python.d.plugin,squid -squid.servers_requests,squid instance,"requests, errors",requests/s,Squid Server Requests,line,,python.d.plugin,squid diff --git a/collectors/python.d.plugin/tomcat/metadata.yaml b/collectors/python.d.plugin/tomcat/metadata.yaml index 4f2a2b0e..c22f4f58 100644 --- a/collectors/python.d.plugin/tomcat/metadata.yaml +++ b/collectors/python.d.plugin/tomcat/metadata.yaml @@ -1,129 +1,200 @@ -meta: - plugin_name: python.d.plugin - module_name: tomcat - monitored_instance: - name: Tomcat - link: '' - categories: - - data-collection.web-servers-and-web-proxies - icon_filename: 'tomcat.png' - related_resources: - integrations: - list: [] - 
info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor Tomcat performance with Netdata for optimal Java servlet container operations. Improve your web application performance with real-time Tomcat metrics.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: tomcat + monitored_instance: + name: Tomcat + link: "https://tomcat.apache.org/" + categories: + - data-collection.web-servers-and-web-proxies + icon_filename: "tomcat.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - apache + - tomcat + - webserver + - websocket + - jakarta + - javaEE + most_popular: false + overview: + data_collection: + metrics_description: | + This collector monitors Tomcat metrics about bandwidth, processing time, threads and more. + method_description: | + It parses the information provided by the http endpoint of the `/manager/status` in XML format + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "You need to provide the username and the password, to access the webserver's status page. 
Create a separate user with read-only rights for this particular endpoint" + default_behavior: + auto_detection: + description: "If the Netdata Agent and the Tomcat webserver are in the same host, without configuration, module attempts to connect to http://localhost:8080/manager/status?XML=true, without any credentials. So it will probably fail." + limits: + description: "This module does not support SSL communication. If you want a Netdata Agent to monitor a Tomcat deployment, you shouldn't try to monitor it via public network (public internet). Credentials are passed by Netdata over an insecure connection" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Create a read-only `netdata` user, to monitor the `/status` endpoint. + description: You will need this user when configuring the collector + configuration: + file: + name: "python.d/tomcat.conf" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options per job" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. 
+ default_value: yes + required: false + - name: url + description: The URL of the Tomcat server's status endpoint. Always add the suffix ?XML=true. + default_value: no + required: true + - name: user + description: A valid user with read permission to access the /manager/status endpoint of the server. Required if the endpoint is password-protected. + default_value: no + required: false + - name: pass + description: A valid password for the user in question. Required if the endpoint is password-protected. + default_value: no + required: false + - name: connector_name + description: The connector component that communicates with a web connector via the AJP protocol, e.g. ajp-bio-8009 + default_value: "" + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Basic + folding: + enabled: false + description: A basic example configuration + config: | + localhost: + name : 'local' + url : 'http://localhost:8080/manager/status?XML=true' + - name: Using an IPv4 endpoint + description: A typical configuration using an IPv4 endpoint + config: | + local_ipv4: + name : 'local' + url : 'http://127.0.0.1:8080/manager/status?XML=true' + - name: Using an IPv6 endpoint + description: A typical configuration using an IPv6 endpoint + config: | + local_ipv6: + name : 'local' + url : 'http://[::1]:8080/manager/status?XML=true' + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: tomcat.accesses - description: Requests - unit: "requests/s" - chart_type: area - dimensions: - - name: accesses - - name: errors - - name: tomcat.bandwidth - description: Bandwidth - unit: "KiB/s" - chart_type: area - dimensions: - - name: sent - - name: received - - name: tomcat.processing_time - description: processing time - unit: "seconds" - chart_type: area - dimensions: - - name: processing time - - name: tomcat.threads - description: Threads - unit: "current threads" - chart_type: area - dimensions: - - name: current - - name: busy - - name: 
tomcat.jvm - description: JVM Memory Pool Usage - unit: "MiB" - chart_type: stacked - dimensions: - - name: free - - name: eden - - name: survivor - - name: tenured - - name: code cache - - name: compressed - - name: metaspace - - name: tomcat.jvm_eden - description: Eden Memory Usage - unit: "MiB" - chart_type: area - dimensions: - - name: used - - name: committed - - name: max - - name: tomcat.jvm_survivor - description: Survivor Memory Usage - unit: "MiB" - chart_type: area - dimensions: - - name: used - - name: committed - - name: max - - name: tomcat.jvm_tenured - description: Tenured Memory Usage - unit: "MiB" - chart_type: area - dimensions: - - name: used - - name: committed - - name: max + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: tomcat.accesses + description: Requests + unit: "requests/s" + chart_type: area + dimensions: + - name: accesses + - name: errors + - name: tomcat.bandwidth + description: Bandwidth + unit: "KiB/s" + chart_type: area + dimensions: + - name: sent + - name: received + - name: tomcat.processing_time + description: processing time + unit: "seconds" + chart_type: area + dimensions: + - name: processing time + - name: tomcat.threads + description: Threads + unit: "current threads" + chart_type: area + dimensions: + - name: current + - name: busy + - name: tomcat.jvm + description: JVM Memory Pool Usage + unit: "MiB" + chart_type: stacked + dimensions: + - name: free + - name: eden + - name: survivor + - name: tenured + - name: code cache + - name: compressed + - name: metaspace + - name: tomcat.jvm_eden + description: Eden Memory Usage + unit: "MiB" + chart_type: area + dimensions: + - name: used + - name: committed + - name: max + - name: tomcat.jvm_survivor + description: Survivor Memory Usage + unit: "MiB" + chart_type: area + dimensions: + - name: used + - name: 
committed + - name: max + - name: tomcat.jvm_tenured + description: Tenured Memory Usage + unit: "MiB" + chart_type: area + dimensions: + - name: used + - name: committed + - name: max diff --git a/collectors/python.d.plugin/tomcat/metrics.csv b/collectors/python.d.plugin/tomcat/metrics.csv deleted file mode 100644 index 6769fa3f..00000000 --- a/collectors/python.d.plugin/tomcat/metrics.csv +++ /dev/null @@ -1,9 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -tomcat.accesses,,"accesses, errors",requests/s,Requests,area,,python.d.plugin,tomcat -tomcat.bandwidth,,"sent, received",KiB/s,Bandwidth,area,,python.d.plugin,tomcat -tomcat.processing_time,,processing time,seconds,processing time,area,,python.d.plugin,tomcat -tomcat.threads,,"current, busy",current threads,Threads,area,,python.d.plugin,tomcat -tomcat.jvm,,"free, eden, survivor, tenured, code cache, compressed, metaspace",MiB,JVM Memory Pool Usage,stacked,,python.d.plugin,tomcat -tomcat.jvm_eden,,"used, committed, max",MiB,Eden Memory Usage,area,,python.d.plugin,tomcat -tomcat.jvm_survivor,,"used, committed, max",MiB,Survivor Memory Usage,area,,python.d.plugin,tomcat -tomcat.jvm_tenured,,"used, committed, max",MiB,Tenured Memory Usage,area,,python.d.plugin,tomcat diff --git a/collectors/python.d.plugin/tor/metadata.yaml b/collectors/python.d.plugin/tor/metadata.yaml index 7d02b2d7..d0ecc1a4 100644 --- a/collectors/python.d.plugin/tor/metadata.yaml +++ b/collectors/python.d.plugin/tor/metadata.yaml @@ -1,73 +1,140 @@ -meta: - plugin_name: python.d.plugin - module_name: tor - monitored_instance: - name: Tor - link: '' - categories: - - data-collection.vpns - icon_filename: 'tor.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor Tor metrics with Netdata for efficient anonymous communication operations. 
Enhance your anonymous communication with real-time insights and alerts.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: tor + monitored_instance: + name: Tor + link: 'https://www.torproject.org/' + categories: + - data-collection.vpns + icon_filename: 'tor.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: + - tor + - traffic + - vpn + most_popular: false + overview: + data_collection: + metrics_description: 'This collector monitors Tor bandwidth traffic .' + method_description: 'It connects to the Tor control port to collect traffic statistics.' + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: 'If no configuration is provided the collector will try to connect to 127.0.0.1:9051 to detect a running tor instance.' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: + - title: 'Required Tor configuration' + description: | + Add to /etc/tor/torrc: + + ControlPort 9051 + + For more options please read the manual. 
+ configuration: + file: + name: python.d/tor.conf + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: '' + required: false + - name: control_addr + description: Tor control IP address + default_value: 127.0.0.1 + required: false + - name: control_port + description: Tor control port. Can be either a tcp port, or a path to a socket file. + default_value: 9051 + required: false + - name: password + description: Tor control password + default_value: '' + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Local TCP + description: A basic TCP configuration. 
`local_addr` is ommited and will default to `127.0.0.1` + config: | + local_tcp: + name: 'local' + control_port: 9051 + password: <password> # if required + - name: Local socket + description: A basic local socket configuration + config: | + local_socket: + name: 'local' + control_port: '/var/run/tor/control' + password: <password> # if required + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: tor.traffic - description: Tor Traffic - unit: "KiB/s" - chart_type: area - dimensions: - - name: read - - name: write + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: tor.traffic + description: Tor Traffic + unit: "KiB/s" + chart_type: area + dimensions: + - name: read + - name: write diff --git a/collectors/python.d.plugin/tor/metrics.csv b/collectors/python.d.plugin/tor/metrics.csv deleted file mode 100644 index 62402d8d..00000000 --- a/collectors/python.d.plugin/tor/metrics.csv +++ /dev/null @@ -1,2 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -tor.traffic,,"read, write",KiB/s,Tor Traffic,area,,python.d.plugin,tor diff --git a/collectors/python.d.plugin/traefik/metadata.yaml b/collectors/python.d.plugin/traefik/metadata.yaml index b817d422..dcfb098a 100644 --- a/collectors/python.d.plugin/traefik/metadata.yaml +++ b/collectors/python.d.plugin/traefik/metadata.yaml @@ -1,122 +1,125 @@ -meta: - plugin_name: python.d.plugin - module_name: traefik - monitored_instance: - name: python.d traefik - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - 
additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] - metrics: - - name: traefik.response_statuses - description: Response statuses - unit: "requests/s" - chart_type: stacked - dimensions: - - name: success - - name: error - - name: redirect - - name: bad - - name: other - - name: traefik.response_codes - description: Responses by codes - unit: "requests/s" - chart_type: stacked - dimensions: - - name: 2xx - - name: 5xx - - name: 3xx - - name: 4xx - - name: 1xx - - name: other - - name: traefik.detailed_response_codes - description: Detailed response codes - unit: "requests/s" - chart_type: stacked - dimensions: - - name: a dimension for each response code family - - name: traefik.requests - description: Requests - unit: "requests/s" - chart_type: line - dimensions: - - name: requests - - name: traefik.total_response_time - description: Total response time - unit: "seconds" - chart_type: line - dimensions: - - name: response - - name: traefik.average_response_time - description: Average response time - unit: "milliseconds" - chart_type: line - dimensions: - - name: response - - name: traefik.average_response_time_per_iteration - description: Average response time per iteration - unit: "milliseconds" - chart_type: line - dimensions: - - name: response - - name: traefik.uptime - description: Uptime - unit: "seconds" - chart_type: line - dimensions: - - name: uptime +# This collector will not appear in documentation, as the go 
version is preferred, +# https://github.com/netdata/go.d.plugin/blob/master/modules/traefik/README.md +# +# meta: +# plugin_name: python.d.plugin +# module_name: traefik +# monitored_instance: +# name: python.d traefik +# link: '' +# categories: [] +# icon_filename: '' +# related_resources: +# integrations: +# list: [] +# info_provided_to_referring_integrations: +# description: '' +# keywords: [] +# most_popular: false +# overview: +# data_collection: +# metrics_description: '' +# method_description: '' +# supported_platforms: +# include: [] +# exclude: [] +# multi_instance: true +# additional_permissions: +# description: '' +# default_behavior: +# auto_detection: +# description: '' +# limits: +# description: '' +# performance_impact: +# description: '' +# setup: +# prerequisites: +# list: [] +# configuration: +# file: +# name: '' +# description: '' +# options: +# description: '' +# folding: +# title: '' +# enabled: true +# list: [] +# examples: +# folding: +# enabled: true +# title: '' +# list: [] +# troubleshooting: +# problems: +# list: [] +# alerts: [] +# metrics: +# folding: +# title: Metrics +# enabled: false +# description: "" +# availability: [] +# scopes: +# - name: global +# description: "" +# labels: [] +# metrics: +# - name: traefik.response_statuses +# description: Response statuses +# unit: "requests/s" +# chart_type: stacked +# dimensions: +# - name: success +# - name: error +# - name: redirect +# - name: bad +# - name: other +# - name: traefik.response_codes +# description: Responses by codes +# unit: "requests/s" +# chart_type: stacked +# dimensions: +# - name: 2xx +# - name: 5xx +# - name: 3xx +# - name: 4xx +# - name: 1xx +# - name: other +# - name: traefik.detailed_response_codes +# description: Detailed response codes +# unit: "requests/s" +# chart_type: stacked +# dimensions: +# - name: a dimension for each response code family +# - name: traefik.requests +# description: Requests +# unit: "requests/s" +# chart_type: line +# dimensions: +# - 
name: requests +# - name: traefik.total_response_time +# description: Total response time +# unit: "seconds" +# chart_type: line +# dimensions: +# - name: response +# - name: traefik.average_response_time +# description: Average response time +# unit: "milliseconds" +# chart_type: line +# dimensions: +# - name: response +# - name: traefik.average_response_time_per_iteration +# description: Average response time per iteration +# unit: "milliseconds" +# chart_type: line +# dimensions: +# - name: response +# - name: traefik.uptime +# description: Uptime +# unit: "seconds" +# chart_type: line +# dimensions: +# - name: uptime diff --git a/collectors/python.d.plugin/traefik/metrics.csv b/collectors/python.d.plugin/traefik/metrics.csv deleted file mode 100644 index 77e1c294..00000000 --- a/collectors/python.d.plugin/traefik/metrics.csv +++ /dev/null @@ -1,9 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -traefik.response_statuses,,"success, error, redirect, bad, other",requests/s,Response statuses,stacked,,python.d.plugin,traefik -traefik.response_codes,,"2xx, 5xx, 3xx, 4xx, 1xx, other",requests/s,Responses by codes,stacked,,python.d.plugin,traefik -traefik.detailed_response_codes,,a dimension for each response code family,requests/s,Detailed response codes,stacked,,python.d.plugin,traefik -traefik.requests,,requests,requests/s,Requests,line,,python.d.plugin,traefik -traefik.total_response_time,,response,seconds,Total response time,line,,python.d.plugin,traefik -traefik.average_response_time,,response,milliseconds,Average response time,line,,python.d.plugin,traefik -traefik.average_response_time_per_iteration,,response,milliseconds,Average response time per iteration,line,,python.d.plugin,traefik -traefik.uptime,,uptime,seconds,Uptime,line,,python.d.plugin,traefik diff --git a/collectors/python.d.plugin/uwsgi/metadata.yaml b/collectors/python.d.plugin/uwsgi/metadata.yaml index 3447f532..cdb090ac 100644 --- 
a/collectors/python.d.plugin/uwsgi/metadata.yaml +++ b/collectors/python.d.plugin/uwsgi/metadata.yaml @@ -1,114 +1,201 @@ -meta: - plugin_name: python.d.plugin - module_name: uwsgi - monitored_instance: - name: uWSGI - link: '' - categories: - - data-collection.web-servers-and-web-proxies - icon_filename: 'uwsgi.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor uWSGI performance for optimal application server operations. Monitor request rates, worker statuses, and error rates to ensure efficient application delivery.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: uwsgi + monitored_instance: + name: uWSGI + link: "https://github.com/unbit/uwsgi/tree/2.0.21" + categories: + - data-collection.web-servers-and-web-proxies + icon_filename: "uwsgi.svg" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - application server + - python + - web applications + most_popular: false + overview: + data_collection: + metrics_description: "This collector monitors uWSGI metrics about requests, workers, memory and 
more." + method_description: "It collects every metric exposed from the stats server of uWSGI, either from the `stats.socket` or from the web server's TCP/IP socket." + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "This collector will auto-detect uWSGI instances deployed on the local host, running on port 1717, or exposing stats on socket `tmp/stats.socket`." + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Enable the uWSGI Stats server + description: | + Make sure that you uWSGI exposes it's metrics via a Stats server. + + Source: https://uwsgi-docs.readthedocs.io/en/latest/StatsServer.html + configuration: + file: + name: "python.d/uwsgi.conf" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. 
+ default_value: yes + required: false + - name: name + description: The JOB's name as it will appear at the dashboard (by default is the job_name) + default_value: job_name + required: false + - name: socket + description: The 'path/to/uwsgistats.sock' + default_value: no + required: false + - name: host + description: The host to connect to + default_value: no + required: false + - name: port + description: The port to connect to + default_value: no + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Basic (default out-of-the-box) + description: A basic example configuration, one job will run at a time. Autodetect mechanism uses it by default. As all JOBs have the same name, only one can run at a time. + config: | + socket: + name : 'local' + socket : '/tmp/stats.socket' + + localhost: + name : 'local' + host : 'localhost' + port : 1717 + + localipv4: + name : 'local' + host : '127.0.0.1' + port : 1717 + + localipv6: + name : 'local' + host : '::1' + port : 1717 + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collecting metrics from local and remote instances. 
+ config: | + local: + name : 'local' + host : 'localhost' + port : 1717 + + remote: + name : 'remote' + host : '192.0.2.1' + port : 1717 + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: uwsgi.requests - description: Requests - unit: "requests/s" - chart_type: stacked - dimensions: - - name: a dimension per worker - - name: uwsgi.tx - description: Transmitted data - unit: "KiB/s" - chart_type: stacked - dimensions: - - name: a dimension per worker - - name: uwsgi.avg_rt - description: Average request time - unit: "milliseconds" - chart_type: line - dimensions: - - name: a dimension per worker - - name: uwsgi.memory_rss - description: RSS (Resident Set Size) - unit: "MiB" - chart_type: stacked - dimensions: - - name: a dimension per worker - - name: uwsgi.memory_vsz - description: VSZ (Virtual Memory Size) - unit: "MiB" - chart_type: stacked - dimensions: - - name: a dimension per worker - - name: uwsgi.exceptions - description: Exceptions - unit: "exceptions" - chart_type: line - dimensions: - - name: exceptions - - name: uwsgi.harakiris - description: Harakiris - unit: "harakiris" - chart_type: line - dimensions: - - name: harakiris - - name: uwsgi.respawns - description: Respawns - unit: "respawns" - chart_type: line - dimensions: - - name: respawns + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." 
+ labels: [] + metrics: + - name: uwsgi.requests + description: Requests + unit: "requests/s" + chart_type: stacked + dimensions: + - name: a dimension per worker + - name: uwsgi.tx + description: Transmitted data + unit: "KiB/s" + chart_type: stacked + dimensions: + - name: a dimension per worker + - name: uwsgi.avg_rt + description: Average request time + unit: "milliseconds" + chart_type: line + dimensions: + - name: a dimension per worker + - name: uwsgi.memory_rss + description: RSS (Resident Set Size) + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per worker + - name: uwsgi.memory_vsz + description: VSZ (Virtual Memory Size) + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per worker + - name: uwsgi.exceptions + description: Exceptions + unit: "exceptions" + chart_type: line + dimensions: + - name: exceptions + - name: uwsgi.harakiris + description: Harakiris + unit: "harakiris" + chart_type: line + dimensions: + - name: harakiris + - name: uwsgi.respawns + description: Respawns + unit: "respawns" + chart_type: line + dimensions: + - name: respawns diff --git a/collectors/python.d.plugin/uwsgi/metrics.csv b/collectors/python.d.plugin/uwsgi/metrics.csv deleted file mode 100644 index c974653f..00000000 --- a/collectors/python.d.plugin/uwsgi/metrics.csv +++ /dev/null @@ -1,9 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -uwsgi.requests,,a dimension per worker,requests/s,Requests,stacked,,python.d.plugin,uwsgi -uwsgi.tx,,a dimension per worker,KiB/s,Transmitted data,stacked,,python.d.plugin,uwsgi -uwsgi.avg_rt,,a dimension per worker,milliseconds,Average request time,line,,python.d.plugin,uwsgi -uwsgi.memory_rss,,a dimension per worker,MiB,RSS (Resident Set Size),stacked,,python.d.plugin,uwsgi -uwsgi.memory_vsz,,a dimension per worker,MiB,VSZ (Virtual Memory Size),stacked,,python.d.plugin,uwsgi -uwsgi.exceptions,,exceptions,exceptions,Exceptions,line,,python.d.plugin,uwsgi 
-uwsgi.harakiris,,harakiris,harakiris,Harakiris,line,,python.d.plugin,uwsgi -uwsgi.respawns,,respawns,respawns,Respawns,line,,python.d.plugin,uwsgi diff --git a/collectors/python.d.plugin/varnish/metadata.yaml b/collectors/python.d.plugin/varnish/metadata.yaml index 267279fa..aa245c25 100644 --- a/collectors/python.d.plugin/varnish/metadata.yaml +++ b/collectors/python.d.plugin/varnish/metadata.yaml @@ -1,192 +1,253 @@ -meta: - plugin_name: python.d.plugin - module_name: varnish - monitored_instance: - name: Varnish - link: '' - categories: - - data-collection.web-servers-and-web-proxies - icon_filename: 'varnish.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor Varnish metrics with Netdata for efficient HTTP accelerator performance. Enhance your web performance with real-time Varnish metrics.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] - metrics: - - name: varnish.session_connection - description: Connections Statistics - unit: "connections/s" - chart_type: line - dimensions: - - name: accepted - - name: dropped - - name: varnish.client_requests - description: Client Requests - unit: "requests/s" - chart_type: line - dimensions: - - name: received - - name: 
varnish.all_time_hit_rate - description: All History Hit Rate Ratio - unit: "percentage" - chart_type: stacked - dimensions: - - name: hit - - name: miss - - name: hitpass - - name: varnish.current_poll_hit_rate - description: Current Poll Hit Rate Ratio - unit: "percentage" - chart_type: stacked - dimensions: - - name: hit - - name: miss - - name: hitpass - - name: varnish.cached_objects_expired - description: Expired Objects - unit: "expired/s" - chart_type: line - dimensions: - - name: objects - - name: varnish.cached_objects_nuked - description: Least Recently Used Nuked Objects - unit: "nuked/s" - chart_type: line - dimensions: - - name: objects - - name: varnish.threads_total - description: Number Of Threads In All Pools - unit: "number" - chart_type: line - dimensions: - - name: None - - name: varnish.threads_statistics - description: Threads Statistics - unit: "threads/s" - chart_type: line - dimensions: - - name: created - - name: failed - - name: limited - - name: varnish.threads_queue_len - description: Current Queue Length - unit: "requests" - chart_type: line - dimensions: - - name: in queue - - name: varnish.backend_connections - description: Backend Connections Statistics - unit: "connections/s" - chart_type: line - dimensions: - - name: successful - - name: unhealthy - - name: reused - - name: closed - - name: recycled - - name: failed - - name: varnish.backend_requests - description: Requests To The Backend - unit: "requests/s" - chart_type: line - dimensions: - - name: sent - - name: varnish.esi_statistics - description: ESI Statistics - unit: "problems/s" - chart_type: line - dimensions: - - name: errors - - name: warnings - - name: varnish.memory_usage - description: Memory Usage - unit: "MiB" - chart_type: stacked - dimensions: - - name: free - - name: allocated - - name: varnish.uptime - description: Uptime - unit: "seconds" - chart_type: line - dimensions: - - name: uptime - - name: Backend - description: "" - labels: [] +plugin_name: 
python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: varnish + monitored_instance: + name: Varnish + link: https://varnish-cache.org/ + categories: + - data-collection.web-servers-and-web-proxies + icon_filename: 'varnish.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: + - varnish + - varnishstat + - varnishd + - cache + - web server + - web cache + most_popular: false + overview: + data_collection: + metrics_description: | + This collector monitors Varnish metrics about HTTP accelerator global, Backends (VBE) and Storages (SMF, SMA, MSE) statistics. + + Note that both, Varnish-Cache (free and open source) and Varnish-Plus (Commercial/Enterprise version), are supported. + method_description: | + It uses the `varnishstat` tool in order to collect the metrics. + supported_platforms: + include: [] + exclude: [] + multi_instance: false + additional_permissions: + description: | + `netdata` user must be a member of the `varnish` group. + default_behavior: + auto_detection: + description: By default, if the permissions are satisfied, the `varnishstat` tool will be executed on the host. + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: + - title: Provide the necessary permissions + description: | + In order for the collector to work, you need to add the `netdata` user to the `varnish` user group, so that it can execute the `varnishstat` tool: + + ``` + usermod -aG varnish netdata + ``` + configuration: + file: + name: python.d/varnish.conf + description: '' + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. 
+ + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: Config options + enabled: true + list: + - name: instance_name + description: the name of the varnishd instance to get logs from. If not specified, the host name is used. + default_value: '<host name>' + required: true + - name: update_every + description: Sets the default data collection frequency. + default_value: 10 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: '' + required: false + examples: + folding: + enabled: true + title: 'Config' + list: + - name: Basic + description: An example configuration. 
+ folding: + enabled: false + config: | + job_name: + instance_name: '<name-of-varnishd-instance>' + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: varnish.backend - description: Backend {backend_name} - unit: "kilobits/s" - chart_type: area - dimensions: - - name: header - - name: body - - name: Storage - description: "" - labels: [] - metrics: - - name: varnish.storage_usage - description: Storage {storage_name} Usage - unit: "KiB" - chart_type: stacked - dimensions: - - name: free - - name: allocated - - name: varnish.storage_alloc_objs - description: Storage {storage_name} Allocated Objects - unit: "objects" - chart_type: line - dimensions: - - name: allocated + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: varnish.session_connection + description: Connections Statistics + unit: "connections/s" + chart_type: line + dimensions: + - name: accepted + - name: dropped + - name: varnish.client_requests + description: Client Requests + unit: "requests/s" + chart_type: line + dimensions: + - name: received + - name: varnish.all_time_hit_rate + description: All History Hit Rate Ratio + unit: "percentage" + chart_type: stacked + dimensions: + - name: hit + - name: miss + - name: hitpass + - name: varnish.current_poll_hit_rate + description: Current Poll Hit Rate Ratio + unit: "percentage" + chart_type: stacked + dimensions: + - name: hit + - name: miss + - name: hitpass + - name: varnish.cached_objects_expired + description: Expired Objects + unit: "expired/s" + chart_type: line + dimensions: + - name: objects + - name: varnish.cached_objects_nuked + description: Least Recently Used Nuked Objects + unit: "nuked/s" + chart_type: line + dimensions: + - name: objects + - name: varnish.threads_total + description: Number Of Threads In All Pools + unit: "number" + chart_type: line + 
dimensions: + - name: None + - name: varnish.threads_statistics + description: Threads Statistics + unit: "threads/s" + chart_type: line + dimensions: + - name: created + - name: failed + - name: limited + - name: varnish.threads_queue_len + description: Current Queue Length + unit: "requests" + chart_type: line + dimensions: + - name: in queue + - name: varnish.backend_connections + description: Backend Connections Statistics + unit: "connections/s" + chart_type: line + dimensions: + - name: successful + - name: unhealthy + - name: reused + - name: closed + - name: recycled + - name: failed + - name: varnish.backend_requests + description: Requests To The Backend + unit: "requests/s" + chart_type: line + dimensions: + - name: sent + - name: varnish.esi_statistics + description: ESI Statistics + unit: "problems/s" + chart_type: line + dimensions: + - name: errors + - name: warnings + - name: varnish.memory_usage + description: Memory Usage + unit: "MiB" + chart_type: stacked + dimensions: + - name: free + - name: allocated + - name: varnish.uptime + description: Uptime + unit: "seconds" + chart_type: line + dimensions: + - name: uptime + - name: Backend + description: "" + labels: [] + metrics: + - name: varnish.backend + description: Backend {backend_name} + unit: "kilobits/s" + chart_type: area + dimensions: + - name: header + - name: body + - name: Storage + description: "" + labels: [] + metrics: + - name: varnish.storage_usage + description: Storage {storage_name} Usage + unit: "KiB" + chart_type: stacked + dimensions: + - name: free + - name: allocated + - name: varnish.storage_alloc_objs + description: Storage {storage_name} Allocated Objects + unit: "objects" + chart_type: line + dimensions: + - name: allocated diff --git a/collectors/python.d.plugin/varnish/metrics.csv b/collectors/python.d.plugin/varnish/metrics.csv deleted file mode 100644 index bafb9fd1..00000000 --- a/collectors/python.d.plugin/varnish/metrics.csv +++ /dev/null @@ -1,18 +0,0 @@ 
-metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -varnish.session_connection,,"accepted, dropped",connections/s,Connections Statistics,line,,python.d.plugin,varnish -varnish.client_requests,,received,requests/s,Client Requests,line,,python.d.plugin,varnish -varnish.all_time_hit_rate,,"hit, miss, hitpass",percentage,All History Hit Rate Ratio,stacked,,python.d.plugin,varnish -varnish.current_poll_hit_rate,,"hit, miss, hitpass",percentage,Current Poll Hit Rate Ratio,stacked,,python.d.plugin,varnish -varnish.cached_objects_expired,,objects,expired/s,Expired Objects,line,,python.d.plugin,varnish -varnish.cached_objects_nuked,,objects,nuked/s,Least Recently Used Nuked Objects,line,,python.d.plugin,varnish -varnish.threads_total,,None,number,Number Of Threads In All Pools,line,,python.d.plugin,varnish -varnish.threads_statistics,,"created, failed, limited",threads/s,Threads Statistics,line,,python.d.plugin,varnish -varnish.threads_queue_len,,in queue,requests,Current Queue Length,line,,python.d.plugin,varnish -varnish.backend_connections,,"successful, unhealthy, reused, closed, recycled, failed",connections/s,Backend Connections Statistics,line,,python.d.plugin,varnish -varnish.backend_requests,,sent,requests/s,Requests To The Backend,line,,python.d.plugin,varnish -varnish.esi_statistics,,"errors, warnings",problems/s,ESI Statistics,line,,python.d.plugin,varnish -varnish.memory_usage,,"free, allocated",MiB,Memory Usage,stacked,,python.d.plugin,varnish -varnish.uptime,,uptime,seconds,Uptime,line,,python.d.plugin,varnish -varnish.backend,Backend,"header, body",kilobits/s,Backend {backend_name},area,,python.d.plugin,varnish -varnish.storage_usage,Storage,"free, allocated",KiB,Storage {storage_name} Usage,stacked,,python.d.plugin,varnish -varnish.storage_alloc_objs,Storage,allocated,objects,Storage {storage_name} Allocated Objects,line,,python.d.plugin,varnish diff --git a/collectors/python.d.plugin/w1sensor/metadata.yaml 
b/collectors/python.d.plugin/w1sensor/metadata.yaml index 5d495fe5..7b076823 100644 --- a/collectors/python.d.plugin/w1sensor/metadata.yaml +++ b/collectors/python.d.plugin/w1sensor/metadata.yaml @@ -1,72 +1,119 @@ -meta: - plugin_name: python.d.plugin - module_name: w1sensor - monitored_instance: - name: 1-Wire Sensors - link: '' - categories: - - data-collection.hardware-devices-and-sensors - icon_filename: '1-wire.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor 1-Wire Sensors metrics with Netdata for optimal environmental conditions monitoring. Enhance your environmental monitoring with real-time insights and alerts.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: w1sensor + monitored_instance: + name: 1-Wire Sensors + link: "https://www.analog.com/en/product-category/1wire-temperature-sensors.html" + categories: + - data-collection.hardware-devices-and-sensors + icon_filename: "1-wire.png" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - temperature + - sensor + - 1-wire + most_popular: 
false + overview: + data_collection: + metrics_description: "Monitor 1-Wire Sensors metrics with Netdata for optimal environmental conditions monitoring. Enhance your environmental monitoring with real-time insights and alerts." + method_description: "The collector uses the wire, w1_gpio, and w1_therm kernel modules. Currently temperature sensors are supported and automatically detected." + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "The collector will try to auto detect available 1-Wire devices." + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: "Required Linux kernel modules" + description: "Make sure `wire`, `w1_gpio`, and `w1_therm` kernel modules are loaded." + configuration: + file: + name: python.d/w1sensor.conf + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. 
+ default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: "" + required: false + - name: name_<1-Wire id> + description: This allows associating a human readable name with a sensor's 1-Wire identifier. + default_value: "" + required: false + examples: + folding: + enabled: false + title: "Config" + list: + - name: Provide human readable names + description: Associate two 1-Wire identifiers with human readable names. + config: | + sensors: + name_00000022276e: 'Machine room' + name_00000022298f: 'Rack 12' + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: w1sensor.temp - description: 1-Wire Temperature Sensor - unit: "Celsius" - chart_type: line - dimensions: - - name: a dimension per sensor + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." 
+ labels: [] + metrics: + - name: w1sensor.temp + description: 1-Wire Temperature Sensor + unit: "Celsius" + chart_type: line + dimensions: + - name: a dimension per sensor diff --git a/collectors/python.d.plugin/w1sensor/metrics.csv b/collectors/python.d.plugin/w1sensor/metrics.csv deleted file mode 100644 index 54564934..00000000 --- a/collectors/python.d.plugin/w1sensor/metrics.csv +++ /dev/null @@ -1,2 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -w1sensor.temp,,a dimension per sensor,Celsius,1-Wire Temperature Sensor,line,,python.d.plugin,w1sensor diff --git a/collectors/python.d.plugin/w1sensor/w1sensor.conf b/collectors/python.d.plugin/w1sensor/w1sensor.conf index 17271001..b60d2865 100644 --- a/collectors/python.d.plugin/w1sensor/w1sensor.conf +++ b/collectors/python.d.plugin/w1sensor/w1sensor.conf @@ -59,7 +59,7 @@ # penalty: yes # the JOB's penalty # autodetection_retry: 0 # the JOB's re-check interval in seconds # -# Additionally to the above, example also supports the following: +# Additionally to the above, w1sensor also supports the following: # # name_<1-Wire id>: '<human readable name>' # This allows associating a human readable name with a sensor's 1-Wire diff --git a/collectors/python.d.plugin/zscores/metadata.yaml b/collectors/python.d.plugin/zscores/metadata.yaml index 740d91e4..388e9b46 100644 --- a/collectors/python.d.plugin/zscores/metadata.yaml +++ b/collectors/python.d.plugin/zscores/metadata.yaml @@ -1,77 +1,187 @@ -meta: - plugin_name: python.d.plugin - module_name: zscores - monitored_instance: - name: python.d zscores - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: 
'' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: zscores + monitored_instance: + name: python.d zscores + link: https://en.wikipedia.org/wiki/Standard_score + categories: + - data-collection.other + icon_filename: "" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - zscore + - z-score + - standard score + - standard deviation + - anomaly detection + - statistical anomaly detection + most_popular: false + overview: + data_collection: + metrics_description: | + By using smoothed, rolling [Z-Scores](https://en.wikipedia.org/wiki/Standard_score) for selected metrics or charts you can narrow down your focus and shorten root cause analysis. + method_description: | + This collector uses the [Netdata rest api](https://github.com/netdata/netdata/blob/master/web/api/README.md) to get the `mean` and `stddev` + for each dimension on specified charts over a time range (defined by `train_secs` and `offset_secs`). + + For each dimension it will calculate a Z-Score as `z = (x - mean) / stddev` (clipped at `z_clip`). Scores are then smoothed over + time (`z_smooth_n`) and, if `mode: 'per_chart'`, aggregated across dimensions to a smoothed, rolling chart level Z-Score at each time step. 
+ supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Python Requirements + description: | + This collector will only work with Python 3 and requires the below packages be installed. + + ```bash + # become netdata user + sudo su -s /bin/bash netdata + # install required packages + pip3 install numpy pandas requests netdata-pandas==0.0.38 + ``` + configuration: + file: + name: python.d/zscores.conf + description: "" + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: charts_regex + description: what charts to pull data for - A regex like `system\..*|` or `system\..*|apps.cpu|apps.mem` etc. + default_value: "system\\..*" + required: true + - name: train_secs + description: length of time (in seconds) to base calculations off for mean and stddev. + default_value: 14400 + required: true + - name: offset_secs + description: offset (in seconds) preceding latest data to ignore when calculating mean and stddev. + default_value: 300 + required: true + - name: train_every_n + description: recalculate the mean and stddev every n steps of the collector. 
+ default_value: 900 + required: true + - name: z_smooth_n + description: smooth the z score (to reduce sensitivity to spikes) by averaging it over last n values. + default_value: 15 + required: true + - name: z_clip + description: cap absolute value of zscore (before smoothing) for better stability. + default_value: 10 + required: true + - name: z_abs + description: "set z_abs: 'true' to make all zscores be absolute values only." + default_value: "true" + required: true + - name: burn_in + description: burn in period in which to initially calculate mean and stddev on every step. + default_value: 2 + required: true + - name: mode + description: mode can be to get a zscore 'per_dim' or 'per_chart'. + default_value: per_chart + required: true + - name: per_chart_agg + description: per_chart_agg is how you aggregate from dimension to chart when mode='per_chart'. + default_value: mean + required: true + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Default + description: Default configuration. 
+ folding: + enabled: false + config: | + local: + name: 'local' + host: '127.0.0.1:19999' + charts_regex: 'system\..*' + charts_to_exclude: 'system.uptime' + train_secs: 14400 + offset_secs: 300 + train_every_n: 900 + z_smooth_n: 15 + z_clip: 10 + z_abs: 'true' + burn_in: 2 + mode: 'per_chart' + per_chart_agg: 'mean' + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: zscores.z - description: Z Score - unit: "z" - chart_type: line - dimensions: - - name: a dimension per chart or dimension - - name: zscores.3stddev - description: Z Score >3 - unit: "count" - chart_type: stacked - dimensions: - - name: a dimension per chart or dimension + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: zscores.z + description: Z Score + unit: "z" + chart_type: line + dimensions: + - name: a dimension per chart or dimension + - name: zscores.3stddev + description: Z Score >3 + unit: "count" + chart_type: stacked + dimensions: + - name: a dimension per chart or dimension diff --git a/collectors/python.d.plugin/zscores/metrics.csv b/collectors/python.d.plugin/zscores/metrics.csv deleted file mode 100644 index 5066c7c3..00000000 --- a/collectors/python.d.plugin/zscores/metrics.csv +++ /dev/null @@ -1,3 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -zscores.z,,a dimension per chart or dimension,z,Z Score,line,,python.d.plugin,zscores -zscores.3stddev,,a dimension per chart or dimension,count,Z Score >3,stacked,,python.d.plugin,zscores diff --git a/collectors/slabinfo.plugin/metadata.yaml b/collectors/slabinfo.plugin/metadata.yaml index 4da1a198..7d135d61 100644 --- a/collectors/slabinfo.plugin/metadata.yaml +++ b/collectors/slabinfo.plugin/metadata.yaml @@ -1,83 +1,106 @@ -meta: - plugin_name: slabinfo.plugin - module_name: slabinfo.plugin - monitored_instance: - 
name: slabinfo - link: '' - categories: [] - icon_filename: '' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: '' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: slabinfo.plugin +modules: + - meta: + plugin_name: slabinfo.plugin + module_name: slabinfo.plugin + monitored_instance: + name: Linux kernel SLAB allocator statistics + link: "https://kernel.org/" + categories: + - data-collection.linux-systems.kernel-metrics + icon_filename: 'linuxserver.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - linux kernel + - slab + - slub + - slob + - slabinfo + most_popular: false + overview: + data_collection: + metrics_description: > + Collects metrics on kernel SLAB cache utilization to monitor the low-level performance impact of workloads + in the kernel. + method_description: "The plugin parses `/proc/slabinfo`" + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: false + additional_permissions: + description: > + This integration requires read access to `/proc/slabinfo`, which is accessible only to the root user by + default. 
Netdata uses Linux Capabilities to give the plugin access to this file. `CAP_DAC_READ_SEARCH` + is added automatically during installation. This capability allows bypassing file read permission checks + and directory read and execute permission checks. If file capabilities are not usable, then the plugin is + instead installed with the SUID bit set in permissions so that it runs as root. + default_behavior: + auto_detection: + description: > + Due to the large number of metrics generated by this integration, it is disabled by default and must be + manually enabled inside `/etc/netdata/netdata.conf` + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: "netdata.conf" + section_name: "[plugins]" + description: "The main configuration file." + options: + description: "" + folding: + title: "The main configuration file." + enabled: true + list: + - name: Enable plugin + description: As described above plugin is disabled by default, this option is used to enable plugin. + default_value: no + required: true + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: mem.slabmemory - description: Memory Usage - unit: "B" - chart_type: line - dimensions: - - name: a dimension per cache - - name: mem.slabfilling - description: Object Filling - unit: "%" - chart_type: line - dimensions: - - name: a dimension per cache - - name: mem.slabwaste - description: Memory waste - unit: "B" - chart_type: line - dimensions: - - name: a dimension per cache + folding: + title: Metrics + enabled: false + description: "SLAB cache utilization metrics for the whole system."
+ availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: mem.slabmemory + description: Memory Usage + unit: "B" + chart_type: line + dimensions: + - name: a dimension per cache + - name: mem.slabfilling + description: Object Filling + unit: "%" + chart_type: line + dimensions: + - name: a dimension per cache + - name: mem.slabwaste + description: Memory waste + unit: "B" + chart_type: line + dimensions: + - name: a dimension per cache diff --git a/collectors/slabinfo.plugin/metrics.csv b/collectors/slabinfo.plugin/metrics.csv deleted file mode 100644 index 4391cb6f..00000000 --- a/collectors/slabinfo.plugin/metrics.csv +++ /dev/null @@ -1,4 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -mem.slabmemory,,a dimension per cache,B,"Memory Usage",line,,slabinfo.plugin, -mem.slabfilling,,a dimension per cache,%,"Object Filling",line,,slabinfo.plugin, -mem.slabwaste,,a dimension per cache,B,"Memory waste",line,,slabinfo.plugin,
\ No newline at end of file diff --git a/collectors/systemd-journal.plugin/Makefile.am b/collectors/systemd-journal.plugin/Makefile.am new file mode 100644 index 00000000..fd8f4ab2 --- /dev/null +++ b/collectors/systemd-journal.plugin/Makefile.am @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) + +dist_libconfig_DATA = \ + $(NULL) diff --git a/collectors/systemd-journal.plugin/README.md b/collectors/systemd-journal.plugin/README.md new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/collectors/systemd-journal.plugin/README.md diff --git a/collectors/systemd-journal.plugin/systemd-journal.c b/collectors/systemd-journal.plugin/systemd-journal.c new file mode 100644 index 00000000..08a1891e --- /dev/null +++ b/collectors/systemd-journal.plugin/systemd-journal.c @@ -0,0 +1,584 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +/* + * netdata systemd-journal.plugin + * Copyright (C) 2023 Netdata Inc. + * GPL v3+ + */ + +// TODO - 1) MARKDOC + +#include "collectors/all.h" +#include "libnetdata/libnetdata.h" +#include "libnetdata/required_dummies.h" + +#ifndef SD_JOURNAL_ALL_NAMESPACES +#define JOURNAL_NAMESPACE SD_JOURNAL_LOCAL_ONLY +#else +#define JOURNAL_NAMESPACE SD_JOURNAL_ALL_NAMESPACES +#endif + +#include <systemd/sd-journal.h> +#include <syslog.h> + +#define FACET_MAX_VALUE_LENGTH 8192 + +#define SYSTEMD_JOURNAL_FUNCTION_DESCRIPTION "View, search and analyze systemd journal entries." 
+#define SYSTEMD_JOURNAL_FUNCTION_NAME "systemd-journal" +#define SYSTEMD_JOURNAL_DEFAULT_TIMEOUT 30 +#define SYSTEMD_JOURNAL_MAX_PARAMS 100 +#define SYSTEMD_JOURNAL_DEFAULT_QUERY_DURATION (3 * 3600) +#define SYSTEMD_JOURNAL_DEFAULT_ITEMS_PER_QUERY 200 + +#define JOURNAL_PARAMETER_HELP "help" +#define JOURNAL_PARAMETER_AFTER "after" +#define JOURNAL_PARAMETER_BEFORE "before" +#define JOURNAL_PARAMETER_ANCHOR "anchor" +#define JOURNAL_PARAMETER_LAST "last" +#define JOURNAL_PARAMETER_QUERY "query" + +#define SYSTEMD_ALWAYS_VISIBLE_KEYS NULL +#define SYSTEMD_KEYS_EXCLUDED_FROM_FACETS NULL +#define SYSTEMD_KEYS_INCLUDED_IN_FACETS \ + "_TRANSPORT" \ + "|SYSLOG_IDENTIFIER" \ + "|SYSLOG_FACILITY" \ + "|PRIORITY" \ + "|_HOSTNAME" \ + "|_RUNTIME_SCOPE" \ + "|_PID" \ + "|_UID" \ + "|_GID" \ + "|_SYSTEMD_UNIT" \ + "|_SYSTEMD_SLICE" \ + "|_SYSTEMD_USER_SLICE" \ + "|_COMM" \ + "|_EXE" \ + "|_SYSTEMD_CGROUP" \ + "|_SYSTEMD_USER_UNIT" \ + "|USER_UNIT" \ + "|UNIT" \ + "" + +static netdata_mutex_t mutex = NETDATA_MUTEX_INITIALIZER; +static bool plugin_should_exit = false; + +DICTIONARY *uids = NULL; +DICTIONARY *gids = NULL; + + +// ---------------------------------------------------------------------------- + +int systemd_journal_query(BUFFER *wb, FACETS *facets, usec_t after_ut, usec_t before_ut, usec_t stop_monotonic_ut) { + sd_journal *j; + int r; + + // Open the system journal for reading + r = sd_journal_open(&j, JOURNAL_NAMESPACE); + if (r < 0) + return HTTP_RESP_INTERNAL_SERVER_ERROR; + + facets_rows_begin(facets); + + bool timed_out = false; + size_t row_counter = 0; + sd_journal_seek_realtime_usec(j, before_ut); + SD_JOURNAL_FOREACH_BACKWARDS(j) { + row_counter++; + + uint64_t msg_ut; + sd_journal_get_realtime_usec(j, &msg_ut); + if (msg_ut < after_ut) + break; + + const void *data; + size_t length; + SD_JOURNAL_FOREACH_DATA(j, data, length) { + const char *key = data; + const char *equal = strchr(key, '='); + if(unlikely(!equal)) + continue; + + const char *value = 
++equal; + size_t key_length = value - key; // including '\0' + + char key_copy[key_length]; + memcpy(key_copy, key, key_length - 1); + key_copy[key_length - 1] = '\0'; + + size_t value_length = length - key_length; // without '\0' + facets_add_key_value_length(facets, key_copy, value, value_length <= FACET_MAX_VALUE_LENGTH ? value_length : FACET_MAX_VALUE_LENGTH); + } + + facets_row_finished(facets, msg_ut); + + if((row_counter % 100) == 0 && now_monotonic_usec() > stop_monotonic_ut) { + timed_out = true; + break; + } + } + + sd_journal_close(j); + + buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); + buffer_json_member_add_boolean(wb, "partial", timed_out); + buffer_json_member_add_string(wb, "type", "table"); + buffer_json_member_add_time_t(wb, "update_every", 1); + buffer_json_member_add_string(wb, "help", SYSTEMD_JOURNAL_FUNCTION_DESCRIPTION); + + facets_report(facets, wb); + + buffer_json_member_add_time_t(wb, "expires", now_realtime_sec()); + buffer_json_finalize(wb); + + return HTTP_RESP_OK; +} + +static void systemd_journal_function_help(const char *transaction) { + pluginsd_function_result_begin_to_stdout(transaction, HTTP_RESP_OK, "text/plain", now_realtime_sec() + 3600); + fprintf(stdout, + "%s / %s\n" + "\n" + "%s\n" + "\n" + "The following filters are supported:\n" + "\n" + " help\n" + " Shows this help message.\n" + "\n" + " before:TIMESTAMP\n" + " Absolute or relative (to now) timestamp in seconds, to start the query.\n" + " The query is always executed from the most recent to the oldest log entry.\n" + " If not given the default is: now.\n" + "\n" + " after:TIMESTAMP\n" + " Absolute or relative (to `before`) timestamp in seconds, to end the query.\n" + " If not given, the default is %d.\n" + "\n" + " last:ITEMS\n" + " The number of items to return.\n" + " The default is %d.\n" + "\n" + " anchor:NUMBER\n" + " The `timestamp` of the item last received, to return log entries after that.\n" + " If not given, the query will return the top 
`ITEMS` from the most recent.\n" + "\n" + " facet_id:value_id1,value_id2,value_id3,...\n" + " Apply filters to the query, based on the facet IDs returned.\n" + " Each `facet_id` can be given once, but multiple `facet_ids` can be given.\n" + "\n" + "Filters can be combined. Each filter can be given only one time.\n" + , program_name + , SYSTEMD_JOURNAL_FUNCTION_NAME + , SYSTEMD_JOURNAL_FUNCTION_DESCRIPTION + , -SYSTEMD_JOURNAL_DEFAULT_QUERY_DURATION + , SYSTEMD_JOURNAL_DEFAULT_ITEMS_PER_QUERY + ); + pluginsd_function_result_end_to_stdout(); +} + +static const char *syslog_facility_to_name(int facility) { + switch (facility) { + case LOG_FAC(LOG_KERN): return "kern"; + case LOG_FAC(LOG_USER): return "user"; + case LOG_FAC(LOG_MAIL): return "mail"; + case LOG_FAC(LOG_DAEMON): return "daemon"; + case LOG_FAC(LOG_AUTH): return "auth"; + case LOG_FAC(LOG_SYSLOG): return "syslog"; + case LOG_FAC(LOG_LPR): return "lpr"; + case LOG_FAC(LOG_NEWS): return "news"; + case LOG_FAC(LOG_UUCP): return "uucp"; + case LOG_FAC(LOG_CRON): return "cron"; + case LOG_FAC(LOG_AUTHPRIV): return "authpriv"; + case LOG_FAC(LOG_FTP): return "ftp"; + case LOG_FAC(LOG_LOCAL0): return "local0"; + case LOG_FAC(LOG_LOCAL1): return "local1"; + case LOG_FAC(LOG_LOCAL2): return "local2"; + case LOG_FAC(LOG_LOCAL3): return "local3"; + case LOG_FAC(LOG_LOCAL4): return "local4"; + case LOG_FAC(LOG_LOCAL5): return "local5"; + case LOG_FAC(LOG_LOCAL6): return "local6"; + case LOG_FAC(LOG_LOCAL7): return "local7"; + default: return NULL; + } +} + +static const char *syslog_priority_to_name(int priority) { + switch (priority) { + case LOG_ALERT: return "alert"; + case LOG_CRIT: return "critical"; + case LOG_DEBUG: return "debug"; + case LOG_EMERG: return "panic"; + case LOG_ERR: return "error"; + case LOG_INFO: return "info"; + case LOG_NOTICE: return "notice"; + case LOG_WARNING: return "warning"; + default: return NULL; + } +} + +static char *uid_to_username(uid_t uid, char *buffer, size_t buffer_size) { + 
struct passwd pw, *result; + char tmp[1024 + 1]; + + if (getpwuid_r(uid, &pw, tmp, 1024, &result) != 0 || result == NULL) + return NULL; + + strncpy(buffer, pw.pw_name, buffer_size - 1); + buffer[buffer_size - 1] = '\0'; // Null-terminate just in case + return buffer; +} + +static char *gid_to_groupname(gid_t gid, char* buffer, size_t buffer_size) { + struct group grp, *result; + char tmp[1024 + 1]; + + if (getgrgid_r(gid, &grp, tmp, 1024, &result) != 0 || result == NULL) + return NULL; + + strncpy(buffer, grp.gr_name, buffer_size - 1); + buffer[buffer_size - 1] = '\0'; // Null-terminate just in case + return buffer; +} + +static void systemd_journal_transform_syslog_facility(FACETS *facets __maybe_unused, BUFFER *wb, void *data __maybe_unused) { + const char *v = buffer_tostring(wb); + if(*v && isdigit(*v)) { + int facility = str2i(buffer_tostring(wb)); + const char *name = syslog_facility_to_name(facility); + if (name) { + buffer_flush(wb); + buffer_strcat(wb, name); + } + } +} + +static void systemd_journal_transform_priority(FACETS *facets __maybe_unused, BUFFER *wb, void *data __maybe_unused) { + const char *v = buffer_tostring(wb); + if(*v && isdigit(*v)) { + int priority = str2i(buffer_tostring(wb)); + const char *name = syslog_priority_to_name(priority); + if (name) { + buffer_flush(wb); + buffer_strcat(wb, name); + } + } +} + +static void systemd_journal_transform_uid(FACETS *facets __maybe_unused, BUFFER *wb, void *data) { + DICTIONARY *cache = data; + const char *v = buffer_tostring(wb); + if(*v && isdigit(*v)) { + const char *sv = dictionary_get(cache, v); + if(!sv) { + char buf[1024 + 1]; + int uid = str2i(buffer_tostring(wb)); + const char *name = uid_to_username(uid, buf, 1024); + if (!name) + name = v; + + sv = dictionary_set(cache, v, (void *)name, strlen(name) + 1); + } + + buffer_flush(wb); + buffer_strcat(wb, sv); + } +} + +static void systemd_journal_transform_gid(FACETS *facets __maybe_unused, BUFFER *wb, void *data) { + DICTIONARY *cache = 
data; + const char *v = buffer_tostring(wb); + if(*v && isdigit(*v)) { + const char *sv = dictionary_get(cache, v); + if(!sv) { + char buf[1024 + 1]; + int gid = str2i(buffer_tostring(wb)); + const char *name = gid_to_groupname(gid, buf, 1024); + if (!name) + name = v; + + sv = dictionary_set(cache, v, (void *)name, strlen(name) + 1); + } + + buffer_flush(wb); + buffer_strcat(wb, sv); + } +} + +static void systemd_journal_dynamic_row_id(FACETS *facets __maybe_unused, BUFFER *json_array, FACET_ROW_KEY_VALUE *rkv, FACET_ROW *row, void *data __maybe_unused) { + FACET_ROW_KEY_VALUE *syslog_identifier_rkv = dictionary_get(row->dict, "SYSLOG_IDENTIFIER"); + FACET_ROW_KEY_VALUE *pid_rkv = dictionary_get(row->dict, "_PID"); + + const char *identifier = syslog_identifier_rkv ? buffer_tostring(syslog_identifier_rkv->wb) : "UNKNOWN"; + const char *pid = pid_rkv ? buffer_tostring(pid_rkv->wb) : "UNKNOWN"; + + buffer_flush(rkv->wb); + buffer_sprintf(rkv->wb, "%s[%s]", identifier, pid); + + buffer_json_add_array_item_string(json_array, buffer_tostring(rkv->wb)); +} + +static void function_systemd_journal(const char *transaction, char *function, char *line_buffer __maybe_unused, int line_max __maybe_unused, int timeout __maybe_unused) { + char *words[SYSTEMD_JOURNAL_MAX_PARAMS] = { NULL }; + size_t num_words = quoted_strings_splitter_pluginsd(function, words, SYSTEMD_JOURNAL_MAX_PARAMS); + + BUFFER *wb = buffer_create(0, NULL); + buffer_flush(wb); + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_NEWLINE_ON_ARRAY_ITEMS); + + FACETS *facets = facets_create(50, 0, FACETS_OPTION_ALL_KEYS_FTS, + SYSTEMD_ALWAYS_VISIBLE_KEYS, + SYSTEMD_KEYS_INCLUDED_IN_FACETS, + SYSTEMD_KEYS_EXCLUDED_FROM_FACETS); + + facets_accepted_param(facets, JOURNAL_PARAMETER_AFTER); + facets_accepted_param(facets, JOURNAL_PARAMETER_BEFORE); + facets_accepted_param(facets, JOURNAL_PARAMETER_ANCHOR); + facets_accepted_param(facets, JOURNAL_PARAMETER_LAST); + facets_accepted_param(facets, 
JOURNAL_PARAMETER_QUERY); + + // register the fields in the order you want them on the dashboard + + facets_register_dynamic_key(facets, "ND_JOURNAL_PROCESS", FACET_KEY_OPTION_NO_FACET|FACET_KEY_OPTION_VISIBLE|FACET_KEY_OPTION_FTS, + systemd_journal_dynamic_row_id, NULL); + + facets_register_key(facets, "MESSAGE", + FACET_KEY_OPTION_NO_FACET|FACET_KEY_OPTION_MAIN_TEXT|FACET_KEY_OPTION_VISIBLE|FACET_KEY_OPTION_FTS); + + facets_register_key_transformation(facets, "PRIORITY", FACET_KEY_OPTION_FACET|FACET_KEY_OPTION_FTS, + systemd_journal_transform_priority, NULL); + + facets_register_key_transformation(facets, "SYSLOG_FACILITY", FACET_KEY_OPTION_FACET|FACET_KEY_OPTION_FTS, + systemd_journal_transform_syslog_facility, NULL); + + facets_register_key(facets, "SYSLOG_IDENTIFIER", FACET_KEY_OPTION_FACET|FACET_KEY_OPTION_FTS); + facets_register_key(facets, "UNIT", FACET_KEY_OPTION_FACET|FACET_KEY_OPTION_FTS); + facets_register_key(facets, "USER_UNIT", FACET_KEY_OPTION_FACET|FACET_KEY_OPTION_FTS); + + facets_register_key_transformation(facets, "_UID", FACET_KEY_OPTION_FACET|FACET_KEY_OPTION_FTS, + systemd_journal_transform_uid, uids); + + facets_register_key_transformation(facets, "_GID", FACET_KEY_OPTION_FACET|FACET_KEY_OPTION_FTS, + systemd_journal_transform_gid, gids); + + time_t after_s = 0, before_s = 0; + usec_t anchor = 0; + size_t last = 0; + const char *query = NULL; + + buffer_json_member_add_object(wb, "request"); + buffer_json_member_add_object(wb, "filters"); + + for(int i = 1; i < SYSTEMD_JOURNAL_MAX_PARAMS ;i++) { + const char *keyword = get_word(words, num_words, i); + if(!keyword) break; + + if(strcmp(keyword, JOURNAL_PARAMETER_HELP) == 0) { + systemd_journal_function_help(transaction); + goto cleanup; + } + else if(strncmp(keyword, JOURNAL_PARAMETER_AFTER ":", strlen(JOURNAL_PARAMETER_AFTER ":")) == 0) { + after_s = str2l(&keyword[strlen(JOURNAL_PARAMETER_AFTER ":")]); + } + else if(strncmp(keyword, JOURNAL_PARAMETER_BEFORE ":", 
strlen(JOURNAL_PARAMETER_BEFORE ":")) == 0) { + before_s = str2l(&keyword[strlen(JOURNAL_PARAMETER_BEFORE ":")]); + } + else if(strncmp(keyword, JOURNAL_PARAMETER_ANCHOR ":", strlen(JOURNAL_PARAMETER_ANCHOR ":")) == 0) { + anchor = str2ull(&keyword[strlen(JOURNAL_PARAMETER_ANCHOR ":")], NULL); + } + else if(strncmp(keyword, JOURNAL_PARAMETER_LAST ":", strlen(JOURNAL_PARAMETER_LAST ":")) == 0) { + last = str2ul(&keyword[strlen(JOURNAL_PARAMETER_LAST ":")]); + } + else if(strncmp(keyword, JOURNAL_PARAMETER_QUERY ":", strlen(JOURNAL_PARAMETER_QUERY ":")) == 0) { + query= &keyword[strlen(JOURNAL_PARAMETER_QUERY ":")]; + } + else { + char *value = strchr(keyword, ':'); + if(value) { + *value++ = '\0'; + + buffer_json_member_add_array(wb, keyword); + + while(value) { + char *sep = strchr(value, ','); + if(sep) + *sep++ = '\0'; + + facets_register_facet_filter(facets, keyword, value, FACET_KEY_OPTION_REORDER); + buffer_json_add_array_item_string(wb, value); + + value = sep; + } + + buffer_json_array_close(wb); // keyword + } + } + } + + buffer_json_object_close(wb); // filters + + time_t expires = now_realtime_sec() + 1; + time_t now_s; + + if(!after_s && !before_s) { + now_s = now_realtime_sec(); + before_s = now_s; + after_s = before_s - SYSTEMD_JOURNAL_DEFAULT_QUERY_DURATION; + } + else + rrdr_relative_window_to_absolute(&after_s, &before_s, &now_s, false); + + if(after_s > before_s) { + time_t tmp = after_s; + after_s = before_s; + before_s = tmp; + } + + if(after_s == before_s) + after_s = before_s - SYSTEMD_JOURNAL_DEFAULT_QUERY_DURATION; + + if(!last) + last = SYSTEMD_JOURNAL_DEFAULT_ITEMS_PER_QUERY; + + buffer_json_member_add_time_t(wb, "after", after_s); + buffer_json_member_add_time_t(wb, "before", before_s); + buffer_json_member_add_uint64(wb, "anchor", anchor); + buffer_json_member_add_uint64(wb, "last", last); + buffer_json_member_add_string(wb, "query", query); + buffer_json_member_add_time_t(wb, "timeout", timeout); + buffer_json_object_close(wb); // 
request + + facets_set_items(facets, last); + facets_set_anchor(facets, anchor); + facets_set_query(facets, query); + int response = systemd_journal_query(wb, facets, after_s * USEC_PER_SEC, before_s * USEC_PER_SEC, + now_monotonic_usec() + (timeout - 1) * USEC_PER_SEC); + + if(response != HTTP_RESP_OK) { + pluginsd_function_json_error(transaction, response, "failed"); + goto cleanup; + } + + pluginsd_function_result_begin_to_stdout(transaction, HTTP_RESP_OK, "application/json", expires); + fwrite(buffer_tostring(wb), buffer_strlen(wb), 1, stdout); + + pluginsd_function_result_end_to_stdout(); + +cleanup: + facets_destroy(facets); + buffer_free(wb); +} + +static void *reader_main(void *arg __maybe_unused) { + char buffer[PLUGINSD_LINE_MAX + 1]; + + char *s = NULL; + while(!plugin_should_exit && (s = fgets(buffer, PLUGINSD_LINE_MAX, stdin))) { + + char *words[PLUGINSD_MAX_WORDS] = { NULL }; + size_t num_words = quoted_strings_splitter_pluginsd(buffer, words, PLUGINSD_MAX_WORDS); + + const char *keyword = get_word(words, num_words, 0); + + if(keyword && strcmp(keyword, PLUGINSD_KEYWORD_FUNCTION) == 0) { + char *transaction = get_word(words, num_words, 1); + char *timeout_s = get_word(words, num_words, 2); + char *function = get_word(words, num_words, 3); + + if(!transaction || !*transaction || !timeout_s || !*timeout_s || !function || !*function) { + netdata_log_error("Received incomplete %s (transaction = '%s', timeout = '%s', function = '%s'). 
Ignoring it.", + keyword, + transaction?transaction:"(unset)", + timeout_s?timeout_s:"(unset)", + function?function:"(unset)"); + } + else { + int timeout = str2i(timeout_s); + if(timeout <= 0) timeout = SYSTEMD_JOURNAL_DEFAULT_TIMEOUT; + + netdata_mutex_lock(&mutex); + + if(strncmp(function, SYSTEMD_JOURNAL_FUNCTION_NAME, strlen(SYSTEMD_JOURNAL_FUNCTION_NAME)) == 0) + function_systemd_journal(transaction, function, buffer, PLUGINSD_LINE_MAX + 1, timeout); + else + pluginsd_function_json_error(transaction, HTTP_RESP_NOT_FOUND, "No function with this name found in systemd-journal.plugin."); + + fflush(stdout); + netdata_mutex_unlock(&mutex); + } + } + else + netdata_log_error("Received unknown command: %s", keyword?keyword:"(unset)"); + } + + if(!s || feof(stdin) || ferror(stdin)) { + plugin_should_exit = true; + netdata_log_error("Received error on stdin."); + } + + exit(1); +} + +int main(int argc __maybe_unused, char **argv __maybe_unused) { + stderror = stderr; + clocks_init(); + + program_name = "systemd-journal.plugin"; + + // disable syslog + error_log_syslog = 0; + + // set errors flood protection to 100 logs per hour + error_log_errors_per_period = 100; + error_log_throttle_period = 3600; + + uids = dictionary_create(0); + gids = dictionary_create(0); + + // ------------------------------------------------------------------------ + // debug + + if(argc == 2 && strcmp(argv[1], "debug") == 0) { + char buf[] = "systemd-journal after:-86400 before:0 last:500"; + function_systemd_journal("123", buf, "", 0, 30); + exit(1); + } + + // ------------------------------------------------------------------------ + + netdata_thread_t reader_thread; + netdata_thread_create(&reader_thread, "SDJ_READER", NETDATA_THREAD_OPTION_DONT_LOG, reader_main, NULL); + + // ------------------------------------------------------------------------ + + time_t started_t = now_monotonic_sec(); + + size_t iteration; + usec_t step = 1000 * USEC_PER_MS; + bool tty = isatty(fileno(stderr)) == 
1; + + netdata_mutex_lock(&mutex); + fprintf(stdout, PLUGINSD_KEYWORD_FUNCTION " \"%s\" %d \"%s\"\n", + SYSTEMD_JOURNAL_FUNCTION_NAME, SYSTEMD_JOURNAL_DEFAULT_TIMEOUT, SYSTEMD_JOURNAL_FUNCTION_DESCRIPTION); + + heartbeat_t hb; + heartbeat_init(&hb); + for(iteration = 0; 1 ; iteration++) { + netdata_mutex_unlock(&mutex); + heartbeat_next(&hb, step); + netdata_mutex_lock(&mutex); + + if(!tty) + fprintf(stdout, "\n"); + + fflush(stdout); + + time_t now = now_monotonic_sec(); + if(now - started_t > 86400) + break; + } + + dictionary_destroy(uids); + dictionary_destroy(gids); + + exit(0); +} diff --git a/collectors/tc.plugin/metadata.yaml b/collectors/tc.plugin/metadata.yaml index 2fca8d1e..dcd03e47 100644 --- a/collectors/tc.plugin/metadata.yaml +++ b/collectors/tc.plugin/metadata.yaml @@ -1,102 +1,115 @@ -meta: - plugin_name: tc.plugin - module_name: tc.plugin - monitored_instance: - name: tc - link: '' - categories: - - data-collection.networking-stack-and-network-interfaces - icon_filename: 'freeradius.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Examine tc metrics to gain insights into Linux traffic control operations. Study packet flow rates, queue lengths, and drop rates to optimize network traffic flow.' 
- method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: network device, direction - description: "" - labels: - - name: device - description: TBD - - name: name - description: TBD - - name: family - description: TBD +plugin_name: tc.plugin +modules: + - meta: + plugin_name: tc.plugin + module_name: tc.plugin + monitored_instance: + name: tc QoS classes + link: "https://wiki.linuxfoundation.org/networking/iproute2" + categories: + - data-collection.linux-systems.network-metrics + icon_filename: "netdata.png" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: "Examine tc metrics to gain insights into Linux traffic control operations. Study packet flow rates, queue lengths, and drop rates to optimize network traffic flow." + method_description: "The plugin uses `tc` command to collect information about Traffic control." + supported_platforms: + include: + - Linux + exclude: [] + multi_instance: true + additional_permissions: + description: "The plugin needs to access command `tc` to get the necessary metrics. To achieve this netdata modifies permission of file `/usr/libexec/netdata/plugins.d/tc-qos-helper.sh`." 
+ default_behavior: + auto_detection: + description: "" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: "netdata.conf" + section_name: "[plugin:tc]" + description: "The main configuration file." + options: + description: "" + folding: + title: "Config option" + enabled: true + list: + - name: script to run to get tc values + description: Path to script `tc-qos-helper.sh` + default_value: "/usr/libexec/netdata/plugins.d/tc-qos-helper.sh" + required: false + examples: + folding: + enabled: false + title: "Config" + list: + - name: Basic + description: A basic example configuration. + config: | + [plugin:tc] + script to run to get tc values = /usr/libexec/netdata/plugins.d/tc-qos-helper.sh + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: tc.qos - description: Class Usage - unit: "kilobits/s" - chart_type: stacked - dimensions: - - name: a dimension per class - - name: tc.qos_packets - description: Class Packets - unit: "packets/s" - chart_type: stacked - dimensions: - - name: a dimension per class - - name: tc.qos_dropped - description: Class Dropped Packets - unit: "packets/s" - chart_type: stacked - dimensions: - - name: a dimension per class - - name: tc.qos_tokens - description: Class Tokens - unit: "tokens" - chart_type: line - dimensions: - - name: a dimension per class - - name: tc.qos_ctokens - description: Class cTokens - unit: "ctokens" - chart_type: line - dimensions: - - name: a dimension per class + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: network device direction + description: "Metrics related to QoS network device directions. Each direction (in/out) produces its own set of the following metrics." + labels: + - name: device + description: The network interface. 
+ - name: device_name + description: The network interface name + - name: group + description: The device family + metrics: + - name: tc.qos + description: Class Usage + unit: "kilobits/s" + chart_type: stacked + dimensions: + - name: a dimension per class + - name: tc.qos_packets + description: Class Packets + unit: "packets/s" + chart_type: stacked + dimensions: + - name: a dimension per class + - name: tc.qos_dropped + description: Class Dropped Packets + unit: "packets/s" + chart_type: stacked + dimensions: + - name: a dimension per class + - name: tc.qos_tokens + description: Class Tokens + unit: "tokens" + chart_type: line + dimensions: + - name: a dimension per class + - name: tc.qos_ctokens + description: Class cTokens + unit: "ctokens" + chart_type: line + dimensions: + - name: a dimension per class diff --git a/collectors/tc.plugin/metrics.csv b/collectors/tc.plugin/metrics.csv deleted file mode 100644 index b8e15649..00000000 --- a/collectors/tc.plugin/metrics.csv +++ /dev/null @@ -1,6 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -tc.qos,"network device, direction",a dimension per class,kilobits/s,"Class Usage",stacked,"device, name, family",tc.plugin, -tc.qos_packets,"network device, direction",a dimension per class,packets/s,"Class Packets",stacked,"device, name, family",tc.plugin, -tc.qos_dropped,"network device, direction",a dimension per class,packets/s,"Class Dropped Packets",stacked,"device, name, family",tc.plugin, -tc.qos_tokens,"network device, direction",a dimension per class,tokens,"Class Tokens",line,"device, name, family",tc.plugin, -tc.qos_ctokens,"network device, direction",a dimension per class,ctokens,"Class cTokens",line,"device, name, family",tc.plugin,
\ No newline at end of file diff --git a/collectors/timex.plugin/metadata.yaml b/collectors/timex.plugin/metadata.yaml index 27a54575..2b43d8a2 100644 --- a/collectors/timex.plugin/metadata.yaml +++ b/collectors/timex.plugin/metadata.yaml @@ -1,90 +1,112 @@ -meta: - plugin_name: timex.plugin - module_name: timex.plugin - monitored_instance: - name: Timex - link: '' - categories: - - data-collection.system-clock-and-ntp - icon_filename: 'syslog.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Examine Timex metrics to gain insights into system clock operations. Study time sync status, clock drift, and adjustments to ensure accurate system timekeeping.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: -- name: system_clock_sync_state - link: https://github.com/netdata/netdata/blob/master/health/health.d/timex.conf - metric: system.clock_sync_state - info: when set to 0, the system kernel believes the system clock is not properly synchronized to a reliable server - os: "linux" -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: timex.plugin +modules: + - meta: + plugin_name: timex.plugin + module_name: timex.plugin + monitored_instance: + name: Timex + link: "" + categories: + - data-collection.system-clock-and-ntp + 
icon_filename: "syslog.png" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: "Examine Timex metrics to gain insights into system clock operations. Study time sync status, clock drift, and adjustments to ensure accurate system timekeeping." + method_description: "It uses system call adjtimex on Linux and ntp_adjtime on FreeBSD or Mac to monitor the system kernel clock synchronization state." + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: "netdata.conf" + section_name: "[plugin:timex]" + description: "The netdata main configuration file." + options: + description: "At least one option ('clock synchronization state', 'time offset') needs to be enabled for this collector to run." + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency. + default_value: 1 + required: false + - name: clock synchronization state + description: Make chart showing system clock synchronization state. + default_value: yes + required: true + - name: time offset + description: Make chart showing computed time offset between local system and reference clock + default_value: yes + required: true + examples: + folding: + enabled: true + title: "Config" + list: + - name: Basic + description: A basic configuration example. 
+ config: | + [plugin:timex] + update every = 1 + clock synchronization state = yes + time offset = yes + troubleshooting: + problems: + list: [] + alerts: + - name: system_clock_sync_state + link: https://github.com/netdata/netdata/blob/master/health/health.d/timex.conf + metric: system.clock_sync_state + info: when set to 0, the system kernel believes the system clock is not properly synchronized to a reliable server + os: "linux" metrics: - - name: system.clock_sync_state - description: System Clock Synchronization State - unit: "state" - chart_type: line - dimensions: - - name: state - - name: system.clock_status - description: System Clock Status - unit: "status" - chart_type: line - dimensions: - - name: unsync - - name: clockerr - - name: system.clock_sync_offset - description: Computed Time Offset Between Local System and Reference Clock - unit: "milliseconds" - chart_type: line - dimensions: - - name: offset + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." 
+ labels: [] + metrics: + - name: system.clock_sync_state + description: System Clock Synchronization State + unit: "state" + chart_type: line + dimensions: + - name: state + - name: system.clock_status + description: System Clock Status + unit: "status" + chart_type: line + dimensions: + - name: unsync + - name: clockerr + - name: system.clock_sync_offset + description: Computed Time Offset Between Local System and Reference Clock + unit: "milliseconds" + chart_type: line + dimensions: + - name: offset diff --git a/collectors/timex.plugin/metrics.csv b/collectors/timex.plugin/metrics.csv deleted file mode 100644 index c7e59cca..00000000 --- a/collectors/timex.plugin/metrics.csv +++ /dev/null @@ -1,4 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -system.clock_sync_state,,state,state,"System Clock Synchronization State",line,,timex.plugin, -system.clock_status,,"unsync, clockerr",status,"System Clock Status",line,,timex.plugin, -system.clock_sync_offset,,offset,milliseconds,"Computed Time Offset Between Local System and Reference Clock",line,,timex.plugin,
\ No newline at end of file diff --git a/collectors/xenstat.plugin/metadata.yaml b/collectors/xenstat.plugin/metadata.yaml index 610435a3..49318369 100644 --- a/collectors/xenstat.plugin/metadata.yaml +++ b/collectors/xenstat.plugin/metadata.yaml @@ -1,181 +1,195 @@ -meta: - plugin_name: xenstat.plugin - module_name: xenstat.plugin - monitored_instance: - name: Xen/XCP-ng - link: '' - categories: - - data-collection.containers-and-vms - icon_filename: 'xen.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor Xen/XCP-ng with Netdata for streamlined virtual machine performance and resource management. Optimize virtualization operations with real-time insights, built-in alerts, and anomaly advisor.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] - metrics: - - name: xenstat.mem - description: Memory Usage - unit: "MiB" - chart_type: stacked - dimensions: - - name: free - - name: used - - name: xenstat.domains - description: Number of Domains - unit: "domains" - chart_type: line - dimensions: - - name: domains - - name: xenstat.cpus - description: Number of CPUs - unit: "cpus" - chart_type: line - dimensions: - - name: cpus - - name: xenstat.cpu_freq - description: CPU 
Frequency - unit: "MHz" - chart_type: line - dimensions: - - name: frequency - - name: xendomain - description: "" - labels: [] +plugin_name: xenstat.plugin +modules: + - meta: + plugin_name: xenstat.plugin + module_name: xenstat.plugin + monitored_instance: + name: Xen/XCP-ng + link: "https://xenproject.org/" + categories: + - data-collection.containers-and-vms + icon_filename: "xen.png" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: "This collector monitors XenServer and XCP-ng host and domains statistics." + method_description: "" + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "The plugin needs setuid." + default_behavior: + auto_detection: + description: "This plugin requires the `xen-dom0-libs-devel` and `yajl-devel` libraries to be installed." + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Libraries + description: | + 1. Install `xen-dom0-libs-devel` and `yajl-devel` using the package manager of your system. + + Note: On CentOS systems you will need the `centos-release-xen` repository, and the required Xen package is `xen-devel`. + + 2. Re-install Netdata from source. The installer will detect that the required libraries are now available and will also build xenstat.plugin. + configuration: + file: + name: "netdata.conf" + section_name: "[plugin:xenstat]" + description: "The netdata main configuration file." + options: + description: "" + folding: + title: "Config options" + enabled: true + list: + - name: update every + description: Data collection frequency. 
+ default_value: 1 + required: false + examples: + folding: + enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: xendomain.states - description: Domain States - unit: "boolean" - chart_type: line - dimensions: - - name: running - - name: blocked - - name: paused - - name: shutdown - - name: crashed - - name: dying - - name: xendomain.cpu - description: CPU Usage (100% = 1 core) - unit: "percentage" - chart_type: line - dimensions: - - name: used - - name: xendomain.mem - description: Memory Reservation - unit: "MiB" - chart_type: line - dimensions: - - name: maximum - - name: current - - name: xendomain.vcpu - description: CPU Usage per VCPU - unit: "percentage" - chart_type: line - dimensions: - - name: a dimension per vcpu - - name: xendomain, vbd - description: "" - labels: [] - metrics: - - name: xendomain.oo_req_vbd - description: VBD{%u} Out Of Requests - unit: "requests/s" - chart_type: line - dimensions: - - name: requests - - name: xendomain.requests_vbd - description: VBD{%u} Requests - unit: "requests/s" - chart_type: line - dimensions: - - name: read - - name: write - - name: xendomain.sectors_vbd - description: VBD{%u} Read/Written Sectors - unit: "sectors/s" - chart_type: line - dimensions: - - name: read - - name: write - - name: xendomain, network - description: "" - labels: [] - metrics: - - name: xendomain.bytes_network - description: Network{%u} Received/Sent Bytes - unit: "kilobits/s" - chart_type: line - dimensions: - - name: received - - name: sent - - name: xendomain.packets_network - description: Network{%u} Received/Sent Packets - unit: "packets/s" - chart_type: line - dimensions: - - name: received - - name: sent - - name: xendomain.errors_network - description: Network{%u} Receive/Transmit Errors - unit: "errors/s" - chart_type: line - dimensions: - - name: received - - name: sent - - name: xendomain.drops_network - description: Network{%u} Receive/Transmit Drops - unit: 
"drops/s" - chart_type: line - dimensions: - - name: received - - name: sent + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: xenstat.mem + description: Memory Usage + unit: "MiB" + chart_type: stacked + dimensions: + - name: free + - name: used + - name: xenstat.domains + description: Number of Domains + unit: "domains" + chart_type: line + dimensions: + - name: domains + - name: xenstat.cpus + description: Number of CPUs + unit: "cpus" + chart_type: line + dimensions: + - name: cpus + - name: xenstat.cpu_freq + description: CPU Frequency + unit: "MHz" + chart_type: line + dimensions: + - name: frequency + - name: xendomain + description: "Metrics related to Xen domains. Each domain provides its own set of the following metrics." + labels: [] + metrics: + - name: xendomain.states + description: Domain States + unit: "boolean" + chart_type: line + dimensions: + - name: running + - name: blocked + - name: paused + - name: shutdown + - name: crashed + - name: dying + - name: xendomain.cpu + description: CPU Usage (100% = 1 core) + unit: "percentage" + chart_type: line + dimensions: + - name: used + - name: xendomain.mem + description: Memory Reservation + unit: "MiB" + chart_type: line + dimensions: + - name: maximum + - name: current + - name: xendomain.vcpu + description: CPU Usage per VCPU + unit: "percentage" + chart_type: line + dimensions: + - name: a dimension per vcpu + - name: xendomain vbd + description: "Metrics related to Xen domain Virtual Block Device. Each VBD provides its own set of the following metrics." 
+ labels: [] + metrics: + - name: xendomain.oo_req_vbd + description: VBD{%u} Out Of Requests + unit: "requests/s" + chart_type: line + dimensions: + - name: requests + - name: xendomain.requests_vbd + description: VBD{%u} Requests + unit: "requests/s" + chart_type: line + dimensions: + - name: read + - name: write + - name: xendomain.sectors_vbd + description: VBD{%u} Read/Written Sectors + unit: "sectors/s" + chart_type: line + dimensions: + - name: read + - name: write + - name: xendomain network + description: "Metrics related to Xen domain network interfaces. Each network interface provides its own set of the following metrics." + labels: [] + metrics: + - name: xendomain.bytes_network + description: Network{%u} Received/Sent Bytes + unit: "kilobits/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: xendomain.packets_network + description: Network{%u} Received/Sent Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: xendomain.errors_network + description: Network{%u} Receive/Transmit Errors + unit: "errors/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: xendomain.drops_network + description: Network{%u} Receive/Transmit Drops + unit: "drops/s" + chart_type: line + dimensions: + - name: received + - name: sent diff --git a/collectors/xenstat.plugin/metrics.csv b/collectors/xenstat.plugin/metrics.csv deleted file mode 100644 index 2256ddf1..00000000 --- a/collectors/xenstat.plugin/metrics.csv +++ /dev/null @@ -1,16 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -xenstat.mem,,"free, used",MiB,"Memory Usage",stacked,,xenstat.plugin, -xenstat.domains,,domains,domains,"Number of Domains",line,,xenstat.plugin, -xenstat.cpus,,cpus,cpus,"Number of CPUs",line,,xenstat.plugin, -xenstat.cpu_freq,,frequency,MHz,"CPU Frequency",line,,xenstat.plugin, -xendomain.states,xendomain,"running, blocked, paused, shutdown, crashed, 
dying",boolean,"Domain States",line,,xenstat.plugin, -xendomain.cpu,xendomain,used,percentage,"CPU Usage (100% = 1 core)",line,,xenstat.plugin, -xendomain.mem,xendomain,"maximum, current",MiB,"Memory Reservation",line,,xenstat.plugin, -xendomain.vcpu,xendomain,a dimension per vcpu,percentage,"CPU Usage per VCPU",line,,xenstat.plugin, -xendomain.oo_req_vbd,"xendomain, vbd",requests,requests/s,"VBD{%u} Out Of Requests",line,,xenstat.plugin, -xendomain.requests_vbd,"xendomain, vbd","read, write",requests/s,"VBD{%u} Requests",line,,xenstat.plugin, -xendomain.sectors_vbd,"xendomain, vbd","read, write",sectors/s,"VBD{%u} Read/Written Sectors",line,,xenstat.plugin, -xendomain.bytes_network,"xendomain, network","received, sent",kilobits/s,"Network{%u} Received/Sent Bytes",line,,xenstat.plugin, -xendomain.packets_network,"xendomain, network","received, sent",packets/s,"Network{%u} Received/Sent Packets",line,,xenstat.plugin, -xendomain.errors_network,"xendomain, network","received, sent",errors/s,"Network{%u} Receive/Transmit Errors",line,,xenstat.plugin, -xendomain.drops_network,"xendomain, network","received, sent",drops/s,"Network{%u} Receive/Transmit Drops",line,,xenstat.plugin,
\ No newline at end of file |