diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:19:48 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:20:02 +0000 |
commit | 58daab21cd043e1dc37024a7f99b396788372918 (patch) | |
tree | 96771e43bb69f7c1c2b0b4f7374cb74d7866d0cb /collectors | |
parent | Releasing debian version 1.43.2-1. (diff) | |
download | netdata-58daab21cd043e1dc37024a7f99b396788372918.tar.xz netdata-58daab21cd043e1dc37024a7f99b396788372918.zip |
Merging upstream version 1.44.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'collectors')
310 files changed, 23573 insertions, 9694 deletions
diff --git a/collectors/COLLECTORS.md b/collectors/COLLECTORS.md index ea0776abc..9a3499593 100644 --- a/collectors/COLLECTORS.md +++ b/collectors/COLLECTORS.md @@ -221,7 +221,7 @@ If you don't see the app/service you'd like to monitor in this list: - [Virtual Machines](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/integrations/virtual_machines.md) -- [Xen/XCP-ng](https://github.com/netdata/netdata/blob/master/collectors/xenstat.plugin/integrations/xen-xcp-ng.md) +- [Xen XCP-ng](https://github.com/netdata/netdata/blob/master/collectors/xenstat.plugin/integrations/xen_xcp-ng.md) - [cAdvisor](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/cadvisor.md) @@ -377,7 +377,7 @@ If you don't see the app/service you'd like to monitor in this list: - [FreeBSD NFS](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/freebsd_nfs.md) -- [FreeBSD RCTL/RACCT](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/freebsd_rctl-racct.md) +- [FreeBSD RCTL-RACCT](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/freebsd_rctl-racct.md) - [dev.cpu.0.freq](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/dev.cpu.0.freq.md) @@ -1109,8 +1109,6 @@ If you don't see the app/service you'd like to monitor in this list: - [Eaton UPS](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/eaton_ups.md) -- [Network UPS Tools (NUT)](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/nut/integrations/network_ups_tools_nut.md) - - [UPS (NUT)](https://github.com/netdata/go.d.plugin/blob/master/modules/upsd/integrations/ups_nut.md) ### VPNs diff --git a/collectors/Makefile.am b/collectors/Makefile.am index d477e5b80..1bbb2e0ef 100644 --- a/collectors/Makefile.am +++ b/collectors/Makefile.am @@ -15,6 +15,7 @@ SUBDIRS = \ freebsd.plugin \ freeipmi.plugin \ idlejitter.plugin \ + log2journal \ macos.plugin \ nfacct.plugin \ xenstat.plugin \ diff --git a/collectors/all.h b/collectors/all.h index ec4ac00eb..38241dfa9 100644 --- a/collectors/all.h +++ b/collectors/all.h @@ -401,6 +401,11 @@ #define NETDATA_CHART_PRIO_STATSD_PRIVATE 90000 // many charts +// Logs Management + +#define NETDATA_CHART_PRIO_LOGS_BASE 95000 // many charts +#define NETDATA_CHART_PRIO_LOGS_STATS_BASE 160000 // logsmanagement stats in "Netdata Monitoring" + // PCI #define NETDATA_CHART_PRIO_PCI_AER 100000 diff --git a/collectors/apps.plugin/apps_groups.conf b/collectors/apps.plugin/apps_groups.conf index 9e9d83436..195536a0a 100644 --- a/collectors/apps.plugin/apps_groups.conf +++ b/collectors/apps.plugin/apps_groups.conf @@ -91,6 +91,7 @@ go.d.plugin: *go.d.plugin* slabinfo.plugin: *slabinfo.plugin* ebpf.plugin: *ebpf.plugin* debugfs.plugin: *debugfs.plugin* +logs-management.plugin: *logs-management.plugin* # agent-service-discovery agent_sd: agent_sd @@ -381,6 +382,7 @@ rabbitmq: *rabbitmq* sidekiq: *sidekiq* java: java ipfs: ipfs +erlang: beam.smp node: node factorio: factorio diff --git a/collectors/apps.plugin/apps_plugin.c b/collectors/apps.plugin/apps_plugin.c index 152038968..5bcda84f4 100644 --- a/collectors/apps.plugin/apps_plugin.c +++ b/collectors/apps.plugin/apps_plugin.c @@ -3750,7 +3750,7 @@ static void send_collected_data_to_netdata(struct target *root, const char *type struct target *w; for (w = root; w ; w = w->next) { - if (unlikely(!w->exposed && !w->is_other)) + if (unlikely(!w->exposed)) continue; send_BEGIN(type, w->clean_name, "processes", dt); @@ -3806,16 +3806,30 @@ static void send_collected_data_to_netdata(struct target *root, const char *type #endif #ifndef __FreeBSD__ - send_BEGIN(type, w->clean_name, "uptime", dt); - send_SET("uptime", (global_uptime > w->starttime) ? (global_uptime - w->starttime) : 0); - send_END(); + if (w->processes == 0) { + send_BEGIN(type, w->clean_name, "uptime", dt); + send_SET("uptime", 0); + send_END(); - if (enable_detailed_uptime_charts) { - send_BEGIN(type, w->clean_name, "uptime_summary", dt); - send_SET("min", w->uptime_min); - send_SET("avg", w->processes > 0 ? w->uptime_sum / w->processes : 0); - send_SET("max", w->uptime_max); + if (enable_detailed_uptime_charts) { + send_BEGIN(type, w->clean_name, "uptime_summary", dt); + send_SET("min", 0); + send_SET("avg", 0); + send_SET("max", 0); + send_END(); + } + } else { + send_BEGIN(type, w->clean_name, "uptime", dt); + send_SET("uptime", (global_uptime > w->starttime) ? (global_uptime - w->starttime) : 0); send_END(); + + if (enable_detailed_uptime_charts) { + send_BEGIN(type, w->clean_name, "uptime_summary", dt); + send_SET("min", w->uptime_min); + send_SET("avg", w->processes > 0 ? w->uptime_sum / w->processes : 0); + send_SET("max", w->uptime_max); + send_END(); + } } #endif @@ -3860,7 +3874,7 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type if (debug_enabled) { for (w = root; w; w = w->next) { - if (unlikely(w->debug_enabled && !w->target && w->processes)) { + if (unlikely(!w->target && w->processes)) { struct pid_on_target *pid_on_target; fprintf(stderr, "apps.plugin: target '%s' has aggregated %u process(es):", w->name, w->processes); for (pid_on_target = w->root_pid; pid_on_target; pid_on_target = pid_on_target->next) { @@ -3878,7 +3892,7 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type w->exposed = 1; fprintf(stdout, "CHART %s.%s_cpu_utilization '' '%s CPU utilization (100%% = 1 core)' 'percentage' cpu %s.cpu_utilization stacked 20001 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 0\n", lbl_name, w->name); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION user '' absolute 1 %llu\n", time_factor * RATES_DETAIL / 100LLU); fprintf(stdout, "DIMENSION system '' absolute 1 %llu\n", time_factor * RATES_DETAIL / 100LLU); @@ -3886,84 +3900,84 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type #ifndef __FreeBSD__ if (enable_guest_charts) { fprintf(stdout, "CHART %s.%s_cpu_guest_utilization '' '%s CPU guest utlization (100%% = 1 core)' 'percentage' cpu %s.cpu_guest_utilization line 20005 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 0\n", lbl_name, w->name); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION guest '' absolute 1 %llu\n", time_factor * RATES_DETAIL / 100LLU); } fprintf(stdout, "CHART %s.%s_cpu_context_switches '' '%s CPU context switches' 'switches/s' cpu %s.cpu_context_switches stacked 20010 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 0\n", lbl_name, w->name); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION voluntary '' absolute 1 %llu\n", RATES_DETAIL); fprintf(stdout, "DIMENSION involuntary '' absolute 1 %llu\n", RATES_DETAIL); fprintf(stdout, "CHART %s.%s_mem_private_usage '' '%s memory usage without shared' 'MiB' mem %s.mem_private_usage area 20050 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 0\n", lbl_name, w->name); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION mem '' absolute %ld %ld\n", 1L, 1024L); #endif fprintf(stdout, "CHART %s.%s_mem_usage '' '%s memory RSS usage' 'MiB' mem %s.mem_usage area 20055 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 0\n", lbl_name, w->name); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION rss '' absolute %ld %ld\n", 1L, 1024L); fprintf(stdout, "CHART %s.%s_mem_page_faults '' '%s memory page faults' 'pgfaults/s' mem %s.mem_page_faults stacked 20060 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 0\n", lbl_name, w->name); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION major '' absolute 1 %llu\n", RATES_DETAIL); fprintf(stdout, "DIMENSION minor '' absolute 1 %llu\n", RATES_DETAIL); fprintf(stdout, "CHART %s.%s_vmem_usage '' '%s virtual memory size' 'MiB' mem %s.vmem_usage line 20065 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 0\n", lbl_name, w->name); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION vmem '' absolute %ld %ld\n", 1L, 1024L); #ifndef __FreeBSD__ fprintf(stdout, "CHART %s.%s_swap_usage '' '%s swap usage' 'MiB' mem %s.swap_usage area 20065 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 0\n", lbl_name, w->name); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION swap '' absolute %ld %ld\n", 1L, 1024L); #endif #ifndef __FreeBSD__ fprintf(stdout, "CHART %s.%s_disk_physical_io '' '%s disk physical IO' 'KiB/s' disk %s.disk_physical_io area 20100 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 0\n", lbl_name, w->name); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION reads '' absolute 1 %llu\n", 1024LLU * RATES_DETAIL); fprintf(stdout, "DIMENSION writes '' absolute -1 %llu\n", 1024LLU * RATES_DETAIL); fprintf(stdout, "CHART %s.%s_disk_logical_io '' '%s disk logical IO' 'KiB/s' disk %s.disk_logical_io area 20105 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 0\n", lbl_name, w->name); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION reads '' absolute 1 %llu\n", 1024LLU * RATES_DETAIL); fprintf(stdout, "DIMENSION writes '' absolute -1 %llu\n", 1024LLU * RATES_DETAIL); #else fprintf(stdout, "CHART %s.%s_disk_physical_io '' '%s disk physical IO' 'blocks/s' disk %s.disk_physical_block_io area 20100 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 0\n", lbl_name, w->name); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION reads '' absolute 1 %llu\n", RATES_DETAIL); fprintf(stdout, "DIMENSION writes '' absolute -1 %llu\n", RATES_DETAIL); #endif fprintf(stdout, "CHART %s.%s_processes '' '%s processes' 'processes' processes %s.processes line 20150 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 0\n", lbl_name, w->name); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION processes '' absolute 1 1\n"); fprintf(stdout, "CHART %s.%s_threads '' '%s threads' 'threads' processes %s.threads line 20155 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 0\n", lbl_name, w->name); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION threads '' absolute 1 1\n"); if (enable_file_charts) { fprintf(stdout, "CHART %s.%s_fds_open_limit '' '%s open file descriptors limit' '%%' fds %s.fds_open_limit line 20200 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 0\n", lbl_name, w->name); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION limit '' absolute 1 100\n"); fprintf(stdout, "CHART %s.%s_fds_open '' '%s open files descriptors' 'fds' fds %s.fds_open stacked 20210 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 0\n", lbl_name, w->name); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION files '' absolute 1 1\n"); fprintf(stdout, "DIMENSION sockets '' absolute 1 1\n"); @@ -3978,13 +3992,13 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type #ifndef __FreeBSD__ fprintf(stdout, "CHART %s.%s_uptime '' '%s uptime' 'seconds' uptime %s.uptime line 20250 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 0\n", lbl_name, w->name); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION uptime '' absolute 1 1\n"); if (enable_detailed_uptime_charts) { fprintf(stdout, "CHART %s.%s_uptime_summary '' '%s uptime summary' 'seconds' uptime %s.uptime_summary area 20255 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 0\n", lbl_name, w->name); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION min '' absolute 1 1\n"); fprintf(stdout, "DIMENSION avg '' absolute 1 1\n"); @@ -5234,25 +5248,11 @@ static void function_processes(const char *transaction, char *function __maybe_u static bool apps_plugin_exit = false; int main(int argc, char **argv) { - // debug_flags = D_PROCFILE; - stderror = stderr; - clocks_init(); + nd_log_initialize_for_external_plugins("apps.plugin"); pagesize = (size_t)sysconf(_SC_PAGESIZE); - // set the name for logging - program_name = "apps.plugin"; - - // disable syslog for apps.plugin - error_log_syslog = 0; - - // set errors flood protection to 100 logs per hour - error_log_errors_per_period = 100; - error_log_throttle_period = 3600; - - log_set_global_severity_for_external_plugins(); - bool send_resource_usage = true; { const char *s = getenv("NETDATA_INTERNALS_MONITORING"); @@ -5264,7 +5264,7 @@ int main(int argc, char **argv) { procfile_open_flags = O_RDONLY|O_NOFOLLOW; netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX"); - if(verify_netdata_host_prefix() == -1) exit(1); + if(verify_netdata_host_prefix(true) == -1) exit(1); user_config_dir = getenv("NETDATA_USER_CONFIG_DIR"); if(user_config_dir == NULL) { diff --git a/collectors/apps.plugin/integrations/applications.md b/collectors/apps.plugin/integrations/applications.md index f4bbc8733..e5219fcc2 100644 --- a/collectors/apps.plugin/integrations/applications.md +++ b/collectors/apps.plugin/integrations/applications.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/apps.plugi sidebar_label: "Applications" learn_status: "Published" learn_rel_path: "Data Collection/Processes and System Services" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -66,22 +67,22 @@ Metrics: | Metric | Dimensions | Unit | |:------|:----------|:----| -| app_group.cpu_utilization | user, system | percentage | -| app_group.cpu_guest_utilization | guest | percentage | -| app_group.cpu_context_switches | voluntary, involuntary | switches/s | -| app_group.mem_usage | rss | MiB | -| app_group.mem_private_usage | mem | MiB | -| app_group.vmem_usage | vmem | MiB | -| app_group.mem_page_faults | minor, major | pgfaults/s | -| app_group.swap_usage | swap | MiB | -| app_group.disk_physical_io | reads, writes | KiB/s | -| app_group.disk_logical_io | reads, writes | KiB/s | -| app_group.processes | processes | processes | -| app_group.threads | threads | threads | -| app_group.fds_open_limit | limit | percentage | -| app_group.fds_open | files, sockets, pipes, inotifies, event, timer, signal, eventpolls, other | fds | -| app_group.uptime | uptime | seconds | -| app_group.uptime_summary | min, avg, max | seconds | +| app.cpu_utilization | user, system | percentage | +| app.cpu_guest_utilization | guest | percentage | +| app.cpu_context_switches | voluntary, involuntary | switches/s | +| app.mem_usage | rss | MiB | +| app.mem_private_usage | mem | MiB | +| app.vmem_usage | vmem | MiB | +| app.mem_page_faults | minor, major | pgfaults/s | +| app.swap_usage | swap | MiB | +| app.disk_physical_io | reads, writes | KiB/s | +| app.disk_logical_io | reads, writes | KiB/s | +| app.processes | processes | processes | +| app.threads | threads | threads | +| app.fds_open_limit | limit | percentage | +| app.fds_open | files, sockets, pipes, inotifies, event, timer, signal, eventpolls, other | fds | +| app.uptime | uptime | seconds | +| app.uptime_summary | min, avg, max | seconds | diff --git a/collectors/apps.plugin/integrations/user_groups.md b/collectors/apps.plugin/integrations/user_groups.md index 6f56d7be6..4ccbfc95f 100644 --- a/collectors/apps.plugin/integrations/user_groups.md +++ b/collectors/apps.plugin/integrations/user_groups.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/apps.plugi sidebar_label: "User Groups" learn_status: "Published" learn_rel_path: "Data Collection/Processes and System Services" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/apps.plugin/integrations/users.md b/collectors/apps.plugin/integrations/users.md index f325f05f6..c151fd8a2 100644 --- a/collectors/apps.plugin/integrations/users.md +++ b/collectors/apps.plugin/integrations/users.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/apps.plugi sidebar_label: "Users" learn_status: "Published" learn_rel_path: "Data Collection/Processes and System Services" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/apps.plugin/metadata.yaml b/collectors/apps.plugin/metadata.yaml index f24160ba7..f5f22853a 100644 --- a/collectors/apps.plugin/metadata.yaml +++ b/collectors/apps.plugin/metadata.yaml @@ -73,90 +73,90 @@ modules: - name: app_group description: The name of the group defined in the configuration. metrics: - - name: app_group.cpu_utilization + - name: app.cpu_utilization description: Apps CPU utilization (100% = 1 core) unit: percentage chart_type: stacked dimensions: - name: user - name: system - - name: app_group.cpu_guest_utilization + - name: app.cpu_guest_utilization description: Apps CPU guest utilization (100% = 1 core) unit: percentage chart_type: line dimensions: - name: guest - - name: app_group.cpu_context_switches + - name: app.cpu_context_switches description: Apps CPU context switches unit: switches/s chart_type: stacked dimensions: - name: voluntary - name: involuntary - - name: app_group.mem_usage + - name: app.mem_usage description: Apps memory RSS usage unit: MiB chart_type: line dimensions: - name: rss - - name: app_group.mem_private_usage + - name: app.mem_private_usage description: Apps memory usage without shared unit: MiB chart_type: stacked dimensions: - name: mem - - name: app_group.vmem_usage + - name: app.vmem_usage description: Apps virtual memory size unit: MiB chart_type: line dimensions: - name: vmem - - name: app_group.mem_page_faults + - name: app.mem_page_faults description: Apps memory page faults unit: pgfaults/s chart_type: stacked dimensions: - name: minor - name: major - - name: app_group.swap_usage + - name: app.swap_usage description: Apps swap usage unit: MiB chart_type: area dimensions: - name: swap - - name: app_group.disk_physical_io + - name: app.disk_physical_io description: Apps disk physical IO unit: KiB/s chart_type: area dimensions: - name: reads - name: writes - - name: app_group.disk_logical_io + - name: app.disk_logical_io description: Apps disk logical IO unit: KiB/s chart_type: area dimensions: - name: reads - name: writes - - name: app_group.processes + - name: app.processes description: Apps processes unit: processes chart_type: line dimensions: - name: processes - - name: app_group.threads + - name: app.threads description: Apps threads unit: threads chart_type: line dimensions: - name: threads - - name: app_group.fds_open_limit + - name: app.fds_open_limit description: Apps open file descriptors limit unit: percentage chart_type: line dimensions: - name: limit - - name: app_group.fds_open + - name: app.fds_open description: Apps open file descriptors unit: fds chart_type: stacked @@ -170,13 +170,13 @@ modules: - name: signal - name: eventpolls - name: other - - name: app_group.uptime + - name: app.uptime description: Apps uptime unit: seconds chart_type: line dimensions: - name: uptime - - name: app_group.uptime_summary + - name: app.uptime_summary description: Apps uptime summary unit: seconds chart_type: area diff --git a/collectors/cgroups.plugin/Makefile.am b/collectors/cgroups.plugin/Makefile.am index 354b9fbdc..0f6062420 100644 --- a/collectors/cgroups.plugin/Makefile.am +++ b/collectors/cgroups.plugin/Makefile.am @@ -3,11 +3,21 @@ AUTOMAKE_OPTIONS = subdir-objects MAINTAINERCLEANFILES = $(srcdir)/Makefile.in +CLEANFILES = \ + cgroup-name.sh \ + cgroup-network-helper.sh \ + $(NULL) + +include $(top_srcdir)/build/subst.inc +SUFFIXES = .in + dist_plugins_SCRIPTS = \ cgroup-name.sh \ cgroup-network-helper.sh \ $(NULL) dist_noinst_DATA = \ + cgroup-name.sh.in \ + cgroup-network-helper.sh.in \ README.md \ $(NULL) diff --git a/collectors/cgroups.plugin/cgroup-charts.c b/collectors/cgroups.plugin/cgroup-charts.c new file mode 100644 index 000000000..a89e8ac45 --- /dev/null +++ b/collectors/cgroups.plugin/cgroup-charts.c @@ -0,0 +1,1526 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "cgroup-internals.h" + +void update_cpu_utilization_chart(struct cgroup *cg) { + RRDSET *chart = cg->st_cpu; + + if (unlikely(!cg->st_cpu)) { + char *title; + char *context; + int prio; + + if (is_cgroup_systemd_service(cg)) { + title = "Systemd Services CPU utilization (100%% = 1 core)"; + context = "systemd.service.cpu.utilization"; + prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD; + } else { + title = k8s_is_kubepod(cg) ? "CPU Usage (100%% = 1000 mCPU)" : "CPU Usage (100%% = 1 core)"; + context = k8s_is_kubepod(cg) ? "k8s.cgroup.cpu" : "cgroup.cpu"; + prio = cgroup_containers_chart_priority; + } + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_cpu = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "cpu", + NULL, + "cpu", + context, + title, + "percentage", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_STACKED); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + + if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + cg->st_cpu_rd_user = rrddim_add(chart, "user", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL); + cg->st_cpu_rd_system = rrddim_add(chart, "system", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL); + } else { + cg->st_cpu_rd_user = rrddim_add(chart, "user", NULL, 100, 1000000, RRD_ALGORITHM_INCREMENTAL); + cg->st_cpu_rd_system = rrddim_add(chart, "system", NULL, 100, 1000000, RRD_ALGORITHM_INCREMENTAL); + } + } + + rrddim_set_by_pointer(chart, cg->st_cpu_rd_user, (collected_number)cg->cpuacct_stat.user); + rrddim_set_by_pointer(chart, cg->st_cpu_rd_system, (collected_number)cg->cpuacct_stat.system); + rrdset_done(chart); +} + +void update_cpu_utilization_limit_chart(struct cgroup *cg, NETDATA_DOUBLE cpu_limit) { + if (is_cgroup_systemd_service(cg)) + return; + + RRDSET *chart = cg->st_cpu_limit; + + if (unlikely(!cg->st_cpu_limit)) { + char *title = "CPU Usage within the limits"; + char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_limit" : "cgroup.cpu_limit"; + int prio = cgroup_containers_chart_priority - 1; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_cpu_limit = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "cpu_limit", + NULL, + "cpu", + context, + title, + "percentage", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + + if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) + rrddim_add(chart, "used", NULL, 1, system_hz, RRD_ALGORITHM_ABSOLUTE); + else + rrddim_add(chart, "used", NULL, 1, 1000000, RRD_ALGORITHM_ABSOLUTE); + cg->prev_cpu_usage = (NETDATA_DOUBLE)(cg->cpuacct_stat.user + cg->cpuacct_stat.system) * 100; + } + + NETDATA_DOUBLE cpu_usage = 0; + cpu_usage = (NETDATA_DOUBLE)(cg->cpuacct_stat.user + cg->cpuacct_stat.system) * 100; + NETDATA_DOUBLE cpu_used = 100 * (cpu_usage - cg->prev_cpu_usage) / (cpu_limit * cgroup_update_every); + + rrdset_isnot_obsolete___safe_from_collector_thread(chart); + + rrddim_set(chart, "used", (cpu_used > 0) ? (collected_number)cpu_used : 0); + + cg->prev_cpu_usage = cpu_usage; + + rrdsetvar_custom_chart_variable_set(cg->st_cpu, cg->chart_var_cpu_limit, cpu_limit); + rrdset_done(chart); +} + +void update_cpu_throttled_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + RRDSET *chart = cg->st_cpu_nr_throttled; + + if (unlikely(!cg->st_cpu_nr_throttled)) { + char *title = "CPU Throttled Runnable Periods"; + char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.throttled" : "cgroup.throttled"; + int prio = cgroup_containers_chart_priority + 10; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_cpu_nr_throttled = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "throttled", + NULL, + "cpu", + context, + title, + "percentage", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + rrddim_add(chart, "throttled", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set(chart, "throttled", (collected_number)cg->cpuacct_cpu_throttling.nr_throttled_perc); + rrdset_done(chart); +} + +void update_cpu_throttled_duration_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + RRDSET *chart = cg->st_cpu_throttled_time; + + if (unlikely(!cg->st_cpu_throttled_time)) { + char *title = "CPU Throttled Time Duration"; + char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.throttled_duration" : "cgroup.throttled_duration"; + int prio = cgroup_containers_chart_priority + 15; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_cpu_throttled_time = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "throttled_duration", + NULL, + "cpu", + context, + title, + "ms", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + rrddim_add(chart, "duration", NULL, 1, 1000000, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(chart, "duration", (collected_number)cg->cpuacct_cpu_throttling.throttled_time); + rrdset_done(chart); +} + +void update_cpu_shares_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + RRDSET *chart = cg->st_cpu_shares; + + if (unlikely(!cg->st_cpu_shares)) { + char *title = "CPU Time Relative Share"; + char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_shares" : "cgroup.cpu_shares"; + int prio = cgroup_containers_chart_priority + 20; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_cpu_shares = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "cpu_shares", + NULL, + "cpu", + context, + title, + "shares", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + rrddim_add(chart, "shares", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set(chart, "shares", (collected_number)cg->cpuacct_cpu_shares.shares); + rrdset_done(chart); +} + +void update_cpu_per_core_usage_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + char id[RRD_ID_LENGTH_MAX + 1]; + unsigned int i; + + if (unlikely(!cg->st_cpu_per_core)) { + char *title = k8s_is_kubepod(cg) ? "CPU Usage (100%% = 1000 mCPU) Per Core" : "CPU Usage (100%% = 1 core) Per Core"; + char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_per_core" : "cgroup.cpu_per_core"; + int prio = cgroup_containers_chart_priority + 100; + + char buff[RRD_ID_LENGTH_MAX + 1]; + cg->st_cpu_per_core = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "cpu_per_core", + NULL, + "cpu", + context, + title, + "percentage", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_STACKED); + + rrdset_update_rrdlabels(cg->st_cpu_per_core, cg->chart_labels); + + for (i = 0; i < cg->cpuacct_usage.cpus; i++) { + snprintfz(id, RRD_ID_LENGTH_MAX, "cpu%u", i); + rrddim_add(cg->st_cpu_per_core, id, NULL, 100, 1000000000, RRD_ALGORITHM_INCREMENTAL); + } + } + + for (i = 0; i < cg->cpuacct_usage.cpus; i++) { + snprintfz(id, RRD_ID_LENGTH_MAX, "cpu%u", i); + rrddim_set(cg->st_cpu_per_core, id, (collected_number)cg->cpuacct_usage.cpu_percpu[i]); + } + rrdset_done(cg->st_cpu_per_core); +} + +void update_mem_usage_detailed_chart(struct cgroup *cg) { + RRDSET *chart = cg->st_mem; + + if (unlikely(!cg->st_mem)) { + char *title; + char *context; + int prio; + if (is_cgroup_systemd_service(cg)) { + title = "Systemd Services Memory"; + context = "systemd.service.memory.ram.usage"; + prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 15; + } else { + title = "Memory Usage"; + context = k8s_is_kubepod(cg) ? "k8s.cgroup.mem" : "cgroup.mem"; + prio = cgroup_containers_chart_priority + 220; + } + + char buff[RRD_ID_LENGTH_MAX + 1]; + + chart = cg->st_mem = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "mem", + NULL, + "mem", + context, + title, + "MiB", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_STACKED); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + + if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + rrddim_add(chart, "cache", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(chart, "rss", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + if (cg->memory.detailed_has_swap) + rrddim_add(chart, "swap", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + rrddim_add(chart, "rss_huge", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(chart, "mapped_file", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } else { + rrddim_add(chart, "anon", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(chart, "kernel_stack", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(chart, "slab", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(chart, "sock", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(chart, "anon_thp", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(chart, "file", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + } + + if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + rrddim_set(chart, "cache", (collected_number)cg->memory.total_cache); + collected_number rss = (collected_number)(cg->memory.total_rss - cg->memory.total_rss_huge); + if (rss < 0) + rss = 0; + rrddim_set(chart, "rss", rss); + if (cg->memory.detailed_has_swap) + rrddim_set(chart, "swap", (collected_number)cg->memory.total_swap); + rrddim_set(chart, "rss_huge", (collected_number)cg->memory.total_rss_huge); + rrddim_set(chart, "mapped_file", (collected_number)cg->memory.total_mapped_file); + } else { + rrddim_set(chart, "anon", (collected_number)cg->memory.anon); + rrddim_set(chart, "kernel_stack", (collected_number)cg->memory.kernel_stack); + rrddim_set(chart, "slab", (collected_number)cg->memory.slab); + rrddim_set(chart, "sock", (collected_number)cg->memory.sock); + rrddim_set(chart, "anon_thp", (collected_number)cg->memory.anon_thp); + rrddim_set(chart, "file", (collected_number)cg->memory.total_mapped_file); + } + rrdset_done(chart); +} + +void update_mem_writeback_chart(struct cgroup *cg) { + RRDSET *chart = cg->st_writeback; + + if (unlikely(!cg->st_writeback)) { + char *title; + char *context; + int prio; + if (is_cgroup_systemd_service(cg)) { + title = "Systemd Services Writeback Memory"; + context = "systemd.service.memory.writeback"; + prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 20; + } else { + title = "Writeback Memory"; + context = k8s_is_kubepod(cg) ? "k8s.cgroup.writeback" : "cgroup.writeback"; + prio = cgroup_containers_chart_priority + 300; + } + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_writeback = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "writeback", + NULL, + "mem", + context, + title, + "MiB", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_AREA); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + if (cg->memory.detailed_has_dirty) + rrddim_add(chart, "dirty", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(chart, "writeback", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + + if (cg->memory.detailed_has_dirty) + rrddim_set(chart, "dirty", (collected_number)cg->memory.total_dirty); + rrddim_set(chart, "writeback", (collected_number)cg->memory.total_writeback); + rrdset_done(chart); +} + +void update_mem_activity_chart(struct cgroup *cg) { + RRDSET *chart = cg->st_mem_activity; + + if (unlikely(!cg->st_mem_activity)) { + char *title; + char *context; + int prio; + if (is_cgroup_systemd_service(cg)) { + title = "Systemd Services Memory Paging IO"; + context = "systemd.service.memory.paging.io"; + prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 30; + } else { + title = "Memory Activity"; + context = k8s_is_kubepod(cg) ? "k8s.cgroup.mem_activity" : "cgroup.mem_activity"; + prio = cgroup_containers_chart_priority + 400; + } + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_mem_activity = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "mem_activity", + NULL, + "mem", + context, + title, + "MiB/s", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + // FIXME: systemd just in, out + rrddim_add(chart, "pgpgin", "in", system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(chart, "pgpgout", "out", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(chart, "pgpgin", (collected_number)cg->memory.total_pgpgin); + rrddim_set(chart, "pgpgout", (collected_number)cg->memory.total_pgpgout); + rrdset_done(chart); +} + +void update_mem_pgfaults_chart(struct cgroup *cg) { + RRDSET *chart = cg->st_pgfaults; + + if (unlikely(!cg->st_pgfaults)) { + char *title; + char *context; + int prio; + if (is_cgroup_systemd_service(cg)) { + title = "Systemd Services Memory Page Faults"; + context = "systemd.service.memory.paging.faults"; + prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 25; + } else { + title = "Memory Page Faults"; + context = k8s_is_kubepod(cg) ? "k8s.cgroup.pgfaults" : "cgroup.pgfaults"; + prio = cgroup_containers_chart_priority + 500; + } + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_pgfaults = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "pgfaults", + NULL, + "mem", + context, + title, + "MiB/s", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + rrddim_add(chart, "pgfault", NULL, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(chart, "pgmajfault", "swap", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(chart, "pgfault", (collected_number)cg->memory.total_pgfault); + rrddim_set(chart, "pgmajfault", (collected_number)cg->memory.total_pgmajfault); + rrdset_done(chart); +} + +void update_mem_usage_limit_chart(struct cgroup *cg, unsigned long long memory_limit) { + if (is_cgroup_systemd_service(cg)) + return; + + RRDSET *chart = cg->st_mem_usage_limit; + + if (unlikely(!cg->st_mem_usage_limit)) { + char *title = "Used RAM within the limits"; + char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.mem_usage_limit" : "cgroup.mem_usage_limit"; + int prio = cgroup_containers_chart_priority + 200; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_mem_usage_limit = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "mem_usage_limit", + NULL, + "mem", + context, + title, + "MiB", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_STACKED); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + + rrddim_add(chart, "available", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(chart, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrdset_isnot_obsolete___safe_from_collector_thread(chart); + + rrddim_set(chart, "available", (collected_number)(memory_limit - cg->memory.usage_in_bytes)); + rrddim_set(chart, "used", (collected_number)cg->memory.usage_in_bytes); + rrdset_done(chart); +} + +void update_mem_utilization_chart(struct cgroup *cg, unsigned long long memory_limit) { + if (is_cgroup_systemd_service(cg)) + return; + + RRDSET *chart = cg->st_mem_utilization; + + if (unlikely(!cg->st_mem_utilization)) { + char *title = "Memory Utilization"; + char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.mem_utilization" : "cgroup.mem_utilization"; + int prio = cgroup_containers_chart_priority + 199; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_mem_utilization = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "mem_utilization", + NULL, + "mem", + context, + title, + "percentage", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_AREA); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + + rrddim_add(chart, "utilization", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrdset_isnot_obsolete___safe_from_collector_thread(chart); + collected_number util = (collected_number)(cg->memory.usage_in_bytes * 100 / memory_limit); + rrddim_set(chart, "utilization", util); + rrdset_done(chart); +} + +void update_mem_failcnt_chart(struct cgroup *cg) { + RRDSET *chart = cg->st_mem_failcnt; + + if (unlikely(!cg->st_mem_failcnt)) { + char *title; + char *context; + int prio; + if (is_cgroup_systemd_service(cg)) { + title = "Systemd Services Memory Limit Failures"; + context = "systemd.service.memory.failcnt"; + prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 10; + } else { + title = "Memory Limit Failures"; + context = k8s_is_kubepod(cg) ? "k8s.cgroup.mem_failcnt" : "cgroup.mem_failcnt"; + prio = cgroup_containers_chart_priority + 250; + } + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_mem_failcnt = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "mem_failcnt", + NULL, + "mem", + context, + title, + "count", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + rrddim_add(chart, "failures", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(chart, "failures", (collected_number)cg->memory.failcnt); + rrdset_done(chart); +} + +void update_mem_usage_chart(struct cgroup *cg) { + RRDSET *chart = cg->st_mem_usage; + + if (unlikely(!cg->st_mem_usage)) { + char *title; + char *context; + int prio; + if (is_cgroup_systemd_service(cg)) { + title = "Systemd Services Used Memory"; + context = "systemd.service.memory.usage"; + prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 5; + } else { + title = "Used Memory"; + context = k8s_is_kubepod(cg) ? "k8s.cgroup.mem_usage" : "cgroup.mem_usage"; + prio = cgroup_containers_chart_priority + 210; + } + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_mem_usage = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "mem_usage", + NULL, + "mem", + context, + title, + "MiB", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_STACKED); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + + cg->st_mem_rd_ram = rrddim_add(chart, "ram", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + cg->st_mem_rd_swap = rrddim_add(chart, "swap", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(chart, cg->st_mem_rd_ram, (collected_number)cg->memory.usage_in_bytes); + + if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + rrddim_set_by_pointer( + chart, + cg->st_mem_rd_swap, + cg->memory.msw_usage_in_bytes > (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) ? + (collected_number)(cg->memory.msw_usage_in_bytes - + (cg->memory.usage_in_bytes + cg->memory.total_inactive_file)) : + 0); + } else { + rrddim_set_by_pointer(chart, cg->st_mem_rd_swap, (collected_number)cg->memory.msw_usage_in_bytes); + } + + rrdset_done(chart); +} + +void update_io_serviced_bytes_chart(struct cgroup *cg) { + RRDSET *chart = cg->st_io; + + if (unlikely(!cg->st_io)) { + char *title; + char *context; + int prio; + if (is_cgroup_systemd_service(cg)) { + title = "Systemd Services Disk Read/Write Bandwidth"; + context = "systemd.service.disk.io"; + prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 35; + } else { + title = "I/O Bandwidth (all disks)"; + context = k8s_is_kubepod(cg) ? "k8s.cgroup.io" : "cgroup.io"; + prio = cgroup_containers_chart_priority + 1200; + } + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_io = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "io", + NULL, + "disk", + context, + title, + "KiB/s", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_AREA); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + cg->st_io_rd_read = rrddim_add(chart, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + cg->st_io_rd_written = rrddim_add(cg->st_io, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(chart, cg->st_io_rd_read, (collected_number)cg->io_service_bytes.Read); + rrddim_set_by_pointer(chart, cg->st_io_rd_written, (collected_number)cg->io_service_bytes.Write); + rrdset_done(chart); +} + +void update_io_serviced_ops_chart(struct cgroup *cg) { + RRDSET *chart = cg->st_serviced_ops; + + if (unlikely(!cg->st_serviced_ops)) { + char *title; + char *context; + int prio; + if (is_cgroup_systemd_service(cg)) { + title = "Systemd Services Disk Read/Write Operations"; + context = "systemd.service.disk.iops"; + prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 40; + } else { + title = "Serviced I/O Operations (all disks)"; + context = k8s_is_kubepod(cg) ? "k8s.cgroup.serviced_ops" : "cgroup.serviced_ops"; + prio = cgroup_containers_chart_priority + 1200; + } + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_serviced_ops = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "serviced_ops", + NULL, + "disk", + context, + title, + "operations/s", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + rrddim_add(chart, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(chart, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(chart, "read", (collected_number)cg->io_serviced.Read); + rrddim_set(chart, "write", (collected_number)cg->io_serviced.Write); + rrdset_done(chart); +} + +void update_throttle_io_serviced_bytes_chart(struct cgroup *cg) { + RRDSET *chart = cg->st_throttle_io; + + if (unlikely(!cg->st_throttle_io)) { + char *title; + char *context; + int prio; + if (is_cgroup_systemd_service(cg)) { + title = "Systemd Services Throttle Disk Read/Write Bandwidth"; + context = "systemd.service.disk.throttle.io"; + prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 45; + } else { + title = "Throttle I/O Bandwidth (all disks)"; + context = k8s_is_kubepod(cg) ? "k8s.cgroup.throttle_io" : "cgroup.throttle_io"; + prio = cgroup_containers_chart_priority + 1200; + } + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_throttle_io = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "throttle_io", + NULL, + "disk", + context, + title, + "KiB/s", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_AREA); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + + cg->st_throttle_io_rd_read = rrddim_add(chart, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + cg->st_throttle_io_rd_written = rrddim_add(chart, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(chart, cg->st_throttle_io_rd_read, (collected_number)cg->throttle_io_service_bytes.Read); + rrddim_set_by_pointer(chart, cg->st_throttle_io_rd_written, (collected_number)cg->throttle_io_service_bytes.Write); + rrdset_done(chart); +} + +void update_throttle_io_serviced_ops_chart(struct cgroup *cg) { + RRDSET *chart = cg->st_throttle_serviced_ops; + + if (unlikely(!cg->st_throttle_serviced_ops)) { + char *title; + char *context; + int prio; + if (is_cgroup_systemd_service(cg)) { + title = "Systemd Services Throttle Disk Read/Write Operations"; + context = "systemd.service.disk.throttle.iops"; + prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 50; + } else { + title = "Throttle Serviced I/O Operations (all disks)"; + context = k8s_is_kubepod(cg) ? "k8s.cgroup.throttle_serviced_ops" : "cgroup.throttle_serviced_ops"; + prio = cgroup_containers_chart_priority + 1200; + } + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_throttle_serviced_ops = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "throttle_serviced_ops", + NULL, + "disk", + context, + title, + "operations/s", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + rrddim_add(chart, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(chart, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(chart, "read", (collected_number)cg->throttle_io_serviced.Read); + rrddim_set(chart, "write", (collected_number)cg->throttle_io_serviced.Write); + rrdset_done(chart); +} + +void update_io_queued_ops_chart(struct cgroup *cg) { + RRDSET *chart = cg->st_queued_ops; + + if (unlikely(!cg->st_queued_ops)) { + char *title; + char *context; + int prio; + if (is_cgroup_systemd_service(cg)) { + title = "Systemd Services Queued Disk Read/Write Operations"; + context = "systemd.service.disk.queued_iops"; + prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 55; + } else { + title = "Queued I/O Operations (all disks)"; + context = k8s_is_kubepod(cg) ? "k8s.cgroup.queued_ops" : "cgroup.queued_ops"; + prio = cgroup_containers_chart_priority + 2000; + } + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_queued_ops = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "queued_ops", + NULL, + "disk", + context, + title, + "operations", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + rrddim_add(chart, "read", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(chart, "write", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set(chart, "read", (collected_number)cg->io_queued.Read); + rrddim_set(chart, "write", (collected_number)cg->io_queued.Write); + rrdset_done(chart); +} + +void update_io_merged_ops_chart(struct cgroup *cg) { + RRDSET *chart = cg->st_merged_ops; + + if (unlikely(!cg->st_merged_ops)) { + char *title; + char *context; + int prio; + if (is_cgroup_systemd_service(cg)) { + title = "Systemd Services Merged Disk Read/Write Operations"; + context = "systemd.service.disk.merged_iops"; + prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 60; + } else { + title = "Merged I/O Operations (all disks)"; + context = k8s_is_kubepod(cg) ? "k8s.cgroup.merged_ops" : "cgroup.merged_ops"; + prio = cgroup_containers_chart_priority + 2100; + } + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_merged_ops = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "merged_ops", + NULL, + "disk", + context, + title, + "operations/s", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + rrddim_add(chart, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(chart, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(chart, "read", (collected_number)cg->io_merged.Read); + rrddim_set(chart, "write", (collected_number)cg->io_merged.Write); + rrdset_done(chart); +} + +void update_cpu_some_pressure_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + struct pressure *res = &cg->cpu_pressure; + struct pressure_charts *pcs = &res->some; + RRDSET *chart = pcs->share_time.st; + + if (unlikely(!pcs->share_time.st)) { + char *title = "CPU some pressure"; + char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_some_pressure" : "cgroup.cpu_some_pressure"; + int prio = cgroup_containers_chart_priority + 2200; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "cpu_some_pressure", + NULL, + "cpu", + context, + title, + "percentage", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(chart, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100)); + rrddim_set_by_pointer(chart, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100)); + rrddim_set_by_pointer(chart, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100)); + rrdset_done(chart); +} + +void update_cpu_some_pressure_stall_time_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + struct pressure *res = &cg->cpu_pressure; + struct pressure_charts *pcs = &res->some; + RRDSET *chart = pcs->total_time.st; + + if (unlikely(!pcs->total_time.st)) { + char *title = "CPU some pressure stall time"; + char *context = + k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_some_pressure_stall_time" : "cgroup.cpu_some_pressure_stall_time"; + int prio = cgroup_containers_chart_priority + 2220; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "cpu_some_pressure_stall_time", + NULL, + "cpu", + context, + title, + "ms", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + rrdset_update_rrdlabels(chart, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(chart, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total)); + rrdset_done(chart); +} + +void update_cpu_full_pressure_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + struct pressure *res = &cg->cpu_pressure; + struct pressure_charts *pcs = &res->full; + RRDSET *chart = pcs->share_time.st; + + if (unlikely(!pcs->share_time.st)) { + char *title = "CPU full pressure"; + char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_full_pressure" : "cgroup.cpu_full_pressure"; + int prio = cgroup_containers_chart_priority + 2240; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "cpu_full_pressure", + NULL, + "cpu", + context, + title, + "percentage", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(chart, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100)); + rrddim_set_by_pointer(chart, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100)); + rrddim_set_by_pointer(chart, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100)); + rrdset_done(chart); +} + +void update_cpu_full_pressure_stall_time_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + struct pressure *res = &cg->cpu_pressure; + struct pressure_charts *pcs = &res->full; + RRDSET *chart = pcs->total_time.st; + + if (unlikely(!pcs->total_time.st)) { + char *title = "CPU full pressure stall time"; + char *context = + k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_full_pressure_stall_time" : "cgroup.cpu_full_pressure_stall_time"; + int prio = cgroup_containers_chart_priority + 2260; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "cpu_full_pressure_stall_time", + NULL, + "cpu", + context, + title, + "ms", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(chart, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total)); + rrdset_done(chart); +} + +void update_mem_some_pressure_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + struct pressure *res = &cg->memory_pressure; + struct pressure_charts *pcs = &res->some; + RRDSET *chart = pcs->share_time.st; + + if (unlikely(!pcs->share_time.st)) { + char *title = "Memory some pressure"; + char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.memory_some_pressure" : "cgroup.memory_some_pressure"; + int prio = cgroup_containers_chart_priority + 2300; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "mem_some_pressure", + NULL, + "mem", + context, + title, + "percentage", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(chart, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100)); + rrddim_set_by_pointer(chart, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100)); + rrddim_set_by_pointer(chart, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100)); + rrdset_done(chart); +} + +void update_mem_some_pressure_stall_time_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + struct pressure *res = &cg->memory_pressure; + struct pressure_charts *pcs = &res->some; + RRDSET *chart = pcs->total_time.st; + + if (unlikely(!pcs->total_time.st)) { + char *title = "Memory some pressure stall time"; + char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.memory_some_pressure_stall_time" : + "cgroup.memory_some_pressure_stall_time"; + int prio = cgroup_containers_chart_priority + 2320; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "memory_some_pressure_stall_time", + NULL, + "mem", + context, + title, + "ms", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(chart, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total)); + rrdset_done(chart); +} + +void update_mem_full_pressure_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + struct pressure *res = &cg->memory_pressure; + struct pressure_charts *pcs = &res->full; + RRDSET *chart = pcs->share_time.st; + + if (unlikely(!pcs->share_time.st)) { + char *title = "Memory full pressure"; + char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.memory_full_pressure" : "cgroup.memory_full_pressure"; + int prio = cgroup_containers_chart_priority + 2340; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "mem_full_pressure", + NULL, + "mem", + context, + title, + "percentage", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(chart, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100)); + rrddim_set_by_pointer(chart, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100)); + rrddim_set_by_pointer(chart, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100)); + rrdset_done(chart); +} + +void update_mem_full_pressure_stall_time_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + struct pressure *res = &cg->memory_pressure; + struct pressure_charts *pcs = &res->full; + RRDSET *chart = pcs->total_time.st; + + if (unlikely(!pcs->total_time.st)) { + char *title = "Memory full pressure stall time"; + char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.memory_full_pressure_stall_time" : + "cgroup.memory_full_pressure_stall_time"; + int prio = cgroup_containers_chart_priority + 2360; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "memory_full_pressure_stall_time", + NULL, + "mem", + context, + title, + "ms", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + rrdset_update_rrdlabels(chart, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(chart, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total)); + rrdset_done(chart); +} + +void update_irq_some_pressure_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + struct pressure *res = &cg->irq_pressure; + struct pressure_charts *pcs = &res->some; + RRDSET *chart = pcs->share_time.st; + + if (unlikely(!pcs->share_time.st)) { + char *title = "IRQ some pressure"; + char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.irq_some_pressure" : "cgroup.irq_some_pressure"; + int prio = cgroup_containers_chart_priority + 2310; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "irq_some_pressure", + NULL, + "interrupts", + context, + title, + "percentage", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(chart, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100)); + rrddim_set_by_pointer(chart, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100)); + rrddim_set_by_pointer(chart, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100)); + rrdset_done(chart); +} + +void update_irq_some_pressure_stall_time_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + struct pressure *res = &cg->irq_pressure; + struct pressure_charts *pcs = &res->some; + RRDSET *chart = pcs->total_time.st; + + if (unlikely(!pcs->total_time.st)) { + char *title = "IRQ some pressure stall time"; + char *context = + k8s_is_kubepod(cg) ? "k8s.cgroup.irq_some_pressure_stall_time" : "cgroup.irq_some_pressure_stall_time"; + int prio = cgroup_containers_chart_priority + 2330; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "irq_some_pressure_stall_time", + NULL, + "interrupts", + context, + title, + "ms", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(chart, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total)); + rrdset_done(chart); +} + +void update_irq_full_pressure_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + struct pressure *res = &cg->irq_pressure; + struct pressure_charts *pcs = &res->full; + RRDSET *chart = pcs->share_time.st; + + if (unlikely(!pcs->share_time.st)) { + char *title = "IRQ full pressure"; + char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.irq_full_pressure" : "cgroup.irq_full_pressure"; + int prio = cgroup_containers_chart_priority + 2350; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "irq_full_pressure", + NULL, + "interrupts", + context, + title, + "percentage", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(chart, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100)); + rrddim_set_by_pointer(chart, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100)); + rrddim_set_by_pointer(chart, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100)); + rrdset_done(chart); +} + +void update_irq_full_pressure_stall_time_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + struct pressure *res = &cg->irq_pressure; + struct pressure_charts *pcs = &res->full; + RRDSET *chart = pcs->total_time.st; + + if (unlikely(!pcs->total_time.st)) { + char *title = "IRQ full pressure stall time"; + char *context = + k8s_is_kubepod(cg) ? "k8s.cgroup.irq_full_pressure_stall_time" : "cgroup.irq_full_pressure_stall_time"; + int prio = cgroup_containers_chart_priority + 2370; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "irq_full_pressure_stall_time", + NULL, + "interrupts", + context, + title, + "ms", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(chart, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total)); + rrdset_done(chart); +} + +void update_io_some_pressure_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + struct pressure *res = &cg->io_pressure; + struct pressure_charts *pcs = &res->some; + RRDSET *chart = pcs->share_time.st; + + if (unlikely(!pcs->share_time.st)) { + char *title = "I/O some pressure"; + char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.io_some_pressure" : "cgroup.io_some_pressure"; + int prio = cgroup_containers_chart_priority + 2400; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "io_some_pressure", + NULL, + "disk", + context, + title, + "percentage", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(chart, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100)); + rrddim_set_by_pointer(chart, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100)); + rrddim_set_by_pointer(chart, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100)); + rrdset_done(chart); +} + +void update_io_some_pressure_stall_time_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + struct pressure *res = &cg->io_pressure; + struct pressure_charts *pcs = &res->some; + RRDSET *chart = pcs->total_time.st; + + if (unlikely(!pcs->total_time.st)) { + char *title = "I/O some pressure stall time"; + char *context = + k8s_is_kubepod(cg) ? "k8s.cgroup.io_some_pressure_stall_time" : "cgroup.io_some_pressure_stall_time"; + int prio = cgroup_containers_chart_priority + 2420; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "io_some_pressure_stall_time", + NULL, + "disk", + context, + title, + "ms", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + rrdset_update_rrdlabels(chart, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(chart, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total)); + rrdset_done(chart); +} + +void update_io_full_pressure_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + struct pressure *res = &cg->io_pressure; + struct pressure_charts *pcs = &res->full; + RRDSET *chart = pcs->share_time.st; + + if (unlikely(!pcs->share_time.st)) { + char *title = "I/O full pressure"; + char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.io_full_pressure" : "cgroup.io_full_pressure"; + int prio = cgroup_containers_chart_priority + 2440; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "io_full_pressure", + NULL, + "disk", + context, + title, + "percentage", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(chart, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100)); + rrddim_set_by_pointer(chart, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100)); + rrddim_set_by_pointer(chart, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100)); + rrdset_done(chart); +} + +void update_io_full_pressure_stall_time_chart(struct cgroup *cg) { + if (is_cgroup_systemd_service(cg)) + return; + + struct pressure *res = &cg->io_pressure; + struct pressure_charts *pcs = &res->full; + RRDSET *chart = pcs->total_time.st; + + if (unlikely(!pcs->total_time.st)) { + char *title = "I/O full pressure stall time"; + char *context = + k8s_is_kubepod(cg) ? "k8s.cgroup.io_full_pressure_stall_time" : "cgroup.io_full_pressure_stall_time"; + int prio = cgroup_containers_chart_priority + 2460; + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "io_full_pressure_stall_time", + NULL, + "disk", + context, + title, + "ms", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(chart, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total)); + rrdset_done(chart); +} + +void update_pids_current_chart(struct cgroup *cg) { + RRDSET *chart = cg->st_pids; + + if (unlikely(!cg->st_pids)) { + char *title; + char *context; + int prio; + if (is_cgroup_systemd_service(cg)) { + title = "Systemd Services Number of Processes"; + context = "systemd.service.pids.current"; + prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 70; + } else { + title = "Number of processes"; + context = k8s_is_kubepod(cg) ? "k8s.cgroup.pids_current" : "cgroup.pids_current"; + prio = cgroup_containers_chart_priority + 2150; + } + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_pids = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "pids_current", + NULL, + "pids", + context, + title, + "pids", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + cg->st_pids_rd_pids_current = rrddim_add(chart, "pids", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(chart, cg->st_pids_rd_pids_current, (collected_number)cg->pids.pids_current); + rrdset_done(chart); +} diff --git a/collectors/cgroups.plugin/cgroup-discovery.c b/collectors/cgroups.plugin/cgroup-discovery.c new file mode 100644 index 000000000..ede35ed8a --- /dev/null +++ b/collectors/cgroups.plugin/cgroup-discovery.c @@ -0,0 +1,1245 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "cgroup-internals.h" + +// discovery cgroup thread worker jobs +#define WORKER_DISCOVERY_INIT 0 +#define WORKER_DISCOVERY_FIND 1 +#define WORKER_DISCOVERY_PROCESS 2 +#define WORKER_DISCOVERY_PROCESS_RENAME 3 +#define WORKER_DISCOVERY_PROCESS_NETWORK 4 +#define WORKER_DISCOVERY_PROCESS_FIRST_TIME 5 +#define WORKER_DISCOVERY_UPDATE 6 +#define WORKER_DISCOVERY_CLEANUP 7 +#define WORKER_DISCOVERY_COPY 8 +#define WORKER_DISCOVERY_SHARE 9 +#define WORKER_DISCOVERY_LOCK 10 + +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 11 +#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 11 +#endif + +struct cgroup *discovered_cgroup_root = NULL; + +char cgroup_chart_id_prefix[] = "cgroup_"; +char services_chart_id_prefix[] = "systemd_"; +char *cgroups_rename_script = NULL; + + +// ---------------------------------------------------------------------------- + +static inline void free_pressure(struct pressure *res) { + if (res->some.share_time.st) rrdset_is_obsolete___safe_from_collector_thread(res->some.share_time.st); + if (res->some.total_time.st) rrdset_is_obsolete___safe_from_collector_thread(res->some.total_time.st); + if (res->full.share_time.st) rrdset_is_obsolete___safe_from_collector_thread(res->full.share_time.st); + if (res->full.total_time.st) rrdset_is_obsolete___safe_from_collector_thread(res->full.total_time.st); + freez(res->filename); +} + +static inline void cgroup_free_network_interfaces(struct cgroup *cg) { + while(cg->interfaces) { + struct cgroup_network_interface *i = cg->interfaces; + cg->interfaces = i->next; + + // delete the registration of proc_net_dev rename + netdev_rename_device_del(i->host_device); + + freez((void *)i->host_device); + freez((void *)i->container_device); + freez((void *)i); + } +} + +static inline void cgroup_free(struct cgroup *cg) { + netdata_log_debug(D_CGROUP, "Removing cgroup '%s' with chart id '%s' (was %s and %s)", cg->id, cg->chart_id, (cg->enabled)?"enabled":"disabled", (cg->available)?"available":"not available"); + + cgroup_netdev_delete(cg); + + if(cg->st_cpu) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu); + if(cg->st_cpu_limit) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_limit); + if(cg->st_cpu_per_core) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_per_core); + if(cg->st_cpu_nr_throttled) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_nr_throttled); + if(cg->st_cpu_throttled_time) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_throttled_time); + if(cg->st_cpu_shares) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_shares); + if(cg->st_mem) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem); + if(cg->st_writeback) rrdset_is_obsolete___safe_from_collector_thread(cg->st_writeback); + if(cg->st_mem_activity) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_activity); + if(cg->st_pgfaults) rrdset_is_obsolete___safe_from_collector_thread(cg->st_pgfaults); + if(cg->st_mem_usage) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_usage); + if(cg->st_mem_usage_limit) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_usage_limit); + if(cg->st_mem_utilization) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_utilization); + if(cg->st_mem_failcnt) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_failcnt); + if(cg->st_io) rrdset_is_obsolete___safe_from_collector_thread(cg->st_io); + if(cg->st_serviced_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_serviced_ops); + if(cg->st_throttle_io) rrdset_is_obsolete___safe_from_collector_thread(cg->st_throttle_io); + if(cg->st_throttle_serviced_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_throttle_serviced_ops); + if(cg->st_queued_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_queued_ops); + if(cg->st_merged_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_merged_ops); + if(cg->st_pids) rrdset_is_obsolete___safe_from_collector_thread(cg->st_pids); + + freez(cg->filename_cpuset_cpus); + freez(cg->filename_cpu_cfs_period); + freez(cg->filename_cpu_cfs_quota); + freez(cg->filename_memory_limit); + freez(cg->filename_memoryswap_limit); + + cgroup_free_network_interfaces(cg); + + freez(cg->cpuacct_usage.cpu_percpu); + + freez(cg->cpuacct_stat.filename); + freez(cg->cpuacct_usage.filename); + freez(cg->cpuacct_cpu_throttling.filename); + freez(cg->cpuacct_cpu_shares.filename); + + arl_free(cg->memory.arl_base); + freez(cg->memory.filename_detailed); + freez(cg->memory.filename_failcnt); + freez(cg->memory.filename_usage_in_bytes); + freez(cg->memory.filename_msw_usage_in_bytes); + + freez(cg->io_service_bytes.filename); + freez(cg->io_serviced.filename); + + freez(cg->throttle_io_service_bytes.filename); + freez(cg->throttle_io_serviced.filename); + + freez(cg->io_merged.filename); + freez(cg->io_queued.filename); + freez(cg->pids.pids_current_filename); + + free_pressure(&cg->cpu_pressure); + free_pressure(&cg->io_pressure); + free_pressure(&cg->memory_pressure); + free_pressure(&cg->irq_pressure); + + freez(cg->id); + freez(cg->intermediate_id); + freez(cg->chart_id); + freez(cg->name); + + rrdlabels_destroy(cg->chart_labels); + + freez(cg); + + cgroup_root_count--; +} + +// ---------------------------------------------------------------------------- +// add/remove/find cgroup objects + +#define CGROUP_CHARTID_LINE_MAX 1024 + +static inline char *cgroup_chart_id_strdupz(const char *s) { + if(!s || !*s) s = "/"; + + if(*s == '/' && s[1] != '\0') s++; + + char *r = strdupz(s); + netdata_fix_chart_id(r); + + return r; +} + +// TODO: move the code to cgroup_chart_id_strdupz() when the renaming script is fixed +static inline void substitute_dots_in_id(char *s) { + // dots are used to distinguish chart type and id in streaming, so we should replace them + for (char *d = s; *d; d++) { + if (*d == '.') + *d = '-'; + } +} + +// ---------------------------------------------------------------------------- +// parse k8s labels + +char *cgroup_parse_resolved_name_and_labels(RRDLABELS *labels, char *data) { + // the first word, up to the first space is the name + char *name = strsep_skip_consecutive_separators(&data, " "); + + // the rest are key=value pairs separated by comma + while(data) { + char *pair = strsep_skip_consecutive_separators(&data, ","); + rrdlabels_add_pair(labels, pair, RRDLABEL_SRC_AUTO | RRDLABEL_SRC_K8S); + } + + return name; +} + +static inline void discovery_rename_cgroup(struct cgroup *cg) { + if (!cg->pending_renames) { + return; + } + cg->pending_renames--; + + netdata_log_debug(D_CGROUP, "looking for the name of cgroup '%s' with chart id '%s'", cg->id, cg->chart_id); + netdata_log_debug(D_CGROUP, "executing command %s \"%s\" for cgroup '%s'", cgroups_rename_script, cg->intermediate_id, cg->chart_id); + pid_t cgroup_pid; + + FILE *fp_child_input, *fp_child_output; + (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_rename_script, cg->id, cg->intermediate_id); + if (!fp_child_output) { + collector_error("CGROUP: cannot popen(%s \"%s\", \"r\").", cgroups_rename_script, cg->intermediate_id); + cg->pending_renames = 0; + cg->processed = 1; + return; + } + + char buffer[CGROUP_CHARTID_LINE_MAX + 1]; + char *new_name = fgets(buffer, CGROUP_CHARTID_LINE_MAX, fp_child_output); + int exit_code = netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); + + switch (exit_code) { + case 0: + cg->pending_renames = 0; + break; + + case 3: + cg->pending_renames = 0; + cg->processed = 1; + break; + + default: + break; + } + + if (cg->pending_renames || cg->processed) + return; + if (!new_name || !*new_name || *new_name == '\n') + return; + if (!(new_name = trim(new_name))) + return; + + if (!cg->chart_labels) + cg->chart_labels = rrdlabels_create(); + // read the new labels and remove the obsolete ones + rrdlabels_unmark_all(cg->chart_labels); + char *name = cgroup_parse_resolved_name_and_labels(cg->chart_labels, new_name); + rrdlabels_remove_all_unmarked(cg->chart_labels); + + freez(cg->name); + cg->name = strdupz(name); + + freez(cg->chart_id); + cg->chart_id = cgroup_chart_id_strdupz(name); + + substitute_dots_in_id(cg->chart_id); + cg->hash_chart_id = simple_hash(cg->chart_id); +} + +static void is_cgroup_procs_exist(netdata_ebpf_cgroup_shm_body_t *out, char *id) { + struct stat buf; + + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_cpuset_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } + + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_blkio_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } + + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_memory_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } + + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_devices_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } + + out->path[0] = '\0'; + out->enabled = 0; +} + +static inline void convert_cgroup_to_systemd_service(struct cgroup *cg) { + char buffer[CGROUP_CHARTID_LINE_MAX + 1]; + cg->options |= CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE; + strncpyz(buffer, cg->id, CGROUP_CHARTID_LINE_MAX); + char *s = buffer; + + // skip to the last slash + size_t len = strlen(s); + while (len--) { + if (unlikely(s[len] == '/')) { + break; + } + } + if (len) { + s = &s[len + 1]; + } + + // remove extension + len = strlen(s); + while (len--) { + if (unlikely(s[len] == '.')) { + break; + } + } + if (len) { + s[len] = '\0'; + } + + freez(cg->name); + cg->name = strdupz(s); + + freez(cg->chart_id); + cg->chart_id = cgroup_chart_id_strdupz(s); + substitute_dots_in_id(cg->chart_id); + cg->hash_chart_id = simple_hash(cg->chart_id); +} + +static inline struct cgroup *discovery_cgroup_add(const char *id) { + netdata_log_debug(D_CGROUP, "adding to list, cgroup with id '%s'", id); + + struct cgroup *cg = callocz(1, sizeof(struct cgroup)); + + cg->id = strdupz(id); + cg->hash = simple_hash(cg->id); + + cg->name = strdupz(id); + + cg->intermediate_id = cgroup_chart_id_strdupz(id); + + cg->chart_id = cgroup_chart_id_strdupz(id); + substitute_dots_in_id(cg->chart_id); + cg->hash_chart_id = simple_hash(cg->chart_id); + + if (cgroup_use_unified_cgroups) { + cg->options |= CGROUP_OPTIONS_IS_UNIFIED; + } + + if (!discovered_cgroup_root) + discovered_cgroup_root = cg; + else { + struct cgroup *t; + for (t = discovered_cgroup_root; t->discovered_next; t = t->discovered_next) { + } + t->discovered_next = cg; + } + + return cg; +} + +static inline struct cgroup *discovery_cgroup_find(const char *id) { + netdata_log_debug(D_CGROUP, "searching for cgroup '%s'", id); + + uint32_t hash = simple_hash(id); + + struct cgroup *cg; + for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) { + if(hash == cg->hash && strcmp(id, cg->id) == 0) + break; + } + + netdata_log_debug(D_CGROUP, "cgroup '%s' %s in memory", id, (cg)?"found":"not found"); + return cg; +} + +static int calc_cgroup_depth(const char *id) { + int depth = 0; + const char *s; + for (s = id; *s; s++) { + depth += unlikely(*s == '/'); + } + return depth; +} + +static inline void discovery_find_cgroup_in_dir_callback(const char *dir) { + if (!dir || !*dir) { + dir = "/"; + } + + netdata_log_debug(D_CGROUP, "examining cgroup dir '%s'", dir); + + struct cgroup *cg = discovery_cgroup_find(dir); + if (cg) { + cg->available = 1; + return; + } + + if (cgroup_root_count >= cgroup_root_max) { + nd_log_limit_static_global_var(erl, 3600, 0); + nd_log_limit(&erl, NDLS_COLLECTORS, NDLP_WARNING, "CGROUP: maximum number of cgroups reached (%d). No more cgroups will be added.", cgroup_root_count); + return; + } + + if (cgroup_max_depth > 0) { + int depth = calc_cgroup_depth(dir); + if (depth > cgroup_max_depth) { + nd_log_collector(NDLP_DEBUG, "CGROUP: '%s' is too deep (%d, while max is %d)", dir, depth, cgroup_max_depth); + return; + } + } + + cg = discovery_cgroup_add(dir); + cg->available = 1; + cg->first_time_seen = 1; + cg->function_ready = false; + cgroup_root_count++; +} + +static inline int discovery_find_dir_in_subdirs(const char *base, const char *this, void (*callback)(const char *)) { + if(!this) this = base; + netdata_log_debug(D_CGROUP, "searching for directories in '%s' (base '%s')", this?this:"", base); + + size_t dirlen = strlen(this), baselen = strlen(base); + + int ret = -1; + int enabled = -1; + + const char *relative_path = &this[baselen]; + if(!*relative_path) relative_path = "/"; + + DIR *dir = opendir(this); + if(!dir) { + collector_error("CGROUP: cannot read directory '%s'", base); + return ret; + } + ret = 1; + + callback(relative_path); + + struct dirent *de = NULL; + while((de = readdir(dir))) { + if(de->d_type == DT_DIR + && ( + (de->d_name[0] == '.' && de->d_name[1] == '\0') + || (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0') + )) + continue; + + if(de->d_type == DT_DIR) { + if(enabled == -1) { + const char *r = relative_path; + if(*r == '\0') r = "/"; + + // do not decent in directories we are not interested + enabled = matches_search_cgroup_paths(r); + } + + if(enabled) { + char *s = mallocz(dirlen + strlen(de->d_name) + 2); + strcpy(s, this); + strcat(s, "/"); + strcat(s, de->d_name); + int ret2 = discovery_find_dir_in_subdirs(base, s, callback); + if(ret2 > 0) ret += ret2; + freez(s); + } + } + } + + closedir(dir); + return ret; +} + +static inline void discovery_mark_as_unavailable_all_cgroups() { + for (struct cgroup *cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { + cg->available = 0; + } +} + +static inline void discovery_update_filenames_cgroup_v1(struct cgroup *cg) { + char filename[FILENAME_MAX + 1]; + struct stat buf; + + // CPU + if (unlikely(cgroup_enable_cpuacct_stat && !cg->cpuacct_stat.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpuacct.stat", cgroup_cpuacct_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->cpuacct_stat.filename = strdupz(filename); + cg->cpuacct_stat.enabled = cgroup_enable_cpuacct_stat; + snprintfz(filename, FILENAME_MAX, "%s%s/cpuset.cpus", cgroup_cpuset_base, cg->id); + cg->filename_cpuset_cpus = strdupz(filename); + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.cfs_period_us", cgroup_cpuacct_base, cg->id); + cg->filename_cpu_cfs_period = strdupz(filename); + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.cfs_quota_us", cgroup_cpuacct_base, cg->id); + cg->filename_cpu_cfs_quota = strdupz(filename); + } + } + // FIXME: remove usage_percpu + if (unlikely(cgroup_enable_cpuacct_usage && !cg->cpuacct_usage.filename && !is_cgroup_systemd_service(cg))) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpuacct.usage_percpu", cgroup_cpuacct_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->cpuacct_usage.filename = strdupz(filename); + cg->cpuacct_usage.enabled = cgroup_enable_cpuacct_usage; + } + } + if (unlikely( + cgroup_enable_cpuacct_cpu_throttling && !cg->cpuacct_cpu_throttling.filename && + !is_cgroup_systemd_service(cg))) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.stat", cgroup_cpuacct_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->cpuacct_cpu_throttling.filename = strdupz(filename); + cg->cpuacct_cpu_throttling.enabled = cgroup_enable_cpuacct_cpu_throttling; + } + } + if (unlikely( + cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename && !is_cgroup_systemd_service(cg))) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.shares", cgroup_cpuacct_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->cpuacct_cpu_shares.filename = strdupz(filename); + cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares; + } + } + + // Memory + if (unlikely( + (cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed && + (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.stat", cgroup_memory_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->memory.filename_detailed = strdupz(filename); + cg->memory.enabled_detailed = + (cgroup_enable_detailed_memory == CONFIG_BOOLEAN_YES) ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_AUTO; + } + } + if (unlikely(cgroup_enable_memory && !cg->memory.filename_usage_in_bytes)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.usage_in_bytes", cgroup_memory_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->memory.filename_usage_in_bytes = strdupz(filename); + cg->memory.enabled_usage_in_bytes = cgroup_enable_memory; + snprintfz(filename, FILENAME_MAX, "%s%s/memory.limit_in_bytes", cgroup_memory_base, cg->id); + cg->filename_memory_limit = strdupz(filename); + } + } + if (unlikely(cgroup_enable_swap && !cg->memory.filename_msw_usage_in_bytes)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.memsw.usage_in_bytes", cgroup_memory_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->memory.filename_msw_usage_in_bytes = strdupz(filename); + cg->memory.enabled_msw_usage_in_bytes = cgroup_enable_swap; + snprintfz(filename, FILENAME_MAX, "%s%s/memory.memsw.limit_in_bytes", cgroup_memory_base, cg->id); + cg->filename_memoryswap_limit = strdupz(filename); + } + } + if (unlikely(cgroup_enable_memory_failcnt && !cg->memory.filename_failcnt)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.failcnt", cgroup_memory_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->memory.filename_failcnt = strdupz(filename); + cg->memory.enabled_failcnt = cgroup_enable_memory_failcnt; + } + } + + // Blkio + if (unlikely(cgroup_enable_blkio_io && !cg->io_service_bytes.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_service_bytes_recursive", cgroup_blkio_base, cg->id); + if (unlikely(stat(filename, &buf) != -1)) { + cg->io_service_bytes.filename = strdupz(filename); + cg->io_service_bytes.enabled = cgroup_enable_blkio_io; + } else { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_service_bytes", cgroup_blkio_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_service_bytes.filename = strdupz(filename); + cg->io_service_bytes.enabled = cgroup_enable_blkio_io; + } + } + } + if (unlikely(cgroup_enable_blkio_ops && !cg->io_serviced.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_serviced_recursive", cgroup_blkio_base, cg->id); + if (unlikely(stat(filename, &buf) != -1)) { + cg->io_serviced.filename = strdupz(filename); + cg->io_serviced.enabled = cgroup_enable_blkio_ops; + } else { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_serviced", cgroup_blkio_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_serviced.filename = strdupz(filename); + cg->io_serviced.enabled = cgroup_enable_blkio_ops; + } + } + } + if (unlikely(cgroup_enable_blkio_throttle_io && !cg->throttle_io_service_bytes.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_service_bytes_recursive", cgroup_blkio_base, cg->id); + if (unlikely(stat(filename, &buf) != -1)) { + cg->throttle_io_service_bytes.filename = strdupz(filename); + cg->throttle_io_service_bytes.enabled = cgroup_enable_blkio_throttle_io; + } else { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_service_bytes", cgroup_blkio_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->throttle_io_service_bytes.filename = strdupz(filename); + cg->throttle_io_service_bytes.enabled = cgroup_enable_blkio_throttle_io; + } + } + } + if (unlikely(cgroup_enable_blkio_throttle_ops && !cg->throttle_io_serviced.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_serviced_recursive", cgroup_blkio_base, cg->id); + if (unlikely(stat(filename, &buf) != -1)) { + cg->throttle_io_serviced.filename = strdupz(filename); + cg->throttle_io_serviced.enabled = cgroup_enable_blkio_throttle_ops; + } else { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_serviced", cgroup_blkio_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->throttle_io_serviced.filename = strdupz(filename); + cg->throttle_io_serviced.enabled = cgroup_enable_blkio_throttle_ops; + } + } + } + if (unlikely(cgroup_enable_blkio_merged_ops && !cg->io_merged.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_merged_recursive", cgroup_blkio_base, cg->id); + if (unlikely(stat(filename, &buf) != -1)) { + cg->io_merged.filename = strdupz(filename); + cg->io_merged.enabled = cgroup_enable_blkio_merged_ops; + } else { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_merged", cgroup_blkio_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_merged.filename = strdupz(filename); + cg->io_merged.enabled = cgroup_enable_blkio_merged_ops; + } + } + } + if (unlikely(cgroup_enable_blkio_queued_ops && !cg->io_queued.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_queued_recursive", cgroup_blkio_base, cg->id); + if (unlikely(stat(filename, &buf) != -1)) { + cg->io_queued.filename = strdupz(filename); + cg->io_queued.enabled = cgroup_enable_blkio_queued_ops; + } else { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_queued", cgroup_blkio_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_queued.filename = strdupz(filename); + cg->io_queued.enabled = cgroup_enable_blkio_queued_ops; + } + } + } + + // Pids + if (unlikely(!cg->pids.pids_current_filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/pids.current", cgroup_pids_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->pids.pids_current_filename = strdupz(filename); + } + } +} + +static inline void discovery_update_filenames_cgroup_v2(struct cgroup *cg) { + char filename[FILENAME_MAX + 1]; + struct stat buf; + + // CPU + if (unlikely((cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_cpu_throttling) && !cg->cpuacct_stat.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.stat", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->cpuacct_stat.filename = strdupz(filename); + cg->cpuacct_stat.enabled = cgroup_enable_cpuacct_stat; + cg->cpuacct_cpu_throttling.enabled = cgroup_enable_cpuacct_cpu_throttling; + cg->filename_cpuset_cpus = NULL; + cg->filename_cpu_cfs_period = NULL; + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.max", cgroup_unified_base, cg->id); + cg->filename_cpu_cfs_quota = strdupz(filename); + } + } + if (unlikely(cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.weight", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->cpuacct_cpu_shares.filename = strdupz(filename); + cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares; + } + } + + // Memory + // FIXME: this if condition! + if (unlikely( + (cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed && + (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.stat", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->memory.filename_detailed = strdupz(filename); + cg->memory.enabled_detailed = + (cgroup_enable_detailed_memory == CONFIG_BOOLEAN_YES) ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_AUTO; + } + } + + if (unlikely(cgroup_enable_memory && !cg->memory.filename_usage_in_bytes)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.current", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->memory.filename_usage_in_bytes = strdupz(filename); + cg->memory.enabled_usage_in_bytes = cgroup_enable_memory; + snprintfz(filename, FILENAME_MAX, "%s%s/memory.max", cgroup_unified_base, cg->id); + cg->filename_memory_limit = strdupz(filename); + } + } + + if (unlikely(cgroup_enable_swap && !cg->memory.filename_msw_usage_in_bytes)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.swap.current", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->memory.filename_msw_usage_in_bytes = strdupz(filename); + cg->memory.enabled_msw_usage_in_bytes = cgroup_enable_swap; + snprintfz(filename, FILENAME_MAX, "%s%s/memory.swap.max", cgroup_unified_base, cg->id); + cg->filename_memoryswap_limit = strdupz(filename); + } + } + + // Blkio + if (unlikely(cgroup_enable_blkio_io && !cg->io_service_bytes.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/io.stat", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_service_bytes.filename = strdupz(filename); + cg->io_service_bytes.enabled = cgroup_enable_blkio_io; + } + } + + if (unlikely(cgroup_enable_blkio_ops && !cg->io_serviced.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/io.stat", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_serviced.filename = strdupz(filename); + cg->io_serviced.enabled = cgroup_enable_blkio_ops; + } + } + + // PSI + if (unlikely(cgroup_enable_pressure_cpu && !cg->cpu_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->cpu_pressure.filename = strdupz(filename); + cg->cpu_pressure.some.enabled = cgroup_enable_pressure_cpu; + cg->cpu_pressure.full.enabled = CONFIG_BOOLEAN_NO; + } + } + + if (unlikely((cgroup_enable_pressure_io_some || cgroup_enable_pressure_io_full) && !cg->io_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/io.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_pressure.filename = strdupz(filename); + cg->io_pressure.some.enabled = cgroup_enable_pressure_io_some; + cg->io_pressure.full.enabled = cgroup_enable_pressure_io_full; + } + } + + if (unlikely( + (cgroup_enable_pressure_memory_some || cgroup_enable_pressure_memory_full) && + !cg->memory_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->memory_pressure.filename = strdupz(filename); + cg->memory_pressure.some.enabled = cgroup_enable_pressure_memory_some; + cg->memory_pressure.full.enabled = cgroup_enable_pressure_memory_full; + } + } + + if (unlikely((cgroup_enable_pressure_irq_some || cgroup_enable_pressure_irq_full) && !cg->irq_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/irq.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->irq_pressure.filename = strdupz(filename); + cg->irq_pressure.some.enabled = cgroup_enable_pressure_irq_some; + cg->irq_pressure.full.enabled = cgroup_enable_pressure_irq_full; + } + } + + // Pids + if (unlikely(!cg->pids.pids_current_filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/pids.current", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->pids.pids_current_filename = strdupz(filename); + } + } +} + +static inline void discovery_update_filenames_all_cgroups() { + for (struct cgroup *cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { + if (unlikely(!cg->available || !cg->enabled || cg->pending_renames)) + continue; + + if (!cgroup_use_unified_cgroups) + discovery_update_filenames_cgroup_v1(cg); + else if (likely(cgroup_unified_exist)) + discovery_update_filenames_cgroup_v2(cg); + } +} + +static inline void discovery_cleanup_all_cgroups() { + struct cgroup *cg = discovered_cgroup_root, *last = NULL; + + for(; cg ;) { + if(!cg->available) { + // enable the first duplicate cgroup + { + struct cgroup *t; + for (t = discovered_cgroup_root; t; t = t->discovered_next) { + if (t != cg && t->available && !t->enabled && t->options & CGROUP_OPTIONS_DISABLED_DUPLICATE && + (is_cgroup_systemd_service(t) == is_cgroup_systemd_service(cg)) && + t->hash_chart_id == cg->hash_chart_id && !strcmp(t->chart_id, cg->chart_id)) { + netdata_log_debug(D_CGROUP, "Enabling duplicate of cgroup '%s' with id '%s', because the original with id '%s' stopped.", t->chart_id, t->id, cg->id); + t->enabled = 1; + t->options &= ~CGROUP_OPTIONS_DISABLED_DUPLICATE; + break; + } + } + } + + if(!last) + discovered_cgroup_root = cg->discovered_next; + else + last->discovered_next = cg->discovered_next; + + cgroup_free(cg); + + if(!last) + cg = discovered_cgroup_root; + else + cg = last->discovered_next; + } + else { + last = cg; + cg = cg->discovered_next; + } + } +} + +static inline void discovery_copy_discovered_cgroups_to_reader() { + netdata_log_debug(D_CGROUP, "copy discovered cgroups to the main group list"); + + struct cgroup *cg; + + for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { + cg->next = cg->discovered_next; + } + + cgroup_root = discovered_cgroup_root; +} + +static inline void discovery_share_cgroups_with_ebpf() { + struct cgroup *cg; + int count; + struct stat buf; + + if (shm_mutex_cgroup_ebpf == SEM_FAILED) { + return; + } + sem_wait(shm_mutex_cgroup_ebpf); + + for (cg = cgroup_root, count = 0; cg; cg = cg->next, count++) { + netdata_ebpf_cgroup_shm_body_t *ptr = &shm_cgroup_ebpf.body[count]; + char *prefix = (is_cgroup_systemd_service(cg)) ? services_chart_id_prefix : cgroup_chart_id_prefix; + snprintfz(ptr->name, CGROUP_EBPF_NAME_SHARED_LENGTH - 1, "%s%s", prefix, cg->chart_id); + ptr->hash = simple_hash(ptr->name); + ptr->options = cg->options; + ptr->enabled = cg->enabled; + if (cgroup_use_unified_cgroups) { + snprintfz(ptr->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_unified_base, cg->id); + if (likely(stat(ptr->path, &buf) == -1)) { + ptr->path[0] = '\0'; + ptr->enabled = 0; + } + } else { + is_cgroup_procs_exist(ptr, cg->id); + } + + netdata_log_debug(D_CGROUP, "cgroup shared: NAME=%s, ENABLED=%d", ptr->name, ptr->enabled); + } + + shm_cgroup_ebpf.header->cgroup_root_count = count; + sem_post(shm_mutex_cgroup_ebpf); +} + +static inline void discovery_find_all_cgroups_v1() { + if (cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_usage) { + if (discovery_find_dir_in_subdirs(cgroup_cpuacct_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_enable_cpuacct_stat = cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_NO; + collector_error("CGROUP: disabled cpu statistics."); + } + } + + if (cgroup_enable_blkio_io || cgroup_enable_blkio_ops || cgroup_enable_blkio_throttle_io || + cgroup_enable_blkio_throttle_ops || cgroup_enable_blkio_merged_ops || cgroup_enable_blkio_queued_ops) { + if (discovery_find_dir_in_subdirs(cgroup_blkio_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_enable_blkio_io = cgroup_enable_blkio_ops = cgroup_enable_blkio_throttle_io = + cgroup_enable_blkio_throttle_ops = cgroup_enable_blkio_merged_ops = cgroup_enable_blkio_queued_ops = + CONFIG_BOOLEAN_NO; + collector_error("CGROUP: disabled blkio statistics."); + } + } + + if (cgroup_enable_memory || cgroup_enable_detailed_memory || cgroup_enable_swap || cgroup_enable_memory_failcnt) { + if (discovery_find_dir_in_subdirs(cgroup_memory_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_enable_memory = cgroup_enable_detailed_memory = cgroup_enable_swap = cgroup_enable_memory_failcnt = + CONFIG_BOOLEAN_NO; + collector_error("CGROUP: disabled memory statistics."); + } + } + + if (cgroup_search_in_devices) { + if (discovery_find_dir_in_subdirs(cgroup_devices_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_search_in_devices = 0; + collector_error("CGROUP: disabled devices statistics."); + } + } +} + +static inline void discovery_find_all_cgroups_v2() { + if (discovery_find_dir_in_subdirs(cgroup_unified_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_unified_exist = CONFIG_BOOLEAN_NO; + collector_error("CGROUP: disabled unified cgroups statistics."); + } +} + +static int is_digits_only(const char *s) { + do { + if (!isdigit(*s++)) { + return 0; + } + } while (*s); + + return 1; +} + +static int is_cgroup_k8s_container(const char *id) { + // examples: + // https://github.com/netdata/netdata/blob/0fc101679dcd12f1cb8acdd07bb4c85d8e553e53/collectors/cgroups.plugin/cgroup-name.sh#L121-L147 + const char *p = id; + const char *pp = NULL; + int i = 0; + size_t l = 3; // pod + while ((p = strstr(p, "pod"))) { + i++; + p += l; + pp = p; + } + return !(i < 2 || !pp || !(pp = strchr(pp, '/')) || !pp++ || !*pp); +} + +#define TASK_COMM_LEN 16 + +static int k8s_get_container_first_proc_comm(const char *id, char *comm) { + if (!is_cgroup_k8s_container(id)) { + return 1; + } + + static procfile *ff = NULL; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/%s/cgroup.procs", cgroup_cpuacct_base, id); + + ff = procfile_reopen(ff, filename, NULL, CGROUP_PROCFILE_FLAG); + if (unlikely(!ff)) { + netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename); + return 1; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) { + netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename); + return 1; + } + + unsigned long lines = procfile_lines(ff); + if (likely(lines < 2)) { + return 1; + } + + char *pid = procfile_lineword(ff, 0, 0); + if (!pid || !*pid) { + return 1; + } + + snprintfz(filename, FILENAME_MAX, "%s/proc/%s/comm", netdata_configured_host_prefix, pid); + + ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_DEFAULT); + if (unlikely(!ff)) { + netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename); + return 1; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) { + netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename); + return 1; + } + + lines = procfile_lines(ff); + if (unlikely(lines != 2)) { + return 1; + } + + char *proc_comm = procfile_lineword(ff, 0, 0); + if (!proc_comm || !*proc_comm) { + return 1; + } + + strncpyz(comm, proc_comm, TASK_COMM_LEN); + return 0; +} + +static inline void discovery_process_first_time_seen_cgroup(struct cgroup *cg) { + if (!cg->first_time_seen) { + return; + } + cg->first_time_seen = 0; + + char comm[TASK_COMM_LEN + 1]; + + if (cg->container_orchestrator == CGROUPS_ORCHESTRATOR_UNSET) { + if (strstr(cg->id, "kubepods")) { + cg->container_orchestrator = CGROUPS_ORCHESTRATOR_K8S; + } else { + cg->container_orchestrator = CGROUPS_ORCHESTRATOR_UNKNOWN; + } + } + + if (is_inside_k8s && !k8s_get_container_first_proc_comm(cg->id, comm)) { + // container initialization may take some time when CPU % is high + // seen on GKE: comm is '6' before 'runc:[2:INIT]' (dunno if it could be another number) + if (is_digits_only(comm) || matches_entrypoint_parent_process_comm(comm)) { + cg->first_time_seen = 1; + return; + } + if (!strcmp(comm, "pause")) { + // a container that holds the network namespace for the pod + // we don't need to collect its metrics + cg->processed = 1; + return; + } + } + + if (cgroup_enable_systemd_services && matches_systemd_services_cgroups(cg->id)) { + netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'cgroups to match as systemd services'", cg->id, cg->chart_id); + convert_cgroup_to_systemd_service(cg); + return; + } + + if (matches_enabled_cgroup_renames(cg->id)) { + netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'run script to rename cgroups matching', will try to rename it", cg->id, cg->chart_id); + if (is_inside_k8s && is_cgroup_k8s_container(cg->id)) { + // it may take up to a minute for the K8s API to return data for the container + // tested on AWS K8s cluster with 100% CPU utilization + cg->pending_renames = 9; // 1.5 minute + } else { + cg->pending_renames = 2; + } + } +} + +static int discovery_is_cgroup_duplicate(struct cgroup *cg) { + // https://github.com/netdata/netdata/issues/797#issuecomment-241248884 + struct cgroup *c; + for (c = discovered_cgroup_root; c; c = c->discovered_next) { + if (c != cg && c->enabled && (is_cgroup_systemd_service(c) == is_cgroup_systemd_service(cg)) && + c->hash_chart_id == cg->hash_chart_id && !strcmp(c->chart_id, cg->chart_id)) { + collector_error( + "CGROUP: chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.", + cg->chart_id, + c->id, + cg->id); + return 1; + } + } + return 0; +} + +// ---------------------------------------------------------------------------- +// cgroup network interfaces + +#define CGROUP_NETWORK_INTERFACE_MAX_LINE 2048 + +static inline void read_cgroup_network_interfaces(struct cgroup *cg) { + netdata_log_debug(D_CGROUP, "looking for the network interfaces of cgroup '%s' with chart id '%s'", cg->id, cg->chart_id); + + pid_t cgroup_pid; + char cgroup_identifier[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; + + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_cpuacct_base, cg->id); + } + else { + snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_unified_base, cg->id); + } + + netdata_log_debug(D_CGROUP, "executing cgroup_identifier %s --cgroup '%s' for cgroup '%s'", cgroups_network_interface_script, cgroup_identifier, cg->id); + FILE *fp_child_input, *fp_child_output; + (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_network_interface_script, "--cgroup", cgroup_identifier); + if(!fp_child_output) { + collector_error("CGROUP: cannot popen(%s --cgroup \"%s\", \"r\").", cgroups_network_interface_script, cgroup_identifier); + return; + } + + char *s; + char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; + while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp_child_output))) { + trim(s); + + if(*s && *s != '\n') { + char *t = s; + while(*t && *t != ' ') t++; + if(*t == ' ') { + *t = '\0'; + t++; + } + + if(!*s) { + collector_error("CGROUP: empty host interface returned by script"); + continue; + } + + if(!*t) { + collector_error("CGROUP: empty guest interface returned by script"); + continue; + } + + struct cgroup_network_interface *i = callocz(1, sizeof(struct cgroup_network_interface)); + i->host_device = strdupz(s); + i->container_device = strdupz(t); + i->next = cg->interfaces; + cg->interfaces = i; + + collector_info("CGROUP: cgroup '%s' has network interface '%s' as '%s'", cg->id, i->host_device, i->container_device); + + // register a device rename to proc_net_dev.c + netdev_rename_device_add(i->host_device, i->container_device, cg->chart_id, cg->chart_labels, + k8s_is_kubepod(cg) ? "k8s." : "", cgroup_netdev_get(cg)); + } + } + + netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); + // netdata_log_debug(D_CGROUP, "closed cgroup_identifier for cgroup '%s'", cg->id); +} + +static inline void discovery_process_cgroup(struct cgroup *cg) { + if (!cg->available || cg->processed) { + return; + } + + if (cg->first_time_seen) { + worker_is_busy(WORKER_DISCOVERY_PROCESS_FIRST_TIME); + discovery_process_first_time_seen_cgroup(cg); + if (unlikely(cg->first_time_seen || cg->processed)) { + return; + } + } + + if (cg->pending_renames) { + worker_is_busy(WORKER_DISCOVERY_PROCESS_RENAME); + discovery_rename_cgroup(cg); + if (unlikely(cg->pending_renames || cg->processed)) { + return; + } + } + + cg->processed = 1; + + if ((strlen(cg->chart_id) + strlen(cgroup_chart_id_prefix)) >= RRD_ID_LENGTH_MAX) { + collector_info("cgroup '%s' (chart id '%s') disabled because chart_id exceeds the limit (RRD_ID_LENGTH_MAX)", cg->id, cg->chart_id); + return; + } + + if (is_cgroup_systemd_service(cg)) { + if (discovery_is_cgroup_duplicate(cg)) { + cg->enabled = 0; + cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; + return; + } + if (!cg->chart_labels) + cg->chart_labels = rrdlabels_create(); + rrdlabels_add(cg->chart_labels, "service_name", cg->name, RRDLABEL_SRC_AUTO); + cg->enabled = 1; + return; + } + + if (!(cg->enabled = matches_enabled_cgroup_names(cg->name))) { + netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups names matching'", cg->id, cg->name); + return; + } + + if (!(cg->enabled = matches_enabled_cgroup_paths(cg->id))) { + netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups matching'", cg->id, cg->name); + return; + } + + if (discovery_is_cgroup_duplicate(cg)) { + cg->enabled = 0; + cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; + return; + } + + if (!cg->chart_labels) + cg->chart_labels = rrdlabels_create(); + + if (!k8s_is_kubepod(cg)) { + rrdlabels_add(cg->chart_labels, "cgroup_name", cg->name, RRDLABEL_SRC_AUTO); + if (!rrdlabels_exist(cg->chart_labels, "image")) + rrdlabels_add(cg->chart_labels, "image", "", RRDLABEL_SRC_AUTO); + } + + worker_is_busy(WORKER_DISCOVERY_PROCESS_NETWORK); + read_cgroup_network_interfaces(cg); +} + +static inline void discovery_find_all_cgroups() { + netdata_log_debug(D_CGROUP, "searching for cgroups"); + + worker_is_busy(WORKER_DISCOVERY_INIT); + discovery_mark_as_unavailable_all_cgroups(); + + worker_is_busy(WORKER_DISCOVERY_FIND); + if (!cgroup_use_unified_cgroups) { + discovery_find_all_cgroups_v1(); + } else { + discovery_find_all_cgroups_v2(); + } + + for (struct cgroup *cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { + worker_is_busy(WORKER_DISCOVERY_PROCESS); + discovery_process_cgroup(cg); + } + + worker_is_busy(WORKER_DISCOVERY_UPDATE); + discovery_update_filenames_all_cgroups(); + + worker_is_busy(WORKER_DISCOVERY_LOCK); + uv_mutex_lock(&cgroup_root_mutex); + + worker_is_busy(WORKER_DISCOVERY_CLEANUP); + discovery_cleanup_all_cgroups(); + + worker_is_busy(WORKER_DISCOVERY_COPY); + discovery_copy_discovered_cgroups_to_reader(); + + uv_mutex_unlock(&cgroup_root_mutex); + + worker_is_busy(WORKER_DISCOVERY_SHARE); + discovery_share_cgroups_with_ebpf(); + + netdata_log_debug(D_CGROUP, "done searching for cgroups"); +} + +void cgroup_discovery_worker(void *ptr) +{ + UNUSED(ptr); + + worker_register("CGROUPSDISC"); + worker_register_job_name(WORKER_DISCOVERY_INIT, "init"); + worker_register_job_name(WORKER_DISCOVERY_FIND, "find"); + worker_register_job_name(WORKER_DISCOVERY_PROCESS, "process"); + worker_register_job_name(WORKER_DISCOVERY_PROCESS_RENAME, "rename"); + worker_register_job_name(WORKER_DISCOVERY_PROCESS_NETWORK, "network"); + worker_register_job_name(WORKER_DISCOVERY_PROCESS_FIRST_TIME, "new"); + worker_register_job_name(WORKER_DISCOVERY_UPDATE, "update"); + worker_register_job_name(WORKER_DISCOVERY_CLEANUP, "cleanup"); + worker_register_job_name(WORKER_DISCOVERY_COPY, "copy"); + worker_register_job_name(WORKER_DISCOVERY_SHARE, "share"); + worker_register_job_name(WORKER_DISCOVERY_LOCK, "lock"); + + entrypoint_parent_process_comm = simple_pattern_create( + " runc:[* " // http://terenceli.github.io/%E6%8A%80%E6%9C%AF/2021/12/28/runc-internals-3) + " exe ", // https://github.com/falcosecurity/falco/blob/9d41b0a151b83693929d3a9c84f7c5c85d070d3a/rules/falco_rules.yaml#L1961 + NULL, + SIMPLE_PATTERN_EXACT, true); + + service_register(SERVICE_THREAD_TYPE_LIBUV, NULL, NULL, NULL, false); + + while (service_running(SERVICE_COLLECTORS)) { + worker_is_idle(); + + uv_mutex_lock(&discovery_thread.mutex); + uv_cond_wait(&discovery_thread.cond_var, &discovery_thread.mutex); + uv_mutex_unlock(&discovery_thread.mutex); + + if (unlikely(!service_running(SERVICE_COLLECTORS))) + break; + + discovery_find_all_cgroups(); + } + collector_info("discovery thread stopped"); + worker_unregister(); + service_exits(); + __atomic_store_n(&discovery_thread.exited,1,__ATOMIC_RELAXED); +} diff --git a/collectors/cgroups.plugin/cgroup-internals.h b/collectors/cgroups.plugin/cgroup-internals.h new file mode 100644 index 000000000..a69802240 --- /dev/null +++ b/collectors/cgroups.plugin/cgroup-internals.h @@ -0,0 +1,514 @@ +#include "sys_fs_cgroup.h" + +#ifndef NETDATA_CGROUP_INTERNALS_H +#define NETDATA_CGROUP_INTERNALS_H 1 + +#ifdef NETDATA_INTERNAL_CHECKS +#define CGROUP_PROCFILE_FLAG PROCFILE_FLAG_DEFAULT +#else +#define CGROUP_PROCFILE_FLAG PROCFILE_FLAG_NO_ERROR_ON_FILE_IO +#endif + +struct blkio { + int updated; + int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + int delay_counter; + + char *filename; + + unsigned long long Read; + unsigned long long Write; +/* + unsigned long long Sync; + unsigned long long Async; + unsigned long long Total; +*/ +}; + +struct pids { + char *pids_current_filename; + int pids_current_updated; + unsigned long long pids_current; +}; + +// https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt +struct memory { + ARL_BASE *arl_base; + ARL_ENTRY *arl_dirty; + ARL_ENTRY *arl_swap; + + int updated_detailed; + int updated_usage_in_bytes; + int updated_msw_usage_in_bytes; + int updated_failcnt; + + int enabled_detailed; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + int enabled_usage_in_bytes; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + int enabled_msw_usage_in_bytes; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + int enabled_failcnt; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + + int delay_counter_detailed; + int delay_counter_failcnt; + + char *filename_detailed; + char *filename_usage_in_bytes; + char *filename_msw_usage_in_bytes; + char *filename_failcnt; + + int detailed_has_dirty; + int detailed_has_swap; + + // detailed metrics +/* + unsigned long long cache; + unsigned long long rss; + unsigned long long rss_huge; + unsigned long long mapped_file; + unsigned long long writeback; + unsigned long long dirty; + unsigned long long swap; + unsigned long long pgpgin; + unsigned long long pgpgout; + unsigned long long pgfault; + unsigned long long pgmajfault; + unsigned long long inactive_anon; + unsigned long long active_anon; + unsigned long long inactive_file; + unsigned long long active_file; + unsigned long long unevictable; + unsigned long long hierarchical_memory_limit; +*/ + //unified cgroups metrics + unsigned long long anon; + unsigned long long kernel_stack; + unsigned long long slab; + unsigned long long sock; + // unsigned long long shmem; + unsigned long long anon_thp; + //unsigned long long file_writeback; + //unsigned long long file_dirty; + //unsigned long long file; + + unsigned long long total_cache; + unsigned long long total_rss; + unsigned long long total_rss_huge; + unsigned long long total_mapped_file; + unsigned long long total_writeback; + unsigned long long total_dirty; + unsigned long long total_swap; + unsigned long long total_pgpgin; + unsigned long long total_pgpgout; + unsigned long long total_pgfault; + unsigned long long total_pgmajfault; +/* + unsigned long long total_inactive_anon; + unsigned long long total_active_anon; +*/ + + unsigned long long total_inactive_file; + +/* + unsigned long long total_active_file; + unsigned long long total_unevictable; +*/ + + // single file metrics + unsigned long long usage_in_bytes; + unsigned long long msw_usage_in_bytes; + unsigned long long failcnt; +}; + +// https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt +struct cpuacct_stat { + int updated; + int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + + char *filename; + + unsigned long long user; // v1, v2(user_usec) + unsigned long long system; // v1, v2(system_usec) +}; + +// https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt +struct cpuacct_usage { + int updated; + int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + + char *filename; + + unsigned int cpus; + unsigned long long *cpu_percpu; +}; + +// represents cpuacct/cpu.stat, for v2 'cpuacct_stat' is used for 'user_usec', 'system_usec' +struct cpuacct_cpu_throttling { + int updated; + int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + + char *filename; + + unsigned long long nr_periods; + unsigned long long nr_throttled; + unsigned long long throttled_time; + + unsigned long long nr_throttled_perc; +}; + +// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpu#sect-cfs +// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/managing_monitoring_and_updating_the_kernel/using-cgroups-v2-to-control-distribution-of-cpu-time-for-applications_managing-monitoring-and-updating-the-kernel#proc_controlling-distribution-of-cpu-time-for-applications-by-adjusting-cpu-weight_using-cgroups-v2-to-control-distribution-of-cpu-time-for-applications +struct cpuacct_cpu_shares { + int updated; + int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + + char *filename; + + unsigned long long shares; +}; + +struct cgroup_network_interface { + const char *host_device; + const char *container_device; + struct cgroup_network_interface *next; +}; + +enum cgroups_container_orchestrator { + CGROUPS_ORCHESTRATOR_UNSET, + CGROUPS_ORCHESTRATOR_UNKNOWN, + CGROUPS_ORCHESTRATOR_K8S +}; + + +// *** WARNING *** The fields are not thread safe. Take care of safe usage. +struct cgroup { + uint32_t options; + + int first_time_seen; // first time seen by the discoverer + int processed; // the discoverer is done processing a cgroup (resolved name, set 'enabled' option) + + char available; // found in the filesystem + char enabled; // enabled in the config + + bool function_ready; // true after the first iteration of chart creation/update + + char pending_renames; + + char *id; + uint32_t hash; + + char *intermediate_id; // TODO: remove it when the renaming script is fixed + + char *chart_id; + uint32_t hash_chart_id; + + // 'cgroup_name' label value. + // by default this is the *id (path), later changed to the resolved name (cgroup-name.sh) or systemd service name. + char *name; + + RRDLABELS *chart_labels; + + int container_orchestrator; + + struct cpuacct_stat cpuacct_stat; + struct cpuacct_usage cpuacct_usage; + struct cpuacct_cpu_throttling cpuacct_cpu_throttling; + struct cpuacct_cpu_shares cpuacct_cpu_shares; + + struct memory memory; + + struct blkio io_service_bytes; // bytes + struct blkio io_serviced; // operations + + struct blkio throttle_io_service_bytes; // bytes + struct blkio throttle_io_serviced; // operations + + struct blkio io_merged; // operations + struct blkio io_queued; // operations + + struct pids pids; + + struct cgroup_network_interface *interfaces; + + struct pressure cpu_pressure; + struct pressure io_pressure; + struct pressure memory_pressure; + struct pressure irq_pressure; + + // Cpu + RRDSET *st_cpu; + RRDDIM *st_cpu_rd_user; + RRDDIM *st_cpu_rd_system; + + RRDSET *st_cpu_limit; + RRDSET *st_cpu_per_core; + RRDSET *st_cpu_nr_throttled; + RRDSET *st_cpu_throttled_time; + RRDSET *st_cpu_shares; + + // Memory + RRDSET *st_mem; + RRDDIM *st_mem_rd_ram; + RRDDIM *st_mem_rd_swap; + + RRDSET *st_mem_utilization; + RRDSET *st_writeback; + RRDSET *st_mem_activity; + RRDSET *st_pgfaults; + RRDSET *st_mem_usage; + RRDSET *st_mem_usage_limit; + RRDSET *st_mem_failcnt; + + // Blkio + RRDSET *st_io; + RRDDIM *st_io_rd_read; + RRDDIM *st_io_rd_written; + + RRDSET *st_serviced_ops; + + RRDSET *st_throttle_io; + RRDDIM *st_throttle_io_rd_read; + RRDDIM *st_throttle_io_rd_written; + + RRDSET *st_throttle_serviced_ops; + + RRDSET *st_queued_ops; + RRDSET *st_merged_ops; + + // Pids + RRDSET *st_pids; + RRDDIM *st_pids_rd_pids_current; + + // per cgroup chart variables + char *filename_cpuset_cpus; + unsigned long long cpuset_cpus; + + char *filename_cpu_cfs_period; + unsigned long long cpu_cfs_period; + + char *filename_cpu_cfs_quota; + unsigned long long cpu_cfs_quota; + + const RRDSETVAR_ACQUIRED *chart_var_cpu_limit; + NETDATA_DOUBLE prev_cpu_usage; + + char *filename_memory_limit; + unsigned long long memory_limit; + const RRDSETVAR_ACQUIRED *chart_var_memory_limit; + + char *filename_memoryswap_limit; + unsigned long long memoryswap_limit; + const RRDSETVAR_ACQUIRED *chart_var_memoryswap_limit; + + const DICTIONARY_ITEM *cgroup_netdev_link; + + struct cgroup *next; + struct cgroup *discovered_next; + +}; + +struct discovery_thread { + uv_thread_t thread; + uv_mutex_t mutex; + uv_cond_t cond_var; + int exited; +}; + +extern struct discovery_thread discovery_thread; + +extern char *cgroups_rename_script; +extern char cgroup_chart_id_prefix[]; +extern char services_chart_id_prefix[]; +extern uv_mutex_t cgroup_root_mutex; + +void cgroup_discovery_worker(void *ptr); + +extern int is_inside_k8s; +extern long system_page_size; +extern int cgroup_enable_cpuacct_stat; +extern int cgroup_enable_cpuacct_usage; +extern int cgroup_enable_cpuacct_cpu_throttling; +extern int cgroup_enable_cpuacct_cpu_shares; +extern int cgroup_enable_memory; +extern int cgroup_enable_detailed_memory; +extern int cgroup_enable_memory_failcnt; +extern int cgroup_enable_swap; +extern int cgroup_enable_blkio_io; +extern int cgroup_enable_blkio_ops; +extern int cgroup_enable_blkio_throttle_io; +extern int cgroup_enable_blkio_throttle_ops; +extern int cgroup_enable_blkio_merged_ops; +extern int cgroup_enable_blkio_queued_ops; +extern int cgroup_enable_pressure_cpu; +extern int cgroup_enable_pressure_io_some; +extern int cgroup_enable_pressure_io_full; +extern int cgroup_enable_pressure_memory_some; +extern int cgroup_enable_pressure_memory_full; +extern int cgroup_enable_pressure_irq_some; +extern int cgroup_enable_pressure_irq_full; +extern int cgroup_enable_systemd_services; +extern int cgroup_enable_systemd_services_detailed_memory; +extern int cgroup_used_memory; +extern int cgroup_use_unified_cgroups; +extern int cgroup_unified_exist; +extern int cgroup_search_in_devices; +extern int cgroup_check_for_new_every; +extern int cgroup_update_every; +extern int cgroup_containers_chart_priority; +extern int cgroup_recheck_zero_blkio_every_iterations; +extern int cgroup_recheck_zero_mem_failcnt_every_iterations; +extern int cgroup_recheck_zero_mem_detailed_every_iterations; +extern char *cgroup_cpuacct_base; +extern char *cgroup_cpuset_base; +extern char *cgroup_blkio_base; +extern char *cgroup_memory_base; +extern char *cgroup_pids_base; +extern char *cgroup_devices_base; +extern char *cgroup_unified_base; +extern int cgroup_root_count; +extern int cgroup_root_max; +extern int cgroup_max_depth; +extern SIMPLE_PATTERN *enabled_cgroup_paths; +extern SIMPLE_PATTERN *enabled_cgroup_names; +extern SIMPLE_PATTERN *search_cgroup_paths; +extern SIMPLE_PATTERN *enabled_cgroup_renames; +extern SIMPLE_PATTERN *systemd_services_cgroups; +extern SIMPLE_PATTERN *entrypoint_parent_process_comm; +extern char *cgroups_network_interface_script; +extern int cgroups_check; +extern uint32_t Read_hash; +extern uint32_t Write_hash; +extern uint32_t user_hash; +extern uint32_t system_hash; +extern uint32_t user_usec_hash; +extern uint32_t system_usec_hash; +extern uint32_t nr_periods_hash; +extern uint32_t nr_throttled_hash; +extern uint32_t throttled_time_hash; +extern uint32_t throttled_usec_hash; +extern struct cgroup *cgroup_root; + +extern netdata_ebpf_cgroup_shm_t shm_cgroup_ebpf; +extern int shm_fd_cgroup_ebpf; +extern sem_t *shm_mutex_cgroup_ebpf; + +enum cgroups_type { CGROUPS_AUTODETECT_FAIL, CGROUPS_V1, CGROUPS_V2 }; + +enum cgroups_systemd_setting { + SYSTEMD_CGROUP_ERR, + SYSTEMD_CGROUP_LEGACY, + SYSTEMD_CGROUP_HYBRID, + SYSTEMD_CGROUP_UNIFIED +}; + +struct cgroups_systemd_config_setting { + char *name; + enum cgroups_systemd_setting setting; +}; + +extern struct cgroups_systemd_config_setting cgroups_systemd_options[]; + +static inline int matches_enabled_cgroup_paths(char *id) { + return simple_pattern_matches(enabled_cgroup_paths, id); +} + +static inline int matches_enabled_cgroup_names(char *name) { + return simple_pattern_matches(enabled_cgroup_names, name); +} + +static inline int matches_enabled_cgroup_renames(char *id) { + return simple_pattern_matches(enabled_cgroup_renames, id); +} + +static inline int matches_systemd_services_cgroups(char *id) { + return simple_pattern_matches(systemd_services_cgroups, id); +} + +static inline int matches_search_cgroup_paths(const char *dir) { + return simple_pattern_matches(search_cgroup_paths, dir); +} + +static inline int matches_entrypoint_parent_process_comm(const char *comm) { + return simple_pattern_matches(entrypoint_parent_process_comm, comm); +} + +static inline int is_cgroup_systemd_service(struct cgroup *cg) { + return (int)(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE); +} + +static inline int k8s_is_kubepod(struct cgroup *cg) { + return cg->container_orchestrator == CGROUPS_ORCHESTRATOR_K8S; +} + +static inline char *cgroup_chart_type(char *buffer, struct cgroup *cg) { + buffer[0] = '\0'; + + if (cg->chart_id[0] == '\0' || (cg->chart_id[0] == '/' && cg->chart_id[1] == '\0')) + strncpy(buffer, "cgroup_root", RRD_ID_LENGTH_MAX); + else if (is_cgroup_systemd_service(cg)) + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s%s", services_chart_id_prefix, cg->chart_id); + else + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s%s", cgroup_chart_id_prefix, cg->chart_id); + + return buffer; +} + +#define RRDFUNCTIONS_CGTOP_HELP "View running containers" + +int cgroup_function_cgroup_top(BUFFER *wb, int timeout, const char *function, void *collector_data, + rrd_function_result_callback_t result_cb, void *result_cb_data, + rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data, + rrd_function_register_canceller_cb_t register_canceller_cb, void *register_canceller_cb_data); +int cgroup_function_systemd_top(BUFFER *wb, int timeout, const char *function, void *collector_data, + rrd_function_result_callback_t result_cb, void *result_cb_data, + rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data, + rrd_function_register_canceller_cb_t register_canceller_cb, void *register_canceller_cb_data); + +void cgroup_netdev_link_init(void); +const DICTIONARY_ITEM *cgroup_netdev_get(struct cgroup *cg); +void cgroup_netdev_delete(struct cgroup *cg); + +void update_cpu_utilization_chart(struct cgroup *cg); +void update_cpu_utilization_limit_chart(struct cgroup *cg, NETDATA_DOUBLE cpu_limit); +void update_cpu_throttled_chart(struct cgroup *cg); +void update_cpu_throttled_duration_chart(struct cgroup *cg); +void update_cpu_shares_chart(struct cgroup *cg); +void update_cpu_per_core_usage_chart(struct cgroup *cg); + +void update_mem_usage_limit_chart(struct cgroup *cg, unsigned long long memory_limit); +void update_mem_utilization_chart(struct cgroup *cg, unsigned long long memory_limit); +void update_mem_usage_detailed_chart(struct cgroup *cg); +void update_mem_writeback_chart(struct cgroup *cg); +void update_mem_activity_chart(struct cgroup *cg); +void update_mem_pgfaults_chart(struct cgroup *cg); +void update_mem_failcnt_chart(struct cgroup *cg); +void update_mem_usage_chart(struct cgroup *cg); + +void update_io_serviced_bytes_chart(struct cgroup *cg); +void update_io_serviced_ops_chart(struct cgroup *cg); +void update_throttle_io_serviced_bytes_chart(struct cgroup *cg); +void update_throttle_io_serviced_ops_chart(struct cgroup *cg); +void update_io_queued_ops_chart(struct cgroup *cg); +void update_io_merged_ops_chart(struct cgroup *cg); + +void update_pids_current_chart(struct cgroup *cg); + +void update_cpu_some_pressure_chart(struct cgroup *cg); +void update_cpu_some_pressure_stall_time_chart(struct cgroup *cg); +void update_cpu_full_pressure_chart(struct cgroup *cg); +void update_cpu_full_pressure_stall_time_chart(struct cgroup *cg); + +void update_mem_some_pressure_chart(struct cgroup *cg); +void update_mem_some_pressure_stall_time_chart(struct cgroup *cg); +void update_mem_full_pressure_chart(struct cgroup *cg); +void update_mem_full_pressure_stall_time_chart(struct cgroup *cg); + +void update_irq_some_pressure_chart(struct cgroup *cg); +void update_irq_some_pressure_stall_time_chart(struct cgroup *cg); +void update_irq_full_pressure_chart(struct cgroup *cg); +void update_irq_full_pressure_stall_time_chart(struct cgroup *cg); + +void update_io_some_pressure_chart(struct cgroup *cg); +void update_io_some_pressure_stall_time_chart(struct cgroup *cg); +void update_io_full_pressure_chart(struct cgroup *cg); +void update_io_full_pressure_stall_time_chart(struct cgroup *cg); + +#endif // NETDATA_CGROUP_INTERNALS_H
\ No newline at end of file diff --git a/collectors/cgroups.plugin/cgroup-name.sh b/collectors/cgroups.plugin/cgroup-name.sh.in index c0f3d0cb6..0f8b63256 100755 --- a/collectors/cgroups.plugin/cgroup-name.sh +++ b/collectors/cgroups.plugin/cgroup-name.sh.in @@ -3,66 +3,115 @@ # netdata # real-time performance and health monitoring, done right! -# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2023 Netdata Inc. # SPDX-License-Identifier: GPL-3.0-or-later # # Script to find a better name for cgroups # -export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin" +export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin:@sbindir_POST@" export LC_ALL=C +cmd_line="'${0}' $(printf "'%s' " "${@}")" + # ----------------------------------------------------------------------------- +# logging PROGRAM_NAME="$(basename "${0}")" -LOG_LEVEL_ERR=1 -LOG_LEVEL_WARN=2 -LOG_LEVEL_INFO=3 -LOG_LEVEL="$LOG_LEVEL_INFO" - -set_log_severity_level() { - case ${NETDATA_LOG_SEVERITY_LEVEL,,} in - "info") LOG_LEVEL="$LOG_LEVEL_INFO";; - "warn" | "warning") LOG_LEVEL="$LOG_LEVEL_WARN";; - "err" | "error") LOG_LEVEL="$LOG_LEVEL_ERR";; +# these should be the same with syslog() priorities +NDLP_EMERG=0 # system is unusable +NDLP_ALERT=1 # action must be taken immediately +NDLP_CRIT=2 # critical conditions +NDLP_ERR=3 # error conditions +NDLP_WARN=4 # warning conditions +NDLP_NOTICE=5 # normal but significant condition +NDLP_INFO=6 # informational +NDLP_DEBUG=7 # debug-level messages + +# the max (numerically) log level we will log +LOG_LEVEL=$NDLP_INFO + +set_log_min_priority() { + case "${NETDATA_LOG_LEVEL,,}" in + "emerg" | "emergency") + LOG_LEVEL=$NDLP_EMERG + ;; + + "alert") + LOG_LEVEL=$NDLP_ALERT + ;; + + "crit" | "critical") + LOG_LEVEL=$NDLP_CRIT + ;; + + "err" | "error") + LOG_LEVEL=$NDLP_ERR + ;; + + "warn" | "warning") + LOG_LEVEL=$NDLP_WARN + ;; + + "notice") + LOG_LEVEL=$NDLP_NOTICE + ;; + + "info") + LOG_LEVEL=$NDLP_INFO + ;; + + "debug") + LOG_LEVEL=$NDLP_DEBUG + ;; esac } -set_log_severity_level - -logdate() { - date "+%Y-%m-%d %H:%M:%S" -} +set_log_min_priority log() { - local status="${1}" - shift - - echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}" - + local level="${1}" + shift 1 + + [[ -n "$level" && -n "$LOG_LEVEL" && "$level" -gt "$LOG_LEVEL" ]] && return + + systemd-cat-native --log-as-netdata --newline="--NEWLINE--" <<EOFLOG +INVOCATION_ID=${NETDATA_INVOCATION_ID} +SYSLOG_IDENTIFIER=${PROGRAM_NAME} +PRIORITY=${level} +THREAD_TAG=cgroup-name +ND_LOG_SOURCE=collector +ND_REQUEST=${cmd_line} +MESSAGE=${*//\\n/--NEWLINE--} + +EOFLOG + # AN EMPTY LINE IS NEEDED ABOVE } info() { - [[ -n "$LOG_LEVEL" && "$LOG_LEVEL_INFO" -gt "$LOG_LEVEL" ]] && return - log INFO "${@}" + log "$NDLP_INFO" "${@}" } warning() { - [[ -n "$LOG_LEVEL" && "$LOG_LEVEL_WARN" -gt "$LOG_LEVEL" ]] && return - log WARNING "${@}" + log "$NDLP_WARN" "${@}" } error() { - [[ -n "$LOG_LEVEL" && "$LOG_LEVEL_ERR" -gt "$LOG_LEVEL" ]] && return - log ERROR "${@}" + log "$NDLP_ERR" "${@}" } fatal() { - log FATAL "${@}" + log "$NDLP_ALERT" "${@}" exit 1 } +debug() { + log "$NDLP_DEBUG" "${@}" +} + +# ----------------------------------------------------------------------------- + function parse_docker_like_inspect_output() { local output="${1}" eval "$(grep -E "^(NOMAD_NAMESPACE|NOMAD_JOB_NAME|NOMAD_TASK_NAME|NOMAD_SHORT_ALLOC_ID|CONT_NAME|IMAGE_NAME)=" <<<"$output")" @@ -620,21 +669,19 @@ if [ -z "${NAME}" ]; then # libvirtd / qemu virtual machines NAME="qemu_$(echo "${CGROUP}" | sed 's/^machine_//; s/\.libvirt-qemu$//; s/-/_/;')" - elif [[ ${CGROUP} =~ qemu.slice_([0-9]+).scope && -d /etc/pve ]]; then + elif [[ ${CGROUP} =~ qemu.slice_([0-9]+).scope && -d "${NETDATA_HOST_PREFIX}/etc/pve" ]]; then # Proxmox VMs - - FILENAME="/etc/pve/qemu-server/${BASH_REMATCH[1]}.conf" + FILENAME="${NETDATA_HOST_PREFIX}/etc/pve/qemu-server/${BASH_REMATCH[1]}.conf" if [[ -f $FILENAME && -r $FILENAME ]]; then - NAME="qemu_$(grep -e '^name: ' "/etc/pve/qemu-server/${BASH_REMATCH[1]}.conf" | head -1 | sed -rn 's|\s*name\s*:\s*(.*)?$|\1|p')" + NAME="qemu_$(grep -e '^name: ' "${FILENAME}" | head -1 | sed -rn 's|\s*name\s*:\s*(.*)?$|\1|p')" else error "proxmox config file missing ${FILENAME} or netdata does not have read access. Please ensure netdata is a member of www-data group." fi - elif [[ ${CGROUP} =~ lxc_([0-9]+) && -d /etc/pve ]]; then + elif [[ ${CGROUP} =~ lxc_([0-9]+) && -d "${NETDATA_HOST_PREFIX}/etc/pve" ]]; then # Proxmox Containers (LXC) - - FILENAME="/etc/pve/lxc/${BASH_REMATCH[1]}.conf" + FILENAME="${NETDATA_HOST_PREFIX}/etc/pve/lxc/${BASH_REMATCH[1]}.conf" if [[ -f ${FILENAME} && -r ${FILENAME} ]]; then - NAME=$(grep -e '^hostname: ' "/etc/pve/lxc/${BASH_REMATCH[1]}.conf" | head -1 | sed -rn 's|\s*hostname\s*:\s*(.*)?$|\1|p') + NAME=$(grep -e '^hostname: ' "${FILENAME}" | head -1 | sed -rn 's|\s*hostname\s*:\s*(.*)?$|\1|p') else error "proxmox config file missing ${FILENAME} or netdata does not have read access. Please ensure netdata is a member of www-data group." fi diff --git a/collectors/cgroups.plugin/cgroup-network-helper.sh b/collectors/cgroups.plugin/cgroup-network-helper.sh.in index 008bc987f..da9b9162a 100755 --- a/collectors/cgroups.plugin/cgroup-network-helper.sh +++ b/collectors/cgroups.plugin/cgroup-network-helper.sh.in @@ -4,7 +4,7 @@ # cgroup-network-helper.sh # detect container and virtual machine interfaces # -# (C) 2017 Costa Tsaousis +# (C) 2023 Netdata Inc. # SPDX-License-Identifier: GPL-3.0-or-later # # This script is called as root (by cgroup-network), with either a pid, or a cgroup path. @@ -26,68 +26,121 @@ # the system path is cleared by cgroup-network # shellcheck source=/dev/null [ -f /etc/profile ] && source /etc/profile +export PATH="${PATH}:@sbindir_POST@" export LC_ALL=C -PROGRAM_NAME="$(basename "${0}")" +cmd_line="'${0}' $(printf "'%s' " "${@}")" + +# ----------------------------------------------------------------------------- +# logging -LOG_LEVEL_ERR=1 -LOG_LEVEL_WARN=2 -LOG_LEVEL_INFO=3 -LOG_LEVEL="$LOG_LEVEL_INFO" +PROGRAM_NAME="$(basename "${0}")" -set_log_severity_level() { - case ${NETDATA_LOG_SEVERITY_LEVEL,,} in - "info") LOG_LEVEL="$LOG_LEVEL_INFO";; - "warn" | "warning") LOG_LEVEL="$LOG_LEVEL_WARN";; - "err" | "error") LOG_LEVEL="$LOG_LEVEL_ERR";; +# these should be the same with syslog() priorities +NDLP_EMERG=0 # system is unusable +NDLP_ALERT=1 # action must be taken immediately +NDLP_CRIT=2 # critical conditions +NDLP_ERR=3 # error conditions +NDLP_WARN=4 # warning conditions +NDLP_NOTICE=5 # normal but significant condition +NDLP_INFO=6 # informational +NDLP_DEBUG=7 # debug-level messages + +# the max (numerically) log level we will log +LOG_LEVEL=$NDLP_INFO + +set_log_min_priority() { + case "${NETDATA_LOG_LEVEL,,}" in + "emerg" | "emergency") + LOG_LEVEL=$NDLP_EMERG + ;; + + "alert") + LOG_LEVEL=$NDLP_ALERT + ;; + + "crit" | "critical") + LOG_LEVEL=$NDLP_CRIT + ;; + + "err" | "error") + LOG_LEVEL=$NDLP_ERR + ;; + + "warn" | "warning") + LOG_LEVEL=$NDLP_WARN + ;; + + "notice") + LOG_LEVEL=$NDLP_NOTICE + ;; + + "info") + LOG_LEVEL=$NDLP_INFO + ;; + + "debug") + LOG_LEVEL=$NDLP_DEBUG + ;; esac } -set_log_severity_level - -logdate() { - date "+%Y-%m-%d %H:%M:%S" -} +set_log_min_priority log() { - local status="${1}" - shift - - echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}" - + local level="${1}" + shift 1 + + [[ -n "$level" && -n "$LOG_LEVEL" && "$level" -gt "$LOG_LEVEL" ]] && return + + systemd-cat-native --log-as-netdata --newline="--NEWLINE--" <<EOFLOG +INVOCATION_ID=${NETDATA_INVOCATION_ID} +SYSLOG_IDENTIFIER=${PROGRAM_NAME} +PRIORITY=${level} +THREAD_TAG=cgroup-network-helper +ND_LOG_SOURCE=collector +ND_REQUEST=${cmd_line} +MESSAGE=${*//\\n/--NEWLINE--} + +EOFLOG + # AN EMPTY LINE IS NEEDED ABOVE } info() { - [[ -n "$LOG_LEVEL" && "$LOG_LEVEL_INFO" -gt "$LOG_LEVEL" ]] && return - log INFO "${@}" + log "$NDLP_INFO" "${@}" } warning() { - [[ -n "$LOG_LEVEL" && "$LOG_LEVEL_WARN" -gt "$LOG_LEVEL" ]] && return - log WARNING "${@}" + log "$NDLP_WARN" "${@}" } error() { - [[ -n "$LOG_LEVEL" && "$LOG_LEVEL_ERR" -gt "$LOG_LEVEL" ]] && return - log ERROR "${@}" + log "$NDLP_ERR" "${@}" } fatal() { - log FATAL "${@}" - exit 1 + log "$NDLP_ALERT" "${@}" + exit 1 } -debug=${NETDATA_CGROUP_NETWORK_HELPER_DEBUG=0} debug() { - [ "${debug}" = "1" ] && log DEBUG "${@}" + log "$NDLP_DEBUG" "${@}" } +debug=0 +if [ "${NETDATA_CGROUP_NETWORK_HELPER_DEBUG-0}" = "1" ]; then + debug=1 + LOG_LEVEL=$NDLP_DEBUG +fi + # ----------------------------------------------------------------------------- # check for BASH v4+ (required for associative arrays) -[ $(( BASH_VERSINFO[0] )) -lt 4 ] && \ - fatal "BASH version 4 or later is required (this is ${BASH_VERSION})." +if [ ${BASH_VERSINFO[0]} -lt 4 ]; then + echo >&2 "BASH version 4 or later is required (this is ${BASH_VERSION})." + exit 1 +fi # ----------------------------------------------------------------------------- # parse the arguments @@ -99,7 +152,10 @@ do case "${1}" in --cgroup) cgroup="${2}"; shift 1;; --pid|-p) pid="${2}"; shift 1;; - --debug|debug) debug=1;; + --debug|debug) + debug=1 + LOG_LEVEL=$NDLP_DEBUG + ;; *) fatal "Cannot understand argument '${1}'";; esac diff --git a/collectors/cgroups.plugin/cgroup-network.c b/collectors/cgroups.plugin/cgroup-network.c index b00f246bb..508ea07c6 100644 --- a/collectors/cgroups.plugin/cgroup-network.c +++ b/collectors/cgroups.plugin/cgroup-network.c @@ -10,12 +10,16 @@ #include <sched.h> #endif -char environment_variable2[FILENAME_MAX + 50] = ""; -char environment_variable3[FILENAME_MAX + 50] = ""; +char env_netdata_host_prefix[FILENAME_MAX + 50] = ""; +char env_netdata_log_method[FILENAME_MAX + 50] = ""; +char env_netdata_log_format[FILENAME_MAX + 50] = ""; +char env_netdata_log_level[FILENAME_MAX + 50] = ""; char *environment[] = { "PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin", - environment_variable2, - environment_variable3, + env_netdata_host_prefix, + env_netdata_log_method, + env_netdata_log_format, + env_netdata_log_level, NULL }; @@ -288,7 +292,8 @@ int switch_namespace(const char *prefix, pid_t pid) { pid_t read_pid_from_cgroup_file(const char *filename) { int fd = open(filename, procfile_open_flags); if(fd == -1) { - collector_error("Cannot open pid_from_cgroup() file '%s'.", filename); + if (errno != ENOENT) + collector_error("Cannot open pid_from_cgroup() file '%s'.", filename); return 0; } @@ -648,12 +653,11 @@ void usage(void) { } int main(int argc, char **argv) { - stderror = stderr; pid_t pid = 0; - program_name = argv[0]; program_version = VERSION; - error_log_syslog = 0; + clocks_init(); + nd_log_initialize_for_external_plugins("cgroup-network"); // since cgroup-network runs as root, prevent it from opening symbolic links procfile_open_flags = O_RDONLY|O_NOFOLLOW; @@ -662,7 +666,7 @@ int main(int argc, char **argv) { // make sure NETDATA_HOST_PREFIX is safe netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX"); - if(verify_netdata_host_prefix() == -1) exit(1); + if(verify_netdata_host_prefix(false) == -1) exit(1); if(netdata_configured_host_prefix[0] != '\0' && verify_path(netdata_configured_host_prefix) == -1) fatal("invalid NETDATA_HOST_PREFIX '%s'", netdata_configured_host_prefix); @@ -671,11 +675,20 @@ int main(int argc, char **argv) { // build a safe environment for our script // the first environment variable is a fixed PATH= - snprintfz(environment_variable2, sizeof(environment_variable2) - 1, "NETDATA_HOST_PREFIX=%s", netdata_configured_host_prefix); + snprintfz(env_netdata_host_prefix, sizeof(env_netdata_host_prefix) - 1, "NETDATA_HOST_PREFIX=%s", netdata_configured_host_prefix); - char *s = getenv("NETDATA_LOG_SEVERITY_LEVEL"); + char *s; + + s = getenv("NETDATA_LOG_METHOD"); + snprintfz(env_netdata_log_method, sizeof(env_netdata_log_method) - 1, "NETDATA_LOG_METHOD=%s", nd_log_method_for_external_plugins(s)); + + s = getenv("NETDATA_LOG_FORMAT"); + if (s) + snprintfz(env_netdata_log_format, sizeof(env_netdata_log_format) - 1, "NETDATA_LOG_FORMAT=%s", s); + + s = getenv("NETDATA_LOG_LEVEL"); if (s) - snprintfz(environment_variable3, sizeof(environment_variable3) - 1, "NETDATA_LOG_SEVERITY_LEVEL=%s", s); + snprintfz(env_netdata_log_level, sizeof(env_netdata_log_level) - 1, "NETDATA_LOG_LEVEL=%s", s); // ------------------------------------------------------------------------ @@ -686,8 +699,6 @@ int main(int argc, char **argv) { if(argc != 3) usage(); - - log_set_global_severity_for_external_plugins(); int arg = 1; int helper = 1; diff --git a/collectors/cgroups.plugin/cgroup-top.c b/collectors/cgroups.plugin/cgroup-top.c new file mode 100644 index 000000000..8d44d3b56 --- /dev/null +++ b/collectors/cgroups.plugin/cgroup-top.c @@ -0,0 +1,520 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "cgroup-internals.h" + +struct cgroup_netdev_link { + size_t read_slot; + NETDATA_DOUBLE received[2]; + NETDATA_DOUBLE sent[2]; +}; + +static DICTIONARY *cgroup_netdev_link_dict = NULL; + +void cgroup_netdev_link_init(void) { + cgroup_netdev_link_dict = dictionary_create_advanced(DICT_OPTION_FIXED_SIZE|DICT_OPTION_DONT_OVERWRITE_VALUE, NULL, sizeof(struct cgroup_netdev_link)); +} + +const DICTIONARY_ITEM *cgroup_netdev_get(struct cgroup *cg) { + if(!cg->cgroup_netdev_link) { + struct cgroup_netdev_link t = { + .read_slot = 0, + .received = {NAN, NAN}, + .sent = {NAN, NAN}, + }; + + cg->cgroup_netdev_link = + dictionary_set_and_acquire_item(cgroup_netdev_link_dict, cg->id, &t, sizeof(struct cgroup_netdev_link)); + } + + return dictionary_acquired_item_dup(cgroup_netdev_link_dict, cg->cgroup_netdev_link); +} + +void cgroup_netdev_delete(struct cgroup *cg) { + if(cg->cgroup_netdev_link) { + dictionary_acquired_item_release(cgroup_netdev_link_dict, cg->cgroup_netdev_link); + dictionary_del(cgroup_netdev_link_dict, cg->id); + dictionary_garbage_collect(cgroup_netdev_link_dict); + cg->cgroup_netdev_link = NULL; + } +} + +void cgroup_netdev_release(const DICTIONARY_ITEM *link) { + if(link) + dictionary_acquired_item_release(cgroup_netdev_link_dict, link); +} + +const void *cgroup_netdev_dup(const DICTIONARY_ITEM *link) { + return dictionary_acquired_item_dup(cgroup_netdev_link_dict, link); +} + +void cgroup_netdev_reset_all(void) { + struct cgroup_netdev_link *t; + dfe_start_read(cgroup_netdev_link_dict, t) { + if(t->read_slot >= 1) { + t->read_slot = 0; + t->received[1] = NAN; + t->sent[1] = NAN; + } + else { + t->read_slot = 1; + t->received[0] = NAN; + t->sent[0] = NAN; + } + } + dfe_done(t); +} + +void cgroup_netdev_add_bandwidth(const DICTIONARY_ITEM *link, NETDATA_DOUBLE received, NETDATA_DOUBLE sent) { + if(!link) + return; + + struct cgroup_netdev_link *t = dictionary_acquired_item_value(link); + + size_t slot = (t->read_slot) ? 0 : 1; + + if(isnan(t->received[slot])) + t->received[slot] = received; + else + t->received[slot] += received; + + if(isnan(t->sent[slot])) + t->sent[slot] = sent; + else + t->sent[slot] += sent; +} + +void cgroup_netdev_get_bandwidth(struct cgroup *cg, NETDATA_DOUBLE *received, NETDATA_DOUBLE *sent) { + if(!cg->cgroup_netdev_link) { + *received = NAN; + *sent = NAN; + return; + } + + struct cgroup_netdev_link *t = dictionary_acquired_item_value(cg->cgroup_netdev_link); + + size_t slot = (t->read_slot) ? 1 : 0; + + *received = t->received[slot]; + *sent = t->sent[slot]; +} + +int cgroup_function_cgroup_top(BUFFER *wb, int timeout __maybe_unused, const char *function __maybe_unused, + void *collector_data __maybe_unused, + rrd_function_result_callback_t result_cb, void *result_cb_data, + rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data, + rrd_function_register_canceller_cb_t register_canceller_cb __maybe_unused, + void *register_canceller_cb_data __maybe_unused) { + + buffer_flush(wb); + wb->content_type = CT_APPLICATION_JSON; + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT); + + buffer_json_member_add_string(wb, "hostname", rrdhost_hostname(localhost)); + buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); + buffer_json_member_add_string(wb, "type", "table"); + buffer_json_member_add_time_t(wb, "update_every", 1); + buffer_json_member_add_string(wb, "help", RRDFUNCTIONS_CGTOP_HELP); + buffer_json_member_add_array(wb, "data"); + + double max_pids = 0.0; + double max_cpu = 0.0; + double max_ram = 0.0; + double max_disk_io_read = 0.0; + double max_disk_io_written = 0.0; + double max_net_received = 0.0; + double max_net_sent = 0.0; + + RRDDIM *rd = NULL; + + uv_mutex_lock(&cgroup_root_mutex); + + for(struct cgroup *cg = cgroup_root; cg ; cg = cg->next) { + if(unlikely(!cg->enabled || cg->pending_renames || !cg->function_ready || is_cgroup_systemd_service(cg))) + continue; + + buffer_json_add_array_item_array(wb); + + buffer_json_add_array_item_string(wb, cg->name); // Name + + if(k8s_is_kubepod(cg)) + buffer_json_add_array_item_string(wb, "k8s"); // Kind + else + buffer_json_add_array_item_string(wb, "cgroup"); // Kind + + double pids_current = rrddim_get_last_stored_value(cg->st_pids_rd_pids_current, &max_pids, 1.0); + + double cpu = NAN; + if (cg->st_cpu_rd_user && cg->st_cpu_rd_system) { + cpu = cg->st_cpu_rd_user->collector.last_stored_value + cg->st_cpu_rd_system->collector.last_stored_value; + max_cpu = MAX(max_cpu, cpu); + } + + double ram = rrddim_get_last_stored_value(cg->st_mem_rd_ram, &max_ram, 1.0); + + rd = cg->st_throttle_io_rd_read ? cg->st_throttle_io_rd_read : cg->st_io_rd_read; + double disk_io_read = rrddim_get_last_stored_value(rd, &max_disk_io_read, 1024.0); + rd = cg->st_throttle_io_rd_written ? cg->st_throttle_io_rd_written : cg->st_io_rd_written; + double disk_io_written = rrddim_get_last_stored_value(rd, &max_disk_io_written, 1024.0); + + NETDATA_DOUBLE received, sent; + cgroup_netdev_get_bandwidth(cg, &received, &sent); + if (!isnan(received) && !isnan(sent)) { + received /= 1000.0; + sent /= 1000.0; + max_net_received = MAX(max_net_received, received); + max_net_sent = MAX(max_net_sent, sent); + } + + buffer_json_add_array_item_double(wb, pids_current); + buffer_json_add_array_item_double(wb, cpu); + buffer_json_add_array_item_double(wb, ram); + buffer_json_add_array_item_double(wb, disk_io_read); + buffer_json_add_array_item_double(wb, disk_io_written); + buffer_json_add_array_item_double(wb, received); + buffer_json_add_array_item_double(wb, sent); + + buffer_json_array_close(wb); + } + + uv_mutex_unlock(&cgroup_root_mutex); + + buffer_json_array_close(wb); // data + buffer_json_member_add_object(wb, "columns"); + { + size_t field_id = 0; + + // Node + buffer_rrdf_table_add_field(wb, field_id++, "Name", "CGROUP Name", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_STICKY | RRDF_FIELD_OPTS_FULL_WIDTH, + NULL); + + // Kind + buffer_rrdf_table_add_field(wb, field_id++, "Kind", "CGROUP Kind", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + // PIDs + buffer_rrdf_table_add_field(wb, field_id++, "PIDs", "Number of Processes Currently in the CGROUP", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, "pids", max_pids, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + // CPU + buffer_rrdf_table_add_field(wb, field_id++, "CPU", "CPU Usage", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "%", max_cpu, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + // RAM + buffer_rrdf_table_add_field(wb, field_id++, "RAM", "RAM Usage", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "MiB", max_ram, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + // Disk IO Reads + buffer_rrdf_table_add_field(wb, field_id++, "Reads", "Disk Read Data", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "MiB", max_disk_io_read, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + // Disk IO Writes + buffer_rrdf_table_add_field(wb, field_id++, "Writes", "Disk Written Data", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "MiB", max_disk_io_written, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + // Network Received + buffer_rrdf_table_add_field(wb, field_id++, "Received", "Network Traffic Received", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "Mbps", max_net_received, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + // Network Sent + buffer_rrdf_table_add_field(wb, field_id++, "Sent", "Network Traffic Sent ", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "Mbps", max_net_sent, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + } + buffer_json_object_close(wb); // columns + buffer_json_member_add_string(wb, "default_sort_column", "CPU"); + + buffer_json_member_add_object(wb, "charts"); + { + buffer_json_member_add_object(wb, "CPU"); + { + buffer_json_member_add_string(wb, "name", "CPU"); + buffer_json_member_add_string(wb, "type", "stacked-bar"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "CPU"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "Memory"); + { + buffer_json_member_add_string(wb, "name", "Memory"); + buffer_json_member_add_string(wb, "type", "stacked-bar"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "RAM"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "Traffic"); + { + buffer_json_member_add_string(wb, "name", "Traffic"); + buffer_json_member_add_string(wb, "type", "stacked-bar"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "Received"); + buffer_json_add_array_item_string(wb, "Sent"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); // charts + + buffer_json_member_add_array(wb, "default_charts"); + { + buffer_json_add_array_item_array(wb); + buffer_json_add_array_item_string(wb, "CPU"); + buffer_json_add_array_item_string(wb, "Name"); + buffer_json_array_close(wb); + + buffer_json_add_array_item_array(wb); + buffer_json_add_array_item_string(wb, "Memory"); + buffer_json_add_array_item_string(wb, "Name"); + buffer_json_array_close(wb); + } + buffer_json_array_close(wb); + + buffer_json_member_add_object(wb, "group_by"); + { + buffer_json_member_add_object(wb, "Kind"); + { + buffer_json_member_add_string(wb, "name", "Kind"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "Kind"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); // group_by + + buffer_json_member_add_time_t(wb, "expires", now_realtime_sec() + 1); + buffer_json_finalize(wb); + + int response = HTTP_RESP_OK; + if(is_cancelled_cb && is_cancelled_cb(is_cancelled_cb_data)) { + buffer_flush(wb); + response = HTTP_RESP_CLIENT_CLOSED_REQUEST; + } + + if(result_cb) + result_cb(wb, response, result_cb_data); + + return response; +} + +int cgroup_function_systemd_top(BUFFER *wb, int timeout __maybe_unused, const char *function __maybe_unused, + void *collector_data __maybe_unused, + rrd_function_result_callback_t result_cb, void *result_cb_data, + rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data, + rrd_function_register_canceller_cb_t register_canceller_cb __maybe_unused, + void *register_canceller_cb_data __maybe_unused) { + + buffer_flush(wb); + wb->content_type = CT_APPLICATION_JSON; + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT); + + buffer_json_member_add_string(wb, "hostname", rrdhost_hostname(localhost)); + buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); + buffer_json_member_add_string(wb, "type", "table"); + buffer_json_member_add_time_t(wb, "update_every", 1); + buffer_json_member_add_string(wb, "help", RRDFUNCTIONS_CGTOP_HELP); + buffer_json_member_add_array(wb, "data"); + + double max_pids = 0.0; + double max_cpu = 0.0; + double max_ram = 0.0; + double max_disk_io_read = 0.0; + double max_disk_io_written = 0.0; + + RRDDIM *rd = NULL; + + uv_mutex_lock(&cgroup_root_mutex); + + for(struct cgroup *cg = cgroup_root; cg ; cg = cg->next) { + if(unlikely(!cg->enabled || cg->pending_renames || !cg->function_ready || !is_cgroup_systemd_service(cg))) + continue; + + buffer_json_add_array_item_array(wb); + + buffer_json_add_array_item_string(wb, cg->name); + + double pids_current = rrddim_get_last_stored_value(cg->st_pids_rd_pids_current, &max_pids, 1.0); + + double cpu = NAN; + if (cg->st_cpu_rd_user && cg->st_cpu_rd_system) { + cpu = cg->st_cpu_rd_user->collector.last_stored_value + cg->st_cpu_rd_system->collector.last_stored_value; + max_cpu = MAX(max_cpu, cpu); + } + + double ram = rrddim_get_last_stored_value(cg->st_mem_rd_ram, &max_ram, 1.0); + + rd = cg->st_throttle_io_rd_read ? cg->st_throttle_io_rd_read : cg->st_io_rd_read; + double disk_io_read = rrddim_get_last_stored_value(rd, &max_disk_io_read, 1024.0); + rd = cg->st_throttle_io_rd_written ? cg->st_throttle_io_rd_written : cg->st_io_rd_written; + double disk_io_written = rrddim_get_last_stored_value(rd, &max_disk_io_written, 1024.0); + + buffer_json_add_array_item_double(wb, pids_current); + buffer_json_add_array_item_double(wb, cpu); + buffer_json_add_array_item_double(wb, ram); + buffer_json_add_array_item_double(wb, disk_io_read); + buffer_json_add_array_item_double(wb, disk_io_written); + + buffer_json_array_close(wb); + } + + uv_mutex_unlock(&cgroup_root_mutex); + + buffer_json_array_close(wb); // data + buffer_json_member_add_object(wb, "columns"); + { + size_t field_id = 0; + + // Node + buffer_rrdf_table_add_field(wb, field_id++, "Name", "Systemd Service Name", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_STICKY | RRDF_FIELD_OPTS_FULL_WIDTH, + NULL); + + // PIDs + buffer_rrdf_table_add_field(wb, field_id++, "PIDs", "Number of Processes Currently in the CGROUP", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, "pids", max_pids, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + // CPU + buffer_rrdf_table_add_field(wb, field_id++, "CPU", "CPU Usage", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "%", max_cpu, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + // RAM + buffer_rrdf_table_add_field(wb, field_id++, "RAM", "RAM Usage", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "MiB", max_ram, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + // Disk IO Reads + buffer_rrdf_table_add_field(wb, field_id++, "Reads", "Disk Read Data", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "MiB", max_disk_io_read, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + // Disk IO Writes + buffer_rrdf_table_add_field(wb, field_id++, "Writes", "Disk Written Data", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "MiB", max_disk_io_written, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + } + + buffer_json_object_close(wb); // columns + buffer_json_member_add_string(wb, "default_sort_column", "CPU"); + + buffer_json_member_add_object(wb, "charts"); + { + buffer_json_member_add_object(wb, "CPU"); + { + buffer_json_member_add_string(wb, "name", "CPU"); + buffer_json_member_add_string(wb, "type", "stacked-bar"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "CPU"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "Memory"); + { + buffer_json_member_add_string(wb, "name", "Memory"); + buffer_json_member_add_string(wb, "type", "stacked-bar"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "RAM"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); // charts + + buffer_json_member_add_array(wb, "default_charts"); + { + buffer_json_add_array_item_array(wb); + buffer_json_add_array_item_string(wb, "CPU"); + buffer_json_add_array_item_string(wb, "Name"); + buffer_json_array_close(wb); + + buffer_json_add_array_item_array(wb); + buffer_json_add_array_item_string(wb, "Memory"); + buffer_json_add_array_item_string(wb, "Name"); + buffer_json_array_close(wb); + } + buffer_json_array_close(wb); + + buffer_json_member_add_time_t(wb, "expires", now_realtime_sec() + 1); + buffer_json_finalize(wb); + + int response = HTTP_RESP_OK; + if(is_cancelled_cb && is_cancelled_cb(is_cancelled_cb_data)) { + buffer_flush(wb); + response = HTTP_RESP_CLIENT_CLOSED_REQUEST; + } + + if(result_cb) + result_cb(wb, response, result_cb_data); + + return response; +} diff --git a/collectors/cgroups.plugin/integrations/containers.md b/collectors/cgroups.plugin/integrations/containers.md index 6dec9ce2b..6273d1e91 100644 --- a/collectors/cgroups.plugin/integrations/containers.md +++ b/collectors/cgroups.plugin/integrations/containers.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.pl sidebar_label: "Containers" learn_status: "Published" learn_rel_path: "Data Collection/Containers and VMs" +most_popular: True message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -60,8 +61,8 @@ Labels: | Label | Description | |:-----------|:----------------| -| container_name | TBD | -| image | TBD | +| container_name | The container name or group path if name resolution fails. | +| image | Docker/Podman container image name. | Metrics: @@ -99,6 +100,7 @@ Metrics: | cgroup.io_some_pressure_stall_time | time | ms | | cgroup.io_full_pressure | some10, some60, some300 | percentage | | cgroup.io_full_pressure_stall_time | time | ms | +| cgroup.pids_current | pids | pids | ### Per cgroup network device @@ -108,10 +110,11 @@ Labels: | Label | Description | |:-----------|:----------------| -| container_name | TBD | -| image | TBD | -| device | TBD | -| interface_type | TBD | +| container_name | The container name or group path if name resolution fails. | +| image | Docker/Podman container image name. | +| device | The name of the host network interface linked to the container's network interface. | +| container_device | Container network interface name. | +| interface_type | Network interface type. Always "virtual" for the containers. | Metrics: diff --git a/collectors/cgroups.plugin/integrations/kubernetes_containers.md b/collectors/cgroups.plugin/integrations/kubernetes_containers.md index 4bfa55c6d..9be32a12a 100644 --- a/collectors/cgroups.plugin/integrations/kubernetes_containers.md +++ b/collectors/cgroups.plugin/integrations/kubernetes_containers.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.pl sidebar_label: "Kubernetes Containers" learn_status: "Published" learn_rel_path: "Data Collection/Kubernetes" +most_popular: True message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -54,23 +55,21 @@ The scope defines the instance that the metric belongs to. An instance is unique ### Per k8s cgroup - +These metrics refer to the Pod container. Labels: | Label | Description | |:-----------|:----------------| -| k8s_namespace | TBD | -| k8s_pod_name | TBD | -| k8s_pod_uid | TBD | -| k8s_controller_kind | TBD | -| k8s_controller_name | TBD | -| k8s_node_name | TBD | -| k8s_container_name | TBD | -| k8s_container_id | TBD | -| k8s_kind | TBD | -| k8s_qos_class | TBD | -| k8s_cluster_id | TBD | +| k8s_node_name | Node name. The value of _pod.spec.nodeName_. | +| k8s_namespace | Namespace name. The value of _pod.metadata.namespace_. | +| k8s_controller_kind | Controller kind (ReplicaSet, DaemonSet, StatefulSet, Job, etc.). The value of _pod.OwnerReferences.Controller.Kind_. | +| k8s_controller_name | Controller name.The value of _pod.OwnerReferences.Controller.Name_. | +| k8s_pod_name | Pod name. The value of _pod.metadata.name_. | +| k8s_container_name | Container name. The value of _pod.spec.containers.name_. | +| k8s_kind | Instance kind: "pod" or "container". | +| k8s_qos_class | QoS class (guaranteed, burstable, besteffort). | +| k8s_cluster_id | Cluster ID. The value of kube-system namespace _namespace.metadata.uid_. | Metrics: @@ -108,28 +107,28 @@ Metrics: | k8s.cgroup.io_some_pressure_stall_time | time | ms | | k8s.cgroup.io_full_pressure | some10, some60, some300 | percentage | | k8s.cgroup.io_full_pressure_stall_time | time | ms | +| k8s.cgroup.pids_current | pids | pids | ### Per k8s cgroup network device - +These metrics refer to the Pod container network interface. Labels: | Label | Description | |:-----------|:----------------| -| device | TBD | -| interface_type | TBD | -| k8s_namespace | TBD | -| k8s_pod_name | TBD | -| k8s_pod_uid | TBD | -| k8s_controller_kind | TBD | -| k8s_controller_name | TBD | -| k8s_node_name | TBD | -| k8s_container_name | TBD | -| k8s_container_id | TBD | -| k8s_kind | TBD | -| k8s_qos_class | TBD | -| k8s_cluster_id | TBD | +| device | The name of the host network interface linked to the container's network interface. | +| container_device | Container network interface name. | +| interface_type | Network interface type. Always "virtual" for the containers. | +| k8s_node_name | Node name. The value of _pod.spec.nodeName_. | +| k8s_namespace | Namespace name. The value of _pod.metadata.namespace_. | +| k8s_controller_kind | Controller kind (ReplicaSet, DaemonSet, StatefulSet, Job, etc.). The value of _pod.OwnerReferences.Controller.Kind_. | +| k8s_controller_name | Controller name.The value of _pod.OwnerReferences.Controller.Name_. | +| k8s_pod_name | Pod name. The value of _pod.metadata.name_. | +| k8s_container_name | Container name. The value of _pod.spec.containers.name_. | +| k8s_kind | Instance kind: "pod" or "container". | +| k8s_qos_class | QoS class (guaranteed, burstable, besteffort). | +| k8s_cluster_id | Cluster ID. The value of kube-system namespace _namespace.metadata.uid_. | Metrics: diff --git a/collectors/cgroups.plugin/integrations/libvirt_containers.md b/collectors/cgroups.plugin/integrations/libvirt_containers.md index af0310b10..fed454698 100644 --- a/collectors/cgroups.plugin/integrations/libvirt_containers.md +++ b/collectors/cgroups.plugin/integrations/libvirt_containers.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.pl sidebar_label: "Libvirt Containers" learn_status: "Published" learn_rel_path: "Data Collection/Containers and VMs" +most_popular: True message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -60,8 +61,8 @@ Labels: | Label | Description | |:-----------|:----------------| -| container_name | TBD | -| image | TBD | +| container_name | The container name or group path if name resolution fails. | +| image | Docker/Podman container image name. | Metrics: @@ -99,6 +100,7 @@ Metrics: | cgroup.io_some_pressure_stall_time | time | ms | | cgroup.io_full_pressure | some10, some60, some300 | percentage | | cgroup.io_full_pressure_stall_time | time | ms | +| cgroup.pids_current | pids | pids | ### Per cgroup network device @@ -108,10 +110,11 @@ Labels: | Label | Description | |:-----------|:----------------| -| container_name | TBD | -| image | TBD | -| device | TBD | -| interface_type | TBD | +| container_name | The container name or group path if name resolution fails. | +| image | Docker/Podman container image name. | +| device | The name of the host network interface linked to the container's network interface. | +| container_device | Container network interface name. | +| interface_type | Network interface type. Always "virtual" for the containers. | Metrics: diff --git a/collectors/cgroups.plugin/integrations/lxc_containers.md b/collectors/cgroups.plugin/integrations/lxc_containers.md index becc9ae17..3f05ffd5f 100644 --- a/collectors/cgroups.plugin/integrations/lxc_containers.md +++ b/collectors/cgroups.plugin/integrations/lxc_containers.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.pl sidebar_label: "LXC Containers" learn_status: "Published" learn_rel_path: "Data Collection/Containers and VMs" +most_popular: True message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -60,8 +61,8 @@ Labels: | Label | Description | |:-----------|:----------------| -| container_name | TBD | -| image | TBD | +| container_name | The container name or group path if name resolution fails. | +| image | Docker/Podman container image name. | Metrics: @@ -99,6 +100,7 @@ Metrics: | cgroup.io_some_pressure_stall_time | time | ms | | cgroup.io_full_pressure | some10, some60, some300 | percentage | | cgroup.io_full_pressure_stall_time | time | ms | +| cgroup.pids_current | pids | pids | ### Per cgroup network device @@ -108,10 +110,11 @@ Labels: | Label | Description | |:-----------|:----------------| -| container_name | TBD | -| image | TBD | -| device | TBD | -| interface_type | TBD | +| container_name | The container name or group path if name resolution fails. | +| image | Docker/Podman container image name. | +| device | The name of the host network interface linked to the container's network interface. | +| container_device | Container network interface name. | +| interface_type | Network interface type. Always "virtual" for the containers. | Metrics: diff --git a/collectors/cgroups.plugin/integrations/ovirt_containers.md b/collectors/cgroups.plugin/integrations/ovirt_containers.md index c9f6d74b7..5771aeea1 100644 --- a/collectors/cgroups.plugin/integrations/ovirt_containers.md +++ b/collectors/cgroups.plugin/integrations/ovirt_containers.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.pl sidebar_label: "oVirt Containers" learn_status: "Published" learn_rel_path: "Data Collection/Containers and VMs" +most_popular: True message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -60,8 +61,8 @@ Labels: | Label | Description | |:-----------|:----------------| -| container_name | TBD | -| image | TBD | +| container_name | The container name or group path if name resolution fails. | +| image | Docker/Podman container image name. | Metrics: @@ -99,6 +100,7 @@ Metrics: | cgroup.io_some_pressure_stall_time | time | ms | | cgroup.io_full_pressure | some10, some60, some300 | percentage | | cgroup.io_full_pressure_stall_time | time | ms | +| cgroup.pids_current | pids | pids | ### Per cgroup network device @@ -108,10 +110,11 @@ Labels: | Label | Description | |:-----------|:----------------| -| container_name | TBD | -| image | TBD | -| device | TBD | -| interface_type | TBD | +| container_name | The container name or group path if name resolution fails. | +| image | Docker/Podman container image name. | +| device | The name of the host network interface linked to the container's network interface. | +| container_device | Container network interface name. | +| interface_type | Network interface type. Always "virtual" for the containers. | Metrics: diff --git a/collectors/cgroups.plugin/integrations/proxmox_containers.md b/collectors/cgroups.plugin/integrations/proxmox_containers.md index 2caad5eac..1804a40ca 100644 --- a/collectors/cgroups.plugin/integrations/proxmox_containers.md +++ b/collectors/cgroups.plugin/integrations/proxmox_containers.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.pl sidebar_label: "Proxmox Containers" learn_status: "Published" learn_rel_path: "Data Collection/Containers and VMs" +most_popular: True message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -60,8 +61,8 @@ Labels: | Label | Description | |:-----------|:----------------| -| container_name | TBD | -| image | TBD | +| container_name | The container name or group path if name resolution fails. | +| image | Docker/Podman container image name. | Metrics: @@ -99,6 +100,7 @@ Metrics: | cgroup.io_some_pressure_stall_time | time | ms | | cgroup.io_full_pressure | some10, some60, some300 | percentage | | cgroup.io_full_pressure_stall_time | time | ms | +| cgroup.pids_current | pids | pids | ### Per cgroup network device @@ -108,10 +110,11 @@ Labels: | Label | Description | |:-----------|:----------------| -| container_name | TBD | -| image | TBD | -| device | TBD | -| interface_type | TBD | +| container_name | The container name or group path if name resolution fails. | +| image | Docker/Podman container image name. | +| device | The name of the host network interface linked to the container's network interface. | +| container_device | Container network interface name. | +| interface_type | Network interface type. Always "virtual" for the containers. | Metrics: diff --git a/collectors/cgroups.plugin/integrations/systemd_services.md b/collectors/cgroups.plugin/integrations/systemd_services.md index b71060050..0ce906366 100644 --- a/collectors/cgroups.plugin/integrations/systemd_services.md +++ b/collectors/cgroups.plugin/integrations/systemd_services.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.pl sidebar_label: "Systemd Services" learn_status: "Published" learn_rel_path: "Data Collection/Systemd" +most_popular: True message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -79,6 +80,7 @@ Metrics: | systemd.service.disk.throttle.iops | read, write | operations/s | | systemd.service.disk.queued_iops | read, write | operations/s | | systemd.service.disk.merged_iops | read, write | operations/s | +| systemd.service.pids.current | pids | pids | diff --git a/collectors/cgroups.plugin/integrations/virtual_machines.md b/collectors/cgroups.plugin/integrations/virtual_machines.md index 3bb79c128..6a64923c4 100644 --- a/collectors/cgroups.plugin/integrations/virtual_machines.md +++ b/collectors/cgroups.plugin/integrations/virtual_machines.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.pl sidebar_label: "Virtual Machines" learn_status: "Published" learn_rel_path: "Data Collection/Containers and VMs" +most_popular: True message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -60,8 +61,8 @@ Labels: | Label | Description | |:-----------|:----------------| -| container_name | TBD | -| image | TBD | +| container_name | The container name or group path if name resolution fails. | +| image | Docker/Podman container image name. | Metrics: @@ -99,6 +100,7 @@ Metrics: | cgroup.io_some_pressure_stall_time | time | ms | | cgroup.io_full_pressure | some10, some60, some300 | percentage | | cgroup.io_full_pressure_stall_time | time | ms | +| cgroup.pids_current | pids | pids | ### Per cgroup network device @@ -108,10 +110,11 @@ Labels: | Label | Description | |:-----------|:----------------| -| container_name | TBD | -| image | TBD | -| device | TBD | -| interface_type | TBD | +| container_name | The container name or group path if name resolution fails. | +| image | Docker/Podman container image name. | +| device | The name of the host network interface linked to the container's network interface. | +| container_device | Container network interface name. | +| interface_type | Network interface type. Always "virtual" for the containers. | Metrics: diff --git a/collectors/cgroups.plugin/metadata.yaml b/collectors/cgroups.plugin/metadata.yaml index ec6228ea2..a1abbb5a9 100644 --- a/collectors/cgroups.plugin/metadata.yaml +++ b/collectors/cgroups.plugin/metadata.yaml @@ -86,9 +86,9 @@ modules: description: "" labels: - name: container_name - description: TBD + description: The container name or group path if name resolution fails. - name: image - description: TBD + description: Docker/Podman container image name. metrics: - name: cgroup.cpu_limit description: CPU Usage within the limits @@ -310,17 +310,25 @@ modules: chart_type: line dimensions: - name: time + - name: cgroup.pids_current + description: Number of processes + unit: "pids" + chart_type: line + dimensions: + - name: pids - name: cgroup network device description: "" labels: - name: container_name - description: TBD + description: The container name or group path if name resolution fails. - name: image - description: TBD + description: Docker/Podman container image name. - name: device - description: TBD + description: "The name of the host network interface linked to the container's network interface." + - name: container_device + description: Container network interface name. - name: interface_type - description: TBD + description: 'Network interface type. Always "virtual" for the containers.' metrics: - name: cgroup.net_net description: Bandwidth @@ -445,30 +453,26 @@ modules: availability: [] scopes: - name: k8s cgroup - description: "" + description: These metrics refer to the Pod container. labels: + - name: k8s_node_name + description: 'Node name. The value of _pod.spec.nodeName_.' - name: k8s_namespace - description: TBD - - name: k8s_pod_name - description: TBD - - name: k8s_pod_uid - description: TBD + description: 'Namespace name. The value of _pod.metadata.namespace_.' - name: k8s_controller_kind - description: TBD + description: 'Controller kind (ReplicaSet, DaemonSet, StatefulSet, Job, etc.). The value of _pod.OwnerReferences.Controller.Kind_.' - name: k8s_controller_name - description: TBD - - name: k8s_node_name - description: TBD + description: 'Controller name.The value of _pod.OwnerReferences.Controller.Name_.' + - name: k8s_pod_name + description: 'Pod name. The value of _pod.metadata.name_.' - name: k8s_container_name - description: TBD - - name: k8s_container_id - description: TBD + description: 'Container name. The value of _pod.spec.containers.name_.' - name: k8s_kind - description: TBD + description: 'Instance kind: "pod" or "container".' - name: k8s_qos_class - description: TBD + description: 'QoS class (guaranteed, burstable, besteffort).' - name: k8s_cluster_id - description: TBD + description: 'Cluster ID. The value of kube-system namespace _namespace.metadata.uid_.' metrics: - name: k8s.cgroup.cpu_limit description: CPU Usage within the limits @@ -690,35 +694,39 @@ modules: chart_type: line dimensions: - name: time + - name: k8s.cgroup.pids_current + description: Number of processes + unit: "pids" + chart_type: line + dimensions: + - name: pids - name: k8s cgroup network device - description: "" + description: These metrics refer to the Pod container network interface. labels: - name: device - description: TBD + description: "The name of the host network interface linked to the container's network interface." + - name: container_device + description: Container network interface name. - name: interface_type - description: TBD + description: 'Network interface type. Always "virtual" for the containers.' + - name: k8s_node_name + description: 'Node name. The value of _pod.spec.nodeName_.' - name: k8s_namespace - description: TBD - - name: k8s_pod_name - description: TBD - - name: k8s_pod_uid - description: TBD + description: 'Namespace name. The value of _pod.metadata.namespace_.' - name: k8s_controller_kind - description: TBD + description: 'Controller kind (ReplicaSet, DaemonSet, StatefulSet, Job, etc.). The value of _pod.OwnerReferences.Controller.Kind_.' - name: k8s_controller_name - description: TBD - - name: k8s_node_name - description: TBD + description: 'Controller name.The value of _pod.OwnerReferences.Controller.Name_.' + - name: k8s_pod_name + description: 'Pod name. The value of _pod.metadata.name_.' - name: k8s_container_name - description: TBD - - name: k8s_container_id - description: TBD + description: 'Container name. The value of _pod.spec.containers.name_.' - name: k8s_kind - description: TBD + description: 'Instance kind: "pod" or "container".' - name: k8s_qos_class - description: TBD + description: 'QoS class (guaranteed, burstable, besteffort).' - name: k8s_cluster_id - description: TBD + description: 'Cluster ID. The value of kube-system namespace _namespace.metadata.uid_.' metrics: - name: k8s.cgroup.net_net description: Bandwidth @@ -919,6 +927,12 @@ modules: dimensions: - name: read - name: write + - name: systemd.service.pids.current + description: Systemd Services Number of Processes + unit: pids + chart_type: line + dimensions: + - name: pids - <<: *module meta: <<: *meta diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c index 6196e7603..705edf6f7 100644 --- a/collectors/cgroups.plugin/sys_fs_cgroup.c +++ b/collectors/cgroups.plugin/sys_fs_cgroup.c @@ -1,149 +1,98 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#include "sys_fs_cgroup.h" - -#define PLUGIN_CGROUPS_NAME "cgroups.plugin" -#define PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME "systemd" -#define PLUGIN_CGROUPS_MODULE_CGROUPS_NAME "/sys/fs/cgroup" - -#ifdef NETDATA_INTERNAL_CHECKS -#define CGROUP_PROCFILE_FLAG PROCFILE_FLAG_DEFAULT -#else -#define CGROUP_PROCFILE_FLAG PROCFILE_FLAG_NO_ERROR_ON_FILE_IO -#endif +#include "cgroup-internals.h" // main cgroups thread worker jobs #define WORKER_CGROUPS_LOCK 0 #define WORKER_CGROUPS_READ 1 #define WORKER_CGROUPS_CHART 2 -// discovery cgroup thread worker jobs -#define WORKER_DISCOVERY_INIT 0 -#define WORKER_DISCOVERY_FIND 1 -#define WORKER_DISCOVERY_PROCESS 2 -#define WORKER_DISCOVERY_PROCESS_RENAME 3 -#define WORKER_DISCOVERY_PROCESS_NETWORK 4 -#define WORKER_DISCOVERY_PROCESS_FIRST_TIME 5 -#define WORKER_DISCOVERY_UPDATE 6 -#define WORKER_DISCOVERY_CLEANUP 7 -#define WORKER_DISCOVERY_COPY 8 -#define WORKER_DISCOVERY_SHARE 9 -#define WORKER_DISCOVERY_LOCK 10 - -#if WORKER_UTILIZATION_MAX_JOB_TYPES < 11 -#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 11 -#endif - // ---------------------------------------------------------------------------- // cgroup globals +unsigned long long host_ram_total = 0; +int is_inside_k8s = 0; +long system_page_size = 4096; // system will be queried via sysconf() in configuration() +int cgroup_enable_cpuacct_stat = CONFIG_BOOLEAN_AUTO; +int cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_NO; +int cgroup_enable_cpuacct_cpu_throttling = CONFIG_BOOLEAN_YES; +int cgroup_enable_cpuacct_cpu_shares = CONFIG_BOOLEAN_NO; +int cgroup_enable_memory = CONFIG_BOOLEAN_AUTO; +int cgroup_enable_detailed_memory = CONFIG_BOOLEAN_AUTO; +int cgroup_enable_memory_failcnt = CONFIG_BOOLEAN_AUTO; +int cgroup_enable_swap = CONFIG_BOOLEAN_AUTO; +int cgroup_enable_blkio_io = CONFIG_BOOLEAN_AUTO; +int cgroup_enable_blkio_ops = CONFIG_BOOLEAN_AUTO; +int cgroup_enable_blkio_throttle_io = CONFIG_BOOLEAN_AUTO; +int cgroup_enable_blkio_throttle_ops = CONFIG_BOOLEAN_AUTO; +int cgroup_enable_blkio_merged_ops = CONFIG_BOOLEAN_AUTO; +int cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_AUTO; +int cgroup_enable_pressure_cpu = CONFIG_BOOLEAN_AUTO; +int cgroup_enable_pressure_io_some = CONFIG_BOOLEAN_AUTO; +int cgroup_enable_pressure_io_full = CONFIG_BOOLEAN_AUTO; +int cgroup_enable_pressure_memory_some = CONFIG_BOOLEAN_AUTO; +int cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_AUTO; +int cgroup_enable_pressure_irq_some = CONFIG_BOOLEAN_NO; +int cgroup_enable_pressure_irq_full = CONFIG_BOOLEAN_AUTO; +int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES; +int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO; +int cgroup_used_memory = CONFIG_BOOLEAN_YES; +int cgroup_use_unified_cgroups = CONFIG_BOOLEAN_NO; +int cgroup_unified_exist = CONFIG_BOOLEAN_AUTO; +int cgroup_search_in_devices = 1; +int cgroup_check_for_new_every = 10; +int cgroup_update_every = 1; +int cgroup_containers_chart_priority = NETDATA_CHART_PRIO_CGROUPS_CONTAINERS; +int cgroup_recheck_zero_blkio_every_iterations = 10; +int cgroup_recheck_zero_mem_failcnt_every_iterations = 10; +int cgroup_recheck_zero_mem_detailed_every_iterations = 10; +char *cgroup_cpuacct_base = NULL; +char *cgroup_cpuset_base = NULL; +char *cgroup_blkio_base = NULL; +char *cgroup_memory_base = NULL; +char *cgroup_devices_base = NULL; +char *cgroup_pids_base = NULL; +char *cgroup_unified_base = NULL; +int cgroup_root_count = 0; +int cgroup_root_max = 1000; +int cgroup_max_depth = 0; +SIMPLE_PATTERN *enabled_cgroup_paths = NULL; +SIMPLE_PATTERN *enabled_cgroup_names = NULL; +SIMPLE_PATTERN *search_cgroup_paths = NULL; +SIMPLE_PATTERN *enabled_cgroup_renames = NULL; +SIMPLE_PATTERN *systemd_services_cgroups = NULL; +SIMPLE_PATTERN *entrypoint_parent_process_comm = NULL; +char *cgroups_network_interface_script = NULL; +int cgroups_check = 0; +uint32_t Read_hash = 0; +uint32_t Write_hash = 0; +uint32_t user_hash = 0; +uint32_t system_hash = 0; +uint32_t user_usec_hash = 0; +uint32_t system_usec_hash = 0; +uint32_t nr_periods_hash = 0; +uint32_t nr_throttled_hash = 0; +uint32_t throttled_time_hash = 0; +uint32_t throttled_usec_hash = 0; -static char cgroup_chart_id_prefix[] = "cgroup_"; -static char services_chart_id_prefix[] = "systemd_"; - -static int is_inside_k8s = 0; - -static long system_page_size = 4096; // system will be queried via sysconf() in configuration() - -static int cgroup_enable_cpuacct_stat = CONFIG_BOOLEAN_AUTO; -static int cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_AUTO; -static int cgroup_enable_cpuacct_cpu_throttling = CONFIG_BOOLEAN_YES; -static int cgroup_enable_cpuacct_cpu_shares = CONFIG_BOOLEAN_NO; -static int cgroup_enable_memory = CONFIG_BOOLEAN_AUTO; -static int cgroup_enable_detailed_memory = CONFIG_BOOLEAN_AUTO; -static int cgroup_enable_memory_failcnt = CONFIG_BOOLEAN_AUTO; -static int cgroup_enable_swap = CONFIG_BOOLEAN_AUTO; -static int cgroup_enable_blkio_io = CONFIG_BOOLEAN_AUTO; -static int cgroup_enable_blkio_ops = CONFIG_BOOLEAN_AUTO; -static int cgroup_enable_blkio_throttle_io = CONFIG_BOOLEAN_AUTO; -static int cgroup_enable_blkio_throttle_ops = CONFIG_BOOLEAN_AUTO; -static int cgroup_enable_blkio_merged_ops = CONFIG_BOOLEAN_AUTO; -static int cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_AUTO; -static int cgroup_enable_pressure_cpu = CONFIG_BOOLEAN_AUTO; -static int cgroup_enable_pressure_io_some = CONFIG_BOOLEAN_AUTO; -static int cgroup_enable_pressure_io_full = CONFIG_BOOLEAN_AUTO; -static int cgroup_enable_pressure_memory_some = CONFIG_BOOLEAN_AUTO; -static int cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_AUTO; -static int cgroup_enable_pressure_irq_some = CONFIG_BOOLEAN_NO; -static int cgroup_enable_pressure_irq_full = CONFIG_BOOLEAN_AUTO; - -static int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES; -static int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO; -static int cgroup_used_memory = CONFIG_BOOLEAN_YES; - -static int cgroup_use_unified_cgroups = CONFIG_BOOLEAN_NO; -static int cgroup_unified_exist = CONFIG_BOOLEAN_AUTO; - -static int cgroup_search_in_devices = 1; - -static int cgroup_check_for_new_every = 10; -static int cgroup_update_every = 1; -static int cgroup_containers_chart_priority = NETDATA_CHART_PRIO_CGROUPS_CONTAINERS; - -static int cgroup_recheck_zero_blkio_every_iterations = 10; -static int cgroup_recheck_zero_mem_failcnt_every_iterations = 10; -static int cgroup_recheck_zero_mem_detailed_every_iterations = 10; - -static char *cgroup_cpuacct_base = NULL; -static char *cgroup_cpuset_base = NULL; -static char *cgroup_blkio_base = NULL; -static char *cgroup_memory_base = NULL; -static char *cgroup_devices_base = NULL; -static char *cgroup_unified_base = NULL; - -static int cgroup_root_count = 0; -static int cgroup_root_max = 1000; -static int cgroup_max_depth = 0; - -static SIMPLE_PATTERN *enabled_cgroup_paths = NULL; -static SIMPLE_PATTERN *enabled_cgroup_names = NULL; -static SIMPLE_PATTERN *search_cgroup_paths = NULL; -static SIMPLE_PATTERN *enabled_cgroup_renames = NULL; -static SIMPLE_PATTERN *systemd_services_cgroups = NULL; - -static SIMPLE_PATTERN *entrypoint_parent_process_comm = NULL; - -static char *cgroups_rename_script = NULL; -static char *cgroups_network_interface_script = NULL; - -static int cgroups_check = 0; - -static uint32_t Read_hash = 0; -static uint32_t Write_hash = 0; -static uint32_t user_hash = 0; -static uint32_t system_hash = 0; -static uint32_t user_usec_hash = 0; -static uint32_t system_usec_hash = 0; -static uint32_t nr_periods_hash = 0; -static uint32_t nr_throttled_hash = 0; -static uint32_t throttled_time_hash = 0; -static uint32_t throttled_usec_hash = 0; - -enum cgroups_type { CGROUPS_AUTODETECT_FAIL, CGROUPS_V1, CGROUPS_V2 }; - -enum cgroups_systemd_setting { - SYSTEMD_CGROUP_ERR, - SYSTEMD_CGROUP_LEGACY, - SYSTEMD_CGROUP_HYBRID, - SYSTEMD_CGROUP_UNIFIED -}; - -struct cgroups_systemd_config_setting { - char *name; - enum cgroups_systemd_setting setting; -}; +// *** WARNING *** The fields are not thread safe. Take care of safe usage. +struct cgroup *cgroup_root = NULL; +uv_mutex_t cgroup_root_mutex; -static struct cgroups_systemd_config_setting cgroups_systemd_options[] = { - { .name = "legacy", .setting = SYSTEMD_CGROUP_LEGACY }, - { .name = "hybrid", .setting = SYSTEMD_CGROUP_HYBRID }, - { .name = "unified", .setting = SYSTEMD_CGROUP_UNIFIED }, - { .name = NULL, .setting = SYSTEMD_CGROUP_ERR }, +struct cgroups_systemd_config_setting cgroups_systemd_options[] = { + { .name = "legacy", .setting = SYSTEMD_CGROUP_LEGACY }, + { .name = "hybrid", .setting = SYSTEMD_CGROUP_HYBRID }, + { .name = "unified", .setting = SYSTEMD_CGROUP_UNIFIED }, + { .name = NULL, .setting = SYSTEMD_CGROUP_ERR }, }; // Shared memory with information from detected cgroups netdata_ebpf_cgroup_shm_t shm_cgroup_ebpf = {NULL, NULL}; -static int shm_fd_cgroup_ebpf = -1; +int shm_fd_cgroup_ebpf = -1; sem_t *shm_mutex_cgroup_ebpf = SEM_FAILED; +struct discovery_thread discovery_thread; + + /* on Fed systemd is not in PATH for some reason */ #define SYSTEMD_CMD_RHEL "/usr/lib/systemd/systemd --version" #define SYSTEMD_HIERARCHY_STRING "default-hierarchy=" @@ -362,54 +311,70 @@ void read_cgroup_plugin_configuration() { cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_NO; mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuacct"); - if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuacct"); - if(!mi) { + if (!mi) + mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuacct"); + if (!mi) { collector_error("CGROUP: cannot find cpuacct mountinfo. Assuming default: /sys/fs/cgroup/cpuacct"); s = "/sys/fs/cgroup/cpuacct"; - } - else s = mi->mount_point; + } else + s = mi->mount_point; set_cgroup_base_path(filename, s); cgroup_cpuacct_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/cpuacct", filename); mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuset"); - if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuset"); - if(!mi) { + if (!mi) + mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuset"); + if (!mi) { collector_error("CGROUP: cannot find cpuset mountinfo. Assuming default: /sys/fs/cgroup/cpuset"); s = "/sys/fs/cgroup/cpuset"; - } - else s = mi->mount_point; + } else + s = mi->mount_point; set_cgroup_base_path(filename, s); cgroup_cpuset_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/cpuset", filename); mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "blkio"); - if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "blkio"); - if(!mi) { + if (!mi) + mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "blkio"); + if (!mi) { collector_error("CGROUP: cannot find blkio mountinfo. Assuming default: /sys/fs/cgroup/blkio"); s = "/sys/fs/cgroup/blkio"; - } - else s = mi->mount_point; + } else + s = mi->mount_point; set_cgroup_base_path(filename, s); cgroup_blkio_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/blkio", filename); mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "memory"); - if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "memory"); - if(!mi) { + if (!mi) + mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "memory"); + if (!mi) { collector_error("CGROUP: cannot find memory mountinfo. Assuming default: /sys/fs/cgroup/memory"); s = "/sys/fs/cgroup/memory"; - } - else s = mi->mount_point; + } else + s = mi->mount_point; set_cgroup_base_path(filename, s); cgroup_memory_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/memory", filename); mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "devices"); - if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "devices"); - if(!mi) { + if (!mi) + mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "devices"); + if (!mi) { collector_error("CGROUP: cannot find devices mountinfo. Assuming default: /sys/fs/cgroup/devices"); s = "/sys/fs/cgroup/devices"; - } - else s = mi->mount_point; + } else + s = mi->mount_point; set_cgroup_base_path(filename, s); cgroup_devices_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/devices", filename); + + mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "pids"); + if (!mi) + mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "pids"); + if (!mi) { + collector_error("CGROUP: cannot find pids mountinfo. Assuming default: /sys/fs/cgroup/pids"); + s = "/sys/fs/cgroup/pids"; + } else + s = mi->mount_point; + set_cgroup_base_path(filename, s); + cgroup_pids_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/pids", filename); } else { //cgroup_enable_cpuacct_stat = @@ -429,22 +394,19 @@ void read_cgroup_plugin_configuration() { cgroup_used_memory = CONFIG_BOOLEAN_NO; //unified cgroups use different values //TODO: can there be more than 1 cgroup2 mount point? - mi = mountinfo_find_by_filesystem_super_option(root, "cgroup2", "rw"); //there is no cgroup2 specific super option - for now use 'rw' option - if(mi) - netdata_log_debug(D_CGROUP, "found unified cgroup root using super options, with path: '%s'", mi->mount_point); - if(!mi) { + //there is no cgroup2 specific super option - for now use 'rw' option + mi = mountinfo_find_by_filesystem_super_option(root, "cgroup2", "rw"); + if (!mi) { mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup2", "cgroup"); - if(mi) - netdata_log_debug(D_CGROUP, "found unified cgroup root using mountsource info, with path: '%s'", mi->mount_point); } - if(!mi) { + if (!mi) { collector_error("CGROUP: cannot find cgroup2 mountinfo. Assuming default: /sys/fs/cgroup"); s = "/sys/fs/cgroup"; - } - else s = mi->mount_point; + } else + s = mi->mount_point; + set_cgroup_base_path(filename, s); cgroup_unified_base = config_get("plugin:cgroups", "path to unified cgroups", filename); - netdata_log_debug(D_CGROUP, "using cgroup root: '%s'", cgroup_unified_base); } cgroup_root_max = (int)config_get_number("plugin:cgroups", "max cgroups to allow", cgroup_root_max); @@ -621,395 +583,6 @@ end_init_shm: shm_unlink(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME); } -// ---------------------------------------------------------------------------- -// cgroup objects - -struct blkio { - int updated; - int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO - int delay_counter; - - char *filename; - - unsigned long long Read; - unsigned long long Write; -/* - unsigned long long Sync; - unsigned long long Async; - unsigned long long Total; -*/ -}; - -// https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt -struct memory { - ARL_BASE *arl_base; - ARL_ENTRY *arl_dirty; - ARL_ENTRY *arl_swap; - - int updated_detailed; - int updated_usage_in_bytes; - int updated_msw_usage_in_bytes; - int updated_failcnt; - - int enabled_detailed; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO - int enabled_usage_in_bytes; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO - int enabled_msw_usage_in_bytes; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO - int enabled_failcnt; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO - - int delay_counter_detailed; - int delay_counter_failcnt; - - char *filename_detailed; - char *filename_usage_in_bytes; - char *filename_msw_usage_in_bytes; - char *filename_failcnt; - - int detailed_has_dirty; - int detailed_has_swap; - - // detailed metrics -/* - unsigned long long cache; - unsigned long long rss; - unsigned long long rss_huge; - unsigned long long mapped_file; - unsigned long long writeback; - unsigned long long dirty; - unsigned long long swap; - unsigned long long pgpgin; - unsigned long long pgpgout; - unsigned long long pgfault; - unsigned long long pgmajfault; - unsigned long long inactive_anon; - unsigned long long active_anon; - unsigned long long inactive_file; - unsigned long long active_file; - unsigned long long unevictable; - unsigned long long hierarchical_memory_limit; -*/ - //unified cgroups metrics - unsigned long long anon; - unsigned long long kernel_stack; - unsigned long long slab; - unsigned long long sock; - unsigned long long shmem; - unsigned long long anon_thp; - //unsigned long long file_writeback; - //unsigned long long file_dirty; - //unsigned long long file; - - unsigned long long total_cache; - unsigned long long total_rss; - unsigned long long total_rss_huge; - unsigned long long total_mapped_file; - unsigned long long total_writeback; - unsigned long long total_dirty; - unsigned long long total_swap; - unsigned long long total_pgpgin; - unsigned long long total_pgpgout; - unsigned long long total_pgfault; - unsigned long long total_pgmajfault; -/* - unsigned long long total_inactive_anon; - unsigned long long total_active_anon; -*/ - - unsigned long long total_inactive_file; - -/* - unsigned long long total_active_file; - unsigned long long total_unevictable; -*/ - - // single file metrics - unsigned long long usage_in_bytes; - unsigned long long msw_usage_in_bytes; - unsigned long long failcnt; -}; - -// https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt -struct cpuacct_stat { - int updated; - int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO - - char *filename; - - unsigned long long user; // v1, v2(user_usec) - unsigned long long system; // v1, v2(system_usec) -}; - -// https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt -struct cpuacct_usage { - int updated; - int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO - - char *filename; - - unsigned int cpus; - unsigned long long *cpu_percpu; -}; - -// represents cpuacct/cpu.stat, for v2 'cpuacct_stat' is used for 'user_usec', 'system_usec' -struct cpuacct_cpu_throttling { - int updated; - int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO - - char *filename; - - unsigned long long nr_periods; - unsigned long long nr_throttled; - unsigned long long throttled_time; - - unsigned long long nr_throttled_perc; -}; - -// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpu#sect-cfs -// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/managing_monitoring_and_updating_the_kernel/using-cgroups-v2-to-control-distribution-of-cpu-time-for-applications_managing-monitoring-and-updating-the-kernel#proc_controlling-distribution-of-cpu-time-for-applications-by-adjusting-cpu-weight_using-cgroups-v2-to-control-distribution-of-cpu-time-for-applications -struct cpuacct_cpu_shares { - int updated; - int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO - - char *filename; - - unsigned long long shares; -}; - -struct cgroup_network_interface { - const char *host_device; - const char *container_device; - struct cgroup_network_interface *next; -}; - -enum cgroups_container_orchestrator { - CGROUPS_ORCHESTRATOR_UNSET, - CGROUPS_ORCHESTRATOR_UNKNOWN, - CGROUPS_ORCHESTRATOR_K8S -}; - -// *** WARNING *** The fields are not thread safe. Take care of safe usage. -struct cgroup { - uint32_t options; - - int first_time_seen; // first time seen by the discoverer - int processed; // the discoverer is done processing a cgroup (resolved name, set 'enabled' option) - - char available; // found in the filesystem - char enabled; // enabled in the config - - char pending_renames; - - char *id; - uint32_t hash; - - char *intermediate_id; // TODO: remove it when the renaming script is fixed - - char *chart_id; - uint32_t hash_chart_id; - - // 'cgroup_name' label value. - // By default this is the *id (path), later changed to the resolved name (cgroup-name.sh) or systemd service name. - char *name; - - RRDLABELS *chart_labels; - - int container_orchestrator; - - struct cpuacct_stat cpuacct_stat; - struct cpuacct_usage cpuacct_usage; - struct cpuacct_cpu_throttling cpuacct_cpu_throttling; - struct cpuacct_cpu_shares cpuacct_cpu_shares; - - struct memory memory; - - struct blkio io_service_bytes; // bytes - struct blkio io_serviced; // operations - - struct blkio throttle_io_service_bytes; // bytes - struct blkio throttle_io_serviced; // operations - - struct blkio io_merged; // operations - struct blkio io_queued; // operations - - struct cgroup_network_interface *interfaces; - - struct pressure cpu_pressure; - struct pressure io_pressure; - struct pressure memory_pressure; - struct pressure irq_pressure; - - // per cgroup charts - RRDSET *st_cpu; - RRDSET *st_cpu_limit; - RRDSET *st_cpu_per_core; - RRDSET *st_cpu_nr_throttled; - RRDSET *st_cpu_throttled_time; - RRDSET *st_cpu_shares; - - RRDSET *st_mem; - RRDSET *st_mem_utilization; - RRDSET *st_writeback; - RRDSET *st_mem_activity; - RRDSET *st_pgfaults; - RRDSET *st_mem_usage; - RRDSET *st_mem_usage_limit; - RRDSET *st_mem_failcnt; - - RRDSET *st_io; - RRDSET *st_serviced_ops; - RRDSET *st_throttle_io; - RRDSET *st_throttle_serviced_ops; - RRDSET *st_queued_ops; - RRDSET *st_merged_ops; - - // per cgroup chart variables - char *filename_cpuset_cpus; - unsigned long long cpuset_cpus; - - char *filename_cpu_cfs_period; - unsigned long long cpu_cfs_period; - - char *filename_cpu_cfs_quota; - unsigned long long cpu_cfs_quota; - - const RRDSETVAR_ACQUIRED *chart_var_cpu_limit; - NETDATA_DOUBLE prev_cpu_usage; - - char *filename_memory_limit; - unsigned long long memory_limit; - const RRDSETVAR_ACQUIRED *chart_var_memory_limit; - - char *filename_memoryswap_limit; - unsigned long long memoryswap_limit; - const RRDSETVAR_ACQUIRED *chart_var_memoryswap_limit; - - struct cgroup *next; - struct cgroup *discovered_next; - -} *cgroup_root = NULL; - -uv_mutex_t cgroup_root_mutex; - -struct cgroup *discovered_cgroup_root = NULL; - -struct discovery_thread { - uv_thread_t thread; - uv_mutex_t mutex; - uv_cond_t cond_var; - int exited; -} discovery_thread; - -// --------------------------------------------------------------------------------------------- - -static inline int matches_enabled_cgroup_paths(char *id) { - return simple_pattern_matches(enabled_cgroup_paths, id); -} - -static inline int matches_enabled_cgroup_names(char *name) { - return simple_pattern_matches(enabled_cgroup_names, name); -} - -static inline int matches_enabled_cgroup_renames(char *id) { - return simple_pattern_matches(enabled_cgroup_renames, id); -} - -static inline int matches_systemd_services_cgroups(char *id) { - return simple_pattern_matches(systemd_services_cgroups, id); -} - -static inline int matches_search_cgroup_paths(const char *dir) { - return simple_pattern_matches(search_cgroup_paths, dir); -} - -static inline int matches_entrypoint_parent_process_comm(const char *comm) { - return simple_pattern_matches(entrypoint_parent_process_comm, comm); -} - -static inline int is_cgroup_systemd_service(struct cgroup *cg) { - return (cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE); -} - -// --------------------------------------------------------------------------------------------- -static int k8s_is_kubepod(struct cgroup *cg) { - return cg->container_orchestrator == CGROUPS_ORCHESTRATOR_K8S; -} - -static int k8s_is_container(const char *id) { - // examples: - // https://github.com/netdata/netdata/blob/0fc101679dcd12f1cb8acdd07bb4c85d8e553e53/collectors/cgroups.plugin/cgroup-name.sh#L121-L147 - const char *p = id; - const char *pp = NULL; - int i = 0; - size_t l = 3; // pod - while ((p = strstr(p, "pod"))) { - i++; - p += l; - pp = p; - } - return !(i < 2 || !pp || !(pp = strchr(pp, '/')) || !pp++ || !*pp); -} - -#define TASK_COMM_LEN 16 - -static int k8s_get_container_first_proc_comm(const char *id, char *comm) { - if (!k8s_is_container(id)) { - return 1; - } - - static procfile *ff = NULL; - - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/%s/cgroup.procs", cgroup_cpuacct_base, id); - - ff = procfile_reopen(ff, filename, NULL, CGROUP_PROCFILE_FLAG); - if (unlikely(!ff)) { - netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename); - return 1; - } - - ff = procfile_readall(ff); - if (unlikely(!ff)) { - netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename); - return 1; - } - - unsigned long lines = procfile_lines(ff); - if (likely(lines < 2)) { - return 1; - } - - char *pid = procfile_lineword(ff, 0, 0); - if (!pid || !*pid) { - return 1; - } - - snprintfz(filename, FILENAME_MAX, "%s/proc/%s/comm", netdata_configured_host_prefix, pid); - - ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_DEFAULT); - if (unlikely(!ff)) { - netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename); - return 1; - } - - ff = procfile_readall(ff); - if (unlikely(!ff)) { - netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename); - return 1; - } - - lines = procfile_lines(ff); - if (unlikely(lines != 2)) { - return 1; - } - - char *proc_comm = procfile_lineword(ff, 0, 0); - if (!proc_comm || !*proc_comm) { - return 1; - } - - strncpyz(comm, proc_comm, TASK_COMM_LEN); - return 0; -} - // --------------------------------------------------------------------------------------------- static unsigned long long calc_delta(unsigned long long curr, unsigned long long prev) { @@ -1023,16 +596,7 @@ static unsigned long long calc_percentage(unsigned long long value, unsigned lon if (total == 0) { return 0; } - return (NETDATA_DOUBLE)value / (NETDATA_DOUBLE)total * 100; -} - -static int calc_cgroup_depth(const char *id) { - int depth = 0; - const char *s; - for (s = id; *s; s++) { - depth += unlikely(*s == '/'); - } - return depth; + return (unsigned long long)((NETDATA_DOUBLE)value / (NETDATA_DOUBLE)total * 100); } // ---------------------------------------------------------------------------- @@ -1596,6 +1160,15 @@ memory_next: } } +static void cgroup_read_pids_current(struct pids *pids) { + pids->pids_current_updated = 0; + + if (unlikely(!pids->pids_current_filename)) + return; + + pids->pids_current_updated = !read_single_number_file(pids->pids_current_filename, &pids->pids_current); +} + static inline void read_cgroup(struct cgroup *cg) { netdata_log_debug(D_CGROUP, "reading metrics for cgroups '%s'", cg->id); if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { @@ -1610,6 +1183,7 @@ static inline void read_cgroup(struct cgroup *cg) { cgroup_read_blkio(&cg->throttle_io_serviced); cgroup_read_blkio(&cg->io_merged); cgroup_read_blkio(&cg->io_queued); + cgroup_read_pids_current(&cg->pids); } else { //TODO: io_service_bytes and io_serviced use same file merge into 1 function @@ -1622,6 +1196,7 @@ static inline void read_cgroup(struct cgroup *cg) { cgroup2_read_pressure(&cg->memory_pressure); cgroup2_read_pressure(&cg->irq_pressure); cgroup_read_memory(&cg->memory, 1); + cgroup_read_pids_current(&cg->pids); } } @@ -1636,1583 +1211,7 @@ static inline void read_all_discovered_cgroups(struct cgroup *root) { } } -// ---------------------------------------------------------------------------- -// cgroup network interfaces - -#define CGROUP_NETWORK_INTERFACE_MAX_LINE 2048 -static inline void read_cgroup_network_interfaces(struct cgroup *cg) { - netdata_log_debug(D_CGROUP, "looking for the network interfaces of cgroup '%s' with chart id '%s'", cg->id, cg->chart_id); - - pid_t cgroup_pid; - char cgroup_identifier[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; - - if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { - snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_cpuacct_base, cg->id); - } - else { - snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_unified_base, cg->id); - } - - netdata_log_debug(D_CGROUP, "executing cgroup_identifier %s --cgroup '%s' for cgroup '%s'", cgroups_network_interface_script, cgroup_identifier, cg->id); - FILE *fp_child_input, *fp_child_output; - (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_network_interface_script, "--cgroup", cgroup_identifier); - if(!fp_child_output) { - collector_error("CGROUP: cannot popen(%s --cgroup \"%s\", \"r\").", cgroups_network_interface_script, cgroup_identifier); - return; - } - - char *s; - char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; - while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp_child_output))) { - trim(s); - - if(*s && *s != '\n') { - char *t = s; - while(*t && *t != ' ') t++; - if(*t == ' ') { - *t = '\0'; - t++; - } - - if(!*s) { - collector_error("CGROUP: empty host interface returned by script"); - continue; - } - - if(!*t) { - collector_error("CGROUP: empty guest interface returned by script"); - continue; - } - - struct cgroup_network_interface *i = callocz(1, sizeof(struct cgroup_network_interface)); - i->host_device = strdupz(s); - i->container_device = strdupz(t); - i->next = cg->interfaces; - cg->interfaces = i; - - collector_info("CGROUP: cgroup '%s' has network interface '%s' as '%s'", cg->id, i->host_device, i->container_device); - - // register a device rename to proc_net_dev.c - netdev_rename_device_add( - i->host_device, i->container_device, cg->chart_id, cg->chart_labels, k8s_is_kubepod(cg) ? "k8s." : ""); - } - } - - netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); - // netdata_log_debug(D_CGROUP, "closed cgroup_identifier for cgroup '%s'", cg->id); -} - -static inline void free_cgroup_network_interfaces(struct cgroup *cg) { - while(cg->interfaces) { - struct cgroup_network_interface *i = cg->interfaces; - cg->interfaces = i->next; - - // delete the registration of proc_net_dev rename - netdev_rename_device_del(i->host_device); - - freez((void *)i->host_device); - freez((void *)i->container_device); - freez((void *)i); - } -} - -// ---------------------------------------------------------------------------- -// add/remove/find cgroup objects - -#define CGROUP_CHARTID_LINE_MAX 1024 - -static inline char *cgroup_chart_id_strdupz(const char *s) { - if(!s || !*s) s = "/"; - - if(*s == '/' && s[1] != '\0') s++; - - char *r = strdupz(s); - netdata_fix_chart_id(r); - - return r; -} - -// TODO: move the code to cgroup_chart_id_strdupz() when the renaming script is fixed -static inline void substitute_dots_in_id(char *s) { - // dots are used to distinguish chart type and id in streaming, so we should replace them - for (char *d = s; *d; d++) { - if (*d == '.') - *d = '-'; - } -} - -// ---------------------------------------------------------------------------- -// parse k8s labels - -char *cgroup_parse_resolved_name_and_labels(RRDLABELS *labels, char *data) { - // the first word, up to the first space is the name - char *name = strsep_skip_consecutive_separators(&data, " "); - - // the rest are key=value pairs separated by comma - while(data) { - char *pair = strsep_skip_consecutive_separators(&data, ","); - rrdlabels_add_pair(labels, pair, RRDLABEL_SRC_AUTO | RRDLABEL_SRC_K8S); - } - - return name; -} - -// ---------------------------------------------------------------------------- - -static inline void free_pressure(struct pressure *res) { - if (res->some.share_time.st) rrdset_is_obsolete(res->some.share_time.st); - if (res->some.total_time.st) rrdset_is_obsolete(res->some.total_time.st); - if (res->full.share_time.st) rrdset_is_obsolete(res->full.share_time.st); - if (res->full.total_time.st) rrdset_is_obsolete(res->full.total_time.st); - freez(res->filename); -} - -static inline void cgroup_free(struct cgroup *cg) { - netdata_log_debug(D_CGROUP, "Removing cgroup '%s' with chart id '%s' (was %s and %s)", cg->id, cg->chart_id, (cg->enabled)?"enabled":"disabled", (cg->available)?"available":"not available"); - - if(cg->st_cpu) rrdset_is_obsolete(cg->st_cpu); - if(cg->st_cpu_limit) rrdset_is_obsolete(cg->st_cpu_limit); - if(cg->st_cpu_per_core) rrdset_is_obsolete(cg->st_cpu_per_core); - if(cg->st_cpu_nr_throttled) rrdset_is_obsolete(cg->st_cpu_nr_throttled); - if(cg->st_cpu_throttled_time) rrdset_is_obsolete(cg->st_cpu_throttled_time); - if(cg->st_cpu_shares) rrdset_is_obsolete(cg->st_cpu_shares); - if(cg->st_mem) rrdset_is_obsolete(cg->st_mem); - if(cg->st_writeback) rrdset_is_obsolete(cg->st_writeback); - if(cg->st_mem_activity) rrdset_is_obsolete(cg->st_mem_activity); - if(cg->st_pgfaults) rrdset_is_obsolete(cg->st_pgfaults); - if(cg->st_mem_usage) rrdset_is_obsolete(cg->st_mem_usage); - if(cg->st_mem_usage_limit) rrdset_is_obsolete(cg->st_mem_usage_limit); - if(cg->st_mem_utilization) rrdset_is_obsolete(cg->st_mem_utilization); - if(cg->st_mem_failcnt) rrdset_is_obsolete(cg->st_mem_failcnt); - if(cg->st_io) rrdset_is_obsolete(cg->st_io); - if(cg->st_serviced_ops) rrdset_is_obsolete(cg->st_serviced_ops); - if(cg->st_throttle_io) rrdset_is_obsolete(cg->st_throttle_io); - if(cg->st_throttle_serviced_ops) rrdset_is_obsolete(cg->st_throttle_serviced_ops); - if(cg->st_queued_ops) rrdset_is_obsolete(cg->st_queued_ops); - if(cg->st_merged_ops) rrdset_is_obsolete(cg->st_merged_ops); - - freez(cg->filename_cpuset_cpus); - freez(cg->filename_cpu_cfs_period); - freez(cg->filename_cpu_cfs_quota); - freez(cg->filename_memory_limit); - freez(cg->filename_memoryswap_limit); - - free_cgroup_network_interfaces(cg); - - freez(cg->cpuacct_usage.cpu_percpu); - - freez(cg->cpuacct_stat.filename); - freez(cg->cpuacct_usage.filename); - freez(cg->cpuacct_cpu_throttling.filename); - freez(cg->cpuacct_cpu_shares.filename); - - arl_free(cg->memory.arl_base); - freez(cg->memory.filename_detailed); - freez(cg->memory.filename_failcnt); - freez(cg->memory.filename_usage_in_bytes); - freez(cg->memory.filename_msw_usage_in_bytes); - - freez(cg->io_service_bytes.filename); - freez(cg->io_serviced.filename); - - freez(cg->throttle_io_service_bytes.filename); - freez(cg->throttle_io_serviced.filename); - - freez(cg->io_merged.filename); - freez(cg->io_queued.filename); - - free_pressure(&cg->cpu_pressure); - free_pressure(&cg->io_pressure); - free_pressure(&cg->memory_pressure); - free_pressure(&cg->irq_pressure); - - freez(cg->id); - freez(cg->intermediate_id); - freez(cg->chart_id); - freez(cg->name); - - rrdlabels_destroy(cg->chart_labels); - - freez(cg); - - cgroup_root_count--; -} - -// ---------------------------------------------------------------------------- - -static inline void discovery_rename_cgroup(struct cgroup *cg) { - if (!cg->pending_renames) { - return; - } - cg->pending_renames--; - - netdata_log_debug(D_CGROUP, "looking for the name of cgroup '%s' with chart id '%s'", cg->id, cg->chart_id); - netdata_log_debug(D_CGROUP, "executing command %s \"%s\" for cgroup '%s'", cgroups_rename_script, cg->intermediate_id, cg->chart_id); - pid_t cgroup_pid; - - FILE *fp_child_input, *fp_child_output; - (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_rename_script, cg->id, cg->intermediate_id); - if (!fp_child_output) { - collector_error("CGROUP: cannot popen(%s \"%s\", \"r\").", cgroups_rename_script, cg->intermediate_id); - cg->pending_renames = 0; - cg->processed = 1; - return; - } - - char buffer[CGROUP_CHARTID_LINE_MAX + 1]; - char *new_name = fgets(buffer, CGROUP_CHARTID_LINE_MAX, fp_child_output); - int exit_code = netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); - - switch (exit_code) { - case 0: - cg->pending_renames = 0; - break; - - case 3: - cg->pending_renames = 0; - cg->processed = 1; - break; - } - - if (cg->pending_renames || cg->processed) - return; - if (!new_name || !*new_name || *new_name == '\n') - return; - if (!(new_name = trim(new_name))) - return; - - char *name = new_name; - - if (!cg->chart_labels) - cg->chart_labels = rrdlabels_create(); - // read the new labels and remove the obsolete ones - rrdlabels_unmark_all(cg->chart_labels); - name = cgroup_parse_resolved_name_and_labels(cg->chart_labels, new_name); - rrdlabels_remove_all_unmarked(cg->chart_labels); - - freez(cg->name); - cg->name = strdupz(name); - - freez(cg->chart_id); - cg->chart_id = cgroup_chart_id_strdupz(name); - - substitute_dots_in_id(cg->chart_id); - cg->hash_chart_id = simple_hash(cg->chart_id); -} - -static void is_cgroup_procs_exist(netdata_ebpf_cgroup_shm_body_t *out, char *id) { - struct stat buf; - - snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_cpuset_base, id); - if (likely(stat(out->path, &buf) == 0)) { - return; - } - - snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_blkio_base, id); - if (likely(stat(out->path, &buf) == 0)) { - return; - } - - snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_memory_base, id); - if (likely(stat(out->path, &buf) == 0)) { - return; - } - - snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_devices_base, id); - if (likely(stat(out->path, &buf) == 0)) { - return; - } - - out->path[0] = '\0'; - out->enabled = 0; -} - -static inline void convert_cgroup_to_systemd_service(struct cgroup *cg) { - char buffer[CGROUP_CHARTID_LINE_MAX + 1]; - cg->options |= CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE; - strncpyz(buffer, cg->id, CGROUP_CHARTID_LINE_MAX); - char *s = buffer; - - // skip to the last slash - size_t len = strlen(s); - while (len--) { - if (unlikely(s[len] == '/')) { - break; - } - } - if (len) { - s = &s[len + 1]; - } - - // remove extension - len = strlen(s); - while (len--) { - if (unlikely(s[len] == '.')) { - break; - } - } - if (len) { - s[len] = '\0'; - } - - freez(cg->name); - cg->name = strdupz(s); - - freez(cg->chart_id); - cg->chart_id = cgroup_chart_id_strdupz(s); - substitute_dots_in_id(cg->chart_id); - cg->hash_chart_id = simple_hash(cg->chart_id); -} - -static inline struct cgroup *discovery_cgroup_add(const char *id) { - netdata_log_debug(D_CGROUP, "adding to list, cgroup with id '%s'", id); - - struct cgroup *cg = callocz(1, sizeof(struct cgroup)); - - cg->id = strdupz(id); - cg->hash = simple_hash(cg->id); - - cg->name = strdupz(id); - - cg->intermediate_id = cgroup_chart_id_strdupz(id); - - cg->chart_id = cgroup_chart_id_strdupz(id); - substitute_dots_in_id(cg->chart_id); - cg->hash_chart_id = simple_hash(cg->chart_id); - - if (cgroup_use_unified_cgroups) { - cg->options |= CGROUP_OPTIONS_IS_UNIFIED; - } - - if (!discovered_cgroup_root) - discovered_cgroup_root = cg; - else { - struct cgroup *t; - for (t = discovered_cgroup_root; t->discovered_next; t = t->discovered_next) { - } - t->discovered_next = cg; - } - - return cg; -} - -static inline struct cgroup *discovery_cgroup_find(const char *id) { - netdata_log_debug(D_CGROUP, "searching for cgroup '%s'", id); - - uint32_t hash = simple_hash(id); - - struct cgroup *cg; - for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) { - if(hash == cg->hash && strcmp(id, cg->id) == 0) - break; - } - - netdata_log_debug(D_CGROUP, "cgroup '%s' %s in memory", id, (cg)?"found":"not found"); - return cg; -} - -static inline void discovery_find_cgroup_in_dir_callback(const char *dir) { - if (!dir || !*dir) { - dir = "/"; - } - netdata_log_debug(D_CGROUP, "examining cgroup dir '%s'", dir); - - struct cgroup *cg = discovery_cgroup_find(dir); - if (cg) { - cg->available = 1; - return; - } - - if (cgroup_root_count >= cgroup_root_max) { - collector_info("CGROUP: maximum number of cgroups reached (%d). Not adding cgroup '%s'", cgroup_root_count, dir); - return; - } - - if (cgroup_max_depth > 0) { - int depth = calc_cgroup_depth(dir); - if (depth > cgroup_max_depth) { - collector_info("CGROUP: '%s' is too deep (%d, while max is %d)", dir, depth, cgroup_max_depth); - return; - } - } - - cg = discovery_cgroup_add(dir); - cg->available = 1; - cg->first_time_seen = 1; - cgroup_root_count++; -} - -static inline int discovery_find_dir_in_subdirs(const char *base, const char *this, void (*callback)(const char *)) { - if(!this) this = base; - netdata_log_debug(D_CGROUP, "searching for directories in '%s' (base '%s')", this?this:"", base); - - size_t dirlen = strlen(this), baselen = strlen(base); - - int ret = -1; - int enabled = -1; - - const char *relative_path = &this[baselen]; - if(!*relative_path) relative_path = "/"; - - DIR *dir = opendir(this); - if(!dir) { - collector_error("CGROUP: cannot read directory '%s'", base); - return ret; - } - ret = 1; - - callback(relative_path); - - struct dirent *de = NULL; - while((de = readdir(dir))) { - if(de->d_type == DT_DIR - && ( - (de->d_name[0] == '.' && de->d_name[1] == '\0') - || (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0') - )) - continue; - - if(de->d_type == DT_DIR) { - if(enabled == -1) { - const char *r = relative_path; - if(*r == '\0') r = "/"; - - // do not decent in directories we are not interested - enabled = matches_search_cgroup_paths(r); - } - - if(enabled) { - char *s = mallocz(dirlen + strlen(de->d_name) + 2); - strcpy(s, this); - strcat(s, "/"); - strcat(s, de->d_name); - int ret2 = discovery_find_dir_in_subdirs(base, s, callback); - if(ret2 > 0) ret += ret2; - freez(s); - } - } - } - - closedir(dir); - return ret; -} - -static inline void discovery_mark_all_cgroups_as_unavailable() { - netdata_log_debug(D_CGROUP, "marking all cgroups as not available"); - struct cgroup *cg; - for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { - cg->available = 0; - } -} - -static inline void discovery_update_filenames() { - struct cgroup *cg; - struct stat buf; - for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) { - if(unlikely(!cg->available || !cg->enabled || cg->pending_renames)) - continue; - - netdata_log_debug(D_CGROUP, "checking paths for cgroup '%s'", cg->id); - - // check for newly added cgroups - // and update the filenames they read - char filename[FILENAME_MAX + 1]; - if(!cgroup_use_unified_cgroups) { - if(unlikely(cgroup_enable_cpuacct_stat && !cg->cpuacct_stat.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/cpuacct.stat", cgroup_cpuacct_base, cg->id); - if(likely(stat(filename, &buf) != -1)) { - cg->cpuacct_stat.filename = strdupz(filename); - cg->cpuacct_stat.enabled = cgroup_enable_cpuacct_stat; - snprintfz(filename, FILENAME_MAX, "%s%s/cpuset.cpus", cgroup_cpuset_base, cg->id); - cg->filename_cpuset_cpus = strdupz(filename); - snprintfz(filename, FILENAME_MAX, "%s%s/cpu.cfs_period_us", cgroup_cpuacct_base, cg->id); - cg->filename_cpu_cfs_period = strdupz(filename); - snprintfz(filename, FILENAME_MAX, "%s%s/cpu.cfs_quota_us", cgroup_cpuacct_base, cg->id); - cg->filename_cpu_cfs_quota = strdupz(filename); - netdata_log_debug(D_CGROUP, "cpuacct.stat filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_stat.filename); - } - else - netdata_log_debug(D_CGROUP, "cpuacct.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); - } - - if(unlikely(cgroup_enable_cpuacct_usage && !cg->cpuacct_usage.filename && !is_cgroup_systemd_service(cg))) { - snprintfz(filename, FILENAME_MAX, "%s%s/cpuacct.usage_percpu", cgroup_cpuacct_base, cg->id); - if(likely(stat(filename, &buf) != -1)) { - cg->cpuacct_usage.filename = strdupz(filename); - cg->cpuacct_usage.enabled = cgroup_enable_cpuacct_usage; - netdata_log_debug(D_CGROUP, "cpuacct.usage_percpu filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_usage.filename); - } - else - netdata_log_debug(D_CGROUP, "cpuacct.usage_percpu file for cgroup '%s': '%s' does not exist.", cg->id, filename); - } - if(unlikely(cgroup_enable_cpuacct_cpu_throttling && !cg->cpuacct_cpu_throttling.filename && !is_cgroup_systemd_service(cg))) { - snprintfz(filename, FILENAME_MAX, "%s%s/cpu.stat", cgroup_cpuacct_base, cg->id); - if(likely(stat(filename, &buf) != -1)) { - cg->cpuacct_cpu_throttling.filename = strdupz(filename); - cg->cpuacct_cpu_throttling.enabled = cgroup_enable_cpuacct_cpu_throttling; - netdata_log_debug(D_CGROUP, "cpu.stat filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_cpu_throttling.filename); - } - else - netdata_log_debug(D_CGROUP, "cpu.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); - } - if (unlikely( - cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename && - !is_cgroup_systemd_service(cg))) { - snprintfz(filename, FILENAME_MAX, "%s%s/cpu.shares", cgroup_cpuacct_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->cpuacct_cpu_shares.filename = strdupz(filename); - cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares; - netdata_log_debug( - D_CGROUP, "cpu.shares filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_cpu_shares.filename); - } else - netdata_log_debug(D_CGROUP, "cpu.shares file for cgroup '%s': '%s' does not exist.", cg->id, filename); - } - - if(unlikely((cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed && (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) { - snprintfz(filename, FILENAME_MAX, "%s%s/memory.stat", cgroup_memory_base, cg->id); - if(likely(stat(filename, &buf) != -1)) { - cg->memory.filename_detailed = strdupz(filename); - cg->memory.enabled_detailed = (cgroup_enable_detailed_memory == CONFIG_BOOLEAN_YES)?CONFIG_BOOLEAN_YES:CONFIG_BOOLEAN_AUTO; - netdata_log_debug(D_CGROUP, "memory.stat filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_detailed); - } - else - netdata_log_debug(D_CGROUP, "memory.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); - } - - if(unlikely(cgroup_enable_memory && !cg->memory.filename_usage_in_bytes)) { - snprintfz(filename, FILENAME_MAX, "%s%s/memory.usage_in_bytes", cgroup_memory_base, cg->id); - if(likely(stat(filename, &buf) != -1)) { - cg->memory.filename_usage_in_bytes = strdupz(filename); - cg->memory.enabled_usage_in_bytes = cgroup_enable_memory; - netdata_log_debug(D_CGROUP, "memory.usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_usage_in_bytes); - snprintfz(filename, FILENAME_MAX, "%s%s/memory.limit_in_bytes", cgroup_memory_base, cg->id); - cg->filename_memory_limit = strdupz(filename); - } - else - netdata_log_debug(D_CGROUP, "memory.usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); - } - - if(unlikely(cgroup_enable_swap && !cg->memory.filename_msw_usage_in_bytes)) { - snprintfz(filename, FILENAME_MAX, "%s%s/memory.memsw.usage_in_bytes", cgroup_memory_base, cg->id); - if(likely(stat(filename, &buf) != -1)) { - cg->memory.filename_msw_usage_in_bytes = strdupz(filename); - cg->memory.enabled_msw_usage_in_bytes = cgroup_enable_swap; - snprintfz(filename, FILENAME_MAX, "%s%s/memory.memsw.limit_in_bytes", cgroup_memory_base, cg->id); - cg->filename_memoryswap_limit = strdupz(filename); - netdata_log_debug(D_CGROUP, "memory.msw_usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_msw_usage_in_bytes); - } - else - netdata_log_debug(D_CGROUP, "memory.msw_usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); - } - - if(unlikely(cgroup_enable_memory_failcnt && !cg->memory.filename_failcnt)) { - snprintfz(filename, FILENAME_MAX, "%s%s/memory.failcnt", cgroup_memory_base, cg->id); - if(likely(stat(filename, &buf) != -1)) { - cg->memory.filename_failcnt = strdupz(filename); - cg->memory.enabled_failcnt = cgroup_enable_memory_failcnt; - netdata_log_debug(D_CGROUP, "memory.failcnt filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_failcnt); - } - else - netdata_log_debug(D_CGROUP, "memory.failcnt file for cgroup '%s': '%s' does not exist.", cg->id, filename); - } - - if(unlikely(cgroup_enable_blkio_io && !cg->io_service_bytes.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_service_bytes_recursive", cgroup_blkio_base, cg->id); - if (unlikely(stat(filename, &buf) != -1)) { - cg->io_service_bytes.filename = strdupz(filename); - cg->io_service_bytes.enabled = cgroup_enable_blkio_io; - netdata_log_debug(D_CGROUP, "blkio.io_service_bytes_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename); - } else { - netdata_log_debug(D_CGROUP, "blkio.io_service_bytes_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_service_bytes", cgroup_blkio_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->io_service_bytes.filename = strdupz(filename); - cg->io_service_bytes.enabled = cgroup_enable_blkio_io; - netdata_log_debug(D_CGROUP, "blkio.io_service_bytes filename for cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename); - } else { - netdata_log_debug(D_CGROUP, "blkio.io_service_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); - } - } - } - - if (unlikely(cgroup_enable_blkio_ops && !cg->io_serviced.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_serviced_recursive", cgroup_blkio_base, cg->id); - if (unlikely(stat(filename, &buf) != -1)) { - cg->io_serviced.filename = strdupz(filename); - cg->io_serviced.enabled = cgroup_enable_blkio_ops; - netdata_log_debug(D_CGROUP, "blkio.io_serviced_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_serviced.filename); - } else { - netdata_log_debug(D_CGROUP, "blkio.io_serviced_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_serviced", cgroup_blkio_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->io_serviced.filename = strdupz(filename); - cg->io_serviced.enabled = cgroup_enable_blkio_ops; - netdata_log_debug(D_CGROUP, "blkio.io_serviced filename for cgroup '%s': '%s'", cg->id, cg->io_serviced.filename); - } else { - netdata_log_debug(D_CGROUP, "blkio.io_serviced file for cgroup '%s': '%s' does not exist.", cg->id, filename); - } - } - } - - if (unlikely(cgroup_enable_blkio_throttle_io && !cg->throttle_io_service_bytes.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_service_bytes_recursive", cgroup_blkio_base, cg->id); - if (unlikely(stat(filename, &buf) != -1)) { - cg->throttle_io_service_bytes.filename = strdupz(filename); - cg->throttle_io_service_bytes.enabled = cgroup_enable_blkio_throttle_io; - netdata_log_debug(D_CGROUP,"blkio.throttle.io_service_bytes_recursive filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_service_bytes.filename); - } else { - netdata_log_debug(D_CGROUP, "blkio.throttle.io_service_bytes_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); - snprintfz( - filename, FILENAME_MAX, "%s%s/blkio.throttle.io_service_bytes", cgroup_blkio_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->throttle_io_service_bytes.filename = strdupz(filename); - cg->throttle_io_service_bytes.enabled = cgroup_enable_blkio_throttle_io; - netdata_log_debug(D_CGROUP, "blkio.throttle.io_service_bytes filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_service_bytes.filename); - } else { - netdata_log_debug(D_CGROUP, "blkio.throttle.io_service_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); - } - } - } - - if (unlikely(cgroup_enable_blkio_throttle_ops && !cg->throttle_io_serviced.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_serviced_recursive", cgroup_blkio_base, cg->id); - if (unlikely(stat(filename, &buf) != -1)) { - cg->throttle_io_serviced.filename = strdupz(filename); - cg->throttle_io_serviced.enabled = cgroup_enable_blkio_throttle_ops; - netdata_log_debug(D_CGROUP, "blkio.throttle.io_serviced_recursive filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_serviced.filename); - } else { - netdata_log_debug(D_CGROUP, "blkio.throttle.io_serviced_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_serviced", cgroup_blkio_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->throttle_io_serviced.filename = strdupz(filename); - cg->throttle_io_serviced.enabled = cgroup_enable_blkio_throttle_ops; - netdata_log_debug(D_CGROUP, "blkio.throttle.io_serviced filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_serviced.filename); - } else { - netdata_log_debug(D_CGROUP, "blkio.throttle.io_serviced file for cgroup '%s': '%s' does not exist.", cg->id, filename); - } - } - } - - if (unlikely(cgroup_enable_blkio_merged_ops && !cg->io_merged.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_merged_recursive", cgroup_blkio_base, cg->id); - if (unlikely(stat(filename, &buf) != -1)) { - cg->io_merged.filename = strdupz(filename); - cg->io_merged.enabled = cgroup_enable_blkio_merged_ops; - netdata_log_debug(D_CGROUP, "blkio.io_merged_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_merged.filename); - } else { - netdata_log_debug(D_CGROUP, "blkio.io_merged_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_merged", cgroup_blkio_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->io_merged.filename = strdupz(filename); - cg->io_merged.enabled = cgroup_enable_blkio_merged_ops; - netdata_log_debug(D_CGROUP, "blkio.io_merged filename for cgroup '%s': '%s'", cg->id, cg->io_merged.filename); - } else { - netdata_log_debug(D_CGROUP, "blkio.io_merged file for cgroup '%s': '%s' does not exist.", cg->id, filename); - } - } - } - - if (unlikely(cgroup_enable_blkio_queued_ops && !cg->io_queued.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_queued_recursive", cgroup_blkio_base, cg->id); - if (unlikely(stat(filename, &buf) != -1)) { - cg->io_queued.filename = strdupz(filename); - cg->io_queued.enabled = cgroup_enable_blkio_queued_ops; - netdata_log_debug(D_CGROUP, "blkio.io_queued_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_queued.filename); - } else { - netdata_log_debug(D_CGROUP, "blkio.io_queued_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_queued", cgroup_blkio_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->io_queued.filename = strdupz(filename); - cg->io_queued.enabled = cgroup_enable_blkio_queued_ops; - netdata_log_debug(D_CGROUP, "blkio.io_queued filename for cgroup '%s': '%s'", cg->id, cg->io_queued.filename); - } else { - netdata_log_debug(D_CGROUP, "blkio.io_queued file for cgroup '%s': '%s' does not exist.", cg->id, filename); - } - } - } - } - else if(likely(cgroup_unified_exist)) { - if(unlikely(cgroup_enable_blkio_io && !cg->io_service_bytes.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/io.stat", cgroup_unified_base, cg->id); - if(likely(stat(filename, &buf) != -1)) { - cg->io_service_bytes.filename = strdupz(filename); - cg->io_service_bytes.enabled = cgroup_enable_blkio_io; - netdata_log_debug(D_CGROUP, "io.stat filename for unified cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename); - } else - netdata_log_debug(D_CGROUP, "io.stat file for unified cgroup '%s': '%s' does not exist.", cg->id, filename); - } - if (unlikely(cgroup_enable_blkio_ops && !cg->io_serviced.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/io.stat", cgroup_unified_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->io_serviced.filename = strdupz(filename); - cg->io_serviced.enabled = cgroup_enable_blkio_ops; - netdata_log_debug(D_CGROUP, "io.stat filename for unified cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename); - } else - netdata_log_debug(D_CGROUP, "io.stat file for unified cgroup '%s': '%s' does not exist.", cg->id, filename); - } - if (unlikely( - (cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_cpu_throttling) && - !cg->cpuacct_stat.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/cpu.stat", cgroup_unified_base, cg->id); - if(likely(stat(filename, &buf) != -1)) { - cg->cpuacct_stat.filename = strdupz(filename); - cg->cpuacct_stat.enabled = cgroup_enable_cpuacct_stat; - cg->cpuacct_cpu_throttling.enabled = cgroup_enable_cpuacct_cpu_throttling; - cg->filename_cpuset_cpus = NULL; - cg->filename_cpu_cfs_period = NULL; - snprintfz(filename, FILENAME_MAX, "%s%s/cpu.max", cgroup_unified_base, cg->id); - cg->filename_cpu_cfs_quota = strdupz(filename); - netdata_log_debug(D_CGROUP, "cpu.stat filename for unified cgroup '%s': '%s'", cg->id, cg->cpuacct_stat.filename); - } - else - netdata_log_debug(D_CGROUP, "cpu.stat file for unified cgroup '%s': '%s' does not exist.", cg->id, filename); - } - if (unlikely(cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/cpu.weight", cgroup_unified_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->cpuacct_cpu_shares.filename = strdupz(filename); - cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares; - netdata_log_debug(D_CGROUP, "cpu.weight filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_cpu_shares.filename); - } else - netdata_log_debug(D_CGROUP, "cpu.weight file for cgroup '%s': '%s' does not exist.", cg->id, filename); - } - - if(unlikely((cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed && (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) { - snprintfz(filename, FILENAME_MAX, "%s%s/memory.stat", cgroup_unified_base, cg->id); - if(likely(stat(filename, &buf) != -1)) { - cg->memory.filename_detailed = strdupz(filename); - cg->memory.enabled_detailed = (cgroup_enable_detailed_memory == CONFIG_BOOLEAN_YES)?CONFIG_BOOLEAN_YES:CONFIG_BOOLEAN_AUTO; - netdata_log_debug(D_CGROUP, "memory.stat filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_detailed); - } - else - netdata_log_debug(D_CGROUP, "memory.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); - } - - if(unlikely(cgroup_enable_memory && !cg->memory.filename_usage_in_bytes)) { - snprintfz(filename, FILENAME_MAX, "%s%s/memory.current", cgroup_unified_base, cg->id); - if(likely(stat(filename, &buf) != -1)) { - cg->memory.filename_usage_in_bytes = strdupz(filename); - cg->memory.enabled_usage_in_bytes = cgroup_enable_memory; - netdata_log_debug(D_CGROUP, "memory.current filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_usage_in_bytes); - snprintfz(filename, FILENAME_MAX, "%s%s/memory.max", cgroup_unified_base, cg->id); - cg->filename_memory_limit = strdupz(filename); - } - else - netdata_log_debug(D_CGROUP, "memory.current file for cgroup '%s': '%s' does not exist.", cg->id, filename); - } - - if(unlikely(cgroup_enable_swap && !cg->memory.filename_msw_usage_in_bytes)) { - snprintfz(filename, FILENAME_MAX, "%s%s/memory.swap.current", cgroup_unified_base, cg->id); - if(likely(stat(filename, &buf) != -1)) { - cg->memory.filename_msw_usage_in_bytes = strdupz(filename); - cg->memory.enabled_msw_usage_in_bytes = cgroup_enable_swap; - snprintfz(filename, FILENAME_MAX, "%s%s/memory.swap.max", cgroup_unified_base, cg->id); - cg->filename_memoryswap_limit = strdupz(filename); - netdata_log_debug(D_CGROUP, "memory.swap.current filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_msw_usage_in_bytes); - } - else - netdata_log_debug(D_CGROUP, "memory.swap file for cgroup '%s': '%s' does not exist.", cg->id, filename); - } - - if (unlikely(cgroup_enable_pressure_cpu && !cg->cpu_pressure.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/cpu.pressure", cgroup_unified_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->cpu_pressure.filename = strdupz(filename); - cg->cpu_pressure.some.enabled = cgroup_enable_pressure_cpu; - cg->cpu_pressure.full.enabled = CONFIG_BOOLEAN_NO; - netdata_log_debug(D_CGROUP, "cpu.pressure filename for cgroup '%s': '%s'", cg->id, cg->cpu_pressure.filename); - } else { - netdata_log_debug(D_CGROUP, "cpu.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); - } - } - - if (unlikely((cgroup_enable_pressure_io_some || cgroup_enable_pressure_io_full) && !cg->io_pressure.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/io.pressure", cgroup_unified_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->io_pressure.filename = strdupz(filename); - cg->io_pressure.some.enabled = cgroup_enable_pressure_io_some; - cg->io_pressure.full.enabled = cgroup_enable_pressure_io_full; - netdata_log_debug(D_CGROUP, "io.pressure filename for cgroup '%s': '%s'", cg->id, cg->io_pressure.filename); - } else { - netdata_log_debug(D_CGROUP, "io.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); - } - } - - if (unlikely((cgroup_enable_pressure_memory_some || cgroup_enable_pressure_memory_full) && !cg->memory_pressure.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/memory.pressure", cgroup_unified_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->memory_pressure.filename = strdupz(filename); - cg->memory_pressure.some.enabled = cgroup_enable_pressure_memory_some; - cg->memory_pressure.full.enabled = cgroup_enable_pressure_memory_full; - netdata_log_debug(D_CGROUP, "memory.pressure filename for cgroup '%s': '%s'", cg->id, cg->memory_pressure.filename); - } else { - netdata_log_debug(D_CGROUP, "memory.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); - } - } - - if (unlikely((cgroup_enable_pressure_irq_some || cgroup_enable_pressure_irq_full) && !cg->irq_pressure.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/irq.pressure", cgroup_unified_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->irq_pressure.filename = strdupz(filename); - cg->irq_pressure.some.enabled = cgroup_enable_pressure_irq_some; - cg->irq_pressure.full.enabled = cgroup_enable_pressure_irq_full; - netdata_log_debug(D_CGROUP, "irq.pressure filename for cgroup '%s': '%s'", cg->id, cg->irq_pressure.filename); - } else { - netdata_log_debug(D_CGROUP, "irq.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); - } - } - } - } -} - -static inline void discovery_cleanup_all_cgroups() { - struct cgroup *cg = discovered_cgroup_root, *last = NULL; - - for(; cg ;) { - if(!cg->available) { - // enable the first duplicate cgroup - { - struct cgroup *t; - for (t = discovered_cgroup_root; t; t = t->discovered_next) { - if (t != cg && t->available && !t->enabled && t->options & CGROUP_OPTIONS_DISABLED_DUPLICATE && - (is_cgroup_systemd_service(t) == is_cgroup_systemd_service(cg)) && - t->hash_chart_id == cg->hash_chart_id && !strcmp(t->chart_id, cg->chart_id)) { - netdata_log_debug(D_CGROUP, "Enabling duplicate of cgroup '%s' with id '%s', because the original with id '%s' stopped.", t->chart_id, t->id, cg->id); - t->enabled = 1; - t->options &= ~CGROUP_OPTIONS_DISABLED_DUPLICATE; - break; - } - } - } - - if(!last) - discovered_cgroup_root = cg->discovered_next; - else - last->discovered_next = cg->discovered_next; - - cgroup_free(cg); - - if(!last) - cg = discovered_cgroup_root; - else - cg = last->discovered_next; - } - else { - last = cg; - cg = cg->discovered_next; - } - } -} - -static inline void discovery_copy_discovered_cgroups_to_reader() { - netdata_log_debug(D_CGROUP, "copy discovered cgroups to the main group list"); - - struct cgroup *cg; - - for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { - cg->next = cg->discovered_next; - } - - cgroup_root = discovered_cgroup_root; -} - -static inline void discovery_share_cgroups_with_ebpf() { - struct cgroup *cg; - int count; - struct stat buf; - - if (shm_mutex_cgroup_ebpf == SEM_FAILED) { - return; - } - sem_wait(shm_mutex_cgroup_ebpf); - - for (cg = cgroup_root, count = 0; cg; cg = cg->next, count++) { - netdata_ebpf_cgroup_shm_body_t *ptr = &shm_cgroup_ebpf.body[count]; - char *prefix = (is_cgroup_systemd_service(cg)) ? services_chart_id_prefix : cgroup_chart_id_prefix; - snprintfz(ptr->name, CGROUP_EBPF_NAME_SHARED_LENGTH - 1, "%s%s", prefix, cg->chart_id); - ptr->hash = simple_hash(ptr->name); - ptr->options = cg->options; - ptr->enabled = cg->enabled; - if (cgroup_use_unified_cgroups) { - snprintfz(ptr->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_unified_base, cg->id); - if (likely(stat(ptr->path, &buf) == -1)) { - ptr->path[0] = '\0'; - ptr->enabled = 0; - } - } else { - is_cgroup_procs_exist(ptr, cg->id); - } - - netdata_log_debug(D_CGROUP, "cgroup shared: NAME=%s, ENABLED=%d", ptr->name, ptr->enabled); - } - - shm_cgroup_ebpf.header->cgroup_root_count = count; - sem_post(shm_mutex_cgroup_ebpf); -} - -static inline void discovery_find_all_cgroups_v1() { - if (cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_usage) { - if (discovery_find_dir_in_subdirs(cgroup_cpuacct_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { - cgroup_enable_cpuacct_stat = cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_NO; - collector_error("CGROUP: disabled cpu statistics."); - } - } - - if (cgroup_enable_blkio_io || cgroup_enable_blkio_ops || cgroup_enable_blkio_throttle_io || - cgroup_enable_blkio_throttle_ops || cgroup_enable_blkio_merged_ops || cgroup_enable_blkio_queued_ops) { - if (discovery_find_dir_in_subdirs(cgroup_blkio_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { - cgroup_enable_blkio_io = cgroup_enable_blkio_ops = cgroup_enable_blkio_throttle_io = - cgroup_enable_blkio_throttle_ops = cgroup_enable_blkio_merged_ops = cgroup_enable_blkio_queued_ops = - CONFIG_BOOLEAN_NO; - collector_error("CGROUP: disabled blkio statistics."); - } - } - - if (cgroup_enable_memory || cgroup_enable_detailed_memory || cgroup_enable_swap || cgroup_enable_memory_failcnt) { - if (discovery_find_dir_in_subdirs(cgroup_memory_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { - cgroup_enable_memory = cgroup_enable_detailed_memory = cgroup_enable_swap = cgroup_enable_memory_failcnt = - CONFIG_BOOLEAN_NO; - collector_error("CGROUP: disabled memory statistics."); - } - } - - if (cgroup_search_in_devices) { - if (discovery_find_dir_in_subdirs(cgroup_devices_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { - cgroup_search_in_devices = 0; - collector_error("CGROUP: disabled devices statistics."); - } - } -} - -static inline void discovery_find_all_cgroups_v2() { - if (discovery_find_dir_in_subdirs(cgroup_unified_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { - cgroup_unified_exist = CONFIG_BOOLEAN_NO; - collector_error("CGROUP: disabled unified cgroups statistics."); - } -} - -static int is_digits_only(const char *s) { - do { - if (!isdigit(*s++)) { - return 0; - } - } while (*s); - - return 1; -} - -static inline void discovery_process_first_time_seen_cgroup(struct cgroup *cg) { - if (!cg->first_time_seen) { - return; - } - cg->first_time_seen = 0; - - char comm[TASK_COMM_LEN + 1]; - - if (cg->container_orchestrator == CGROUPS_ORCHESTRATOR_UNSET) { - if (strstr(cg->id, "kubepods")) { - cg->container_orchestrator = CGROUPS_ORCHESTRATOR_K8S; - } else { - cg->container_orchestrator = CGROUPS_ORCHESTRATOR_UNKNOWN; - } - } - - if (is_inside_k8s && !k8s_get_container_first_proc_comm(cg->id, comm)) { - // container initialization may take some time when CPU % is high - // seen on GKE: comm is '6' before 'runc:[2:INIT]' (dunno if it could be another number) - if (is_digits_only(comm) || matches_entrypoint_parent_process_comm(comm)) { - cg->first_time_seen = 1; - return; - } - if (!strcmp(comm, "pause")) { - // a container that holds the network namespace for the pod - // we don't need to collect its metrics - cg->processed = 1; - return; - } - } - - if (cgroup_enable_systemd_services && matches_systemd_services_cgroups(cg->id)) { - netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'cgroups to match as systemd services'", cg->id, cg->chart_id); - convert_cgroup_to_systemd_service(cg); - return; - } - - if (matches_enabled_cgroup_renames(cg->id)) { - netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'run script to rename cgroups matching', will try to rename it", cg->id, cg->chart_id); - if (is_inside_k8s && k8s_is_container(cg->id)) { - // it may take up to a minute for the K8s API to return data for the container - // tested on AWS K8s cluster with 100% CPU utilization - cg->pending_renames = 9; // 1.5 minute - } else { - cg->pending_renames = 2; - } - } -} - -static int discovery_is_cgroup_duplicate(struct cgroup *cg) { - // https://github.com/netdata/netdata/issues/797#issuecomment-241248884 - struct cgroup *c; - for (c = discovered_cgroup_root; c; c = c->discovered_next) { - if (c != cg && c->enabled && (is_cgroup_systemd_service(c) == is_cgroup_systemd_service(cg)) && - c->hash_chart_id == cg->hash_chart_id && !strcmp(c->chart_id, cg->chart_id)) { - collector_error( - "CGROUP: chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.", - cg->chart_id, - c->id, - cg->id); - return 1; - } - } - return 0; -} - -static inline void discovery_process_cgroup(struct cgroup *cg) { - if (!cg) { - netdata_log_debug(D_CGROUP, "discovery_process_cgroup() received NULL"); - return; - } - if (!cg->available || cg->processed) { - return; - } - - if (cg->first_time_seen) { - worker_is_busy(WORKER_DISCOVERY_PROCESS_FIRST_TIME); - discovery_process_first_time_seen_cgroup(cg); - if (unlikely(cg->first_time_seen || cg->processed)) { - return; - } - } - - if (cg->pending_renames) { - worker_is_busy(WORKER_DISCOVERY_PROCESS_RENAME); - discovery_rename_cgroup(cg); - if (unlikely(cg->pending_renames || cg->processed)) { - return; - } - } - - cg->processed = 1; - - if ((strlen(cg->chart_id) + strlen(cgroup_chart_id_prefix)) >= RRD_ID_LENGTH_MAX) { - collector_info("cgroup '%s' (chart id '%s') disabled because chart_id exceeds the limit (RRD_ID_LENGTH_MAX)", cg->id, cg->chart_id); - return; - } - - if (is_cgroup_systemd_service(cg)) { - if (discovery_is_cgroup_duplicate(cg)) { - cg->enabled = 0; - cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; - return; - } - if (!cg->chart_labels) - cg->chart_labels = rrdlabels_create(); - rrdlabels_add(cg->chart_labels, "service_name", cg->name, RRDLABEL_SRC_AUTO); - cg->enabled = 1; - return; - } - - if (!(cg->enabled = matches_enabled_cgroup_names(cg->name))) { - netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups names matching'", cg->id, cg->name); - return; - } - - if (!(cg->enabled = matches_enabled_cgroup_paths(cg->id))) { - netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups matching'", cg->id, cg->name); - return; - } - - if (discovery_is_cgroup_duplicate(cg)) { - cg->enabled = 0; - cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; - return; - } - - if (!cg->chart_labels) - cg->chart_labels = rrdlabels_create(); - - if (!k8s_is_kubepod(cg)) { - rrdlabels_add(cg->chart_labels, "cgroup_name", cg->name, RRDLABEL_SRC_AUTO); - if (!rrdlabels_exist(cg->chart_labels, "image")) - rrdlabels_add(cg->chart_labels, "image", "", RRDLABEL_SRC_AUTO); - } - - worker_is_busy(WORKER_DISCOVERY_PROCESS_NETWORK); - read_cgroup_network_interfaces(cg); -} - -static inline void discovery_find_all_cgroups() { - netdata_log_debug(D_CGROUP, "searching for cgroups"); - - worker_is_busy(WORKER_DISCOVERY_INIT); - discovery_mark_all_cgroups_as_unavailable(); - - worker_is_busy(WORKER_DISCOVERY_FIND); - if (!cgroup_use_unified_cgroups) { - discovery_find_all_cgroups_v1(); - } else { - discovery_find_all_cgroups_v2(); - } - - struct cgroup *cg; - for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { - worker_is_busy(WORKER_DISCOVERY_PROCESS); - discovery_process_cgroup(cg); - } - - worker_is_busy(WORKER_DISCOVERY_UPDATE); - discovery_update_filenames(); - - worker_is_busy(WORKER_DISCOVERY_LOCK); - uv_mutex_lock(&cgroup_root_mutex); - - worker_is_busy(WORKER_DISCOVERY_CLEANUP); - discovery_cleanup_all_cgroups(); - - worker_is_busy(WORKER_DISCOVERY_COPY); - discovery_copy_discovered_cgroups_to_reader(); - - uv_mutex_unlock(&cgroup_root_mutex); - - worker_is_busy(WORKER_DISCOVERY_SHARE); - discovery_share_cgroups_with_ebpf(); - - netdata_log_debug(D_CGROUP, "done searching for cgroups"); -} - -static inline char *cgroup_chart_type(char *buffer, struct cgroup *cg) { - if(buffer[0]) return buffer; - - if (cg->chart_id[0] == '\0' || (cg->chart_id[0] == '/' && cg->chart_id[1] == '\0')) - strncpy(buffer, "cgroup_root", RRD_ID_LENGTH_MAX); - else if (is_cgroup_systemd_service(cg)) - snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s%s", services_chart_id_prefix, cg->chart_id); - else - snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s%s", cgroup_chart_id_prefix, cg->chart_id); - - return buffer; -} - -void cgroup_discovery_worker(void *ptr) -{ - UNUSED(ptr); - - worker_register("CGROUPSDISC"); - worker_register_job_name(WORKER_DISCOVERY_INIT, "init"); - worker_register_job_name(WORKER_DISCOVERY_FIND, "find"); - worker_register_job_name(WORKER_DISCOVERY_PROCESS, "process"); - worker_register_job_name(WORKER_DISCOVERY_PROCESS_RENAME, "rename"); - worker_register_job_name(WORKER_DISCOVERY_PROCESS_NETWORK, "network"); - worker_register_job_name(WORKER_DISCOVERY_PROCESS_FIRST_TIME, "new"); - worker_register_job_name(WORKER_DISCOVERY_UPDATE, "update"); - worker_register_job_name(WORKER_DISCOVERY_CLEANUP, "cleanup"); - worker_register_job_name(WORKER_DISCOVERY_COPY, "copy"); - worker_register_job_name(WORKER_DISCOVERY_SHARE, "share"); - worker_register_job_name(WORKER_DISCOVERY_LOCK, "lock"); - - entrypoint_parent_process_comm = simple_pattern_create( - " runc:[* " // http://terenceli.github.io/%E6%8A%80%E6%9C%AF/2021/12/28/runc-internals-3) - " exe ", // https://github.com/falcosecurity/falco/blob/9d41b0a151b83693929d3a9c84f7c5c85d070d3a/rules/falco_rules.yaml#L1961 - NULL, - SIMPLE_PATTERN_EXACT, true); - - service_register(SERVICE_THREAD_TYPE_LIBUV, NULL, NULL, NULL, false); - - while (service_running(SERVICE_COLLECTORS)) { - worker_is_idle(); - - uv_mutex_lock(&discovery_thread.mutex); - uv_cond_wait(&discovery_thread.cond_var, &discovery_thread.mutex); - uv_mutex_unlock(&discovery_thread.mutex); - - if (unlikely(!service_running(SERVICE_COLLECTORS))) - break; - - discovery_find_all_cgroups(); - } - collector_info("discovery thread stopped"); - worker_unregister(); - service_exits(); - __atomic_store_n(&discovery_thread.exited,1,__ATOMIC_RELAXED); -} - -// ---------------------------------------------------------------------------- -// generate charts - -#define CHART_TITLE_MAX 300 - -void update_systemd_services_charts( - int update_every, - int do_cpu, - int do_mem_usage, - int do_mem_detailed, - int do_mem_failcnt, - int do_swap_usage, - int do_io, - int do_io_ops, - int do_throttle_io, - int do_throttle_ops, - int do_queued_ops, - int do_merged_ops) -{ - // update the values - struct cgroup *cg; - int systemd_cgroup_chart_priority = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD; - char type[RRD_ID_LENGTH_MAX + 1]; - - for (cg = cgroup_root; cg; cg = cg->next) { - if (unlikely(!cg->enabled || cg->pending_renames || !is_cgroup_systemd_service(cg))) - continue; - - type[0] = '\0'; - if (likely(do_cpu && cg->cpuacct_stat.updated)) { - if (unlikely(!cg->st_cpu)) { - cg->st_cpu = rrdset_create_localhost( - cgroup_chart_type(type, cg), - "cpu_utilization", - NULL, - "cpu", - "systemd.service.cpu.utilization", - "Systemd Services CPU utilization (100%% = 1 core)", - "percentage", - PLUGIN_CGROUPS_NAME, - PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME, - systemd_cgroup_chart_priority, - update_every, - RRDSET_TYPE_STACKED); - - rrdset_update_rrdlabels(cg->st_cpu, cg->chart_labels); - if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { - rrddim_add(cg->st_cpu, "user", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(cg->st_cpu, "system", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL); - } else { - rrddim_add(cg->st_cpu, "user", NULL, 100, 1000000, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(cg->st_cpu, "system", NULL, 100, 1000000, RRD_ALGORITHM_INCREMENTAL); - } - } - - // complete the iteration - rrddim_set(cg->st_cpu, "user", cg->cpuacct_stat.user); - rrddim_set(cg->st_cpu, "system", cg->cpuacct_stat.system); - rrdset_done(cg->st_cpu); - } - - if (unlikely(do_mem_usage && cg->memory.updated_usage_in_bytes)) { - if (unlikely(!cg->st_mem_usage)) { - cg->st_mem_usage = rrdset_create_localhost( - cgroup_chart_type(type, cg), - "mem_usage", - NULL, - "mem", - "systemd.service.memory.usage", - "Systemd Services Used Memory", - "MiB", - PLUGIN_CGROUPS_NAME, - PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME, - systemd_cgroup_chart_priority + 5, - update_every, - RRDSET_TYPE_STACKED); - - rrdset_update_rrdlabels(cg->st_mem_usage, cg->chart_labels); - rrddim_add(cg->st_mem_usage, "ram", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - if (likely(do_swap_usage)) - rrddim_add(cg->st_mem_usage, "swap", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set(cg->st_mem_usage, "ram", cg->memory.usage_in_bytes); - if (likely(do_swap_usage)) { - if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { - rrddim_set( - cg->st_mem_usage, - "swap", - cg->memory.msw_usage_in_bytes > (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) ? - cg->memory.msw_usage_in_bytes - - (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) : - 0); - } else { - rrddim_set(cg->st_mem_usage, "swap", cg->memory.msw_usage_in_bytes); - } - } - rrdset_done(cg->st_mem_usage); - } - - if (likely(do_mem_failcnt && cg->memory.updated_failcnt)) { - if (unlikely(do_mem_failcnt && !cg->st_mem_failcnt)) { - cg->st_mem_failcnt = rrdset_create_localhost( - cgroup_chart_type(type, cg), - "mem_failcnt", - NULL, - "mem", - "systemd.service.memory.failcnt", - "Systemd Services Memory Limit Failures", - "failures/s", - PLUGIN_CGROUPS_NAME, - PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME, - systemd_cgroup_chart_priority + 10, - update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(cg->st_mem_failcnt, cg->chart_labels); - rrddim_add(cg->st_mem_failcnt, "fail", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set(cg->st_mem_failcnt, "fail", cg->memory.failcnt); - rrdset_done(cg->st_mem_failcnt); - } - - if (likely(do_mem_detailed && cg->memory.updated_detailed)) { - if (unlikely(!cg->st_mem)) { - cg->st_mem = rrdset_create_localhost( - cgroup_chart_type(type, cg), - "mem_ram_usage", - NULL, - "mem", - "systemd.service.memory.ram.usage", - "Systemd Services Memory", - "MiB", - PLUGIN_CGROUPS_NAME, - PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME, - systemd_cgroup_chart_priority + 15, - update_every, - RRDSET_TYPE_STACKED); - - rrdset_update_rrdlabels(cg->st_mem, cg->chart_labels); - rrddim_add(cg->st_mem, "rss", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(cg->st_mem, "cache", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(cg->st_mem, "mapped_file", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(cg->st_mem, "rss_huge", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set(cg->st_mem, "rss", cg->memory.total_rss); - rrddim_set(cg->st_mem, "cache", cg->memory.total_cache); - rrddim_set(cg->st_mem, "mapped_file", cg->memory.total_mapped_file); - rrddim_set(cg->st_mem, "rss_huge", cg->memory.total_rss_huge); - rrdset_done(cg->st_mem); - - if (unlikely(!cg->st_writeback)) { - cg->st_writeback = rrdset_create_localhost( - cgroup_chart_type(type, cg), - "mem_writeback", - NULL, - "mem", - "systemd.service.memory.writeback", - "Systemd Services Writeback Memory", - "MiB", - PLUGIN_CGROUPS_NAME, - PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME, - systemd_cgroup_chart_priority + 20, - update_every, - RRDSET_TYPE_STACKED); - - rrdset_update_rrdlabels(cg->st_writeback, cg->chart_labels); - rrddim_add(cg->st_writeback, "writeback", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(cg->st_writeback, "dirty", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set(cg->st_writeback, "writeback", cg->memory.total_writeback); - rrddim_set(cg->st_writeback, "dirty", cg->memory.total_dirty); - rrdset_done(cg->st_writeback); - - if (unlikely(!cg->st_pgfaults)) { - cg->st_pgfaults = rrdset_create_localhost( - cgroup_chart_type(type, cg), - "mem_pgfault", - NULL, - "mem", - "systemd.service.memory.paging.faults", - "Systemd Services Memory Minor and Major Page Faults", - "MiB/s", - PLUGIN_CGROUPS_NAME, - PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME, - systemd_cgroup_chart_priority + 25, - update_every, - RRDSET_TYPE_AREA); - - rrdset_update_rrdlabels(cg->st_pgfaults, cg->chart_labels); - rrddim_add(cg->st_pgfaults, "minor", NULL, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(cg->st_pgfaults, "major", NULL, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set(cg->st_pgfaults, "minor", cg->memory.total_pgfault); - rrddim_set(cg->st_pgfaults, "major", cg->memory.total_pgmajfault); - rrdset_done(cg->st_pgfaults); - - if (unlikely(!cg->st_mem_activity)) { - cg->st_mem_activity = rrdset_create_localhost( - cgroup_chart_type(type, cg), - "mem_paging_io", - NULL, - "mem", - "systemd.service.memory.paging.io", - "Systemd Services Memory Paging IO", - "MiB/s", - PLUGIN_CGROUPS_NAME, - PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME, - systemd_cgroup_chart_priority + 30, - update_every, - RRDSET_TYPE_AREA); - - rrdset_update_rrdlabels(cg->st_mem_activity, cg->chart_labels); - rrddim_add(cg->st_mem_activity, "in", NULL, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(cg->st_mem_activity, "out", NULL, -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set(cg->st_mem_activity, "in", cg->memory.total_pgpgin); - rrddim_set(cg->st_mem_activity, "out", cg->memory.total_pgpgout); - rrdset_done(cg->st_mem_activity); - } - - if (likely(do_io && cg->io_service_bytes.updated)) { - if (unlikely(!cg->st_io)) { - cg->st_io = rrdset_create_localhost( - cgroup_chart_type(type, cg), - "disk_io", - NULL, - "disk", - "systemd.service.disk.io", - "Systemd Services Disk Read/Write Bandwidth", - "KiB/s", - PLUGIN_CGROUPS_NAME, - PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME, - systemd_cgroup_chart_priority + 35, - update_every, - RRDSET_TYPE_AREA); - - rrdset_update_rrdlabels(cg->st_io, cg->chart_labels); - rrddim_add(cg->st_io, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(cg->st_io, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); - } - rrddim_set(cg->st_io, "read", cg->io_service_bytes.Read); - rrddim_set(cg->st_io, "write", cg->io_service_bytes.Write); - rrdset_done(cg->st_io); - } - - if (likely(do_io_ops && cg->io_serviced.updated)) { - if (unlikely(!cg->st_serviced_ops)) { - cg->st_serviced_ops = rrdset_create_localhost( - cgroup_chart_type(type, cg), - "disk_iops", - NULL, - "disk", - "systemd.service.disk.iops", - "Systemd Services Disk Read/Write Operations", - "operations/s", - PLUGIN_CGROUPS_NAME, - PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME, - systemd_cgroup_chart_priority + 40, - update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(cg->st_serviced_ops, cg->chart_labels); - rrddim_add(cg->st_serviced_ops, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(cg->st_serviced_ops, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - } - rrddim_set(cg->st_serviced_ops, "read", cg->io_serviced.Read); - rrddim_set(cg->st_serviced_ops, "write", cg->io_serviced.Write); - rrdset_done(cg->st_serviced_ops); - } - - if (likely(do_throttle_io && cg->throttle_io_service_bytes.updated)) { - if (unlikely(!cg->st_throttle_io)) { - cg->st_throttle_io = rrdset_create_localhost( - cgroup_chart_type(type, cg), - "disk_throttle_io", - NULL, - "disk", - "systemd.service.disk.throttle.io", - "Systemd Services Throttle Disk Read/Write Bandwidth", - "KiB/s", - PLUGIN_CGROUPS_NAME, - PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME, - systemd_cgroup_chart_priority + 45, - update_every, - RRDSET_TYPE_AREA); - - rrdset_update_rrdlabels(cg->st_throttle_io, cg->chart_labels); - rrddim_add(cg->st_throttle_io, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(cg->st_throttle_io, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); - } - rrddim_set(cg->st_throttle_io, "read", cg->throttle_io_service_bytes.Read); - rrddim_set(cg->st_throttle_io, "write", cg->throttle_io_service_bytes.Write); - rrdset_done(cg->st_throttle_io); - } - - if (likely(do_throttle_ops && cg->throttle_io_serviced.updated)) { - if (unlikely(!cg->st_throttle_serviced_ops)) { - cg->st_throttle_serviced_ops = rrdset_create_localhost( - cgroup_chart_type(type, cg), - "disk_throttle_iops", - NULL, - "disk", - "systemd.service.disk.throttle.iops", - "Systemd Services Throttle Disk Read/Write Operations", - "operations/s", - PLUGIN_CGROUPS_NAME, - PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME, - systemd_cgroup_chart_priority + 50, - update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(cg->st_throttle_serviced_ops, cg->chart_labels); - rrddim_add(cg->st_throttle_serviced_ops, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(cg->st_throttle_serviced_ops, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - } - rrddim_set(cg->st_throttle_serviced_ops, "read", cg->throttle_io_serviced.Read); - rrddim_set(cg->st_throttle_serviced_ops, "write", cg->throttle_io_serviced.Write); - rrdset_done(cg->st_throttle_serviced_ops); - } - - if (likely(do_queued_ops && cg->io_queued.updated)) { - if (unlikely(!cg->st_queued_ops)) { - cg->st_queued_ops = rrdset_create_localhost( - cgroup_chart_type(type, cg), - "disk_queued_iops", - NULL, - "disk", - "systemd.service.disk.queued_iops", - "Systemd Services Queued Disk Read/Write Operations", - "operations/s", - PLUGIN_CGROUPS_NAME, - PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME, - systemd_cgroup_chart_priority + 55, - update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(cg->st_queued_ops, cg->chart_labels); - rrddim_add(cg->st_queued_ops, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(cg->st_queued_ops, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - } - rrddim_set(cg->st_queued_ops, "read", cg->io_queued.Read); - rrddim_set(cg->st_queued_ops, "write", cg->io_queued.Write); - rrdset_done(cg->st_queued_ops); - } - - if (likely(do_merged_ops && cg->io_merged.updated)) { - if (unlikely(!cg->st_merged_ops)) { - cg->st_merged_ops = rrdset_create_localhost( - cgroup_chart_type(type, cg), - "disk_merged_iops", - NULL, - "disk", - "systemd.service.disk.merged_iops", - "Systemd Services Merged Disk Read/Write Operations", - "operations/s", - PLUGIN_CGROUPS_NAME, - PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME, - systemd_cgroup_chart_priority + 60, - update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(cg->st_merged_ops, cg->chart_labels); - rrddim_add(cg->st_merged_ops, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(cg->st_merged_ops, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - } - rrddim_set(cg->st_merged_ops, "read", cg->io_merged.Read); - rrddim_set(cg->st_merged_ops, "write", cg->io_merged.Write); - rrdset_done(cg->st_merged_ops); - } - } -} +// update CPU and memory limits static inline void update_cpu_limits(char **filename, unsigned long long *value, struct cgroup *cg) { if(*filename) { @@ -3225,10 +1224,7 @@ static inline void update_cpu_limits(char **filename, unsigned long long *value, ret = 0; } } - else if(value == &cg->cpu_cfs_period) { - ret = read_single_number_file(*filename, value); - } - else if(value == &cg->cpu_cfs_quota) { + else if(value == &cg->cpu_cfs_period || value == &cg->cpu_cfs_quota) { ret = read_single_number_file(*filename, value); } else ret = -1; @@ -3282,12 +1278,16 @@ cpu_limits2_err: } } -static inline int update_memory_limits(char **filename, const RRDSETVAR_ACQUIRED **chart_var, unsigned long long *value, const char *chart_var_name, struct cgroup *cg) { +static inline int update_memory_limits(struct cgroup *cg) { + char **filename = &cg->filename_memory_limit; + const RRDSETVAR_ACQUIRED **chart_var = &cg->chart_var_memory_limit; + unsigned long long *value = &cg->memory_limit; + if(*filename) { if(unlikely(!*chart_var)) { - *chart_var = rrdsetvar_custom_chart_variable_add_and_acquire(cg->st_mem_usage, chart_var_name); + *chart_var = rrdsetvar_custom_chart_variable_add_and_acquire(cg->st_mem_usage, "memory_limit"); if(!*chart_var) { - collector_error("Cannot create cgroup %s chart variable '%s'. Will not update its limit anymore.", cg->id, chart_var_name); + collector_error("Cannot create cgroup %s chart variable '%s'. Will not update its limit anymore.", cg->id, "memory_limit"); freez(*filename); *filename = NULL; } @@ -3301,7 +1301,7 @@ static inline int update_memory_limits(char **filename, const RRDSETVAR_ACQUIRED *filename = NULL; } else { - rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value / (1024 * 1024))); + rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value) / (1024.0 * 1024.0)); return 1; } } else { @@ -3316,11 +1316,11 @@ static inline int update_memory_limits(char **filename, const RRDSETVAR_ACQUIRED char *s = "max\n\0"; if(strcmp(s, buffer) == 0){ *value = UINT64_MAX; - rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value / (1024 * 1024))); + rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value) / (1024.0 * 1024.0)); return 1; } *value = str2ull(buffer, NULL); - rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value / (1024 * 1024))); + rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value) / (1024.0 * 1024.0)); return 1; } } @@ -3328,85 +1328,65 @@ static inline int update_memory_limits(char **filename, const RRDSETVAR_ACQUIRED return 0; } -void update_cgroup_charts(int update_every) { - netdata_log_debug(D_CGROUP, "updating cgroups charts"); - - char type[RRD_ID_LENGTH_MAX + 1]; - char title[CHART_TITLE_MAX + 1]; - - int services_do_cpu = 0, - services_do_mem_usage = 0, - services_do_mem_detailed = 0, - services_do_mem_failcnt = 0, - services_do_swap_usage = 0, - services_do_io = 0, - services_do_io_ops = 0, - services_do_throttle_io = 0, - services_do_throttle_ops = 0, - services_do_queued_ops = 0, - services_do_merged_ops = 0; +// ---------------------------------------------------------------------------- +// generate charts - struct cgroup *cg; - for(cg = cgroup_root; cg ; cg = cg->next) { - if(unlikely(!cg->enabled || cg->pending_renames)) +void update_cgroup_systemd_services_charts() { + for (struct cgroup *cg = cgroup_root; cg; cg = cg->next) { + if (unlikely(!cg->enabled || cg->pending_renames || !is_cgroup_systemd_service(cg))) continue; - if(likely(cgroup_enable_systemd_services && is_cgroup_systemd_service(cg))) { - if(cg->cpuacct_stat.updated && cg->cpuacct_stat.enabled == CONFIG_BOOLEAN_YES) services_do_cpu++; - - if(cgroup_enable_systemd_services_detailed_memory && cg->memory.updated_detailed && cg->memory.enabled_detailed) services_do_mem_detailed++; - if(cg->memory.updated_usage_in_bytes && cg->memory.enabled_usage_in_bytes == CONFIG_BOOLEAN_YES) services_do_mem_usage++; - if(cg->memory.updated_failcnt && cg->memory.enabled_failcnt == CONFIG_BOOLEAN_YES) services_do_mem_failcnt++; - if(cg->memory.updated_msw_usage_in_bytes && cg->memory.enabled_msw_usage_in_bytes == CONFIG_BOOLEAN_YES) services_do_swap_usage++; + if (likely(cg->cpuacct_stat.updated)) { + update_cpu_utilization_chart(cg); + } + if (likely(cg->memory.updated_msw_usage_in_bytes)) { + update_mem_usage_chart(cg); + } + if (likely(cg->memory.updated_failcnt)) { + update_mem_failcnt_chart(cg); + } + if (likely(cg->memory.updated_detailed)) { + update_mem_usage_detailed_chart(cg); + update_mem_writeback_chart(cg); + update_mem_pgfaults_chart(cg); + if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + update_mem_activity_chart(cg); + } + } + if (likely(cg->io_service_bytes.updated)) { + update_io_serviced_bytes_chart(cg); + } + if (likely(cg->io_serviced.updated)) { + update_io_serviced_ops_chart(cg); + } + if (likely(cg->throttle_io_service_bytes.updated)) { + update_throttle_io_serviced_bytes_chart(cg); + } + if (likely(cg->throttle_io_serviced.updated)) { + update_throttle_io_serviced_ops_chart(cg); + } + if (likely(cg->io_queued.updated)) { + update_io_queued_ops_chart(cg); + } + if (likely(cg->io_merged.updated)) { + update_io_merged_ops_chart(cg); + } - if(cg->io_service_bytes.updated && cg->io_service_bytes.enabled == CONFIG_BOOLEAN_YES) services_do_io++; - if(cg->io_serviced.updated && cg->io_serviced.enabled == CONFIG_BOOLEAN_YES) services_do_io_ops++; - if(cg->throttle_io_service_bytes.updated && cg->throttle_io_service_bytes.enabled == CONFIG_BOOLEAN_YES) services_do_throttle_io++; - if(cg->throttle_io_serviced.updated && cg->throttle_io_serviced.enabled == CONFIG_BOOLEAN_YES) services_do_throttle_ops++; - if(cg->io_queued.updated && cg->io_queued.enabled == CONFIG_BOOLEAN_YES) services_do_queued_ops++; - if(cg->io_merged.updated && cg->io_merged.enabled == CONFIG_BOOLEAN_YES) services_do_merged_ops++; - continue; + if (likely(cg->pids.pids_current_updated)) { + update_pids_current_chart(cg); } - type[0] = '\0'; - - if(likely(cg->cpuacct_stat.updated && cg->cpuacct_stat.enabled == CONFIG_BOOLEAN_YES)) { - if(unlikely(!cg->st_cpu)) { - snprintfz( - title, - CHART_TITLE_MAX, - k8s_is_kubepod(cg) ? "CPU Usage (100%% = 1000 mCPU)" : "CPU Usage (100%% = 1 core)"); - - cg->st_cpu = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "cpu" - , NULL - , "cpu" - , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu" : "cgroup.cpu" - , title - , "percentage" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority - , update_every - , RRDSET_TYPE_STACKED - ); - - rrdset_update_rrdlabels(cg->st_cpu, cg->chart_labels); + cg->function_ready = true; + } +} - if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { - rrddim_add(cg->st_cpu, "user", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(cg->st_cpu, "system", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL); - } - else { - rrddim_add(cg->st_cpu, "user", NULL, 100, 1000000, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(cg->st_cpu, "system", NULL, 100, 1000000, RRD_ALGORITHM_INCREMENTAL); - } - } +void update_cgroup_charts() { + for (struct cgroup *cg = cgroup_root; cg; cg = cg->next) { + if(unlikely(!cg->enabled || cg->pending_renames || is_cgroup_systemd_service(cg))) + continue; - rrddim_set(cg->st_cpu, "user", cg->cpuacct_stat.user); - rrddim_set(cg->st_cpu, "system", cg->cpuacct_stat.system); - rrdset_done(cg->st_cpu); + if (likely(cg->cpuacct_stat.updated && cg->cpuacct_stat.enabled == CONFIG_BOOLEAN_YES)) { + update_cpu_utilization_chart(cg); if(likely(cg->filename_cpuset_cpus || cg->filename_cpu_cfs_period || cg->filename_cpu_cfs_quota)) { if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { @@ -3428,8 +1408,7 @@ void update_cgroup_charts(int update_every) { if(cg->filename_cpu_cfs_quota) freez(cg->filename_cpu_cfs_quota); cg->filename_cpu_cfs_quota = NULL; } - } - else { + } else { NETDATA_DOUBLE value = 0, quota = 0; if(likely( ((!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) && (cg->filename_cpuset_cpus || (cg->filename_cpu_cfs_period && cg->filename_cpu_cfs_quota))) @@ -3443,49 +1422,10 @@ void update_cgroup_charts(int update_every) { value = (NETDATA_DOUBLE)cg->cpuset_cpus * 100; } if(likely(value)) { - if(unlikely(!cg->st_cpu_limit)) { - snprintfz(title, CHART_TITLE_MAX, "CPU Usage within the limits"); - - cg->st_cpu_limit = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "cpu_limit" - , NULL - , "cpu" - , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_limit" : "cgroup.cpu_limit" - , title - , "percentage" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority - 1 - , update_every - , RRDSET_TYPE_LINE - ); - - rrdset_update_rrdlabels(cg->st_cpu_limit, cg->chart_labels); - - if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) - rrddim_add(cg->st_cpu_limit, "used", NULL, 1, system_hz, RRD_ALGORITHM_ABSOLUTE); - else - rrddim_add(cg->st_cpu_limit, "used", NULL, 1, 1000000, RRD_ALGORITHM_ABSOLUTE); - cg->prev_cpu_usage = (NETDATA_DOUBLE)(cg->cpuacct_stat.user + cg->cpuacct_stat.system) * 100; - } - - NETDATA_DOUBLE cpu_usage = 0; - cpu_usage = (NETDATA_DOUBLE)(cg->cpuacct_stat.user + cg->cpuacct_stat.system) * 100; - NETDATA_DOUBLE cpu_used = 100 * (cpu_usage - cg->prev_cpu_usage) / (value * update_every); - - rrdset_isnot_obsolete(cg->st_cpu_limit); - - rrddim_set(cg->st_cpu_limit, "used", (cpu_used > 0)?cpu_used:0); - - cg->prev_cpu_usage = cpu_usage; - - rrdsetvar_custom_chart_variable_set(cg->st_cpu, cg->chart_var_cpu_limit, value); - rrdset_done(cg->st_cpu_limit); - } - else { - if(unlikely(cg->st_cpu_limit)) { - rrdset_is_obsolete(cg->st_cpu_limit); + update_cpu_utilization_limit_chart(cg, value); + } else { + if (unlikely(cg->st_cpu_limit)) { + rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_limit); cg->st_cpu_limit = NULL; } rrdsetvar_custom_chart_variable_set(cg->st_cpu, cg->chart_var_cpu_limit, NAN); @@ -3495,1056 +1435,137 @@ void update_cgroup_charts(int update_every) { } if (likely(cg->cpuacct_cpu_throttling.updated && cg->cpuacct_cpu_throttling.enabled == CONFIG_BOOLEAN_YES)) { - if (unlikely(!cg->st_cpu_nr_throttled)) { - snprintfz(title, CHART_TITLE_MAX, "CPU Throttled Runnable Periods"); - - cg->st_cpu_nr_throttled = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "throttled" - , NULL - , "cpu" - , k8s_is_kubepod(cg) ? "k8s.cgroup.throttled" : "cgroup.throttled" - , title - , "percentage" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 10 - , update_every - , RRDSET_TYPE_LINE - ); - - rrdset_update_rrdlabels(cg->st_cpu_nr_throttled, cg->chart_labels); - rrddim_add(cg->st_cpu_nr_throttled, "throttled", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - } else { - rrddim_set(cg->st_cpu_nr_throttled, "throttled", cg->cpuacct_cpu_throttling.nr_throttled_perc); - rrdset_done(cg->st_cpu_nr_throttled); - } - - if (unlikely(!cg->st_cpu_throttled_time)) { - snprintfz(title, CHART_TITLE_MAX, "CPU Throttled Time Duration"); - - cg->st_cpu_throttled_time = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "throttled_duration" - , NULL - , "cpu" - , k8s_is_kubepod(cg) ? "k8s.cgroup.throttled_duration" : "cgroup.throttled_duration" - , title - , "ms" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 15 - , update_every - , RRDSET_TYPE_LINE - ); - - rrdset_update_rrdlabels(cg->st_cpu_throttled_time, cg->chart_labels); - rrddim_add(cg->st_cpu_throttled_time, "duration", NULL, 1, 1000000, RRD_ALGORITHM_INCREMENTAL); - } else { - rrddim_set(cg->st_cpu_throttled_time, "duration", cg->cpuacct_cpu_throttling.throttled_time); - rrdset_done(cg->st_cpu_throttled_time); - } + update_cpu_throttled_chart(cg); + update_cpu_throttled_duration_chart(cg); } if (likely(cg->cpuacct_cpu_shares.updated && cg->cpuacct_cpu_shares.enabled == CONFIG_BOOLEAN_YES)) { - if (unlikely(!cg->st_cpu_shares)) { - snprintfz(title, CHART_TITLE_MAX, "CPU Time Relative Share"); - - cg->st_cpu_shares = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "cpu_shares" - , NULL - , "cpu" - , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_shares" : "cgroup.cpu_shares" - , title - , "shares" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 20 - , update_every - , RRDSET_TYPE_LINE - ); - - rrdset_update_rrdlabels(cg->st_cpu_shares, cg->chart_labels); - rrddim_add(cg->st_cpu_shares, "shares", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - } else { - rrddim_set(cg->st_cpu_shares, "shares", cg->cpuacct_cpu_shares.shares); - rrdset_done(cg->st_cpu_shares); - } + update_cpu_shares_chart(cg); } - if(likely(cg->cpuacct_usage.updated && cg->cpuacct_usage.enabled == CONFIG_BOOLEAN_YES)) { - char id[RRD_ID_LENGTH_MAX + 1]; - unsigned int i; - - if(unlikely(!cg->st_cpu_per_core)) { - snprintfz( - title, - CHART_TITLE_MAX, - k8s_is_kubepod(cg) ? "CPU Usage (100%% = 1000 mCPU) Per Core" : - "CPU Usage (100%% = 1 core) Per Core"); - - cg->st_cpu_per_core = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "cpu_per_core" - , NULL - , "cpu" - , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_per_core" : "cgroup.cpu_per_core" - , title - , "percentage" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 100 - , update_every - , RRDSET_TYPE_STACKED - ); - - rrdset_update_rrdlabels(cg->st_cpu_per_core, cg->chart_labels); - - for(i = 0; i < cg->cpuacct_usage.cpus; i++) { - snprintfz(id, RRD_ID_LENGTH_MAX, "cpu%u", i); - rrddim_add(cg->st_cpu_per_core, id, NULL, 100, 1000000000, RRD_ALGORITHM_INCREMENTAL); - } - } - - for(i = 0; i < cg->cpuacct_usage.cpus ;i++) { - snprintfz(id, RRD_ID_LENGTH_MAX, "cpu%u", i); - rrddim_set(cg->st_cpu_per_core, id, cg->cpuacct_usage.cpu_percpu[i]); - } - rrdset_done(cg->st_cpu_per_core); + if (likely(cg->cpuacct_usage.updated && cg->cpuacct_usage.enabled == CONFIG_BOOLEAN_YES)) { + update_cpu_per_core_usage_chart(cg); } - if(likely(cg->memory.updated_detailed && cg->memory.enabled_detailed == CONFIG_BOOLEAN_YES)) { - if(unlikely(!cg->st_mem)) { - snprintfz(title, CHART_TITLE_MAX, "Memory Usage"); - - cg->st_mem = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "mem" - , NULL - , "mem" - , k8s_is_kubepod(cg) ? "k8s.cgroup.mem" : "cgroup.mem" - , title - , "MiB" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 220 - , update_every - , RRDSET_TYPE_STACKED - ); - - rrdset_update_rrdlabels(cg->st_mem, cg->chart_labels); - - if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { - rrddim_add(cg->st_mem, "cache", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(cg->st_mem, "rss", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - - if(cg->memory.detailed_has_swap) - rrddim_add(cg->st_mem, "swap", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - - rrddim_add(cg->st_mem, "rss_huge", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(cg->st_mem, "mapped_file", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - } else { - rrddim_add(cg->st_mem, "anon", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(cg->st_mem, "kernel_stack", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(cg->st_mem, "slab", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(cg->st_mem, "sock", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(cg->st_mem, "anon_thp", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(cg->st_mem, "file", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - } - } - - if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { - rrddim_set(cg->st_mem, "cache", cg->memory.total_cache); - rrddim_set(cg->st_mem, "rss", (cg->memory.total_rss > cg->memory.total_rss_huge)?(cg->memory.total_rss - cg->memory.total_rss_huge):0); - - if(cg->memory.detailed_has_swap) - rrddim_set(cg->st_mem, "swap", cg->memory.total_swap); - - rrddim_set(cg->st_mem, "rss_huge", cg->memory.total_rss_huge); - rrddim_set(cg->st_mem, "mapped_file", cg->memory.total_mapped_file); - } else { - rrddim_set(cg->st_mem, "anon", cg->memory.anon); - rrddim_set(cg->st_mem, "kernel_stack", cg->memory.kernel_stack); - rrddim_set(cg->st_mem, "slab", cg->memory.slab); - rrddim_set(cg->st_mem, "sock", cg->memory.sock); - rrddim_set(cg->st_mem, "anon_thp", cg->memory.anon_thp); - rrddim_set(cg->st_mem, "file", cg->memory.total_mapped_file); - } - rrdset_done(cg->st_mem); - - if(unlikely(!cg->st_writeback)) { - snprintfz(title, CHART_TITLE_MAX, "Writeback Memory"); - - cg->st_writeback = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "writeback" - , NULL - , "mem" - , k8s_is_kubepod(cg) ? "k8s.cgroup.writeback" : "cgroup.writeback" - , title - , "MiB" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 300 - , update_every - , RRDSET_TYPE_AREA - ); - - rrdset_update_rrdlabels(cg->st_writeback, cg->chart_labels); - - if(cg->memory.detailed_has_dirty) - rrddim_add(cg->st_writeback, "dirty", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - - rrddim_add(cg->st_writeback, "writeback", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - } - - if(cg->memory.detailed_has_dirty) - rrddim_set(cg->st_writeback, "dirty", cg->memory.total_dirty); - - rrddim_set(cg->st_writeback, "writeback", cg->memory.total_writeback); - rrdset_done(cg->st_writeback); + if (likely(cg->memory.updated_detailed && cg->memory.enabled_detailed == CONFIG_BOOLEAN_YES)) { + update_mem_usage_detailed_chart(cg); + update_mem_writeback_chart(cg); if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { - if(unlikely(!cg->st_mem_activity)) { - snprintfz(title, CHART_TITLE_MAX, "Memory Activity"); - - cg->st_mem_activity = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "mem_activity" - , NULL - , "mem" - , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_activity" : "cgroup.mem_activity" - , title - , "MiB/s" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 400 - , update_every - , RRDSET_TYPE_LINE - ); - - rrdset_update_rrdlabels(cg->st_mem_activity, cg->chart_labels); - - rrddim_add(cg->st_mem_activity, "pgpgin", "in", system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(cg->st_mem_activity, "pgpgout", "out", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set(cg->st_mem_activity, "pgpgin", cg->memory.total_pgpgin); - rrddim_set(cg->st_mem_activity, "pgpgout", cg->memory.total_pgpgout); - rrdset_done(cg->st_mem_activity); - } - - if(unlikely(!cg->st_pgfaults)) { - snprintfz(title, CHART_TITLE_MAX, "Memory Page Faults"); - - cg->st_pgfaults = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "pgfaults" - , NULL - , "mem" - , k8s_is_kubepod(cg) ? "k8s.cgroup.pgfaults" : "cgroup.pgfaults" - , title - , "MiB/s" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 500 - , update_every - , RRDSET_TYPE_LINE - ); - - rrdset_update_rrdlabels(cg->st_pgfaults, cg->chart_labels); - - rrddim_add(cg->st_pgfaults, "pgfault", NULL, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(cg->st_pgfaults, "pgmajfault", "swap", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + update_mem_activity_chart(cg); } - rrddim_set(cg->st_pgfaults, "pgfault", cg->memory.total_pgfault); - rrddim_set(cg->st_pgfaults, "pgmajfault", cg->memory.total_pgmajfault); - rrdset_done(cg->st_pgfaults); + update_mem_pgfaults_chart(cg); } - if(likely(cg->memory.updated_usage_in_bytes && cg->memory.enabled_usage_in_bytes == CONFIG_BOOLEAN_YES)) { - if(unlikely(!cg->st_mem_usage)) { - snprintfz(title, CHART_TITLE_MAX, "Used Memory"); - - cg->st_mem_usage = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "mem_usage" - , NULL - , "mem" - , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_usage" : "cgroup.mem_usage" - , title - , "MiB" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 210 - , update_every - , RRDSET_TYPE_STACKED - ); - - rrdset_update_rrdlabels(cg->st_mem_usage, cg->chart_labels); - - rrddim_add(cg->st_mem_usage, "ram", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(cg->st_mem_usage, "swap", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set(cg->st_mem_usage, "ram", cg->memory.usage_in_bytes); - if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { - rrddim_set( - cg->st_mem_usage, - "swap", - cg->memory.msw_usage_in_bytes > (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) ? - cg->memory.msw_usage_in_bytes - (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) : 0); - } else { - rrddim_set(cg->st_mem_usage, "swap", cg->memory.msw_usage_in_bytes); - } - rrdset_done(cg->st_mem_usage); - - if (likely(update_memory_limits(&cg->filename_memory_limit, &cg->chart_var_memory_limit, &cg->memory_limit, "memory_limit", cg))) { - static unsigned long long ram_total = 0; - - if(unlikely(!ram_total)) { - procfile *ff = NULL; - - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/meminfo"); - ff = procfile_open(config_get("plugin:cgroups", "meminfo filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); - - if(likely(ff)) - ff = procfile_readall(ff); - if(likely(ff && procfile_lines(ff) && !strncmp(procfile_word(ff, 0), "MemTotal", 8))) - ram_total = str2ull(procfile_word(ff, 1), NULL) * 1024; - else { - collector_error("Cannot read file %s. Will not update cgroup %s RAM limit anymore.", filename, cg->id); - freez(cg->filename_memory_limit); - cg->filename_memory_limit = NULL; - } + if (likely(cg->memory.updated_usage_in_bytes && cg->memory.enabled_usage_in_bytes == CONFIG_BOOLEAN_YES)) { + update_mem_usage_chart(cg); - procfile_close(ff); - } - - if(likely(ram_total)) { - unsigned long long memory_limit = ram_total; + // FIXME: this if should be only for unlimited charts + if(likely(host_ram_total)) { + // FIXME: do we need to update mem limits on every data collection? + if (likely(update_memory_limits(cg))) { - if(unlikely(cg->memory_limit < ram_total)) + unsigned long long memory_limit = host_ram_total; + if (unlikely(cg->memory_limit < host_ram_total)) memory_limit = cg->memory_limit; - if(unlikely(!cg->st_mem_usage_limit)) { - snprintfz(title, CHART_TITLE_MAX, "Used RAM within the limits"); - - cg->st_mem_usage_limit = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "mem_usage_limit" - , NULL - , "mem" - , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_usage_limit": "cgroup.mem_usage_limit" - , title - , "MiB" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 200 - , update_every - , RRDSET_TYPE_STACKED - ); - - rrdset_update_rrdlabels(cg->st_mem_usage_limit, cg->chart_labels); - - rrddim_add(cg->st_mem_usage_limit, "available", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(cg->st_mem_usage_limit, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - } - - rrdset_isnot_obsolete(cg->st_mem_usage_limit); - - rrddim_set(cg->st_mem_usage_limit, "available", memory_limit - cg->memory.usage_in_bytes); - rrddim_set(cg->st_mem_usage_limit, "used", cg->memory.usage_in_bytes); - rrdset_done(cg->st_mem_usage_limit); - - if (unlikely(!cg->st_mem_utilization)) { - snprintfz(title, CHART_TITLE_MAX, "Memory Utilization"); - - cg->st_mem_utilization = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "mem_utilization" - , NULL - , "mem" - , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_utilization" : "cgroup.mem_utilization" - , title - , "percentage" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 199 - , update_every - , RRDSET_TYPE_AREA - ); - - rrdset_update_rrdlabels(cg->st_mem_utilization, cg->chart_labels); - - rrddim_add(cg->st_mem_utilization, "utilization", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + update_mem_usage_limit_chart(cg, memory_limit); + update_mem_utilization_chart(cg, memory_limit); + } else { + if (unlikely(cg->st_mem_usage_limit)) { + rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_usage_limit); + cg->st_mem_usage_limit = NULL; } - if (memory_limit) { - rrdset_isnot_obsolete(cg->st_mem_utilization); - - rrddim_set( - cg->st_mem_utilization, "utilization", cg->memory.usage_in_bytes * 100 / memory_limit); - rrdset_done(cg->st_mem_utilization); + if (unlikely(cg->st_mem_utilization)) { + rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_utilization); + cg->st_mem_utilization = NULL; } } } - else { - if(unlikely(cg->st_mem_usage_limit)) { - rrdset_is_obsolete(cg->st_mem_usage_limit); - cg->st_mem_usage_limit = NULL; - } - - if(unlikely(cg->st_mem_utilization)) { - rrdset_is_obsolete(cg->st_mem_utilization); - cg->st_mem_utilization = NULL; - } - } - - update_memory_limits(&cg->filename_memoryswap_limit, &cg->chart_var_memoryswap_limit, &cg->memoryswap_limit, "memory_and_swap_limit", cg); } - if(likely(cg->memory.updated_failcnt && cg->memory.enabled_failcnt == CONFIG_BOOLEAN_YES)) { - if(unlikely(!cg->st_mem_failcnt)) { - snprintfz(title, CHART_TITLE_MAX, "Memory Limit Failures"); - - cg->st_mem_failcnt = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "mem_failcnt" - , NULL - , "mem" - , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_failcnt" : "cgroup.mem_failcnt" - , title - , "count" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 250 - , update_every - , RRDSET_TYPE_LINE - ); - - rrdset_update_rrdlabels(cg->st_mem_failcnt, cg->chart_labels); - - rrddim_add(cg->st_mem_failcnt, "failures", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set(cg->st_mem_failcnt, "failures", cg->memory.failcnt); - rrdset_done(cg->st_mem_failcnt); + if (likely(cg->memory.updated_failcnt && cg->memory.enabled_failcnt == CONFIG_BOOLEAN_YES)) { + update_mem_failcnt_chart(cg); } - if(likely(cg->io_service_bytes.updated && cg->io_service_bytes.enabled == CONFIG_BOOLEAN_YES)) { - if(unlikely(!cg->st_io)) { - snprintfz(title, CHART_TITLE_MAX, "I/O Bandwidth (all disks)"); - - cg->st_io = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "io" - , NULL - , "disk" - , k8s_is_kubepod(cg) ? "k8s.cgroup.io" : "cgroup.io" - , title - , "KiB/s" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 1200 - , update_every - , RRDSET_TYPE_AREA - ); - - rrdset_update_rrdlabels(cg->st_io, cg->chart_labels); - - rrddim_add(cg->st_io, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(cg->st_io, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set(cg->st_io, "read", cg->io_service_bytes.Read); - rrddim_set(cg->st_io, "write", cg->io_service_bytes.Write); - rrdset_done(cg->st_io); + if (likely(cg->io_service_bytes.updated && cg->io_service_bytes.enabled == CONFIG_BOOLEAN_YES)) { + update_io_serviced_bytes_chart(cg); } - if(likely(cg->io_serviced.updated && cg->io_serviced.enabled == CONFIG_BOOLEAN_YES)) { - if(unlikely(!cg->st_serviced_ops)) { - snprintfz(title, CHART_TITLE_MAX, "Serviced I/O Operations (all disks)"); - - cg->st_serviced_ops = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "serviced_ops" - , NULL - , "disk" - , k8s_is_kubepod(cg) ? "k8s.cgroup.serviced_ops" : "cgroup.serviced_ops" - , title - , "operations/s" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 1200 - , update_every - , RRDSET_TYPE_LINE - ); - - rrdset_update_rrdlabels(cg->st_serviced_ops, cg->chart_labels); - - rrddim_add(cg->st_serviced_ops, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(cg->st_serviced_ops, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set(cg->st_serviced_ops, "read", cg->io_serviced.Read); - rrddim_set(cg->st_serviced_ops, "write", cg->io_serviced.Write); - rrdset_done(cg->st_serviced_ops); + if (likely(cg->io_serviced.updated && cg->io_serviced.enabled == CONFIG_BOOLEAN_YES)) { + update_io_serviced_ops_chart(cg); } - if(likely(cg->throttle_io_service_bytes.updated && cg->throttle_io_service_bytes.enabled == CONFIG_BOOLEAN_YES)) { - if(unlikely(!cg->st_throttle_io)) { - snprintfz(title, CHART_TITLE_MAX, "Throttle I/O Bandwidth (all disks)"); - - cg->st_throttle_io = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "throttle_io" - , NULL - , "disk" - , k8s_is_kubepod(cg) ? "k8s.cgroup.throttle_io" : "cgroup.throttle_io" - , title - , "KiB/s" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 1200 - , update_every - , RRDSET_TYPE_AREA - ); - - rrdset_update_rrdlabels(cg->st_throttle_io, cg->chart_labels); - - rrddim_add(cg->st_throttle_io, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(cg->st_throttle_io, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set(cg->st_throttle_io, "read", cg->throttle_io_service_bytes.Read); - rrddim_set(cg->st_throttle_io, "write", cg->throttle_io_service_bytes.Write); - rrdset_done(cg->st_throttle_io); + if (likely(cg->throttle_io_service_bytes.updated && cg->throttle_io_service_bytes.enabled == CONFIG_BOOLEAN_YES)) { + update_throttle_io_serviced_bytes_chart(cg); } - if(likely(cg->throttle_io_serviced.updated && cg->throttle_io_serviced.enabled == CONFIG_BOOLEAN_YES)) { - if(unlikely(!cg->st_throttle_serviced_ops)) { - snprintfz(title, CHART_TITLE_MAX, "Throttle Serviced I/O Operations (all disks)"); - - cg->st_throttle_serviced_ops = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "throttle_serviced_ops" - , NULL - , "disk" - , k8s_is_kubepod(cg) ? "k8s.cgroup.throttle_serviced_ops" : "cgroup.throttle_serviced_ops" - , title - , "operations/s" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 1200 - , update_every - , RRDSET_TYPE_LINE - ); - - rrdset_update_rrdlabels(cg->st_throttle_serviced_ops, cg->chart_labels); - - rrddim_add(cg->st_throttle_serviced_ops, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(cg->st_throttle_serviced_ops, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set(cg->st_throttle_serviced_ops, "read", cg->throttle_io_serviced.Read); - rrddim_set(cg->st_throttle_serviced_ops, "write", cg->throttle_io_serviced.Write); - rrdset_done(cg->st_throttle_serviced_ops); + if (likely(cg->throttle_io_serviced.updated && cg->throttle_io_serviced.enabled == CONFIG_BOOLEAN_YES)) { + update_throttle_io_serviced_ops_chart(cg); } - if(likely(cg->io_queued.updated && cg->io_queued.enabled == CONFIG_BOOLEAN_YES)) { - if(unlikely(!cg->st_queued_ops)) { - snprintfz(title, CHART_TITLE_MAX, "Queued I/O Operations (all disks)"); - - cg->st_queued_ops = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "queued_ops" - , NULL - , "disk" - , k8s_is_kubepod(cg) ? "k8s.cgroup.queued_ops" : "cgroup.queued_ops" - , title - , "operations" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2000 - , update_every - , RRDSET_TYPE_LINE - ); - - rrdset_update_rrdlabels(cg->st_queued_ops, cg->chart_labels); - - rrddim_add(cg->st_queued_ops, "read", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(cg->st_queued_ops, "write", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set(cg->st_queued_ops, "read", cg->io_queued.Read); - rrddim_set(cg->st_queued_ops, "write", cg->io_queued.Write); - rrdset_done(cg->st_queued_ops); + if (likely(cg->io_queued.updated && cg->io_queued.enabled == CONFIG_BOOLEAN_YES)) { + update_io_queued_ops_chart(cg); } - if(likely(cg->io_merged.updated && cg->io_merged.enabled == CONFIG_BOOLEAN_YES)) { - if(unlikely(!cg->st_merged_ops)) { - snprintfz(title, CHART_TITLE_MAX, "Merged I/O Operations (all disks)"); - - cg->st_merged_ops = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "merged_ops" - , NULL - , "disk" - , k8s_is_kubepod(cg) ? "k8s.cgroup.merged_ops" : "cgroup.merged_ops" - , title - , "operations/s" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2100 - , update_every - , RRDSET_TYPE_LINE - ); - - rrdset_update_rrdlabels(cg->st_merged_ops, cg->chart_labels); - - rrddim_add(cg->st_merged_ops, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(cg->st_merged_ops, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); - } + if (likely(cg->io_merged.updated && cg->io_merged.enabled == CONFIG_BOOLEAN_YES)) { + update_io_merged_ops_chart(cg); + } - rrddim_set(cg->st_merged_ops, "read", cg->io_merged.Read); - rrddim_set(cg->st_merged_ops, "write", cg->io_merged.Write); - rrdset_done(cg->st_merged_ops); + if (likely(cg->pids.pids_current_updated)) { + update_pids_current_chart(cg); } if (cg->options & CGROUP_OPTIONS_IS_UNIFIED) { - struct pressure *res = &cg->cpu_pressure; - - if (likely(res->updated && res->some.enabled)) { - struct pressure_charts *pcs; - pcs = &res->some; - - if (unlikely(!pcs->share_time.st)) { - RRDSET *chart; - snprintfz(title, CHART_TITLE_MAX, "CPU some pressure"); - chart = pcs->share_time.st = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "cpu_some_pressure" - , NULL - , "cpu" - , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_some_pressure" : "cgroup.cpu_some_pressure" - , title - , "percentage" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2200 - , update_every - , RRDSET_TYPE_LINE - ); - rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); - pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - } - - if (unlikely(!pcs->total_time.st)) { - RRDSET *chart; - snprintfz(title, CHART_TITLE_MAX, "CPU some pressure stall time"); - chart = pcs->total_time.st = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "cpu_some_pressure_stall_time" - , NULL - , "cpu" - , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_some_pressure_stall_time" : "cgroup.cpu_some_pressure_stall_time" - , title - , "ms" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2220 - , update_every - , RRDSET_TYPE_LINE - ); - rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); - pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - update_pressure_charts(pcs); - } - if (likely(res->updated && res->full.enabled)) { - struct pressure_charts *pcs; - pcs = &res->full; - - if (unlikely(!pcs->share_time.st)) { - RRDSET *chart; - snprintfz(title, CHART_TITLE_MAX, "CPU full pressure"); - chart = pcs->share_time.st = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "cpu_full_pressure" - , NULL - , "cpu" - , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_full_pressure" : "cgroup.cpu_full_pressure" - , title - , "percentage" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2240 - , update_every - , RRDSET_TYPE_LINE - ); - rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); - pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - } - - if (unlikely(!pcs->total_time.st)) { - RRDSET *chart; - snprintfz(title, CHART_TITLE_MAX, "CPU full pressure stall time"); - chart = pcs->total_time.st = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "cpu_full_pressure_stall_time" - , NULL - , "cpu" - , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_full_pressure_stall_time" : "cgroup.cpu_full_pressure_stall_time" - , title - , "ms" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2260 - , update_every - , RRDSET_TYPE_LINE - ); - rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); - pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - update_pressure_charts(pcs); - } - - res = &cg->memory_pressure; - - if (likely(res->updated && res->some.enabled)) { - struct pressure_charts *pcs; - pcs = &res->some; - - if (unlikely(!pcs->share_time.st)) { - RRDSET *chart; - snprintfz(title, CHART_TITLE_MAX, "Memory some pressure"); - chart = pcs->share_time.st = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "mem_some_pressure" - , NULL - , "mem" - , k8s_is_kubepod(cg) ? "k8s.cgroup.memory_some_pressure" : "cgroup.memory_some_pressure" - , title - , "percentage" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2300 - , update_every - , RRDSET_TYPE_LINE - ); - rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); - pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - } - - if (unlikely(!pcs->total_time.st)) { - RRDSET *chart; - snprintfz(title, CHART_TITLE_MAX, "Memory some pressure stall time"); - chart = pcs->total_time.st = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "memory_some_pressure_stall_time" - , NULL - , "mem" - , k8s_is_kubepod(cg) ? "k8s.cgroup.memory_some_pressure_stall_time" : "cgroup.memory_some_pressure_stall_time" - , title - , "ms" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2320 - , update_every - , RRDSET_TYPE_LINE - ); - rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); - pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - update_pressure_charts(pcs); - } - - if (likely(res->updated && res->full.enabled)) { - struct pressure_charts *pcs; - pcs = &res->full; - - if (unlikely(!pcs->share_time.st)) { - RRDSET *chart; - snprintfz(title, CHART_TITLE_MAX, "Memory full pressure"); - - chart = pcs->share_time.st = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "mem_full_pressure" - , NULL - , "mem" - , k8s_is_kubepod(cg) ? "k8s.cgroup.memory_full_pressure" : "cgroup.memory_full_pressure" - , title - , "percentage" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2340 - , update_every - , RRDSET_TYPE_LINE - ); - - rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); - pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - } - - if (unlikely(!pcs->total_time.st)) { - RRDSET *chart; - snprintfz(title, CHART_TITLE_MAX, "Memory full pressure stall time"); - chart = pcs->total_time.st = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "memory_full_pressure_stall_time" - , NULL - , "mem" - , k8s_is_kubepod(cg) ? "k8s.cgroup.memory_full_pressure_stall_time" : "cgroup.memory_full_pressure_stall_time" - , title - , "ms" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2360 - , update_every - , RRDSET_TYPE_LINE - ); - rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); - pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - update_pressure_charts(pcs); - } - - res = &cg->irq_pressure; - - if (likely(res->updated && res->some.enabled)) { - struct pressure_charts *pcs; - pcs = &res->some; - - if (unlikely(!pcs->share_time.st)) { - RRDSET *chart; - snprintfz(title, CHART_TITLE_MAX, "IRQ some pressure"); - chart = pcs->share_time.st = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "irq_some_pressure" - , NULL - , "interrupts" - , k8s_is_kubepod(cg) ? "k8s.cgroup.irq_some_pressure" : "cgroup.irq_some_pressure" - , title - , "percentage" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2310 - , update_every - , RRDSET_TYPE_LINE - ); - rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); - pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - } - - if (unlikely(!pcs->total_time.st)) { - RRDSET *chart; - snprintfz(title, CHART_TITLE_MAX, "IRQ some pressure stall time"); - chart = pcs->total_time.st = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "irq_some_pressure_stall_time" - , NULL - , "interrupts" - , k8s_is_kubepod(cg) ? "k8s.cgroup.irq_some_pressure_stall_time" : "cgroup.irq_some_pressure_stall_time" - , title - , "ms" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2330 - , update_every - , RRDSET_TYPE_LINE - ); - rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); - pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - update_pressure_charts(pcs); + if (likely(cg->cpu_pressure.updated)) { + if (cg->cpu_pressure.some.enabled) { + update_cpu_some_pressure_chart(cg); + update_cpu_some_pressure_stall_time_chart(cg); + } + if (cg->cpu_pressure.full.enabled) { + update_cpu_full_pressure_chart(cg); + update_cpu_full_pressure_stall_time_chart(cg); + } } - if (likely(res->updated && res->full.enabled)) { - struct pressure_charts *pcs; - pcs = &res->full; - - if (unlikely(!pcs->share_time.st)) { - RRDSET *chart; - snprintfz(title, CHART_TITLE_MAX, "IRQ full pressure"); - - chart = pcs->share_time.st = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "irq_full_pressure" - , NULL - , "interrupts" - , k8s_is_kubepod(cg) ? "k8s.cgroup.irq_full_pressure" : "cgroup.irq_full_pressure" - , title - , "percentage" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2350 - , update_every - , RRDSET_TYPE_LINE - ); - - rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); - pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + if (likely(cg->memory_pressure.updated)) { + if (cg->memory_pressure.some.enabled) { + update_mem_some_pressure_chart(cg); + update_mem_some_pressure_stall_time_chart(cg); } - - if (unlikely(!pcs->total_time.st)) { - RRDSET *chart; - snprintfz(title, CHART_TITLE_MAX, "IRQ full pressure stall time"); - chart = pcs->total_time.st = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "irq_full_pressure_stall_time" - , NULL - , "interrupts" - , k8s_is_kubepod(cg) ? "k8s.cgroup.irq_full_pressure_stall_time" : "cgroup.irq_full_pressure_stall_time" - , title - , "ms" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2370 - , update_every - , RRDSET_TYPE_LINE - ); - rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); - pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + if (cg->memory_pressure.full.enabled) { + update_mem_full_pressure_chart(cg); + update_mem_full_pressure_stall_time_chart(cg); } - - update_pressure_charts(pcs); } - res = &cg->io_pressure; - - if (likely(res->updated && res->some.enabled)) { - struct pressure_charts *pcs; - pcs = &res->some; - - if (unlikely(!pcs->share_time.st)) { - RRDSET *chart; - snprintfz(title, CHART_TITLE_MAX, "I/O some pressure"); - chart = pcs->share_time.st = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "io_some_pressure" - , NULL - , "disk" - , k8s_is_kubepod(cg) ? "k8s.cgroup.io_some_pressure" : "cgroup.io_some_pressure" - , title - , "percentage" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2400 - , update_every - , RRDSET_TYPE_LINE - ); - rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); - pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + if (likely(cg->irq_pressure.updated)) { + if (cg->irq_pressure.some.enabled) { + update_irq_some_pressure_chart(cg); + update_irq_some_pressure_stall_time_chart(cg); } - - if (unlikely(!pcs->total_time.st)) { - RRDSET *chart; - snprintfz(title, CHART_TITLE_MAX, "I/O some pressure stall time"); - chart = pcs->total_time.st = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "io_some_pressure_stall_time" - , NULL - , "disk" - , k8s_is_kubepod(cg) ? "k8s.cgroup.io_some_pressure_stall_time" : "cgroup.io_some_pressure_stall_time" - , title - , "ms" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2420 - , update_every - , RRDSET_TYPE_LINE - ); - rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); - pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + if (cg->irq_pressure.full.enabled) { + update_irq_full_pressure_chart(cg); + update_irq_full_pressure_stall_time_chart(cg); } - - update_pressure_charts(pcs); } - if (likely(res->updated && res->full.enabled)) { - struct pressure_charts *pcs; - pcs = &res->full; - - if (unlikely(!pcs->share_time.st)) { - RRDSET *chart; - snprintfz(title, CHART_TITLE_MAX, "I/O full pressure"); - chart = pcs->share_time.st = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "io_full_pressure" - , NULL - , "disk" - , k8s_is_kubepod(cg) ? "k8s.cgroup.io_full_pressure" : "cgroup.io_full_pressure" - , title - , "percentage" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2440 - , update_every - , RRDSET_TYPE_LINE - ); - rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); - pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + if (likely(cg->io_pressure.updated)) { + if (cg->io_pressure.some.enabled) { + update_io_some_pressure_chart(cg); + update_io_some_pressure_stall_time_chart(cg); } - - if (unlikely(!pcs->total_time.st)) { - RRDSET *chart; - snprintfz(title, CHART_TITLE_MAX, "I/O full pressure stall time"); - chart = pcs->total_time.st = rrdset_create_localhost( - cgroup_chart_type(type, cg) - , "io_full_pressure_stall_time" - , NULL - , "disk" - , k8s_is_kubepod(cg) ? "k8s.cgroup.io_full_pressure_stall_time" : "cgroup.io_full_pressure_stall_time" - , title - , "ms" - , PLUGIN_CGROUPS_NAME - , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2460 - , update_every - , RRDSET_TYPE_LINE - ); - rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); - pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + if (cg->io_pressure.full.enabled) { + update_io_full_pressure_chart(cg); + update_io_full_pressure_stall_time_chart(cg); } - - update_pressure_charts(pcs); } } - } - - if(likely(cgroup_enable_systemd_services)) - update_systemd_services_charts(update_every, services_do_cpu, services_do_mem_usage, services_do_mem_detailed - , services_do_mem_failcnt, services_do_swap_usage, services_do_io - , services_do_io_ops, services_do_throttle_io, services_do_throttle_ops - , services_do_queued_ops, services_do_merged_ops - ); - netdata_log_debug(D_CGROUP, "done updating cgroups charts"); + cg->function_ready = true; + } } // ---------------------------------------------------------------------------- @@ -4587,6 +1608,22 @@ static void cgroup_main_cleanup(void *ptr) { static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; } +void cgroup_read_host_total_ram() { + procfile *ff = NULL; + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/meminfo"); + + ff = procfile_open( + config_get("plugin:cgroups", "meminfo filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); + + if (likely((ff = procfile_readall(ff)) && procfile_lines(ff) && !strncmp(procfile_word(ff, 0), "MemTotal", 8))) + host_ram_total = str2ull(procfile_word(ff, 1), NULL) * 1024; + else + collector_error("Cannot read file %s. Will not create RAM limit charts.", filename); + + procfile_close(ff); +} + void *cgroups_main(void *ptr) { worker_register("CGROUPS"); worker_register_job_name(WORKER_CGROUPS_LOCK, "lock"); @@ -4601,6 +1638,9 @@ void *cgroups_main(void *ptr) { } read_cgroup_plugin_configuration(); + + cgroup_read_host_total_ram(); + netdata_cgroup_ebpf_initialize_shm(); if (uv_mutex_init(&cgroup_root_mutex)) { @@ -4624,7 +1664,15 @@ void *cgroups_main(void *ptr) { collector_error("CGROUP: cannot create thread worker. uv_thread_create(): %s", uv_strerror(error)); goto exit; } - uv_thread_set_name_np(discovery_thread.thread, "PLUGIN[cgroups]"); + + uv_thread_set_name_np(discovery_thread.thread, "P[cgroups]"); + + // we register this only on localhost + // for the other nodes, the origin server should register it + rrd_collector_started(); // this creates a collector that runs for as long as netdata runs + cgroup_netdev_link_init(); + rrd_function_add(localhost, NULL, "containers-vms", 10, RRDFUNCTIONS_CGTOP_HELP, true, cgroup_function_cgroup_top, NULL); + rrd_function_add(localhost, NULL, "systemd-services", 10, RRDFUNCTIONS_CGTOP_HELP, true, cgroup_function_systemd_top, NULL); heartbeat_t hb; heartbeat_init(&hb); @@ -4632,11 +1680,13 @@ void *cgroups_main(void *ptr) { usec_t find_every = cgroup_check_for_new_every * USEC_PER_SEC, find_dt = 0; netdata_thread_disable_cancelability(); + while(service_running(SERVICE_COLLECTORS)) { worker_is_idle(); usec_t hb_dt = heartbeat_next(&hb, step); - if(unlikely(!service_running(SERVICE_COLLECTORS))) break; + if (unlikely(!service_running(SERVICE_COLLECTORS))) + break; find_dt += hb_dt; if (unlikely(find_dt >= find_every || (!is_inside_k8s && cgroups_check))) { @@ -4652,12 +1702,18 @@ void *cgroups_main(void *ptr) { worker_is_busy(WORKER_CGROUPS_READ); read_all_discovered_cgroups(cgroup_root); + if (unlikely(!service_running(SERVICE_COLLECTORS))) { uv_mutex_unlock(&cgroup_root_mutex); break; } + worker_is_busy(WORKER_CGROUPS_CHART); - update_cgroup_charts(cgroup_update_every); + + update_cgroup_charts(); + if (cgroup_enable_systemd_services) + update_cgroup_systemd_services_charts(); + if (unlikely(!service_running(SERVICE_COLLECTORS))) { uv_mutex_unlock(&cgroup_root_mutex); break; diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.h b/collectors/cgroups.plugin/sys_fs_cgroup.h index 625be755d..e8cfcf5f6 100644 --- a/collectors/cgroups.plugin/sys_fs_cgroup.h +++ b/collectors/cgroups.plugin/sys_fs_cgroup.h @@ -5,6 +5,10 @@ #include "daemon/common.h" +#define PLUGIN_CGROUPS_NAME "cgroups.plugin" +#define PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME "systemd" +#define PLUGIN_CGROUPS_MODULE_CGROUPS_NAME "/sys/fs/cgroup" + #define CGROUP_OPTIONS_DISABLED_DUPLICATE 0x00000001 #define CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE 0x00000002 #define CGROUP_OPTIONS_IS_UNIFIED 0x00000004 diff --git a/collectors/cgroups.plugin/tests/test_doubles.c b/collectors/cgroups.plugin/tests/test_doubles.c index 498f649f5..b13d4b19c 100644 --- a/collectors/cgroups.plugin/tests/test_doubles.c +++ b/collectors/cgroups.plugin/tests/test_doubles.c @@ -2,12 +2,12 @@ #include "test_cgroups_plugin.h" -void rrdset_is_obsolete(RRDSET *st) +void rrdset_is_obsolete___safe_from_collector_thread(RRDSET *st) { UNUSED(st); } -void rrdset_isnot_obsolete(RRDSET *st) +void rrdset_isnot_obsolete___safe_from_collector_thread(RRDSET *st) { UNUSED(st); } diff --git a/collectors/charts.d.plugin/Makefile.am b/collectors/charts.d.plugin/Makefile.am index 03c7f0a94..f82992fd4 100644 --- a/collectors/charts.d.plugin/Makefile.am +++ b/collectors/charts.d.plugin/Makefile.am @@ -45,6 +45,5 @@ include ap/Makefile.inc include apcupsd/Makefile.inc include example/Makefile.inc include libreswan/Makefile.inc -include nut/Makefile.inc include opensips/Makefile.inc include sensors/Makefile.inc diff --git a/collectors/charts.d.plugin/ap/integrations/access_points.md b/collectors/charts.d.plugin/ap/integrations/access_points.md index 0d8d39046..a0de2c4df 100644 --- a/collectors/charts.d.plugin/ap/integrations/access_points.md +++ b/collectors/charts.d.plugin/ap/integrations/access_points.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/charts.d.p sidebar_label: "Access Points" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Network" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -117,9 +118,9 @@ The following collapsed table contains all the options that can be configured fo | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| ap_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 1 | False | -| ap_priority | Controls the order of charts at the netdata dashboard. | 6900 | False | -| ap_retries | The number of retries to do in case of failure before disabling the collector. | 10 | False | +| ap_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 1 | no | +| ap_priority | Controls the order of charts at the netdata dashboard. | 6900 | no | +| ap_retries | The number of retries to do in case of failure before disabling the collector. | 10 | no | </details> diff --git a/collectors/charts.d.plugin/apcupsd/apcupsd.chart.sh b/collectors/charts.d.plugin/apcupsd/apcupsd.chart.sh index ef9a90596..da9cd19c3 100644 --- a/collectors/charts.d.plugin/apcupsd/apcupsd.chart.sh +++ b/collectors/charts.d.plugin/apcupsd/apcupsd.chart.sh @@ -50,7 +50,7 @@ apcupsd_check() { local host working=0 failed=0 for host in "${!apcupsd_sources[@]}"; do - apcupsd_get "${apcupsd_sources[${host}]}" > /dev/null + apcupsd_get "${apcupsd_sources[${host}]}" >/dev/null # shellcheck disable=2181 if [ $? -ne 0 ]; then error "cannot get information for apcupsd server ${host} on ${apcupsd_sources[${host}]}." @@ -77,7 +77,7 @@ apcupsd_create() { local host for host in "${!apcupsd_sources[@]}"; do # create the charts - cat << EOF + cat <<EOF CHART apcupsd_${host}.charge '' "UPS Charge" "percentage" ups apcupsd.charge area $((apcupsd_priority + 2)) $apcupsd_update_every '' '' 'apcupsd' DIMENSION battery_charge charge absolute 1 100 @@ -110,7 +110,28 @@ CHART apcupsd_${host}.time '' "UPS Time Remaining" "Minutes" ups apcupsd.time ar DIMENSION time time absolute 1 100 CHART apcupsd_${host}.online '' "UPS ONLINE flag" "boolean" ups apcupsd.online line $((apcupsd_priority + 9)) $apcupsd_update_every '' '' 'apcupsd' -DIMENSION online online absolute 0 1 +DIMENSION online online absolute 1 1 + +CHART apcupsd_${host}.selftest '' "UPS Self-Test status" "status" ups apcupsd.selftest line $((apcupsd_priority + 10)) $apcupsd_update_every '' '' 'apcupsd' +DIMENSION selftest_OK 'OK' absolute 1 1 +DIMENSION selftest_NO 'NO' absolute 1 1 +DIMENSION selftest_BT 'BT' absolute 1 1 +DIMENSION selftest_NG 'NG' absolute 1 1 + +CHART apcupsd_${host}.status '' "UPS Status" "status" ups apcupsd.status line $((apcupsd_priority + 11)) $apcupsd_update_every '' '' 'apcupsd' +DIMENSION status_ONLINE 'ONLINE' absolute 1 1 +DIMENSION status_ONBATT 'ONBATT' absolute 1 1 +DIMENSION status_OVERLOAD 'OVERLOAD' absolute 1 1 +DIMENSION status_LOWBATT 'LOWBATT' absolute 1 1 +DIMENSION status_REPLACEBATT 'REPLACEBATT' absolute 1 1 +DIMENSION status_NOBATT 'NOBATT' absolute 1 1 +DIMENSION status_SLAVE 'SLAVE' absolute 1 1 +DIMENSION status_SLAVEDOWN 'SLAVEDOWN' absolute 1 1 +DIMENSION status_COMMLOST 'COMMLOST' absolute 1 1 +DIMENSION status_CAL 'CAL' absolute 1 1 +DIMENSION status_TRIM 'TRIM' absolute 1 1 +DIMENSION status_BOOST 'BOOST' absolute 1 1 +DIMENSION status_SHUTTING_DOWN 'SHUTTING_DOWN' absolute 1 1 EOF done @@ -130,20 +151,38 @@ apcupsd_update() { apcupsd_get "${apcupsd_sources[${host}]}" | awk " BEGIN { - battery_charge = 0; - battery_voltage = 0; - battery_voltage_nominal = 0; - input_voltage = 0; - input_voltage_min = 0; - input_voltage_max = 0; - input_frequency = 0; - output_voltage = 0; - output_voltage_nominal = 0; - load = 0; - temp = 0; - time = 0; - nompower = 0; - load_usage = 0; + battery_charge = 0; + battery_voltage = 0; + battery_voltage_nominal = 0; + input_voltage = 0; + input_voltage_min = 0; + input_voltage_max = 0; + input_frequency = 0; + output_voltage = 0; + output_voltage_nominal = 0; + load = 0; + temp = 0; + time = 0; + nompower = 0; + load_usage = 0; + selftest_OK = 0; + selftest_NO = 0; + selftest_BT = 0; + selftest_NG = 0; + status_ONLINE = 0; + status_CAL = 0; + status_TRIM = 0; + status_BOOST = 0; + status_ONBATT = 0; + status_OVERLOAD = 0; + status_LOWBATT = 0; + status_REPLACEBATT = 0; + status_NOBATT = 0; + status_SLAVE = 0; + status_SLAVEDOWN = 0; + status_COMMLOST = 0; + status_SHUTTING_DOWN = 0; + } /^BCHARGE.*/ { battery_charge = \$3 * 100 }; /^BATTV.*/ { battery_voltage = \$3 * 100 }; @@ -158,55 +197,98 @@ BEGIN { /^ITEMP.*/ { temp = \$3 * 100 }; /^NOMPOWER.*/ { nompower = \$3 }; /^TIMELEFT.*/ { time = \$3 * 100 }; -/^STATUS.*/ { online=(\$3 != \"COMMLOST\" && !(\$3 == \"SHUTTING\" && \$4 == \"DOWN\"))?1:0 }; +/^STATUS.*/ { online=(\$3 != \"COMMLOST\" && !(\$3 == \"SHUTTING\" && \$4 == \"DOWN\"))?1:0; }; +/^SELFTEST.*/ { selftest_OK = (\$3 == \"OK\") ? 1 : 0; + selftest_NO = (\$3 == \"NO\") ? 1 : 0; + selftest_BT = (\$3 == \"BT\") ? 1 : 0; + selftest_NG = (\$3 == \"NG\") ? 1 : 0; + }; +/^STATUS.*/ { status_ONLINE = (\$3 == \"ONLINE\") ? 1 : 0; + status_CAL = (\$3 == \"CAL\") ? 1 : 0; + status_TRIM = (\$3 == \"TRIM\") ? 1 : 0; + status_BOOST = (\$3 == \"BOOST\") ? 1 : 0; + status_ONBATT = (\$3 == \"ONBATT\") ? 1 : 0; + status_OVERLOAD = (\$3 == \"OVERLOAD\") ? 1 : 0; + status_LOWBATT = (\$3 == \"LOWBATT\") ? 1 : 0; + status_REPLACEBATT = (\$3 == \"REPLACEBATT\") ? 1 : 0; + status_NOBATT = (\$3 == \"NOBATT\") ? 1 : 0; + status_SLAVE = (\$3 == \"SLAVE\") ? 1 : 0; + status_SLAVEDOWN = (\$3 == \"SLAVEDOWN\") ? 1 : 0; + status_COMMLOST = (\$3 == \"COMMLOST\") ? 1 : 0; + status_SHUTTING_DOWN = (\$3 == \"SHUTTING\" && \$4 == \"DOWN\") ? 1 : 0; + }; + END { - { load_usage = nompower * load / 100 }; - - print \"BEGIN apcupsd_${host}.online $1\"; - print \"SET online = \" online; - print \"END\" - - if (online == 1) { - print \"BEGIN apcupsd_${host}.charge $1\"; - print \"SET battery_charge = \" battery_charge; - print \"END\" - - print \"BEGIN apcupsd_${host}.battery_voltage $1\"; - print \"SET battery_voltage = \" battery_voltage; - print \"SET battery_voltage_nominal = \" battery_voltage_nominal; - print \"END\" - - print \"BEGIN apcupsd_${host}.input_voltage $1\"; - print \"SET input_voltage = \" input_voltage; - print \"SET input_voltage_min = \" input_voltage_min; - print \"SET input_voltage_max = \" input_voltage_max; - print \"END\" - - print \"BEGIN apcupsd_${host}.input_frequency $1\"; - print \"SET input_frequency = \" input_frequency; - print \"END\" - - print \"BEGIN apcupsd_${host}.output_voltage $1\"; - print \"SET output_voltage = \" output_voltage; - print \"SET output_voltage_nominal = \" output_voltage_nominal; - print \"END\" - - print \"BEGIN apcupsd_${host}.load $1\"; - print \"SET load = \" load; - print \"END\" - - print \"BEGIN apcupsd_${host}.load_usage $1\"; - print \"SET load_usage = \" load_usage; - print \"END\" - - print \"BEGIN apcupsd_${host}.temp $1\"; - print \"SET temp = \" temp; - print \"END\" - - print \"BEGIN apcupsd_${host}.time $1\"; - print \"SET time = \" time; - print \"END\" - } + { load_usage = nompower * load / 100 }; + + print \"BEGIN apcupsd_${host}.online $1\"; + print \"SET online = \" online; + print \"END\" + + if (online == 1) { + print \"BEGIN apcupsd_${host}.charge $1\"; + print \"SET battery_charge = \" battery_charge; + print \"END\" + + print \"BEGIN apcupsd_${host}.battery_voltage $1\"; + print \"SET battery_voltage = \" battery_voltage; + print \"SET battery_voltage_nominal = \" battery_voltage_nominal; + print \"END\" + + print \"BEGIN apcupsd_${host}.input_voltage $1\"; + print \"SET input_voltage = \" input_voltage; + print \"SET input_voltage_min = \" input_voltage_min; + print \"SET input_voltage_max = \" input_voltage_max; + print \"END\" + + print \"BEGIN apcupsd_${host}.input_frequency $1\"; + print \"SET input_frequency = \" input_frequency; + print \"END\" + + print \"BEGIN apcupsd_${host}.output_voltage $1\"; + print \"SET output_voltage = \" output_voltage; + print \"SET output_voltage_nominal = \" output_voltage_nominal; + print \"END\" + + print \"BEGIN apcupsd_${host}.load $1\"; + print \"SET load = \" load; + print \"END\" + + print \"BEGIN apcupsd_${host}.load_usage $1\"; + print \"SET load_usage = \" load_usage; + print \"END\" + + print \"BEGIN apcupsd_${host}.temp $1\"; + print \"SET temp = \" temp; + print \"END\" + + print \"BEGIN apcupsd_${host}.time $1\"; + print \"SET time = \" time; + print \"END\" + + print \"BEGIN apcupsd_${host}.selftest $1\"; + print \"SET selftest_OK = \" selftest_OK; + print \"SET selftest_NO = \" selftest_NO; + print \"SET selftest_BT = \" selftest_BT; + print \"SET selftest_NG = \" selftest_NG; + print \"END\" + + print \"BEGIN apcupsd_${host}.status $1\"; + print \"SET status_ONLINE = \" status_ONLINE; + print \"SET status_ONBATT = \" status_ONBATT; + print \"SET status_OVERLOAD = \" status_OVERLOAD; + print \"SET status_LOWBATT = \" status_LOWBATT; + print \"SET status_REPLACEBATT = \" status_REPLACEBATT; + print \"SET status_NOBATT = \" status_NOBATT; + print \"SET status_SLAVE = \" status_SLAVE; + print \"SET status_SLAVEDOWN = \" status_SLAVEDOWN; + print \"SET status_COMMLOST = \" status_COMMLOST; + print \"SET status_CAL = \" status_CAL; + print \"SET status_TRIM = \" status_TRIM; + print \"SET status_BOOST = \" status_BOOST; + print \"SET status_SHUTTING_DOWN = \" status_SHUTTING_DOWN; + print \"END\"; + } }" # shellcheck disable=SC2181 if [ $? -ne 0 ]; then diff --git a/collectors/charts.d.plugin/apcupsd/integrations/apc_ups.md b/collectors/charts.d.plugin/apcupsd/integrations/apc_ups.md index 4d1f2edd6..a5c1f9613 100644 --- a/collectors/charts.d.plugin/apcupsd/integrations/apc_ups.md +++ b/collectors/charts.d.plugin/apcupsd/integrations/apc_ups.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/charts.d.p sidebar_label: "APC UPS" learn_status: "Published" learn_rel_path: "Data Collection/UPS" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -72,6 +73,8 @@ Metrics: | apcupsd.temperature | temp | Celsius | | apcupsd.time | time | Minutes | | apcupsd.online | online | boolean | +| apcupsd.selftest | OK, NO, BT, NG | status | +| apcupsd.status | ONLINE, ONBATT, OVERLOAD, LOWBATT, REPLACEBATT, NOBATT, SLAVE, SLAVEDOWN, COMMLOST, CAL, TRIM, BOOST, SHUTTING_DOWN | status | @@ -85,6 +88,13 @@ The following alerts are available: | [ apcupsd_ups_charge ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.charge | average UPS charge over the last minute | | [ apcupsd_10min_ups_load ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.load | average UPS load over the last 10 minutes | | [ apcupsd_last_collected_secs ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.load | number of seconds since the last successful data collection | +| [ apcupsd_selftest_warning ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.selftest | self-test failed due to insufficient battery capacity or due to overload. | +| [ apcupsd_status_onbatt ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.status | APC UPS has switched to battery power because the input power has failed | +| [ apcupsd_status_overload ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.status | APC UPS is overloaded and cannot supply enough power to the load | +| [ apcupsd_status_lowbatt ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.status | APC UPS battery is low and needs to be recharged | +| [ apcupsd_status_replacebatt ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.status | APC UPS battery has reached the end of its lifespan and needs to be replaced | +| [ apcupsd_status_nobatt ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.status | APC UPS has no battery | +| [ apcupsd_status_commlost ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.status | APC UPS communication link is lost | ## Setup @@ -126,11 +136,11 @@ The following collapsed table contains all the options that can be configured fo | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| apcupsd_sources | This is an array of apcupsd sources. You can have multiple entries there. Please refer to the example below on how to set it. | 127.0.0.1:3551 | False | -| apcupsd_timeout | How long to wait for apcupsd to respond. | 3 | False | -| apcupsd_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 1 | False | -| apcupsd_priority | The charts priority on the dashboard. | 90000 | False | -| apcupsd_retries | The number of retries to do in case of failure before disabling the collector. | 10 | False | +| apcupsd_sources | This is an array of apcupsd sources. You can have multiple entries there. Please refer to the example below on how to set it. | 127.0.0.1:3551 | no | +| apcupsd_timeout | How long to wait for apcupsd to respond. | 3 | no | +| apcupsd_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 1 | no | +| apcupsd_priority | The charts priority on the dashboard. | 90000 | no | +| apcupsd_retries | The number of retries to do in case of failure before disabling the collector. | 10 | no | </details> diff --git a/collectors/charts.d.plugin/apcupsd/metadata.yaml b/collectors/charts.d.plugin/apcupsd/metadata.yaml index 07d56d48d..c333dc964 100644 --- a/collectors/charts.d.plugin/apcupsd/metadata.yaml +++ b/collectors/charts.d.plugin/apcupsd/metadata.yaml @@ -124,6 +124,34 @@ modules: link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf metric: apcupsd.load info: number of seconds since the last successful data collection + - name: apcupsd_selftest_warning + link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf + metric: apcupsd.selftest + info: self-test failed due to insufficient battery capacity or due to overload. + - name: apcupsd_status_onbatt + link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf + metric: apcupsd.status + info: APC UPS has switched to battery power because the input power has failed + - name: apcupsd_status_overload + link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf + metric: apcupsd.status + info: APC UPS is overloaded and cannot supply enough power to the load + - name: apcupsd_status_lowbatt + link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf + metric: apcupsd.status + info: APC UPS battery is low and needs to be recharged + - name: apcupsd_status_replacebatt + link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf + metric: apcupsd.status + info: APC UPS battery has reached the end of its lifespan and needs to be replaced + - name: apcupsd_status_nobatt + link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf + metric: apcupsd.status + info: APC UPS has no battery + - name: apcupsd_status_commlost + link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf + metric: apcupsd.status + info: APC UPS communication link is lost metrics: folding: title: Metrics @@ -199,3 +227,30 @@ modules: chart_type: line dimensions: - name: online + - name: apcupsd.selftest + description: UPS Self-Test status + unit: status + chart_type: line + dimensions: + - name: OK + - name: NO + - name: BT + - name: NG + - name: apcupsd.status + description: UPS Status + unit: status + chart_type: line + dimensions: + - name: ONLINE + - name: ONBATT + - name: OVERLOAD + - name: LOWBATT + - name: REPLACEBATT + - name: NOBATT + - name: SLAVE + - name: SLAVEDOWN + - name: COMMLOST + - name: CAL + - name: TRIM + - name: BOOST + - name: SHUTTING_DOWN diff --git a/collectors/charts.d.plugin/charts.d.conf b/collectors/charts.d.plugin/charts.d.conf index 2d32f73ea..4614f259e 100644 --- a/collectors/charts.d.plugin/charts.d.conf +++ b/collectors/charts.d.plugin/charts.d.conf @@ -36,7 +36,6 @@ # ap=yes # apcupsd=yes # libreswan=yes -# nut=yes # opensips=yes # ----------------------------------------------------------------------------- diff --git a/collectors/charts.d.plugin/charts.d.plugin.in b/collectors/charts.d.plugin/charts.d.plugin.in index 34a5a656e..4e64b7e23 100755 --- a/collectors/charts.d.plugin/charts.d.plugin.in +++ b/collectors/charts.d.plugin/charts.d.plugin.in @@ -13,27 +13,115 @@ # each will have a different config file and modules configuration directory. # -export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin" +export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin:@sbindir_POST@" PROGRAM_FILE="$0" -PROGRAM_NAME="$(basename $0)" -PROGRAM_NAME="${PROGRAM_NAME/.plugin/}" MODULE_NAME="main" -LOG_LEVEL_ERR=1 -LOG_LEVEL_WARN=2 -LOG_LEVEL_INFO=3 -LOG_LEVEL="$LOG_LEVEL_INFO" - -set_log_severity_level() { - case ${NETDATA_LOG_SEVERITY_LEVEL,,} in - "info") LOG_LEVEL="$LOG_LEVEL_INFO";; - "warn" | "warning") LOG_LEVEL="$LOG_LEVEL_WARN";; - "err" | "error") LOG_LEVEL="$LOG_LEVEL_ERR";; +# ----------------------------------------------------------------------------- +# logging + +PROGRAM_NAME="$(basename "${0}")" +SHORT_PROGRAM_NAME="${PROGRAM_NAME/.plugin/}" + +# these should be the same with syslog() priorities +NDLP_EMERG=0 # system is unusable +NDLP_ALERT=1 # action must be taken immediately +NDLP_CRIT=2 # critical conditions +NDLP_ERR=3 # error conditions +NDLP_WARN=4 # warning conditions +NDLP_NOTICE=5 # normal but significant condition +NDLP_INFO=6 # informational +NDLP_DEBUG=7 # debug-level messages + +# the max (numerically) log level we will log +LOG_LEVEL=$NDLP_INFO + +set_log_min_priority() { + case "${NETDATA_LOG_LEVEL,,}" in + "emerg" | "emergency") + LOG_LEVEL=$NDLP_EMERG + ;; + + "alert") + LOG_LEVEL=$NDLP_ALERT + ;; + + "crit" | "critical") + LOG_LEVEL=$NDLP_CRIT + ;; + + "err" | "error") + LOG_LEVEL=$NDLP_ERR + ;; + + "warn" | "warning") + LOG_LEVEL=$NDLP_WARN + ;; + + "notice") + LOG_LEVEL=$NDLP_NOTICE + ;; + + "info") + LOG_LEVEL=$NDLP_INFO + ;; + + "debug") + LOG_LEVEL=$NDLP_DEBUG + ;; esac } -set_log_severity_level +set_log_min_priority + +log() { + local level="${1}" + shift 1 + + [[ -n "$level" && -n "$LOG_LEVEL" && "$level" -gt "$LOG_LEVEL" ]] && return + + systemd-cat-native --log-as-netdata --newline="--NEWLINE--" <<EOFLOG +INVOCATION_ID=${NETDATA_INVOCATION_ID} +SYSLOG_IDENTIFIER=${PROGRAM_NAME} +PRIORITY=${level} +THREAD_TAG=charts.d.plugin +ND_LOG_SOURCE=collector +MESSAGE=${MODULE_NAME}: ${*//\\n/--NEWLINE--} + +EOFLOG + # AN EMPTY LINE IS NEEDED ABOVE +} + +info() { + log "$NDLP_INFO" "${@}" +} + +warning() { + log "$NDLP_WARN" "${@}" +} + +error() { + log "$NDLP_ERR" "${@}" +} + +fatal() { + log "$NDLP_ALERT" "${@}" + echo "DISABLE" + exit 1 +} + +debug() { + [ "$debug" = "1" ] && log "$NDLP_DEBUG" "${@}" +} + +# ----------------------------------------------------------------------------- +# check for BASH v4+ (required for associative arrays) + +if [ ${BASH_VERSINFO[0]} -lt 4 ]; then + echo >&2 "BASH version 4 or later is required (this is ${BASH_VERSION})." + exit 1 +fi # ----------------------------------------------------------------------------- # create temp dir @@ -62,39 +150,6 @@ logdate() { date "+%Y-%m-%d %H:%M:%S" } -log() { - local status="${1}" - shift - - echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${MODULE_NAME}: ${*}" - -} - -info() { - [[ -n "$LOG_LEVEL" && "$LOG_LEVEL_INFO" -gt "$LOG_LEVEL" ]] && return - log INFO "${@}" -} - -warning() { - [[ -n "$LOG_LEVEL" && "$LOG_LEVEL_WARN" -gt "$LOG_LEVEL" ]] && return - log WARNING "${@}" -} - -error() { - [[ -n "$LOG_LEVEL" && "$LOG_LEVEL_ERR" -gt "$LOG_LEVEL" ]] && return - log ERROR "${@}" -} - -fatal() { - log FATAL "${@}" - echo "DISABLE" - exit 1 -} - -debug() { - [ $debug -eq 1 ] && log DEBUG "${@}" -} - # ----------------------------------------------------------------------------- # check a few commands @@ -137,8 +192,8 @@ info "started from '$PROGRAM_FILE' with options: $*" [ -z "${NETDATA_STOCK_CONFIG_DIR}" ] && NETDATA_STOCK_CONFIG_DIR="@libconfigdir_POST@" pluginsd="${NETDATA_PLUGINS_DIR}" -stockconfd="${NETDATA_STOCK_CONFIG_DIR}/${PROGRAM_NAME}" -userconfd="${NETDATA_USER_CONFIG_DIR}/${PROGRAM_NAME}" +stockconfd="${NETDATA_STOCK_CONFIG_DIR}/${SHORT_PROGRAM_NAME}" +userconfd="${NETDATA_USER_CONFIG_DIR}/${SHORT_PROGRAM_NAME}" olduserconfd="${NETDATA_USER_CONFIG_DIR}" chartsd="$pluginsd/../charts.d" @@ -194,12 +249,14 @@ while [ ! -z "$1" ]; do if [ "$1" = "debug" -o "$1" = "all" ]; then debug=1 + LOG_LEVEL=$NDLP_DEBUG shift continue fi if [ -f "$chartsd/$1.chart.sh" ]; then debug=1 + LOG_LEVEL=$NDLP_DEBUG chart_only="$(echo $1.chart.sh | sed "s/\.chart\.sh$//g")" shift continue @@ -207,6 +264,7 @@ while [ ! -z "$1" ]; do if [ -f "$chartsd/$1" ]; then debug=1 + LOG_LEVEL=$NDLP_DEBUG chart_only="$(echo $1 | sed "s/\.chart\.sh$//g")" shift continue @@ -247,7 +305,7 @@ source "$pluginsd/loopsleepms.sh.inc" # ----------------------------------------------------------------------------- # load my configuration -for myconfig in "${NETDATA_STOCK_CONFIG_DIR}/${PROGRAM_NAME}.conf" "${NETDATA_USER_CONFIG_DIR}/${PROGRAM_NAME}.conf"; do +for myconfig in "${NETDATA_STOCK_CONFIG_DIR}/${SHORT_PROGRAM_NAME}.conf" "${NETDATA_USER_CONFIG_DIR}/${SHORT_PROGRAM_NAME}.conf"; do if [ -f "$myconfig" ]; then source "$myconfig" if [ $? -ne 0 ]; then diff --git a/collectors/charts.d.plugin/libreswan/integrations/libreswan.md b/collectors/charts.d.plugin/libreswan/integrations/libreswan.md index 6f93a5f4c..bd1eec647 100644 --- a/collectors/charts.d.plugin/libreswan/integrations/libreswan.md +++ b/collectors/charts.d.plugin/libreswan/integrations/libreswan.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/charts.d.p sidebar_label: "Libreswan" learn_status: "Published" learn_rel_path: "Data Collection/VPNs" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -132,10 +133,10 @@ The following collapsed table contains all the options that can be configured fo | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| libreswan_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 1 | False | -| libreswan_priority | The charts priority on the dashboard | 90000 | False | -| libreswan_retries | The number of retries to do in case of failure before disabling the collector. | 10 | False | -| libreswan_sudo | Whether to run `ipsec` with `sudo` or not. | 1 | False | +| libreswan_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 1 | no | +| libreswan_priority | The charts priority on the dashboard | 90000 | no | +| libreswan_retries | The number of retries to do in case of failure before disabling the collector. | 10 | no | +| libreswan_sudo | Whether to run `ipsec` with `sudo` or not. | 1 | no | </details> diff --git a/collectors/charts.d.plugin/nut/Makefile.inc b/collectors/charts.d.plugin/nut/Makefile.inc deleted file mode 100644 index 4fb47145d..000000000 --- a/collectors/charts.d.plugin/nut/Makefile.inc +++ /dev/null @@ -1,13 +0,0 @@ -# SPDX-License-Identifier: GPL-3.0-or-later - -# THIS IS NOT A COMPLETE Makefile -# IT IS INCLUDED BY ITS PARENT'S Makefile.am -# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT - -# install these files -dist_charts_DATA += nut/nut.chart.sh -dist_chartsconfig_DATA += nut/nut.conf - -# do not install these files, but include them in the distribution -dist_noinst_DATA += nut/README.md nut/Makefile.inc - diff --git a/collectors/charts.d.plugin/nut/README.md b/collectors/charts.d.plugin/nut/README.md deleted file mode 120000 index abfefd6f7..000000000 --- a/collectors/charts.d.plugin/nut/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/network_ups_tools_nut.md
\ No newline at end of file diff --git a/collectors/charts.d.plugin/nut/integrations/network_ups_tools_nut.md b/collectors/charts.d.plugin/nut/integrations/network_ups_tools_nut.md deleted file mode 100644 index 74be607a1..000000000 --- a/collectors/charts.d.plugin/nut/integrations/network_ups_tools_nut.md +++ /dev/null @@ -1,207 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/collectors/charts.d.plugin/nut/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/charts.d.plugin/nut/metadata.yaml" -sidebar_label: "Network UPS Tools (NUT)" -learn_status: "Published" -learn_rel_path: "Data Collection/UPS" -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Network UPS Tools (NUT) - - -<img src="https://netdata.cloud/img/plug-circle-bolt.svg" width="150"/> - - -Plugin: charts.d.plugin -Module: nut - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -Examine UPS/PDU metrics with Netdata for insights into power device performance. Improve your power device performance with comprehensive dashboards and anomaly detection. - -This collector uses the `nut` (Network UPS Tools) to query statistics for multiple UPS devices. - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -This integration doesn't support auto-detection. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per ups - -Metrics related to UPS. Each UPS provides its own set of the following metrics. - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| nut.charge | charge | percentage | -| nut.runtime | runtime | seconds | -| nut.battery.voltage | voltage, high, low, nominal | Volts | -| nut.input.voltage | voltage, fault, nominal | Volts | -| nut.input.current | nominal | Ampere | -| nut.input.frequency | frequency, nominal | Hz | -| nut.output.voltage | voltage | Volts | -| nut.load | load | percentage | -| nut.load_usage | load_usage | Watts | -| nut.temperature | temp | temperature | -| nut.clients | clients | clients | - - - -## Alerts - - -The following alerts are available: - -| Alert name | On metric | Description | -|:------------|:----------|:------------| -| [ nut_ups_charge ](https://github.com/netdata/netdata/blob/master/health/health.d/nut.conf) | nut.charge | average UPS charge over the last minute | -| [ nut_10min_ups_load ](https://github.com/netdata/netdata/blob/master/health/health.d/nut.conf) | nut.load | average UPS load over the last 10 minutes | -| [ nut_last_collected_secs ](https://github.com/netdata/netdata/blob/master/health/health.d/nut.conf) | nut.load | number of seconds since the last successful data collection | - - -## Setup - -### Prerequisites - -#### Install charts.d plugin - -If [using our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure `netdata-plugin-chartsd` is installed. - - -#### Required software - -Make sure the Network UPS Tools (`nut`) is installed and can detect your UPS devices. - - -### Configuration - -#### File - -The configuration file name for this integration is `charts.d/nut.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config charts.d/nut.conf -``` -#### Options - -The config file is sourced by the charts.d plugin. It's a standard bash file. - -The following collapsed table contains all the options that can be configured for the nut collector. - - -<details><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| nut_ups | A space separated list of UPS names. If empty, the list returned by `upsc -l` will be used. | | False | -| nut_names | Each line represents an alias for one UPS. If empty, the FQDN will be used. | | False | -| nut_timeout | How long to wait for nut to respond. | 2 | False | -| nut_clients_chart | Set this to 1 to enable another chart showing the number of UPS clients connected to `upsd`. | 1 | False | -| nut_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 2 | False | -| nut_priority | The charts priority on the dashboard | 90000 | False | -| nut_retries | The number of retries to do in case of failure before disabling the collector. | 10 | False | - -</details> - -#### Examples - -##### Provide names to UPS devices - -Map aliases to UPS devices - -<details><summary>Config</summary> - -```yaml -# a space separated list of UPS names -# if empty, the list returned by 'upsc -l' will be used -#nut_ups= - -# each line represents an alias for one UPS -# if empty, the FQDN will be used -nut_names["XXXXXX"]="UPS-office" -nut_names["YYYYYY"]="UPS-rack" - -# how much time in seconds, to wait for nut to respond -#nut_timeout=2 - -# set this to 1, to enable another chart showing the number -# of UPS clients connected to upsd -#nut_clients_chart=1 - -# the data collection frequency -# if unset, will inherit the netdata update frequency -#nut_update_every=2 - -# the charts priority on the dashboard -#nut_priority=90000 - -# the number of retries to do in case of failure -# before disabling the module -#nut_retries=10 - -``` -</details> - - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `nut` collector, run the `charts.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. - - ```bash - sudo -u netdata -s - ``` - -- Run the `charts.d.plugin` to debug the collector: - - ```bash - ./charts.d.plugin debug 1 nut - ``` - - diff --git a/collectors/charts.d.plugin/nut/metadata.yaml b/collectors/charts.d.plugin/nut/metadata.yaml deleted file mode 100644 index ed3ffebf7..000000000 --- a/collectors/charts.d.plugin/nut/metadata.yaml +++ /dev/null @@ -1,222 +0,0 @@ -plugin_name: charts.d.plugin -modules: - - meta: - plugin_name: charts.d.plugin - module_name: nut - monitored_instance: - name: Network UPS Tools (NUT) - link: '' - categories: - - data-collection.ups - icon_filename: 'plug-circle-bolt.svg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: - - nut - - network ups tools - - ups - - pdu - most_popular: false - overview: - data_collection: - metrics_description: 'Examine UPS/PDU metrics with Netdata for insights into power device performance. Improve your power device performance with comprehensive dashboards and anomaly detection.' - method_description: 'This collector uses the `nut` (Network UPS Tools) to query statistics for multiple UPS devices.' - supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: - - title: "Install charts.d plugin" - description: | - If [using our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure `netdata-plugin-chartsd` is installed. - - title: "Required software" - description: "Make sure the Network UPS Tools (`nut`) is installed and can detect your UPS devices." - configuration: - file: - name: charts.d/nut.conf - options: - description: | - The config file is sourced by the charts.d plugin. It's a standard bash file. - - The following collapsed table contains all the options that can be configured for the nut collector. - folding: - title: "Config options" - enabled: true - list: - - name: nut_ups - description: A space separated list of UPS names. If empty, the list returned by `upsc -l` will be used. - default_value: "" - required: false - - name: nut_names - description: Each line represents an alias for one UPS. If empty, the FQDN will be used. - default_value: "" - required: false - - name: nut_timeout - description: How long to wait for nut to respond. - default_value: 2 - required: false - - name: nut_clients_chart - description: Set this to 1 to enable another chart showing the number of UPS clients connected to `upsd`. - default_value: 1 - required: false - - name: nut_update_every - description: The data collection frequency. If unset, will inherit the netdata update frequency. - default_value: 2 - required: false - - name: nut_priority - description: The charts priority on the dashboard - default_value: 90000 - required: false - - name: nut_retries - description: The number of retries to do in case of failure before disabling the collector. - default_value: 10 - required: false - examples: - folding: - enabled: true - title: "Config" - list: - - name: Provide names to UPS devices - description: Map aliases to UPS devices - config: | - # a space separated list of UPS names - # if empty, the list returned by 'upsc -l' will be used - #nut_ups= - - # each line represents an alias for one UPS - # if empty, the FQDN will be used - nut_names["XXXXXX"]="UPS-office" - nut_names["YYYYYY"]="UPS-rack" - - # how much time in seconds, to wait for nut to respond - #nut_timeout=2 - - # set this to 1, to enable another chart showing the number - # of UPS clients connected to upsd - #nut_clients_chart=1 - - # the data collection frequency - # if unset, will inherit the netdata update frequency - #nut_update_every=2 - - # the charts priority on the dashboard - #nut_priority=90000 - - # the number of retries to do in case of failure - # before disabling the module - #nut_retries=10 - troubleshooting: - problems: - list: [] - alerts: - - name: nut_ups_charge - link: https://github.com/netdata/netdata/blob/master/health/health.d/nut.conf - metric: nut.charge - info: average UPS charge over the last minute - os: "*" - - name: nut_10min_ups_load - link: https://github.com/netdata/netdata/blob/master/health/health.d/nut.conf - metric: nut.load - info: average UPS load over the last 10 minutes - os: "*" - - name: nut_last_collected_secs - link: https://github.com/netdata/netdata/blob/master/health/health.d/nut.conf - metric: nut.load - info: number of seconds since the last successful data collection - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: ups - description: "Metrics related to UPS. Each UPS provides its own set of the following metrics." - labels: [] - metrics: - - name: nut.charge - description: UPS Charge - unit: "percentage" - chart_type: area - dimensions: - - name: charge - - name: nut.runtime - description: UPS Runtime - unit: "seconds" - chart_type: line - dimensions: - - name: runtime - - name: nut.battery.voltage - description: UPS Battery Voltage - unit: "Volts" - chart_type: line - dimensions: - - name: voltage - - name: high - - name: low - - name: nominal - - name: nut.input.voltage - description: UPS Input Voltage - unit: "Volts" - chart_type: line - dimensions: - - name: voltage - - name: fault - - name: nominal - - name: nut.input.current - description: UPS Input Current - unit: "Ampere" - chart_type: line - dimensions: - - name: nominal - - name: nut.input.frequency - description: UPS Input Frequency - unit: "Hz" - chart_type: line - dimensions: - - name: frequency - - name: nominal - - name: nut.output.voltage - description: UPS Output Voltage - unit: "Volts" - chart_type: line - dimensions: - - name: voltage - - name: nut.load - description: UPS Load - unit: "percentage" - chart_type: area - dimensions: - - name: load - - name: nut.load_usage - description: UPS Load Usage - unit: "Watts" - chart_type: area - dimensions: - - name: load_usage - - name: nut.temperature - description: UPS Temperature - unit: "temperature" - chart_type: line - dimensions: - - name: temp - - name: nut.clients - description: UPS Connected Clients - unit: "clients" - chart_type: area - dimensions: - - name: clients diff --git a/collectors/charts.d.plugin/nut/nut.chart.sh b/collectors/charts.d.plugin/nut/nut.chart.sh deleted file mode 100644 index 7c32b6dde..000000000 --- a/collectors/charts.d.plugin/nut/nut.chart.sh +++ /dev/null @@ -1,244 +0,0 @@ -# shellcheck shell=bash -# no need for shebang - this file is loaded from charts.d.plugin -# SPDX-License-Identifier: GPL-3.0-or-later - -# netdata -# real-time performance and health monitoring, done right! -# (C) 2016-2017 Costa Tsaousis <costa@tsaousis.gr> -# - -# a space separated list of UPS names -# if empty, the list returned by 'upsc -l' will be used -nut_ups= - -# how frequently to collect UPS data -nut_update_every=2 - -# how much time in seconds, to wait for nut to respond -nut_timeout=2 - -# set this to 1, to enable another chart showing the number -# of UPS clients connected to upsd -nut_clients_chart=0 - -# the priority of nut related to other charts -nut_priority=90000 - -declare -A nut_ids=() -declare -A nut_names=() - -nut_get_all() { - run -t $nut_timeout upsc -l -} - -nut_get() { - run -t $nut_timeout upsc "$1" - - if [ "${nut_clients_chart}" -eq "1" ]; then - printf "ups.connected_clients: " - run -t $nut_timeout upsc -c "$1" | wc -l - fi -} - -nut_check() { - - # this should return: - # - 0 to enable the chart - # - 1 to disable the chart - - local x - - require_cmd upsc || return 1 - - [ -z "$nut_ups" ] && nut_ups="$(nut_get_all)" - - for x in $nut_ups; do - nut_get "$x" > /dev/null - # shellcheck disable=SC2181 - if [ $? -eq 0 ]; then - if [ -n "${nut_names[${x}]}" ]; then - nut_ids[$x]="$(fixid "${nut_names[${x}]}")" - else - nut_ids[$x]="$(fixid "$x")" - fi - continue - fi - error "cannot get information for NUT UPS '$x'." - done - - if [ ${#nut_ids[@]} -eq 0 ]; then - # shellcheck disable=SC2154 - error "Cannot find UPSes - please set nut_ups='ups_name' in $confd/nut.conf" - return 1 - fi - - return 0 -} - -nut_create() { - # create the charts - local x - - for x in "${nut_ids[@]}"; do - cat << EOF -CHART nut_$x.charge '' "UPS Charge" "percentage" ups nut.charge area $((nut_priority + 2)) $nut_update_every -DIMENSION battery_charge charge absolute 1 100 - -CHART nut_$x.runtime '' "UPS Runtime" "seconds" ups nut.runtime area $((nut_priority + 3)) $nut_update_every -DIMENSION battery_runtime runtime absolute 1 100 - -CHART nut_$x.battery_voltage '' "UPS Battery Voltage" "Volts" ups nut.battery.voltage line $((nut_priority + 4)) $nut_update_every -DIMENSION battery_voltage voltage absolute 1 100 -DIMENSION battery_voltage_high high absolute 1 100 -DIMENSION battery_voltage_low low absolute 1 100 -DIMENSION battery_voltage_nominal nominal absolute 1 100 - -CHART nut_$x.input_voltage '' "UPS Input Voltage" "Volts" input nut.input.voltage line $((nut_priority + 5)) $nut_update_every -DIMENSION input_voltage voltage absolute 1 100 -DIMENSION input_voltage_fault fault absolute 1 100 -DIMENSION input_voltage_nominal nominal absolute 1 100 - -CHART nut_$x.input_current '' "UPS Input Current" "Ampere" input nut.input.current line $((nut_priority + 6)) $nut_update_every -DIMENSION input_current_nominal nominal absolute 1 100 - -CHART nut_$x.input_frequency '' "UPS Input Frequency" "Hz" input nut.input.frequency line $((nut_priority + 7)) $nut_update_every -DIMENSION input_frequency frequency absolute 1 100 -DIMENSION input_frequency_nominal nominal absolute 1 100 - -CHART nut_$x.output_voltage '' "UPS Output Voltage" "Volts" output nut.output.voltage line $((nut_priority + 8)) $nut_update_every -DIMENSION output_voltage voltage absolute 1 100 - -CHART nut_$x.load '' "UPS Load" "percentage" ups nut.load area $((nut_priority)) $nut_update_every -DIMENSION load load absolute 1 100 - -CHART nut_$x.load_usage '' "UPS Load Usage" "Watts" ups nut.load_usage area $((nut_priority + 1)) $nut_update_every -DIMENSION load_usage load_usage absolute 1 100 - -CHART nut_$x.temp '' "UPS Temperature" "temperature" ups nut.temperature line $((nut_priority + 9)) $nut_update_every -DIMENSION temp temp absolute 1 100 -EOF - - if [ "${nut_clients_chart}" = "1" ]; then - cat << EOF2 -CHART nut_$x.clients '' "UPS Connected Clients" "clients" ups nut.clients area $((nut_priority + 10)) $nut_update_every -DIMENSION clients '' absolute 1 1 -EOF2 - fi - - done - - return 0 -} - -nut_update() { - # the first argument to this function is the microseconds since last update - # pass this parameter to the BEGIN statement (see below). - - # do all the work to collect / calculate the values - # for each dimension - # remember: KEEP IT SIMPLE AND SHORT - - local i x - for i in "${!nut_ids[@]}"; do - x="${nut_ids[$i]}" - nut_get "$i" | awk " -BEGIN { - battery_charge = 0; - battery_runtime = 0; - battery_voltage = 0; - battery_voltage_high = 0; - battery_voltage_low = 0; - battery_voltage_nominal = 0; - input_voltage = 0; - input_voltage_fault = 0; - input_voltage_nominal = 0; - input_current_nominal = 0; - input_frequency = 0; - input_frequency_nominal = 0; - output_voltage = 0; - load = 0; - load_usage = 0; - nompower = 0; - temp = 0; - client = 0; - do_clients = ${nut_clients_chart}; -} -/^battery.charge: .*/ { battery_charge = \$2 * 100 }; -/^battery.runtime: .*/ { battery_runtime = \$2 * 100 }; -/^battery.voltage: .*/ { battery_voltage = \$2 * 100 }; -/^battery.voltage.high: .*/ { battery_voltage_high = \$2 * 100 }; -/^battery.voltage.low: .*/ { battery_voltage_low = \$2 * 100 }; -/^battery.voltage.nominal: .*/ { battery_voltage_nominal = \$2 * 100 }; -/^input.voltage: .*/ { input_voltage = \$2 * 100 }; -/^input.voltage.fault: .*/ { input_voltage_fault = \$2 * 100 }; -/^input.voltage.nominal: .*/ { input_voltage_nominal = \$2 * 100 }; -/^input.current.nominal: .*/ { input_current_nominal = \$2 * 100 }; -/^input.frequency: .*/ { input_frequency = \$2 * 100 }; -/^input.frequency.nominal: .*/ { input_frequency_nominal = \$2 * 100 }; -/^output.voltage: .*/ { output_voltage = \$2 * 100 }; -/^ups.load: .*/ { load = \$2 * 100 }; -/^ups.realpower.nominal: .*/ { nompower = \$2 }; -/^ups.temperature: .*/ { temp = \$2 * 100 }; -/^ups.connected_clients: .*/ { clients = \$2 }; -END { - { load_usage = nompower * load / 100 }; - - print \"BEGIN nut_$x.charge $1\"; - print \"SET battery_charge = \" battery_charge; - print \"END\" - - print \"BEGIN nut_$x.runtime $1\"; - print \"SET battery_runtime = \" battery_runtime; - print \"END\" - - print \"BEGIN nut_$x.battery_voltage $1\"; - print \"SET battery_voltage = \" battery_voltage; - print \"SET battery_voltage_high = \" battery_voltage_high; - print \"SET battery_voltage_low = \" battery_voltage_low; - print \"SET battery_voltage_nominal = \" battery_voltage_nominal; - print \"END\" - - print \"BEGIN nut_$x.input_voltage $1\"; - print \"SET input_voltage = \" input_voltage; - print \"SET input_voltage_fault = \" input_voltage_fault; - print \"SET input_voltage_nominal = \" input_voltage_nominal; - print \"END\" - - print \"BEGIN nut_$x.input_current $1\"; - print \"SET input_current_nominal = \" input_current_nominal; - print \"END\" - - print \"BEGIN nut_$x.input_frequency $1\"; - print \"SET input_frequency = \" input_frequency; - print \"SET input_frequency_nominal = \" input_frequency_nominal; - print \"END\" - - print \"BEGIN nut_$x.output_voltage $1\"; - print \"SET output_voltage = \" output_voltage; - print \"END\" - - print \"BEGIN nut_$x.load $1\"; - print \"SET load = \" load; - print \"END\" - - print \"BEGIN nut_$x.load_usage $1\"; - print \"SET load_usage = \" load_usage; - print \"END\" - - print \"BEGIN nut_$x.temp $1\"; - print \"SET temp = \" temp; - print \"END\" - - if(do_clients) { - print \"BEGIN nut_$x.clients $1\"; - print \"SET clients = \" clients; - print \"END\" - } -}" - # shellcheck disable=2181 - [ $? -ne 0 ] && unset "nut_ids[$i]" && error "failed to get values for '$i', disabling it." - done - - [ ${#nut_ids[@]} -eq 0 ] && error "no UPSes left active." && return 1 - return 0 -} diff --git a/collectors/charts.d.plugin/nut/nut.conf b/collectors/charts.d.plugin/nut/nut.conf deleted file mode 100644 index b95ad9048..000000000 --- a/collectors/charts.d.plugin/nut/nut.conf +++ /dev/null @@ -1,33 +0,0 @@ -# no need for shebang - this file is loaded from charts.d.plugin - -# netdata -# real-time performance and health monitoring, done right! -# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> -# GPL v3+ - -# a space separated list of UPS names -# if empty, the list returned by 'upsc -l' will be used -#nut_ups= - -# each line represents an alias for one UPS -# if empty, the FQDN will be used -#nut_names["FQDN1"]="alias" -#nut_names["FQDN2"]="alias" - -# how much time in seconds, to wait for nut to respond -#nut_timeout=2 - -# set this to 1, to enable another chart showing the number -# of UPS clients connected to upsd -#nut_clients_chart=1 - -# the data collection frequency -# if unset, will inherit the netdata update frequency -#nut_update_every=2 - -# the charts priority on the dashboard -#nut_priority=90000 - -# the number of retries to do in case of failure -# before disabling the module -#nut_retries=10 diff --git a/collectors/charts.d.plugin/opensips/integrations/opensips.md b/collectors/charts.d.plugin/opensips/integrations/opensips.md index 96abc3325..8c88dba0b 100644 --- a/collectors/charts.d.plugin/opensips/integrations/opensips.md +++ b/collectors/charts.d.plugin/opensips/integrations/opensips.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/charts.d.p sidebar_label: "OpenSIPS" learn_status: "Published" learn_rel_path: "Data Collection/Telephony Servers" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -128,12 +129,12 @@ The following collapsed table contains all the options that can be configured fo | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| opensips_opts | Specify parameters to the `opensipsctl` command. If the default value fails to get global status, set here whatever options are needed to connect to the opensips server. | fifo get_statistics all | False | -| opensips_cmd | If `opensipsctl` is not in $PATH, specify it's full path here. | | False | -| opensips_timeout | How long to wait for `opensipsctl` to respond. | 2 | False | -| opensips_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 5 | False | -| opensips_priority | The charts priority on the dashboard. | 80000 | False | -| opensips_retries | The number of retries to do in case of failure before disabling the collector. | 10 | False | +| opensips_opts | Specify parameters to the `opensipsctl` command. If the default value fails to get global status, set here whatever options are needed to connect to the opensips server. | fifo get_statistics all | no | +| opensips_cmd | If `opensipsctl` is not in $PATH, specify it's full path here. | | no | +| opensips_timeout | How long to wait for `opensipsctl` to respond. | 2 | no | +| opensips_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 5 | no | +| opensips_priority | The charts priority on the dashboard. | 80000 | no | +| opensips_retries | The number of retries to do in case of failure before disabling the collector. | 10 | no | </details> diff --git a/collectors/charts.d.plugin/sensors/integrations/linux_sensors_sysfs.md b/collectors/charts.d.plugin/sensors/integrations/linux_sensors_sysfs.md index e0ce74d06..130352f61 100644 --- a/collectors/charts.d.plugin/sensors/integrations/linux_sensors_sysfs.md +++ b/collectors/charts.d.plugin/sensors/integrations/linux_sensors_sysfs.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/charts.d.p sidebar_label: "Linux Sensors (sysfs)" learn_status: "Published" learn_rel_path: "Data Collection/Hardware Devices and Sensors" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -130,12 +131,12 @@ The following collapsed table contains all the options that can be configured fo | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| sensors_sys_dir | The directory the kernel exposes sensor data. | /sys/devices | False | -| sensors_sys_depth | How deep in the tree to check for sensor data. | 10 | False | -| sensors_source_update | If set to 1, the script will overwrite internal script functions with code generated ones. | 1 | False | -| sensors_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 1 | False | -| sensors_priority | The charts priority on the dashboard. | 90000 | False | -| sensors_retries | The number of retries to do in case of failure before disabling the collector. | 10 | False | +| sensors_sys_dir | The directory the kernel exposes sensor data. | /sys/devices | no | +| sensors_sys_depth | How deep in the tree to check for sensor data. | 10 | no | +| sensors_source_update | If set to 1, the script will overwrite internal script functions with code generated ones. | 1 | no | +| sensors_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 1 | no | +| sensors_priority | The charts priority on the dashboard. | 90000 | no | +| sensors_retries | The number of retries to do in case of failure before disabling the collector. | 10 | no | </details> diff --git a/collectors/cups.plugin/cups_plugin.c b/collectors/cups.plugin/cups_plugin.c index 82bc457a1..8efd32e31 100644 --- a/collectors/cups.plugin/cups_plugin.c +++ b/collectors/cups.plugin/cups_plugin.c @@ -226,22 +226,8 @@ void reset_metrics() { } int main(int argc, char **argv) { - stderror = stderr; clocks_init(); - - // ------------------------------------------------------------------------ - // initialization of netdata plugin - - program_name = "cups.plugin"; - - // disable syslog - error_log_syslog = 0; - - // set errors flood protection to 100 logs per hour - error_log_errors_per_period = 100; - error_log_throttle_period = 3600; - - log_set_global_severity_for_external_plugins(); + nd_log_initialize_for_external_plugins("cups.plugin"); parse_command_line(argc, argv); @@ -437,6 +423,13 @@ int main(int argc, char **argv) { // restart check (14400 seconds) if (!now_monotonic_sec() - started_t > 14400) break; + + fprintf(stdout, "\n"); + fflush(stdout); + if (ferror(stdout) && errno == EPIPE) { + netdata_log_error("error writing to stdout: EPIPE. Exiting..."); + return 1; + } } httpClose(http); diff --git a/collectors/cups.plugin/integrations/cups.md b/collectors/cups.plugin/integrations/cups.md index aa981a99e..a8ea5b15f 100644 --- a/collectors/cups.plugin/integrations/cups.md +++ b/collectors/cups.plugin/integrations/cups.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/cups.plugi sidebar_label: "CUPS" learn_status: "Published" learn_rel_path: "Data Collection/Hardware Devices and Sensors" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -129,8 +130,8 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 1 | False | -| command options | Additional parameters for the collector | | False | +| update every | Data collection frequency. | 1 | no | +| command options | Additional parameters for the collector | | no | </details> diff --git a/collectors/debugfs.plugin/debugfs_plugin.c b/collectors/debugfs.plugin/debugfs_plugin.c index 105b0a9e4..13012ec40 100644 --- a/collectors/debugfs.plugin/debugfs_plugin.c +++ b/collectors/debugfs.plugin/debugfs_plugin.c @@ -159,19 +159,11 @@ static void debugfs_parse_args(int argc, char **argv) int main(int argc, char **argv) { - // debug_flags = D_PROCFILE; - stderror = stderr; - - // set the name for logging - program_name = "debugfs.plugin"; - - // disable syslog for debugfs.plugin - error_log_syslog = 0; - - log_set_global_severity_for_external_plugins(); + clocks_init(); + nd_log_initialize_for_external_plugins("debugfs.plugin"); netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX"); - if (verify_netdata_host_prefix() == -1) + if (verify_netdata_host_prefix(true) == -1) exit(1); user_config_dir = getenv("NETDATA_USER_CONFIG_DIR"); @@ -243,6 +235,13 @@ int main(int argc, char **argv) netdata_log_info("all modules are disabled, exiting..."); return 1; } + + fprintf(stdout, "\n"); + fflush(stdout); + if (ferror(stdout) && errno == EPIPE) { + netdata_log_error("error writing to stdout: EPIPE. Exiting..."); + return 1; + } } fprintf(stdout, "EXIT\n"); diff --git a/collectors/debugfs.plugin/integrations/linux_zswap.md b/collectors/debugfs.plugin/integrations/linux_zswap.md index fa3948149..44478454b 100644 --- a/collectors/debugfs.plugin/integrations/linux_zswap.md +++ b/collectors/debugfs.plugin/integrations/linux_zswap.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/debugfs.pl sidebar_label: "Linux ZSwap" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Memory" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -126,8 +127,8 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 1 | False | -| command options | Additinal parameters for collector | | False | +| update every | Data collection frequency. | 1 | no | +| command options | Additinal parameters for collector | | no | </details> diff --git a/collectors/debugfs.plugin/integrations/power_capping.md b/collectors/debugfs.plugin/integrations/power_capping.md index b17ece9a6..d4b7eb890 100644 --- a/collectors/debugfs.plugin/integrations/power_capping.md +++ b/collectors/debugfs.plugin/integrations/power_capping.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/debugfs.pl sidebar_label: "Power Capping" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Kernel" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -120,8 +121,8 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 1 | False | -| command options | Additinal parameters for collector | | False | +| update every | Data collection frequency. | 1 | no | +| command options | Additinal parameters for collector | | no | </details> diff --git a/collectors/debugfs.plugin/integrations/system_memory_fragmentation.md b/collectors/debugfs.plugin/integrations/system_memory_fragmentation.md index 5eed517ed..ef287bc30 100644 --- a/collectors/debugfs.plugin/integrations/system_memory_fragmentation.md +++ b/collectors/debugfs.plugin/integrations/system_memory_fragmentation.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/debugfs.pl sidebar_label: "System Memory Fragmentation" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Memory" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -124,8 +125,8 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 1 | False | -| command options | Additinal parameters for collector | | False | +| update every | Data collection frequency. | 1 | no | +| command options | Additinal parameters for collector | | no | </details> diff --git a/collectors/debugfs.plugin/sys_devices_virtual_powercap.c b/collectors/debugfs.plugin/sys_devices_virtual_powercap.c index f79b944a9..ee261c27f 100644 --- a/collectors/debugfs.plugin/sys_devices_virtual_powercap.c +++ b/collectors/debugfs.plugin/sys_devices_virtual_powercap.c @@ -151,7 +151,7 @@ int do_sys_devices_virtual_powercap(int update_every, const char *name __maybe_u update_every); fprintf(stdout, - "CLABEL 'zone' '%s' 0\n" + "CLABEL 'zone' '%s' 1\n" "CLABEL_COMMIT\n", zone->name); @@ -171,7 +171,7 @@ int do_sys_devices_virtual_powercap(int update_every, const char *name __maybe_u update_every); fprintf(stdout, - "CLABEL 'zone' '%s' 0\n" + "CLABEL 'zone' '%s' 1\n" "CLABEL_COMMIT\n", zone->name); diff --git a/collectors/diskspace.plugin/integrations/disk_space.md b/collectors/diskspace.plugin/integrations/disk_space.md index 5dd9514c3..1c937ed7f 100644 --- a/collectors/diskspace.plugin/integrations/disk_space.md +++ b/collectors/diskspace.plugin/integrations/disk_space.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/diskspace. sidebar_label: "Disk space" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -122,14 +123,14 @@ You can also specify per mount point `[plugin:proc:diskspace:mountpoint]` | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 1 | False | -| remove charts of unmounted disks | Remove chart when a device is unmounted on host. | yes | False | -| check for new mount points every | Parse proc files frequency. | 15 | False | -| exclude space metrics on paths | Do not show metrics (charts) for listed paths. This option accepts netdata simple pattern. | /proc/* /sys/* /var/run/user/* /run/user/* /snap/* /var/lib/docker/* | False | -| exclude space metrics on filesystems | Do not show metrics (charts) for listed filesystems. This option accepts netdata simple pattern. | *gvfs *gluster* *s3fs *ipfs *davfs2 *httpfs *sshfs *gdfs *moosefs fusectl autofs | False | -| exclude inode metrics on filesystems | Do not show metrics (charts) for listed filesystems. This option accepts netdata simple pattern. | msdosfs msdos vfat overlayfs aufs* *unionfs | False | -| space usage for all disks | Define if plugin will show metrics for space usage. When value is set to `auto` plugin will try to access information to display if filesystem or path was not discarded with previous option. | auto | False | -| inodes usage for all disks | Define if plugin will show metrics for inode usage. When value is set to `auto` plugin will try to access information to display if filesystem or path was not discarded with previous option. | auto | False | +| update every | Data collection frequency. | 1 | no | +| remove charts of unmounted disks | Remove chart when a device is unmounted on host. | yes | no | +| check for new mount points every | Parse proc files frequency. | 15 | no | +| exclude space metrics on paths | Do not show metrics (charts) for listed paths. This option accepts netdata simple pattern. | /proc/* /sys/* /var/run/user/* /run/user/* /snap/* /var/lib/docker/* | no | +| exclude space metrics on filesystems | Do not show metrics (charts) for listed filesystems. This option accepts netdata simple pattern. | *gvfs *gluster* *s3fs *ipfs *davfs2 *httpfs *sshfs *gdfs *moosefs fusectl autofs | no | +| exclude inode metrics on filesystems | Do not show metrics (charts) for listed filesystems. This option accepts netdata simple pattern. | msdosfs msdos vfat overlayfs aufs* *unionfs | no | +| space usage for all disks | Define if plugin will show metrics for space usage. When value is set to `auto` plugin will try to access information to display if filesystem or path was not discarded with previous option. | auto | no | +| inodes usage for all disks | Define if plugin will show metrics for inode usage. When value is set to `auto` plugin will try to access information to display if filesystem or path was not discarded with previous option. | auto | no | </details> diff --git a/collectors/diskspace.plugin/plugin_diskspace.c b/collectors/diskspace.plugin/plugin_diskspace.c index 43c9105dc..94257810c 100644 --- a/collectors/diskspace.plugin/plugin_diskspace.c +++ b/collectors/diskspace.plugin/plugin_diskspace.c @@ -9,6 +9,8 @@ #define DEFAULT_EXCLUDED_FILESYSTEMS_INODES "msdosfs msdos vfat overlayfs aufs* *unionfs" #define CONFIG_SECTION_DISKSPACE "plugin:proc:diskspace" +#define RRDFUNCTIONS_DISKSPACE_HELP "View mount point statistics" + #define MAX_STAT_USEC 10000LU #define SLOW_UPDATE_EVERY 5 @@ -42,6 +44,11 @@ struct mount_point_metadata { int updated; int slow; + bool function_ready; + + STRING *filesystem; + STRING *mountroot; + RRDLABELS *chart_labels; size_t collected; // the number of times this has been collected @@ -59,7 +66,7 @@ struct mount_point_metadata { static DICTIONARY *dict_mountpoints = NULL; -#define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete(st); (st) = NULL; } } while(st) +#define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete___safe_from_collector_thread(st); (st) = NULL; } } while(st) int mount_point_cleanup(const char *name, void *entry, int slow) { (void)name; @@ -76,10 +83,17 @@ int mount_point_cleanup(const char *name, void *entry, int slow) { } if(likely(cleanup_mount_points && mp->collected)) { + mp->function_ready = false; mp->collected = 0; mp->updated = 0; mp->shown_error = 0; + string_freez(mp->filesystem); + string_freez(mp->mountroot); + + rrdset_obsolete_and_pointer_null(mp->st_space); + rrdset_obsolete_and_pointer_null(mp->st_inodes); + mp->rd_space_avail = NULL; mp->rd_space_used = NULL; mp->rd_space_reserved = NULL; @@ -87,9 +101,6 @@ int mount_point_cleanup(const char *name, void *entry, int slow) { mp->rd_inodes_avail = NULL; mp->rd_inodes_used = NULL; mp->rd_inodes_reserved = NULL; - - rrdset_obsolete_and_pointer_null(mp->st_space); - rrdset_obsolete_and_pointer_null(mp->st_inodes); } return 0; @@ -214,7 +225,7 @@ static void calculate_values_and_show_charts( m->st_space = rrdset_find_active_bytype_localhost("disk_space", disk); if(unlikely(!m->st_space || m->st_space->update_every != update_every)) { char title[4096 + 1]; - snprintfz(title, 4096, "Disk Space Usage"); + snprintfz(title, sizeof(title) - 1, "Disk Space Usage"); m->st_space = rrdset_create_localhost( "disk_space" , disk @@ -254,7 +265,7 @@ static void calculate_values_and_show_charts( m->st_inodes = rrdset_find_active_bytype_localhost("disk_inodes", disk); if(unlikely(!m->st_inodes) || m->st_inodes->update_every != update_every) { char title[4096 + 1]; - snprintfz(title, 4096, "Disk Files (inodes) Usage"); + snprintfz(title, sizeof(title) - 1, "Disk Files (inodes) Usage"); m->st_inodes = rrdset_create_localhost( "disk_inodes" , disk @@ -286,6 +297,8 @@ static void calculate_values_and_show_charts( rendered++; } + m->function_ready = rendered > 0; + if(likely(rendered)) m->collected++; } @@ -333,8 +346,6 @@ static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) { struct mount_point_metadata *m = dictionary_get(dict_mountpoints, mi->mount_point); if(unlikely(!m)) { int slow = 0; - char var_name[4096 + 1]; - snprintfz(var_name, 4096, "plugin:proc:diskspace:%s", mi->mount_point); int def_space = config_get_boolean_ondemand(CONFIG_SECTION_DISKSPACE, "space usage for all disks", CONFIG_BOOLEAN_AUTO); int def_inodes = config_get_boolean_ondemand(CONFIG_SECTION_DISKSPACE, "inodes usage for all disks", CONFIG_BOOLEAN_AUTO); @@ -385,8 +396,16 @@ static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) { slow = 1; } - do_space = config_get_boolean_ondemand(var_name, "space usage", def_space); - do_inodes = config_get_boolean_ondemand(var_name, "inodes usage", def_inodes); + char var_name[4096 + 1]; + snprintfz(var_name, 4096, "plugin:proc:diskspace:%s", mi->mount_point); + + do_space = def_space; + do_inodes = def_inodes; + + if (config_exists(var_name, "space usage")) + do_space = config_get_boolean_ondemand(var_name, "space usage", def_space); + if (config_exists(var_name, "inodes usage")) + do_inodes = config_get_boolean_ondemand(var_name, "inodes usage", def_inodes); struct mount_point_metadata mp = { .do_space = do_space, @@ -408,6 +427,9 @@ static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) { .rd_inodes_reserved = NULL }; + mp.filesystem = string_strdupz(mi->filesystem); + mp.mountroot = string_strdupz(mi->root); + mp.chart_labels = rrdlabels_create(); rrdlabels_add(mp.chart_labels, "mount_point", mi->mount_point, RRDLABEL_SRC_AUTO); rrdlabels_add(mp.chart_labels, "filesystem", mi->filesystem, RRDLABEL_SRC_AUTO); @@ -614,6 +636,228 @@ static void diskspace_main_cleanup(void *ptr) { #error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 3 #endif +int diskspace_function_mount_points(BUFFER *wb, int timeout __maybe_unused, const char *function __maybe_unused, + void *collector_data __maybe_unused, + rrd_function_result_callback_t result_cb, void *result_cb_data, + rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data, + rrd_function_register_canceller_cb_t register_canceller_cb __maybe_unused, + void *register_canceller_cb_data __maybe_unused) { + + buffer_flush(wb); + wb->content_type = CT_APPLICATION_JSON; + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT); + + buffer_json_member_add_string(wb, "hostname", rrdhost_hostname(localhost)); + buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); + buffer_json_member_add_string(wb, "type", "table"); + buffer_json_member_add_time_t(wb, "update_every", 1); + buffer_json_member_add_string(wb, "help", RRDFUNCTIONS_DISKSPACE_HELP); + buffer_json_member_add_array(wb, "data"); + + double max_space_util = 0.0; + double max_space_avail = 0.0; + double max_space_used = 0.0; + double max_space_reserved = 0.0; + + double max_inodes_util = 0.0; + double max_inodes_avail = 0.0; + double max_inodes_used = 0.0; + double max_inodes_reserved = 0.0; + + struct mount_point_metadata *mp; + dfe_start_write(dict_mountpoints, mp) { + if (!mp->function_ready) + continue; + + buffer_json_add_array_item_array(wb); + + buffer_json_add_array_item_string(wb, mp_dfe.name); + buffer_json_add_array_item_string(wb, string2str(mp->filesystem)); + buffer_json_add_array_item_string(wb, string2str(mp->mountroot)); + + double space_avail = rrddim_get_last_stored_value(mp->rd_space_avail, &max_space_avail, 1.0); + double space_used = rrddim_get_last_stored_value(mp->rd_space_used, &max_space_used, 1.0); + double space_reserved = rrddim_get_last_stored_value(mp->rd_space_reserved, &max_space_reserved, 1.0); + double inodes_avail = rrddim_get_last_stored_value(mp->rd_inodes_avail, &max_inodes_avail, 1.0); + double inodes_used = rrddim_get_last_stored_value(mp->rd_inodes_used, &max_inodes_used, 1.0); + double inodes_reserved = rrddim_get_last_stored_value(mp->rd_inodes_reserved, &max_inodes_reserved, 1.0); + + double space_util = NAN; + if (!isnan(space_avail) && !isnan(space_used)) { + space_util = space_avail + space_used > 0 ? space_used * 100.0 / (space_avail + space_used) : 0; + max_space_util = MAX(max_space_util, space_util); + } + double inodes_util = NAN; + if (!isnan(inodes_avail) && !isnan(inodes_used)) { + inodes_util = inodes_avail + inodes_used > 0 ? inodes_used * 100.0 / (inodes_avail + inodes_used) : 0; + max_inodes_util = MAX(max_inodes_util, inodes_util); + } + + buffer_json_add_array_item_double(wb, space_util); + buffer_json_add_array_item_double(wb, space_avail); + buffer_json_add_array_item_double(wb, space_used); + buffer_json_add_array_item_double(wb, space_reserved); + + buffer_json_add_array_item_double(wb, inodes_util); + buffer_json_add_array_item_double(wb, inodes_avail); + buffer_json_add_array_item_double(wb, inodes_used); + buffer_json_add_array_item_double(wb, inodes_reserved); + + buffer_json_array_close(wb); + } + dfe_done(mp); + + buffer_json_array_close(wb); // data + buffer_json_member_add_object(wb, "columns"); + { + size_t field_id = 0; + + buffer_rrdf_table_add_field(wb, field_id++, "Mountpoint", "Mountpoint Name", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_STICKY | RRDF_FIELD_OPTS_FULL_WIDTH, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Filesystem", "Mountpoint Filesystem", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Root", "Mountpoint Root", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_UNIQUE_KEY, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "Used%", "Space Utilization", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "%", max_space_util, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Avail", "Space Avail", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "GiB", max_space_avail, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Used", "Space Used", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "GiB", max_space_used, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Reserved", "Space Reserved for root", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "GiB", max_space_reserved, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "iUsed%", "Inodes Utilization", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "%", max_inodes_util, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_NONE, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "iAvail", "Inodes Avail", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "inodes", max_inodes_avail, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_NONE, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "iUsed", "Inodes Used", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "inodes", max_inodes_used, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_NONE, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "iReserved", "Inodes Reserved for root", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "inodes", max_inodes_reserved, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_NONE, + NULL); + } + + buffer_json_object_close(wb); // columns + buffer_json_member_add_string(wb, "default_sort_column", "Used%"); + + buffer_json_member_add_object(wb, "charts"); + { + buffer_json_member_add_object(wb, "Utilization"); + { + buffer_json_member_add_string(wb, "name", "Utilization"); + buffer_json_member_add_string(wb, "type", "stacked-bar"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "Used%"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "Usage"); + { + buffer_json_member_add_string(wb, "name", "Usage"); + buffer_json_member_add_string(wb, "type", "stacked-bar"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "Avail"); + buffer_json_add_array_item_string(wb, "Used"); + buffer_json_add_array_item_string(wb, "Reserved"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "Inodes"); + { + buffer_json_member_add_string(wb, "name", "Inodes"); + buffer_json_member_add_string(wb, "type", "stacked-bar"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "iAvail"); + buffer_json_add_array_item_string(wb, "iUsed"); + buffer_json_add_array_item_string(wb, "iReserved"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); // charts + + buffer_json_member_add_array(wb, "default_charts"); + { + buffer_json_add_array_item_array(wb); + buffer_json_add_array_item_string(wb, "Utilization"); + buffer_json_add_array_item_string(wb, "Mountpoint"); + buffer_json_array_close(wb); + + buffer_json_add_array_item_array(wb); + buffer_json_add_array_item_string(wb, "Usage"); + buffer_json_add_array_item_string(wb, "Mountpoint"); + buffer_json_array_close(wb); + } + buffer_json_array_close(wb); + + buffer_json_member_add_time_t(wb, "expires", now_realtime_sec() + 1); + buffer_json_finalize(wb); + + int response = HTTP_RESP_OK; + if(is_cancelled_cb && is_cancelled_cb(is_cancelled_cb_data)) { + buffer_flush(wb); + response = HTTP_RESP_CLIENT_CLOSED_REQUEST; + } + + if(result_cb) + result_cb(wb, response, result_cb_data); + + return response; +} + void *diskspace_main(void *ptr) { worker_register("DISKSPACE"); worker_register_job_name(WORKER_JOB_MOUNTINFO, "mountinfo"); @@ -621,6 +865,7 @@ void *diskspace_main(void *ptr) { worker_register_job_name(WORKER_JOB_CLEANUP, "cleanup"); rrd_collector_started(); + rrd_function_add(localhost, NULL, "mount-points", 10, RRDFUNCTIONS_DISKSPACE_HELP, true, diskspace_function_mount_points, NULL); netdata_thread_cleanup_push(diskspace_main_cleanup, ptr); diff --git a/collectors/ebpf.plugin/ebpf.c b/collectors/ebpf.plugin/ebpf.c index 834808fa5..a8e621643 100644 --- a/collectors/ebpf.plugin/ebpf.c +++ b/collectors/ebpf.plugin/ebpf.c @@ -1060,25 +1060,6 @@ collected_number get_value_from_structure(char *basis, size_t offset) } /** - * Write begin command on standard output - * - * @param family the chart family name - * @param name the chart name - */ -void write_begin_chart(char *family, char *name) -{ - printf("BEGIN %s.%s\n", family, name); -} - -/** - * Write END command on stdout. - */ -inline void write_end_chart() -{ - printf("END\n"); -} - -/** * Write set command on standard output * * @param dim the dimension name @@ -1101,7 +1082,7 @@ void write_chart_dimension(char *dim, long long value) */ void write_count_chart(char *name, char *family, netdata_publish_syscall_t *move, uint32_t end) { - write_begin_chart(family, name); + ebpf_write_begin_chart(family, name, ""); uint32_t i = 0; while (move && i < end) { @@ -1111,7 +1092,7 @@ void write_count_chart(char *name, char *family, netdata_publish_syscall_t *move i++; } - write_end_chart(); + ebpf_write_end_chart(); } /** @@ -1124,7 +1105,7 @@ void write_count_chart(char *name, char *family, netdata_publish_syscall_t *move */ void write_err_chart(char *name, char *family, netdata_publish_syscall_t *move, int end) { - write_begin_chart(family, name); + ebpf_write_begin_chart(family, name, ""); int i = 0; while (move && i < end) { @@ -1134,7 +1115,7 @@ void write_err_chart(char *name, char *family, netdata_publish_syscall_t *move, i++; } - write_end_chart(); + ebpf_write_end_chart(); } /** @@ -1149,11 +1130,11 @@ void write_err_chart(char *name, char *family, netdata_publish_syscall_t *move, */ void ebpf_one_dimension_write_charts(char *family, char *chart, char *dim, long long v1) { - write_begin_chart(family, chart); + ebpf_write_begin_chart(family, chart, ""); write_chart_dimension(dim, v1); - write_end_chart(); + ebpf_write_end_chart(); } /** @@ -1170,19 +1151,20 @@ void ebpf_one_dimension_write_charts(char *family, char *chart, char *dim, long */ void write_io_chart(char *chart, char *family, char *dwrite, long long vwrite, char *dread, long long vread) { - write_begin_chart(family, chart); + ebpf_write_begin_chart(family, chart, ""); write_chart_dimension(dwrite, vwrite); write_chart_dimension(dread, vread); - write_end_chart(); + ebpf_write_end_chart(); } /** * Write chart cmd on standard output * * @param type chart type - * @param id chart id + * @param id chart id (the apps group name). + * @param suffix suffix to differentiate charts * @param title chart title * @param units units label * @param family group name used to attach the chart on dashboard @@ -1192,12 +1174,13 @@ void write_io_chart(char *chart, char *family, char *dwrite, long long vwrite, c * @param update_every update interval used by plugin * @param module chart module name, this is the eBPF thread. */ -void ebpf_write_chart_cmd(char *type, char *id, char *title, char *units, char *family, +void ebpf_write_chart_cmd(char *type, char *id, char *suffix, char *title, char *units, char *family, char *charttype, char *context, int order, int update_every, char *module) { - printf("CHART %s.%s '' '%s' '%s' '%s' '%s' '%s' %d %d '' 'ebpf.plugin' '%s'\n", + printf("CHART %s.%s%s '' '%s' '%s' '%s' '%s' '%s' %d %d '' 'ebpf.plugin' '%s'\n", type, id, + suffix, title, units, (family)?family:"", @@ -1213,6 +1196,7 @@ void ebpf_write_chart_cmd(char *type, char *id, char *title, char *units, char * * * @param type chart type * @param id chart id + * @param suffix add suffix to obsolete charts. * @param title chart title * @param units units label * @param family group name used to attach the chart on dashboard @@ -1221,12 +1205,13 @@ void ebpf_write_chart_cmd(char *type, char *id, char *title, char *units, char * * @param order chart order * @param update_every value to overwrite the update frequency set by the server. */ -void ebpf_write_chart_obsolete(char *type, char *id, char *title, char *units, char *family, +void ebpf_write_chart_obsolete(char *type, char *id, char *suffix, char *title, char *units, char *family, char *charttype, char *context, int order, int update_every) { - printf("CHART %s.%s '' '%s' '%s' '%s' '%s' '%s' %d %d 'obsolete'\n", + printf("CHART %s.%s%s '' '%s' '%s' '%s' '%s' '%s' %d %d 'obsolete'\n", type, id, + suffix, title, units, (family)?family:"", @@ -1298,7 +1283,7 @@ void ebpf_create_chart(char *type, int update_every, char *module) { - ebpf_write_chart_cmd(type, id, title, units, family, charttype, context, order, update_every, module); + ebpf_write_chart_cmd(type, id, "", title, units, family, charttype, context, order, update_every, module); if (ncd) { ncd(move, end); @@ -1306,33 +1291,6 @@ void ebpf_create_chart(char *type, } /** - * Create charts on apps submenu - * - * @param id the chart id - * @param title the value displayed on vertical axis. - * @param units the value displayed on vertical axis. - * @param family Submenu that the chart will be attached on dashboard. - * @param charttype chart type - * @param order the chart order - * @param algorithm the algorithm used by dimension - * @param root structure used to create the dimensions. - * @param update_every update interval used by plugin - * @param module chart module name, this is the eBPF thread. - */ -void ebpf_create_charts_on_apps(char *id, char *title, char *units, char *family, char *charttype, int order, - char *algorithm, struct ebpf_target *root, int update_every, char *module) -{ - struct ebpf_target *w; - ebpf_write_chart_cmd(NETDATA_APPS_FAMILY, id, title, units, family, charttype, NULL, order, - update_every, module); - - for (w = root; w; w = w->next) { - if (unlikely(w->exposed)) - fprintf(stdout, "DIMENSION %s '' %s 1 1\n", w->name, algorithm); - } -} - -/** * Call the necessary functions to create a name. * * @param family family name @@ -1345,14 +1303,14 @@ void ebpf_create_charts_on_apps(char *id, char *title, char *units, char *family */ void write_histogram_chart(char *family, char *name, const netdata_idx_t *hist, char **dimensions, uint32_t end) { - write_begin_chart(family, name); + ebpf_write_begin_chart(family, name, ""); uint32_t i; for (i = 0; i < end; i++) { write_chart_dimension(dimensions[i], (long long) hist[i]); } - write_end_chart(); + ebpf_write_end_chart(); fflush(stdout); } @@ -1377,6 +1335,7 @@ int ebpf_statistic_create_aral_chart(char *name, ebpf_module_t *em) ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, em->memory_usage, + "", "Bytes allocated for ARAL.", "bytes", NETDATA_EBPF_FAMILY, @@ -1392,6 +1351,7 @@ int ebpf_statistic_create_aral_chart(char *name, ebpf_module_t *em) ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, em->memory_allocations, + "", "Calls to allocate memory.", "calls", NETDATA_EBPF_FAMILY, @@ -1421,6 +1381,7 @@ void ebpf_statistic_obsolete_aral_chart(ebpf_module_t *em, int prio) { ebpf_write_chart_obsolete(NETDATA_MONITORING_FAMILY, em->memory_allocations, + "", "Calls to allocate memory.", "calls", NETDATA_EBPF_FAMILY, @@ -1431,6 +1392,7 @@ void ebpf_statistic_obsolete_aral_chart(ebpf_module_t *em, int prio) ebpf_write_chart_obsolete(NETDATA_MONITORING_FAMILY, em->memory_allocations, + "", "Calls to allocate memory.", "calls", NETDATA_EBPF_FAMILY, @@ -1455,13 +1417,13 @@ void ebpf_send_data_aral_chart(ARAL *memory, ebpf_module_t *em) struct aral_statistics *stats = aral_statistics(memory); - write_begin_chart(NETDATA_MONITORING_FAMILY, em->memory_usage); + ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, em->memory_usage, ""); write_chart_dimension(mem, (long long)stats->structures.allocated_bytes); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_MONITORING_FAMILY, em->memory_allocations); + ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, em->memory_allocations, ""); write_chart_dimension(aral, (long long)stats->structures.allocations); - write_end_chart(); + ebpf_write_end_chart(); } /***************************************************************** @@ -3442,7 +3404,7 @@ static char *hash_table_core[NETDATA_EBPF_LOAD_STAT_END] = {"per_core", "unique" static inline void ebpf_send_hash_table_pid_data(char *chart, uint32_t idx) { int i; - write_begin_chart(NETDATA_MONITORING_FAMILY, chart); + ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, chart, ""); for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { ebpf_module_t *wem = &ebpf_modules[i]; if (wem->functions.apps_routine) @@ -3451,7 +3413,7 @@ static inline void ebpf_send_hash_table_pid_data(char *chart, uint32_t idx) wem->hash_table_stats[idx]: 0); } - write_end_chart(); + ebpf_write_end_chart(); } /** @@ -3463,13 +3425,13 @@ static inline void ebpf_send_hash_table_pid_data(char *chart, uint32_t idx) static inline void ebpf_send_global_hash_table_data() { int i; - write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_GLOBAL_ELEMENTS); + ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_GLOBAL_ELEMENTS, ""); for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { ebpf_module_t *wem = &ebpf_modules[i]; write_chart_dimension((char *)wem->info.thread_name, (wem->enabled < NETDATA_THREAD_EBPF_STOPPING) ? NETDATA_CONTROLLER_END: 0); } - write_end_chart(); + ebpf_write_end_chart(); } /** @@ -3482,7 +3444,7 @@ void ebpf_send_statistic_data() if (!publish_internal_metrics) return; - write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_THREADS); + ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_THREADS, ""); int i; for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { ebpf_module_t *wem = &ebpf_modules[i]; @@ -3491,9 +3453,9 @@ void ebpf_send_statistic_data() write_chart_dimension((char *)wem->info.thread_name, (wem->enabled < NETDATA_THREAD_EBPF_STOPPING) ? 1 : 0); } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_LIFE_TIME); + ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_LIFE_TIME, ""); for (i = 0; i < EBPF_MODULE_FUNCTION_IDX ; i++) { ebpf_module_t *wem = &ebpf_modules[i]; // Threads like VFS is slow to load and this can create an invalid number, this is the motive @@ -3506,25 +3468,25 @@ void ebpf_send_statistic_data() (long long) (wem->lifetime - wem->running_time): 0) ; } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_LOAD_METHOD); + ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_LOAD_METHOD, ""); write_chart_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_LEGACY], (long long)plugin_statistics.legacy); write_chart_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_CORE], (long long)plugin_statistics.core); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_KERNEL_MEMORY); + ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_KERNEL_MEMORY, ""); write_chart_dimension(memlock_stat, (long long)plugin_statistics.memlock_kern); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_LOADED); + ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_LOADED, ""); write_chart_dimension(hash_table_stat, (long long)plugin_statistics.hash_tables); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_PER_CORE); + ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_PER_CORE, ""); write_chart_dimension(hash_table_core[NETDATA_EBPF_THREAD_PER_CORE], (long long)plugin_statistics.hash_percpu); write_chart_dimension(hash_table_core[NETDATA_EBPF_THREAD_UNIQUE], (long long)plugin_statistics.hash_unique); - write_end_chart(); + ebpf_write_end_chart(); ebpf_send_global_hash_table_data(); @@ -3536,16 +3498,16 @@ void ebpf_send_statistic_data() if (!wem->functions.fnct_routine) continue; - write_begin_chart(NETDATA_MONITORING_FAMILY, (char *)wem->functions.fcnt_thread_chart_name); + ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, (char *)wem->functions.fcnt_thread_chart_name, ""); write_chart_dimension((char *)wem->info.thread_name, (wem->enabled < NETDATA_THREAD_EBPF_STOPPING) ? 1 : 0); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_MONITORING_FAMILY, (char *)wem->functions.fcnt_thread_lifetime_name); + ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, (char *)wem->functions.fcnt_thread_lifetime_name, ""); write_chart_dimension((char *)wem->info.thread_name, (wem->lifetime && wem->enabled < NETDATA_THREAD_EBPF_STOPPING) ? (long long) (wem->lifetime - wem->running_time): 0) ; - write_end_chart(); + ebpf_write_end_chart(); } } @@ -3586,6 +3548,7 @@ static void ebpf_create_thread_chart(char *name, // common call for specific and all charts. ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, name, + "", title, units, NETDATA_EBPF_FAMILY, @@ -3625,6 +3588,7 @@ static inline void ebpf_create_statistic_load_chart(int update_every) { ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_LOAD_METHOD, + "", "Load info.", "methods", NETDATA_EBPF_FAMILY, @@ -3654,6 +3618,7 @@ static inline void ebpf_create_statistic_kernel_memory(int update_every) { ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_KERNEL_MEMORY, + "", "Memory allocated for hash tables.", "bytes", NETDATA_EBPF_FAMILY, @@ -3679,6 +3644,7 @@ static inline void ebpf_create_statistic_hash_tables(int update_every) { ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_LOADED, + "", "Number of hash tables loaded.", "hash tables", NETDATA_EBPF_FAMILY, @@ -3704,6 +3670,7 @@ static inline void ebpf_create_statistic_hash_per_core(int update_every) { ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_PER_CORE, + "", "How threads are loading hash/array tables.", "threads", NETDATA_EBPF_FAMILY, @@ -3733,6 +3700,7 @@ static void ebpf_create_statistic_hash_global_elements(int update_every) { ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_GLOBAL_ELEMENTS, + "", "Controllers inside global table", "rows", NETDATA_EBPF_FAMILY, @@ -3764,6 +3732,7 @@ static void ebpf_create_statistic_hash_pid_table(int update_every, char *id, cha { ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, id, + "", title, "rows", NETDATA_EBPF_FAMILY, @@ -3834,7 +3803,7 @@ static void ebpf_create_statistic_charts(int update_every) continue; em->functions.order_thread_chart = j; - snprintfz(name, 255,"%s_%s", NETDATA_EBPF_THREADS, em->info.thread_name); + snprintfz(name, sizeof(name) - 1, "%s_%s", NETDATA_EBPF_THREADS, em->info.thread_name); em->functions.fcnt_thread_chart_name = strdupz(name); ebpf_create_thread_chart(name, "Threads running.", @@ -3847,7 +3816,7 @@ static void ebpf_create_statistic_charts(int update_every) #endif em->functions.order_thread_lifetime = j; - snprintfz(name, 255,"%s_%s", NETDATA_EBPF_LIFE_TIME, em->info.thread_name); + snprintfz(name, sizeof(name) - 1, "%s_%s", NETDATA_EBPF_LIFE_TIME, em->info.thread_name); em->functions.fcnt_thread_lifetime_name = strdupz(name); ebpf_create_thread_chart(name, "Time remaining for thread.", @@ -4055,11 +4024,9 @@ static void ebpf_manage_pid(pid_t pid) */ int main(int argc, char **argv) { - stderror = stderr; - - log_set_global_severity_for_external_plugins(); - clocks_init(); + nd_log_initialize_for_external_plugins("ebpf.plugin"); + main_thread_id = gettid(); set_global_variables(); @@ -4069,16 +4036,6 @@ int main(int argc, char **argv) if (ebpf_check_conditions()) return 2; - // set name - program_name = "ebpf.plugin"; - - // disable syslog - error_log_syslog = 0; - - // set errors flood protection to 100 logs per hour - error_log_errors_per_period = 100; - error_log_throttle_period = 3600; - if (ebpf_adjust_memory_limit()) return 3; @@ -4090,7 +4047,7 @@ int main(int argc, char **argv) ebpf_start_pthread_variables(); netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX"); - if(verify_netdata_host_prefix() == -1) ebpf_exit(6); + if(verify_netdata_host_prefix(true) == -1) ebpf_exit(6); ebpf_allocate_common_vectors(); diff --git a/collectors/ebpf.plugin/ebpf.h b/collectors/ebpf.plugin/ebpf.h index d52ea5a4a..ad7c5a94c 100644 --- a/collectors/ebpf.plugin/ebpf.h +++ b/collectors/ebpf.plugin/ebpf.h @@ -238,6 +238,7 @@ void ebpf_global_labels(netdata_syscall_stat_t *is, void ebpf_write_chart_cmd(char *type, char *id, + char *suffix, char *title, char *units, char *family, @@ -265,8 +266,6 @@ void ebpf_create_chart(char *type, int update_every, char *module); -void write_begin_chart(char *family, char *name); - void write_chart_dimension(char *dim, long long value); void write_count_chart(char *name, char *family, netdata_publish_syscall_t *move, uint32_t end); @@ -276,18 +275,47 @@ void write_err_chart(char *name, char *family, netdata_publish_syscall_t *move, void write_io_chart(char *chart, char *family, char *dwrite, long long vwrite, char *dread, long long vread); -void ebpf_create_charts_on_apps(char *name, - char *title, - char *units, - char *family, - char *charttype, - int order, - char *algorithm, - struct ebpf_target *root, - int update_every, - char *module); - -void write_end_chart(); +/** + * Create Chart labels + * + * @param name the label name. + * @param value the label value. + * @param origin the labeel source. + */ +static inline void ebpf_create_chart_labels(char *name, char *value, int source) +{ + fprintf(stdout, "CLABEL '%s' '%s' %d\n", name, value, source); +} + +/** + * Commit label + * + * Write commit label to stdout + */ +static inline void ebpf_commit_label() +{ + fprintf(stdout, "CLABEL_COMMIT\n"); +} + +/** + * Write begin command on standard output + * + * @param family the chart family name + * @param name the chart name + * @param metric the chart suffix (used with apps and cgroups) + */ +static inline void ebpf_write_begin_chart(char *family, char *name, char *metric) +{ + printf("BEGIN %s.%s%s\n", family, name, metric); +} + +/** + * Write END command on stdout. + */ +static inline void ebpf_write_end_chart() +{ + printf("END\n"); +} int ebpf_enable_tracepoint(ebpf_tracepoint_t *tp); int ebpf_disable_tracepoint(ebpf_tracepoint_t *tp); @@ -298,6 +326,9 @@ void ebpf_pid_file(char *filename, size_t length); #define EBPF_PROGRAMS_SECTION "ebpf programs" #define EBPF_COMMON_DIMENSION_PERCENTAGE "%" +#define EBPF_PROGRAMS_SECTION "ebpf programs" + +#define EBPF_COMMON_DIMENSION_PERCENTAGE "%" #define EBPF_COMMON_DIMENSION_CALL "calls/s" #define EBPF_COMMON_DIMENSION_CONNECTIONS "connections/s" #define EBPF_COMMON_DIMENSION_BITS "kilobits/s" @@ -334,7 +365,7 @@ void ebpf_cachestat_create_apps_charts(struct ebpf_module *em, void *root); void ebpf_one_dimension_write_charts(char *family, char *chart, char *dim, long long v1); collected_number get_value_from_structure(char *basis, size_t offset); void ebpf_update_pid_table(ebpf_local_maps_t *pid, ebpf_module_t *em); -void ebpf_write_chart_obsolete(char *type, char *id, char *title, char *units, char *family, +void ebpf_write_chart_obsolete(char *type, char *id, char *suffix, char *title, char *units, char *family, char *charttype, char *context, int order, int update_every); void write_histogram_chart(char *family, char *name, const netdata_idx_t *hist, char **dimensions, uint32_t end); void ebpf_update_disabled_plugin_stats(ebpf_module_t *em); diff --git a/collectors/ebpf.plugin/ebpf_apps.c b/collectors/ebpf.plugin/ebpf_apps.c index b1b42c8d8..10c452267 100644 --- a/collectors/ebpf.plugin/ebpf_apps.c +++ b/collectors/ebpf.plugin/ebpf_apps.c @@ -132,16 +132,6 @@ ebpf_socket_publish_apps_t *ebpf_socket_stat_get(void) return target; } -/** - * eBPF socket release - * - * @param stat Release a target after usage. - */ -void ebpf_socket_release(ebpf_socket_publish_apps_t *stat) -{ - aral_freez(ebpf_aral_socket_pid, stat); -} - /***************************************************************** * * CACHESTAT ARAL FUNCTIONS @@ -512,6 +502,13 @@ struct ebpf_target *get_apps_groups_target(struct ebpf_target **agrt, const char // copy the id strncpyz(w->name, nid, EBPF_MAX_NAME); + strncpyz(w->clean_name, w->name, EBPF_MAX_NAME); + netdata_fix_chart_name(w->clean_name); + for (char *d = w->clean_name; *d; d++) { + if (*d == '.') + *d = '_'; + } + strncpyz(w->compare, nid, EBPF_MAX_COMPARE_NAME); size_t len = strlen(w->compare); if (w->compare[len - 1] == '*') { diff --git a/collectors/ebpf.plugin/ebpf_apps.h b/collectors/ebpf.plugin/ebpf_apps.h index 5ae5342dd..258091507 100644 --- a/collectors/ebpf.plugin/ebpf_apps.h +++ b/collectors/ebpf.plugin/ebpf_apps.h @@ -10,11 +10,13 @@ #include "libnetdata/ebpf/ebpf.h" #define NETDATA_APPS_FAMILY "apps" +#define NETDATA_APP_FAMILY "app" #define NETDATA_APPS_FILE_GROUP "file_access" +#define NETDATA_APPS_FILE_FDS "fds" #define NETDATA_APPS_FILE_CGROUP_GROUP "file_access (eBPF)" #define NETDATA_APPS_PROCESS_GROUP "process (eBPF)" #define NETDATA_APPS_NET_GROUP "net" -#define NETDATA_APPS_IPC_SHM_GROUP "ipc shm (eBPF)" +#define NETDATA_APPS_IPC_SHM_GROUP "ipc shm" #include "ebpf_process.h" #include "ebpf_dcstat.h" @@ -47,8 +49,10 @@ struct ebpf_target { char id[EBPF_MAX_NAME + 1]; uint32_t idhash; + uint32_t charts_created; char name[EBPF_MAX_NAME + 1]; + char clean_name[EBPF_MAX_NAME + 1]; // sanitized name used in chart id (need to replace at least dots) // Changes made to simplify integration between apps and eBPF. netdata_publish_cachestat_t cachestat; @@ -218,7 +222,6 @@ extern ebpf_process_stat_t *process_stat_vector; extern ARAL *ebpf_aral_socket_pid; void ebpf_socket_aral_init(); ebpf_socket_publish_apps_t *ebpf_socket_stat_get(void); -void ebpf_socket_release(ebpf_socket_publish_apps_t *stat); extern ARAL *ebpf_aral_cachestat_pid; void ebpf_cachestat_aral_init(); diff --git a/collectors/ebpf.plugin/ebpf_cachestat.c b/collectors/ebpf.plugin/ebpf_cachestat.c index 890600696..d9f8f7b06 100644 --- a/collectors/ebpf.plugin/ebpf_cachestat.c +++ b/collectors/ebpf.plugin/ebpf_cachestat.c @@ -353,6 +353,7 @@ static void ebpf_obsolete_services(ebpf_module_t *em) { ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_HIT_RATIO_CHART, + "", "Hit ratio", EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_CACHESTAT_SUBMENU, @@ -363,6 +364,7 @@ static void ebpf_obsolete_services(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_DIRTY_CHART, + "", "Number of dirty pages", EBPF_CACHESTAT_DIMENSION_PAGE, NETDATA_CACHESTAT_SUBMENU, @@ -373,6 +375,7 @@ static void ebpf_obsolete_services(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_HIT_CHART, + "", "Number of accessed files", EBPF_CACHESTAT_DIMENSION_HITS, NETDATA_CACHESTAT_SUBMENU, @@ -383,6 +386,7 @@ static void ebpf_obsolete_services(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_MISSES_CHART, + "", "Files out of page cache", EBPF_CACHESTAT_DIMENSION_MISSES, NETDATA_CACHESTAT_SUBMENU, @@ -425,6 +429,7 @@ static void ebpf_obsolete_cachestat_global(ebpf_module_t *em) { ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_HIT_RATIO_CHART, + "", "Hit ratio", EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_CACHESTAT_SUBMENU, @@ -435,6 +440,7 @@ static void ebpf_obsolete_cachestat_global(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_DIRTY_CHART, + "", "Number of dirty pages", EBPF_CACHESTAT_DIMENSION_PAGE, NETDATA_CACHESTAT_SUBMENU, @@ -445,6 +451,7 @@ static void ebpf_obsolete_cachestat_global(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_HIT_CHART, + "", "Number of accessed files", EBPF_CACHESTAT_DIMENSION_HITS, NETDATA_CACHESTAT_SUBMENU, @@ -455,6 +462,7 @@ static void ebpf_obsolete_cachestat_global(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_MISSES_CHART, + "", "Files out of page cache", EBPF_CACHESTAT_DIMENSION_MISSES, NETDATA_CACHESTAT_SUBMENU, @@ -473,44 +481,57 @@ static void ebpf_obsolete_cachestat_global(ebpf_module_t *em) */ void ebpf_obsolete_cachestat_apps_charts(struct ebpf_module *em) { - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_CACHESTAT_HIT_RATIO_CHART, - "Hit ratio", - EBPF_COMMON_DIMENSION_PERCENTAGE, - NETDATA_CACHESTAT_SUBMENU, - NETDATA_EBPF_CHART_TYPE_LINE, - NULL, - 20090, - em->update_every); + struct ebpf_target *w; + int update_every = em->update_every; + for (w = apps_groups_root_target; w; w = w->next) { + if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_CACHESTAT_IDX)))) + continue; - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_CACHESTAT_DIRTY_CHART, - "Number of dirty pages", - EBPF_CACHESTAT_DIMENSION_PAGE, - NETDATA_CACHESTAT_SUBMENU, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20091, - em->update_every); + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_cachestat_hit_ratio", + "Hit ratio", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + "app.ebpf_cachestat_hit_ratio", + 20260, + update_every); - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_HIT_CHART, - "Number of accessed files", - EBPF_CACHESTAT_DIMENSION_HITS, - NETDATA_CACHESTAT_SUBMENU, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20092, - em->update_every); + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_cachestat_dirty_pages", + "Number of dirty pages", + EBPF_CACHESTAT_DIMENSION_PAGE, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_cachestat_dirty_pages", + 20261, + update_every); - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_CACHESTAT_MISSES_CHART, - "Files out of page cache", - EBPF_CACHESTAT_DIMENSION_MISSES, - NETDATA_CACHESTAT_SUBMENU, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20093, - em->update_every); + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_cachestat_access", + "Number of accessed files", + EBPF_CACHESTAT_DIMENSION_HITS, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_cachestat_access", + 20262, + update_every); + + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_cachestat_misses", + "Files out of page cache", + EBPF_CACHESTAT_DIMENSION_MISSES, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_cachestat_misses", + 20263, + update_every); + w->charts_created &= ~(1<<EBPF_MODULE_CACHESTAT_IDX); + } } /** @@ -804,41 +825,74 @@ static void ebpf_update_cachestat_cgroup(int maps_per_core) void ebpf_cachestat_create_apps_charts(struct ebpf_module *em, void *ptr) { struct ebpf_target *root = ptr; - ebpf_create_charts_on_apps(NETDATA_CACHESTAT_HIT_RATIO_CHART, - "Hit ratio", - EBPF_COMMON_DIMENSION_PERCENTAGE, - NETDATA_CACHESTAT_SUBMENU, - NETDATA_EBPF_CHART_TYPE_LINE, - 20090, - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); - - ebpf_create_charts_on_apps(NETDATA_CACHESTAT_DIRTY_CHART, - "Number of dirty pages", - EBPF_CACHESTAT_DIMENSION_PAGE, - NETDATA_CACHESTAT_SUBMENU, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20091, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); - - ebpf_create_charts_on_apps(NETDATA_CACHESTAT_HIT_CHART, - "Number of accessed files", - EBPF_CACHESTAT_DIMENSION_HITS, - NETDATA_CACHESTAT_SUBMENU, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20092, - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); - - ebpf_create_charts_on_apps(NETDATA_CACHESTAT_MISSES_CHART, - "Files out of page cache", - EBPF_CACHESTAT_DIMENSION_MISSES, - NETDATA_CACHESTAT_SUBMENU, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20093, - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); + struct ebpf_target *w; + int update_every = em->update_every; + for (w = root; w; w = w->next) { + if (unlikely(!w->exposed)) + continue; + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_cachestat_hit_ratio", + "Hit ratio", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + "app.ebpf_cachestat_hit_ratio", + 20260, + update_every, + NETDATA_EBPF_MODULE_NAME_CACHESTAT); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION ratio '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_cachestat_dirty_pages", + "Number of dirty pages", + EBPF_CACHESTAT_DIMENSION_PAGE, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + "app.ebpf_cachestat_dirty_pages", + 20261, + update_every, + NETDATA_EBPF_MODULE_NAME_CACHESTAT); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION pages '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_cachestat_access", + "Number of accessed files", + EBPF_CACHESTAT_DIMENSION_HITS, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_cachestat_access", + 20262, + update_every, + NETDATA_EBPF_MODULE_NAME_CACHESTAT); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION hits '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_cachestat_misses", + "Files out of page cache", + EBPF_CACHESTAT_DIMENSION_MISSES, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_cachestat_misses", + 20263, + update_every, + NETDATA_EBPF_MODULE_NAME_CACHESTAT); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION misses '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + w->charts_created |= 1<<EBPF_MODULE_CACHESTAT_IDX; + } em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; } @@ -939,53 +993,42 @@ void ebpf_cache_send_apps_data(struct ebpf_target *root) struct ebpf_target *w; collected_number value; - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_HIT_RATIO_CHART); for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - ebpf_cachestat_sum_pids(&w->cachestat, w->root_pid); - netdata_cachestat_pid_t *current = &w->cachestat.current; - netdata_cachestat_pid_t *prev = &w->cachestat.prev; - - uint64_t mpa = current->mark_page_accessed - prev->mark_page_accessed; - uint64_t mbd = current->mark_buffer_dirty - prev->mark_buffer_dirty; - w->cachestat.dirty = mbd; - uint64_t apcl = current->add_to_page_cache_lru - prev->add_to_page_cache_lru; - uint64_t apd = current->account_page_dirtied - prev->account_page_dirtied; - - cachestat_update_publish(&w->cachestat, mpa, mbd, apcl, apd); - value = (collected_number) w->cachestat.ratio; - // Here we are using different approach to have a chart more smooth - write_chart_dimension(w->name, value); - } - } - write_end_chart(); + if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_CACHESTAT_IDX)))) + continue; - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_DIRTY_CHART); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = (collected_number) w->cachestat.dirty; - write_chart_dimension(w->name, value); - } - } - write_end_chart(); + ebpf_cachestat_sum_pids(&w->cachestat, w->root_pid); + netdata_cachestat_pid_t *current = &w->cachestat.current; + netdata_cachestat_pid_t *prev = &w->cachestat.prev; - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_HIT_CHART); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = (collected_number) w->cachestat.hit; - write_chart_dimension(w->name, value); - } - } - write_end_chart(); + uint64_t mpa = current->mark_page_accessed - prev->mark_page_accessed; + uint64_t mbd = current->mark_buffer_dirty - prev->mark_buffer_dirty; + w->cachestat.dirty = mbd; + uint64_t apcl = current->add_to_page_cache_lru - prev->add_to_page_cache_lru; + uint64_t apd = current->account_page_dirtied - prev->account_page_dirtied; - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_MISSES_CHART); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = (collected_number) w->cachestat.miss; - write_chart_dimension(w->name, value); - } + cachestat_update_publish(&w->cachestat, mpa, mbd, apcl, apd); + + value = (collected_number) w->cachestat.ratio; + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_cachestat_hit_ratio"); + write_chart_dimension("ratio", value); + ebpf_write_end_chart(); + + value = (collected_number) w->cachestat.dirty; + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_cachestat_dirty_pages"); + write_chart_dimension("pages", value); + ebpf_write_end_chart(); + + value = (collected_number) w->cachestat.hit; + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_cachestat_access"); + write_chart_dimension("hits", value); + ebpf_write_end_chart(); + + value = (collected_number) w->cachestat.miss; + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_cachestat_misses"); + write_chart_dimension("misses", value); + ebpf_write_end_chart(); } - write_end_chart(); } /** @@ -1087,37 +1130,37 @@ static void ebpf_send_systemd_cachestat_charts() { ebpf_cgroup_target_t *ect; - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_HIT_RATIO_CHART); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_HIT_RATIO_CHART, ""); for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long)ect->publish_cachestat.ratio); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_DIRTY_CHART); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_DIRTY_CHART, ""); for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long)ect->publish_cachestat.dirty); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_HIT_CHART); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_HIT_CHART, ""); for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long)ect->publish_cachestat.hit); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_MISSES_CHART); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_MISSES_CHART, ""); for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long)ect->publish_cachestat.miss); } } - write_end_chart(); + ebpf_write_end_chart(); } /** @@ -1127,21 +1170,21 @@ static void ebpf_send_systemd_cachestat_charts() */ static void ebpf_send_specific_cachestat_data(char *type, netdata_publish_cachestat_t *npc) { - write_begin_chart(type, NETDATA_CACHESTAT_HIT_RATIO_CHART); + ebpf_write_begin_chart(type, NETDATA_CACHESTAT_HIT_RATIO_CHART, ""); write_chart_dimension(cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_RATIO].name, (long long)npc->ratio); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_CACHESTAT_DIRTY_CHART); + ebpf_write_begin_chart(type, NETDATA_CACHESTAT_DIRTY_CHART, ""); write_chart_dimension(cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_DIRTY].name, (long long)npc->dirty); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_CACHESTAT_HIT_CHART); + ebpf_write_begin_chart(type, NETDATA_CACHESTAT_HIT_CHART, ""); write_chart_dimension(cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_HIT].name, (long long)npc->hit); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_CACHESTAT_MISSES_CHART); + ebpf_write_begin_chart(type, NETDATA_CACHESTAT_MISSES_CHART, ""); write_chart_dimension(cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_MISS].name, (long long)npc->miss); - write_end_chart(); + ebpf_write_end_chart(); } /** @@ -1201,24 +1244,28 @@ static void ebpf_create_specific_cachestat_charts(char *type, int update_every) static void ebpf_obsolete_specific_cachestat_charts(char *type, int update_every) { ebpf_write_chart_obsolete(type, NETDATA_CACHESTAT_HIT_RATIO_CHART, + "", "Hit ratio", EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_CACHESTAT_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_CACHESTAT_HIT_RATIO_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5200, update_every); ebpf_write_chart_obsolete(type, NETDATA_CACHESTAT_DIRTY_CHART, + "", "Number of dirty pages", EBPF_CACHESTAT_DIMENSION_PAGE, NETDATA_CACHESTAT_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_CACHESTAT_MODIFIED_CACHE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5201, update_every); ebpf_write_chart_obsolete(type, NETDATA_CACHESTAT_HIT_CHART, + "", "Number of accessed files", EBPF_CACHESTAT_DIMENSION_HITS, NETDATA_CACHESTAT_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_CACHESTAT_HIT_FILES_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5202, update_every); ebpf_write_chart_obsolete(type, NETDATA_CACHESTAT_MISSES_CHART, + "", "Files out of page cache", EBPF_CACHESTAT_DIMENSION_MISSES, NETDATA_CACHESTAT_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_CACHESTAT_MISS_FILES_CONTEXT, diff --git a/collectors/ebpf.plugin/ebpf_cgroup.c b/collectors/ebpf.plugin/ebpf_cgroup.c index b1e2c0746..1aadfbaf8 100644 --- a/collectors/ebpf.plugin/ebpf_cgroup.c +++ b/collectors/ebpf.plugin/ebpf_cgroup.c @@ -331,7 +331,7 @@ void ebpf_create_charts_on_systemd(char *id, char *title, char *units, char *fam char *algorithm, char *context, char *module, int update_every) { ebpf_cgroup_target_t *w; - ebpf_write_chart_cmd(NETDATA_SERVICE_FAMILY, id, title, units, family, charttype, context, + ebpf_write_chart_cmd(NETDATA_SERVICE_FAMILY, id, "", title, units, family, charttype, context, order, update_every, module); for (w = ebpf_cgroup_pids; w; w = w->next) { diff --git a/collectors/ebpf.plugin/ebpf_dcstat.c b/collectors/ebpf.plugin/ebpf_dcstat.c index 8c6f60133..4ff6c82ab 100644 --- a/collectors/ebpf.plugin/ebpf_dcstat.c +++ b/collectors/ebpf.plugin/ebpf_dcstat.c @@ -302,6 +302,7 @@ static void ebpf_obsolete_dc_services(ebpf_module_t *em) { ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_DC_HIT_CHART, + "", "Percentage of files inside directory cache", EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_DIRECTORY_CACHE_SUBMENU, @@ -312,6 +313,7 @@ static void ebpf_obsolete_dc_services(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_DC_REFERENCE_CHART, + "", "Count file access", EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, @@ -322,6 +324,7 @@ static void ebpf_obsolete_dc_services(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_DC_REQUEST_NOT_CACHE_CHART, + "", "Files not present inside directory cache", EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, @@ -332,6 +335,7 @@ static void ebpf_obsolete_dc_services(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_DC_REQUEST_NOT_FOUND_CHART, + "", "Files not found", EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, @@ -372,45 +376,58 @@ static inline void ebpf_obsolete_dc_cgroup_charts(ebpf_module_t *em) { */ void ebpf_obsolete_dc_apps_charts(struct ebpf_module *em) { - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_DC_HIT_CHART, - "Percentage of files inside directory cache", - EBPF_COMMON_DIMENSION_PERCENTAGE, - NETDATA_DIRECTORY_CACHE_SUBMENU, - NETDATA_EBPF_CHART_TYPE_LINE, - NULL, - 20100, - em->update_every); + struct ebpf_target *w; + int update_every = em->update_every; + for (w = apps_groups_root_target; w; w = w->next) { + if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_DCSTAT_IDX)))) + continue; - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_DC_REFERENCE_CHART, - "Count file access", - EBPF_COMMON_DIMENSION_FILES, - NETDATA_DIRECTORY_CACHE_SUBMENU, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20101, - em->update_every); + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_dc_hit", + "Percentage of files inside directory cache.", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + "app.ebpf_dc_hit", + 20265, + update_every); - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_DC_REQUEST_NOT_CACHE_CHART, - "Files not present inside directory cache", - EBPF_COMMON_DIMENSION_FILES, - NETDATA_DIRECTORY_CACHE_SUBMENU, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20102, - em->update_every); + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_dc_reference", + "Count file access.", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_dc_reference", + 20266, + update_every); - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_DC_REQUEST_NOT_FOUND_CHART, - "Files not found", - EBPF_COMMON_DIMENSION_FILES, - NETDATA_DIRECTORY_CACHE_SUBMENU, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20103, - em->update_every); + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_not_cache", + "Files not present inside directory cache.", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_dc_not_cache", + 20267, + update_every); + + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_not_found", + "Files not found.", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_dc_not_found", + 20268, + update_every); + + w->charts_created &= ~(1<<EBPF_MODULE_DCSTAT_IDX); + } } /** @@ -424,6 +441,7 @@ static void ebpf_obsolete_dc_global(ebpf_module_t *em) { ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, NETDATA_DC_HIT_CHART, + "", "Percentage of files inside directory cache", EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_DIRECTORY_CACHE_SUBMENU, @@ -434,6 +452,7 @@ static void ebpf_obsolete_dc_global(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, NETDATA_DC_REFERENCE_CHART, + "", "Variables used to calculate hit ratio.", EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, @@ -513,41 +532,74 @@ static void ebpf_dcstat_exit(void *ptr) void ebpf_dcstat_create_apps_charts(struct ebpf_module *em, void *ptr) { struct ebpf_target *root = ptr; - ebpf_create_charts_on_apps(NETDATA_DC_HIT_CHART, - "Percentage of files inside directory cache", - EBPF_COMMON_DIMENSION_PERCENTAGE, - NETDATA_DIRECTORY_CACHE_SUBMENU, - NETDATA_EBPF_CHART_TYPE_LINE, - 20100, - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); - - ebpf_create_charts_on_apps(NETDATA_DC_REFERENCE_CHART, - "Count file access", - EBPF_COMMON_DIMENSION_FILES, - NETDATA_DIRECTORY_CACHE_SUBMENU, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20101, - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); - - ebpf_create_charts_on_apps(NETDATA_DC_REQUEST_NOT_CACHE_CHART, - "Files not present inside directory cache", - EBPF_COMMON_DIMENSION_FILES, - NETDATA_DIRECTORY_CACHE_SUBMENU, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20102, - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); - - ebpf_create_charts_on_apps(NETDATA_DC_REQUEST_NOT_FOUND_CHART, - "Files not found", - EBPF_COMMON_DIMENSION_FILES, - NETDATA_DIRECTORY_CACHE_SUBMENU, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20103, - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); + struct ebpf_target *w; + int update_every = em->update_every; + for (w = root; w; w = w->next) { + if (unlikely(!w->exposed)) + continue; + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_dc_hit", + "Percentage of files inside directory cache.", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + "app.ebpf_dc_hit", + 20265, + update_every, + NETDATA_EBPF_MODULE_NAME_DCSTAT); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION ratio '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_dc_reference", + "Count file access.", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_dc_reference", + 20266, + update_every, + NETDATA_EBPF_MODULE_NAME_DCSTAT); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION files '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_not_cache", + "Files not present inside directory cache.", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_dc_not_cache", + 20267, + update_every, + NETDATA_EBPF_MODULE_NAME_DCSTAT); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION files '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_not_found", + "Files not found.", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_dc_not_found", + 20268, + update_every, + NETDATA_EBPF_MODULE_NAME_DCSTAT); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION files '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + w->charts_created |= 1<<EBPF_MODULE_DCSTAT_IDX; + } em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; } @@ -746,64 +798,53 @@ void ebpf_dcache_send_apps_data(struct ebpf_target *root) struct ebpf_target *w; collected_number value; - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_DC_HIT_CHART); for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - ebpf_dcstat_sum_pids(&w->dcstat, w->root_pid); + if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_DCSTAT_IDX)))) + continue; - uint64_t cache = w->dcstat.curr.cache_access; - uint64_t not_found = w->dcstat.curr.not_found; + ebpf_dcstat_sum_pids(&w->dcstat, w->root_pid); - dcstat_update_publish(&w->dcstat, cache, not_found); - value = (collected_number) w->dcstat.ratio; - write_chart_dimension(w->name, value); - } - } - write_end_chart(); + uint64_t cache = w->dcstat.curr.cache_access; + uint64_t not_found = w->dcstat.curr.not_found; - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_DC_REFERENCE_CHART); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - if (w->dcstat.curr.cache_access < w->dcstat.prev.cache_access) { - w->dcstat.prev.cache_access = 0; - } + dcstat_update_publish(&w->dcstat, cache, not_found); + + value = (collected_number) w->dcstat.ratio; + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_dc_hit"); + write_chart_dimension("ratio", value); + ebpf_write_end_chart(); - w->dcstat.cache_access = (long long)w->dcstat.curr.cache_access - (long long)w->dcstat.prev.cache_access; - value = (collected_number) w->dcstat.cache_access; - write_chart_dimension(w->name, value); - w->dcstat.prev.cache_access = w->dcstat.curr.cache_access; + if (w->dcstat.curr.cache_access < w->dcstat.prev.cache_access) { + w->dcstat.prev.cache_access = 0; } - } - write_end_chart(); + w->dcstat.cache_access = (long long)w->dcstat.curr.cache_access - (long long)w->dcstat.prev.cache_access; - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_DC_REQUEST_NOT_CACHE_CHART); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - if (w->dcstat.curr.file_system < w->dcstat.prev.file_system) { - w->dcstat.prev.file_system = 0; - } + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_dc_reference"); + value = (collected_number) w->dcstat.cache_access; + write_chart_dimension("files", value); + ebpf_write_end_chart(); + w->dcstat.prev.cache_access = w->dcstat.curr.cache_access; - value = (collected_number) (!w->dcstat.cache_access) ? 0 : - (long long )w->dcstat.curr.file_system - (long long)w->dcstat.prev.file_system; - write_chart_dimension(w->name, value); - w->dcstat.prev.file_system = w->dcstat.curr.file_system; + if (w->dcstat.curr.file_system < w->dcstat.prev.file_system) { + w->dcstat.prev.file_system = 0; } - } - write_end_chart(); - - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_DC_REQUEST_NOT_FOUND_CHART); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - if (w->dcstat.curr.not_found < w->dcstat.prev.not_found) { - w->dcstat.prev.not_found = 0; - } - value = (collected_number) (!w->dcstat.cache_access) ? 0 : - (long long)w->dcstat.curr.not_found - (long long)w->dcstat.prev.not_found; - write_chart_dimension(w->name, value); - w->dcstat.prev.not_found = w->dcstat.curr.not_found; + value = (collected_number) (!w->dcstat.cache_access) ? 0 : + (long long )w->dcstat.curr.file_system - (long long)w->dcstat.prev.file_system; + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_not_cache"); + write_chart_dimension("files", value); + ebpf_write_end_chart(); + w->dcstat.prev.file_system = w->dcstat.curr.file_system; + + if (w->dcstat.curr.not_found < w->dcstat.prev.not_found) { + w->dcstat.prev.not_found = 0; } + value = (collected_number) (!w->dcstat.cache_access) ? 0 : + (long long)w->dcstat.curr.not_found - (long long)w->dcstat.prev.not_found; + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_not_found"); + write_chart_dimension("files", value); + ebpf_write_end_chart(); + w->dcstat.prev.not_found = w->dcstat.curr.not_found; } - write_end_chart(); } /** @@ -898,24 +939,28 @@ static void ebpf_create_specific_dc_charts(char *type, int update_every) static void ebpf_obsolete_specific_dc_charts(char *type, int update_every) { ebpf_write_chart_obsolete(type, NETDATA_DC_HIT_CHART, + "", "Percentage of files inside directory cache", EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_DIRECTORY_CACHE_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_HIT_RATIO_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5700, update_every); ebpf_write_chart_obsolete(type, NETDATA_DC_REFERENCE_CHART, + "", "Count file access", EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_REFERENCE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5701, update_every); ebpf_write_chart_obsolete(type, NETDATA_DC_REQUEST_NOT_CACHE_CHART, + "", "Files not present inside directory cache", EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_NOT_CACHE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5702, update_every); ebpf_write_chart_obsolete(type, NETDATA_DC_REQUEST_NOT_FOUND_CHART, + "", "Files not found", EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_NOT_FOUND_CONTEXT, @@ -1029,23 +1074,23 @@ static void ebpf_send_systemd_dc_charts() { collected_number value; ebpf_cgroup_target_t *ect; - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_HIT_CHART); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_HIT_CHART, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long) ect->publish_dc.ratio); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REFERENCE_CHART); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REFERENCE_CHART, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long) ect->publish_dc.cache_access); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REQUEST_NOT_CACHE_CHART); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REQUEST_NOT_CACHE_CHART, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { value = (collected_number) (!ect->publish_dc.cache_access) ? 0 : @@ -1055,9 +1100,9 @@ static void ebpf_send_systemd_dc_charts() write_chart_dimension(ect->name, (long long) value); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REQUEST_NOT_FOUND_CHART); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REQUEST_NOT_FOUND_CHART, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { value = (collected_number) (!ect->publish_dc.cache_access) ? 0 : @@ -1068,7 +1113,7 @@ static void ebpf_send_systemd_dc_charts() write_chart_dimension(ect->name, (long long) value); } } - write_end_chart(); + ebpf_write_end_chart(); } /** @@ -1080,31 +1125,31 @@ static void ebpf_send_systemd_dc_charts() static void ebpf_send_specific_dc_data(char *type, netdata_publish_dcstat_t *pdc) { collected_number value; - write_begin_chart(type, NETDATA_DC_HIT_CHART); + ebpf_write_begin_chart(type, NETDATA_DC_HIT_CHART, ""); write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_RATIO].name, (long long) pdc->ratio); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_DC_REFERENCE_CHART); + ebpf_write_begin_chart(type, NETDATA_DC_REFERENCE_CHART, ""); write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_REFERENCE].name, (long long) pdc->cache_access); - write_end_chart(); + ebpf_write_end_chart(); value = (collected_number) (!pdc->cache_access) ? 0 : (long long )pdc->curr.file_system - (long long)pdc->prev.file_system; pdc->prev.file_system = pdc->curr.file_system; - write_begin_chart(type, NETDATA_DC_REQUEST_NOT_CACHE_CHART); + ebpf_write_begin_chart(type, NETDATA_DC_REQUEST_NOT_CACHE_CHART, ""); write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_SLOW].name, (long long) value); - write_end_chart(); + ebpf_write_end_chart(); value = (collected_number) (!pdc->cache_access) ? 0 : (long long)pdc->curr.not_found - (long long)pdc->prev.not_found; pdc->prev.not_found = pdc->curr.not_found; - write_begin_chart(type, NETDATA_DC_REQUEST_NOT_FOUND_CHART); + ebpf_write_begin_chart(type, NETDATA_DC_REQUEST_NOT_FOUND_CHART, ""); write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_MISS].name, (long long) value); - write_end_chart(); + ebpf_write_end_chart(); } /** diff --git a/collectors/ebpf.plugin/ebpf_dcstat.h b/collectors/ebpf.plugin/ebpf_dcstat.h index 845b65908..4d6aff12e 100644 --- a/collectors/ebpf.plugin/ebpf_dcstat.h +++ b/collectors/ebpf.plugin/ebpf_dcstat.h @@ -13,7 +13,7 @@ #define NETDATA_DC_REQUEST_NOT_CACHE_CHART "dc_not_cache" #define NETDATA_DC_REQUEST_NOT_FOUND_CHART "dc_not_found" -#define NETDATA_DIRECTORY_CACHE_SUBMENU "directory cache (eBPF)" +#define NETDATA_DIRECTORY_CACHE_SUBMENU "directory cache" // configuration file #define NETDATA_DIRECTORY_DCSTAT_CONFIG_FILE "dcstat.conf" diff --git a/collectors/ebpf.plugin/ebpf_disk.c b/collectors/ebpf.plugin/ebpf_disk.c index 9dce8dd18..466c2e3bb 100644 --- a/collectors/ebpf.plugin/ebpf_disk.c +++ b/collectors/ebpf.plugin/ebpf_disk.c @@ -485,6 +485,7 @@ static void ebpf_obsolete_disk_global(ebpf_module_t *em) if (flags & NETDATA_DISK_CHART_CREATED) { ebpf_write_chart_obsolete(ned->histogram.name, ned->family, + "", "Disk latency", EBPF_COMMON_DIMENSION_CALL, ned->family, @@ -655,7 +656,7 @@ static void read_hard_disk_tables(int table, int maps_per_core) */ static void ebpf_obsolete_hd_charts(netdata_ebpf_disks_t *w, int update_every) { - ebpf_write_chart_obsolete(w->histogram.name, w->family, w->histogram.title, EBPF_COMMON_DIMENSION_CALL, + ebpf_write_chart_obsolete(w->histogram.name, w->family, "", w->histogram.title, EBPF_COMMON_DIMENSION_CALL, w->family, NETDATA_EBPF_CHART_TYPE_STACKED, "disk.latency_io", w->histogram.order, update_every); diff --git a/collectors/ebpf.plugin/ebpf_fd.c b/collectors/ebpf.plugin/ebpf_fd.c index 49c19ca77..3c8f30d3e 100644 --- a/collectors/ebpf.plugin/ebpf_fd.c +++ b/collectors/ebpf.plugin/ebpf_fd.c @@ -386,45 +386,49 @@ static void ebpf_obsolete_fd_services(ebpf_module_t *em) { ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN, + "", "Number of open files", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, NETDATA_CGROUP_FD_OPEN_CONTEXT, - 20061, + 20270, em->update_every); if (em->mode < MODE_ENTRY) { ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, + "", "Fails to open files", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, NETDATA_CGROUP_FD_OPEN_ERR_CONTEXT, - 20062, + 20271, em->update_every); } ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSED, + "", "Files closed", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, NETDATA_CGROUP_FD_CLOSE_CONTEXT, - 20063, + 20272, em->update_every); if (em->mode < MODE_ENTRY) { ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, + "", "Fails to close files", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, NETDATA_CGROUP_FD_CLOSE_ERR_CONTEXT, - 20064, + 20273, em->update_every); } } @@ -460,48 +464,60 @@ static inline void ebpf_obsolete_fd_cgroup_charts(ebpf_module_t *em) { */ void ebpf_obsolete_fd_apps_charts(struct ebpf_module *em) { - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_FILE_OPEN, - "Number of open files", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_FILE_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20061, - em->update_every); + struct ebpf_target *w; + int update_every = em->update_every; + for (w = apps_groups_root_target; w; w = w->next) { + if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_FD_IDX)))) + continue; - if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, - "Fails to open files", + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_file_open", + "Number of open files", EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_FILE_GROUP, + NETDATA_APPS_FILE_FDS, NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20062, - em->update_every); - } - - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_FILE_CLOSED, - "Files closed", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_FILE_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20063, - em->update_every); + "app.ebpf_file_open", + 20220, + update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_file_open_error", + "Fails to open files.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_FDS, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_file_open_error", + 20221, + update_every); + } - if (em->mode < MODE_ENTRY) { ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, - "Fails to close files", + w->clean_name, + "_ebpf_file_closed", + "Files closed.", EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_FILE_GROUP, + NETDATA_APPS_FILE_FDS, NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20064, - em->update_every); + "app.ebpf_file_closed", + 20222, + update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + w->clean_name, + "_ebpf_file_close_error", + "Fails to close files.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_FDS, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_fd_close_error", + 20223, + update_every); + } + w->charts_created &= ~(1<<EBPF_MODULE_FD_IDX); } } @@ -516,6 +532,7 @@ static void ebpf_obsolete_fd_global(ebpf_module_t *em) { ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, NETDATA_FILE_OPEN_CLOSE_COUNT, + "", "Open and close calls", EBPF_COMMON_DIMENSION_CALL, NETDATA_FILE_GROUP, @@ -527,6 +544,7 @@ static void ebpf_obsolete_fd_global(ebpf_module_t *em) if (em->mode < MODE_ENTRY) { ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, NETDATA_FILE_OPEN_ERR_COUNT, + "", "Open fails", EBPF_COMMON_DIMENSION_CALL, NETDATA_FILE_GROUP, @@ -802,45 +820,30 @@ void ebpf_fd_send_apps_data(ebpf_module_t *em, struct ebpf_target *root) { struct ebpf_target *w; for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - ebpf_fd_sum_pids(&w->fd, w->root_pid); - } - } + if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_FD_IDX)))) + continue; - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, w->fd.open_call); - } - } - write_end_chart(); + ebpf_fd_sum_pids(&w->fd, w->root_pid); - if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, w->fd.open_err); - } - } - write_end_chart(); - } + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_file_open"); + write_chart_dimension("calls", w->fd.open_call); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSED); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, w->fd.close_call); + if (em->mode < MODE_ENTRY) { + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_file_open_error"); + write_chart_dimension("calls", w->fd.open_err); + ebpf_write_end_chart(); } - } - write_end_chart(); - if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, w->fd.close_err); - } + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_file_closed"); + write_chart_dimension("calls", w->fd.close_call); + ebpf_write_end_chart(); + + if (em->mode < MODE_ENTRY) { + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_file_close_error"); + write_chart_dimension("calls", w->fd.close_err); + ebpf_write_end_chart(); } - write_end_chart(); } } @@ -933,25 +936,25 @@ static void ebpf_create_specific_fd_charts(char *type, ebpf_module_t *em) */ static void ebpf_obsolete_specific_fd_charts(char *type, ebpf_module_t *em) { - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_OPEN, "Number of open files", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_OPEN, "", "Number of open files", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_OPEN_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5400, em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, "Fails to open files", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, "", "Fails to open files", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_OPEN_ERR_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5401, em->update_every); } - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_CLOSED, "Files closed", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_CLOSED, "", "Files closed", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_CLOSE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5402, em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, "Fails to close files", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, "", "Fails to close files", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_CLOSE_ERR_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5403, em->update_every); @@ -968,24 +971,24 @@ static void ebpf_obsolete_specific_fd_charts(char *type, ebpf_module_t *em) */ static void ebpf_send_specific_fd_data(char *type, netdata_fd_stat_t *values, ebpf_module_t *em) { - write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN, ""); write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN].name, (long long)values->open_call); - write_end_chart(); + ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, ""); write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN].name, (long long)values->open_err); - write_end_chart(); + ebpf_write_end_chart(); } - write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSED); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSED, ""); write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE].name, (long long)values->close_call); - write_end_chart(); + ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, ""); write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE].name, (long long)values->close_err); - write_end_chart(); + ebpf_write_end_chart(); } } @@ -1037,40 +1040,40 @@ static void ebpf_create_systemd_fd_charts(ebpf_module_t *em) static void ebpf_send_systemd_fd_charts(ebpf_module_t *em) { ebpf_cgroup_target_t *ect; - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_fd.open_call); } } - write_end_chart(); + ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_fd.open_err); } } - write_end_chart(); + ebpf_write_end_chart(); } - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSED); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSED, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_fd.close_call); } } - write_end_chart(); + ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_fd.close_err); } } - write_end_chart(); + ebpf_write_end_chart(); } } @@ -1197,44 +1200,77 @@ static void fd_collector(ebpf_module_t *em) void ebpf_fd_create_apps_charts(struct ebpf_module *em, void *ptr) { struct ebpf_target *root = ptr; - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_OPEN, - "Number of open files", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_FILE_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20061, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_FD); + struct ebpf_target *w; + int update_every = em->update_every; + for (w = root; w; w = w->next) { + if (unlikely(!w->exposed)) + continue; - if (em->mode < MODE_ENTRY) { - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, - "Fails to open files", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_FILE_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20062, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_FD); - } + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_file_open", + "Number of open files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_FDS, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_file_open", + 20220, + update_every, + NETDATA_EBPF_MODULE_NAME_FD); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_file_open_error", + "Fails to open files.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_FDS, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_file_open_error", + 20221, + update_every, + NETDATA_EBPF_MODULE_NAME_FD); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + } - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_CLOSED, - "Files closed", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_FILE_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20063, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_FD); + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_file_closed", + "Files closed.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_FDS, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_file_closed", + 20222, + update_every, + NETDATA_EBPF_MODULE_NAME_FD); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_file_close_error", + "Fails to close files.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_FDS, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_file_close_error", + 20223, + update_every, + NETDATA_EBPF_MODULE_NAME_FD); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + } - if (em->mode < MODE_ENTRY) { - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, - "Fails to close files", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_FILE_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20064, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_FD); + w->charts_created |= 1<<EBPF_MODULE_FD_IDX; } em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; diff --git a/collectors/ebpf.plugin/ebpf_filesystem.c b/collectors/ebpf.plugin/ebpf_filesystem.c index 6203edd9a..b78e65532 100644 --- a/collectors/ebpf.plugin/ebpf_filesystem.c +++ b/collectors/ebpf.plugin/ebpf_filesystem.c @@ -351,20 +351,22 @@ static void ebpf_obsolete_fs_charts(int update_every) flags &= ~NETDATA_FILESYSTEM_FLAG_CHART_CREATED; ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hread.name, + "", efp->hread.title, EBPF_COMMON_DIMENSION_CALL, efp->family_name, NULL, NETDATA_EBPF_CHART_TYPE_STACKED, efp->hread.order, update_every); ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hwrite.name, + "", efp->hwrite.title, EBPF_COMMON_DIMENSION_CALL, efp->family_name, NULL, NETDATA_EBPF_CHART_TYPE_STACKED, efp->hwrite.order, update_every); - ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hopen.name, efp->hopen.title, + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hopen.name, "", efp->hopen.title, EBPF_COMMON_DIMENSION_CALL, efp->family_name, NULL, NETDATA_EBPF_CHART_TYPE_STACKED, efp->hopen.order, update_every); - ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hadditional.name, efp->hadditional.title, + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hadditional.name,"", efp->hadditional.title, EBPF_COMMON_DIMENSION_CALL, efp->family_name, NULL, NETDATA_EBPF_CHART_TYPE_STACKED, efp->hadditional.order, update_every); @@ -390,9 +392,9 @@ static void ebpf_create_fs_charts(int update_every) ebpf_filesystem_partitions_t *efp = &localfs[i]; uint32_t flags = efp->flags; if (flags & NETDATA_FILESYSTEM_FLAG_HAS_PARTITION && !(flags & test)) { - snprintfz(title, 255, "%s latency for each read request.", efp->filesystem); - snprintfz(family, 63, "%s_latency", efp->family); - snprintfz(chart_name, 63, "%s_read_latency", efp->filesystem); + snprintfz(title, sizeof(title) - 1, "%s latency for each read request.", efp->filesystem); + snprintfz(family, sizeof(family) - 1, "%s_latency", efp->family); + snprintfz(chart_name, sizeof(chart_name) - 1, "%s_read_latency", efp->filesystem); efp->hread.name = strdupz(chart_name); efp->hread.title = strdupz(title); efp->hread.ctx = NULL; @@ -408,8 +410,8 @@ static void ebpf_create_fs_charts(int update_every) update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM); order++; - snprintfz(title, 255, "%s latency for each write request.", efp->filesystem); - snprintfz(chart_name, 63, "%s_write_latency", efp->filesystem); + snprintfz(title, sizeof(title) - 1, "%s latency for each write request.", efp->filesystem); + snprintfz(chart_name, sizeof(chart_name) - 1, "%s_write_latency", efp->filesystem); efp->hwrite.name = strdupz(chart_name); efp->hwrite.title = strdupz(title); efp->hwrite.ctx = NULL; @@ -423,8 +425,8 @@ static void ebpf_create_fs_charts(int update_every) update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM); order++; - snprintfz(title, 255, "%s latency for each open request.", efp->filesystem); - snprintfz(chart_name, 63, "%s_open_latency", efp->filesystem); + snprintfz(title, sizeof(title) - 1, "%s latency for each open request.", efp->filesystem); + snprintfz(chart_name, sizeof(chart_name) - 1, "%s_open_latency", efp->filesystem); efp->hopen.name = strdupz(chart_name); efp->hopen.title = strdupz(title); efp->hopen.ctx = NULL; @@ -439,9 +441,9 @@ static void ebpf_create_fs_charts(int update_every) order++; char *type = (efp->flags & NETDATA_FILESYSTEM_ATTR_CHARTS) ? "attribute" : "sync"; - snprintfz(title, 255, "%s latency for each %s request.", efp->filesystem, type); - snprintfz(chart_name, 63, "%s_%s_latency", efp->filesystem, type); - snprintfz(ctx, 63, "filesystem.%s_latency", type); + snprintfz(title, sizeof(title) - 1, "%s latency for each %s request.", efp->filesystem, type); + snprintfz(chart_name, sizeof(chart_name) - 1, "%s_%s_latency", efp->filesystem, type); + snprintfz(ctx, sizeof(ctx) - 1, "filesystem.%s_latency", type); efp->hadditional.name = strdupz(chart_name); efp->hadditional.title = strdupz(title); efp->hadditional.ctx = strdupz(ctx); @@ -671,6 +673,7 @@ static void ebpf_obsolete_filesystem_global(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hread.name, + "", efp->hread.title, EBPF_COMMON_DIMENSION_CALL, efp->family_name, @@ -681,6 +684,7 @@ static void ebpf_obsolete_filesystem_global(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hwrite.name, + "", efp->hwrite.title, EBPF_COMMON_DIMENSION_CALL, efp->family_name, @@ -691,6 +695,7 @@ static void ebpf_obsolete_filesystem_global(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hopen.name, + "", efp->hopen.title, EBPF_COMMON_DIMENSION_CALL, efp->family_name, @@ -701,6 +706,7 @@ static void ebpf_obsolete_filesystem_global(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hadditional.name, + "", efp->hadditional.title, EBPF_COMMON_DIMENSION_CALL, efp->family_name, diff --git a/collectors/ebpf.plugin/ebpf_functions.c b/collectors/ebpf.plugin/ebpf_functions.c index ebee66395..6a481ad64 100644 --- a/collectors/ebpf.plugin/ebpf_functions.c +++ b/collectors/ebpf.plugin/ebpf_functions.c @@ -160,7 +160,7 @@ static void ebpf_function_thread_manipulation(const char *transaction, lem = ebpf_functions_select_module(thread_name); if (!lem) { - snprintfz(message, 511, "%s%s", EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND, name); + snprintfz(message, sizeof(message) - 1, "%s%s", EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND, name); ebpf_function_error(transaction, HTTP_RESP_NOT_FOUND, message); return; } @@ -189,7 +189,7 @@ static void ebpf_function_thread_manipulation(const char *transaction, const char *name = &keyword[sizeof(EBPF_THREADS_DISABLE_CATEGORY) - 1]; lem = ebpf_functions_select_module(name); if (!lem) { - snprintfz(message, 511, "%s%s", EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND, name); + snprintfz(message, sizeof(message) - 1, "%s%s", EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND, name); ebpf_function_error(transaction, HTTP_RESP_NOT_FOUND, message); return; } @@ -205,7 +205,7 @@ static void ebpf_function_thread_manipulation(const char *transaction, const char *name = &keyword[sizeof(EBPF_THREADS_SELECT_THREAD) - 1]; lem = ebpf_functions_select_module(name); if (!lem) { - snprintfz(message, 511, "%s%s", EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND, name); + snprintfz(message, sizeof(message) - 1, "%s%s", EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND, name); ebpf_function_error(transaction, HTTP_RESP_NOT_FOUND, message); return; } diff --git a/collectors/ebpf.plugin/ebpf_hardirq.c b/collectors/ebpf.plugin/ebpf_hardirq.c index 707d92577..465ee6434 100644 --- a/collectors/ebpf.plugin/ebpf_hardirq.c +++ b/collectors/ebpf.plugin/ebpf_hardirq.c @@ -226,6 +226,7 @@ static void ebpf_obsolete_hardirq_global(ebpf_module_t *em) { ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, "hardirq_latency", + "", "Hardware IRQ latency", EBPF_COMMON_DIMENSION_MILLISECONDS, "interrupts", @@ -593,10 +594,10 @@ static void hardirq_collector(ebpf_module_t *em) pthread_mutex_lock(&lock); // write dims now for all hitherto discovered IRQs. - write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, "hardirq_latency"); + ebpf_write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, "hardirq_latency", ""); avl_traverse_lock(&hardirq_pub, hardirq_write_dims, NULL); hardirq_write_static_dims(); - write_end_chart(); + ebpf_write_end_chart(); pthread_mutex_unlock(&lock); diff --git a/collectors/ebpf.plugin/ebpf_mdflush.c b/collectors/ebpf.plugin/ebpf_mdflush.c index c0adf2ea4..fe33ff6a4 100644 --- a/collectors/ebpf.plugin/ebpf_mdflush.c +++ b/collectors/ebpf.plugin/ebpf_mdflush.c @@ -140,6 +140,7 @@ static void ebpf_obsolete_mdflush_global(ebpf_module_t *em) { ebpf_write_chart_obsolete("mdstat", "mdstat_flush", + "", "MD flushes", "flushes", "flush (eBPF)", @@ -355,9 +356,9 @@ static void mdflush_collector(ebpf_module_t *em) mdflush_read_count_map(maps_per_core); pthread_mutex_lock(&lock); // write dims now for all hitherto discovered devices. - write_begin_chart("mdstat", "mdstat_flush"); + ebpf_write_begin_chart("mdstat", "mdstat_flush", ""); avl_traverse_lock(&mdflush_pub, mdflush_write_dims, NULL); - write_end_chart(); + ebpf_write_end_chart(); pthread_mutex_unlock(&lock); diff --git a/collectors/ebpf.plugin/ebpf_mount.c b/collectors/ebpf.plugin/ebpf_mount.c index 473036bd7..05c76540a 100644 --- a/collectors/ebpf.plugin/ebpf_mount.c +++ b/collectors/ebpf.plugin/ebpf_mount.c @@ -233,6 +233,7 @@ static void ebpf_obsolete_mount_global(ebpf_module_t *em) { ebpf_write_chart_obsolete(NETDATA_EBPF_MOUNT_GLOBAL_FAMILY, NETDATA_EBPF_MOUNT_CALLS, + "", "Calls to mount and umount syscalls", EBPF_COMMON_DIMENSION_CALL, NETDATA_EBPF_MOUNT_FAMILY, @@ -243,6 +244,7 @@ static void ebpf_obsolete_mount_global(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_EBPF_MOUNT_GLOBAL_FAMILY, NETDATA_EBPF_MOUNT_ERRORS, + "", "Errors to mount and umount file systems", EBPF_COMMON_DIMENSION_CALL, NETDATA_EBPF_MOUNT_FAMILY, diff --git a/collectors/ebpf.plugin/ebpf_oomkill.c b/collectors/ebpf.plugin/ebpf_oomkill.c index 16ce0bddf..2c34650c3 100644 --- a/collectors/ebpf.plugin/ebpf_oomkill.c +++ b/collectors/ebpf.plugin/ebpf_oomkill.c @@ -57,6 +57,7 @@ static void ebpf_obsolete_oomkill_services(ebpf_module_t *em) { ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_OOMKILL_CHART, + "", "OOM kills. This chart is provided by eBPF plugin.", EBPF_COMMON_DIMENSION_KILLS, NETDATA_EBPF_MEMORY_GROUP, @@ -98,15 +99,25 @@ static inline void ebpf_obsolete_oomkill_cgroup_charts(ebpf_module_t *em) */ static void ebpf_obsolete_oomkill_apps(ebpf_module_t *em) { - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_OOMKILL_CHART, - "OOM kills", - EBPF_COMMON_DIMENSION_KILLS, - "mem", - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20020, - em->update_every); + struct ebpf_target *w; + int update_every = em->update_every; + for (w = apps_groups_root_target; w; w = w->next) { + if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_OOMKILL_IDX)))) + continue; + + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_app_oomkill", + "OOM kills.", + EBPF_COMMON_DIMENSION_KILLS, + NETDATA_EBPF_MEMORY_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "ebpf.app_oomkill", + 20020, + update_every); + + w->charts_created &= ~(1<<EBPF_MODULE_OOMKILL_IDX); + } } /** @@ -150,8 +161,11 @@ static void oomkill_write_data(int32_t *keys, uint32_t total) // for each app, see if it was OOM killed. record as 1 if so otherwise 0. struct ebpf_target *w; for (w = apps_groups_root_target; w != NULL; w = w->next) { - if (likely(w->exposed && w->processes)) { - bool was_oomkilled = false; + if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_OOMKILL_IDX)))) + continue; + + bool was_oomkilled = false; + if (total) { struct ebpf_pid_on_target *pids = w->root_pid; while (pids) { uint32_t j; @@ -165,10 +179,11 @@ static void oomkill_write_data(int32_t *keys, uint32_t total) } pids = pids->next; } - - write_dim:; - write_chart_dimension(w->name, was_oomkilled); } +write_dim: + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_oomkill"); + write_chart_dimension(EBPF_COMMON_DIMENSION_KILLS, was_oomkilled); + ebpf_write_end_chart(); } // for any remaining keys for which we couldn't find a group, this could be @@ -231,14 +246,14 @@ static void ebpf_create_systemd_oomkill_charts(int update_every) static void ebpf_send_systemd_oomkill_charts() { ebpf_cgroup_target_t *ect; - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_OOMKILL_CHART); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_OOMKILL_CHART, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long) ect->oomkill); ect->oomkill = 0; } } - write_end_chart(); + ebpf_write_end_chart(); } /* @@ -251,9 +266,9 @@ static void ebpf_send_systemd_oomkill_charts() */ static void ebpf_send_specific_oomkill_data(char *type, int value) { - write_begin_chart(type, NETDATA_OOMKILL_CHART); + ebpf_write_begin_chart(type, NETDATA_OOMKILL_CHART, ""); write_chart_dimension(oomkill_publish_aggregated.name, (long long)value); - write_end_chart(); + ebpf_write_end_chart(); } /** @@ -266,7 +281,7 @@ static void ebpf_send_specific_oomkill_data(char *type, int value) */ static void ebpf_obsolete_specific_oomkill_charts(char *type, int update_every) { - ebpf_write_chart_obsolete(type, NETDATA_OOMKILL_CHART, "OOM kills. This chart is provided by eBPF plugin.", + ebpf_write_chart_obsolete(type, NETDATA_OOMKILL_CHART, "", "OOM kills. This chart is provided by eBPF plugin.", EBPF_COMMON_DIMENSION_KILLS, NETDATA_EBPF_MEMORY_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_OOMKILLS_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5600, update_every); @@ -430,7 +445,6 @@ static void oomkill_collector(ebpf_module_t *em) uint32_t count = oomkill_read_data(keys); if (!count) { running_time = ebpf_update_oomkill_period(running_time, em); - continue; } stats[NETDATA_CONTROLLER_PID_TABLE_ADD] += (uint64_t) count; @@ -438,16 +452,14 @@ static void oomkill_collector(ebpf_module_t *em) pthread_mutex_lock(&collect_data_mutex); pthread_mutex_lock(&lock); - if (cgroups) { + if (cgroups && count) { ebpf_update_oomkill_cgroup(keys, count); // write everything from the ebpf map. ebpf_oomkill_send_cgroup_data(update_every); } if (em->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_OOMKILL_CHART); oomkill_write_data(keys, count); - write_end_chart(); } pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); @@ -466,14 +478,29 @@ static void oomkill_collector(ebpf_module_t *em) void ebpf_oomkill_create_apps_charts(struct ebpf_module *em, void *ptr) { struct ebpf_target *root = ptr; - ebpf_create_charts_on_apps(NETDATA_OOMKILL_CHART, - "OOM kills", - EBPF_COMMON_DIMENSION_KILLS, - "mem", - NETDATA_EBPF_CHART_TYPE_STACKED, - 20020, - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_OOMKILL); + struct ebpf_target *w; + int update_every = em->update_every; + for (w = root; w; w = w->next) { + if (unlikely(!w->exposed)) + continue; + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_oomkill", + "OOM kills.", + EBPF_COMMON_DIMENSION_KILLS, + NETDATA_EBPF_MEMORY_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_oomkill", + 20072, + update_every, + NETDATA_EBPF_MODULE_NAME_OOMKILL); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION kills '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + w->charts_created |= 1<<EBPF_MODULE_OOMKILL_IDX; + } em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; } diff --git a/collectors/ebpf.plugin/ebpf_process.c b/collectors/ebpf.plugin/ebpf_process.c index 577044e59..e3e2b884e 100644 --- a/collectors/ebpf.plugin/ebpf_process.c +++ b/collectors/ebpf.plugin/ebpf_process.c @@ -116,12 +116,12 @@ static void ebpf_update_global_publish(netdata_publish_syscall_t *publish, netda */ static void write_status_chart(char *family, netdata_publish_vfs_common_t *pvc) { - write_begin_chart(family, NETDATA_PROCESS_STATUS_NAME); + ebpf_write_begin_chart(family, NETDATA_PROCESS_STATUS_NAME, ""); write_chart_dimension(status[0], (long long)pvc->running); write_chart_dimension(status[1], (long long)pvc->zombie); - write_end_chart(); + ebpf_write_end_chart(); } /** @@ -201,56 +201,43 @@ void ebpf_process_remove_pids() void ebpf_process_send_apps_data(struct ebpf_target *root, ebpf_module_t *em) { struct ebpf_target *w; - collected_number value; + // This algorithm is improved in https://github.com/netdata/netdata/pull/16030 + collected_number values[5]; - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_PROCESS); for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t, create_process)); - write_chart_dimension(w->name, value); - } - } - write_end_chart(); - - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_THREAD); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t, create_thread)); - write_chart_dimension(w->name, value); - } - } - write_end_chart(); + if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_PROCESS_IDX)))) + continue; - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_EXIT); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t, + values[0] = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t, create_process)); + values[1] = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t, create_thread)); + values[2] = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t, exit_call)); - write_chart_dimension(w->name, value); - } - } - write_end_chart(); - - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_CLOSE); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t, + values[3] = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t, release_call)); - write_chart_dimension(w->name, value); - } - } - write_end_chart(); + values[4] = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t, + task_err)); - if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_ERROR); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t, - task_err)); - write_chart_dimension(w->name, value); - } + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_process_start"); + write_chart_dimension("calls", values[0]); + ebpf_write_end_chart(); + + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_thread_start"); + write_chart_dimension("calls", values[1]); + ebpf_write_end_chart(); + + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_task_exit"); + write_chart_dimension("calls", values[2]); + ebpf_write_end_chart(); + + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_task_released"); + write_chart_dimension("calls", values[3]); + ebpf_write_end_chart(); + + if (em->mode < MODE_ENTRY) { + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_task_error"); + write_chart_dimension("calls", values[4]); + ebpf_write_end_chart(); } - write_end_chart(); } ebpf_process_remove_pids(); @@ -433,52 +420,89 @@ static void ebpf_create_global_charts(ebpf_module_t *em) void ebpf_process_create_apps_charts(struct ebpf_module *em, void *ptr) { struct ebpf_target *root = ptr; - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_PROCESS, - "Process started", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_PROCESS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20065, - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); - - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_THREAD, - "Threads started", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_PROCESS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20066, - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); - - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_EXIT, - "Tasks starts exit process.", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_PROCESS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20067, - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); - - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_CLOSE, - "Tasks closed", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_PROCESS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20068, - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + struct ebpf_target *w; + int update_every = em->update_every; + for (w = root; w; w = w->next) { + if (unlikely(!w->exposed)) + continue; - if (em->mode < MODE_ENTRY) { - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_ERROR, - "Errors to create process or threads.", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_PROCESS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20069, - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root, - em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_process_start", + "Process started.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_process_start", + 20161, + update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_thread_start", + "Threads started.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_thread_start", + 20162, + update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_task_exit", + "Tasks starts exit process.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_task_exit", + 20163, + update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_task_released", + "Tasks released.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_task_released", + 20164, + update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_task_error", + "Errors to create process or threads.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_task_error", + 20165, + update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + } + w->charts_created |= 1<<EBPF_MODULE_PROCESS_IDX; } em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; @@ -503,6 +527,7 @@ static void ebpf_obsolete_process_services(ebpf_module_t *em) { ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_PROCESS, + "", "Process started", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_PROCESS_GROUP, @@ -513,6 +538,7 @@ static void ebpf_obsolete_process_services(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_THREAD, + "", "Threads started", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_PROCESS_GROUP, @@ -523,6 +549,7 @@ static void ebpf_obsolete_process_services(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_CLOSE, + "", "Tasks starts exit process.", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_PROCESS_GROUP, @@ -533,6 +560,7 @@ static void ebpf_obsolete_process_services(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_EXIT, + "", "Tasks closed", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_PROCESS_GROUP, @@ -544,6 +572,7 @@ static void ebpf_obsolete_process_services(ebpf_module_t *em) if (em->mode < MODE_ENTRY) { ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_ERROR, + "", "Errors to create process or threads.", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_PROCESS_GROUP, @@ -585,56 +614,70 @@ static inline void ebpf_obsolete_process_cgroup_charts(ebpf_module_t *em) { */ void ebpf_obsolete_process_apps_charts(struct ebpf_module *em) { - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_TASK_PROCESS, - "Process started", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_PROCESS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20065, - em->update_every); - - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_TASK_THREAD, - "Threads started", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_PROCESS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20066, - em->update_every); - - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_TASK_EXIT, - "Tasks starts exit process.", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_PROCESS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20067, - em->update_every); - - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_TASK_CLOSE, - "Tasks closed", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_PROCESS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20068, - em->update_every); + struct ebpf_target *w; + int update_every = em->update_every; + for (w = apps_groups_root_target; w; w = w->next) { + if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_PROCESS_IDX)))) + continue; - if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_TASK_ERROR, - "Errors to create process or threads.", + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_process_start", + "Process started.", EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20069, - em->update_every); + "app.ebpf_process_start", + 20161, + update_every); + + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_thread_start", + "Threads started.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_thread_start", + 20162, + update_every); + + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_task_exit", + "Tasks starts exit process.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_task_exit", + 20163, + update_every); + + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_task_released", + "Tasks released.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_task_released", + 20164, + update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_task_error", + "Errors to create process or threads.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_task_error", + 20165, + update_every); + } + + w->charts_created &= ~(1<<EBPF_MODULE_PROCESS_IDX); } } @@ -649,6 +692,7 @@ static void ebpf_obsolete_process_global(ebpf_module_t *em) { ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_PROCESS_SYSCALL, + "", "Start process", EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, @@ -659,6 +703,7 @@ static void ebpf_obsolete_process_global(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_EXIT_SYSCALL, + "", "Exit process", EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, @@ -669,6 +714,7 @@ static void ebpf_obsolete_process_global(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_PROCESS_STATUS_NAME, + "", "Process not closed", EBPF_COMMON_DIMENSION_DIFFERENCE, NETDATA_PROCESS_GROUP, @@ -680,6 +726,7 @@ static void ebpf_obsolete_process_global(ebpf_module_t *em) if (em->mode < MODE_ENTRY) { ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_PROCESS_ERROR_NAME, + "", "Fails to create process", EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, @@ -819,31 +866,31 @@ static void ebpf_process_sum_cgroup_pids(ebpf_process_stat_t *ps, struct pid_on_ */ static void ebpf_send_specific_process_data(char *type, ebpf_process_stat_t *values, ebpf_module_t *em) { - write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_PROCESS); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_PROCESS, ""); write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK].name, (long long) values->create_process); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_THREAD); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_THREAD, ""); write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_CLONE].name, (long long) values->create_thread); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_EXIT); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_EXIT, ""); write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT].name, (long long) values->release_call); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_CLOSE); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_CLOSE, ""); write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK].name, (long long) values->release_call); - write_end_chart(); + ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_ERROR); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_ERROR, ""); write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT].name, (long long) values->task_err); - write_end_chart(); + ebpf_write_end_chart(); } } @@ -909,28 +956,28 @@ static void ebpf_create_specific_process_charts(char *type, ebpf_module_t *em) */ static void ebpf_obsolete_specific_process_charts(char *type, ebpf_module_t *em) { - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_PROCESS, "Process started", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_PROCESS, "", "Process started", EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_PROCESS_CREATE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5000, em->update_every); - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_THREAD, "Threads started", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_THREAD, "", "Threads started", EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_THREAD_CREATE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5001, em->update_every); - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_EXIT,"Tasks starts exit process.", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_EXIT, "","Tasks starts exit process.", EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_PROCESS_EXIT_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5002, em->update_every); - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_CLOSE,"Tasks closed", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_CLOSE, "","Tasks closed", EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_PROCESS_CLOSE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5003, em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_ERROR,"Errors to create process or threads.", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_ERROR, "","Errors to create process or threads.", EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_PROCESS_ERROR_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5004, em->update_every); @@ -989,46 +1036,46 @@ static void ebpf_create_systemd_process_charts(ebpf_module_t *em) static void ebpf_send_systemd_process_charts(ebpf_module_t *em) { ebpf_cgroup_target_t *ect; - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_PROCESS); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_PROCESS, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_ps.create_process); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_THREAD); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_THREAD, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_ps.create_thread); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_EXIT); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_EXIT, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_ps.exit_call); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_CLOSE); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_CLOSE, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_ps.release_call); } } - write_end_chart(); + ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_ERROR); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_ERROR, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_ps.task_err); } } - write_end_chart(); + ebpf_write_end_chart(); } } diff --git a/collectors/ebpf.plugin/ebpf_shm.c b/collectors/ebpf.plugin/ebpf_shm.c index c171762b6..f14eb67d0 100644 --- a/collectors/ebpf.plugin/ebpf_shm.c +++ b/collectors/ebpf.plugin/ebpf_shm.c @@ -305,7 +305,8 @@ static void ebpf_obsolete_shm_services(ebpf_module_t *em) { ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SHMGET_CHART, - "Calls to syscall <code>shmget(2)</code>.", + "", + "Calls to syscall shmget(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_IPC_SHM_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, @@ -315,7 +316,8 @@ static void ebpf_obsolete_shm_services(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SHMAT_CHART, - "Calls to syscall <code>shmat(2)</code>.", + "", + "Calls to syscall shmat(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_IPC_SHM_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, @@ -325,7 +327,8 @@ static void ebpf_obsolete_shm_services(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SHMDT_CHART, - "Calls to syscall <code>shmdt(2)</code>.", + "", + "Calls to syscall shmdt(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_IPC_SHM_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, @@ -335,7 +338,8 @@ static void ebpf_obsolete_shm_services(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SHMCTL_CHART, - "Calls to syscall <code>shmctl(2)</code>.", + "", + "Calls to syscall shmctl(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_IPC_SHM_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, @@ -375,45 +379,58 @@ static inline void ebpf_obsolete_shm_cgroup_charts(ebpf_module_t *em) { */ void ebpf_obsolete_shm_apps_charts(struct ebpf_module *em) { - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SHMGET_CHART, - "Calls to syscall <code>shmget(2)</code>.", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_IPC_SHM_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20191, - em->update_every); + struct ebpf_target *w; + int update_every = em->update_every; + for (w = apps_groups_root_target; w; w = w->next) { + if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_SHM_IDX)))) + continue; - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SHMAT_CHART, - "Calls to syscall <code>shmat(2)</code>.", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_IPC_SHM_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20192, - em->update_every); + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_shmget_call", + "Calls to syscall shmget(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_shmget_call", + 20191, + update_every); - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SHMDT_CHART, - "Calls to syscall <code>shmdt(2)</code>.", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_IPC_SHM_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20193, - em->update_every); + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_shmat_call", + "Calls to syscall shmat(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_shmat_call", + 20192, + update_every); - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SHMCTL_CHART, - "Calls to syscall <code>shmctl(2)</code>.", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_IPC_SHM_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20194, - em->update_every); + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_shmdt_call", + "Calls to syscall shmdt(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_shmdt_call", + 20193, + update_every); + + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_shmctl_call", + "Calls to syscall shmctl(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_shmctl_call", + 20194, + update_every); + + w->charts_created &= ~(1<<EBPF_MODULE_SHM_IDX); + } } /** @@ -427,6 +444,7 @@ static void ebpf_obsolete_shm_global(ebpf_module_t *em) { ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_SHM_GLOBAL_CHART, + "", "Calls to shared memory system calls", EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_IPC_SHM_SUBMENU, @@ -621,7 +639,7 @@ static void read_shm_apps_table(int maps_per_core) */ static void shm_send_global() { - write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_SHM_GLOBAL_CHART); + ebpf_write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_SHM_GLOBAL_CHART, ""); write_chart_dimension( shm_publish_aggregated[NETDATA_KEY_SHMGET_CALL].dimension, (long long) shm_hash_values[NETDATA_KEY_SHMGET_CALL] @@ -638,7 +656,7 @@ static void shm_send_global() shm_publish_aggregated[NETDATA_KEY_SHMCTL_CALL].dimension, (long long) shm_hash_values[NETDATA_KEY_SHMCTL_CALL] ); - write_end_chart(); + ebpf_write_end_chart(); } /** @@ -699,42 +717,27 @@ void ebpf_shm_send_apps_data(struct ebpf_target *root) { struct ebpf_target *w; for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - ebpf_shm_sum_pids(&w->shm, w->root_pid); - } - } + if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_SHM_IDX)))) + continue; - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SHMGET_CHART); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, (long long) w->shm.get); - } - } - write_end_chart(); + ebpf_shm_sum_pids(&w->shm, w->root_pid); - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SHMAT_CHART); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, (long long) w->shm.at); - } - } - write_end_chart(); + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_shmget_call"); + write_chart_dimension("calls", (long long) w->shm.get); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SHMDT_CHART); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, (long long) w->shm.dt); - } - } - write_end_chart(); + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_shmat_call"); + write_chart_dimension("calls", (long long) w->shm.at); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SHMCTL_CHART); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, (long long) w->shm.ctl); - } + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_shmdt_call"); + write_chart_dimension("calls", (long long) w->shm.dt); + ebpf_write_end_chart(); + + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_shmctl_call"); + write_chart_dimension("calls", (long long) w->shm.ctl); + ebpf_write_end_chart(); } - write_end_chart(); } /** @@ -768,7 +771,7 @@ static void ebpf_shm_sum_cgroup_pids(netdata_publish_shm_t *shm, struct pid_on_t static void ebpf_create_specific_shm_charts(char *type, int update_every) { ebpf_create_chart(type, NETDATA_SHMGET_CHART, - "Calls to syscall <code>shmget(2)</code>.", + "Calls to syscall shmget(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_IPC_SHM_GROUP, NETDATA_CGROUP_SHM_GET_CONTEXT, @@ -781,7 +784,7 @@ static void ebpf_create_specific_shm_charts(char *type, int update_every) NETDATA_EBPF_MODULE_NAME_SHM); ebpf_create_chart(type, NETDATA_SHMAT_CHART, - "Calls to syscall <code>shmat(2)</code>.", + "Calls to syscall shmat(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_IPC_SHM_GROUP, NETDATA_CGROUP_SHM_AT_CONTEXT, @@ -794,7 +797,7 @@ static void ebpf_create_specific_shm_charts(char *type, int update_every) NETDATA_EBPF_MODULE_NAME_SHM); ebpf_create_chart(type, NETDATA_SHMDT_CHART, - "Calls to syscall <code>shmdt(2)</code>.", + "Calls to syscall shmdt(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_IPC_SHM_GROUP, NETDATA_CGROUP_SHM_DT_CONTEXT, @@ -807,7 +810,7 @@ static void ebpf_create_specific_shm_charts(char *type, int update_every) NETDATA_EBPF_MODULE_NAME_SHM); ebpf_create_chart(type, NETDATA_SHMCTL_CHART, - "Calls to syscall <code>shmctl(2)</code>.", + "Calls to syscall shmctl(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_IPC_SHM_GROUP, NETDATA_CGROUP_SHM_CTL_CONTEXT, @@ -831,28 +834,32 @@ static void ebpf_create_specific_shm_charts(char *type, int update_every) static void ebpf_obsolete_specific_shm_charts(char *type, int update_every) { ebpf_write_chart_obsolete(type, NETDATA_SHMGET_CHART, - "Calls to syscall <code>shmget(2)</code>.", + "", + "Calls to syscall shmget(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_IPC_SHM_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SHM_GET_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5800, update_every); ebpf_write_chart_obsolete(type, NETDATA_SHMAT_CHART, - "Calls to syscall <code>shmat(2)</code>.", + "", + "Calls to syscall shmat(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_IPC_SHM_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SHM_AT_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5801, update_every); ebpf_write_chart_obsolete(type, NETDATA_SHMDT_CHART, - "Calls to syscall <code>shmdt(2)</code>.", + "", + "Calls to syscall shmdt(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_IPC_SHM_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SHM_DT_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5802, update_every); ebpf_write_chart_obsolete(type, NETDATA_SHMCTL_CHART, - "Calls to syscall <code>shmctl(2)</code>.", + "", + "Calls to syscall shmctl(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_IPC_SHM_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SHM_CTL_CONTEXT, @@ -869,7 +876,7 @@ static void ebpf_obsolete_specific_shm_charts(char *type, int update_every) static void ebpf_create_systemd_shm_charts(int update_every) { ebpf_create_charts_on_systemd(NETDATA_SHMGET_CHART, - "Calls to syscall <code>shmget(2)</code>.", + "Calls to syscall shmget(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_IPC_SHM_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, @@ -878,7 +885,7 @@ static void ebpf_create_systemd_shm_charts(int update_every) NETDATA_SYSTEMD_SHM_GET_CONTEXT, NETDATA_EBPF_MODULE_NAME_SHM, update_every); ebpf_create_charts_on_systemd(NETDATA_SHMAT_CHART, - "Calls to syscall <code>shmat(2)</code>.", + "Calls to syscall shmat(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_IPC_SHM_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, @@ -887,7 +894,7 @@ static void ebpf_create_systemd_shm_charts(int update_every) NETDATA_SYSTEMD_SHM_AT_CONTEXT, NETDATA_EBPF_MODULE_NAME_SHM, update_every); ebpf_create_charts_on_systemd(NETDATA_SHMDT_CHART, - "Calls to syscall <code>shmdt(2)</code>.", + "Calls to syscall shmdt(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_IPC_SHM_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, @@ -896,7 +903,7 @@ static void ebpf_create_systemd_shm_charts(int update_every) NETDATA_SYSTEMD_SHM_DT_CONTEXT, NETDATA_EBPF_MODULE_NAME_SHM, update_every); ebpf_create_charts_on_systemd(NETDATA_SHMCTL_CHART, - "Calls to syscall <code>shmctl(2)</code>.", + "Calls to syscall shmctl(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_IPC_SHM_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, @@ -913,37 +920,37 @@ static void ebpf_create_systemd_shm_charts(int update_every) static void ebpf_send_systemd_shm_charts() { ebpf_cgroup_target_t *ect; - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMGET_CHART); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMGET_CHART, ""); for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long)ect->publish_shm.get); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMAT_CHART); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMAT_CHART, ""); for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long)ect->publish_shm.at); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMDT_CHART); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMDT_CHART, ""); for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long)ect->publish_shm.dt); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMCTL_CHART); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMCTL_CHART, ""); for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long)ect->publish_shm.ctl); } } - write_end_chart(); + ebpf_write_end_chart(); } /* @@ -956,21 +963,21 @@ static void ebpf_send_systemd_shm_charts() */ static void ebpf_send_specific_shm_data(char *type, netdata_publish_shm_t *values) { - write_begin_chart(type, NETDATA_SHMGET_CHART); + ebpf_write_begin_chart(type, NETDATA_SHMGET_CHART, ""); write_chart_dimension(shm_publish_aggregated[NETDATA_KEY_SHMGET_CALL].name, (long long)values->get); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_SHMAT_CHART); + ebpf_write_begin_chart(type, NETDATA_SHMAT_CHART, ""); write_chart_dimension(shm_publish_aggregated[NETDATA_KEY_SHMAT_CALL].name, (long long)values->at); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_SHMDT_CHART); + ebpf_write_begin_chart(type, NETDATA_SHMDT_CHART, ""); write_chart_dimension(shm_publish_aggregated[NETDATA_KEY_SHMDT_CALL].name, (long long)values->dt); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_SHMCTL_CHART); + ebpf_write_begin_chart(type, NETDATA_SHMCTL_CHART, ""); write_chart_dimension(shm_publish_aggregated[NETDATA_KEY_SHMCTL_CALL].name, (long long)values->ctl); - write_end_chart(); + ebpf_write_end_chart(); } /** @@ -1097,41 +1104,74 @@ static void shm_collector(ebpf_module_t *em) void ebpf_shm_create_apps_charts(struct ebpf_module *em, void *ptr) { struct ebpf_target *root = ptr; - ebpf_create_charts_on_apps(NETDATA_SHMGET_CHART, - "Calls to syscall <code>shmget(2)</code>.", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_IPC_SHM_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20191, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_SHM); - - ebpf_create_charts_on_apps(NETDATA_SHMAT_CHART, - "Calls to syscall <code>shmat(2)</code>.", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_IPC_SHM_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20192, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_SHM); - - ebpf_create_charts_on_apps(NETDATA_SHMDT_CHART, - "Calls to syscall <code>shmdt(2)</code>.", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_IPC_SHM_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20193, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_SHM); - - ebpf_create_charts_on_apps(NETDATA_SHMCTL_CHART, - "Calls to syscall <code>shmctl(2)</code>.", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_IPC_SHM_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20194, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_SHM); + struct ebpf_target *w; + int update_every = em->update_every; + for (w = root; w; w = w->next) { + if (unlikely(!w->exposed)) + continue; + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_shmget_call", + "Calls to syscall shmget(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_shmget_call", + 20191, + update_every, + NETDATA_EBPF_MODULE_NAME_SHM); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_shmat_call", + "Calls to syscall shmat(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_shmat_call", + 20192, + update_every, + NETDATA_EBPF_MODULE_NAME_SHM); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_shmdt_call", + "Calls to syscall shmdt(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_shmdt_call", + 20193, + update_every, + NETDATA_EBPF_MODULE_NAME_SHM); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_shmctl_call", + "Calls to syscall shmctl(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_shmctl_call", + 20194, + update_every, + NETDATA_EBPF_MODULE_NAME_SHM); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + w->charts_created |= 1<<EBPF_MODULE_SHM_IDX; + } em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; } diff --git a/collectors/ebpf.plugin/ebpf_socket.c b/collectors/ebpf.plugin/ebpf_socket.c index 3e3897551..bbb5dca1b 100644 --- a/collectors/ebpf.plugin/ebpf_socket.c +++ b/collectors/ebpf.plugin/ebpf_socket.c @@ -75,6 +75,7 @@ static netdata_publish_syscall_t socket_publish_aggregated[NETDATA_MAX_SOCKET_VE netdata_socket_t *socket_values; ebpf_network_viewer_port_list_t *listen_ports = NULL; +ebpf_addresses_t tcp_v6_connect_address = {.function = "tcp_v6_connect", .hash = 0, .addr = 0, .type = 0}; struct config socket_config = { .first_section = NULL, .last_section = NULL, @@ -177,11 +178,13 @@ static void ebpf_set_trampoline_target(struct socket_bpf *obj) bpf_program__set_attach_target(obj->progs.netdata_tcp_v4_connect_fexit, 0, socket_targets[NETDATA_FCNT_TCP_V4_CONNECT].name); - bpf_program__set_attach_target(obj->progs.netdata_tcp_v6_connect_fentry, 0, - socket_targets[NETDATA_FCNT_TCP_V6_CONNECT].name); + if (tcp_v6_connect_address.type == 'T') { + bpf_program__set_attach_target( + obj->progs.netdata_tcp_v6_connect_fentry, 0, socket_targets[NETDATA_FCNT_TCP_V6_CONNECT].name); - bpf_program__set_attach_target(obj->progs.netdata_tcp_v6_connect_fexit, 0, + bpf_program__set_attach_target(obj->progs.netdata_tcp_v6_connect_fexit, 0, socket_targets[NETDATA_FCNT_TCP_V6_CONNECT].name); + } bpf_program__set_attach_target(obj->progs.netdata_tcp_retransmit_skb_fentry, 0, socket_targets[NETDATA_FCNT_TCP_RETRANSMIT].name); @@ -332,12 +335,13 @@ static long ebpf_socket_attach_probes(struct socket_bpf *obj, netdata_run_mode_t if (ret) return -1; - obj->links.netdata_tcp_v6_connect_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_tcp_v6_connect_kretprobe, - true, - socket_targets[NETDATA_FCNT_TCP_V6_CONNECT].name); - ret = libbpf_get_error(obj->links.netdata_tcp_v6_connect_kretprobe); - if (ret) - return -1; + if (tcp_v6_connect_address.type == 'T') { + obj->links.netdata_tcp_v6_connect_kretprobe = bpf_program__attach_kprobe( + obj->progs.netdata_tcp_v6_connect_kretprobe, true, socket_targets[NETDATA_FCNT_TCP_V6_CONNECT].name); + ret = libbpf_get_error(obj->links.netdata_tcp_v6_connect_kretprobe); + if (ret) + return -1; + } } else { obj->links.netdata_tcp_sendmsg_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_tcp_sendmsg_kprobe, false, @@ -360,12 +364,14 @@ static long ebpf_socket_attach_probes(struct socket_bpf *obj, netdata_run_mode_t if (ret) return -1; - obj->links.netdata_tcp_v6_connect_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_tcp_v6_connect_kprobe, - false, - socket_targets[NETDATA_FCNT_TCP_V6_CONNECT].name); - ret = libbpf_get_error(obj->links.netdata_tcp_v6_connect_kprobe); - if (ret) - return -1; + if (tcp_v6_connect_address.type == 'T') { + obj->links.netdata_tcp_v6_connect_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_tcp_v6_connect_kprobe, + false, + socket_targets[NETDATA_FCNT_TCP_V6_CONNECT].name); + ret = libbpf_get_error(obj->links.netdata_tcp_v6_connect_kprobe); + if (ret) + return -1; + } } return 0; @@ -411,6 +417,17 @@ static void ebpf_socket_adjust_map(struct socket_bpf *obj, ebpf_module_t *em) } /** + * Disable TCP V6 connect + */ +static void ebpf_disable_tcp_v6_connect(struct socket_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_tcp_v6_connect_kretprobe, false); + bpf_program__set_autoload(obj->progs.netdata_tcp_v6_connect_kprobe, false); + bpf_program__set_autoload(obj->progs.netdata_tcp_v6_connect_fexit, false); + bpf_program__set_autoload(obj->progs.netdata_tcp_v6_connect_fentry, false); +} + +/** * Load and attach * * Load and attach the eBPF code in kernel. @@ -438,6 +455,10 @@ static inline int ebpf_socket_load_and_attach(struct socket_bpf *obj, ebpf_modul ebpf_socket_adjust_map(obj, em); + if (tcp_v6_connect_address.type != 'T') { + ebpf_disable_tcp_v6_connect(obj); + } + int ret = socket_bpf__load(obj); if (ret) { fprintf(stderr, "failed to load BPF object: %d\n", ret); @@ -494,6 +515,7 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every) int order = 20080; ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_CONNECTION_TCP_V4, + "", "Calls to tcp_v4_connection", EBPF_COMMON_DIMENSION_CONNECTIONS, NETDATA_APPS_NET_GROUP, @@ -502,18 +524,22 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every) order++, update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - NETDATA_NET_APPS_CONNECTION_TCP_V6, - "Calls to tcp_v6_connection", - EBPF_COMMON_DIMENSION_CONNECTIONS, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT, - order++, - update_every); + if (tcp_v6_connect_address.type == 'T') { + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_NET_APPS_CONNECTION_TCP_V6, + "", + "Calls to tcp_v6_connection", + EBPF_COMMON_DIMENSION_CONNECTIONS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT, + order++, + update_every); + } ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_RECV, + "", "Bytes received", EBPF_COMMON_DIMENSION_BITS, NETDATA_APPS_NET_GROUP, @@ -524,6 +550,7 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_SENT, + "", "Bytes sent", EBPF_COMMON_DIMENSION_BITS, NETDATA_APPS_NET_GROUP, @@ -534,6 +561,7 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, + "", "Calls to tcp_cleanup_rbuf.", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, @@ -544,6 +572,7 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, + "", "Calls to tcp_sendmsg.", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, @@ -554,6 +583,7 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, + "", "Calls to tcp_retransmit", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, @@ -564,6 +594,7 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, + "", "Calls to udp_sendmsg", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, @@ -574,6 +605,7 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, + "", "Calls to udp_recvmsg", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, @@ -615,96 +647,116 @@ static inline void ebpf_obsolete_socket_cgroup_charts(ebpf_module_t *em) { */ void ebpf_socket_obsolete_apps_charts(struct ebpf_module *em) { - int order = 20080; - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_NET_APPS_CONNECTION_TCP_V4, - "Calls to tcp_v4_connection", - EBPF_COMMON_DIMENSION_CONNECTIONS, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - order++, - em->update_every); + int order = 20130; + struct ebpf_target *w; + int update_every = em->update_every; + for (w = apps_groups_root_target; w; w = w->next) { + if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_SOCKET_IDX)))) + continue; - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_NET_APPS_CONNECTION_TCP_V6, - "Calls to tcp_v6_connection", - EBPF_COMMON_DIMENSION_CONNECTIONS, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - order++, - em->update_every); + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_tcp_v4_connection", + "Calls to tcp_v4_connection.", + EBPF_COMMON_DIMENSION_CONNECTIONS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_tcp_v4_connection", + order++, + update_every); - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_NET_APPS_BANDWIDTH_SENT, - "Bytes sent", - EBPF_COMMON_DIMENSION_BITS, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - order++, - em->update_every); + if (tcp_v6_connect_address.type == 'T') { + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_tcp_v6_connection", + "Calls to tcp_v6_connection.", + EBPF_COMMON_DIMENSION_CONNECTIONS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_tcp_v6_connection", + order++, + update_every); + } - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_NET_APPS_BANDWIDTH_RECV, - "bytes received", - EBPF_COMMON_DIMENSION_BITS, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - order++, - em->update_every); + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_sock_bytes_sent", + "Bytes sent.", + EBPF_COMMON_DIMENSION_BITS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_sock_bytes_sent", + order++, + update_every); - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, - "Calls for tcp_sendmsg", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - order++, - em->update_every); + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_sock_bytes_received", + "Bytes received.", + EBPF_COMMON_DIMENSION_BITS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_sock_bytes_received", + order++, + update_every); - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, - "Calls for tcp_cleanup_rbuf", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - order++, - em->update_every); + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_tcp_sendmsg", + "Calls to tcp_sendmsg.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_tcp_sendmsg", + order++, + update_every); - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, - "Calls for tcp_retransmit", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - order++, - em->update_every); + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_tcp_cleanup_rbuf", + "Calls to tcp_cleanup_rbuf.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_tcp_cleanup_rbuf", + order++, + update_every); - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, - "Calls for udp_sendmsg", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - order++, - em->update_every); + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_tcp_retransmit", + "Calls to tcp_retransmit.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_tcp_retransmit", + order++, + update_every); - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, - "Calls for udp_recvmsg", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - order++, - em->update_every); + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_udp_sendmsg", + "Calls to udp_sendmsg.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_udp_sendmsg", + order++, + update_every); + + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_udp_recvmsg", + "Calls to udp_recvmsg.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_udp_recvmsg", + order++, + update_every); + + w->charts_created &= ~(1<<EBPF_MODULE_SOCKET_IDX); + } } /** @@ -719,6 +771,7 @@ static void ebpf_socket_obsolete_global_charts(ebpf_module_t *em) int order = 21070; ebpf_write_chart_obsolete(NETDATA_EBPF_IP_FAMILY, NETDATA_INBOUND_CONNECTIONS, + "", "Inbound connections.", EBPF_COMMON_DIMENSION_CONNECTIONS, NETDATA_SOCKET_KERNEL_FUNCTIONS, @@ -729,6 +782,7 @@ static void ebpf_socket_obsolete_global_charts(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_EBPF_IP_FAMILY, NETDATA_TCP_OUTBOUND_CONNECTIONS, + "", "TCP outbound connections.", EBPF_COMMON_DIMENSION_CONNECTIONS, NETDATA_SOCKET_KERNEL_FUNCTIONS, @@ -740,6 +794,7 @@ static void ebpf_socket_obsolete_global_charts(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_EBPF_IP_FAMILY, NETDATA_TCP_FUNCTION_COUNT, + "", "Calls to internal functions", EBPF_COMMON_DIMENSION_CALL, NETDATA_SOCKET_KERNEL_FUNCTIONS, @@ -750,6 +805,7 @@ static void ebpf_socket_obsolete_global_charts(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_EBPF_IP_FAMILY, NETDATA_TCP_FUNCTION_BITS, + "", "TCP bandwidth", EBPF_COMMON_DIMENSION_BITS, NETDATA_SOCKET_KERNEL_FUNCTIONS, @@ -761,6 +817,7 @@ static void ebpf_socket_obsolete_global_charts(ebpf_module_t *em) if (em->mode < MODE_ENTRY) { ebpf_write_chart_obsolete(NETDATA_EBPF_IP_FAMILY, NETDATA_TCP_FUNCTION_ERROR, + "", "TCP errors", EBPF_COMMON_DIMENSION_CALL, NETDATA_SOCKET_KERNEL_FUNCTIONS, @@ -772,6 +829,7 @@ static void ebpf_socket_obsolete_global_charts(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_EBPF_IP_FAMILY, NETDATA_TCP_RETRANSMIT, + "", "Packages retransmitted", EBPF_COMMON_DIMENSION_CALL, NETDATA_SOCKET_KERNEL_FUNCTIONS, @@ -782,6 +840,7 @@ static void ebpf_socket_obsolete_global_charts(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_EBPF_IP_FAMILY, NETDATA_UDP_FUNCTION_COUNT, + "", "UDP calls", EBPF_COMMON_DIMENSION_CALL, NETDATA_SOCKET_KERNEL_FUNCTIONS, @@ -792,6 +851,7 @@ static void ebpf_socket_obsolete_global_charts(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_EBPF_IP_FAMILY, NETDATA_UDP_FUNCTION_BITS, + "", "UDP bandwidth", EBPF_COMMON_DIMENSION_BITS, NETDATA_SOCKET_KERNEL_FUNCTIONS, @@ -803,6 +863,7 @@ static void ebpf_socket_obsolete_global_charts(ebpf_module_t *em) if (em->mode < MODE_ENTRY) { ebpf_write_chart_obsolete(NETDATA_EBPF_IP_FAMILY, NETDATA_UDP_FUNCTION_ERROR, + "", "UDP errors", EBPF_COMMON_DIMENSION_CALL, NETDATA_SOCKET_KERNEL_FUNCTIONS, @@ -924,10 +985,10 @@ static void ebpf_socket_send_global_inbound_conn() move = move->next; } - write_begin_chart(NETDATA_EBPF_IP_FAMILY, NETDATA_INBOUND_CONNECTIONS); + ebpf_write_begin_chart(NETDATA_EBPF_IP_FAMILY, NETDATA_INBOUND_CONNECTIONS, ""); write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_INCOMING_CONNECTION_TCP].name, (long long) tcp_conn); write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_INCOMING_CONNECTION_UDP].name, (long long) udp_conn); - write_end_chart(); + ebpf_write_end_chart(); } /** @@ -969,31 +1030,6 @@ static void ebpf_socket_send_data(ebpf_module_t *em) } /** - * Sum values for pid - * - * @param root the structure with all available PIDs - * - * @param offset the address that we are reading - * - * @return it returns the sum of all PIDs - */ -long long ebpf_socket_sum_values_for_pids(struct ebpf_pid_on_target *root, size_t offset) -{ - long long ret = 0; - while (root) { - int32_t pid = root->pid; - ebpf_socket_publish_apps_t *w = socket_bandwidth_curr[pid]; - if (w) { - ret += get_value_from_structure((char *)w, offset); - } - - root = root->next; - } - - return ret; -} - -/** * Send data to Netdata calling auxiliary functions. * * @param em the structure with thread information @@ -1004,100 +1040,74 @@ void ebpf_socket_send_apps_data(ebpf_module_t *em, struct ebpf_target *root) UNUSED(em); struct ebpf_target *w; - collected_number value; + // This algorithm is improved in https://github.com/netdata/netdata/pull/16030 + collected_number values[9]; - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_CONNECTION_TCP_V4); for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t, - call_tcp_v4_connection)); - write_chart_dimension(w->name, value); - } - } - write_end_chart(); + if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_SOCKET_IDX)))) + continue; - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_CONNECTION_TCP_V6); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t, - call_tcp_v6_connection)); - write_chart_dimension(w->name, value); - } - } - write_end_chart(); + struct ebpf_pid_on_target *move = w->root_pid; + // Simplify algorithm, but others will appear only in https://github.com/netdata/netdata/pull/16030 + memset(values, 0, sizeof(values)); + while (move) { + int32_t pid = move->pid; + ebpf_socket_publish_apps_t *ws = socket_bandwidth_curr[pid]; + if (ws) { + values[0] += (collected_number) ws->call_tcp_v4_connection; + values[1] += (collected_number) ws->call_tcp_v6_connection; + values[2] += (collected_number) ws->bytes_sent; + values[3] += (collected_number) ws->bytes_received; + values[4] += (collected_number) ws->call_tcp_sent; + values[5] += (collected_number) ws->call_tcp_received; + values[6] += (collected_number) ws->retransmit; + values[7] += (collected_number) ws->call_udp_sent; + values[8] += (collected_number) ws->call_udp_received; + } - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_SENT); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t, - bytes_sent)); - // We multiply by 0.008, because we read bytes, but we display bits - write_chart_dimension(w->name, ((value)*8)/1000); + move = move->next; } - } - write_end_chart(); - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_RECV); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t, - bytes_received)); - // We multiply by 0.008, because we read bytes, but we display bits - write_chart_dimension(w->name, ((value)*8)/1000); - } - } - write_end_chart(); + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_tcp_v4_connection"); + write_chart_dimension("connections", values[0]); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t, - call_tcp_sent)); - write_chart_dimension(w->name, value); + if (tcp_v6_connect_address.type == 'T') { + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_call_tcp_v6_connection"); + write_chart_dimension("calls", values[1]); + ebpf_write_end_chart(); } - } - write_end_chart(); - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t, - call_tcp_received)); - write_chart_dimension(w->name, value); - } - } - write_end_chart(); + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_sock_bytes_sent"); + // We multiply by 0.008, because we read bytes, but we display bits + write_chart_dimension("bandwidth", ((values[2])*8)/1000); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t, - retransmit)); - write_chart_dimension(w->name, value); - } - } - write_end_chart(); + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_sock_bytes_received"); + // We multiply by 0.008, because we read bytes, but we display bits + write_chart_dimension("bandwidth", ((values[3])*8)/1000); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t, - call_udp_sent)); - write_chart_dimension(w->name, value); - } - } - write_end_chart(); + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_tcp_sendmsg"); + write_chart_dimension("calls", values[4]); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t, - call_udp_received)); - write_chart_dimension(w->name, value); - } - } - write_end_chart(); + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_tcp_cleanup_rbuf"); + write_chart_dimension("calls", values[5]); + ebpf_write_end_chart(); + + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_tcp_retransmit"); + write_chart_dimension("calls", values[6]); + ebpf_write_end_chart(); + + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_udp_sendmsg"); + write_chart_dimension("calls", values[7]); + ebpf_write_end_chart(); + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_udp_recvmsg"); + write_chart_dimension("calls", values[8]); + ebpf_write_end_chart(); + } } /***************************************************************** @@ -1239,84 +1249,152 @@ static void ebpf_socket_create_global_charts(ebpf_module_t *em) void ebpf_socket_create_apps_charts(struct ebpf_module *em, void *ptr) { struct ebpf_target *root = ptr; - int order = 20080; - ebpf_create_charts_on_apps(NETDATA_NET_APPS_CONNECTION_TCP_V4, - "Calls to tcp_v4_connection", EBPF_COMMON_DIMENSION_CONNECTIONS, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - order++, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); - - ebpf_create_charts_on_apps(NETDATA_NET_APPS_CONNECTION_TCP_V6, - "Calls to tcp_v6_connection", EBPF_COMMON_DIMENSION_CONNECTIONS, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - order++, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); - - ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_SENT, - "Bytes sent", EBPF_COMMON_DIMENSION_BITS, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - order++, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); - - ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_RECV, - "bytes received", EBPF_COMMON_DIMENSION_BITS, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - order++, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); - - ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, - "Calls for tcp_sendmsg", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - order++, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); - - ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, - "Calls for tcp_cleanup_rbuf", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - order++, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); - - ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, - "Calls for tcp_retransmit", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - order++, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); - - ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, - "Calls for udp_sendmsg", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - order++, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); - - ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, - "Calls for udp_recvmsg", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - order++, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + struct ebpf_target *w; + int order = 20130; + int update_every = em->update_every; + for (w = root; w; w = w->next) { + if (unlikely(!w->exposed)) + continue; + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_tcp_v4_connection", + "Calls to tcp_v4_connection.", + EBPF_COMMON_DIMENSION_CONNECTIONS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_tcp_v4_connection", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION connections '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + if (tcp_v6_connect_address.type == 'T') { + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_tcp_v6_connection", + "Calls to tcp_v6_connection.", + EBPF_COMMON_DIMENSION_CONNECTIONS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_tcp_v6_connection", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION connections '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + } + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_sock_bytes_sent", + "Bytes sent.", + EBPF_COMMON_DIMENSION_BITS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_sock_bytes_sent", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION bandwidth '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_sock_bytes_received", + "Bytes received.", + EBPF_COMMON_DIMENSION_BITS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_sock_bytes_received", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION bandwidth '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_tcp_sendmsg", + "Calls to tcp_sendmsg.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_tcp_sendmsg", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_tcp_cleanup_rbuf", + "Calls to tcp_cleanup_rbuf.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_tcp_cleanup_rbuf", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_tcp_retransmit", + "Calls to tcp_retransmit.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_tcp_retransmit", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_udp_sendmsg", + "Calls to udp_sendmsg.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_udp_sendmsg", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_udp_recvmsg", + "Calls to udp_recvmsg.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_udp_recvmsg", + order, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + w->charts_created |= 1<<EBPF_MODULE_SOCKET_IDX; + } em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; } @@ -2032,15 +2110,21 @@ static void ebpf_create_specific_socket_charts(char *type, int update_every) &socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V4], 1, update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); - ebpf_create_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V6, - "Calls to tcp_v6_connection", - EBPF_COMMON_DIMENSION_CONNECTIONS, NETDATA_CGROUP_NET_GROUP, - NETDATA_CGROUP_TCP_V6_CONN_CONTEXT, - NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, - ebpf_create_global_dimension, - &socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V6], 1, - update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + if (tcp_v6_connect_address.type == 'T') { + ebpf_create_chart(type, + NETDATA_NET_APPS_CONNECTION_TCP_V6, + "Calls to tcp_v6_connection", + EBPF_COMMON_DIMENSION_CONNECTIONS, + NETDATA_CGROUP_NET_GROUP, + NETDATA_CGROUP_TCP_V6_CONN_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + ebpf_create_global_dimension, + &socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V6], + 1, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); + } ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_RECV, "Bytes received", @@ -2124,47 +2208,55 @@ static void ebpf_create_specific_socket_charts(char *type, int update_every) static void ebpf_obsolete_specific_socket_charts(char *type, int update_every) { int order_basis = 5300; - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_CONNECTION_TCP_V4, "Calls to tcp_v4_connection", + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_CONNECTION_TCP_V4, "", "Calls to tcp_v4_connection", EBPF_COMMON_DIMENSION_CONNECTIONS, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_V4_CONN_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_CONNECTION_TCP_V6,"Calls to tcp_v6_connection", - EBPF_COMMON_DIMENSION_CONNECTIONS, NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); + if (tcp_v6_connect_address.type == 'T') { + ebpf_write_chart_obsolete(type, + NETDATA_NET_APPS_CONNECTION_TCP_V6, + "", + "Calls to tcp_v6_connection", + EBPF_COMMON_DIMENSION_CONNECTIONS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + update_every); + } - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_RECV, "Bytes received", + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_RECV, "", "Bytes received", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_BYTES_RECV_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_SENT,"Bytes sent", + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_SENT, "","Bytes sent", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_BYTES_SEND_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, "Calls to tcp_cleanup_rbuf.", + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, "", "Calls to tcp_cleanup_rbuf.", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_RECV_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, "Calls to tcp_sendmsg.", + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, "", "Calls to tcp_sendmsg.", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_SEND_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, "Calls to tcp_retransmit.", + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, "", "Calls to tcp_retransmit.", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_RETRANSMIT_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, "Calls to udp_sendmsg", + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, "", "Calls to udp_sendmsg", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_UDP_SEND_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, "Calls to udp_recvmsg", + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, "", "Calls to udp_recvmsg", EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_UDP_RECV_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); @@ -2180,50 +2272,52 @@ static void ebpf_obsolete_specific_socket_charts(char *type, int update_every) */ static void ebpf_send_specific_socket_data(char *type, ebpf_socket_publish_apps_t *values) { - write_begin_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V4); + ebpf_write_begin_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V4, ""); write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V4].name, (long long) values->call_tcp_v4_connection); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V6); - write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V6].name, - (long long) values->call_tcp_v6_connection); - write_end_chart(); + if (tcp_v6_connect_address.type == 'T') { + ebpf_write_begin_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V6, ""); + write_chart_dimension( + socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V6].name, (long long)values->call_tcp_v6_connection); + ebpf_write_end_chart(); + } - write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_SENT); + ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_SENT, ""); write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_SENDMSG].name, (long long) values->bytes_sent); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_RECV); + ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_RECV, ""); write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF].name, (long long) values->bytes_received); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS); + ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, ""); write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_SENDMSG].name, (long long) values->call_tcp_sent); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS); + ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, ""); write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF].name, (long long) values->call_tcp_received); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT); + ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, ""); write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_RETRANSMIT].name, (long long) values->retransmit); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS); + ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, ""); write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_UDP_SENDMSG].name, (long long) values->call_udp_sent); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS); + ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, ""); write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF].name, (long long) values->call_udp_received); - write_end_chart(); + ebpf_write_end_chart(); } /** @@ -2245,14 +2339,18 @@ static void ebpf_create_systemd_socket_charts(int update_every) NETDATA_SERVICES_SOCKET_TCP_V4_CONN_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET, update_every); - ebpf_create_charts_on_systemd(NETDATA_NET_APPS_CONNECTION_TCP_V6, - "Calls to tcp_v6_connection", EBPF_COMMON_DIMENSION_CONNECTIONS, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - order++, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET, - update_every); + if (tcp_v6_connect_address.type == 'T') { + ebpf_create_charts_on_systemd(NETDATA_NET_APPS_CONNECTION_TCP_V6, + "Calls to tcp_v6_connection", + EBPF_COMMON_DIMENSION_CONNECTIONS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + order++, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT, + NETDATA_EBPF_MODULE_NAME_SOCKET, + update_every); + } ebpf_create_charts_on_systemd(NETDATA_NET_APPS_BANDWIDTH_RECV, "Bytes received", EBPF_COMMON_DIMENSION_BITS, @@ -2331,77 +2429,79 @@ static void ebpf_create_systemd_socket_charts(int update_every) static void ebpf_send_systemd_socket_charts() { ebpf_cgroup_target_t *ect; - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_CONNECTION_TCP_V4); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_CONNECTION_TCP_V4, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long)ect->publish_socket.call_tcp_v4_connection); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_CONNECTION_TCP_V6); - for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { - if (unlikely(ect->systemd) && unlikely(ect->updated)) { - write_chart_dimension(ect->name, (long long)ect->publish_socket.call_tcp_v6_connection); + if (tcp_v6_connect_address.type == 'T') { + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_CONNECTION_TCP_V6, ""); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_socket.call_tcp_v6_connection); + } } + ebpf_write_end_chart(); } - write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_SENT); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_SENT, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long)ect->publish_socket.bytes_sent); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_RECV); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_RECV, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long)ect->publish_socket.bytes_received); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long)ect->publish_socket.call_tcp_sent); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long)ect->publish_socket.call_tcp_received); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long)ect->publish_socket.retransmit); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long)ect->publish_socket.call_udp_sent); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long)ect->publish_socket.call_udp_received); } } - write_end_chart(); + ebpf_write_end_chart(); } /** @@ -2562,6 +2662,8 @@ static void ebpf_socket_initialize_global_vectors() sizeof(netdata_socket_plus_t)); socket_values = callocz((size_t)ebpf_nprocs, sizeof(netdata_socket_t)); + + ebpf_load_addresses(&tcp_v6_connect_address, -1); } /***************************************************************** diff --git a/collectors/ebpf.plugin/ebpf_socket.h b/collectors/ebpf.plugin/ebpf_socket.h index fb2404c24..a6d3e03b6 100644 --- a/collectors/ebpf.plugin/ebpf_socket.h +++ b/collectors/ebpf.plugin/ebpf_socket.h @@ -318,10 +318,6 @@ typedef struct netata_socket_plus { } socket_string; } netdata_socket_plus_t; -enum netdata_udp_ports { - NETDATA_EBPF_UDP_PORT = 53 -}; - extern ARAL *aral_socket_table; /** diff --git a/collectors/ebpf.plugin/ebpf_softirq.c b/collectors/ebpf.plugin/ebpf_softirq.c index 711ff43a6..106ff4f29 100644 --- a/collectors/ebpf.plugin/ebpf_softirq.c +++ b/collectors/ebpf.plugin/ebpf_softirq.c @@ -71,6 +71,7 @@ static void ebpf_obsolete_softirq_global(ebpf_module_t *em) { ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, "softirq_latency", + "", "Software IRQ latency", EBPF_COMMON_DIMENSION_MILLISECONDS, "softirqs", @@ -228,9 +229,9 @@ static void softirq_collector(ebpf_module_t *em) pthread_mutex_lock(&lock); // write dims now for all hitherto discovered IRQs. - write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, "softirq_latency"); + ebpf_write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, "softirq_latency", ""); softirq_write_dims(); - write_end_chart(); + ebpf_write_end_chart(); pthread_mutex_unlock(&lock); diff --git a/collectors/ebpf.plugin/ebpf_swap.c b/collectors/ebpf.plugin/ebpf_swap.c index d0c8cee3d..fb007f928 100644 --- a/collectors/ebpf.plugin/ebpf_swap.c +++ b/collectors/ebpf.plugin/ebpf_swap.c @@ -234,7 +234,8 @@ static void ebpf_obsolete_swap_services(ebpf_module_t *em) { ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_MEM_SWAP_READ_CHART, - "Calls to function <code>swap_readpage</code>.", + "", + "Calls to function swap_readpage.", EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, @@ -244,7 +245,8 @@ static void ebpf_obsolete_swap_services(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_MEM_SWAP_WRITE_CHART, - "Calls to function <code>swap_writepage</code>.", + "", + "Calls to function swap_writepage.", EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, @@ -284,25 +286,35 @@ static inline void ebpf_obsolete_swap_cgroup_charts(ebpf_module_t *em) { */ void ebpf_obsolete_swap_apps_charts(struct ebpf_module *em) { - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_MEM_SWAP_READ_CHART, - "Calls to function <code>swap_readpage</code>.", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_SWAP_SUBMENU, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20191, - em->update_every); + struct ebpf_target *w; + int update_every = em->update_every; + for (w = apps_groups_root_target; w; w = w->next) { + if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_SWAP_IDX)))) + continue; - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_MEM_SWAP_WRITE_CHART, - "Calls to function <code>swap_writepage</code>.", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_SWAP_SUBMENU, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20192, - em->update_every); + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_swap_readpage", + "Calls to function swap_readpage.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_EBPF_MEMORY_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_swap_readpage", + 20070, + update_every); + + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_swap_writepage", + "Calls to function swap_writepage.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_EBPF_MEMORY_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_swap_writepage", + 20071, + update_every); + w->charts_created &= ~(1<<EBPF_MODULE_SWAP_IDX); + } } /** @@ -316,6 +328,7 @@ static void ebpf_obsolete_swap_global(ebpf_module_t *em) { ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, NETDATA_MEM_SWAP_CHART, + "", "Calls to access swap memory", EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_SWAP_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, @@ -568,26 +581,19 @@ void ebpf_swap_send_apps_data(struct ebpf_target *root) { struct ebpf_target *w; for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - ebpf_swap_sum_pids(&w->swap, w->root_pid); - } - } + if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_SWAP_IDX)))) + continue; - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_MEM_SWAP_READ_CHART); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, (long long) w->swap.read); - } - } - write_end_chart(); + ebpf_swap_sum_pids(&w->swap, w->root_pid); - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_MEM_SWAP_WRITE_CHART); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, (long long) w->swap.write); - } + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_swap_readpage"); + write_chart_dimension("calls", (long long) w->swap.read); + ebpf_write_end_chart(); + + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_swap_writepage"); + write_chart_dimension("calls", (long long) w->swap.write); + ebpf_write_end_chart(); } - write_end_chart(); } /** @@ -624,21 +630,21 @@ static void ebpf_swap_sum_cgroup_pids(netdata_publish_swap_t *swap, struct pid_o static void ebpf_send_systemd_swap_charts() { ebpf_cgroup_target_t *ect; - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_MEM_SWAP_READ_CHART); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_MEM_SWAP_READ_CHART, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long) ect->publish_systemd_swap.read); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_MEM_SWAP_WRITE_CHART); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_MEM_SWAP_WRITE_CHART, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long) ect->publish_systemd_swap.write); } } - write_end_chart(); + ebpf_write_end_chart(); } /** @@ -652,7 +658,7 @@ static void ebpf_send_systemd_swap_charts() static void ebpf_create_specific_swap_charts(char *type, int update_every) { ebpf_create_chart(type, NETDATA_MEM_SWAP_READ_CHART, - "Calls to function <code>swap_readpage</code>.", + "Calls to function swap_readpage.", EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, NETDATA_CGROUP_SWAP_READ_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5100, @@ -660,7 +666,7 @@ static void ebpf_create_specific_swap_charts(char *type, int update_every) swap_publish_aggregated, 1, update_every, NETDATA_EBPF_MODULE_NAME_SWAP); ebpf_create_chart(type, NETDATA_MEM_SWAP_WRITE_CHART, - "Calls to function <code>swap_writepage</code>.", + "Calls to function swap_writepage.", EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, NETDATA_CGROUP_SWAP_WRITE_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5101, @@ -679,12 +685,12 @@ static void ebpf_create_specific_swap_charts(char *type, int update_every) */ static void ebpf_obsolete_specific_swap_charts(char *type, int update_every) { - ebpf_write_chart_obsolete(type, NETDATA_MEM_SWAP_READ_CHART,"Calls to function <code>swap_readpage</code>.", + ebpf_write_chart_obsolete(type, NETDATA_MEM_SWAP_READ_CHART, "", "Calls to function swap_readpage.", EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SWAP_READ_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5100, update_every); - ebpf_write_chart_obsolete(type, NETDATA_MEM_SWAP_WRITE_CHART, "Calls to function <code>swap_writepage</code>.", + ebpf_write_chart_obsolete(type, NETDATA_MEM_SWAP_WRITE_CHART, "", "Calls to function swap_writepage.", EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SWAP_WRITE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5101, update_every); @@ -700,13 +706,13 @@ static void ebpf_obsolete_specific_swap_charts(char *type, int update_every) */ static void ebpf_send_specific_swap_data(char *type, netdata_publish_swap_t *values) { - write_begin_chart(type, NETDATA_MEM_SWAP_READ_CHART); + ebpf_write_begin_chart(type, NETDATA_MEM_SWAP_READ_CHART, ""); write_chart_dimension(swap_publish_aggregated[NETDATA_KEY_SWAP_READPAGE_CALL].name, (long long) values->read); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_MEM_SWAP_WRITE_CHART); + ebpf_write_begin_chart(type, NETDATA_MEM_SWAP_WRITE_CHART, ""); write_chart_dimension(swap_publish_aggregated[NETDATA_KEY_SWAP_WRITEPAGE_CALL].name, (long long) values->write); - write_end_chart(); + ebpf_write_end_chart(); } /** @@ -719,14 +725,14 @@ static void ebpf_send_specific_swap_data(char *type, netdata_publish_swap_t *val static void ebpf_create_systemd_swap_charts(int update_every) { ebpf_create_charts_on_systemd(NETDATA_MEM_SWAP_READ_CHART, - "Calls to <code>swap_readpage</code>.", + "Calls to swap_readpage.", EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, NETDATA_EBPF_CHART_TYPE_STACKED, 20191, ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_SWAP_READ_CONTEXT, NETDATA_EBPF_MODULE_NAME_SWAP, update_every); ebpf_create_charts_on_systemd(NETDATA_MEM_SWAP_WRITE_CHART, - "Calls to function <code>swap_writepage</code>.", + "Calls to function swap_writepage.", EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, NETDATA_EBPF_CHART_TYPE_STACKED, 20192, ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_SWAP_WRITE_CONTEXT, @@ -851,23 +857,44 @@ static void swap_collector(ebpf_module_t *em) void ebpf_swap_create_apps_charts(struct ebpf_module *em, void *ptr) { struct ebpf_target *root = ptr; - ebpf_create_charts_on_apps(NETDATA_MEM_SWAP_READ_CHART, - "Calls to function <code>swap_readpage</code>.", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_SWAP_SUBMENU, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20191, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); - - ebpf_create_charts_on_apps(NETDATA_MEM_SWAP_WRITE_CHART, - "Calls to function <code>swap_writepage</code>.", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_SWAP_SUBMENU, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20192, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + struct ebpf_target *w; + int update_every = em->update_every; + for (w = root; w; w = w->next) { + if (unlikely(!w->exposed)) + continue; + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_swap_readpage", + "Calls to function swap_readpage.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_EBPF_MEMORY_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_swap_readpage", + 20070, + update_every, + NETDATA_EBPF_MODULE_NAME_SWAP); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_swap_writepage", + "Calls to function swap_writepage.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_EBPF_MEMORY_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_swap_writepage", + 20071, + update_every, + NETDATA_EBPF_MODULE_NAME_SWAP); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + w->charts_created |= 1<<EBPF_MODULE_SWAP_IDX; + } em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; } diff --git a/collectors/ebpf.plugin/ebpf_sync.c b/collectors/ebpf.plugin/ebpf_sync.c index 95dda19fd..a16318107 100644 --- a/collectors/ebpf.plugin/ebpf_sync.c +++ b/collectors/ebpf.plugin/ebpf_sync.c @@ -298,7 +298,8 @@ static void ebpf_obsolete_sync_global(ebpf_module_t *em) if (local_syscalls[NETDATA_SYNC_FSYNC_IDX].enabled && local_syscalls[NETDATA_SYNC_FDATASYNC_IDX].enabled) ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, NETDATA_EBPF_FILE_SYNC_CHART, - "Monitor calls for <code>fsync(2)</code> and <code>fdatasync(2)</code>.", + "", + "Monitor calls to fsync(2) and fdatasync(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_EBPF_SYNC_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, @@ -309,7 +310,8 @@ static void ebpf_obsolete_sync_global(ebpf_module_t *em) if (local_syscalls[NETDATA_SYNC_MSYNC_IDX].enabled) ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, NETDATA_EBPF_MSYNC_CHART, - "Monitor calls for <code>msync(2)</code>.", + "", + "Monitor calls to msync(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_EBPF_SYNC_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, @@ -320,7 +322,8 @@ static void ebpf_obsolete_sync_global(ebpf_module_t *em) if (local_syscalls[NETDATA_SYNC_SYNC_IDX].enabled && local_syscalls[NETDATA_SYNC_SYNCFS_IDX].enabled) ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, NETDATA_EBPF_SYNC_CHART, - "Monitor calls for <code>sync(2)</code> and <code>syncfs(2)</code>.", + "", + "Monitor calls to sync(2) and syncfs(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_EBPF_SYNC_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, @@ -331,7 +334,8 @@ static void ebpf_obsolete_sync_global(ebpf_module_t *em) if (local_syscalls[NETDATA_SYNC_SYNC_FILE_RANGE_IDX].enabled) ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, NETDATA_EBPF_FILE_SEGMENT_CHART, - "Monitor calls for <code>sync_file_range(2)</code>.", + "", + "Monitor calls to sync_file_range(2).", EBPF_COMMON_DIMENSION_CALL, NETDATA_EBPF_SYNC_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, @@ -507,7 +511,7 @@ static void ebpf_send_sync_chart(char *id, int idx, int end) { - write_begin_chart(NETDATA_EBPF_MEMORY_GROUP, id); + ebpf_write_begin_chart(NETDATA_EBPF_MEMORY_GROUP, id, ""); netdata_publish_syscall_t *move = &sync_counter_publish_aggregated[idx]; @@ -519,7 +523,7 @@ static void ebpf_send_sync_chart(char *id, idx++; } - write_end_chart(); + ebpf_write_end_chart(); } /** @@ -610,7 +614,7 @@ static void ebpf_create_sync_chart(char *id, int end, int update_every) { - ebpf_write_chart_cmd(NETDATA_EBPF_MEMORY_GROUP, id, title, EBPF_COMMON_DIMENSION_CALL, + ebpf_write_chart_cmd(NETDATA_EBPF_MEMORY_GROUP, id, "", title, EBPF_COMMON_DIMENSION_CALL, NETDATA_EBPF_SYNC_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NULL, order, update_every, NETDATA_EBPF_MODULE_NAME_SYNC); @@ -637,22 +641,22 @@ static void ebpf_create_sync_charts(int update_every) { if (local_syscalls[NETDATA_SYNC_FSYNC_IDX].enabled && local_syscalls[NETDATA_SYNC_FDATASYNC_IDX].enabled) ebpf_create_sync_chart(NETDATA_EBPF_FILE_SYNC_CHART, - "Monitor calls for <code>fsync(2)</code> and <code>fdatasync(2)</code>.", 21300, + "Monitor calls to fsync(2) and fdatasync(2).", 21300, NETDATA_SYNC_FSYNC_IDX, NETDATA_SYNC_FDATASYNC_IDX, update_every); if (local_syscalls[NETDATA_SYNC_MSYNC_IDX].enabled) ebpf_create_sync_chart(NETDATA_EBPF_MSYNC_CHART, - "Monitor calls for <code>msync(2)</code>.", 21301, + "Monitor calls to msync(2).", 21301, NETDATA_SYNC_MSYNC_IDX, NETDATA_SYNC_MSYNC_IDX, update_every); if (local_syscalls[NETDATA_SYNC_SYNC_IDX].enabled && local_syscalls[NETDATA_SYNC_SYNCFS_IDX].enabled) ebpf_create_sync_chart(NETDATA_EBPF_SYNC_CHART, - "Monitor calls for <code>sync(2)</code> and <code>syncfs(2)</code>.", 21302, + "Monitor calls to sync(2) and syncfs(2).", 21302, NETDATA_SYNC_SYNC_IDX, NETDATA_SYNC_SYNCFS_IDX, update_every); if (local_syscalls[NETDATA_SYNC_SYNC_FILE_RANGE_IDX].enabled) ebpf_create_sync_chart(NETDATA_EBPF_FILE_SEGMENT_CHART, - "Monitor calls for <code>sync_file_range(2)</code>.", 21303, + "Monitor calls to sync_file_range(2).", 21303, NETDATA_SYNC_SYNC_FILE_RANGE_IDX, NETDATA_SYNC_SYNC_FILE_RANGE_IDX, update_every); fflush(stdout); diff --git a/collectors/ebpf.plugin/ebpf_vfs.c b/collectors/ebpf.plugin/ebpf_vfs.c index 1e06e2a75..354901c9c 100644 --- a/collectors/ebpf.plugin/ebpf_vfs.c +++ b/collectors/ebpf.plugin/ebpf_vfs.c @@ -420,6 +420,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em) { ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_DELETED, + "", "Files deleted", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, @@ -430,6 +431,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, + "", "Write to disk", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, @@ -441,6 +443,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em) if (em->mode < MODE_ENTRY) { ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, + "", "Fails to write", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, @@ -452,6 +455,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS, + "", "Read from disk", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, @@ -463,6 +467,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em) if (em->mode < MODE_ENTRY) { ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, + "", "Fails to read", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, @@ -474,6 +479,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, + "", "Bytes written on disk", EBPF_COMMON_DIMENSION_BYTES, NETDATA_VFS_CGROUP_GROUP, @@ -484,6 +490,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_BYTES, + "", "Bytes read from disk", EBPF_COMMON_DIMENSION_BYTES, NETDATA_VFS_CGROUP_GROUP, @@ -494,7 +501,8 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC, - "Calls to <code>vfs_fsync</code>", + "", + "Calls to vfs_fsync.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, @@ -505,6 +513,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em) if (em->mode < MODE_ENTRY) { ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, + "", "Sync error", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, @@ -515,7 +524,8 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em) } ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN, - "Calls to <code>vfs_open</code>", + "", + "Calls to vfs_open.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, @@ -526,6 +536,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em) if (em->mode < MODE_ENTRY) { ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, + "", "Open error", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, @@ -537,7 +548,8 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE, - "Calls to <code>vfs_create</code>", + "", + "Calls to vfs_create.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, @@ -548,6 +560,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em) if (em->mode < MODE_ENTRY) { ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, + "", "Create error", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, @@ -589,144 +602,166 @@ static inline void ebpf_obsolete_vfs_cgroup_charts(ebpf_module_t *em) { */ void ebpf_obsolete_vfs_apps_charts(struct ebpf_module *em) { - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_FILE_DELETED, - "Files deleted", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20065, - em->update_every); - - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, - "Write to disk", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20066, - em->update_every); + int order = 20275; + struct ebpf_target *w; + int update_every = em->update_every; + for (w = apps_groups_root_target; w; w = w->next) { + if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_VFS_IDX)))) + continue; - if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, - "Fails to write", + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_unlink", + "Files deleted.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20067, - em->update_every); - } - - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_VFS_READ_CALLS, - "Read from disk", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20068, - em->update_every); - - if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, - "Fails to read", + "app.ebpf_call_vfs_unlink", + order++, + update_every); + + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_write", + "Write to disk.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20069, - em->update_every); - } - - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, - "Bytes written on disk", - EBPF_COMMON_DIMENSION_BYTES, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20070, - em->update_every); - - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_VFS_READ_BYTES, - "Bytes read from disk", - EBPF_COMMON_DIMENSION_BYTES, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20071, - em->update_every); - - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_VFS_FSYNC, - "Calls for <code>vfs_fsync</code>", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20072, - em->update_every); + "app.ebpf_call_vfs_write", + order++, + update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_write_error", + "Fails to write.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_write_error", + order++, + update_every); + } - if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, - "Sync error", + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_read", + "Read from disk.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20073, - em->update_every); - } - - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_VFS_OPEN, - "Calls for <code>vfs_open</code>", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20074, - em->update_every); + "app.ebpf_call_vfs_read", + order++, + update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_read_error", + "Fails to read.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_read_error", + order++, + update_every); + } - if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, - "Open error", + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_write_bytes", + "Bytes written on disk.", + EBPF_COMMON_DIMENSION_BYTES, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_write_bytes", + order++, + update_every); + + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_read_bytes", + "Bytes read from disk.", + EBPF_COMMON_DIMENSION_BYTES, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_read_bytes", + order++, + update_every); + + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_fsync", + "Calls to vfs_fsync.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20075, - em->update_every); - } + "app.ebpf_call_vfs_fsync", + order++, + update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_fsync_error", + "Fails to sync.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_fsync_error", + order++, + update_every); + } - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_VFS_CREATE, - "Calls for <code>vfs_create</code>", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20076, - em->update_every); + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_open", + "Calls to vfs_open.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_open", + order++, + update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_open_error", + "Fails to open.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_open_error", + order++, + update_every); + } - if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, - NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, - "Create error", + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_create", + "Calls to vfs_create.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, - NULL, - 20077, - em->update_every); + "app.ebpf_call_vfs_create", + order++, + update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_create_error", + "Fails to create.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_create_error", + order++, + update_every); + } + w->charts_created &= ~(1<<EBPF_MODULE_VFS_IDX); } } @@ -741,6 +776,7 @@ static void ebpf_obsolete_vfs_global(ebpf_module_t *em) { ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, NETDATA_VFS_FILE_CLEAN_COUNT, + "", "Remove files", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, @@ -751,6 +787,7 @@ static void ebpf_obsolete_vfs_global(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, NETDATA_VFS_FILE_IO_COUNT, + "", "Calls to IO", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, @@ -761,6 +798,7 @@ static void ebpf_obsolete_vfs_global(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, NETDATA_VFS_IO_FILE_BYTES, + "", "Bytes written and read", EBPF_COMMON_DIMENSION_BYTES, NETDATA_VFS_GROUP, @@ -772,6 +810,7 @@ static void ebpf_obsolete_vfs_global(ebpf_module_t *em) if (em->mode < MODE_ENTRY) { ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, NETDATA_VFS_FILE_ERR_COUNT, + "", "Fails to write or read", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, @@ -783,7 +822,8 @@ static void ebpf_obsolete_vfs_global(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, NETDATA_VFS_FSYNC, - "Calls for <code>vfs_fsync</code>", + "", + "Calls to vfs_fsync.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, @@ -794,6 +834,7 @@ static void ebpf_obsolete_vfs_global(ebpf_module_t *em) if (em->mode < MODE_ENTRY) { ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, NETDATA_VFS_FSYNC_ERR, + "", "Fails to synchronize", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, @@ -805,7 +846,8 @@ static void ebpf_obsolete_vfs_global(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, NETDATA_VFS_OPEN, - "Calls for <code>vfs_open</code>", + "", + "Calls to vfs_open.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, @@ -816,6 +858,7 @@ static void ebpf_obsolete_vfs_global(ebpf_module_t *em) if (em->mode < MODE_ENTRY) { ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, NETDATA_VFS_OPEN_ERR, + "", "Fails to open a file", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, @@ -827,7 +870,8 @@ static void ebpf_obsolete_vfs_global(ebpf_module_t *em) ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, NETDATA_VFS_CREATE, - "Calls for <code>vfs_create</code>", + "", + "Calls to vfs_create.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, @@ -838,6 +882,7 @@ static void ebpf_obsolete_vfs_global(ebpf_module_t *em) if (em->mode < MODE_ENTRY) { ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, NETDATA_VFS_CREATE_ERR, + "", "Fails to create a file.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, @@ -1086,123 +1131,72 @@ void ebpf_vfs_send_apps_data(ebpf_module_t *em, struct ebpf_target *root) { struct ebpf_target *w; for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - ebpf_vfs_sum_pids(&w->vfs, w->root_pid); - } - } + if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_VFS_IDX)))) + continue; - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_DELETED); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, w->vfs.unlink_call); - } - } - write_end_chart(); + ebpf_vfs_sum_pids(&w->vfs, w->root_pid); - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, w->vfs.write_call + w->vfs.writev_call); - } - } - write_end_chart(); + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_unlink"); + write_chart_dimension("calls", w->vfs.unlink_call); + ebpf_write_end_chart(); - if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, w->vfs.write_err + w->vfs.writev_err); - } - } - write_end_chart(); - } + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_write"); + write_chart_dimension("calls", w->vfs.write_call + w->vfs.writev_call); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, w->vfs.read_call + w->vfs.readv_call); + if (em->mode < MODE_ENTRY) { + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_write_error"); + write_chart_dimension("calls", w->vfs.write_err + w->vfs.writev_err); + ebpf_write_end_chart(); } - } - write_end_chart(); - if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, w->vfs.read_err + w->vfs.readv_err); - } - } - write_end_chart(); - } + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_read"); + write_chart_dimension("calls", w->vfs.read_call + w->vfs.readv_call); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, w->vfs.write_bytes + w->vfs.writev_bytes); + if (em->mode < MODE_ENTRY) { + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_read_error"); + write_chart_dimension("calls", w->vfs.read_err + w->vfs.readv_err); + ebpf_write_end_chart(); } - } - write_end_chart(); - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_BYTES); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, w->vfs.read_bytes + w->vfs.readv_bytes); - } - } - write_end_chart(); + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_write_bytes"); + write_chart_dimension("writes", w->vfs.write_bytes + w->vfs.writev_bytes); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, w->vfs.fsync_call); - } - } - write_end_chart(); + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_read_bytes"); + write_chart_dimension("reads", w->vfs.read_bytes + w->vfs.readv_bytes); + ebpf_write_end_chart(); - if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, w->vfs.fsync_err); - } - } - write_end_chart(); - } + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_fsync"); + write_chart_dimension("calls", w->vfs.fsync_call); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, w->vfs.open_call); + if (em->mode < MODE_ENTRY) { + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_fsync_error"); + write_chart_dimension("calls", w->vfs.fsync_err); + ebpf_write_end_chart(); } - } - write_end_chart(); - if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, w->vfs.open_err); - } - } - write_end_chart(); - } + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_open"); + write_chart_dimension("calls", w->vfs.open_call); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, w->vfs.create_call); + if (em->mode < MODE_ENTRY) { + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_open_error"); + write_chart_dimension("calls", w->vfs.open_err); + ebpf_write_end_chart(); } - } - write_end_chart(); - if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - write_chart_dimension(w->name, w->vfs.create_err); - } + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_create"); + write_chart_dimension("calls", w->vfs.create_call); + ebpf_write_end_chart(); + + if (em->mode < MODE_ENTRY) { + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_create_error"); + write_chart_dimension("calls", w->vfs.create_err); + ebpf_write_end_chart(); } - write_end_chart(); } } @@ -1451,7 +1445,7 @@ static void ebpf_create_specific_vfs_charts(char *type, ebpf_module_t *em) ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ], 1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); - ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC, "Calls for <code>vfs_fsync</code>", + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC, "Calls to vfs_fsync.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_FSYNC_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5507, ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC], @@ -1465,7 +1459,7 @@ static void ebpf_create_specific_vfs_charts(char *type, ebpf_module_t *em) 1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); } - ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN, "Calls for <code>vfs_open</code>", + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN, "Calls to vfs_open.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_OPEN_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5509, ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN], @@ -1479,7 +1473,7 @@ static void ebpf_create_specific_vfs_charts(char *type, ebpf_module_t *em) 1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); } - ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE, "Calls for <code>vfs_create</code>", + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE, "Calls to vfs_create.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_CREATE_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5511, ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE], @@ -1504,76 +1498,76 @@ static void ebpf_create_specific_vfs_charts(char *type, ebpf_module_t *em) */ static void ebpf_obsolete_specific_vfs_charts(char *type, ebpf_module_t *em) { - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_DELETED, "Files deleted", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_DELETED, "", "Files deleted", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_UNLINK_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5500, em->update_every); - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, "Write to disk", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, "", "Write to disk", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_WRITE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5501, em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, "Fails to write", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, "", "Fails to write", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_WRITE_ERROR_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5502, em->update_every); } - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS, "Read from disk", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS, "", "Read from disk", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_READ_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5503, em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, "Fails to read", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, "", "Fails to read", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_READ_ERROR_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5504, em->update_every); } - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, "Bytes written on disk", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, "", "Bytes written on disk", EBPF_COMMON_DIMENSION_BYTES, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_WRITE_BYTES_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5505, em->update_every); - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_READ_BYTES, "Bytes read from disk", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_READ_BYTES, "", "Bytes read from disk", EBPF_COMMON_DIMENSION_BYTES, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_READ_BYTES_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5506, em->update_every); - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_FSYNC, "Calls for <code>vfs_fsync</code>", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_FSYNC, "", "Calls to vfs_fsync.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_FSYNC_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5507, em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, "Sync error", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, "", "Sync error", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_FSYNC_ERROR_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5508, em->update_every); } - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_OPEN, "Calls for <code>vfs_open</code>", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_OPEN, "", "Calls to vfs_open.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_OPEN_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5509, em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, "Open error", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, "", "Open error", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_OPEN_ERROR_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5510, em->update_every); } - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_CREATE, "Calls for <code>vfs_create</code>", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_CREATE, "", "Calls to vfs_create.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_CREATE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5511, em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, "Create error", + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, "", "Create error", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_CREATE_ERROR_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5512, em->update_every); @@ -1590,78 +1584,78 @@ static void ebpf_obsolete_specific_vfs_charts(char *type, ebpf_module_t *em) */ static void ebpf_send_specific_vfs_data(char *type, netdata_publish_vfs_t *values, ebpf_module_t *em) { - write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_DELETED); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_DELETED, ""); write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_UNLINK].name, (long long)values->unlink_call); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, ""); write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE].name, (long long)values->write_call + (long long)values->writev_call); - write_end_chart(); + ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, ""); write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE].name, (long long)values->write_err + (long long)values->writev_err); - write_end_chart(); + ebpf_write_end_chart(); } - write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS, ""); write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ].name, (long long)values->read_call + (long long)values->readv_call); - write_end_chart(); + ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, ""); write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ].name, (long long)values->read_err + (long long)values->readv_err); - write_end_chart(); + ebpf_write_end_chart(); } - write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, ""); write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE].name, (long long)values->write_bytes + (long long)values->writev_bytes); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_BYTES); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_BYTES, ""); write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ].name, (long long)values->read_bytes + (long long)values->readv_bytes); - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC, ""); write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC].name, (long long)values->fsync_call); - write_end_chart(); + ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, ""); write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC].name, (long long)values->fsync_err); - write_end_chart(); + ebpf_write_end_chart(); } - write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN, ""); write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN].name, (long long)values->open_call); - write_end_chart(); + ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, ""); write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN].name, (long long)values->open_err); - write_end_chart(); + ebpf_write_end_chart(); } - write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE, ""); write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE].name, (long long)values->create_call); - write_end_chart(); + ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR); + ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, ""); write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE].name, (long long)values->create_err); - write_end_chart(); + ebpf_write_end_chart(); } } @@ -1722,7 +1716,7 @@ static void ebpf_create_systemd_vfs_charts(ebpf_module_t *em) ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_READ_BYTES_CONTEXT, NETDATA_EBPF_MODULE_NAME_VFS, em->update_every); - ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_FSYNC, "Calls to <code>vfs_fsync</code>", + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_FSYNC, "Calls to vfs_fsync.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, 20072, ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_FSYNC_CONTEXT, @@ -1735,7 +1729,7 @@ static void ebpf_create_systemd_vfs_charts(ebpf_module_t *em) ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_FSYNC_ERROR_CONTEXT, NETDATA_EBPF_MODULE_NAME_VFS, em->update_every); } - ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_OPEN, "Calls to <code>vfs_open</code>", + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_OPEN, "Calls to vfs_open.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, 20074, ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_OPEN_CONTEXT, @@ -1749,7 +1743,7 @@ static void ebpf_create_systemd_vfs_charts(ebpf_module_t *em) NETDATA_EBPF_MODULE_NAME_VFS, em->update_every); } - ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_CREATE, "Calls to <code>vfs_create</code>", + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_CREATE, "Calls to vfs_create.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, 20076, ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_CREATE_CONTEXT, @@ -1774,125 +1768,124 @@ static void ebpf_create_systemd_vfs_charts(ebpf_module_t *em) static void ebpf_send_systemd_vfs_charts(ebpf_module_t *em) { ebpf_cgroup_target_t *ect; - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_DELETED); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_DELETED, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_vfs.unlink_call); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_vfs.write_call + ect->publish_systemd_vfs.writev_call); } } - write_end_chart(); + ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_vfs.write_err + ect->publish_systemd_vfs.writev_err); } } - write_end_chart(); + ebpf_write_end_chart(); } - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_vfs.read_call + ect->publish_systemd_vfs.readv_call); } } - write_end_chart(); + ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_vfs.read_err + ect->publish_systemd_vfs.readv_err); } } - write_end_chart(); + ebpf_write_end_chart(); } - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_vfs.write_bytes + ect->publish_systemd_vfs.writev_bytes); } } - write_end_chart(); + ebpf_write_end_chart(); - - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_BYTES); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_BYTES, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_vfs.read_bytes + ect->publish_systemd_vfs.readv_bytes); } } - write_end_chart(); + ebpf_write_end_chart(); - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_vfs.fsync_call); } } - write_end_chart(); + ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_vfs.fsync_err); } } - write_end_chart(); + ebpf_write_end_chart(); } - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_vfs.open_call); } } - write_end_chart(); + ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_vfs.open_err); } } - write_end_chart(); + ebpf_write_end_chart(); } - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_vfs.create_call); } } - write_end_chart(); + ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR); + ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, ""); for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_vfs.create_err); } } - write_end_chart(); + ebpf_write_end_chart(); } } @@ -2098,7 +2091,7 @@ static void ebpf_create_global_charts(ebpf_module_t *em) ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, NETDATA_VFS_FSYNC, - "Calls for <code>vfs_fsync</code>", + "Calls to vfs_fsync.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NULL, @@ -2124,7 +2117,7 @@ static void ebpf_create_global_charts(ebpf_module_t *em) ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, NETDATA_VFS_OPEN, - "Calls for <code>vfs_open</code>", + "Calls to vfs_open.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NULL, @@ -2150,7 +2143,7 @@ static void ebpf_create_global_charts(ebpf_module_t *em) ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, NETDATA_VFS_CREATE, - "Calls for <code>vfs_create</code>", + "Calls to vfs_create.", EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, NULL, @@ -2188,127 +2181,219 @@ static void ebpf_create_global_charts(ebpf_module_t *em) void ebpf_vfs_create_apps_charts(struct ebpf_module *em, void *ptr) { struct ebpf_target *root = ptr; + struct ebpf_target *w; + int order = 20275; + int update_every = em->update_every; + for (w = root; w; w = w->next) { + if (unlikely(!w->exposed)) + continue; - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_DELETED, - "Files deleted", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20065, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); - - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, - "Write to disk", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20066, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_unlink", + "Files deleted.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_unlink", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_VFS); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_write", + "Write to disk.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_write", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_VFS); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_write_error", + "Fails to write.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_write_error", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_VFS); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + } - if (em->mode < MODE_ENTRY) { - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, - "Fails to write", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20067, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); - } - - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_READ_CALLS, - "Read from disk", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20068, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_read", + "Read from disk.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_read", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_VFS); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_read_error", + "Fails to read.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_read_error", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_VFS); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + } - if (em->mode < MODE_ENTRY) { - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, - "Fails to read", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20069, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); - } - - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, - "Bytes written on disk", EBPF_COMMON_DIMENSION_BYTES, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20070, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); - - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_READ_BYTES, - "Bytes read from disk", EBPF_COMMON_DIMENSION_BYTES, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20071, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); - - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_FSYNC, - "Calls for <code>vfs_fsync</code>", EBPF_COMMON_DIMENSION_CALL, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20072, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_write_bytes", + "Bytes written on disk.", + EBPF_COMMON_DIMENSION_BYTES, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_write_bytes", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_VFS); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION writes '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_read_bytes", + "Bytes read from disk.", + EBPF_COMMON_DIMENSION_BYTES, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_read_bytes", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_VFS); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION reads '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_fsync", + "Calls to vfs_fsync.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_fsync", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_VFS); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_fsync_error", + "Fails to sync.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_fsync_error", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_VFS); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + } - if (em->mode < MODE_ENTRY) { - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, - "Sync error", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20073, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); - } - - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_OPEN, - "Calls for <code>vfs_open</code>", EBPF_COMMON_DIMENSION_CALL, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20074, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_open", + "Calls to vfs_open.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_open", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_VFS); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_open_error", + "Fails to open.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_open_error", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_VFS); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + } - if (em->mode < MODE_ENTRY) { - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, - "Open error", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20075, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); - } - - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_CREATE, - "Calls for <code>vfs_create</code>", EBPF_COMMON_DIMENSION_CALL, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20076, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_create", + "Calls to vfs_create.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_create", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_VFS); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_cmd(NETDATA_APP_FAMILY, + w->clean_name, + "_ebpf_call_vfs_create_error", + "Fails to create a file.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + "app.ebpf_call_vfs_create_error", + order++, + update_every, + NETDATA_EBPF_MODULE_NAME_VFS); + ebpf_create_chart_labels("app_group", w->name, 1); + ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + } - if (em->mode < MODE_ENTRY) { - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, - "Create error", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20077, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); + w->charts_created |= 1<<EBPF_MODULE_VFS_IDX; } em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; diff --git a/collectors/ebpf.plugin/integrations/ebpf_cachestat.md b/collectors/ebpf.plugin/integrations/ebpf_cachestat.md index 3f2d2f57d..5bf0a3774 100644 --- a/collectors/ebpf.plugin/integrations/ebpf_cachestat.md +++ b/collectors/ebpf.plugin/integrations/ebpf_cachestat.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/ebpf.plugi sidebar_label: "eBPF Cachestat" learn_status: "Published" learn_rel_path: "Data Collection/eBPF" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -74,16 +75,20 @@ Metrics: These Metrics show grouped information per apps group. -This scope has no labels. +Labels: + +| Label | Description | +|:-----------|:----------------| +| app_group | The name of the group defined in the configuration. | Metrics: | Metric | Dimensions | Unit | |:------|:----------|:----| -| apps.cachestat_ratio | a dimension per app group | % | -| apps.cachestat_dirties | a dimension per app group | page/s | -| apps.cachestat_hits | a dimension per app group | hits/s | -| apps.cachestat_misses | a dimension per app group | misses/s | +| app.ebpf_cachestat_hit_ratio | ratio | % | +| app.ebpf_cachestat_dirty_pages | pages | page/s | +| app.ebpf_cachestat_access | hits | hits/s | +| app.ebpf_cachestat_misses | misses | misses/s | ### Per cgroup @@ -156,15 +161,15 @@ All options are defined inside section `[global]`. | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 5 | False | -| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | False | -| apps | Enable or disable integration with apps.plugin | no | False | -| cgroups | Enable or disable integration with cgroup.plugin | no | False | -| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | False | -| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | False | -| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). | trampoline | False | -| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. | yes | False | -| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | False | +| update every | Data collection frequency. | 5 | no | +| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no | +| apps | Enable or disable integration with apps.plugin | no | no | +| cgroups | Enable or disable integration with cgroup.plugin | no | no | +| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | no | +| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | no | +| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). | trampoline | no | +| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. | yes | no | +| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no | </details> diff --git a/collectors/ebpf.plugin/integrations/ebpf_dcstat.md b/collectors/ebpf.plugin/integrations/ebpf_dcstat.md index 6d9abea2c..4c5719026 100644 --- a/collectors/ebpf.plugin/integrations/ebpf_dcstat.md +++ b/collectors/ebpf.plugin/integrations/ebpf_dcstat.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/ebpf.plugi sidebar_label: "eBPF DCstat" learn_status: "Published" learn_rel_path: "Data Collection/eBPF" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -59,16 +60,20 @@ The scope defines the instance that the metric belongs to. An instance is unique These Metrics show grouped information per apps group. -This scope has no labels. +Labels: + +| Label | Description | +|:-----------|:----------------| +| app_group | The name of the group defined in the configuration. | Metrics: | Metric | Dimensions | Unit | |:------|:----------|:----| -| apps.dc_ratio | a dimension per app group | % | -| apps.dc_reference | a dimension per app group | files | -| apps.dc_not_cache | a dimension per app group | files | -| apps.dc_not_found | a dimension per app group | files | +| app.ebpf_dc_ratio | ratio | % | +| app.ebpf_dc_reference | files | files | +| app.ebpf_dc_not_cache | files | files | +| app.ebpf_dc_not_found | files | files | ### Per filesystem @@ -154,15 +159,15 @@ All options are defined inside section `[global]`. | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 5 | False | -| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | False | -| apps | Enable or disable integration with apps.plugin | no | False | -| cgroups | Enable or disable integration with cgroup.plugin | no | False | -| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | False | -| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | False | -| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). | trampoline | False | -| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. | yes | False | -| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | False | +| update every | Data collection frequency. | 5 | no | +| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no | +| apps | Enable or disable integration with apps.plugin | no | no | +| cgroups | Enable or disable integration with cgroup.plugin | no | no | +| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | no | +| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | no | +| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). | trampoline | no | +| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. | yes | no | +| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no | </details> diff --git a/collectors/ebpf.plugin/integrations/ebpf_disk.md b/collectors/ebpf.plugin/integrations/ebpf_disk.md index 12eafce86..557da125d 100644 --- a/collectors/ebpf.plugin/integrations/ebpf_disk.md +++ b/collectors/ebpf.plugin/integrations/ebpf_disk.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/ebpf.plugi sidebar_label: "eBPF Disk" learn_status: "Published" learn_rel_path: "Data Collection/eBPF" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -124,9 +125,9 @@ All options are defined inside section `[global]`. | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 5 | False | -| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | False | -| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | False | +| update every | Data collection frequency. | 5 | no | +| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no | +| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no | </details> diff --git a/collectors/ebpf.plugin/integrations/ebpf_filedescriptor.md b/collectors/ebpf.plugin/integrations/ebpf_filedescriptor.md index 0a749ec31..23f5bd26e 100644 --- a/collectors/ebpf.plugin/integrations/ebpf_filedescriptor.md +++ b/collectors/ebpf.plugin/integrations/ebpf_filedescriptor.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/ebpf.plugi sidebar_label: "eBPF Filedescriptor" learn_status: "Published" learn_rel_path: "Data Collection/eBPF" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -91,16 +92,20 @@ Metrics: These Metrics show grouped information per apps group. -This scope has no labels. +Labels: + +| Label | Description | +|:-----------|:----------------| +| app_group | The name of the group defined in the configuration. | Metrics: | Metric | Dimensions | Unit | |:------|:----------|:----| -| apps.file_open | a dimension per app group | calls/s | -| apps.file_open_error | a dimension per app group | calls/s | -| apps.file_closed | a dimension per app group | calls/s | -| apps.file_close_error | a dimension per app group | calls/s | +| app.ebpf_file_open | calls | calls/s | +| app.ebpf_file_open_error | calls | calls/s | +| app.ebpf_file_closed | calls | calls/s | +| app.ebpf_file_close_error | calls | calls/s | @@ -154,15 +159,15 @@ All options are defined inside section `[global]`. | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 5 | False | -| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | False | -| apps | Enable or disable integration with apps.plugin | no | False | -| cgroups | Enable or disable integration with cgroup.plugin | no | False | -| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | False | -| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | False | -| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). | trampoline | False | -| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. | yes | False | -| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | False | +| update every | Data collection frequency. | 5 | no | +| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no | +| apps | Enable or disable integration with apps.plugin | no | no | +| cgroups | Enable or disable integration with cgroup.plugin | no | no | +| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | no | +| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | no | +| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). | trampoline | no | +| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. | yes | no | +| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no | </details> diff --git a/collectors/ebpf.plugin/integrations/ebpf_filesystem.md b/collectors/ebpf.plugin/integrations/ebpf_filesystem.md index b6050657b..7a1bb832b 100644 --- a/collectors/ebpf.plugin/integrations/ebpf_filesystem.md +++ b/collectors/ebpf.plugin/integrations/ebpf_filesystem.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/ebpf.plugi sidebar_label: "eBPF Filesystem" learn_status: "Published" learn_rel_path: "Data Collection/eBPF" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -145,14 +146,14 @@ This configuration file have two different sections. The `[global]` overwrites d | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 5 | False | -| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | False | -| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | False | -| btrfsdist | Enable or disable latency monitoring for functions associated with btrfs filesystem. | yes | False | -| ext4dist | Enable or disable latency monitoring for functions associated with ext4 filesystem. | yes | False | -| nfsdist | Enable or disable latency monitoring for functions associated with nfs filesystem. | yes | False | -| xfsdist | Enable or disable latency monitoring for functions associated with xfs filesystem. | yes | False | -| zfsdist | Enable or disable latency monitoring for functions associated with zfs filesystem. | yes | False | +| update every | Data collection frequency. | 5 | no | +| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no | +| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no | +| btrfsdist | Enable or disable latency monitoring for functions associated with btrfs filesystem. | yes | no | +| ext4dist | Enable or disable latency monitoring for functions associated with ext4 filesystem. | yes | no | +| nfsdist | Enable or disable latency monitoring for functions associated with nfs filesystem. | yes | no | +| xfsdist | Enable or disable latency monitoring for functions associated with xfs filesystem. | yes | no | +| zfsdist | Enable or disable latency monitoring for functions associated with zfs filesystem. | yes | no | </details> diff --git a/collectors/ebpf.plugin/integrations/ebpf_hardirq.md b/collectors/ebpf.plugin/integrations/ebpf_hardirq.md index cd89cd589..f9b529624 100644 --- a/collectors/ebpf.plugin/integrations/ebpf_hardirq.md +++ b/collectors/ebpf.plugin/integrations/ebpf_hardirq.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/ebpf.plugi sidebar_label: "eBPF Hardirq" learn_status: "Published" learn_rel_path: "Data Collection/eBPF" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -124,9 +125,9 @@ All options are defined inside section `[global]`. | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 5 | False | -| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | False | -| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | False | +| update every | Data collection frequency. | 5 | no | +| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no | +| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no | </details> diff --git a/collectors/ebpf.plugin/integrations/ebpf_mdflush.md b/collectors/ebpf.plugin/integrations/ebpf_mdflush.md index 51df30b47..0081b7d83 100644 --- a/collectors/ebpf.plugin/integrations/ebpf_mdflush.md +++ b/collectors/ebpf.plugin/integrations/ebpf_mdflush.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/ebpf.plugi sidebar_label: "eBPF MDflush" learn_status: "Published" learn_rel_path: "Data Collection/eBPF" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -119,9 +120,9 @@ All options are defined inside section `[global]`. | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 5 | False | -| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | False | -| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | False | +| update every | Data collection frequency. | 5 | no | +| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no | +| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no | </details> diff --git a/collectors/ebpf.plugin/integrations/ebpf_mount.md b/collectors/ebpf.plugin/integrations/ebpf_mount.md index 063ffcbad..d19e57809 100644 --- a/collectors/ebpf.plugin/integrations/ebpf_mount.md +++ b/collectors/ebpf.plugin/integrations/ebpf_mount.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/ebpf.plugi sidebar_label: "eBPF Mount" learn_status: "Published" learn_rel_path: "Data Collection/eBPF" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -125,11 +126,11 @@ All options are defined inside section `[global]`. | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 5 | False | -| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | False | -| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | False | -| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). | trampoline | False | -| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | False | +| update every | Data collection frequency. | 5 | no | +| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no | +| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | no | +| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). | trampoline | no | +| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no | </details> diff --git a/collectors/ebpf.plugin/integrations/ebpf_oomkill.md b/collectors/ebpf.plugin/integrations/ebpf_oomkill.md index 372921387..897cddfac 100644 --- a/collectors/ebpf.plugin/integrations/ebpf_oomkill.md +++ b/collectors/ebpf.plugin/integrations/ebpf_oomkill.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/ebpf.plugi sidebar_label: "eBPF OOMkill" learn_status: "Published" learn_rel_path: "Data Collection/eBPF" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -72,13 +73,17 @@ Metrics: These metrics show cgroup/service that reached OOM. -This scope has no labels. +Labels: + +| Label | Description | +|:-----------|:----------------| +| app_group | The name of the group defined in the configuration. | Metrics: | Metric | Dimensions | Unit | |:------|:----------|:----| -| apps.oomkills | a dimension per app group | kills | +| app.oomkill | kills | kills | diff --git a/collectors/ebpf.plugin/integrations/ebpf_process.md b/collectors/ebpf.plugin/integrations/ebpf_process.md index 3bd92a06e..109890139 100644 --- a/collectors/ebpf.plugin/integrations/ebpf_process.md +++ b/collectors/ebpf.plugin/integrations/ebpf_process.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/ebpf.plugi sidebar_label: "eBPF Process" learn_status: "Published" learn_rel_path: "Data Collection/eBPF" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/ebpf.plugin/integrations/ebpf_processes.md b/collectors/ebpf.plugin/integrations/ebpf_processes.md index 6d3c0d40e..62542359a 100644 --- a/collectors/ebpf.plugin/integrations/ebpf_processes.md +++ b/collectors/ebpf.plugin/integrations/ebpf_processes.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/ebpf.plugi sidebar_label: "eBPF Processes" learn_status: "Published" learn_rel_path: "Data Collection/eBPF" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -74,17 +75,21 @@ Metrics: These Metrics show grouped information per apps group. -This scope has no labels. +Labels: + +| Label | Description | +|:-----------|:----------------| +| app_group | The name of the group defined in the configuration. | Metrics: | Metric | Dimensions | Unit | |:------|:----------|:----| -| apps.process_create | a dimension per app group | calls/s | -| apps.thread_create | a dimension per app group | calls/s | -| apps.task_exit | a dimension per app group | calls/s | -| apps.task_close | a dimension per app group | calls/s | -| apps.task_error | a dimension per app group | calls/s | +| app.process_create | calls | calls/s | +| app.thread_create | call | calls/s | +| app.task_exit | call | calls/s | +| app.task_close | call | calls/s | +| app.task_error | app | calls/s | ### Per cgroup @@ -164,15 +169,15 @@ All options are defined inside section `[global]`. | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 5 | False | -| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | False | -| apps | Enable or disable integration with apps.plugin | no | False | -| cgroups | Enable or disable integration with cgroup.plugin | no | False | -| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | False | -| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | False | -| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). This plugin will always try to attach a tracepoint, so option here will impact only function used to monitor task (thread and process) creation. | trampoline | False | -| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. | yes | False | -| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | False | +| update every | Data collection frequency. | 5 | no | +| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no | +| apps | Enable or disable integration with apps.plugin | no | no | +| cgroups | Enable or disable integration with cgroup.plugin | no | no | +| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | no | +| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | no | +| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). This plugin will always try to attach a tracepoint, so option here will impact only function used to monitor task (thread and process) creation. | trampoline | no | +| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. | yes | no | +| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no | </details> diff --git a/collectors/ebpf.plugin/integrations/ebpf_shm.md b/collectors/ebpf.plugin/integrations/ebpf_shm.md index 2cfcbeb16..ffa05c770 100644 --- a/collectors/ebpf.plugin/integrations/ebpf_shm.md +++ b/collectors/ebpf.plugin/integrations/ebpf_shm.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/ebpf.plugi sidebar_label: "eBPF SHM" learn_status: "Published" learn_rel_path: "Data Collection/eBPF" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -78,16 +79,20 @@ Metrics: These Metrics show grouped information per apps group. -This scope has no labels. +Labels: + +| Label | Description | +|:-----------|:----------------| +| app_group | The name of the group defined in the configuration. | Metrics: | Metric | Dimensions | Unit | |:------|:----------|:----| -| apps.shmget_call | a dimension per app group | calls/s | -| apps.shmat_call | a dimension per app group | calls/s | -| apps.shmdt_call | a dimension per app group | calls/s | -| apps.shmctl_call | a dimension per app group | calls/s | +| app.ebpf_shmget_call | calls | calls/s | +| app.ebpf_shmat_call | calls | calls/s | +| app.ebpf_shmdt_call | calls | calls/s | +| app.ebpf_shmctl_call | calls | calls/s | ### Per eBPF SHM instance @@ -158,19 +163,19 @@ This configuration file have two different sections. The `[global]` overwrites a | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 5 | False | -| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | False | -| apps | Enable or disable integration with apps.plugin | no | False | -| cgroups | Enable or disable integration with cgroup.plugin | no | False | -| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | False | -| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | False | -| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). | trampoline | False | -| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. | yes | False | -| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | False | -| shmget | Enable or disable monitoring for syscall `shmget` | yes | False | -| shmat | Enable or disable monitoring for syscall `shmat` | yes | False | -| shmdt | Enable or disable monitoring for syscall `shmdt` | yes | False | -| shmctl | Enable or disable monitoring for syscall `shmctl` | yes | False | +| update every | Data collection frequency. | 5 | no | +| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no | +| apps | Enable or disable integration with apps.plugin | no | no | +| cgroups | Enable or disable integration with cgroup.plugin | no | no | +| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | no | +| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | no | +| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). | trampoline | no | +| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. | yes | no | +| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no | +| shmget | Enable or disable monitoring for syscall `shmget` | yes | no | +| shmat | Enable or disable monitoring for syscall `shmat` | yes | no | +| shmdt | Enable or disable monitoring for syscall `shmdt` | yes | no | +| shmctl | Enable or disable monitoring for syscall `shmctl` | yes | no | </details> diff --git a/collectors/ebpf.plugin/integrations/ebpf_socket.md b/collectors/ebpf.plugin/integrations/ebpf_socket.md index 3d621f439..dc7a7d07b 100644 --- a/collectors/ebpf.plugin/integrations/ebpf_socket.md +++ b/collectors/ebpf.plugin/integrations/ebpf_socket.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/ebpf.plugi sidebar_label: "eBPF Socket" learn_status: "Published" learn_rel_path: "Data Collection/eBPF" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -79,22 +80,25 @@ Metrics: These metrics show grouped information per apps group. -This scope has no labels. +Labels: + +| Label | Description | +|:-----------|:----------------| +| app_group | The name of the group defined in the configuration. | Metrics: | Metric | Dimensions | Unit | |:------|:----------|:----| -| apps.outbound_conn_v4 | a dimension per app group | connections/s | -| apps.outbound_conn_v6 | a dimension per app group | connections/s | -| apps.total_bandwidth_sent | a dimension per app group | kilobits/s | -| apps.total_bandwidth_recv | a dimension per app group | kilobits/s | -| apps.bandwidth_tcp_send | a dimension per app group | calls/s | -| apps.bandwidth_tcp_recv | a dimension per app group | calls/s | -| apps.bandwidth_tcp_retransmit | a dimension per app group | calls/s | -| apps.bandwidth_udp_send | a dimension per app group | calls/s | -| apps.bandwidth_udp_recv | a dimension per app group | calls/s | -| services.net_conn_ipv4 | a dimension per systemd service | connections/s | +| app.ebpf_call_tcp_v4_connection | connections | connections/s | +| app.app.ebpf_call_tcp_v6_connection | connections | connections/s | +| app.ebpf_sock_bytes_sent | bandwidth | kilobits/s | +| app.ebpf_sock_bytes_received | bandwidth | kilobits/s | +| app.ebpf_call_tcp_sendmsg | calls | calls/s | +| app.ebpf_call_tcp_cleanup_rbuf | calls | calls/s | +| app.ebpf_call_tcp_retransmit | calls | calls/s | +| app.ebpf_call_udp_sendmsg | calls | calls/s | +| app.ebpf_call_udp_recvmsg | calls | calls/s | ### Per cgroup @@ -176,18 +180,18 @@ All options are defined inside section `[global]`. Options inside `network conne | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 5 | False | -| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | False | -| apps | Enable or disable integration with apps.plugin | no | False | -| cgroups | Enable or disable integration with cgroup.plugin | no | False | -| bandwidth table size | Number of elements stored inside hash tables used to monitor calls per PID. | 16384 | False | -| ipv4 connection table size | Number of elements stored inside hash tables used to monitor calls per IPV4 connections. | 16384 | False | -| ipv6 connection table size | Number of elements stored inside hash tables used to monitor calls per IPV6 connections. | 16384 | False | -| udp connection table size | Number of temporary elements stored inside hash tables used to monitor UDP connections. | 4096 | False | -| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | False | -| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). | trampoline | False | -| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. | yes | False | -| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | False | +| update every | Data collection frequency. | 5 | no | +| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no | +| apps | Enable or disable integration with apps.plugin | no | no | +| cgroups | Enable or disable integration with cgroup.plugin | no | no | +| bandwidth table size | Number of elements stored inside hash tables used to monitor calls per PID. | 16384 | no | +| ipv4 connection table size | Number of elements stored inside hash tables used to monitor calls per IPV4 connections. | 16384 | no | +| ipv6 connection table size | Number of elements stored inside hash tables used to monitor calls per IPV6 connections. | 16384 | no | +| udp connection table size | Number of temporary elements stored inside hash tables used to monitor UDP connections. | 4096 | no | +| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | no | +| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). | trampoline | no | +| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. | yes | no | +| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no | </details> diff --git a/collectors/ebpf.plugin/integrations/ebpf_softirq.md b/collectors/ebpf.plugin/integrations/ebpf_softirq.md index 3a061368c..6a4312c6e 100644 --- a/collectors/ebpf.plugin/integrations/ebpf_softirq.md +++ b/collectors/ebpf.plugin/integrations/ebpf_softirq.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/ebpf.plugi sidebar_label: "eBPF SoftIRQ" learn_status: "Published" learn_rel_path: "Data Collection/eBPF" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -124,9 +125,9 @@ All options are defined inside section `[global]`. | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 5 | False | -| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | False | -| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | False | +| update every | Data collection frequency. | 5 | no | +| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no | +| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no | </details> diff --git a/collectors/ebpf.plugin/integrations/ebpf_swap.md b/collectors/ebpf.plugin/integrations/ebpf_swap.md index 502cd5bce..ce2423f8d 100644 --- a/collectors/ebpf.plugin/integrations/ebpf_swap.md +++ b/collectors/ebpf.plugin/integrations/ebpf_swap.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/ebpf.plugi sidebar_label: "eBPF SWAP" learn_status: "Published" learn_rel_path: "Data Collection/eBPF" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -74,14 +75,18 @@ Metrics: These Metrics show grouped information per apps group. -This scope has no labels. +Labels: + +| Label | Description | +|:-----------|:----------------| +| app_group | The name of the group defined in the configuration. | Metrics: | Metric | Dimensions | Unit | |:------|:----------|:----| -| apps.swap_read_call | a dimension per app group | calls/s | -| apps.swap_write_call | a dimension per app group | calls/s | +| app.ebpf_call_swap_readpage | a dimension per app group | calls/s | +| app.ebpf_call_swap_writepage | a dimension per app group | calls/s | ### Per eBPF SWAP instance @@ -147,15 +152,15 @@ All options are defined inside section `[global]`. | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 5 | False | -| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | False | -| apps | Enable or disable integration with apps.plugin | no | False | -| cgroups | Enable or disable integration with cgroup.plugin | no | False | -| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | False | -| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | False | -| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). | trampoline | False | -| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. | yes | False | -| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | False | +| update every | Data collection frequency. | 5 | no | +| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no | +| apps | Enable or disable integration with apps.plugin | no | no | +| cgroups | Enable or disable integration with cgroup.plugin | no | no | +| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | no | +| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | no | +| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). | trampoline | no | +| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. | yes | no | +| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no | </details> diff --git a/collectors/ebpf.plugin/integrations/ebpf_sync.md b/collectors/ebpf.plugin/integrations/ebpf_sync.md index 024c3e30e..6f6c246a7 100644 --- a/collectors/ebpf.plugin/integrations/ebpf_sync.md +++ b/collectors/ebpf.plugin/integrations/ebpf_sync.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/ebpf.plugi sidebar_label: "eBPF Sync" learn_status: "Published" learn_rel_path: "Data Collection/eBPF" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -132,21 +133,21 @@ This configuration file have two different sections. The `[global]` overwrites a | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 5 | False | -| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | False | -| apps | Enable or disable integration with apps.plugin | no | False | -| cgroups | Enable or disable integration with cgroup.plugin | no | False | -| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | False | -| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | False | -| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). | trampoline | False | -| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. | yes | False | -| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | False | -| sync | Enable or disable monitoring for syscall `sync` | yes | False | -| msync | Enable or disable monitoring for syscall `msync` | yes | False | -| fsync | Enable or disable monitoring for syscall `fsync` | yes | False | -| fdatasync | Enable or disable monitoring for syscall `fdatasync` | yes | False | -| syncfs | Enable or disable monitoring for syscall `syncfs` | yes | False | -| sync_file_range | Enable or disable monitoring for syscall `sync_file_range` | yes | False | +| update every | Data collection frequency. | 5 | no | +| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no | +| apps | Enable or disable integration with apps.plugin | no | no | +| cgroups | Enable or disable integration with cgroup.plugin | no | no | +| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | no | +| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | no | +| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). | trampoline | no | +| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. | yes | no | +| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no | +| sync | Enable or disable monitoring for syscall `sync` | yes | no | +| msync | Enable or disable monitoring for syscall `msync` | yes | no | +| fsync | Enable or disable monitoring for syscall `fsync` | yes | no | +| fdatasync | Enable or disable monitoring for syscall `fdatasync` | yes | no | +| syncfs | Enable or disable monitoring for syscall `syncfs` | yes | no | +| sync_file_range | Enable or disable monitoring for syscall `sync_file_range` | yes | no | </details> diff --git a/collectors/ebpf.plugin/integrations/ebpf_vfs.md b/collectors/ebpf.plugin/integrations/ebpf_vfs.md index aa8d82caa..4b824e975 100644 --- a/collectors/ebpf.plugin/integrations/ebpf_vfs.md +++ b/collectors/ebpf.plugin/integrations/ebpf_vfs.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/ebpf.plugi sidebar_label: "eBPF VFS" learn_status: "Published" learn_rel_path: "Data Collection/eBPF" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -117,25 +118,29 @@ Metrics: These Metrics show grouped information per apps group. -This scope has no labels. +Labels: + +| Label | Description | +|:-----------|:----------------| +| app_group | The name of the group defined in the configuration. | Metrics: | Metric | Dimensions | Unit | |:------|:----------|:----| -| apps.file_deleted | a dimension per app group | calls/s | -| apps.vfs_write_call | a dimension per app group | calls/s | -| apps.vfs_write_error | a dimension per app group | calls/s | -| apps.vfs_read_call | a dimension per app group | calls/s | -| apps.vfs_read_error | a dimension per app group | calls/s | -| apps.vfs_write_bytes | a dimension per app group | bytes/s | -| apps.vfs_read_bytes | a dimension per app group | bytes/s | -| apps.vfs_fsync | a dimension per app group | calls/s | -| apps.vfs_fsync_error | a dimension per app group | calls/s | -| apps.vfs_open | a dimension per app group | calls/s | -| apps.vfs_open_error | a dimension per app group | calls/s | -| apps.vfs_create | a dimension per app group | calls/s | -| apps.vfs_create_error | a dimension per app group | calls/s | +| app.ebpf_call_vfs_unlink | calls | calls/s | +| app.ebpf_call_vfs_write | calls | calls/s | +| app.ebpf_call_vfs_write_error | calls | calls/s | +| app.ebpf_call_vfs_read | calls | calls/s | +| app.ebpf_call_vfs_read_error | calls | calls/s | +| app.ebpf_call_vfs_write_bytes | writes | bytes/s | +| app.ebpf_call_vfs_read_bytes | reads | bytes/s | +| app.ebpf_call_vfs_fsync | calls | calls/s | +| app.ebpf_call_vfs_fsync_error | calls | calls/s | +| app.ebpf_call_vfs_open | calls | calls/s | +| app.ebpf_call_vfs_open_error | calls | calls/s | +| app.ebpf_call_vfs_create | calls | calls/s | +| app.ebpf_call_vfs_create_error | calls | calls/s | @@ -189,15 +194,15 @@ All options are defined inside section `[global]`. | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 5 | False | -| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | False | -| apps | Enable or disable integration with apps.plugin | no | False | -| cgroups | Enable or disable integration with cgroup.plugin | no | False | -| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | False | -| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | False | -| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). | trampoline | False | -| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. | yes | False | -| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | False | +| update every | Data collection frequency. | 5 | no | +| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no | +| apps | Enable or disable integration with apps.plugin | no | no | +| cgroups | Enable or disable integration with cgroup.plugin | no | no | +| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | no | +| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin check OS configuration before to load). | auto | no | +| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). | trampoline | no | +| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead to have centralized information. | yes | no | +| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no | </details> diff --git a/collectors/ebpf.plugin/metadata.yaml b/collectors/ebpf.plugin/metadata.yaml index 232326778..97b5df389 100644 --- a/collectors/ebpf.plugin/metadata.yaml +++ b/collectors/ebpf.plugin/metadata.yaml @@ -196,32 +196,34 @@ modules: - name: close - name: apps description: "These Metrics show grouped information per apps group." - labels: [] + labels: + - name: app_group + description: The name of the group defined in the configuration. metrics: - - name: apps.file_open + - name: app.ebpf_file_open description: Number of open files unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.file_open_error + - name: calls + - name: app.ebpf_file_open_error description: Fails to open files unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.file_closed + - name: calls + - name: app.ebpf_file_closed description: Files closed unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.file_close_error + - name: calls + - name: app.ebpf_file_close_error description: Fails to close files unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group + - name: calls - meta: plugin_name: ebpf.plugin module_name: processes @@ -379,38 +381,40 @@ modules: - name: task - name: apps description: "These Metrics show grouped information per apps group." - labels: [] + labels: + - name: app_group + description: The name of the group defined in the configuration. metrics: - - name: apps.process_create + - name: app.process_create description: Process started unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.thread_create + - name: calls + - name: app.thread_create description: Threads started unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.task_exit + - name: call + - name: app.task_exit description: Tasks starts exit process unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.task_close + - name: call + - name: app.task_close description: Tasks closed unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.task_error + - name: call + - name: app.task_error description: Errors to create process or threads unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group + - name: app - name: cgroup description: "These Metrics show grouped information per cgroup/service." labels: [] @@ -841,32 +845,34 @@ modules: - name: miss - name: apps description: "These Metrics show grouped information per apps group." - labels: [] + labels: + - name: app_group + description: The name of the group defined in the configuration. metrics: - - name: apps.cachestat_ratio + - name: app.ebpf_cachestat_hit_ratio description: Hit ratio unit: "%" chart_type: line dimensions: - - name: a dimension per app group - - name: apps.cachestat_dirties + - name: ratio + - name: app.ebpf_cachestat_dirty_pages description: Number of dirty pages unit: "page/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.cachestat_hits + - name: pages + - name: app.ebpf_cachestat_access description: Number of accessed files unit: "hits/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.cachestat_misses + - name: hits + - name: app.ebpf_cachestat_misses description: Files out of page cache unit: "misses/s" chart_type: stacked dimensions: - - name: a dimension per app group + - name: misses - name: cgroup description: "" labels: [] @@ -1076,27 +1082,27 @@ modules: labels: [] metrics: - name: mem.file_sync - description: Monitor calls for <code>fsync(2)</code> and <code>fdatasync(2)</code>. + description: Monitor calls to fsync(2) and fdatasync(2). unit: "calls/s" chart_type: stacked dimensions: - name: fsync - name: fdatasync - name: mem.meory_map - description: Monitor calls for <code>msync(2)</code>. + description: Monitor calls to msync(2). unit: "calls/s" chart_type: line dimensions: - name: msync - name: mem.sync - description: Monitor calls for <code>sync(2)</code> and <code>syncfs(2)</code>. + description: Monitor calls to sync(2) and syncfs(2). unit: "calls/s" chart_type: line dimensions: - name: sync - name: syncfs - name: mem.file_segment - description: Monitor calls for <code>sync_file_range(2)</code>. + description: Monitor calls to sync_file_range(2). unit: "calls/s" chart_type: line dimensions: @@ -1333,41 +1339,43 @@ modules: labels: [] metrics: - name: cgroup.swap_read - description: Calls to function <code>swap_readpage</code>. + description: Calls to function swap_readpage. unit: "calls/s" chart_type: line dimensions: - name: read - name: cgroup.swap_write - description: Calls to function <code>swap_writepage</code>. + description: Calls to function swap_writepage. unit: "calls/s" chart_type: line dimensions: - name: write - name: services.swap_read - description: Calls to <code>swap_readpage</code>. + description: Calls to swap_readpage. unit: "calls/s" chart_type: stacked dimensions: - name: a dimension per systemd service - name: services.swap_write - description: Calls to function <code>swap_writepage</code>. + description: Calls to function swap_writepage. unit: "calls/s" chart_type: stacked dimensions: - name: a dimension per systemd service - name: apps description: "These Metrics show grouped information per apps group." - labels: [] + labels: + - name: app_group + description: The name of the group defined in the configuration. metrics: - - name: apps.swap_read_call - description: Calls to function <code>swap_readpage</code>. + - name: app.ebpf_call_swap_readpage + description: Calls to function swap_readpage. unit: "calls/s" chart_type: stacked dimensions: - name: a dimension per app group - - name: apps.swap_write_call - description: Calls to function <code>swap_writepage</code>. + - name: app.ebpf_call_swap_writepage + description: Calls to function swap_writepage. unit: "calls/s" chart_type: stacked dimensions: @@ -1501,14 +1509,16 @@ modules: - name: a dimension per systemd service - name: apps description: "These metrics show cgroup/service that reached OOM." - labels: [] + labels: + - name: app_group + description: The name of the group defined in the configuration. metrics: - - name: apps.oomkills + - name: app.oomkill description: OOM kills unit: "kills" chart_type: stacked dimensions: - - name: a dimension per app group + - name: kills - meta: plugin_name: ebpf.plugin module_name: socket @@ -1713,68 +1723,64 @@ modules: - name: send - name: apps description: "These metrics show grouped information per apps group." - labels: [] + labels: + - name: app_group + description: The name of the group defined in the configuration. metrics: - - name: apps.outbound_conn_v4 + - name: app.ebpf_call_tcp_v4_connection description: Calls to tcp_v4_connection unit: "connections/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.outbound_conn_v6 + - name: connections + - name: app.app.ebpf_call_tcp_v6_connection description: Calls to tcp_v6_connection unit: "connections/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.total_bandwidth_sent + - name: connections + - name: app.ebpf_sock_bytes_sent description: Bytes sent unit: "kilobits/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.total_bandwidth_recv + - name: bandwidth + - name: app.ebpf_sock_bytes_received description: bytes received unit: "kilobits/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.bandwidth_tcp_send + - name: bandwidth + - name: app.ebpf_call_tcp_sendmsg description: Calls for tcp_sendmsg unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.bandwidth_tcp_recv + - name: calls + - name: app.ebpf_call_tcp_cleanup_rbuf description: Calls for tcp_cleanup_rbuf unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.bandwidth_tcp_retransmit + - name: calls + - name: app.ebpf_call_tcp_retransmit description: Calls for tcp_retransmit unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.bandwidth_udp_send + - name: calls + - name: app.ebpf_call_udp_sendmsg description: Calls for udp_sendmsg unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.bandwidth_udp_recv + - name: calls + - name: app.ebpf_call_udp_recvmsg description: Calls for udp_recvmsg unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: services.net_conn_ipv4 - description: Calls to tcp_v4_connection - unit: "connections/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service + - name: calls - name: cgroup description: "" labels: [] @@ -2005,32 +2011,34 @@ modules: scopes: - name: apps description: "These Metrics show grouped information per apps group." - labels: [] + labels: + - name: app_group + description: The name of the group defined in the configuration. metrics: - - name: apps.dc_ratio + - name: app.ebpf_dc_ratio description: Percentage of files inside directory cache unit: "%" chart_type: line dimensions: - - name: a dimension per app group - - name: apps.dc_reference + - name: ratio + - name: app.ebpf_dc_reference description: Count file access unit: "files" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.dc_not_cache + - name: files + - name: app.ebpf_dc_not_cache description: Files not present inside directory cache unit: "files" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.dc_not_found + - name: files + - name: app.ebpf_dc_not_found description: Files not found unit: "files" chart_type: stacked dimensions: - - name: a dimension per app group + - name: files - name: filesystem description: "These metrics show total number of calls to functions inside kernel." labels: [] @@ -2409,81 +2417,83 @@ modules: labels: [] metrics: - name: cgroup.shmget - description: Calls to syscall <code>shmget(2)</code>. + description: Calls to syscall shmget(2). unit: "calls/s" chart_type: line dimensions: - name: get - name: cgroup.shmat - description: Calls to syscall <code>shmat(2)</code>. + description: Calls to syscall shmat(2). unit: "calls/s" chart_type: line dimensions: - name: at - name: cgroup.shmdt - description: Calls to syscall <code>shmdt(2)</code>. + description: Calls to syscall shmdt(2). unit: "calls/s" chart_type: line dimensions: - name: dt - name: cgroup.shmctl - description: Calls to syscall <code>shmctl(2)</code>. + description: Calls to syscall shmctl(2). unit: "calls/s" chart_type: line dimensions: - name: ctl - name: services.shmget - description: Calls to syscall <code>shmget(2)</code>. + description: Calls to syscall shmget(2). unit: "calls/s" chart_type: stacked dimensions: - name: a dimension per systemd service - name: services.shmat - description: Calls to syscall <code>shmat(2)</code>. + description: Calls to syscall shmat(2). unit: "calls/s" chart_type: stacked dimensions: - name: a dimension per systemd service - name: services.shmdt - description: Calls to syscall <code>shmdt(2)</code>. + description: Calls to syscall shmdt(2). unit: "calls/s" chart_type: stacked dimensions: - name: a dimension per systemd service - name: services.shmctl - description: Calls to syscall <code>shmctl(2)</code>. + description: Calls to syscall shmctl(2). unit: "calls/s" chart_type: stacked dimensions: - name: a dimension per systemd service - name: apps description: "These Metrics show grouped information per apps group." - labels: [] + labels: + - name: app_group + description: The name of the group defined in the configuration. metrics: - - name: apps.shmget_call - description: Calls to syscall <code>shmget(2)</code>. + - name: app.ebpf_shmget_call + description: Calls to syscall shmget(2). unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.shmat_call - description: Calls to syscall <code>shmat(2)</code>. + - name: calls + - name: app.ebpf_shmat_call + description: Calls to syscall shmat(2). unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.shmdt_call - description: Calls to syscall <code>shmdt(2)</code>. + - name: calls + - name: app.ebpf_shmdt_call + description: Calls to syscall shmdt(2). unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.shmctl_call - description: Calls to syscall <code>shmctl(2)</code>. + - name: calls + - name: app.ebpf_shmctl_call + description: Calls to syscall shmctl(2). unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group + - name: calls - name: global description: "These Metrics show number of calls for specified syscall." labels: [] @@ -2898,7 +2908,7 @@ modules: dimensions: - name: read - name: cgroup.vfs_fsync - description: Calls for <code>vfs_fsync</code> + description: Calls to vfs_fsync. unit: "calls/s" chart_type: line dimensions: @@ -2910,7 +2920,7 @@ modules: dimensions: - name: fsync - name: cgroup.vfs_open - description: Calls for <code>vfs_open</code> + description: Calls to vfs_open. unit: "calls/s" chart_type: line dimensions: @@ -2922,7 +2932,7 @@ modules: dimensions: - name: open - name: cgroup.vfs_create - description: Calls for <code>vfs_create</code> + description: Calls to vfs_create. unit: "calls/s" chart_type: line dimensions: @@ -2976,7 +2986,7 @@ modules: dimensions: - name: a dimension per systemd service - name: services.vfs_fsync - description: Calls to <code>vfs_fsync</code> + description: Calls to vfs_fsync. unit: "calls/s" chart_type: stacked dimensions: @@ -2988,7 +2998,7 @@ modules: dimensions: - name: a dimension per systemd service - name: services.vfs_open - description: Calls to <code>vfs_open</code> + description: Calls to vfs_open. unit: "calls/s" chart_type: stacked dimensions: @@ -3000,7 +3010,7 @@ modules: dimensions: - name: a dimension per systemd service - name: services.vfs_create - description: Calls to <code>vfs_create</code> + description: Calls to vfs_create. unit: "calls/s" chart_type: stacked dimensions: @@ -3043,7 +3053,7 @@ modules: - name: read - name: write - name: filesystem.vfs_fsync - description: Calls for <code>vfs_fsync</code> + description: Calls to vfs_fsync. unit: "calls/s" chart_type: line dimensions: @@ -3055,7 +3065,7 @@ modules: dimensions: - name: fsync - name: filesystem.vfs_open - description: Calls for <code>vfs_open</code> + description: Calls to vfs_open. unit: "calls/s" chart_type: line dimensions: @@ -3067,7 +3077,7 @@ modules: dimensions: - name: open - name: filesystem.vfs_create - description: Calls for <code>vfs_create</code> + description: Calls to vfs_create. unit: "calls/s" chart_type: line dimensions: @@ -3080,86 +3090,88 @@ modules: - name: create - name: apps description: "These Metrics show grouped information per apps group." - labels: [] + labels: + - name: app_group + description: The name of the group defined in the configuration. metrics: - - name: apps.file_deleted + - name: app.ebpf_call_vfs_unlink description: Files deleted unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.vfs_write_call + - name: calls + - name: app.ebpf_call_vfs_write description: Write to disk unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.vfs_write_error + - name: calls + - name: app.ebpf_call_vfs_write_error description: Fails to write unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.vfs_read_call + - name: calls + - name: app.ebpf_call_vfs_read description: Read from disk unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.vfs_read_error + - name: calls + - name: app.ebpf_call_vfs_read_error description: Fails to read unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.vfs_write_bytes + - name: calls + - name: app.ebpf_call_vfs_write_bytes description: Bytes written on disk unit: "bytes/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.vfs_read_bytes + - name: writes + - name: app.ebpf_call_vfs_read_bytes description: Bytes read on disk unit: "bytes/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.vfs_fsync - description: Calls for <code>vfs_fsync</code> + - name: reads + - name: app.ebpf_call_vfs_fsync + description: Calls to vfs_fsync. unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.vfs_fsync_error + - name: calls + - name: app.ebpf_call_vfs_fsync_error description: Sync error unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.vfs_open - description: Calls for <code>vfs_open</code> + - name: calls + - name: app.ebpf_call_vfs_open + description: Calls to vfs_open. unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.vfs_open_error + - name: calls + - name: app.ebpf_call_vfs_open_error description: Open error unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.vfs_create - description: Calls for <code>vfs_create</code> + - name: calls + - name: app.ebpf_call_vfs_create + description: Calls to vfs_create. unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group - - name: apps.vfs_create_error + - name: calls + - name: app.ebpf_call_vfs_create_error description: Create error unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per app group + - name: calls - meta: plugin_name: ebpf.plugin module_name: process diff --git a/collectors/freebsd.plugin/freebsd_devstat.c b/collectors/freebsd.plugin/freebsd_devstat.c index 65b8a2d5a..ca6048a16 100644 --- a/collectors/freebsd.plugin/freebsd_devstat.c +++ b/collectors/freebsd.plugin/freebsd_devstat.c @@ -89,21 +89,21 @@ static size_t disks_added = 0, disks_found = 0; static void disk_free(struct disk *dm) { if (likely(dm->st_io)) - rrdset_is_obsolete(dm->st_io); + rrdset_is_obsolete___safe_from_collector_thread(dm->st_io); if (likely(dm->st_ops)) - rrdset_is_obsolete(dm->st_ops); + rrdset_is_obsolete___safe_from_collector_thread(dm->st_ops); if (likely(dm->st_qops)) - rrdset_is_obsolete(dm->st_qops); + rrdset_is_obsolete___safe_from_collector_thread(dm->st_qops); if (likely(dm->st_util)) - rrdset_is_obsolete(dm->st_util); + rrdset_is_obsolete___safe_from_collector_thread(dm->st_util); if (likely(dm->st_iotime)) - rrdset_is_obsolete(dm->st_iotime); + rrdset_is_obsolete___safe_from_collector_thread(dm->st_iotime); if (likely(dm->st_await)) - rrdset_is_obsolete(dm->st_await); + rrdset_is_obsolete___safe_from_collector_thread(dm->st_await); if (likely(dm->st_avagsz)) - rrdset_is_obsolete(dm->st_avagsz); + rrdset_is_obsolete___safe_from_collector_thread(dm->st_avagsz); if (likely(dm->st_svctm)) - rrdset_is_obsolete(dm->st_svctm); + rrdset_is_obsolete___safe_from_collector_thread(dm->st_svctm); disks_added--; freez(dm->name); diff --git a/collectors/freebsd.plugin/freebsd_getifaddrs.c b/collectors/freebsd.plugin/freebsd_getifaddrs.c index 80a209105..36be68422 100644 --- a/collectors/freebsd.plugin/freebsd_getifaddrs.c +++ b/collectors/freebsd.plugin/freebsd_getifaddrs.c @@ -52,15 +52,15 @@ static size_t network_interfaces_added = 0, network_interfaces_found = 0; static void network_interface_free(struct cgroup_network_interface *ifm) { if (likely(ifm->st_bandwidth)) - rrdset_is_obsolete(ifm->st_bandwidth); + rrdset_is_obsolete___safe_from_collector_thread(ifm->st_bandwidth); if (likely(ifm->st_packets)) - rrdset_is_obsolete(ifm->st_packets); + rrdset_is_obsolete___safe_from_collector_thread(ifm->st_packets); if (likely(ifm->st_errors)) - rrdset_is_obsolete(ifm->st_errors); + rrdset_is_obsolete___safe_from_collector_thread(ifm->st_errors); if (likely(ifm->st_drops)) - rrdset_is_obsolete(ifm->st_drops); + rrdset_is_obsolete___safe_from_collector_thread(ifm->st_drops); if (likely(ifm->st_events)) - rrdset_is_obsolete(ifm->st_events); + rrdset_is_obsolete___safe_from_collector_thread(ifm->st_events); network_interfaces_added--; freez(ifm->name); diff --git a/collectors/freebsd.plugin/freebsd_getmntinfo.c b/collectors/freebsd.plugin/freebsd_getmntinfo.c index cc0abd906..d55eb3d4a 100644 --- a/collectors/freebsd.plugin/freebsd_getmntinfo.c +++ b/collectors/freebsd.plugin/freebsd_getmntinfo.c @@ -39,9 +39,9 @@ static size_t mount_points_added = 0, mount_points_found = 0; static void mount_point_free(struct mount_point *m) { if (likely(m->st_space)) - rrdset_is_obsolete(m->st_space); + rrdset_is_obsolete___safe_from_collector_thread(m->st_space); if (likely(m->st_inodes)) - rrdset_is_obsolete(m->st_inodes); + rrdset_is_obsolete___safe_from_collector_thread(m->st_inodes); mount_points_added--; freez(m->name); @@ -216,7 +216,7 @@ int do_getmntinfo(int update_every, usec_t dt) { (mntbuf[i].f_blocks > 2 || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { if (unlikely(!m->st_space)) { - snprintfz(title, 4096, "Disk Space Usage for %s [%s]", + snprintfz(title, sizeof(title) - 1, "Disk Space Usage for %s [%s]", mntbuf[i].f_mntonname, mntbuf[i].f_mntfromname); m->st_space = rrdset_create_localhost("disk_space", mntbuf[i].f_mntonname, @@ -254,7 +254,7 @@ int do_getmntinfo(int update_every, usec_t dt) { (mntbuf[i].f_files > 1 || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { if (unlikely(!m->st_inodes)) { - snprintfz(title, 4096, "Disk Files (inodes) Usage for %s [%s]", + snprintfz(title, sizeof(title) - 1, "Disk Files (inodes) Usage for %s [%s]", mntbuf[i].f_mntonname, mntbuf[i].f_mntfromname); m->st_inodes = rrdset_create_localhost("disk_inodes", mntbuf[i].f_mntonname, diff --git a/collectors/freebsd.plugin/integrations/dev.cpu.0.freq.md b/collectors/freebsd.plugin/integrations/dev.cpu.0.freq.md index 4172aec21..5f18661d0 100644 --- a/collectors/freebsd.plugin/integrations/dev.cpu.0.freq.md +++ b/collectors/freebsd.plugin/integrations/dev.cpu.0.freq.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "dev.cpu.0.freq" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -100,7 +101,7 @@ sudo ./edit-config Config options | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| dev.cpu.0.freq | Enable or disable CPU Scaling frequency metric. | yes | False | +| dev.cpu.0.freq | Enable or disable CPU Scaling frequency metric. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/dev.cpu.temperature.md b/collectors/freebsd.plugin/integrations/dev.cpu.temperature.md index 20a1a0256..a3736f771 100644 --- a/collectors/freebsd.plugin/integrations/dev.cpu.temperature.md +++ b/collectors/freebsd.plugin/integrations/dev.cpu.temperature.md @@ -4,13 +4,14 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "dev.cpu.temperature" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> # dev.cpu.temperature -<img src="https://netdata.cloud/img/freebsd.org" width="150"/> +<img src="https://netdata.cloud/img/freebsd.svg" width="150"/> Plugin: freebsd.plugin @@ -109,7 +110,7 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| dev.cpu.temperature | Enable or disable CPU temperature metric. | yes | False | +| dev.cpu.temperature | Enable or disable CPU temperature metric. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/devstat.md b/collectors/freebsd.plugin/integrations/devstat.md index bb578d1dd..9d9c6400b 100644 --- a/collectors/freebsd.plugin/integrations/devstat.md +++ b/collectors/freebsd.plugin/integrations/devstat.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "devstat" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -133,18 +134,18 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| enable new disks detected at runtime | Enable or disable possibility to detect new disks. | auto | False | -| performance metrics for pass devices | Enable or disable metrics for disks with type `PASS`. | auto | False | -| total bandwidth for all disks | Enable or disable total bandwidth metric for all disks. | yes | False | -| bandwidth for all disks | Enable or disable bandwidth for all disks metric. | auto | False | -| operations for all disks | Enable or disable operations for all disks metric. | auto | False | -| queued operations for all disks | Enable or disable queued operations for all disks metric. | auto | False | -| utilization percentage for all disks | Enable or disable utilization percentage for all disks metric. | auto | False | -| i/o time for all disks | Enable or disable I/O time for all disks metric. | auto | False | -| average completed i/o time for all disks | Enable or disable average completed I/O time for all disks metric. | auto | False | -| average completed i/o bandwidth for all disks | Enable or disable average completed I/O bandwidth for all disks metric. | auto | False | -| average service time for all disks | Enable or disable average service time for all disks metric. | auto | False | -| disable by default disks matching | Do not create charts for disks listed. | | False | +| enable new disks detected at runtime | Enable or disable possibility to detect new disks. | auto | no | +| performance metrics for pass devices | Enable or disable metrics for disks with type `PASS`. | auto | no | +| total bandwidth for all disks | Enable or disable total bandwidth metric for all disks. | yes | no | +| bandwidth for all disks | Enable or disable bandwidth for all disks metric. | auto | no | +| operations for all disks | Enable or disable operations for all disks metric. | auto | no | +| queued operations for all disks | Enable or disable queued operations for all disks metric. | auto | no | +| utilization percentage for all disks | Enable or disable utilization percentage for all disks metric. | auto | no | +| i/o time for all disks | Enable or disable I/O time for all disks metric. | auto | no | +| average completed i/o time for all disks | Enable or disable average completed I/O time for all disks metric. | auto | no | +| average completed i/o bandwidth for all disks | Enable or disable average completed I/O bandwidth for all disks metric. | auto | no | +| average service time for all disks | Enable or disable average service time for all disks metric. | auto | no | +| disable by default disks matching | Do not create charts for disks listed. | | no | </details> diff --git a/collectors/freebsd.plugin/integrations/getifaddrs.md b/collectors/freebsd.plugin/integrations/getifaddrs.md index 86950622e..63c4ce136 100644 --- a/collectors/freebsd.plugin/integrations/getifaddrs.md +++ b/collectors/freebsd.plugin/integrations/getifaddrs.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "getifaddrs" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -139,18 +140,18 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| enable new interfaces detected at runtime | Enable or disable possibility to discover new interface after plugin starts. | auto | False | -| total bandwidth for physical interfaces | Enable or disable total bandwidth for physical interfaces metric. | auto | False | -| total packets for physical interfaces | Enable or disable total packets for physical interfaces metric. | auto | False | -| total bandwidth for ipv4 interface | Enable or disable total bandwidth for IPv4 interface metric. | auto | False | -| total bandwidth for ipv6 interfaces | Enable or disable total bandwidth for ipv6 interfaces metric. | auto | False | -| bandwidth for all interfaces | Enable or disable bandwidth for all interfaces metric. | auto | False | -| packets for all interfaces | Enable or disable packets for all interfaces metric. | auto | False | -| errors for all interfaces | Enable or disable errors for all interfaces metric. | auto | False | -| drops for all interfaces | Enable or disable drops for all interfaces metric. | auto | False | -| collisions for all interface | Enable or disable collisions for all interface metric. | auto | False | -| disable by default interfaces matching | Do not display data for intterfaces listed. | lo* | False | -| set physical interfaces for system.net | Do not show network traffic for listed interfaces. | igb* ix* cxl* em* ixl* ixlv* bge* ixgbe* vtnet* vmx* re* igc* dwc* | False | +| enable new interfaces detected at runtime | Enable or disable possibility to discover new interface after plugin starts. | auto | no | +| total bandwidth for physical interfaces | Enable or disable total bandwidth for physical interfaces metric. | auto | no | +| total packets for physical interfaces | Enable or disable total packets for physical interfaces metric. | auto | no | +| total bandwidth for ipv4 interface | Enable or disable total bandwidth for IPv4 interface metric. | auto | no | +| total bandwidth for ipv6 interfaces | Enable or disable total bandwidth for ipv6 interfaces metric. | auto | no | +| bandwidth for all interfaces | Enable or disable bandwidth for all interfaces metric. | auto | no | +| packets for all interfaces | Enable or disable packets for all interfaces metric. | auto | no | +| errors for all interfaces | Enable or disable errors for all interfaces metric. | auto | no | +| drops for all interfaces | Enable or disable drops for all interfaces metric. | auto | no | +| collisions for all interface | Enable or disable collisions for all interface metric. | auto | no | +| disable by default interfaces matching | Do not display data for intterfaces listed. | lo* | no | +| set physical interfaces for system.net | Do not show network traffic for listed interfaces. | igb* ix* cxl* em* ixl* ixlv* bge* ixgbe* vtnet* vmx* re* igc* dwc* | no | </details> diff --git a/collectors/freebsd.plugin/integrations/getmntinfo.md b/collectors/freebsd.plugin/integrations/getmntinfo.md index 43c1df165..d26ad1c03 100644 --- a/collectors/freebsd.plugin/integrations/getmntinfo.md +++ b/collectors/freebsd.plugin/integrations/getmntinfo.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "getmntinfo" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -116,11 +117,11 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| enable new mount points detected at runtime | Cheeck new mount points during runtime. | auto | False | -| space usage for all disks | Enable or disable space usage for all disks metric. | auto | False | -| inodes usage for all disks | Enable or disable inodes usage for all disks metric. | auto | False | -| exclude space metrics on paths | Do not show metrics for listed paths. | /proc/* | False | -| exclude space metrics on filesystems | Do not monitor listed filesystems. | autofs procfs subfs devfs none | False | +| enable new mount points detected at runtime | Cheeck new mount points during runtime. | auto | no | +| space usage for all disks | Enable or disable space usage for all disks metric. | auto | no | +| inodes usage for all disks | Enable or disable inodes usage for all disks metric. | auto | no | +| exclude space metrics on paths | Do not show metrics for listed paths. | /proc/* | no | +| exclude space metrics on filesystems | Do not monitor listed filesystems. | autofs procfs subfs devfs none | no | </details> diff --git a/collectors/freebsd.plugin/integrations/hw.intrcnt.md b/collectors/freebsd.plugin/integrations/hw.intrcnt.md index 5865a6f15..49164c369 100644 --- a/collectors/freebsd.plugin/integrations/hw.intrcnt.md +++ b/collectors/freebsd.plugin/integrations/hw.intrcnt.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "hw.intrcnt" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -110,7 +111,7 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| hw.intrcnt | Enable or disable Interrupts metric. | yes | False | +| hw.intrcnt | Enable or disable Interrupts metric. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/ipfw.md b/collectors/freebsd.plugin/integrations/ipfw.md index 4bd4d120b..84e023bdf 100644 --- a/collectors/freebsd.plugin/integrations/ipfw.md +++ b/collectors/freebsd.plugin/integrations/ipfw.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "ipfw" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -113,9 +114,9 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| counters for static rules | Enable or disable counters for static rules metric. | yes | False | -| number of dynamic rules | Enable or disable number of dynamic rules metric. | yes | False | -| allocated memory | Enable or disable allocated memory metric. | yes | False | +| counters for static rules | Enable or disable counters for static rules metric. | yes | no | +| number of dynamic rules | Enable or disable number of dynamic rules metric. | yes | no | +| allocated memory | Enable or disable allocated memory metric. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/kern.cp_time.md b/collectors/freebsd.plugin/integrations/kern.cp_time.md index 8c509671b..95bdb8d90 100644 --- a/collectors/freebsd.plugin/integrations/kern.cp_time.md +++ b/collectors/freebsd.plugin/integrations/kern.cp_time.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "kern.cp_time" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -128,7 +129,7 @@ The netdata main configuration file. | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| kern.cp_time | Enable or disable Total CPU usage. | yes | False | +| kern.cp_time | Enable or disable Total CPU usage. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/kern.ipc.msq.md b/collectors/freebsd.plugin/integrations/kern.ipc.msq.md index 56fb11002..e7457e0c1 100644 --- a/collectors/freebsd.plugin/integrations/kern.ipc.msq.md +++ b/collectors/freebsd.plugin/integrations/kern.ipc.msq.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "kern.ipc.msq" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -111,7 +112,7 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| kern.ipc.msq | Enable or disable IPC message queue metric. | yes | False | +| kern.ipc.msq | Enable or disable IPC message queue metric. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/kern.ipc.sem.md b/collectors/freebsd.plugin/integrations/kern.ipc.sem.md index 6dc7d15a1..7bf7235e6 100644 --- a/collectors/freebsd.plugin/integrations/kern.ipc.sem.md +++ b/collectors/freebsd.plugin/integrations/kern.ipc.sem.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "kern.ipc.sem" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -116,7 +117,7 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| kern.ipc.sem | Enable or disable semaphore metrics. | yes | False | +| kern.ipc.sem | Enable or disable semaphore metrics. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/kern.ipc.shm.md b/collectors/freebsd.plugin/integrations/kern.ipc.shm.md index e29aa9617..1f10c1e6e 100644 --- a/collectors/freebsd.plugin/integrations/kern.ipc.shm.md +++ b/collectors/freebsd.plugin/integrations/kern.ipc.shm.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "kern.ipc.shm" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -110,7 +111,7 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| kern.ipc.shm | Enable or disable shared memory metric. | yes | False | +| kern.ipc.shm | Enable or disable shared memory metric. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/net.inet.icmp.stats.md b/collectors/freebsd.plugin/integrations/net.inet.icmp.stats.md index ad3b7c2c1..29562bc9a 100644 --- a/collectors/freebsd.plugin/integrations/net.inet.icmp.stats.md +++ b/collectors/freebsd.plugin/integrations/net.inet.icmp.stats.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "net.inet.icmp.stats" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -111,9 +112,9 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| IPv4 ICMP packets | Enable or disable IPv4 ICMP packets metric. | yes | False | -| IPv4 ICMP error | Enable or disable IPv4 ICMP error metric. | yes | False | -| IPv4 ICMP messages | Enable or disable IPv4 ICMP messages metric. | yes | False | +| IPv4 ICMP packets | Enable or disable IPv4 ICMP packets metric. | yes | no | +| IPv4 ICMP error | Enable or disable IPv4 ICMP error metric. | yes | no | +| IPv4 ICMP messages | Enable or disable IPv4 ICMP messages metric. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/net.inet.ip.stats.md b/collectors/freebsd.plugin/integrations/net.inet.ip.stats.md index e3a048391..785767e89 100644 --- a/collectors/freebsd.plugin/integrations/net.inet.ip.stats.md +++ b/collectors/freebsd.plugin/integrations/net.inet.ip.stats.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "net.inet.ip.stats" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -112,10 +113,10 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| ipv4 packets | Enable or disable IPv4 packets metric. | yes | False | -| ipv4 fragments sent | Enable or disable IPv4 fragments sent metric. | yes | False | -| ipv4 fragments assembly | Enable or disable IPv4 fragments assembly metric. | yes | False | -| ipv4 errors | Enable or disable IPv4 errors metric. | yes | False | +| ipv4 packets | Enable or disable IPv4 packets metric. | yes | no | +| ipv4 fragments sent | Enable or disable IPv4 fragments sent metric. | yes | no | +| ipv4 fragments assembly | Enable or disable IPv4 fragments assembly metric. | yes | no | +| ipv4 errors | Enable or disable IPv4 errors metric. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/net.inet.tcp.states.md b/collectors/freebsd.plugin/integrations/net.inet.tcp.states.md index 958d413af..5b4144580 100644 --- a/collectors/freebsd.plugin/integrations/net.inet.tcp.states.md +++ b/collectors/freebsd.plugin/integrations/net.inet.tcp.states.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "net.inet.tcp.states" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -114,7 +115,7 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| net.inet.tcp.states | Enable or disable TCP state metric. | yes | False | +| net.inet.tcp.states | Enable or disable TCP state metric. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/net.inet.tcp.stats.md b/collectors/freebsd.plugin/integrations/net.inet.tcp.stats.md index 9aaca2656..be779740d 100644 --- a/collectors/freebsd.plugin/integrations/net.inet.tcp.stats.md +++ b/collectors/freebsd.plugin/integrations/net.inet.tcp.stats.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "net.inet.tcp.stats" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -124,14 +125,14 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| ipv4 TCP packets | Enable or disable ipv4 TCP packets metric. | yes | False | -| ipv4 TCP errors | Enable or disable pv4 TCP errors metric. | yes | False | -| ipv4 TCP handshake issues | Enable or disable ipv4 TCP handshake issue metric. | yes | False | -| TCP connection aborts | Enable or disable TCP connection aborts metric. | auto | False | -| TCP out-of-order queue | Enable or disable TCP out-of-order queue metric. | auto | False | -| TCP SYN cookies | Enable or disable TCP SYN cookies metric. | auto | False | -| TCP listen issues | Enable or disable TCP listen issues metric. | auto | False | -| ECN packets | Enable or disable ECN packets metric. | auto | False | +| ipv4 TCP packets | Enable or disable ipv4 TCP packets metric. | yes | no | +| ipv4 TCP errors | Enable or disable pv4 TCP errors metric. | yes | no | +| ipv4 TCP handshake issues | Enable or disable ipv4 TCP handshake issue metric. | yes | no | +| TCP connection aborts | Enable or disable TCP connection aborts metric. | auto | no | +| TCP out-of-order queue | Enable or disable TCP out-of-order queue metric. | auto | no | +| TCP SYN cookies | Enable or disable TCP SYN cookies metric. | auto | no | +| TCP listen issues | Enable or disable TCP listen issues metric. | auto | no | +| ECN packets | Enable or disable ECN packets metric. | auto | no | </details> diff --git a/collectors/freebsd.plugin/integrations/net.inet.udp.stats.md b/collectors/freebsd.plugin/integrations/net.inet.udp.stats.md index 6e9d4a7e0..d3da40455 100644 --- a/collectors/freebsd.plugin/integrations/net.inet.udp.stats.md +++ b/collectors/freebsd.plugin/integrations/net.inet.udp.stats.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "net.inet.udp.stats" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -116,8 +117,8 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| ipv4 UDP packets | Enable or disable ipv4 UDP packets metric. | yes | False | -| ipv4 UDP errors | Enable or disable ipv4 UDP errors metric. | yes | False | +| ipv4 UDP packets | Enable or disable ipv4 UDP packets metric. | yes | no | +| ipv4 UDP errors | Enable or disable ipv4 UDP errors metric. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/net.inet6.icmp6.stats.md b/collectors/freebsd.plugin/integrations/net.inet6.icmp6.stats.md index b10088759..7344b79b3 100644 --- a/collectors/freebsd.plugin/integrations/net.inet6.icmp6.stats.md +++ b/collectors/freebsd.plugin/integrations/net.inet6.icmp6.stats.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "net.inet6.icmp6.stats" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -115,13 +116,13 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| icmp | Enable or disable ICMP metric. | auto | False | -| icmp redirects | Enable or disable ICMP redirects metric. | auto | False | -| icmp errors | Enable or disable ICMP errors metric. | auto | False | -| icmp echos | Enable or disable ICMP echos metric. | auto | False | -| icmp router | Enable or disable ICMP router metric. | auto | False | -| icmp neighbor | Enable or disable ICMP neighbor metric. | auto | False | -| icmp types | Enable or disable ICMP types metric. | auto | False | +| icmp | Enable or disable ICMP metric. | auto | no | +| icmp redirects | Enable or disable ICMP redirects metric. | auto | no | +| icmp errors | Enable or disable ICMP errors metric. | auto | no | +| icmp echos | Enable or disable ICMP echos metric. | auto | no | +| icmp router | Enable or disable ICMP router metric. | auto | no | +| icmp neighbor | Enable or disable ICMP neighbor metric. | auto | no | +| icmp types | Enable or disable ICMP types metric. | auto | no | </details> diff --git a/collectors/freebsd.plugin/integrations/net.inet6.ip6.stats.md b/collectors/freebsd.plugin/integrations/net.inet6.ip6.stats.md index f282bb972..d9128b529 100644 --- a/collectors/freebsd.plugin/integrations/net.inet6.ip6.stats.md +++ b/collectors/freebsd.plugin/integrations/net.inet6.ip6.stats.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "net.inet6.ip6.stats" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -112,10 +113,10 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| ipv6 packets | Enable or disable ipv6 packet metric. | auto | False | -| ipv6 fragments sent | Enable or disable ipv6 fragments sent metric. | auto | False | -| ipv6 fragments assembly | Enable or disable ipv6 fragments assembly metric. | auto | False | -| ipv6 errors | Enable or disable ipv6 errors metric. | auto | False | +| ipv6 packets | Enable or disable ipv6 packet metric. | auto | no | +| ipv6 fragments sent | Enable or disable ipv6 fragments sent metric. | auto | no | +| ipv6 fragments assembly | Enable or disable ipv6 fragments assembly metric. | auto | no | +| ipv6 errors | Enable or disable ipv6 errors metric. | auto | no | </details> diff --git a/collectors/freebsd.plugin/integrations/net.isr.md b/collectors/freebsd.plugin/integrations/net.isr.md index a378ea30f..2d75b825a 100644 --- a/collectors/freebsd.plugin/integrations/net.isr.md +++ b/collectors/freebsd.plugin/integrations/net.isr.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "net.isr" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -128,8 +129,8 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| netisr | Enable or disable general vision about softnet stat metrics. | yes | False | -| netisr per core | Enable or disable softnet stat metric per core. | yes | False | +| netisr | Enable or disable general vision about softnet stat metrics. | yes | no | +| netisr per core | Enable or disable softnet stat metric per core. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/system.ram.md b/collectors/freebsd.plugin/integrations/system.ram.md index a1163e9ca..7d4974922 100644 --- a/collectors/freebsd.plugin/integrations/system.ram.md +++ b/collectors/freebsd.plugin/integrations/system.ram.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "system.ram" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -118,7 +119,7 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| system.ram | Enable or disable system RAM metric. | yes | False | +| system.ram | Enable or disable system RAM metric. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/uptime.md b/collectors/freebsd.plugin/integrations/uptime.md index 1674be026..e3f1db3f1 100644 --- a/collectors/freebsd.plugin/integrations/uptime.md +++ b/collectors/freebsd.plugin/integrations/uptime.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "uptime" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -109,7 +110,7 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| vm.loadavg | Enable or disable load average metric. | yes | False | +| vm.loadavg | Enable or disable load average metric. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/vm.loadavg.md b/collectors/freebsd.plugin/integrations/vm.loadavg.md index a934cf8f4..88c47b7a4 100644 --- a/collectors/freebsd.plugin/integrations/vm.loadavg.md +++ b/collectors/freebsd.plugin/integrations/vm.loadavg.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "vm.loadavg" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -117,7 +118,7 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| vm.loadavg | Enable or disable load average metric. | yes | False | +| vm.loadavg | Enable or disable load average metric. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/vm.stats.sys.v_intr.md b/collectors/freebsd.plugin/integrations/vm.stats.sys.v_intr.md index 58a0d4ced..c3e7466e9 100644 --- a/collectors/freebsd.plugin/integrations/vm.stats.sys.v_intr.md +++ b/collectors/freebsd.plugin/integrations/vm.stats.sys.v_intr.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "vm.stats.sys.v_intr" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -109,7 +110,7 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| vm.stats.sys.v_intr | Enable or disable device interrupts metric. | yes | False | +| vm.stats.sys.v_intr | Enable or disable device interrupts metric. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/vm.stats.sys.v_soft.md b/collectors/freebsd.plugin/integrations/vm.stats.sys.v_soft.md index bc934239c..ce914bb50 100644 --- a/collectors/freebsd.plugin/integrations/vm.stats.sys.v_soft.md +++ b/collectors/freebsd.plugin/integrations/vm.stats.sys.v_soft.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "vm.stats.sys.v_soft" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -109,7 +110,7 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| vm.stats.sys.v_soft | Enable or disable software inerrupts metric. | yes | False | +| vm.stats.sys.v_soft | Enable or disable software inerrupts metric. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/vm.stats.sys.v_swtch.md b/collectors/freebsd.plugin/integrations/vm.stats.sys.v_swtch.md index 4e16a2869..cbcee311f 100644 --- a/collectors/freebsd.plugin/integrations/vm.stats.sys.v_swtch.md +++ b/collectors/freebsd.plugin/integrations/vm.stats.sys.v_swtch.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "vm.stats.sys.v_swtch" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -110,7 +111,7 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| vm.stats.sys.v_swtch | Enable or disable CPU context switch metric. | yes | False | +| vm.stats.sys.v_swtch | Enable or disable CPU context switch metric. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/vm.stats.vm.v_pgfaults.md b/collectors/freebsd.plugin/integrations/vm.stats.vm.v_pgfaults.md index 5c79c3443..19230dd56 100644 --- a/collectors/freebsd.plugin/integrations/vm.stats.vm.v_pgfaults.md +++ b/collectors/freebsd.plugin/integrations/vm.stats.vm.v_pgfaults.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "vm.stats.vm.v_pgfaults" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -109,7 +110,7 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| vm.stats.vm.v_pgfaults | Enable or disable Memory page fault metric. | yes | False | +| vm.stats.vm.v_pgfaults | Enable or disable Memory page fault metric. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/vm.stats.vm.v_swappgs.md b/collectors/freebsd.plugin/integrations/vm.stats.vm.v_swappgs.md index 8ce678c97..c6caaa682 100644 --- a/collectors/freebsd.plugin/integrations/vm.stats.vm.v_swappgs.md +++ b/collectors/freebsd.plugin/integrations/vm.stats.vm.v_swappgs.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "vm.stats.vm.v_swappgs" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -114,7 +115,7 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| vm.stats.vm.v_swappgs | Enable or disable infoormation about SWAP I/O metric. | yes | False | +| vm.stats.vm.v_swappgs | Enable or disable infoormation about SWAP I/O metric. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/vm.swap_info.md b/collectors/freebsd.plugin/integrations/vm.swap_info.md index 345e82327..caa22b3dc 100644 --- a/collectors/freebsd.plugin/integrations/vm.swap_info.md +++ b/collectors/freebsd.plugin/integrations/vm.swap_info.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "vm.swap_info" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -114,7 +115,7 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| vm.swap_info | Enable or disable SWAP metrics. | yes | False | +| vm.swap_info | Enable or disable SWAP metrics. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/vm.vmtotal.md b/collectors/freebsd.plugin/integrations/vm.vmtotal.md index 3d9d91633..f3f631af6 100644 --- a/collectors/freebsd.plugin/integrations/vm.vmtotal.md +++ b/collectors/freebsd.plugin/integrations/vm.vmtotal.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "vm.vmtotal" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -116,9 +117,9 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| enable total processes | Number of active processes. | yes | False | -| processes running | Show number of processes running or blocked. | yes | False | -| real memory | Memeory used on host. | yes | False | +| enable total processes | Number of active processes. | yes | no | +| processes running | Show number of processes running or blocked. | yes | no | +| real memory | Memeory used on host. | yes | no | </details> diff --git a/collectors/freebsd.plugin/integrations/zfs.md b/collectors/freebsd.plugin/integrations/zfs.md index bb099ddcb..99f10026d 100644 --- a/collectors/freebsd.plugin/integrations/zfs.md +++ b/collectors/freebsd.plugin/integrations/zfs.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freebsd.pl sidebar_label: "zfs" learn_status: "Published" learn_rel_path: "Data Collection/FreeBSD" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -141,7 +142,7 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| show zero charts | Do not show charts with zero metrics. | no | False | +| show zero charts | Do not show charts with zero metrics. | no | no | </details> diff --git a/collectors/freebsd.plugin/metadata.yaml b/collectors/freebsd.plugin/metadata.yaml index d68fc3137..36fba2430 100644 --- a/collectors/freebsd.plugin/metadata.yaml +++ b/collectors/freebsd.plugin/metadata.yaml @@ -323,7 +323,7 @@ modules: link: "https://www.freebsd.org/" categories: - data-collection.freebsd - icon_filename: "freebsd.org" + icon_filename: "freebsd.svg" related_resources: integrations: list: [] diff --git a/collectors/freeipmi.plugin/freeipmi_plugin.c b/collectors/freeipmi.plugin/freeipmi_plugin.c index 63147d621..6ec9b698b 100644 --- a/collectors/freeipmi.plugin/freeipmi_plugin.c +++ b/collectors/freeipmi.plugin/freeipmi_plugin.c @@ -22,6 +22,10 @@ #include "libnetdata/libnetdata.h" #include "libnetdata/required_dummies.h" +#define FREEIPMI_GLOBAL_FUNCTION_SENSORS() do { \ + fprintf(stdout, PLUGINSD_KEYWORD_FUNCTION " GLOBAL \"ipmi-sensors\" %d \"%s\"\n", 5, "Displays current sensor state and readings"); \ + } while(0) + // component names, based on our patterns #define NETDATA_SENSOR_COMPONENT_MEMORY_MODULE "Memory Module" #define NETDATA_SENSOR_COMPONENT_MEMORY "Memory" @@ -83,6 +87,12 @@ static void netdata_update_ipmi_sel_events_count(struct netdata_ipmi_state *stat /* Communication Configuration - Initialize accordingly */ +static netdata_mutex_t stdout_mutex = NETDATA_MUTEX_INITIALIZER; +static bool function_plugin_should_exit = false; + +int update_every = IPMI_SENSORS_MIN_UPDATE_EVERY; // this is the minimum update frequency +int update_every_sel = IPMI_SEL_MIN_UPDATE_EVERY; // this is the minimum update frequency for SEL events + /* Hostname, NULL for In-band communication, non-null for a hostname */ char *hostname = NULL; @@ -707,6 +717,8 @@ struct netdata_ipmi_state { } updates; }; +struct netdata_ipmi_state state = {0}; + // ---------------------------------------------------------------------------- // excluded record ids maintenance (both for sensor data and state) @@ -1297,6 +1309,7 @@ static size_t send_ipmi_sensor_metrics_to_netdata(struct netdata_ipmi_state *sta int update_every = (int)(state->sensors.freq_ut / USEC_PER_SEC); struct sensor *sn; + netdata_mutex_lock(&stdout_mutex); // generate the CHART/DIMENSION lines, if we have to dfe_start_reentrant(state->sensors.dict, sn) { if(unlikely(!sn->do_metric && !sn->do_state)) @@ -1396,12 +1409,16 @@ static size_t send_ipmi_sensor_metrics_to_netdata(struct netdata_ipmi_state *sta } dfe_done(sn); + netdata_mutex_unlock(&stdout_mutex); + return total_sensors_sent; } static size_t send_ipmi_sel_metrics_to_netdata(struct netdata_ipmi_state *state) { static bool sel_chart_generated = false; + netdata_mutex_lock(&stdout_mutex); + if(likely(state->sel.status == ICS_RUNNING)) { if(unlikely(!sel_chart_generated)) { sel_chart_generated = true; @@ -1422,39 +1439,197 @@ static size_t send_ipmi_sel_metrics_to_netdata(struct netdata_ipmi_state *state) ); } + netdata_mutex_unlock(&stdout_mutex); + return state->sel.events; } // ---------------------------------------------------------------------------- -// main, command line arguments parsing -int main (int argc, char **argv) { - bool netdata_do_sel = IPMI_ENABLE_SEL_BY_DEFAULT; +static const char *get_sensor_state_string(struct sensor *sn) { + switch (sn->sensor_state) { + case IPMI_MONITORING_STATE_NOMINAL: + return "nominal"; + case IPMI_MONITORING_STATE_WARNING: + return "warning"; + case IPMI_MONITORING_STATE_CRITICAL: + return "critical"; + default: + return "unknown"; + } +} - stderror = stderr; - clocks_init(); +static const char *get_sensor_function_priority(struct sensor *sn) { + switch (sn->sensor_state) { + case IPMI_MONITORING_STATE_WARNING: + return "warning"; + case IPMI_MONITORING_STATE_CRITICAL: + return "critical"; + default: + return "normal"; + } +} - int update_every = IPMI_SENSORS_MIN_UPDATE_EVERY; // this is the minimum update frequency - int update_every_sel = IPMI_SEL_MIN_UPDATE_EVERY; // this is the minimum update frequency for SEL events - bool debug = false; +static void freeimi_function_sensors(const char *transaction, char *function __maybe_unused, int timeout __maybe_unused, bool *cancelled __maybe_unused) { + time_t expires = now_realtime_sec() + update_every; - // ------------------------------------------------------------------------ - // initialization of netdata plugin + BUFFER *wb = buffer_create(PLUGINSD_LINE_MAX, NULL); + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_NEWLINE_ON_ARRAY_ITEMS); + buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); + buffer_json_member_add_string(wb, "type", "table"); + buffer_json_member_add_time_t(wb, "update_every", update_every); + buffer_json_member_add_string(wb, "help", "View IPMI sensor readings and its state"); + buffer_json_member_add_array(wb, "data"); - program_name = "freeipmi.plugin"; + struct sensor *sn; + dfe_start_reentrant(state.sensors.dict, sn) { + if (unlikely(!sn->do_metric && !sn->do_state)) + continue; + + double reading = NAN; + switch (sn->sensor_reading_type) { + case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32: + reading = (double)sn->sensor_reading.uint32_value; + break; + case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE: + reading = (double)(sn->sensor_reading.double_value); + break; + case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL: + reading = (double)sn->sensor_reading.bool_value; + break; + } - // disable syslog - error_log_syslog = 0; + buffer_json_add_array_item_array(wb); - // set errors flood protection to 100 logs per hour - error_log_errors_per_period = 100; - error_log_throttle_period = 3600; + buffer_json_add_array_item_string(wb, sn->sensor_name); + buffer_json_add_array_item_string(wb, sn->type); + buffer_json_add_array_item_string(wb, sn->component); + buffer_json_add_array_item_double(wb, reading); + buffer_json_add_array_item_string(wb, sn->units); + buffer_json_add_array_item_string(wb, get_sensor_state_string(sn)); - log_set_global_severity_for_external_plugins(); + buffer_json_add_array_item_object(wb); + buffer_json_member_add_string(wb, "severity", get_sensor_function_priority(sn)); + buffer_json_object_close(wb); - // initialize the threads + buffer_json_array_close(wb); + } + dfe_done(sn); + + buffer_json_array_close(wb); // data + buffer_json_member_add_object(wb, "columns"); + { + size_t field_id = 0; + + buffer_rrdf_table_add_field(wb, field_id++, "Sensor", "Sensor Name", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_STICKY | RRDF_FIELD_OPTS_FULL_WIDTH, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Type", "Sensor Type", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Component", "Sensor Component", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Reading", "Sensor Current Reading", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 2, NULL, 0, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Units", "Sensor Reading Units", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "State", "Sensor State", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY, + NULL); + buffer_rrdf_table_add_field( + wb, field_id++, + "rowOptions", "rowOptions", + RRDF_FIELD_TYPE_NONE, + RRDR_FIELD_VISUAL_ROW_OPTIONS, + RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN, + RRDF_FIELD_SORT_FIXED, + NULL, + RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_DUMMY, + NULL); + } + + buffer_json_object_close(wb); // columns + buffer_json_member_add_string(wb, "default_sort_column", "Type"); + + buffer_json_member_add_object(wb, "charts"); + { + buffer_json_member_add_object(wb, "Sensors"); + { + buffer_json_member_add_string(wb, "name", "Sensors"); + buffer_json_member_add_string(wb, "type", "stacked-bar"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "Sensor"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); // charts + + buffer_json_member_add_array(wb, "default_charts"); + { + buffer_json_add_array_item_array(wb); + buffer_json_add_array_item_string(wb, "Sensors"); + buffer_json_add_array_item_string(wb, "Component"); + buffer_json_array_close(wb); + + buffer_json_add_array_item_array(wb); + buffer_json_add_array_item_string(wb, "Sensors"); + buffer_json_add_array_item_string(wb, "State"); + buffer_json_array_close(wb); + } + buffer_json_array_close(wb); + + buffer_json_member_add_time_t(wb, "expires", now_realtime_sec() + 1); + buffer_json_finalize(wb); + + pluginsd_function_result_to_stdout(transaction, HTTP_RESP_OK, "application/json", expires, wb); + + buffer_free(wb); +} + +// ---------------------------------------------------------------------------- +// main, command line arguments parsing + +static void plugin_exit(int code) { + fflush(stdout); + function_plugin_should_exit = true; + exit(code); +} + +int main (int argc, char **argv) { + clocks_init(); + nd_log_initialize_for_external_plugins("freeipmi.plugin"); netdata_threads_init_for_external_plugins(0); // set the default threads stack size here + bool netdata_do_sel = IPMI_ENABLE_SEL_BY_DEFAULT; + + bool debug = false; + // ------------------------------------------------------------------------ // parse command line parameters @@ -1728,7 +1903,7 @@ int main (int argc, char **argv) { errno = 0; if(freq_s && freq_s < update_every) - collector_error("%s(): update frequency %d seconds is too small for IPMI. Using %d.", + collector_info("%s(): update frequency %d seconds is too small for IPMI. Using %d.", __FUNCTION__, freq_s, update_every); update_every = freq_s = MAX(freq_s, update_every); @@ -1801,16 +1976,17 @@ int main (int argc, char **argv) { heartbeat_t hb; heartbeat_init(&hb); + for(iteration = 0; 1 ; iteration++) { usec_t dt = heartbeat_next(&hb, step); if (!tty) { + netdata_mutex_lock(&stdout_mutex); fprintf(stdout, "\n"); // keepalive to avoid parser read timeout (2 minutes) during ipmi_detect_speed_secs() fflush(stdout); + netdata_mutex_unlock(&stdout_mutex); } - struct netdata_ipmi_state state = {0 }; - spinlock_lock(&sensors_data.spinlock); state.sensors = sensors_data.state.sensors; spinlock_unlock(&sensors_data.spinlock); @@ -1827,8 +2003,7 @@ int main (int argc, char **argv) { __FUNCTION__, (size_t)((now_monotonic_usec() - state.sensors.last_iteration_ut) / USEC_PER_SEC)); fprintf(stdout, "EXIT\n"); - fflush(stdout); - exit(0); + plugin_exit(0); } break; @@ -1838,14 +2013,12 @@ int main (int argc, char **argv) { case ICS_INIT_FAILED: collector_error("%s(): sensors failed to initialize. Calling DISABLE.", __FUNCTION__); fprintf(stdout, "DISABLE\n"); - fflush(stdout); - exit(0); + plugin_exit(0); case ICS_FAILED: collector_error("%s(): sensors fails repeatedly to collect metrics. Exiting to restart.", __FUNCTION__); fprintf(stdout, "EXIT\n"); - fflush(stdout); - exit(0); + plugin_exit(0); } if(netdata_do_sel) { @@ -1865,6 +2038,16 @@ int main (int argc, char **argv) { if(unlikely(debug)) fprintf(stderr, "%s: calling send_ipmi_sensor_metrics_to_netdata()\n", program_name); + static bool add_func_sensors = true; + if (add_func_sensors) { + add_func_sensors = false; + struct functions_evloop_globals *wg = + functions_evloop_init(1, "FREEIPMI", &stdout_mutex, &function_plugin_should_exit); + functions_evloop_add_function( + wg, "ipmi-sensors", freeimi_function_sensors, PLUGINS_FUNCTIONS_TIMEOUT_DEFAULT); + FREEIPMI_GLOBAL_FUNCTION_SENSORS(); + } + state.updates.now_ut = now_monotonic_usec(); send_ipmi_sensor_metrics_to_netdata(&state); @@ -1880,6 +2063,8 @@ int main (int argc, char **argv) { , state.sensors.collected ); + netdata_mutex_lock(&stdout_mutex); + if (!global_chart_created) { global_chart_created = true; @@ -1899,10 +2084,11 @@ int main (int argc, char **argv) { if (now_monotonic_sec() - started_t > IPMI_RESTART_EVERY_SECONDS) { collector_info("%s(): reached my lifetime expectancy. Exiting to restart.", __FUNCTION__); fprintf(stdout, "EXIT\n"); - fflush(stdout); - exit(0); + plugin_exit(0); } fflush(stdout); + + netdata_mutex_unlock(&stdout_mutex); } } diff --git a/collectors/freeipmi.plugin/integrations/intelligent_platform_management_interface_ipmi.md b/collectors/freeipmi.plugin/integrations/intelligent_platform_management_interface_ipmi.md index 6d894667b..c0293fc37 100644 --- a/collectors/freeipmi.plugin/integrations/intelligent_platform_management_interface_ipmi.md +++ b/collectors/freeipmi.plugin/integrations/intelligent_platform_management_interface_ipmi.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/freeipmi.p sidebar_label: "Intelligent Platform Management Interface (IPMI)" learn_status: "Published" learn_rel_path: "Data Collection/Hardware Devices and Sensors" +most_popular: True message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -170,30 +171,30 @@ To display a help message listing the available command line options: | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| SECONDS | Data collection frequency. | | False | -| debug | Enable verbose output. | disabled | False | -| no-sel | Disable System Event Log (SEL) collection. | disabled | False | -| reread-sdr-cache | Re-read SDR cache on every iteration. | disabled | False | -| interpret-oem-data | Attempt to parse OEM data. | disabled | False | -| assume-system-event-record | treat illegal SEL events records as normal. | disabled | False | -| ignore-non-interpretable-sensors | Do not read sensors that cannot be interpreted. | disabled | False | -| bridge-sensors | Bridge sensors not owned by the BMC. | disabled | False | -| shared-sensors | Enable shared sensors if found. | disabled | False | -| no-discrete-reading | Do not read sensors if their event/reading type code is invalid. | enabled | False | -| ignore-scanning-disabled | Ignore the scanning bit and read sensors no matter what. | disabled | False | -| assume-bmc-owner | Assume the BMC is the sensor owner no matter what (usually bridging is required too). | disabled | False | -| hostname HOST | Remote IPMI hostname or IP address. | local | False | -| username USER | Username that will be used when connecting to the remote host. | | False | -| password PASS | Password that will be used when connecting to the remote host. | | False | -| noauthcodecheck / no-auth-code-check | Don't check the authentication codes returned. | | False | -| driver-type IPMIDRIVER | Specify the driver type to use instead of doing an auto selection. The currently available outofband drivers are LAN and LAN_2_0, which perform IPMI 1.5 and IPMI 2.0 respectively. The currently available inband drivers are KCS, SSIF, OPENIPMI and SUNBMC. | | False | -| sdr-cache-dir PATH | SDR cache files directory. | /tmp | False | -| sensor-config-file FILE | Sensors configuration filename. | system default | False | -| sel-config-file FILE | SEL configuration filename. | system default | False | -| ignore N1,N2,N3,... | Sensor IDs to ignore. | | False | -| ignore-status N1,N2,N3,... | Sensor IDs to ignore status (nominal/warning/critical). | | False | -| -v | Print version and exit. | | False | -| --help | Print usage message and exit. | | False | +| SECONDS | Data collection frequency. | | no | +| debug | Enable verbose output. | disabled | no | +| no-sel | Disable System Event Log (SEL) collection. | disabled | no | +| reread-sdr-cache | Re-read SDR cache on every iteration. | disabled | no | +| interpret-oem-data | Attempt to parse OEM data. | disabled | no | +| assume-system-event-record | treat illegal SEL events records as normal. | disabled | no | +| ignore-non-interpretable-sensors | Do not read sensors that cannot be interpreted. | disabled | no | +| bridge-sensors | Bridge sensors not owned by the BMC. | disabled | no | +| shared-sensors | Enable shared sensors if found. | disabled | no | +| no-discrete-reading | Do not read sensors if their event/reading type code is invalid. | enabled | no | +| ignore-scanning-disabled | Ignore the scanning bit and read sensors no matter what. | disabled | no | +| assume-bmc-owner | Assume the BMC is the sensor owner no matter what (usually bridging is required too). | disabled | no | +| hostname HOST | Remote IPMI hostname or IP address. | local | no | +| username USER | Username that will be used when connecting to the remote host. | | no | +| password PASS | Password that will be used when connecting to the remote host. | | no | +| noauthcodecheck / no-auth-code-check | Don't check the authentication codes returned. | | no | +| driver-type IPMIDRIVER | Specify the driver type to use instead of doing an auto selection. The currently available outofband drivers are LAN and LAN_2_0, which perform IPMI 1.5 and IPMI 2.0 respectively. The currently available inband drivers are KCS, SSIF, OPENIPMI and SUNBMC. | | no | +| sdr-cache-dir PATH | SDR cache files directory. | /tmp | no | +| sensor-config-file FILE | Sensors configuration filename. | system default | no | +| sel-config-file FILE | SEL configuration filename. | system default | no | +| ignore N1,N2,N3,... | Sensor IDs to ignore. | | no | +| ignore-status N1,N2,N3,... | Sensor IDs to ignore status (nominal/warning/critical). | | no | +| -v | Print version and exit. | | no | +| --help | Print usage message and exit. | | no | </details> diff --git a/collectors/idlejitter.plugin/integrations/idle_os_jitter.md b/collectors/idlejitter.plugin/integrations/idle_os_jitter.md index da650cde9..44463f6f5 100644 --- a/collectors/idlejitter.plugin/integrations/idle_os_jitter.md +++ b/collectors/idlejitter.plugin/integrations/idle_os_jitter.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/idlejitter sidebar_label: "Idle OS Jitter" learn_status: "Published" learn_rel_path: "Data Collection/Synthetic Checks" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -109,7 +110,7 @@ This integration only supports a single configuration option, and most users wil | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| loop time in ms | Specifies the target time for the data collection thread to sleep, measured in miliseconds. | 20 | False | +| loop time in ms | Specifies the target time for the data collection thread to sleep, measured in miliseconds. | 20 | no | #### Examples There are no configuration examples. diff --git a/collectors/ioping.plugin/integrations/ioping.md b/collectors/ioping.plugin/integrations/ioping.md index 4c16d2e3a..39a07ed62 100644 --- a/collectors/ioping.plugin/integrations/ioping.md +++ b/collectors/ioping.plugin/integrations/ioping.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/ioping.plu sidebar_label: "IOPing" learn_status: "Published" learn_rel_path: "Data Collection/Synthetic Checks" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -108,10 +109,10 @@ sudo ./edit-config ioping.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Data collection frequency. | 1s | False | -| destination | The directory/file/device to ioping. | | True | -| request_size | The request size in bytes to ioping the destination (symbolic modifiers are supported) | 4k | False | -| ioping_opts | Options passed to `ioping` commands. | -T 1000000 | False | +| update_every | Data collection frequency. | 1s | no | +| destination | The directory/file/device to ioping. | | yes | +| request_size | The request size in bytes to ioping the destination (symbolic modifiers are supported) | 4k | no | +| ioping_opts | Options passed to `ioping` commands. | -T 1000000 | no | </details> diff --git a/collectors/ioping.plugin/ioping.plugin.in b/collectors/ioping.plugin/ioping.plugin.in index d1283bad9..171e384db 100755 --- a/collectors/ioping.plugin/ioping.plugin.in +++ b/collectors/ioping.plugin/ioping.plugin.in @@ -9,7 +9,7 @@ # This plugin requires a latest version of ioping. # You can compile it from source, by running me with option: install -export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin" +export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin:@sbindir_POST@" export LC_ALL=C usage="$(basename "$0") [install] [-h] [-e] @@ -93,60 +93,103 @@ if [ "$INSTALL" == "1" ] fi # ----------------------------------------------------------------------------- +# logging PROGRAM_NAME="$(basename "${0}")" -LOG_LEVEL_ERR=1 -LOG_LEVEL_WARN=2 -LOG_LEVEL_INFO=3 -LOG_LEVEL="$LOG_LEVEL_INFO" - -set_log_severity_level() { - case ${NETDATA_LOG_SEVERITY_LEVEL,,} in - "info") LOG_LEVEL="$LOG_LEVEL_INFO";; - "warn" | "warning") LOG_LEVEL="$LOG_LEVEL_WARN";; - "err" | "error") LOG_LEVEL="$LOG_LEVEL_ERR";; +# these should be the same with syslog() priorities +NDLP_EMERG=0 # system is unusable +NDLP_ALERT=1 # action must be taken immediately +NDLP_CRIT=2 # critical conditions +NDLP_ERR=3 # error conditions +NDLP_WARN=4 # warning conditions +NDLP_NOTICE=5 # normal but significant condition +NDLP_INFO=6 # informational +NDLP_DEBUG=7 # debug-level messages + +# the max (numerically) log level we will log +LOG_LEVEL=$NDLP_INFO + +set_log_min_priority() { + case "${NETDATA_LOG_LEVEL,,}" in + "emerg" | "emergency") + LOG_LEVEL=$NDLP_EMERG + ;; + + "alert") + LOG_LEVEL=$NDLP_ALERT + ;; + + "crit" | "critical") + LOG_LEVEL=$NDLP_CRIT + ;; + + "err" | "error") + LOG_LEVEL=$NDLP_ERR + ;; + + "warn" | "warning") + LOG_LEVEL=$NDLP_WARN + ;; + + "notice") + LOG_LEVEL=$NDLP_NOTICE + ;; + + "info") + LOG_LEVEL=$NDLP_INFO + ;; + + "debug") + LOG_LEVEL=$NDLP_DEBUG + ;; esac } -set_log_severity_level - -logdate() { - date "+%Y-%m-%d %H:%M:%S" -} +set_log_min_priority log() { - local status="${1}" - shift + local level="${1}" + shift 1 - echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}" + [[ -n "$level" && -n "$LOG_LEVEL" && "$level" -gt "$LOG_LEVEL" ]] && return + systemd-cat-native --log-as-netdata --newline="--NEWLINE--" <<EOFLOG +INVOCATION_ID=${NETDATA_INVOCATION_ID} +SYSLOG_IDENTIFIER=${PROGRAM_NAME} +PRIORITY=${level} +THREAD_TAG=ioping.plugin +ND_LOG_SOURCE=collector +MESSAGE=${MODULE_NAME}: ${*//\\n/--NEWLINE--} + +EOFLOG + # AN EMPTY LINE IS NEEDED ABOVE } info() { - [[ -n "$LOG_LEVEL" && "$LOG_LEVEL_INFO" -gt "$LOG_LEVEL" ]] && return - log INFO "${@}" + log "$NDLP_INFO" "${@}" } warning() { - [[ -n "$LOG_LEVEL" && "$LOG_LEVEL_WARN" -gt "$LOG_LEVEL" ]] && return - log WARNING "${@}" + log "$NDLP_WARN" "${@}" } error() { - [[ -n "$LOG_LEVEL" && "$LOG_LEVEL_ERR" -gt "$LOG_LEVEL" ]] && return - log ERROR "${@}" + log "$NDLP_ERR" "${@}" } -fatal() { - log FATAL "${@}" +disable() { + log "${@}" echo "DISABLE" - exit 1 + exit 1 +} + +fatal() { + disable "$NDLP_ALERT" "${@}" } -debug=0 debug() { - [ $debug -eq 1 ] && log DEBUG "${@}" + log "$NDLP_DEBUG" "${@}" } # ----------------------------------------------------------------------------- @@ -194,35 +237,36 @@ ioping_opts="-T 1000000" for CONFIG in "${NETDATA_STOCK_CONFIG_DIR}/${plugin}.conf" "${NETDATA_USER_CONFIG_DIR}/${plugin}.conf"; do if [ -f "${CONFIG}" ]; then - info "Loading config file '${CONFIG}'..." + debug "Loading config file '${CONFIG}'..." source "${CONFIG}" - [ $? -ne 0 ] && error "Failed to load config file '${CONFIG}'." + [ $? -ne 0 ] && warn "Failed to load config file '${CONFIG}'." elif [[ $CONFIG =~ ^$NETDATA_USER_CONFIG_DIR ]]; then - warning "Cannot find file '${CONFIG}'." + debug "Cannot find file '${CONFIG}'." fi done if [ -z "${destination}" ] then - fatal "destination is not configured - nothing to do." + disable $NDLP_DEBUG "destination is not configured - nothing to do." fi if [ ! -f "${ioping}" ] then - fatal "ioping command is not found. Please set its full path in '${NETDATA_USER_CONFIG_DIR}/${plugin}.conf'" + disable $NDLP_ERR "ioping command is not found. Please set its full path in '${NETDATA_USER_CONFIG_DIR}/${plugin}.conf'" fi if [ ! -x "${ioping}" ] then - fatal "ioping command '${ioping}' is not executable - cannot proceed." + disable $NDLP_ERR "ioping command '${ioping}' is not executable - cannot proceed." fi # the ioping options we will use options=( -N -i ${update_every} -s ${request_size} ${ioping_opts} ${destination} ) # execute ioping -info "starting ioping: ${ioping} ${options[*]}" +debug "starting ioping: ${ioping} ${options[*]}" + exec "${ioping}" "${options[@]}" # if we cannot execute ioping, stop -fatal "command '${ioping} ${options[*]}' failed to be executed (returned code $?)." +error "command '${ioping} ${options[*]}' failed to be executed (returned code $?)." diff --git a/collectors/log2journal/Makefile.am b/collectors/log2journal/Makefile.am new file mode 100644 index 000000000..b13d2160b --- /dev/null +++ b/collectors/log2journal/Makefile.am @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + tests.sh \ + README.md \ + tests.d/* \ + $(NULL) + +log2journalconfigdir=$(libconfigdir)/log2journal.d +dist_log2journalconfig_DATA = \ + log2journal.d/nginx-combined.yaml \ + log2journal.d/nginx-json.yaml \ + log2journal.d/default.yaml \ + $(NULL) diff --git a/collectors/log2journal/README.md b/collectors/log2journal/README.md new file mode 100644 index 000000000..16ccc033c --- /dev/null +++ b/collectors/log2journal/README.md @@ -0,0 +1,912 @@ + +# log2journal + +`log2journal` and `systemd-cat-native` can be used to convert a structured log file, such as the ones generated by web servers, into `systemd-journal` entries. + +By combining these tools you can create advanced log processing pipelines sending any kind of structured text logs to systemd-journald. This is a simple, but powerful and efficient way to handle log processing. + +The process involves the usual piping of shell commands, to get and process the log files in realtime. + +The result is like this: nginx logs into systemd-journal: + +![image](https://github.com/netdata/netdata/assets/2662304/16b471ff-c5a1-4fcc-bcd5-83551e089f6c) + + +The overall process looks like this: + +```bash +tail -F /var/log/nginx/*.log |\ # outputs log lines + log2journal 'PATTERN' |\ # outputs Journal Export Format + systemd-cat-native # send to local/remote journald +``` + +These are the steps: + +1. `tail -F /var/log/nginx/*.log`<br/>this command will tail all `*.log` files in `/var/log/nginx/`. We use `-F` instead of `-f` to ensure that files will still be tailed after log rotation. +2. `log2joural` is a Netdata program. It reads log entries and extracts fields, according to the PCRE2 pattern it accepts. It can also apply some basic operations on the fields, like injecting new fields or duplicating existing ones or rewriting their values. The output of `log2journal` is in Systemd Journal Export Format, and it looks like this: + ```bash + KEY1=VALUE1 # << start of the first log line + KEY2=VALUE2 + # << log lines separator + KEY1=VALUE1 # << start of the second log line + KEY2=VALUE2 + ``` +3. `systemd-cat-native` is a Netdata program. I can send the logs to a local `systemd-journald` (journal namespaces supported), or to a remote `systemd-journal-remote`. + + +## Processing pipeline + +The sequence of processing in Netdata's `log2journal` is designed to methodically transform and prepare log data for export in the systemd Journal Export Format. This transformation occurs through a pipeline of stages, each with a specific role in processing the log entries. Here's a description of each stage in the sequence: + +1. **Input**<br/> + The tool reads one log line at a time from the input source. It supports different input formats such as JSON, logfmt, and free-form logs defined by PCRE2 patterns. + +2. **Extract Fields and Values**<br/> + Based on the input format (JSON, logfmt, or custom pattern), it extracts fields and their values from each log line. In the case of JSON and logfmt, it automatically extracts all fields. For custom patterns, it uses PCRE2 regular expressions, and fields are extracted based on sub-expressions defined in the pattern. + +3. **Transliteration**<br/> + Extracted fields are transliterated to the limited character set accepted by systemd-journal: capitals A-Z, digits 0-9, underscores. + +4. **Apply Optional Prefix**<br/> + If a prefix is specified, it is added to all keys. This happens before any other processing so that all subsequent matches and manipulations take the prefix into account. + +5. **Rename Fields**<br/> + Renames fields as specified in the configuration. This is used to change the names of the fields to match desired or required naming conventions. + +6. **Inject New Fields**<br/> + New fields are injected into the log data. This can include constants or values derived from other fields, using variable substitution. + +7. **Rewrite Field Values**<br/> + Applies rewriting rules to alter the values of the fields. This can involve complex transformations, including regular expressions and variable substitutions. The rewrite rules can also inject new fields into the data. + +8. **Filter Fields**<br/> + Fields are filtered based on include and exclude patterns. This stage selects which fields are to be sent to the journal, allowing for selective logging. + +9. **Output**<br/> + Finally, the processed log data is output in the Journal Export Format. This format is compatible with systemd's journaling system and can be sent to local or remote systemd journal systems, by piping the output of `log2journal` to `systemd-cat-native`. + +This pipeline ensures a flexible and comprehensive approach to log processing, allowing for a wide range of modifications and customizations to fit various logging requirements. Each stage builds upon the previous one, enabling complex log transformations and enrichments before the data is exported to the systemd journal. + +## Real-life example + +We have an nginx server logging in this standard combined log format: + +```bash + log_format combined '$remote_addr - $remote_user [$time_local] ' + '"$request" $status $body_bytes_sent ' + '"$http_referer" "$http_user_agent"'; +``` + +### Extracting fields with a pattern + +First, let's find the right pattern for `log2journal`. We ask ChatGPT: + +``` +My nginx log uses this log format: + +log_format access '$remote_addr - $remote_user [$time_local] ' + '"$request" $status $body_bytes_sent ' + '"$http_referer" "$http_user_agent"'; + +I want to use `log2joural` to convert this log for systemd-journal. +`log2journal` accepts a PCRE2 regular expression, using the named groups +in the pattern as the journal fields to extract from the logs. + +Please give me the PCRE2 pattern to extract all the fields from my nginx +log files. +``` + +ChatGPT replies with this: + +```regexp + (?x) # Enable PCRE2 extended mode + ^ + (?<remote_addr>[^ ]+) \s - \s + (?<remote_user>[^ ]+) \s + \[ + (?<time_local>[^\]]+) + \] + \s+ " + (?<request> + (?<request_method>[A-Z]+) \s+ + (?<request_uri>[^ ]+) \s+ + (?<server_protocol>[^"]+) + ) + " \s+ + (?<status>\d+) \s+ + (?<body_bytes_sent>\d+) \s+ + "(?<http_referer>[^"]*)" \s+ + "(?<http_user_agent>[^"]*)" +``` + +Let's see what the above says: + +1. `(?x)`: enable PCRE2 extended mode. In this mode spaces and newlines in the pattern are ignored. To match a space you have to use `\s`. This mode allows us to split the pattern is multiple lines and add comments to it. +1. `^`: match the beginning of the line +2. `(?<remote_addr[^ ]+)`: match anything up to the first space (`[^ ]+`), and name it `remote_addr`. +3. `\s`: match a space +4. `-`: match a hyphen +5. and so on... + +We edit `nginx.yaml` and add it, like this: + +```yaml +pattern: | + (?x) # Enable PCRE2 extended mode + ^ + (?<remote_addr>[^ ]+) \s - \s + (?<remote_user>[^ ]+) \s + \[ + (?<time_local>[^\]]+) + \] + \s+ " + (?<request> + (?<request_method>[A-Z]+) \s+ + (?<request_uri>[^ ]+) \s+ + (?<server_protocol>[^"]+) + ) + " \s+ + (?<status>\d+) \s+ + (?<body_bytes_sent>\d+) \s+ + "(?<http_referer>[^"]*)" \s+ + "(?<http_user_agent>[^"]*)" +``` + +Let's test it with a sample line (instead of `tail`): + +```bash +# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 104 0.001 "-" "Go-http-client/1.1"' | log2journal -f nginx.yaml +BODY_BYTES_SENT=4172 +HTTP_REFERER=- +HTTP_USER_AGENT=Go-http-client/1.1 +REMOTE_ADDR=1.2.3.4 +REMOTE_USER=- +REQUEST=GET /index.html HTTP/1.1 +REQUEST_METHOD=GET +REQUEST_URI=/index.html +SERVER_PROTOCOL=HTTP/1.1 +STATUS=200 +TIME_LOCAL=19/Nov/2023:00:24:43 +0000 + +``` + +As you can see, it extracted all the fields and made them capitals, as systemd-journal expects them. + +### Prefixing field names + +To make sure the fields are unique for nginx and do not interfere with other applications, we should prefix them with `NGINX_`: + +```yaml +pattern: | + (?x) # Enable PCRE2 extended mode + ^ + (?<remote_addr>[^ ]+) \s - \s + (?<remote_user>[^ ]+) \s + \[ + (?<time_local>[^\]]+) + \] + \s+ " + (?<request> + (?<request_method>[A-Z]+) \s+ + (?<request_uri>[^ ]+) \s+ + (?<server_protocol>[^"]+) + ) + " \s+ + (?<status>\d+) \s+ + (?<body_bytes_sent>\d+) \s+ + "(?<http_referer>[^"]*)" \s+ + "(?<http_user_agent>[^"]*)" + +prefix: 'NGINX_' # <<< we added this +``` + +And let's try it: + +```bash +# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 "-" "Go-http-client/1.1"' | log2journal -f nginx.yaml +NGINX_BODY_BYTES_SENT=4172 +NGINX_HTTP_REFERER=- +NGINX_HTTP_USER_AGENT=Go-http-client/1.1 +NGINX_REMOTE_ADDR=1.2.3.4 +NGINX_REMOTE_USER=- +NGINX_REQUEST=GET /index.html HTTP/1.1 +NGINX_REQUEST_METHOD=GET +NGINX_REQUEST_URI=/index.html +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=200 +NGINX_TIME_LOCAL=19/Nov/2023:00:24:43 +0000 + +``` + +### Renaming fields + +Now, all fields start with `NGINX_` but we want `NGINX_REQUEST` to be the `MESSAGE` of the log line, as we will see it by default in `journalctl` and the Netdata dashboard. Let's rename it: + +```yaml +pattern: | + (?x) # Enable PCRE2 extended mode + ^ + (?<remote_addr>[^ ]+) \s - \s + (?<remote_user>[^ ]+) \s + \[ + (?<time_local>[^\]]+) + \] + \s+ " + (?<request> + (?<request_method>[A-Z]+) \s+ + (?<request_uri>[^ ]+) \s+ + (?<server_protocol>[^"]+) + ) + " \s+ + (?<status>\d+) \s+ + (?<body_bytes_sent>\d+) \s+ + "(?<http_referer>[^"]*)" \s+ + "(?<http_user_agent>[^"]*)" + +prefix: 'NGINX_' + +rename: # <<< we added this + - new_key: MESSAGE # <<< we added this + old_key: NGINX_REQUEST # <<< we added this +``` + +Let's test it: + +```bash +# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 "-" "Go-http-client/1.1"' | log2journal -f nginx.yaml +MESSAGE=GET /index.html HTTP/1.1 # <<< renamed ! +NGINX_BODY_BYTES_SENT=4172 +NGINX_HTTP_REFERER=- +NGINX_HTTP_USER_AGENT=Go-http-client/1.1 +NGINX_REMOTE_ADDR=1.2.3.4 +NGINX_REMOTE_USER=- +NGINX_REQUEST_METHOD=GET +NGINX_REQUEST_URI=/index.html +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=200 +NGINX_TIME_LOCAL=19/Nov/2023:00:24:43 +0000 + +``` + +### Injecting new fields + +To have a complete message in journals we need 3 fields: `MESSAGE`, `PRIORITY` and `SYSLOG_IDENTIFIER`. We have already added `MESSAGE` by renaming `NGINX_REQUEST`. We can also inject a `SYSLOG_IDENTIFIER` and `PRIORITY`. + +Ideally, we would want the 5xx errors to be red in our `journalctl` output and the dashboard. To achieve that we need to set the `PRIORITY` field to the right log level. Log priorities are numeric and follow the `syslog` priorities. Checking `/usr/include/sys/syslog.h` we can see these: + +```c +#define LOG_EMERG 0 /* system is unusable */ +#define LOG_ALERT 1 /* action must be taken immediately */ +#define LOG_CRIT 2 /* critical conditions */ +#define LOG_ERR 3 /* error conditions */ +#define LOG_WARNING 4 /* warning conditions */ +#define LOG_NOTICE 5 /* normal but significant condition */ +#define LOG_INFO 6 /* informational */ +#define LOG_DEBUG 7 /* debug-level messages */ +``` + +Avoid setting priority to 0 (`LOG_EMERG`), because these will be on your terminal (the journal uses `wall` to let you know of such events). A good priority for errors is 3 (red), or 4 (yellow). + +To set the PRIORITY field in the output, we can use `NGINX_STATUS`. We will do this in 2 steps: a) inject the priority field as a copy is `NGINX_STATUS` and then b) use a pattern on its value to rewrite it to the priority level we want. + +First, let's inject `SYSLOG_IDENTIFIER` and `PRIORITY`: + +```yaml +pattern: | + (?x) # Enable PCRE2 extended mode + ^ + (?<remote_addr>[^ ]+) \s - \s + (?<remote_user>[^ ]+) \s + \[ + (?<time_local>[^\]]+) + \] + \s+ " + (?<request> + (?<request_method>[A-Z]+) \s+ + (?<request_uri>[^ ]+) \s+ + (?<server_protocol>[^"]+) + ) + " \s+ + (?<status>\d+) \s+ + (?<body_bytes_sent>\d+) \s+ + "(?<http_referer>[^"]*)" \s+ + "(?<http_user_agent>[^"]*)" + +prefix: 'NGINX_' + +rename: + - new_key: MESSAGE + old_key: NGINX_REQUEST + +inject: # <<< we added this + - key: PRIORITY # <<< we added this + value: '${NGINX_STATUS}' # <<< we added this + + - key: SYSLOG_IDENTIFIER # <<< we added this + value: 'nginx-log' # <<< we added this +``` + +Let's see what this does: + +```bash +# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 "-" "Go-http-client/1.1"' | log2journal -f nginx.yaml +MESSAGE=GET /index.html HTTP/1.1 +NGINX_BODY_BYTES_SENT=4172 +NGINX_HTTP_REFERER=- +NGINX_HTTP_USER_AGENT=Go-http-client/1.1 +NGINX_REMOTE_ADDR=1.2.3.4 +NGINX_REMOTE_USER=- +NGINX_REQUEST_METHOD=GET +NGINX_REQUEST_URI=/index.html +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=200 +NGINX_TIME_LOCAL=19/Nov/2023:00:24:43 +0000 +PRIORITY=200 # <<< PRIORITY added +SYSLOG_IDENTIFIER=nginx-log # <<< SYSLOG_IDENTIFIER added + +``` + +### Rewriting field values + +Now we need to rewrite `PRIORITY` to the right syslog level based on its value (`NGINX_STATUS`). We will assign the priority 6 (info) when the status is 1xx, 2xx, 3xx, priority 5 (notice) when status is 4xx, priority 3 (error) when status is 5xx and anything else will go to priority 4 (warning). Let's do it: + +```yaml +pattern: | + (?x) # Enable PCRE2 extended mode + ^ + (?<remote_addr>[^ ]+) \s - \s + (?<remote_user>[^ ]+) \s + \[ + (?<time_local>[^\]]+) + \] + \s+ " + (?<request> + (?<request_method>[A-Z]+) \s+ + (?<request_uri>[^ ]+) \s+ + (?<server_protocol>[^"]+) + ) + " \s+ + (?<status>\d+) \s+ + (?<body_bytes_sent>\d+) \s+ + "(?<http_referer>[^"]*)" \s+ + "(?<http_user_agent>[^"]*)" + +prefix: 'NGINX_' + +rename: + - new_key: MESSAGE + old_key: NGINX_REQUEST + +inject: + - key: PRIORITY + value: '${NGINX_STATUS}' + +rewrite: # <<< we added this + - key: PRIORITY # <<< we added this + match: '^[123]' # <<< we added this + value: 6 # <<< we added this + + - key: PRIORITY # <<< we added this + match: '^4' # <<< we added this + value: 5 # <<< we added this + + - key: PRIORITY # <<< we added this + match: '^5' # <<< we added this + value: 3 # <<< we added this + + - key: PRIORITY # <<< we added this + match: '.*' # <<< we added this + value: 4 # <<< we added this +``` + +Rewrite rules are processed in order and the first matching a field, stops by default processing for this field. This is why the last rule, that matches everything does not always change the priority to 4. + +Let's test it: + +```bash +# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 "-" "Go-http-client/1.1"' | log2journal -f nginx.yaml +MESSAGE=GET /index.html HTTP/1.1 +NGINX_BODY_BYTES_SENT=4172 +NGINX_HTTP_REFERER=- +NGINX_HTTP_USER_AGENT=Go-http-client/1.1 +NGINX_REMOTE_ADDR=1.2.3.4 +NGINX_REMOTE_USER=- +NGINX_REQUEST_METHOD=GET +NGINX_REQUEST_URI=/index.html +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=200 +NGINX_TIME_LOCAL=19/Nov/2023:00:24:43 +0000 +PRIORITY=6 # <<< PRIORITY rewritten here +SYSLOG_IDENTIFIER=nginx-log + +``` + +Rewrite rules are powerful. You can have named groups in them, like in the main pattern, to extract sub-fields from them, which you can then use in variable substitution. You can use rewrite rules to anonymize the URLs, e.g to remove customer IDs or transaction details from them. + +### Sending logs to systemd-journal + +Now the message is ready to be sent to a systemd-journal. For this we use `systemd-cat-native`. This command can send such messages to a journal running on the localhost, a local journal namespace, or a `systemd-journal-remote` running on another server. By just appending `| systemd-cat-native` to the command, the message will be sent to the local journal. + + +```bash +# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 "-" "Go-http-client/1.1"' | log2journal -f nginx.yaml | systemd-cat-native +# no output + +# let's find the message +# journalctl -r -o verbose SYSLOG_IDENTIFIER=nginx-log +Wed 2023-12-06 13:23:07.083299 EET [s=5290f0133f25407aaa1e2c451c0e4756;i=57194;b=0dfa96ecc2094cecaa8ec0efcb93b865;m=b133308867;t=60bd59346a289;x=5c1bdacf2b9c4bbd] + PRIORITY=6 + _UID=0 + _GID=0 + _CAP_EFFECTIVE=1ffffffffff + _SELINUX_CONTEXT=unconfined + _BOOT_ID=0dfa96ecc2094cecaa8ec0efcb93b865 + _MACHINE_ID=355c8eca894d462bbe4c9422caf7a8bb + _HOSTNAME=lab-logtest-src + _RUNTIME_SCOPE=system + _TRANSPORT=journal + MESSAGE=GET /index.html HTTP/1.1 + NGINX_BODY_BYTES_SENT=4172 + NGINX_HTTP_REFERER=- + NGINX_HTTP_USER_AGENT=Go-http-client/1.1 + NGINX_REMOTE_ADDR=1.2.3.4 + NGINX_REMOTE_USER=- + NGINX_REQUEST_METHOD=GET + NGINX_REQUEST_URI=/index.html + NGINX_SERVER_PROTOCOL=HTTP/1.1 + NGINX_STATUS=200 + NGINX_TIME_LOCAL=19/Nov/2023:00:24:43 +0000 + SYSLOG_IDENTIFIER=nginx-log + _PID=114343 + _COMM=systemd-cat-nat + _AUDIT_SESSION=253 + _AUDIT_LOGINUID=1000 + _SYSTEMD_CGROUP=/user.slice/user-1000.slice/session-253.scope + _SYSTEMD_SESSION=253 + _SYSTEMD_OWNER_UID=1000 + _SYSTEMD_UNIT=session-253.scope + _SYSTEMD_SLICE=user-1000.slice + _SYSTEMD_USER_SLICE=-.slice + _SYSTEMD_INVOCATION_ID=c59e33ead8c24880b027e317b89f9f76 + _SOURCE_REALTIME_TIMESTAMP=1701861787083299 + +``` + +So, the log line, with all its fields parsed, ended up in systemd-journal. Now we can send all the nginx logs to systemd-journal like this: + +```bash +tail -F /var/log/nginx/access.log |\ + log2journal -f nginx.yaml |\ + systemd-cat-native +``` + +## Best practices + +**Create a systemd service unit**: Add the above commands to a systemd unit file. When you run it in a systemd unit file you will be able to start/stop it and also see its status. Furthermore you can use the `LogNamespace=` directive of systemd service units to isolate your nginx logs from the logs of the rest of the system. Here is how to do it: + +Create the file `/etc/systemd/system/nginx-logs.service` (change `/path/to/nginx.yaml` to the right path): + +``` +[Unit] +Description=NGINX Log to Systemd Journal +After=network.target + +[Service] +ExecStart=/bin/sh -c 'tail -F /var/log/nginx/access.log | log2journal -f /path/to/nginx.yaml' | systemd-cat-native +LogNamespace=nginx-logs +Restart=always +RestartSec=3 + +[Install] +WantedBy=multi-user.target +``` + +Reload systemd to grab this file: + +```bash +sudo systemctl daemon-reload +``` + +Enable and start the service: + +```bash +sudo systemctl enable nginx-logs.service +sudo systemctl start nginx-logs.service +``` + +To see the logs of the namespace, use: + +```bash +journalctl -f --namespace=nginx-logs +``` + +Netdata will automatically pick the new namespace and present it at the list of sources of the dashboard. + +You can also instruct `systemd-cat-native` to log to a remote system, sending the logs to a `systemd-journal-remote` instance running on another server. Check [the manual of systemd-cat-native](https://github.com/netdata/netdata/blob/master/libnetdata/log/systemd-cat-native.md). + + +## Performance + +`log2journal` and `systemd-cat-native` have been designed to process hundreds of thousands of log lines per second. They both utilize high performance indexing hashtables to speed up lookups, and queues that dynamically adapt to the number of log lines offered, offering a smooth and fast experience under all conditions. + +In our tests, the combined CPU utilization of `log2journal` and `systemd-cat-native` versus `promtail` with similar configuration is 1 to 5. So, `log2journal` and `systemd-cat-native` combined, are 5 times faster than `promtail`. + +### PCRE2 patterns + +The key characteristic that can influence the performance of a logs processing pipeline using these tools, is the quality of the PCRE2 patterns used. Poorly created PCRE2 patterns can make processing significantly slower, or CPU consuming. + +Especially the pattern `.*` seems to have the biggest impact on CPU consumption, especially when multiple `.*` are on the same pattern. + +Usually we use `.*` to indicate that we need to match everything up to a character, e.g. `.* ` to match up to a space. By replacing it with `[^ ]+` (meaning: match at least a character up to a space), the regular expression engine can be a lot more efficient, reducing the overall CPU utilization significantly. + +### Performance of systemd journals + +The ingestion pipeline of logs, from `tail` to `systemd-journald` or `systemd-journal-remote` is very efficient in all aspects. CPU utilization is better than any other system we tested and RAM usage is independent of the number of fields indexed, making systemd-journal one of the most efficient log management engines for ingesting high volumes of structured logs. + +High fields cardinality does not have a noticable impact on systemd-journal. The amount of fields indexed and the amount of unique values per field, have a linear and predictable result in the resource utilization of `systemd-journald` and `systemd-journal-remote`. This is unlike other logs management solutions, like Loki, that their RAM requirements grow exponentially as the cardinality increases, making it impractical for them to index the amount of information systemd journals can index. + +However, the number of fields added to journals influences the overall disk footprint. Less fields means more log entries per journal file, smaller overall disk footprint and faster queries. + +systemd-journal files are primarily designed for security and reliability. This comes at the cost of disk footprint. The internal structure of journal files is such that in case of corruption, minimum data loss will incur. To achieve such a unique characteristic, certain data within the files need to be aligned at predefined boundaries, so that in case there is a corruption, non-corrupted parts of the journal file can be recovered. + +Despite the fact that systemd-journald employees several techniques to optimize disk footprint, like deduplication of log entries, shared indexes for fields and their values, compression of long log entries, etc. the disk footprint of journal files is generally 10x more compared to other monitoring solutions, like Loki. + +This can be improved by storing journal files in a compressed filesystem. In our tests, a compressed filesystem can save up to 75% of the space required by journal files. The journal files will still be bigger than the overall disk footprint of other solutions, but the flexibility (index any number of fields), reliability (minimal potential data loss) and security (tampering protection and sealing) features of systemd-journal justify the difference. + +When using versions of systemd prior to 254 and you are centralizing logs to a remote system, `systemd-journal-remote` creates very small files (32MB). This results in increased duplication of information across the files, increasing the overall disk footprint. systemd versions 254+, added options to `systemd-journal-remote` to control the max size per file. This can significantly reduce the duplication of information. + +Another limitation of the `systemd-journald` ecosystem is the uncompressed transmission of logs across systems. `systemd-journal-remote` up to version 254 that we tested, accepts encrypted, but uncompressed data. This means that when centralizing logs to a logs server, the bandwidth required will be increased compared to other log management solution. + +## Security Considerations + +`log2journal` and `systemd-cat-native` are used to convert log files to structured logs in the systemd-journald ecosystem. + +Systemd-journal is a logs management solution designed primarily for security and reliability. When configured properly, it can reliably and securely store your logs, ensuring they will available and unchanged for as long as you need them. + +When sending logs to a remote system, `systemd-cat-native` can be configured the same way `systemd-journal-upload` is configured, using HTTPS and private keys to encrypt and secure their transmission over the network. + +When dealing with sensitive logs, organizations usually follow 2 strategies: + +1. Anonymize the logs before storing them, so that the stored logs do not have any sensitive information. +2. Store the logs in full, including sensitive information, and carefully control who and how has access to them. + +Netdata can help in both cases. + +If you want to anonymize the logs before storing them, use rewriting rules at the `log2journal` phase to remove sensitive information from them. This process usually means matching the sensitive part and replacing with `XXX` or `CUSTOMER_ID`, or `CREDIT_CARD_NUMBER`, so that the resulting log entries stored in journal files will not include any such sensitive information. + +If on other hand your organization prefers to maintain the full logs and control who and how has access on them, use Netdata Cloud to assign roles to your team members and control which roles can access the journal logs in your environment. + +## `log2journal` options + +``` + +Netdata log2journal v1.43.0-341-gdac4df856 + +Convert logs to systemd Journal Export Format. + + - JSON logs: extracts all JSON fields. + - logfmt logs: extracts all logfmt fields. + - free-form logs: uses PCRE2 patterns to extracts fields. + +Usage: ./log2journal [OPTIONS] PATTERN|json + +Options: + + --file /path/to/file.yaml or -f /path/to/file.yaml + Read yaml configuration file for instructions. + + --config CONFIG_NAME or -c CONFIG_NAME + Run with the internal YAML configuration named CONFIG_NAME. + Available internal YAML configs: + + nginx-combined nginx-json default + +-------------------------------------------------------------------------------- + INPUT PROCESSING + + PATTERN + PATTERN should be a valid PCRE2 regular expression. + RE2 regular expressions (like the ones usually used in Go applications), + are usually valid PCRE2 patterns too. + Sub-expressions without named groups are evaluated, but their matches are + not added to the output. + + - JSON mode + JSON mode is enabled when the pattern is set to: json + Field names are extracted from the JSON logs and are converted to the + format expected by Journal Export Format (all caps, only _ is allowed). + + - logfmt mode + logfmt mode is enabled when the pattern is set to: logfmt + Field names are extracted from the logfmt logs and are converted to the + format expected by Journal Export Format (all caps, only _ is allowed). + + All keys extracted from the input, are transliterated to match Journal + semantics (capital A-Z, digits 0-9, underscore). + + In a YAML file: + ```yaml + pattern: 'PCRE2 pattern | json | logfmt' + ``` + +-------------------------------------------------------------------------------- + GLOBALS + + --prefix PREFIX + Prefix all fields with PREFIX. The PREFIX is added before any other + processing, so that the extracted keys have to be matched with the PREFIX in + them. PREFIX is NOT transliterated and it is assumed to be systemd-journal + friendly. + + In a YAML file: + ```yaml + prefix: 'PREFIX_' # prepend all keys with this prefix. + ``` + + --filename-key KEY + Add a field with KEY as the key and the current filename as value. + Automatically detects filenames when piped after 'tail -F', + and tail matches multiple filenames. + To inject the filename when tailing a single file, use --inject. + + In a YAML file: + ```yaml + filename: + key: KEY + ``` + +-------------------------------------------------------------------------------- + RENAMING OF KEYS + + --rename NEW=OLD + Rename fields. OLD has been transliterated and PREFIX has been added. + NEW is assumed to be systemd journal friendly. + + Up to 512 renaming rules are allowed. + + In a YAML file: + ```yaml + rename: + - new_key: KEY1 + old_key: KEY2 # transliterated with PREFIX added + - new_key: KEY3 + old_key: KEY4 # transliterated with PREFIX added + # add as many as required + ``` + +-------------------------------------------------------------------------------- + INJECTING NEW KEYS + + --inject KEY=VALUE + Inject constant fields to the output (both matched and unmatched logs). + --inject entries are added to unmatched lines too, when their key is + not used in --inject-unmatched (--inject-unmatched override --inject). + VALUE can use variable like ${OTHER_KEY} to be replaced with the values + of other keys available. + + Up to 512 fields can be injected. + + In a YAML file: + ```yaml + inject: + - key: KEY1 + value: 'VALUE1' + - key: KEY2 + value: '${KEY3}${KEY4}' # gets the values of KEY3 and KEY4 + # add as many as required + ``` + +-------------------------------------------------------------------------------- + REWRITING KEY VALUES + + --rewrite KEY=/MATCH/REPLACE[/OPTIONS] + Apply a rewrite rule to the values of a specific key. + The first character after KEY= is the separator, which should also + be used between the MATCH, REPLACE and OPTIONS. + + OPTIONS can be a comma separated list of `non-empty`, `dont-stop` and + `inject`. + + When `non-empty` is given, MATCH is expected to be a variable + substitution using `${KEY1}${KEY2}`. Once the substitution is completed + the rule is matching the KEY only if the result is not empty. + When `non-empty` is not set, the MATCH string is expected to be a PCRE2 + regular expression to be checked against the KEY value. This PCRE2 + pattern may include named groups to extract parts of the KEY's value. + + REPLACE supports variable substitution like `${variable}` against MATCH + named groups (when MATCH is a PCRE2 pattern) and `${KEY}` against the + keys defined so far. + + Example: + --rewrite DATE=/^(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})$/ + ${day}/${month}/${year} + The above will rewrite dates in the format YYYY-MM-DD to DD/MM/YYYY. + + Only one rewrite rule is applied per key; the sequence of rewrites for a + given key, stops once a rule matches it. This allows providing a sequence + of independent rewriting rules for the same key, matching the different + values the key may get, and also provide a catch-all rewrite rule at the + end, for setting the key value if no other rule matched it. The rewrite + rule can allow processing more rewrite rules when OPTIONS includes + the keyword 'dont-stop'. + + Up to 512 rewriting rules are allowed. + + In a YAML file: + ```yaml + rewrite: + # the order if these rules in important - processed top to bottom + - key: KEY1 + match: 'PCRE2 PATTERN WITH NAMED GROUPS' + value: 'all match fields and input keys as ${VARIABLE}' + inject: BOOLEAN # yes = inject the field, don't just rewrite it + stop: BOOLEAN # no = continue processing, don't stop if matched + - key: KEY2 + non_empty: '${KEY3}${KEY4}' # match only if this evaluates to non empty + value: 'all input keys as ${VARIABLE}' + inject: BOOLEAN # yes = inject the field, don't just rewrite it + stop: BOOLEAN # no = continue processing, don't stop if matched + # add as many rewrites as required + ``` + + By default rewrite rules are applied only on fields already defined. + This allows shipping YAML files that include more rewrites than are + required for a specific input file. + Rewrite rules however allow injecting new fields when OPTIONS include + the keyword `inject` or in YAML `inject: yes` is given. + + MATCH on the command line can be empty to define an unconditional rule. + Similarly, `match` and `non_empty` can be omitted in the YAML file. +-------------------------------------------------------------------------------- + UNMATCHED LINES + + --unmatched-key KEY + Include unmatched log entries in the output with KEY as the field name. + Use this to include unmatched entries to the output stream. + Usually it should be set to --unmatched-key=MESSAGE so that the + unmatched entry will appear as the log message in the journals. + Use --inject-unmatched to inject additional fields to unmatched lines. + + In a YAML file: + ```yaml + unmatched: + key: MESSAGE # inject the error log as MESSAGE + ``` + + --inject-unmatched LINE + Inject lines into the output for each unmatched log entry. + Usually, --inject-unmatched=PRIORITY=3 is needed to mark the unmatched + lines as errors, so that they can easily be spotted in the journals. + + Up to 512 such lines can be injected. + + In a YAML file: + ```yaml + unmatched: + key: MESSAGE # inject the error log as MESSAGE + inject:: + - key: KEY1 + value: 'VALUE1' + # add as many constants as required + ``` + +-------------------------------------------------------------------------------- + FILTERING + + --include PATTERN + Include only keys matching the PCRE2 PATTERN. + Useful when parsing JSON of logfmt logs, to include only the keys given. + The keys are matched after the PREFIX has been added to them. + + --exclude PATTERN + Exclude the keys matching the PCRE2 PATTERN. + Useful when parsing JSON of logfmt logs, to exclude some of the keys given. + The keys are matched after the PREFIX has been added to them. + + When both include and exclude patterns are set and both match a key, + exclude wins and the key will not be added, like a pipeline, we first + include it and then exclude it. + + In a YAML file: + ```yaml + filter: + include: 'PCRE2 PATTERN MATCHING KEY NAMES TO INCLUDE' + exclude: 'PCRE2 PATTERN MATCHING KEY NAMES TO EXCLUDE' + ``` + +-------------------------------------------------------------------------------- + OTHER + + -h, or --help + Display this help and exit. + + --show-config + Show the configuration in YAML format before starting the job. + This is also an easy way to convert command line parameters to yaml. + +The program accepts all parameters as both --option=value and --option value. + +The maximum log line length accepted is 1048576 characters. + +PIPELINE AND SEQUENCE OF PROCESSING + +This is a simple diagram of the pipeline taking place: + + +---------------------------------------------------+ + | INPUT | + | read one log line at a time | + +---------------------------------------------------+ + v v v v v v + +---------------------------------------------------+ + | EXTRACT FIELDS AND VALUES | + | JSON, logfmt, or pattern based | + | (apply optional PREFIX - all keys use capitals) | + +---------------------------------------------------+ + v v v v v v + +---------------------------------------------------+ + | RENAME FIELDS | + | change the names of the fields | + +---------------------------------------------------+ + v v v v v v + +---------------------------------------------------+ + | INJECT NEW FIELDS | + | constants, or other field values as variables | + +---------------------------------------------------+ + v v v v v v + +---------------------------------------------------+ + | REWRITE FIELD VALUES | + | pipeline multiple rewriting rules to alter | + | the values of the fields | + +---------------------------------------------------+ + v v v v v v + +---------------------------------------------------+ + | FILTER FIELDS | + | use include and exclude patterns on the field | + | names, to select which fields are sent to journal | + +---------------------------------------------------+ + v v v v v v + +---------------------------------------------------+ + | OUTPUT | + | generate Journal Export Format | + +---------------------------------------------------+ + +-------------------------------------------------------------------------------- +JOURNAL FIELDS RULES (enforced by systemd-journald) + + - field names can be up to 64 characters + - the only allowed field characters are A-Z, 0-9 and underscore + - the first character of fields cannot be a digit + - protected journal fields start with underscore: + * they are accepted by systemd-journal-remote + * they are NOT accepted by a local systemd-journald + + For best results, always include these fields: + + MESSAGE=TEXT + The MESSAGE is the body of the log entry. + This field is what we usually see in our logs. + + PRIORITY=NUMBER + PRIORITY sets the severity of the log entry. + 0=emerg, 1=alert, 2=crit, 3=err, 4=warn, 5=notice, 6=info, 7=debug + - Emergency events (0) are usually broadcast to all terminals. + - Emergency, alert, critical, and error (0-3) are usually colored red. + - Warning (4) entries are usually colored yellow. + - Notice (5) entries are usually bold or have a brighter white color. + - Info (6) entries are the default. + - Debug (7) entries are usually grayed or dimmed. + + SYSLOG_IDENTIFIER=NAME + SYSLOG_IDENTIFIER sets the name of application. + Use something descriptive, like: SYSLOG_IDENTIFIER=nginx-logs + +You can find the most common fields at 'man systemd.journal-fields'. + +``` + +`log2journal` supports YAML configuration files, like the ones found [in this directory](https://github.com/netdata/netdata/tree/master/collectors/log2journal/log2journal.d). + +## `systemd-cat-native` options + +Read [the manual of systemd-cat-native](https://github.com/netdata/netdata/blob/master/libnetdata/log/systemd-cat-native.md). diff --git a/collectors/log2journal/log2journal-help.c b/collectors/log2journal/log2journal-help.c new file mode 100644 index 000000000..21be948e8 --- /dev/null +++ b/collectors/log2journal/log2journal-help.c @@ -0,0 +1,377 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "log2journal.h" + +static void config_dir_print_available(void) { + const char *path = LOG2JOURNAL_CONFIG_PATH; + DIR *dir; + struct dirent *entry; + + dir = opendir(path); + + if (dir == NULL) { + log2stderr(" >>> Cannot open directory:\n %s", path); + return; + } + + size_t column_width = 80; + size_t current_columns = 7; // Start with 7 spaces for the first line + + while ((entry = readdir(dir))) { + if (entry->d_type == DT_REG) { // Check if it's a regular file + const char *file_name = entry->d_name; + size_t len = strlen(file_name); + if (len >= 5 && strcmp(file_name + len - 5, ".yaml") == 0) { + // Remove the ".yaml" extension + len -= 5; + if (current_columns == 7) { + printf(" "); // Print 7 spaces at the beginning of a new line + } + if (current_columns + len + 1 > column_width) { + // Start a new line if the current line is full + printf("\n "); // Print newline and 7 spaces + current_columns = 7; + } + printf("%.*s ", (int)len, file_name); // Print the filename without extension + current_columns += len + 1; // Add filename length and a space + } + } + } + + closedir(dir); + printf("\n"); // Add a newline at the end +} + +void log_job_command_line_help(const char *name) { + printf("\n"); + printf("Netdata log2journal " PACKAGE_VERSION "\n"); + printf("\n"); + printf("Convert logs to systemd Journal Export Format.\n"); + printf("\n"); + printf(" - JSON logs: extracts all JSON fields.\n"); + printf(" - logfmt logs: extracts all logfmt fields.\n"); + printf(" - free-form logs: uses PCRE2 patterns to extracts fields.\n"); + printf("\n"); + printf("Usage: %s [OPTIONS] PATTERN|json\n", name); + printf("\n"); + printf("Options:\n"); + printf("\n"); +#ifdef HAVE_LIBYAML + printf(" --file /path/to/file.yaml or -f /path/to/file.yaml\n"); + printf(" Read yaml configuration file for instructions.\n"); + printf("\n"); + printf(" --config CONFIG_NAME or -c CONFIG_NAME\n"); + printf(" Run with the internal YAML configuration named CONFIG_NAME.\n"); + printf(" Available internal YAML configs:\n"); + printf("\n"); + config_dir_print_available(); + printf("\n"); +#else + printf(" IMPORTANT:\n"); + printf(" YAML configuration parsing is not compiled in this binary.\n"); + printf("\n"); +#endif + printf("--------------------------------------------------------------------------------\n"); + printf(" INPUT PROCESSING\n"); + printf("\n"); + printf(" PATTERN\n"); + printf(" PATTERN should be a valid PCRE2 regular expression.\n"); + printf(" RE2 regular expressions (like the ones usually used in Go applications),\n"); + printf(" are usually valid PCRE2 patterns too.\n"); + printf(" Sub-expressions without named groups are evaluated, but their matches are\n"); + printf(" not added to the output.\n"); + printf("\n"); + printf(" - JSON mode\n"); + printf(" JSON mode is enabled when the pattern is set to: json\n"); + printf(" Field names are extracted from the JSON logs and are converted to the\n"); + printf(" format expected by Journal Export Format (all caps, only _ is allowed).\n"); + printf("\n"); + printf(" - logfmt mode\n"); + printf(" logfmt mode is enabled when the pattern is set to: logfmt\n"); + printf(" Field names are extracted from the logfmt logs and are converted to the\n"); + printf(" format expected by Journal Export Format (all caps, only _ is allowed).\n"); + printf("\n"); + printf(" All keys extracted from the input, are transliterated to match Journal\n"); + printf(" semantics (capital A-Z, digits 0-9, underscore).\n"); + printf("\n"); + printf(" In a YAML file:\n"); + printf(" ```yaml\n"); + printf(" pattern: 'PCRE2 pattern | json | logfmt'\n"); + printf(" ```\n"); + printf("\n"); + printf("--------------------------------------------------------------------------------\n"); + printf(" GLOBALS\n"); + printf("\n"); + printf(" --prefix PREFIX\n"); + printf(" Prefix all fields with PREFIX. The PREFIX is added before any other\n"); + printf(" processing, so that the extracted keys have to be matched with the PREFIX in\n"); + printf(" them. PREFIX is NOT transliterated and it is assumed to be systemd-journal\n"); + printf(" friendly.\n"); + printf("\n"); + printf(" In a YAML file:\n"); + printf(" ```yaml\n"); + printf(" prefix: 'PREFIX_' # prepend all keys with this prefix.\n"); + printf(" ```\n"); + printf("\n"); + printf(" --filename-key KEY\n"); + printf(" Add a field with KEY as the key and the current filename as value.\n"); + printf(" Automatically detects filenames when piped after 'tail -F',\n"); + printf(" and tail matches multiple filenames.\n"); + printf(" To inject the filename when tailing a single file, use --inject.\n"); + printf("\n"); + printf(" In a YAML file:\n"); + printf(" ```yaml\n"); + printf(" filename:\n"); + printf(" key: KEY\n"); + printf(" ```\n"); + printf("\n"); + printf("--------------------------------------------------------------------------------\n"); + printf(" RENAMING OF KEYS\n"); + printf("\n"); + printf(" --rename NEW=OLD\n"); + printf(" Rename fields. OLD has been transliterated and PREFIX has been added.\n"); + printf(" NEW is assumed to be systemd journal friendly.\n"); + printf("\n"); + printf(" Up to %d renaming rules are allowed.\n", MAX_RENAMES); + printf("\n"); + printf(" In a YAML file:\n"); + printf(" ```yaml\n"); + printf(" rename:\n"); + printf(" - new_key: KEY1\n"); + printf(" old_key: KEY2 # transliterated with PREFIX added\n"); + printf(" - new_key: KEY3\n"); + printf(" old_key: KEY4 # transliterated with PREFIX added\n"); + printf(" # add as many as required\n"); + printf(" ```\n"); + printf("\n"); + printf("--------------------------------------------------------------------------------\n"); + printf(" INJECTING NEW KEYS\n"); + printf("\n"); + printf(" --inject KEY=VALUE\n"); + printf(" Inject constant fields to the output (both matched and unmatched logs).\n"); + printf(" --inject entries are added to unmatched lines too, when their key is\n"); + printf(" not used in --inject-unmatched (--inject-unmatched override --inject).\n"); + printf(" VALUE can use variable like ${OTHER_KEY} to be replaced with the values\n"); + printf(" of other keys available.\n"); + printf("\n"); + printf(" Up to %d fields can be injected.\n", MAX_INJECTIONS); + printf("\n"); + printf(" In a YAML file:\n"); + printf(" ```yaml\n"); + printf(" inject:\n"); + printf(" - key: KEY1\n"); + printf(" value: 'VALUE1'\n"); + printf(" - key: KEY2\n"); + printf(" value: '${KEY3}${KEY4}' # gets the values of KEY3 and KEY4\n"); + printf(" # add as many as required\n"); + printf(" ```\n"); + printf("\n"); + printf("--------------------------------------------------------------------------------\n"); + printf(" REWRITING KEY VALUES\n"); + printf("\n"); + printf(" --rewrite KEY=/MATCH/REPLACE[/OPTIONS]\n"); + printf(" Apply a rewrite rule to the values of a specific key.\n"); + printf(" The first character after KEY= is the separator, which should also\n"); + printf(" be used between the MATCH, REPLACE and OPTIONS.\n"); + printf("\n"); + printf(" OPTIONS can be a comma separated list of `non-empty`, `dont-stop` and\n"); + printf(" `inject`.\n"); + printf("\n"); + printf(" When `non-empty` is given, MATCH is expected to be a variable\n"); + printf(" substitution using `${KEY1}${KEY2}`. Once the substitution is completed\n"); + printf(" the rule is matching the KEY only if the result is not empty.\n"); + printf(" When `non-empty` is not set, the MATCH string is expected to be a PCRE2\n"); + printf(" regular expression to be checked against the KEY value. This PCRE2\n"); + printf(" pattern may include named groups to extract parts of the KEY's value.\n"); + printf("\n"); + printf(" REPLACE supports variable substitution like `${variable}` against MATCH\n"); + printf(" named groups (when MATCH is a PCRE2 pattern) and `${KEY}` against the\n"); + printf(" keys defined so far.\n"); + printf("\n"); + printf(" Example:\n"); + printf(" --rewrite DATE=/^(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})$/\n"); + printf(" ${day}/${month}/${year}\n"); + printf(" The above will rewrite dates in the format YYYY-MM-DD to DD/MM/YYYY.\n"); + printf("\n"); + printf(" Only one rewrite rule is applied per key; the sequence of rewrites for a\n"); + printf(" given key, stops once a rule matches it. This allows providing a sequence\n"); + printf(" of independent rewriting rules for the same key, matching the different\n"); + printf(" values the key may get, and also provide a catch-all rewrite rule at the\n"); + printf(" end, for setting the key value if no other rule matched it. The rewrite\n"); + printf(" rule can allow processing more rewrite rules when OPTIONS includes\n"); + printf(" the keyword 'dont-stop'.\n"); + printf("\n"); + printf(" Up to %d rewriting rules are allowed.\n", MAX_REWRITES); + printf("\n"); + printf(" In a YAML file:\n"); + printf(" ```yaml\n"); + printf(" rewrite:\n"); + printf(" # the order if these rules in important - processed top to bottom\n"); + printf(" - key: KEY1\n"); + printf(" match: 'PCRE2 PATTERN WITH NAMED GROUPS'\n"); + printf(" value: 'all match fields and input keys as ${VARIABLE}'\n"); + printf(" inject: BOOLEAN # yes = inject the field, don't just rewrite it\n"); + printf(" stop: BOOLEAN # no = continue processing, don't stop if matched\n"); + printf(" - key: KEY2\n"); + printf(" non_empty: '${KEY3}${KEY4}' # match only if this evaluates to non empty\n"); + printf(" value: 'all input keys as ${VARIABLE}'\n"); + printf(" inject: BOOLEAN # yes = inject the field, don't just rewrite it\n"); + printf(" stop: BOOLEAN # no = continue processing, don't stop if matched\n"); + printf(" # add as many rewrites as required\n"); + printf(" ```\n"); + printf("\n"); + printf(" By default rewrite rules are applied only on fields already defined.\n"); + printf(" This allows shipping YAML files that include more rewrites than are\n"); + printf(" required for a specific input file.\n"); + printf(" Rewrite rules however allow injecting new fields when OPTIONS include\n"); + printf(" the keyword `inject` or in YAML `inject: yes` is given.\n"); + printf("\n"); + printf(" MATCH on the command line can be empty to define an unconditional rule.\n"); + printf(" Similarly, `match` and `non_empty` can be omitted in the YAML file."); + printf("\n"); + printf("--------------------------------------------------------------------------------\n"); + printf(" UNMATCHED LINES\n"); + printf("\n"); + printf(" --unmatched-key KEY\n"); + printf(" Include unmatched log entries in the output with KEY as the field name.\n"); + printf(" Use this to include unmatched entries to the output stream.\n"); + printf(" Usually it should be set to --unmatched-key=MESSAGE so that the\n"); + printf(" unmatched entry will appear as the log message in the journals.\n"); + printf(" Use --inject-unmatched to inject additional fields to unmatched lines.\n"); + printf("\n"); + printf(" In a YAML file:\n"); + printf(" ```yaml\n"); + printf(" unmatched:\n"); + printf(" key: MESSAGE # inject the error log as MESSAGE\n"); + printf(" ```\n"); + printf("\n"); + printf(" --inject-unmatched LINE\n"); + printf(" Inject lines into the output for each unmatched log entry.\n"); + printf(" Usually, --inject-unmatched=PRIORITY=3 is needed to mark the unmatched\n"); + printf(" lines as errors, so that they can easily be spotted in the journals.\n"); + printf("\n"); + printf(" Up to %d such lines can be injected.\n", MAX_INJECTIONS); + printf("\n"); + printf(" In a YAML file:\n"); + printf(" ```yaml\n"); + printf(" unmatched:\n"); + printf(" key: MESSAGE # inject the error log as MESSAGE\n"); + printf(" inject::\n"); + printf(" - key: KEY1\n"); + printf(" value: 'VALUE1'\n"); + printf(" # add as many constants as required\n"); + printf(" ```\n"); + printf("\n"); + printf("--------------------------------------------------------------------------------\n"); + printf(" FILTERING\n"); + printf("\n"); + printf(" --include PATTERN\n"); + printf(" Include only keys matching the PCRE2 PATTERN.\n"); + printf(" Useful when parsing JSON of logfmt logs, to include only the keys given.\n"); + printf(" The keys are matched after the PREFIX has been added to them.\n"); + printf("\n"); + printf(" --exclude PATTERN\n"); + printf(" Exclude the keys matching the PCRE2 PATTERN.\n"); + printf(" Useful when parsing JSON of logfmt logs, to exclude some of the keys given.\n"); + printf(" The keys are matched after the PREFIX has been added to them.\n"); + printf("\n"); + printf(" When both include and exclude patterns are set and both match a key,\n"); + printf(" exclude wins and the key will not be added, like a pipeline, we first\n"); + printf(" include it and then exclude it.\n"); + printf("\n"); + printf(" In a YAML file:\n"); + printf(" ```yaml\n"); + printf(" filter:\n"); + printf(" include: 'PCRE2 PATTERN MATCHING KEY NAMES TO INCLUDE'\n"); + printf(" exclude: 'PCRE2 PATTERN MATCHING KEY NAMES TO EXCLUDE'\n"); + printf(" ```\n"); + printf("\n"); + printf("--------------------------------------------------------------------------------\n"); + printf(" OTHER\n"); + printf("\n"); + printf(" -h, or --help\n"); + printf(" Display this help and exit.\n"); + printf("\n"); + printf(" --show-config\n"); + printf(" Show the configuration in YAML format before starting the job.\n"); + printf(" This is also an easy way to convert command line parameters to yaml.\n"); + printf("\n"); + printf("The program accepts all parameters as both --option=value and --option value.\n"); + printf("\n"); + printf("The maximum log line length accepted is %d characters.\n", MAX_LINE_LENGTH); + printf("\n"); + printf("PIPELINE AND SEQUENCE OF PROCESSING\n"); + printf("\n"); + printf("This is a simple diagram of the pipeline taking place:\n"); + printf(" \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" | INPUT | \n"); + printf(" | read one log line at a time | \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" v v v v v v \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" | EXTRACT FIELDS AND VALUES | \n"); + printf(" | JSON, logfmt, or pattern based | \n"); + printf(" | (apply optional PREFIX - all keys use capitals) | \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" v v v v v v \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" | RENAME FIELDS | \n"); + printf(" | change the names of the fields | \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" v v v v v v \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" | INJECT NEW FIELDS | \n"); + printf(" | constants, or other field values as variables | \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" v v v v v v \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" | REWRITE FIELD VALUES | \n"); + printf(" | pipeline multiple rewriting rules to alter | \n"); + printf(" | the values of the fields | \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" v v v v v v \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" | FILTER FIELDS | \n"); + printf(" | use include and exclude patterns on the field | \n"); + printf(" | names, to select which fields are sent to journal | \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" v v v v v v \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" | OUTPUT | \n"); + printf(" | generate Journal Export Format | \n"); + printf(" +---------------------------------------------------+ \n"); + printf(" \n"); + printf("--------------------------------------------------------------------------------\n"); + printf("JOURNAL FIELDS RULES (enforced by systemd-journald)\n"); + printf("\n"); + printf(" - field names can be up to 64 characters\n"); + printf(" - the only allowed field characters are A-Z, 0-9 and underscore\n"); + printf(" - the first character of fields cannot be a digit\n"); + printf(" - protected journal fields start with underscore:\n"); + printf(" * they are accepted by systemd-journal-remote\n"); + printf(" * they are NOT accepted by a local systemd-journald\n"); + printf("\n"); + printf(" For best results, always include these fields:\n"); + printf("\n"); + printf(" MESSAGE=TEXT\n"); + printf(" The MESSAGE is the body of the log entry.\n"); + printf(" This field is what we usually see in our logs.\n"); + printf("\n"); + printf(" PRIORITY=NUMBER\n"); + printf(" PRIORITY sets the severity of the log entry.\n"); + printf(" 0=emerg, 1=alert, 2=crit, 3=err, 4=warn, 5=notice, 6=info, 7=debug\n"); + printf(" - Emergency events (0) are usually broadcast to all terminals.\n"); + printf(" - Emergency, alert, critical, and error (0-3) are usually colored red.\n"); + printf(" - Warning (4) entries are usually colored yellow.\n"); + printf(" - Notice (5) entries are usually bold or have a brighter white color.\n"); + printf(" - Info (6) entries are the default.\n"); + printf(" - Debug (7) entries are usually grayed or dimmed.\n"); + printf("\n"); + printf(" SYSLOG_IDENTIFIER=NAME\n"); + printf(" SYSLOG_IDENTIFIER sets the name of application.\n"); + printf(" Use something descriptive, like: SYSLOG_IDENTIFIER=nginx-logs\n"); + printf("\n"); + printf("You can find the most common fields at 'man systemd.journal-fields'.\n"); + printf("\n"); +} diff --git a/collectors/log2journal/log2journal-inject.c b/collectors/log2journal/log2journal-inject.c new file mode 100644 index 000000000..45158066b --- /dev/null +++ b/collectors/log2journal/log2journal-inject.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "log2journal.h" + +void injection_cleanup(INJECTION *inj) { + hashed_key_cleanup(&inj->key); + replace_pattern_cleanup(&inj->value); +} + +static inline bool log_job_injection_replace(INJECTION *inj, const char *key, size_t key_len, const char *value, size_t value_len) { + if(key_len > JOURNAL_MAX_KEY_LEN) + log2stderr("WARNING: injection key '%.*s' is too long for journal. Will be truncated.", (int)key_len, key); + + if(value_len > JOURNAL_MAX_VALUE_LEN) + log2stderr("WARNING: injection value of key '%.*s' is too long for journal. Will be truncated.", (int)key_len, key); + + hashed_key_len_set(&inj->key, key, key_len); + char *v = strndupz(value, value_len); + bool ret = replace_pattern_set(&inj->value, v); + freez(v); + + return ret; +} + +bool log_job_injection_add(LOG_JOB *jb, const char *key, size_t key_len, const char *value, size_t value_len, bool unmatched) { + if (unmatched) { + if (jb->unmatched.injections.used >= MAX_INJECTIONS) { + log2stderr("Error: too many unmatched injections. You can inject up to %d lines.", MAX_INJECTIONS); + return false; + } + } + else { + if (jb->injections.used >= MAX_INJECTIONS) { + log2stderr("Error: too many injections. You can inject up to %d lines.", MAX_INJECTIONS); + return false; + } + } + + bool ret; + if (unmatched) { + ret = log_job_injection_replace(&jb->unmatched.injections.keys[jb->unmatched.injections.used++], + key, key_len, value, value_len); + } else { + ret = log_job_injection_replace(&jb->injections.keys[jb->injections.used++], + key, key_len, value, value_len); + } + + return ret; +} diff --git a/collectors/log2journal/log2journal-json.c b/collectors/log2journal/log2journal-json.c new file mode 100644 index 000000000..2ca294e4d --- /dev/null +++ b/collectors/log2journal/log2journal-json.c @@ -0,0 +1,630 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "log2journal.h" + +#define JSON_ERROR_LINE_MAX 1024 +#define JSON_KEY_MAX 1024 +#define JSON_DEPTH_MAX 100 + +struct log_json_state { + LOG_JOB *jb; + + const char *line; + uint32_t pos; + uint32_t depth; + char *stack[JSON_DEPTH_MAX]; + + char key[JSON_KEY_MAX]; + char msg[JSON_ERROR_LINE_MAX]; +}; + +static inline bool json_parse_object(LOG_JSON_STATE *js); +static inline bool json_parse_array(LOG_JSON_STATE *js); + +#define json_current_pos(js) &(js)->line[(js)->pos] +#define json_consume_char(js) ++(js)->pos + +static inline void json_process_key_value(LOG_JSON_STATE *js, const char *value, size_t len) { + log_job_send_extracted_key_value(js->jb, js->key, value, len); +} + +static inline void json_skip_spaces(LOG_JSON_STATE *js) { + const char *s = json_current_pos(js); + const char *start = s; + + while(isspace(*s)) s++; + + js->pos += s - start; +} + +static inline bool json_expect_char_after_white_space(LOG_JSON_STATE *js, const char *expected) { + json_skip_spaces(js); + + const char *s = json_current_pos(js); + for(const char *e = expected; *e ;e++) { + if (*s == *e) + return true; + } + + snprintf(js->msg, sizeof(js->msg), + "JSON PARSER: character '%c' is not one of the expected characters (%s), at pos %zu", + *s ? *s : '?', expected, js->pos); + + return false; +} + +static inline bool json_parse_null(LOG_JSON_STATE *js) { + const char *s = json_current_pos(js); + if (strncmp(s, "null", 4) == 0) { + json_process_key_value(js, "null", 4); + js->pos += 4; + return true; + } + else { + snprintf(js->msg, sizeof(js->msg), + "JSON PARSER: expected 'null', found '%.4s' at position %zu", s, js->pos); + return false; + } +} + +static inline bool json_parse_true(LOG_JSON_STATE *js) { + const char *s = json_current_pos(js); + if (strncmp(s, "true", 4) == 0) { + json_process_key_value(js, "true", 4); + js->pos += 4; + return true; + } + else { + snprintf(js->msg, sizeof(js->msg), + "JSON PARSER: expected 'true', found '%.4s' at position %zu", s, js->pos); + return false; + } +} + +static inline bool json_parse_false(LOG_JSON_STATE *js) { + const char *s = json_current_pos(js); + if (strncmp(s, "false", 5) == 0) { + json_process_key_value(js, "false", 5); + js->pos += 5; + return true; + } + else { + snprintf(js->msg, sizeof(js->msg), + "JSON PARSER: expected 'false', found '%.4s' at position %zu", s, js->pos); + return false; + } +} + +static inline bool json_parse_number(LOG_JSON_STATE *js) { + static __thread char value[8192]; + + value[0] = '\0'; + char *d = value; + const char *s = json_current_pos(js); + size_t remaining = sizeof(value) - 1; // Reserve space for null terminator + + // Optional minus sign + if (*s == '-') { + *d++ = *s++; + remaining--; + } + + // Digits before decimal point + while (*s >= '0' && *s <= '9') { + if (remaining < 2) { + snprintf(js->msg, sizeof(js->msg), "JSON PARSER: truncated number value at pos %zu", js->pos); + return false; + } + *d++ = *s++; + remaining--; + } + + // Decimal point and fractional part + if (*s == '.') { + *d++ = *s++; + remaining--; + + while (*s >= '0' && *s <= '9') { + if (remaining < 2) { + snprintf(js->msg, sizeof(js->msg), "JSON PARSER: truncated fractional part at pos %zu", js->pos); + return false; + } + *d++ = *s++; + remaining--; + } + } + + // Exponent part + if (*s == 'e' || *s == 'E') { + *d++ = *s++; + remaining--; + + // Optional sign in exponent + if (*s == '+' || *s == '-') { + *d++ = *s++; + remaining--; + } + + while (*s >= '0' && *s <= '9') { + if (remaining < 2) { + snprintf(js->msg, sizeof(js->msg), "JSON PARSER: truncated exponent at pos %zu", js->pos); + return false; + } + *d++ = *s++; + remaining--; + } + } + + *d = '\0'; + js->pos += d - value; + + if (d > value) { + json_process_key_value(js, value, d - value); + return true; + } else { + snprintf(js->msg, sizeof(js->msg), "JSON PARSER: invalid number format at pos %zu", js->pos); + return false; + } +} + +static inline bool encode_utf8(unsigned codepoint, char **d, size_t *remaining) { + if (codepoint <= 0x7F) { + // 1-byte sequence + if (*remaining < 2) return false; // +1 for the null + *(*d)++ = (char)codepoint; + (*remaining)--; + } + else if (codepoint <= 0x7FF) { + // 2-byte sequence + if (*remaining < 3) return false; // +1 for the null + *(*d)++ = (char)(0xC0 | ((codepoint >> 6) & 0x1F)); + *(*d)++ = (char)(0x80 | (codepoint & 0x3F)); + (*remaining) -= 2; + } + else if (codepoint <= 0xFFFF) { + // 3-byte sequence + if (*remaining < 4) return false; // +1 for the null + *(*d)++ = (char)(0xE0 | ((codepoint >> 12) & 0x0F)); + *(*d)++ = (char)(0x80 | ((codepoint >> 6) & 0x3F)); + *(*d)++ = (char)(0x80 | (codepoint & 0x3F)); + (*remaining) -= 3; + } + else if (codepoint <= 0x10FFFF) { + // 4-byte sequence + if (*remaining < 5) return false; // +1 for the null + *(*d)++ = (char)(0xF0 | ((codepoint >> 18) & 0x07)); + *(*d)++ = (char)(0x80 | ((codepoint >> 12) & 0x3F)); + *(*d)++ = (char)(0x80 | ((codepoint >> 6) & 0x3F)); + *(*d)++ = (char)(0x80 | (codepoint & 0x3F)); + (*remaining) -= 4; + } + else + // Invalid code point + return false; + + return true; +} + +size_t parse_surrogate(const char *s, char *d, size_t *remaining) { + if (s[0] != '\\' || (s[1] != 'u' && s[1] != 'U')) { + return 0; // Not a valid Unicode escape sequence + } + + char hex[9] = {0}; // Buffer for the hexadecimal value + unsigned codepoint; + + if (s[1] == 'u') { + // Handle \uXXXX + if (!isxdigit(s[2]) || !isxdigit(s[3]) || !isxdigit(s[4]) || !isxdigit(s[5])) { + return 0; // Not a valid \uXXXX sequence + } + + hex[0] = s[2]; + hex[1] = s[3]; + hex[2] = s[4]; + hex[3] = s[5]; + codepoint = (unsigned)strtoul(hex, NULL, 16); + + if (codepoint >= 0xD800 && codepoint <= 0xDBFF) { + // Possible start of surrogate pair + if (s[6] == '\\' && s[7] == 'u' && isxdigit(s[8]) && isxdigit(s[9]) && + isxdigit(s[10]) && isxdigit(s[11])) { + // Valid low surrogate + unsigned low_surrogate = strtoul(&s[8], NULL, 16); + if (low_surrogate < 0xDC00 || low_surrogate > 0xDFFF) { + return 0; // Invalid low surrogate + } + codepoint = 0x10000 + ((codepoint - 0xD800) << 10) + (low_surrogate - 0xDC00); + return encode_utf8(codepoint, &d, remaining) ? 12 : 0; // \uXXXX\uXXXX + } + } + + // Single \uXXXX + return encode_utf8(codepoint, &d, remaining) ? 6 : 0; + } + else { + // Handle \UXXXXXXXX + for (int i = 2; i < 10; i++) { + if (!isxdigit(s[i])) { + return 0; // Not a valid \UXXXXXXXX sequence + } + hex[i - 2] = s[i]; + } + codepoint = (unsigned)strtoul(hex, NULL, 16); + return encode_utf8(codepoint, &d, remaining) ? 10 : 0; // \UXXXXXXXX + } +} + +static inline void copy_newline(LOG_JSON_STATE *js __maybe_unused, char **d, size_t *remaining) { + if(*remaining > 3) { + *(*d)++ = '\\'; + *(*d)++ = 'n'; + (*remaining) -= 2; + } +} + +static inline void copy_tab(LOG_JSON_STATE *js __maybe_unused, char **d, size_t *remaining) { + if(*remaining > 3) { + *(*d)++ = '\\'; + *(*d)++ = 't'; + (*remaining) -= 2; + } +} + +static inline bool json_parse_string(LOG_JSON_STATE *js) { + static __thread char value[JOURNAL_MAX_VALUE_LEN]; + + if(!json_expect_char_after_white_space(js, "\"")) + return false; + + json_consume_char(js); + + value[0] = '\0'; + char *d = value; + const char *s = json_current_pos(js); + size_t remaining = sizeof(value); + + while (*s && *s != '"') { + char c; + + if (*s == '\\') { + s++; + + switch (*s) { + case 'n': + copy_newline(js, &d, &remaining); + s++; + continue; + + case 't': + copy_tab(js, &d, &remaining); + s++; + continue; + + case 'f': + case 'b': + case 'r': + c = ' '; + s++; + break; + + case 'u': { + size_t old_remaining = remaining; + size_t consumed = parse_surrogate(s - 1, d, &remaining); + if (consumed > 0) { + s += consumed - 1; // -1 because we already incremented s after '\\' + d += old_remaining - remaining; + continue; + } + else { + *d++ = '\\'; + remaining--; + c = *s++; + } + } + break; + + default: + c = *s++; + break; + } + } + else + c = *s++; + + if(remaining < 2) { + snprintf(js->msg, sizeof(js->msg), + "JSON PARSER: truncated string value at pos %zu", js->pos); + return false; + } + else { + *d++ = c; + remaining--; + } + } + *d = '\0'; + js->pos += s - json_current_pos(js); + + if(!json_expect_char_after_white_space(js, "\"")) + return false; + + json_consume_char(js); + + if(d > value) + json_process_key_value(js, value, d - value); + + return true; +} + +static inline bool json_parse_key_and_push(LOG_JSON_STATE *js) { + if (!json_expect_char_after_white_space(js, "\"")) + return false; + + if(js->depth >= JSON_DEPTH_MAX - 1) { + snprintf(js->msg, sizeof(js->msg), + "JSON PARSER: object too deep, at pos %zu", js->pos); + return false; + } + + json_consume_char(js); + + char *d = js->stack[js->depth]; + if(js->depth) + *d++ = '_'; + + size_t remaining = sizeof(js->key) - (d - js->key); + + const char *s = json_current_pos(js); + char last_c = '\0'; + while(*s && *s != '\"') { + char c; + + if (*s == '\\') { + s++; + c = (char)((*s == 'u') ? '_' : journal_key_characters_map[(unsigned char)*s]); + s += (*s == 'u') ? 5 : 1; + } + else + c = journal_key_characters_map[(unsigned char)*s++]; + + if(c == '_' && last_c == '_') + continue; + else { + if(remaining < 2) { + snprintf(js->msg, sizeof(js->msg), + "JSON PARSER: key buffer full - keys are too long, at pos %zu", js->pos); + return false; + } + *d++ = c; + remaining--; + } + + last_c = c; + } + *d = '\0'; + js->pos += s - json_current_pos(js); + + if (!json_expect_char_after_white_space(js, "\"")) + return false; + + json_consume_char(js); + + js->stack[++js->depth] = d; + + return true; +} + +static inline bool json_key_pop(LOG_JSON_STATE *js) { + if(js->depth <= 0) { + snprintf(js->msg, sizeof(js->msg), + "JSON PARSER: cannot pop a key at depth %zu, at pos %zu", js->depth, js->pos); + return false; + } + + char *k = js->stack[js->depth--]; + *k = '\0'; + return true; +} + +static inline bool json_parse_value(LOG_JSON_STATE *js) { + if(!json_expect_char_after_white_space(js, "-.0123456789tfn\"{[")) + return false; + + const char *s = json_current_pos(js); + switch(*s) { + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return json_parse_number(js); + + case 't': + return json_parse_true(js); + + case 'f': + return json_parse_false(js); + + case 'n': + return json_parse_null(js); + + case '"': + return json_parse_string(js); + + case '{': + return json_parse_object(js); + + case '[': + return json_parse_array(js); + } + + snprintf(js->msg, sizeof(js->msg), + "JSON PARSER: unexpected character at pos %zu", js->pos); + return false; +} + +static inline bool json_key_index_and_push(LOG_JSON_STATE *js, size_t index) { + char *d = js->stack[js->depth]; + if(js->depth > 0) { + *d++ = '_'; + } + + // Convert index to string manually + char temp[32]; + char *t = temp + sizeof(temp) - 1; // Start at the end of the buffer + *t = '\0'; + + do { + *--t = (char)((index % 10) + '0'); + index /= 10; + } while (index > 0); + + size_t remaining = sizeof(js->key) - (d - js->key); + + // Append the index to the key + while (*t) { + if(remaining < 2) { + snprintf(js->msg, sizeof(js->msg), + "JSON PARSER: key buffer full - keys are too long, at pos %zu", js->pos); + return false; + } + + *d++ = *t++; + remaining--; + } + + *d = '\0'; // Null-terminate the key + js->stack[++js->depth] = d; + + return true; +} + +static inline bool json_parse_array(LOG_JSON_STATE *js) { + if(!json_expect_char_after_white_space(js, "[")) + return false; + + json_consume_char(js); + + size_t index = 0; + do { + if(!json_key_index_and_push(js, index)) + return false; + + if(!json_parse_value(js)) + return false; + + json_key_pop(js); + + if(!json_expect_char_after_white_space(js, ",]")) + return false; + + const char *s = json_current_pos(js); + json_consume_char(js); + if(*s == ',') { + index++; + continue; + } + else // } + break; + + } while(true); + + return true; +} + +static inline bool json_parse_object(LOG_JSON_STATE *js) { + if(!json_expect_char_after_white_space(js, "{")) + return false; + + json_consume_char(js); + + do { + if (!json_expect_char_after_white_space(js, "\"")) + return false; + + if(!json_parse_key_and_push(js)) + return false; + + if(!json_expect_char_after_white_space(js, ":")) + return false; + + json_consume_char(js); + + if(!json_parse_value(js)) + return false; + + json_key_pop(js); + + if(!json_expect_char_after_white_space(js, ",}")) + return false; + + const char *s = json_current_pos(js); + json_consume_char(js); + if(*s == ',') + continue; + else // } + break; + + } while(true); + + return true; +} + +LOG_JSON_STATE *json_parser_create(LOG_JOB *jb) { + LOG_JSON_STATE *js = mallocz(sizeof(LOG_JSON_STATE)); + memset(js, 0, sizeof(LOG_JSON_STATE)); + js->jb = jb; + + if(jb->prefix) + copy_to_buffer(js->key, sizeof(js->key), js->jb->prefix, strlen(js->jb->prefix)); + + js->stack[0] = &js->key[strlen(js->key)]; + + return js; +} + +void json_parser_destroy(LOG_JSON_STATE *js) { + if(js) + freez(js); +} + +const char *json_parser_error(LOG_JSON_STATE *js) { + return js->msg; +} + +bool json_parse_document(LOG_JSON_STATE *js, const char *txt) { + js->line = txt; + js->pos = 0; + js->msg[0] = '\0'; + js->stack[0][0] = '\0'; + js->depth = 0; + + if(!json_parse_object(js)) + return false; + + json_skip_spaces(js); + const char *s = json_current_pos(js); + + if(*s) { + snprintf(js->msg, sizeof(js->msg), + "JSON PARSER: excess characters found after document is finished, at pos %zu", js->pos); + return false; + } + + return true; +} + +void json_test(void) { + LOG_JOB jb = { .prefix = "NIGNX_" }; + LOG_JSON_STATE *json = json_parser_create(&jb); + + json_parse_document(json, "{\"value\":\"\\u\\u039A\\u03B1\\u03BB\\u03B7\\u03BC\\u03AD\\u03C1\\u03B1\"}"); + + json_parser_destroy(json); +} diff --git a/collectors/log2journal/log2journal-logfmt.c b/collectors/log2journal/log2journal-logfmt.c new file mode 100644 index 000000000..5966cce90 --- /dev/null +++ b/collectors/log2journal/log2journal-logfmt.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "log2journal.h" + +#define LOGFMT_ERROR_LINE_MAX 1024 +#define LOGFMT_KEY_MAX 1024 + +struct logfmt_state { + LOG_JOB *jb; + + const char *line; + uint32_t pos; + uint32_t key_start; + + char key[LOGFMT_KEY_MAX]; + char msg[LOGFMT_ERROR_LINE_MAX]; +}; + +#define logfmt_current_pos(lfs) &(lfs)->line[(lfs)->pos] +#define logfmt_consume_char(lfs) ++(lfs)->pos + +static inline void logfmt_process_key_value(LOGFMT_STATE *lfs, const char *value, size_t len) { + log_job_send_extracted_key_value(lfs->jb, lfs->key, value, len); +} + +static inline void logfmt_skip_spaces(LOGFMT_STATE *lfs) { + const char *s = logfmt_current_pos(lfs); + const char *start = s; + + while(isspace(*s)) s++; + + lfs->pos += s - start; +} + +static inline void copy_newline(LOGFMT_STATE *lfs __maybe_unused, char **d, size_t *remaining) { + if(*remaining > 3) { + *(*d)++ = '\\'; + *(*d)++ = 'n'; + (*remaining) -= 2; + } +} + +static inline void copy_tab(LOGFMT_STATE *lfs __maybe_unused, char **d, size_t *remaining) { + if(*remaining > 3) { + *(*d)++ = '\\'; + *(*d)++ = 't'; + (*remaining) -= 2; + } +} + +static inline bool logftm_parse_value(LOGFMT_STATE *lfs) { + static __thread char value[JOURNAL_MAX_VALUE_LEN]; + + char quote = '\0'; + const char *s = logfmt_current_pos(lfs); + if(*s == '\"' || *s == '\'') { + quote = *s; + logfmt_consume_char(lfs); + } + + value[0] = '\0'; + char *d = value; + s = logfmt_current_pos(lfs); + size_t remaining = sizeof(value); + + char end_char = (char)(quote == '\0' ? ' ' : quote); + while (*s && *s != end_char) { + char c; + + if (*s == '\\') { + s++; + + switch (*s) { + case 'n': + copy_newline(lfs, &d, &remaining); + s++; + continue; + + case 't': + copy_tab(lfs, &d, &remaining); + s++; + continue; + + case 'f': + case 'b': + case 'r': + c = ' '; + s++; + break; + + default: + c = *s++; + break; + } + } + else + c = *s++; + + if(remaining < 2) { + snprintf(lfs->msg, sizeof(lfs->msg), + "LOGFMT PARSER: truncated string value at pos %zu", lfs->pos); + return false; + } + else { + *d++ = c; + remaining--; + } + } + *d = '\0'; + lfs->pos += s - logfmt_current_pos(lfs); + + s = logfmt_current_pos(lfs); + + if(quote != '\0') { + if (*s != quote) { + snprintf(lfs->msg, sizeof(lfs->msg), + "LOGFMT PARSER: missing quote at pos %zu: '%s'", + lfs->pos, s); + return false; + } + else + logfmt_consume_char(lfs); + } + + if(d > value) + logfmt_process_key_value(lfs, value, d - value); + + return true; +} + +static inline bool logfmt_parse_key(LOGFMT_STATE *lfs) { + logfmt_skip_spaces(lfs); + + char *d = &lfs->key[lfs->key_start]; + + size_t remaining = sizeof(lfs->key) - (d - lfs->key); + + const char *s = logfmt_current_pos(lfs); + char last_c = '\0'; + while(*s && *s != '=') { + char c; + + if (*s == '\\') + s++; + + c = journal_key_characters_map[(unsigned char)*s++]; + + if(c == '_' && last_c == '_') + continue; + else { + if(remaining < 2) { + snprintf(lfs->msg, sizeof(lfs->msg), + "LOGFMT PARSER: key buffer full - keys are too long, at pos %zu", lfs->pos); + return false; + } + *d++ = c; + remaining--; + } + + last_c = c; + } + *d = '\0'; + lfs->pos += s - logfmt_current_pos(lfs); + + s = logfmt_current_pos(lfs); + if(*s != '=') { + snprintf(lfs->msg, sizeof(lfs->msg), + "LOGFMT PARSER: key is missing the equal sign, at pos %zu", lfs->pos); + return false; + } + + logfmt_consume_char(lfs); + + return true; +} + +LOGFMT_STATE *logfmt_parser_create(LOG_JOB *jb) { + LOGFMT_STATE *lfs = mallocz(sizeof(LOGFMT_STATE)); + memset(lfs, 0, sizeof(LOGFMT_STATE)); + lfs->jb = jb; + + if(jb->prefix) + lfs->key_start = copy_to_buffer(lfs->key, sizeof(lfs->key), lfs->jb->prefix, strlen(lfs->jb->prefix)); + + return lfs; +} + +void logfmt_parser_destroy(LOGFMT_STATE *lfs) { + if(lfs) + freez(lfs); +} + +const char *logfmt_parser_error(LOGFMT_STATE *lfs) { + return lfs->msg; +} + +bool logfmt_parse_document(LOGFMT_STATE *lfs, const char *txt) { + lfs->line = txt; + lfs->pos = 0; + lfs->msg[0] = '\0'; + + const char *s; + do { + if(!logfmt_parse_key(lfs)) + return false; + + if(!logftm_parse_value(lfs)) + return false; + + logfmt_skip_spaces(lfs); + + s = logfmt_current_pos(lfs); + } while(*s); + + return true; +} + + +void logfmt_test(void) { + LOG_JOB jb = { .prefix = "NIGNX_" }; + LOGFMT_STATE *logfmt = logfmt_parser_create(&jb); + + logfmt_parse_document(logfmt, "x=1 y=2 z=\"3 \\ 4\" 5 "); + + logfmt_parser_destroy(logfmt); +} diff --git a/collectors/log2journal/log2journal-params.c b/collectors/log2journal/log2journal-params.c new file mode 100644 index 000000000..a7bb3e263 --- /dev/null +++ b/collectors/log2journal/log2journal-params.c @@ -0,0 +1,404 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "log2journal.h" + +// ---------------------------------------------------------------------------- + +void log_job_init(LOG_JOB *jb) { + memset(jb, 0, sizeof(*jb)); + simple_hashtable_init_KEY(&jb->hashtable, 32); + hashed_key_set(&jb->line.key, "LINE"); +} + +static void simple_hashtable_cleanup_allocated_keys(SIMPLE_HASHTABLE_KEY *ht) { + SIMPLE_HASHTABLE_FOREACH_READ_ONLY(ht, sl, _KEY) { + HASHED_KEY *k = SIMPLE_HASHTABLE_FOREACH_READ_ONLY_VALUE(sl); + if(k && k->flags & HK_HASHTABLE_ALLOCATED) { + // the order of these statements is important! + simple_hashtable_del_slot_KEY(ht, sl); // remove any references to n + hashed_key_cleanup(k); // cleanup the internals of n + freez(k); // free n + } + } +} + +void log_job_cleanup(LOG_JOB *jb) { + hashed_key_cleanup(&jb->line.key); + + if(jb->prefix) { + freez((void *) jb->prefix); + jb->prefix = NULL; + } + + if(jb->pattern) { + freez((void *) jb->pattern); + jb->pattern = NULL; + } + + for(size_t i = 0; i < jb->injections.used ;i++) + injection_cleanup(&jb->injections.keys[i]); + + for(size_t i = 0; i < jb->unmatched.injections.used ;i++) + injection_cleanup(&jb->unmatched.injections.keys[i]); + + for(size_t i = 0; i < jb->renames.used ;i++) + rename_cleanup(&jb->renames.array[i]); + + for(size_t i = 0; i < jb->rewrites.used; i++) + rewrite_cleanup(&jb->rewrites.array[i]); + + txt_cleanup(&jb->rewrites.tmp); + txt_cleanup(&jb->filename.current); + + simple_hashtable_cleanup_allocated_keys(&jb->hashtable); + simple_hashtable_destroy_KEY(&jb->hashtable); + + // remove references to everything else, to reveal them in valgrind + memset(jb, 0, sizeof(*jb)); +} + +// ---------------------------------------------------------------------------- + +bool log_job_filename_key_set(LOG_JOB *jb, const char *key, size_t key_len) { + if(!key || !*key) { + log2stderr("filename key cannot be empty."); + return false; + } + + hashed_key_len_set(&jb->filename.key, key, key_len); + + return true; +} + +bool log_job_key_prefix_set(LOG_JOB *jb, const char *prefix, size_t prefix_len) { + if(!prefix || !*prefix) { + log2stderr("filename key cannot be empty."); + return false; + } + + if(jb->prefix) + freez((char*)jb->prefix); + + jb->prefix = strndupz(prefix, prefix_len); + + return true; +} + +bool log_job_pattern_set(LOG_JOB *jb, const char *pattern, size_t pattern_len) { + if(!pattern || !*pattern) { + log2stderr("filename key cannot be empty."); + return false; + } + + if(jb->pattern) + freez((char*)jb->pattern); + + jb->pattern = strndupz(pattern, pattern_len); + + return true; +} + +bool log_job_include_pattern_set(LOG_JOB *jb, const char *pattern, size_t pattern_len) { + if(jb->filter.include.re) { + log2stderr("FILTER INCLUDE: there is already an include filter set"); + return false; + } + + if(!search_pattern_set(&jb->filter.include, pattern, pattern_len)) { + log2stderr("FILTER INCLUDE: failed: %s", jb->filter.include.error.txt); + return false; + } + + return true; +} + +bool log_job_exclude_pattern_set(LOG_JOB *jb, const char *pattern, size_t pattern_len) { + if(jb->filter.exclude.re) { + log2stderr("FILTER INCLUDE: there is already an exclude filter set"); + return false; + } + + if(!search_pattern_set(&jb->filter.exclude, pattern, pattern_len)) { + log2stderr("FILTER EXCLUDE: failed: %s", jb->filter.exclude.error.txt); + return false; + } + + return true; +} + +// ---------------------------------------------------------------------------- + +static bool parse_rename(LOG_JOB *jb, const char *param) { + // Search for '=' in param + const char *equal_sign = strchr(param, '='); + if (!equal_sign || equal_sign == param) { + log2stderr("Error: Invalid rename format, '=' not found in %s", param); + return false; + } + + const char *new_key = param; + size_t new_key_len = equal_sign - new_key; + + const char *old_key = equal_sign + 1; + size_t old_key_len = strlen(old_key); + + return log_job_rename_add(jb, new_key, new_key_len, old_key, old_key_len); +} + +static bool is_symbol(char c) { + return !isalpha(c) && !isdigit(c) && !iscntrl(c); +} + +struct { + const char *keyword; + int action; + RW_FLAGS flag; +} rewrite_flags[] = { + {"match", 1, RW_MATCH_PCRE2}, + {"match", 0, RW_MATCH_NON_EMPTY}, + + {"regex", 1, RW_MATCH_PCRE2}, + {"regex", 0, RW_MATCH_NON_EMPTY}, + + {"pcre2", 1, RW_MATCH_PCRE2}, + {"pcre2", 0, RW_MATCH_NON_EMPTY}, + + {"non_empty", 1, RW_MATCH_NON_EMPTY}, + {"non_empty", 0, RW_MATCH_PCRE2}, + + {"non-empty", 1, RW_MATCH_NON_EMPTY}, + {"non-empty", 0, RW_MATCH_PCRE2}, + + {"not_empty", 1, RW_MATCH_NON_EMPTY}, + {"not_empty", 0, RW_MATCH_PCRE2}, + + {"not-empty", 1, RW_MATCH_NON_EMPTY}, + {"not-empty", 0, RW_MATCH_PCRE2}, + + {"stop", 0, RW_DONT_STOP}, + {"no-stop", 1, RW_DONT_STOP}, + {"no_stop", 1, RW_DONT_STOP}, + {"dont-stop", 1, RW_DONT_STOP}, + {"dont_stop", 1, RW_DONT_STOP}, + {"continue", 1, RW_DONT_STOP}, + {"inject", 1, RW_INJECT}, + {"existing", 0, RW_INJECT}, +}; + +RW_FLAGS parse_rewrite_flags(const char *options) { + RW_FLAGS flags = RW_MATCH_PCRE2; // Default option + + // Tokenize the input options using "," + char *token; + char *optionsCopy = strdup(options); // Make a copy to avoid modifying the original + token = strtok(optionsCopy, ","); + + while (token != NULL) { + // Find the keyword-action mapping + bool found = false; + + for (size_t i = 0; i < sizeof(rewrite_flags) / sizeof(rewrite_flags[0]); i++) { + if (strcmp(token, rewrite_flags[i].keyword) == 0) { + if (rewrite_flags[i].action == 1) { + flags |= rewrite_flags[i].flag; // Set the flag + } else { + flags &= ~rewrite_flags[i].flag; // Unset the flag + } + + found = true; + } + } + + if(!found) + log2stderr("Warning: rewrite options '%s' is not understood.", token); + + // Get the next token + token = strtok(NULL, ","); + } + + free(optionsCopy); // Free the copied string + + return flags; +} + + +static bool parse_rewrite(LOG_JOB *jb, const char *param) { + // Search for '=' in param + const char *equal_sign = strchr(param, '='); + if (!equal_sign || equal_sign == param) { + log2stderr("Error: Invalid rewrite format, '=' not found in %s", param); + return false; + } + + // Get the next character as the separator + char separator = *(equal_sign + 1); + if (!separator || !is_symbol(separator)) { + log2stderr("Error: rewrite separator not found after '=', or is not one of /\\|-# in: %s", param); + return false; + } + + // Find the next occurrence of the separator + const char *second_separator = strchr(equal_sign + 2, separator); + if (!second_separator) { + log2stderr("Error: rewrite second separator not found in: %s", param); + return false; + } + + // Check if the search pattern is empty + if (equal_sign + 1 == second_separator) { + log2stderr("Error: rewrite search pattern is empty in: %s", param); + return false; + } + + // Check if the replacement pattern is empty + if (*(second_separator + 1) == '\0') { + log2stderr("Error: rewrite replacement pattern is empty in: %s", param); + return false; + } + + RW_FLAGS flags = RW_MATCH_PCRE2; + const char *third_separator = strchr(second_separator + 1, separator); + if(third_separator) + flags = parse_rewrite_flags(third_separator + 1); + + // Extract key, search pattern, and replacement pattern + char *key = strndupz(param, equal_sign - param); + char *search_pattern = strndupz(equal_sign + 2, second_separator - (equal_sign + 2)); + char *replace_pattern = third_separator ? strndup(second_separator + 1, third_separator - (second_separator + 1)) : strdupz(second_separator + 1); + + if(!*search_pattern) + flags &= ~RW_MATCH_PCRE2; + + bool ret = log_job_rewrite_add(jb, key, flags, search_pattern, replace_pattern); + + freez(key); + freez(search_pattern); + freez(replace_pattern); + + return ret; +} + +static bool parse_inject(LOG_JOB *jb, const char *value, bool unmatched) { + const char *equal = strchr(value, '='); + if (!equal) { + log2stderr("Error: injection '%s' does not have an equal sign.", value); + return false; + } + + const char *key = value; + const char *val = equal + 1; + log_job_injection_add(jb, key, equal - key, val, strlen(val), unmatched); + + return true; +} + +bool log_job_command_line_parse_parameters(LOG_JOB *jb, int argc, char **argv) { + for (int i = 1; i < argc; i++) { + char *arg = argv[i]; + if (strcmp(arg, "--help") == 0 || strcmp(arg, "-h") == 0) { + log_job_command_line_help(argv[0]); + exit(0); + } +#if defined(NETDATA_DEV_MODE) || defined(NETDATA_INTERNAL_CHECKS) + else if(strcmp(arg, "--test") == 0) { + // logfmt_test(); + json_test(); + exit(1); + } +#endif + else if (strcmp(arg, "--show-config") == 0) { + jb->show_config = true; + } + else { + char buffer[1024]; + char *param = NULL; + char *value = NULL; + + char *equal_sign = strchr(arg, '='); + if (equal_sign) { + copy_to_buffer(buffer, sizeof(buffer), arg, equal_sign - arg); + param = buffer; + value = equal_sign + 1; + } + else { + param = arg; + if (i + 1 < argc) { + value = argv[++i]; + } + else { + if (!jb->pattern) { + log_job_pattern_set(jb, arg, strlen(arg)); + continue; + } else { + log2stderr("Error: Multiple patterns detected. Specify only one pattern. The first is '%s', the second is '%s'", jb->pattern, arg); + return false; + } + } + } + + if (strcmp(param, "--filename-key") == 0) { + if(!log_job_filename_key_set(jb, value, value ? strlen(value) : 0)) + return false; + } + else if (strcmp(param, "--prefix") == 0) { + if(!log_job_key_prefix_set(jb, value, value ? strlen(value) : 0)) + return false; + } +#ifdef HAVE_LIBYAML + else if (strcmp(param, "-f") == 0 || strcmp(param, "--file") == 0) { + if (!yaml_parse_file(value, jb)) + return false; + } + else if (strcmp(param, "-c") == 0 || strcmp(param, "--config") == 0) { + if (!yaml_parse_config(value, jb)) + return false; + } +#endif + else if (strcmp(param, "--unmatched-key") == 0) + hashed_key_set(&jb->unmatched.key, value); + else if (strcmp(param, "--inject") == 0) { + if (!parse_inject(jb, value, false)) + return false; + } + else if (strcmp(param, "--inject-unmatched") == 0) { + if (!parse_inject(jb, value, true)) + return false; + } + else if (strcmp(param, "--rewrite") == 0) { + if (!parse_rewrite(jb, value)) + return false; + } + else if (strcmp(param, "--rename") == 0) { + if (!parse_rename(jb, value)) + return false; + } + else if (strcmp(param, "--include") == 0) { + if (!log_job_include_pattern_set(jb, value, strlen(value))) + return false; + } + else if (strcmp(param, "--exclude") == 0) { + if (!log_job_exclude_pattern_set(jb, value, strlen(value))) + return false; + } + else { + i--; + if (!jb->pattern) { + log_job_pattern_set(jb, arg, strlen(arg)); + continue; + } else { + log2stderr("Error: Multiple patterns detected. Specify only one pattern. The first is '%s', the second is '%s'", jb->pattern, arg); + return false; + } + } + } + } + + // Check if a pattern is set and exactly one pattern is specified + if (!jb->pattern) { + log2stderr("Warning: pattern not specified. Try the default config with: -c default"); + log_job_command_line_help(argv[0]); + return false; + } + + return true; +} diff --git a/collectors/log2journal/log2journal-pattern.c b/collectors/log2journal/log2journal-pattern.c new file mode 100644 index 000000000..4b7e9026b --- /dev/null +++ b/collectors/log2journal/log2journal-pattern.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "log2journal.h" + +void search_pattern_cleanup(SEARCH_PATTERN *sp) { + if(sp->pattern) { + freez((void *)sp->pattern); + sp->pattern = NULL; + } + + if(sp->re) { + pcre2_code_free(sp->re); + sp->re = NULL; + } + + if(sp->match_data) { + pcre2_match_data_free(sp->match_data); + sp->match_data = NULL; + } + + txt_cleanup(&sp->error); +} + +static void pcre2_error_message(SEARCH_PATTERN *sp, int rc, int pos) { + char msg[1024]; + pcre2_get_error_in_buffer(msg, sizeof(msg), rc, pos); + txt_replace(&sp->error, msg, strlen(msg)); +} + +static inline bool compile_pcre2(SEARCH_PATTERN *sp) { + int error_number; + PCRE2_SIZE error_offset; + PCRE2_SPTR pattern_ptr = (PCRE2_SPTR)sp->pattern; + + sp->re = pcre2_compile(pattern_ptr, PCRE2_ZERO_TERMINATED, 0, &error_number, &error_offset, NULL); + if (!sp->re) { + pcre2_error_message(sp, error_number, (int) error_offset); + return false; + } + + return true; +} + +bool search_pattern_set(SEARCH_PATTERN *sp, const char *search_pattern, size_t search_pattern_len) { + search_pattern_cleanup(sp); + + sp->pattern = strndupz(search_pattern, search_pattern_len); + if (!compile_pcre2(sp)) + return false; + + sp->match_data = pcre2_match_data_create_from_pattern(sp->re, NULL); + + return true; +} diff --git a/collectors/log2journal/log2journal-pcre2.c b/collectors/log2journal/log2journal-pcre2.c new file mode 100644 index 000000000..185e69108 --- /dev/null +++ b/collectors/log2journal/log2journal-pcre2.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "log2journal.h" + +#define PCRE2_ERROR_LINE_MAX 1024 +#define PCRE2_KEY_MAX 1024 + +struct pcre2_state { + LOG_JOB *jb; + + const char *line; + uint32_t pos; + uint32_t key_start; + + pcre2_code *re; + pcre2_match_data *match_data; + + char key[PCRE2_KEY_MAX]; + char msg[PCRE2_ERROR_LINE_MAX]; +}; + +static inline void copy_and_convert_key(PCRE2_STATE *pcre2, const char *key) { + char *d = &pcre2->key[pcre2->key_start]; + size_t remaining = sizeof(pcre2->key) - pcre2->key_start; + + while(remaining >= 2 && *key) { + *d = journal_key_characters_map[(unsigned) (*key)]; + remaining--; + key++; + d++; + } + + *d = '\0'; +} + +static inline void jb_traverse_pcre2_named_groups_and_send_keys(PCRE2_STATE *pcre2, pcre2_code *re, pcre2_match_data *match_data, char *line) { + PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data); + uint32_t names_count; + pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &names_count); + + if (names_count > 0) { + PCRE2_SPTR name_table; + pcre2_pattern_info(re, PCRE2_INFO_NAMETABLE, &name_table); + uint32_t name_entry_size; + pcre2_pattern_info(re, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size); + + const unsigned char *table_ptr = name_table; + for (uint32_t i = 0; i < names_count; i++) { + int n = (table_ptr[0] << 8) | table_ptr[1]; + const char *group_name = (const char *)(table_ptr + 2); + + PCRE2_SIZE start_offset = ovector[2 * n]; + PCRE2_SIZE end_offset = ovector[2 * n + 1]; + PCRE2_SIZE group_length = end_offset - start_offset; + + copy_and_convert_key(pcre2, group_name); + log_job_send_extracted_key_value(pcre2->jb, pcre2->key, line + start_offset, group_length); + + table_ptr += name_entry_size; + } + } +} + +void pcre2_get_error_in_buffer(char *msg, size_t msg_len, int rc, int pos) { + int l; + + if(pos >= 0) + l = snprintf(msg, msg_len, "PCRE2 error %d at pos %d on: ", rc, pos); + else + l = snprintf(msg, msg_len, "PCRE2 error %d on: ", rc); + + pcre2_get_error_message(rc, (PCRE2_UCHAR *)&msg[l], msg_len - l); +} + +static void pcre2_error_message(PCRE2_STATE *pcre2, int rc, int pos) { + pcre2_get_error_in_buffer(pcre2->msg, sizeof(pcre2->msg), rc, pos); +} + +bool pcre2_has_error(PCRE2_STATE *pcre2) { + return !pcre2->re || pcre2->msg[0]; +} + +PCRE2_STATE *pcre2_parser_create(LOG_JOB *jb) { + PCRE2_STATE *pcre2 = mallocz(sizeof(PCRE2_STATE)); + memset(pcre2, 0, sizeof(PCRE2_STATE)); + pcre2->jb = jb; + + if(jb->prefix) + pcre2->key_start = copy_to_buffer(pcre2->key, sizeof(pcre2->key), pcre2->jb->prefix, strlen(pcre2->jb->prefix)); + + int rc; + PCRE2_SIZE pos; + pcre2->re = pcre2_compile((PCRE2_SPTR)jb->pattern, PCRE2_ZERO_TERMINATED, 0, &rc, &pos, NULL); + if (!pcre2->re) { + pcre2_error_message(pcre2, rc, pos); + return pcre2; + } + + pcre2->match_data = pcre2_match_data_create_from_pattern(pcre2->re, NULL); + + return pcre2; +} + +void pcre2_parser_destroy(PCRE2_STATE *pcre2) { + if(pcre2) + freez(pcre2); +} + +const char *pcre2_parser_error(PCRE2_STATE *pcre2) { + return pcre2->msg; +} + +bool pcre2_parse_document(PCRE2_STATE *pcre2, const char *txt, size_t len) { + pcre2->line = txt; + pcre2->pos = 0; + pcre2->msg[0] = '\0'; + + if(!len) + len = strlen(txt); + + int rc = pcre2_match(pcre2->re, (PCRE2_SPTR)pcre2->line, len, 0, 0, pcre2->match_data, NULL); + if(rc < 0) { + pcre2_error_message(pcre2, rc, -1); + return false; + } + + jb_traverse_pcre2_named_groups_and_send_keys(pcre2, pcre2->re, pcre2->match_data, (char *)pcre2->line); + + return true; +} + +void pcre2_test(void) { + LOG_JOB jb = { .prefix = "NIGNX_" }; + PCRE2_STATE *pcre2 = pcre2_parser_create(&jb); + + pcre2_parse_document(pcre2, "{\"value\":\"\\u\\u039A\\u03B1\\u03BB\\u03B7\\u03BC\\u03AD\\u03C1\\u03B1\"}", 0); + + pcre2_parser_destroy(pcre2); +} diff --git a/collectors/log2journal/log2journal-rename.c b/collectors/log2journal/log2journal-rename.c new file mode 100644 index 000000000..c6975779f --- /dev/null +++ b/collectors/log2journal/log2journal-rename.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "log2journal.h" + +void rename_cleanup(RENAME *rn) { + hashed_key_cleanup(&rn->new_key); + hashed_key_cleanup(&rn->old_key); +} + +bool log_job_rename_add(LOG_JOB *jb, const char *new_key, size_t new_key_len, const char *old_key, size_t old_key_len) { + if(jb->renames.used >= MAX_RENAMES) { + log2stderr("Error: too many renames. You can rename up to %d fields.", MAX_RENAMES); + return false; + } + + RENAME *rn = &jb->renames.array[jb->renames.used++]; + hashed_key_len_set(&rn->new_key, new_key, new_key_len); + hashed_key_len_set(&rn->old_key, old_key, old_key_len); + + return true; +} diff --git a/collectors/log2journal/log2journal-replace.c b/collectors/log2journal/log2journal-replace.c new file mode 100644 index 000000000..429d615da --- /dev/null +++ b/collectors/log2journal/log2journal-replace.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "log2journal.h" + +void replace_node_free(REPLACE_NODE *rpn) { + hashed_key_cleanup(&rpn->name); + rpn->next = NULL; + freez(rpn); +} + +void replace_pattern_cleanup(REPLACE_PATTERN *rp) { + if(rp->pattern) { + freez((void *)rp->pattern); + rp->pattern = NULL; + } + + while(rp->nodes) { + REPLACE_NODE *rpn = rp->nodes; + rp->nodes = rpn->next; + replace_node_free(rpn); + } +} + +static REPLACE_NODE *replace_pattern_add_node(REPLACE_NODE **head, bool is_variable, const char *text) { + REPLACE_NODE *new_node = callocz(1, sizeof(REPLACE_NODE)); + if (!new_node) + return NULL; + + hashed_key_set(&new_node->name, text); + new_node->is_variable = is_variable; + new_node->next = NULL; + + if (*head == NULL) + *head = new_node; + + else { + REPLACE_NODE *current = *head; + + // append it + while (current->next != NULL) + current = current->next; + + current->next = new_node; + } + + return new_node; +} + +bool replace_pattern_set(REPLACE_PATTERN *rp, const char *pattern) { + replace_pattern_cleanup(rp); + + rp->pattern = strdupz(pattern); + const char *current = rp->pattern; + + while (*current != '\0') { + if (*current == '$' && *(current + 1) == '{') { + // Start of a variable + const char *end = strchr(current, '}'); + if (!end) { + log2stderr("Error: Missing closing brace in replacement pattern: %s", rp->pattern); + return false; + } + + size_t name_length = end - current - 2; // Length of the variable name + char *variable_name = strndupz(current + 2, name_length); + if (!variable_name) { + log2stderr("Error: Memory allocation failed for variable name."); + return false; + } + + REPLACE_NODE *node = replace_pattern_add_node(&(rp->nodes), true, variable_name); + if (!node) { + freez(variable_name); + log2stderr("Error: Failed to add replacement node for variable."); + return false; + } + + current = end + 1; // Move past the variable + } + else { + // Start of literal text + const char *start = current; + while (*current != '\0' && !(*current == '$' && *(current + 1) == '{')) { + current++; + } + + size_t text_length = current - start; + char *text = strndupz(start, text_length); + if (!text) { + log2stderr("Error: Memory allocation failed for literal text."); + return false; + } + + REPLACE_NODE *node = replace_pattern_add_node(&(rp->nodes), false, text); + if (!node) { + freez(text); + log2stderr("Error: Failed to add replacement node for text."); + return false; + } + } + } + + for(REPLACE_NODE *node = rp->nodes; node; node = node->next) { + if(node->is_variable) { + rp->has_variables = true; + break; + } + } + + return true; +} diff --git a/collectors/log2journal/log2journal-rewrite.c b/collectors/log2journal/log2journal-rewrite.c new file mode 100644 index 000000000..112391bf0 --- /dev/null +++ b/collectors/log2journal/log2journal-rewrite.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "log2journal.h" + +void rewrite_cleanup(REWRITE *rw) { + hashed_key_cleanup(&rw->key); + + if(rw->flags & RW_MATCH_PCRE2) + search_pattern_cleanup(&rw->match_pcre2); + else if(rw->flags & RW_MATCH_NON_EMPTY) + replace_pattern_cleanup(&rw->match_non_empty); + + replace_pattern_cleanup(&rw->value); + rw->flags = RW_NONE; +} + +bool log_job_rewrite_add(LOG_JOB *jb, const char *key, RW_FLAGS flags, const char *search_pattern, const char *replace_pattern) { + if(jb->rewrites.used >= MAX_REWRITES) { + log2stderr("Error: too many rewrites. You can add up to %d rewrite rules.", MAX_REWRITES); + return false; + } + + if((flags & (RW_MATCH_PCRE2|RW_MATCH_NON_EMPTY)) && (!search_pattern || !*search_pattern)) { + log2stderr("Error: rewrite for key '%s' does not specify a search pattern.", key); + return false; + } + + REWRITE *rw = &jb->rewrites.array[jb->rewrites.used++]; + rw->flags = flags; + + hashed_key_set(&rw->key, key); + + if((flags & RW_MATCH_PCRE2) && !search_pattern_set(&rw->match_pcre2, search_pattern, strlen(search_pattern))) { + rewrite_cleanup(rw); + jb->rewrites.used--; + return false; + } + else if((flags & RW_MATCH_NON_EMPTY) && !replace_pattern_set(&rw->match_non_empty, search_pattern)) { + rewrite_cleanup(rw); + jb->rewrites.used--; + return false; + } + + if(replace_pattern && *replace_pattern && !replace_pattern_set(&rw->value, replace_pattern)) { + rewrite_cleanup(rw); + jb->rewrites.used--; + return false; + } + + return true; +} diff --git a/collectors/log2journal/log2journal-yaml.c b/collectors/log2journal/log2journal-yaml.c new file mode 100644 index 000000000..862e7bf4b --- /dev/null +++ b/collectors/log2journal/log2journal-yaml.c @@ -0,0 +1,964 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "log2journal.h" + +// ---------------------------------------------------------------------------- +// yaml configuration file + +#ifdef HAVE_LIBYAML + +static const char *yaml_event_name(yaml_event_type_t type) { + switch (type) { + case YAML_NO_EVENT: + return "YAML_NO_EVENT"; + + case YAML_SCALAR_EVENT: + return "YAML_SCALAR_EVENT"; + + case YAML_ALIAS_EVENT: + return "YAML_ALIAS_EVENT"; + + case YAML_MAPPING_START_EVENT: + return "YAML_MAPPING_START_EVENT"; + + case YAML_MAPPING_END_EVENT: + return "YAML_MAPPING_END_EVENT"; + + case YAML_SEQUENCE_START_EVENT: + return "YAML_SEQUENCE_START_EVENT"; + + case YAML_SEQUENCE_END_EVENT: + return "YAML_SEQUENCE_END_EVENT"; + + case YAML_STREAM_START_EVENT: + return "YAML_STREAM_START_EVENT"; + + case YAML_STREAM_END_EVENT: + return "YAML_STREAM_END_EVENT"; + + case YAML_DOCUMENT_START_EVENT: + return "YAML_DOCUMENT_START_EVENT"; + + case YAML_DOCUMENT_END_EVENT: + return "YAML_DOCUMENT_END_EVENT"; + + default: + return "UNKNOWN"; + } +} + +#define yaml_error(parser, event, fmt, args...) yaml_error_with_trace(parser, event, __LINE__, __FUNCTION__, __FILE__, fmt, ##args) +static void yaml_error_with_trace(yaml_parser_t *parser, yaml_event_t *event, size_t line, const char *function, const char *file, const char *format, ...) __attribute__ ((format(__printf__, 6, 7))); +static void yaml_error_with_trace(yaml_parser_t *parser, yaml_event_t *event, size_t line, const char *function, const char *file, const char *format, ...) { + char buf[1024] = ""; // Initialize buf to an empty string + const char *type = ""; + + if(event) { + type = yaml_event_name(event->type); + + switch (event->type) { + case YAML_SCALAR_EVENT: + copy_to_buffer(buf, sizeof(buf), (char *)event->data.scalar.value, event->data.scalar.length); + break; + + case YAML_ALIAS_EVENT: + snprintf(buf, sizeof(buf), "%s", event->data.alias.anchor); + break; + + default: + break; + } + } + + fprintf(stderr, "YAML %zu@%s, %s(): (line %d, column %d, %s%s%s): ", + line, file, function, + (int)(parser->mark.line + 1), (int)(parser->mark.column + 1), + type, buf[0]? ", near ": "", buf); + + va_list args; + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + fprintf(stderr, "\n"); +} + +#define yaml_parse(parser, event) yaml_parse_with_trace(parser, event, __LINE__, __FUNCTION__, __FILE__) +static bool yaml_parse_with_trace(yaml_parser_t *parser, yaml_event_t *event, size_t line __maybe_unused, const char *function __maybe_unused, const char *file __maybe_unused) { + if (!yaml_parser_parse(parser, event)) { + yaml_error(parser, NULL, "YAML parser error %d", parser->error); + return false; + } + +// fprintf(stderr, ">>> %s >>> %.*s\n", +// yaml_event_name(event->type), +// event->type == YAML_SCALAR_EVENT ? event->data.scalar.length : 0, +// event->type == YAML_SCALAR_EVENT ? (char *)event->data.scalar.value : ""); + + return true; +} + +#define yaml_parse_expect_event(parser, type) yaml_parse_expect_event_with_trace(parser, type, __LINE__, __FUNCTION__, __FILE__) +static bool yaml_parse_expect_event_with_trace(yaml_parser_t *parser, yaml_event_type_t type, size_t line, const char *function, const char *file) { + yaml_event_t event; + if (!yaml_parse(parser, &event)) + return false; + + bool ret = true; + if(event.type != type) { + yaml_error_with_trace(parser, &event, line, function, file, "unexpected event - expecting: %s", yaml_event_name(type)); + ret = false; + } +// else +// fprintf(stderr, "OK (%zu@%s, %s()\n", line, file, function); + + yaml_event_delete(&event); + return ret; +} + +#define yaml_scalar_matches(event, s, len) yaml_scalar_matches_with_trace(event, s, len, __LINE__, __FUNCTION__, __FILE__) +static bool yaml_scalar_matches_with_trace(yaml_event_t *event, const char *s, size_t len, size_t line __maybe_unused, const char *function __maybe_unused, const char *file __maybe_unused) { + if(event->type != YAML_SCALAR_EVENT) + return false; + + if(len != event->data.scalar.length) + return false; +// else +// fprintf(stderr, "OK (%zu@%s, %s()\n", line, file, function); + + return strcmp((char *)event->data.scalar.value, s) == 0; +} + +// ---------------------------------------------------------------------------- + +static size_t yaml_parse_filename_injection(yaml_parser_t *parser, LOG_JOB *jb) { + yaml_event_t event; + size_t errors = 0; + + if(!yaml_parse_expect_event(parser, YAML_MAPPING_START_EVENT)) + return 1; + + if (!yaml_parse(parser, &event)) + return 1; + + if (yaml_scalar_matches(&event, "key", strlen("key"))) { + yaml_event_t sub_event; + if (!yaml_parse(parser, &sub_event)) + errors++; + + else { + if (sub_event.type == YAML_SCALAR_EVENT) { + if(!log_job_filename_key_set(jb, (char *) sub_event.data.scalar.value, + sub_event.data.scalar.length)) + errors++; + } + + else { + yaml_error(parser, &sub_event, "expected the filename as %s", yaml_event_name(YAML_SCALAR_EVENT)); + errors++; + } + + yaml_event_delete(&sub_event); + } + } + + if(!yaml_parse_expect_event(parser, YAML_MAPPING_END_EVENT)) + errors++; + + yaml_event_delete(&event); + return errors; +} + +static size_t yaml_parse_filters(yaml_parser_t *parser, LOG_JOB *jb) { + if(!yaml_parse_expect_event(parser, YAML_MAPPING_START_EVENT)) + return 1; + + size_t errors = 0; + bool finished = false; + + while(!errors && !finished) { + yaml_event_t event; + + if(!yaml_parse(parser, &event)) + return 1; + + if(event.type == YAML_SCALAR_EVENT) { + if(yaml_scalar_matches(&event, "include", strlen("include"))) { + yaml_event_t sub_event; + if(!yaml_parse(parser, &sub_event)) + errors++; + + else { + if(sub_event.type == YAML_SCALAR_EVENT) { + if(!log_job_include_pattern_set(jb, (char *) sub_event.data.scalar.value, + sub_event.data.scalar.length)) + errors++; + } + + else { + yaml_error(parser, &sub_event, "expected the include as %s", + yaml_event_name(YAML_SCALAR_EVENT)); + errors++; + } + + yaml_event_delete(&sub_event); + } + } + else if(yaml_scalar_matches(&event, "exclude", strlen("exclude"))) { + yaml_event_t sub_event; + if(!yaml_parse(parser, &sub_event)) + errors++; + + else { + if(sub_event.type == YAML_SCALAR_EVENT) { + if(!log_job_exclude_pattern_set(jb,(char *) sub_event.data.scalar.value, + sub_event.data.scalar.length)) + errors++; + } + + else { + yaml_error(parser, &sub_event, "expected the exclude as %s", + yaml_event_name(YAML_SCALAR_EVENT)); + errors++; + } + + yaml_event_delete(&sub_event); + } + } + } + else if(event.type == YAML_MAPPING_END_EVENT) + finished = true; + else { + yaml_error(parser, &event, "expected %s or %s", + yaml_event_name(YAML_SCALAR_EVENT), + yaml_event_name(YAML_MAPPING_END_EVENT)); + errors++; + } + + yaml_event_delete(&event); + } + + return errors; +} + +static size_t yaml_parse_prefix(yaml_parser_t *parser, LOG_JOB *jb) { + yaml_event_t event; + size_t errors = 0; + + if (!yaml_parse(parser, &event)) + return 1; + + if (event.type == YAML_SCALAR_EVENT) { + if(!log_job_key_prefix_set(jb, (char *) event.data.scalar.value, event.data.scalar.length)) + errors++; + } + + yaml_event_delete(&event); + return errors; +} + +static bool yaml_parse_constant_field_injection(yaml_parser_t *parser, LOG_JOB *jb, bool unmatched) { + yaml_event_t event; + if (!yaml_parse(parser, &event) || event.type != YAML_SCALAR_EVENT) { + yaml_error(parser, &event, "Expected scalar for constant field injection key"); + yaml_event_delete(&event); + return false; + } + + char *key = strndupz((char *)event.data.scalar.value, event.data.scalar.length); + char *value = NULL; + bool ret = false; + + yaml_event_delete(&event); + + if (!yaml_parse(parser, &event) || event.type != YAML_SCALAR_EVENT) { + yaml_error(parser, &event, "Expected scalar for constant field injection value"); + goto cleanup; + } + + if(!yaml_scalar_matches(&event, "value", strlen("value"))) { + yaml_error(parser, &event, "Expected scalar 'value'"); + goto cleanup; + } + + if (!yaml_parse(parser, &event) || event.type != YAML_SCALAR_EVENT) { + yaml_error(parser, &event, "Expected scalar for constant field injection value"); + goto cleanup; + } + + value = strndupz((char *)event.data.scalar.value, event.data.scalar.length); + + if(!log_job_injection_add(jb, key, strlen(key), value, strlen(value), unmatched)) + ret = false; + else + ret = true; + + ret = true; + +cleanup: + yaml_event_delete(&event); + freez(key); + freez(value); + return !ret ? 1 : 0; +} + +static bool yaml_parse_injection_mapping(yaml_parser_t *parser, LOG_JOB *jb, bool unmatched) { + yaml_event_t event; + size_t errors = 0; + bool finished = false; + + while (!errors && !finished) { + if (!yaml_parse(parser, &event)) { + errors++; + continue; + } + + switch (event.type) { + case YAML_SCALAR_EVENT: + if (yaml_scalar_matches(&event, "key", strlen("key"))) { + errors += yaml_parse_constant_field_injection(parser, jb, unmatched); + } else { + yaml_error(parser, &event, "Unexpected scalar in injection mapping"); + errors++; + } + break; + + case YAML_MAPPING_END_EVENT: + finished = true; + break; + + default: + yaml_error(parser, &event, "Unexpected event in injection mapping"); + errors++; + break; + } + + yaml_event_delete(&event); + } + + return errors == 0; +} + +static size_t yaml_parse_injections(yaml_parser_t *parser, LOG_JOB *jb, bool unmatched) { + yaml_event_t event; + size_t errors = 0; + bool finished = false; + + if (!yaml_parse_expect_event(parser, YAML_SEQUENCE_START_EVENT)) + return 1; + + while (!errors && !finished) { + if (!yaml_parse(parser, &event)) { + errors++; + continue; + } + + switch (event.type) { + case YAML_MAPPING_START_EVENT: + if (!yaml_parse_injection_mapping(parser, jb, unmatched)) + errors++; + break; + + case YAML_SEQUENCE_END_EVENT: + finished = true; + break; + + default: + yaml_error(parser, &event, "Unexpected event in injections sequence"); + errors++; + break; + } + + yaml_event_delete(&event); + } + + return errors; +} + +static size_t yaml_parse_unmatched(yaml_parser_t *parser, LOG_JOB *jb) { + size_t errors = 0; + bool finished = false; + + if (!yaml_parse_expect_event(parser, YAML_MAPPING_START_EVENT)) + return 1; + + while (!errors && !finished) { + yaml_event_t event; + if (!yaml_parse(parser, &event)) { + errors++; + continue; + } + + switch (event.type) { + case YAML_SCALAR_EVENT: + if (yaml_scalar_matches(&event, "key", strlen("key"))) { + yaml_event_t sub_event; + if (!yaml_parse(parser, &sub_event)) { + errors++; + } else { + if (sub_event.type == YAML_SCALAR_EVENT) { + hashed_key_len_set(&jb->unmatched.key, (char *)sub_event.data.scalar.value, sub_event.data.scalar.length); + } else { + yaml_error(parser, &sub_event, "expected a scalar value for 'key'"); + errors++; + } + yaml_event_delete(&sub_event); + } + } else if (yaml_scalar_matches(&event, "inject", strlen("inject"))) { + errors += yaml_parse_injections(parser, jb, true); + } else { + yaml_error(parser, &event, "Unexpected scalar in unmatched section"); + errors++; + } + break; + + case YAML_MAPPING_END_EVENT: + finished = true; + break; + + default: + yaml_error(parser, &event, "Unexpected event in unmatched section"); + errors++; + break; + } + + yaml_event_delete(&event); + } + + return errors; +} + +static size_t yaml_parse_rewrites(yaml_parser_t *parser, LOG_JOB *jb) { + size_t errors = 0; + + if (!yaml_parse_expect_event(parser, YAML_SEQUENCE_START_EVENT)) + return 1; + + bool finished = false; + while (!errors && !finished) { + yaml_event_t event; + if (!yaml_parse(parser, &event)) { + errors++; + continue; + } + + switch (event.type) { + case YAML_MAPPING_START_EVENT: + { + RW_FLAGS flags = RW_NONE; + char *key = NULL; + char *search_pattern = NULL; + char *replace_pattern = NULL; + + bool mapping_finished = false; + while (!errors && !mapping_finished) { + yaml_event_t sub_event; + if (!yaml_parse(parser, &sub_event)) { + errors++; + continue; + } + + switch (sub_event.type) { + case YAML_SCALAR_EVENT: + if (yaml_scalar_matches(&sub_event, "key", strlen("key"))) { + if (!yaml_parse(parser, &sub_event) || sub_event.type != YAML_SCALAR_EVENT) { + yaml_error(parser, &sub_event, "Expected scalar for rewrite key"); + errors++; + } else { + key = strndupz((char *)sub_event.data.scalar.value, sub_event.data.scalar.length); + yaml_event_delete(&sub_event); + } + } else if (yaml_scalar_matches(&sub_event, "match", strlen("match"))) { + if (!yaml_parse(parser, &sub_event) || sub_event.type != YAML_SCALAR_EVENT) { + yaml_error(parser, &sub_event, "Expected scalar for rewrite match PCRE2 pattern"); + errors++; + } + else { + if(search_pattern) + freez(search_pattern); + flags |= RW_MATCH_PCRE2; + flags &= ~RW_MATCH_NON_EMPTY; + search_pattern = strndupz((char *)sub_event.data.scalar.value, sub_event.data.scalar.length); + yaml_event_delete(&sub_event); + } + } else if (yaml_scalar_matches(&sub_event, "not_empty", strlen("not_empty"))) { + if (!yaml_parse(parser, &sub_event) || sub_event.type != YAML_SCALAR_EVENT) { + yaml_error(parser, &sub_event, "Expected scalar for rewrite not empty condition"); + errors++; + } + else { + if(search_pattern) + freez(search_pattern); + flags |= RW_MATCH_NON_EMPTY; + flags &= ~RW_MATCH_PCRE2; + search_pattern = strndupz((char *)sub_event.data.scalar.value, sub_event.data.scalar.length); + yaml_event_delete(&sub_event); + } + } else if (yaml_scalar_matches(&sub_event, "value", strlen("value"))) { + if (!yaml_parse(parser, &sub_event) || sub_event.type != YAML_SCALAR_EVENT) { + yaml_error(parser, &sub_event, "Expected scalar for rewrite value"); + errors++; + } else { + replace_pattern = strndupz((char *)sub_event.data.scalar.value, sub_event.data.scalar.length); + yaml_event_delete(&sub_event); + } + } else if (yaml_scalar_matches(&sub_event, "stop", strlen("stop"))) { + if (!yaml_parse(parser, &sub_event) || sub_event.type != YAML_SCALAR_EVENT) { + yaml_error(parser, &sub_event, "Expected scalar for rewrite stop boolean"); + errors++; + } else { + if(strncmp((char*)sub_event.data.scalar.value, "no", 2) == 0 || + strncmp((char*)sub_event.data.scalar.value, "false", 5) == 0) + flags |= RW_DONT_STOP; + else + flags &= ~RW_DONT_STOP; + + yaml_event_delete(&sub_event); + } + } else if (yaml_scalar_matches(&sub_event, "inject", strlen("inject"))) { + if (!yaml_parse(parser, &sub_event) || sub_event.type != YAML_SCALAR_EVENT) { + yaml_error(parser, &sub_event, "Expected scalar for rewrite inject boolean"); + errors++; + } else { + if(strncmp((char*)sub_event.data.scalar.value, "yes", 3) == 0 || + strncmp((char*)sub_event.data.scalar.value, "true", 4) == 0) + flags |= RW_INJECT; + else + flags &= ~RW_INJECT; + + yaml_event_delete(&sub_event); + } + } else { + yaml_error(parser, &sub_event, "Unexpected scalar in rewrite mapping"); + errors++; + } + break; + + case YAML_MAPPING_END_EVENT: + if(key) { + if (!log_job_rewrite_add(jb, key, flags, search_pattern, replace_pattern)) + errors++; + } + + freez(key); + key = NULL; + + freez(search_pattern); + search_pattern = NULL; + + freez(replace_pattern); + replace_pattern = NULL; + + flags = RW_NONE; + + mapping_finished = true; + break; + + default: + yaml_error(parser, &sub_event, "Unexpected event in rewrite mapping"); + errors++; + break; + } + + yaml_event_delete(&sub_event); + } + } + break; + + case YAML_SEQUENCE_END_EVENT: + finished = true; + break; + + default: + yaml_error(parser, &event, "Unexpected event in rewrites sequence"); + errors++; + break; + } + + yaml_event_delete(&event); + } + + return errors; +} + +static size_t yaml_parse_renames(yaml_parser_t *parser, LOG_JOB *jb) { + size_t errors = 0; + + if (!yaml_parse_expect_event(parser, YAML_SEQUENCE_START_EVENT)) + return 1; + + bool finished = false; + while (!errors && !finished) { + yaml_event_t event; + if (!yaml_parse(parser, &event)) { + errors++; + continue; + } + + switch (event.type) { + case YAML_MAPPING_START_EVENT: + { + struct key_rename rn = { 0 }; + + bool mapping_finished = false; + while (!errors && !mapping_finished) { + yaml_event_t sub_event; + if (!yaml_parse(parser, &sub_event)) { + errors++; + continue; + } + + switch (sub_event.type) { + case YAML_SCALAR_EVENT: + if (yaml_scalar_matches(&sub_event, "new_key", strlen("new_key"))) { + if (!yaml_parse(parser, &sub_event) || sub_event.type != YAML_SCALAR_EVENT) { + yaml_error(parser, &sub_event, "Expected scalar for rename new_key"); + errors++; + } else { + hashed_key_len_set(&rn.new_key, (char *)sub_event.data.scalar.value, sub_event.data.scalar.length); + yaml_event_delete(&sub_event); + } + } else if (yaml_scalar_matches(&sub_event, "old_key", strlen("old_key"))) { + if (!yaml_parse(parser, &sub_event) || sub_event.type != YAML_SCALAR_EVENT) { + yaml_error(parser, &sub_event, "Expected scalar for rename old_key"); + errors++; + } else { + hashed_key_len_set(&rn.old_key, (char *)sub_event.data.scalar.value, sub_event.data.scalar.length); + yaml_event_delete(&sub_event); + } + } else { + yaml_error(parser, &sub_event, "Unexpected scalar in rewrite mapping"); + errors++; + } + break; + + case YAML_MAPPING_END_EVENT: + if(rn.old_key.key && rn.new_key.key) { + if (!log_job_rename_add(jb, rn.new_key.key, rn.new_key.len, + rn.old_key.key, rn.old_key.len)) + errors++; + } + rename_cleanup(&rn); + + mapping_finished = true; + break; + + default: + yaml_error(parser, &sub_event, "Unexpected event in rewrite mapping"); + errors++; + break; + } + + yaml_event_delete(&sub_event); + } + } + break; + + case YAML_SEQUENCE_END_EVENT: + finished = true; + break; + + default: + yaml_error(parser, &event, "Unexpected event in rewrites sequence"); + errors++; + break; + } + + yaml_event_delete(&event); + } + + return errors; +} + +static size_t yaml_parse_pattern(yaml_parser_t *parser, LOG_JOB *jb) { + yaml_event_t event; + size_t errors = 0; + + if (!yaml_parse(parser, &event)) + return 1; + + if(event.type == YAML_SCALAR_EVENT) + log_job_pattern_set(jb, (char *) event.data.scalar.value, event.data.scalar.length); + else { + yaml_error(parser, &event, "unexpected event type"); + errors++; + } + + yaml_event_delete(&event); + return errors; +} + +static size_t yaml_parse_initialized(yaml_parser_t *parser, LOG_JOB *jb) { + size_t errors = 0; + + if(!yaml_parse_expect_event(parser, YAML_STREAM_START_EVENT)) { + errors++; + goto cleanup; + } + + if(!yaml_parse_expect_event(parser, YAML_DOCUMENT_START_EVENT)) { + errors++; + goto cleanup; + } + + if(!yaml_parse_expect_event(parser, YAML_MAPPING_START_EVENT)) { + errors++; + goto cleanup; + } + + bool finished = false; + while (!errors && !finished) { + yaml_event_t event; + if(!yaml_parse(parser, &event)) { + errors++; + continue; + } + + switch(event.type) { + default: + yaml_error(parser, &event, "unexpected type"); + errors++; + break; + + case YAML_MAPPING_END_EVENT: + finished = true; + break; + + case YAML_SCALAR_EVENT: + if (yaml_scalar_matches(&event, "pattern", strlen("pattern"))) + errors += yaml_parse_pattern(parser, jb); + + else if (yaml_scalar_matches(&event, "prefix", strlen("prefix"))) + errors += yaml_parse_prefix(parser, jb); + + else if (yaml_scalar_matches(&event, "filename", strlen("filename"))) + errors += yaml_parse_filename_injection(parser, jb); + + else if (yaml_scalar_matches(&event, "filter", strlen("filter"))) + errors += yaml_parse_filters(parser, jb); + + else if (yaml_scalar_matches(&event, "inject", strlen("inject"))) + errors += yaml_parse_injections(parser, jb, false); + + else if (yaml_scalar_matches(&event, "unmatched", strlen("unmatched"))) + errors += yaml_parse_unmatched(parser, jb); + + else if (yaml_scalar_matches(&event, "rewrite", strlen("rewrite"))) + errors += yaml_parse_rewrites(parser, jb); + + else if (yaml_scalar_matches(&event, "rename", strlen("rename"))) + errors += yaml_parse_renames(parser, jb); + + else { + yaml_error(parser, &event, "unexpected scalar"); + errors++; + } + break; + } + + yaml_event_delete(&event); + } + + if(!errors && !yaml_parse_expect_event(parser, YAML_DOCUMENT_END_EVENT)) { + errors++; + goto cleanup; + } + + if(!errors && !yaml_parse_expect_event(parser, YAML_STREAM_END_EVENT)) { + errors++; + goto cleanup; + } + +cleanup: + return errors; +} + +bool yaml_parse_file(const char *config_file_path, LOG_JOB *jb) { + if(!config_file_path || !*config_file_path) { + log2stderr("yaml configuration filename cannot be empty."); + return false; + } + + FILE *fp = fopen(config_file_path, "r"); + if (!fp) { + log2stderr("Error opening config file: %s", config_file_path); + return false; + } + + yaml_parser_t parser; + yaml_parser_initialize(&parser); + yaml_parser_set_input_file(&parser, fp); + + size_t errors = yaml_parse_initialized(&parser, jb); + + yaml_parser_delete(&parser); + fclose(fp); + return errors == 0; +} + +bool yaml_parse_config(const char *config_name, LOG_JOB *jb) { + char filename[FILENAME_MAX + 1]; + + snprintf(filename, sizeof(filename), "%s/%s.yaml", LOG2JOURNAL_CONFIG_PATH, config_name); + return yaml_parse_file(filename, jb); +} + +#endif // HAVE_LIBYAML + +// ---------------------------------------------------------------------------- +// printing yaml + +static void yaml_print_multiline_value(const char *s, size_t depth) { + if (!s) + s = ""; + + do { + const char* next = strchr(s, '\n'); + if(next) next++; + + size_t len = next ? (size_t)(next - s) : strlen(s); + char buf[len + 1]; + copy_to_buffer(buf, sizeof(buf), s, len); + + fprintf(stderr, "%.*s%s%s", + (int)(depth * 2), " ", + buf, next ? "" : "\n"); + + s = next; + } while(s && *s); +} + +static bool needs_quotes_in_yaml(const char *str) { + // Lookup table for special YAML characters + static bool special_chars[256] = { false }; + static bool table_initialized = false; + + if (!table_initialized) { + // Initialize the lookup table + const char *special_chars_str = ":{}[],&*!|>'\"%@`^"; + for (const char *c = special_chars_str; *c; ++c) { + special_chars[(unsigned char)*c] = true; + } + table_initialized = true; + } + + while (*str) { + if (special_chars[(unsigned char)*str]) { + return true; + } + str++; + } + return false; +} + +static void yaml_print_node(const char *key, const char *value, size_t depth, bool dash) { + if(depth > 10) depth = 10; + const char *quote = "'"; + + const char *second_line = NULL; + if(value && strchr(value, '\n')) { + second_line = value; + value = "|"; + quote = ""; + } + else if(!value || !needs_quotes_in_yaml(value)) + quote = ""; + + fprintf(stderr, "%.*s%s%s%s%s%s%s\n", + (int)(depth * 2), " ", dash ? "- ": "", + key ? key : "", key ? ": " : "", + quote, value ? value : "", quote); + + if(second_line) { + yaml_print_multiline_value(second_line, depth + 1); + } +} + +void log_job_configuration_to_yaml(LOG_JOB *jb) { + if(jb->pattern) + yaml_print_node("pattern", jb->pattern, 0, false); + + if(jb->prefix) { + fprintf(stderr, "\n"); + yaml_print_node("prefix", jb->prefix, 0, false); + } + + if(jb->filename.key.key) { + fprintf(stderr, "\n"); + yaml_print_node("filename", NULL, 0, false); + yaml_print_node("key", jb->filename.key.key, 1, false); + } + + if(jb->filter.include.pattern || jb->filter.exclude.pattern) { + fprintf(stderr, "\n"); + yaml_print_node("filter", NULL, 0, false); + + if(jb->filter.include.pattern) + yaml_print_node("include", jb->filter.include.pattern, 1, false); + + if(jb->filter.exclude.pattern) + yaml_print_node("exclude", jb->filter.exclude.pattern, 1, false); + } + + if(jb->renames.used) { + fprintf(stderr, "\n"); + yaml_print_node("rename", NULL, 0, false); + + for(size_t i = 0; i < jb->renames.used ;i++) { + yaml_print_node("new_key", jb->renames.array[i].new_key.key, 1, true); + yaml_print_node("old_key", jb->renames.array[i].old_key.key, 2, false); + } + } + + if(jb->injections.used) { + fprintf(stderr, "\n"); + yaml_print_node("inject", NULL, 0, false); + + for (size_t i = 0; i < jb->injections.used; i++) { + yaml_print_node("key", jb->injections.keys[i].key.key, 1, true); + yaml_print_node("value", jb->injections.keys[i].value.pattern, 2, false); + } + } + + if(jb->rewrites.used) { + fprintf(stderr, "\n"); + yaml_print_node("rewrite", NULL, 0, false); + + for(size_t i = 0; i < jb->rewrites.used ;i++) { + REWRITE *rw = &jb->rewrites.array[i]; + + yaml_print_node("key", rw->key.key, 1, true); + + if(rw->flags & RW_MATCH_PCRE2) + yaml_print_node("match", rw->match_pcre2.pattern, 2, false); + + else if(rw->flags & RW_MATCH_NON_EMPTY) + yaml_print_node("not_empty", rw->match_non_empty.pattern, 2, false); + + yaml_print_node("value", rw->value.pattern, 2, false); + + if(rw->flags & RW_INJECT) + yaml_print_node("inject", "yes", 2, false); + + if(rw->flags & RW_DONT_STOP) + yaml_print_node("stop", "no", 2, false); + } + } + + if(jb->unmatched.key.key || jb->unmatched.injections.used) { + fprintf(stderr, "\n"); + yaml_print_node("unmatched", NULL, 0, false); + + if(jb->unmatched.key.key) + yaml_print_node("key", jb->unmatched.key.key, 1, false); + + if(jb->unmatched.injections.used) { + fprintf(stderr, "\n"); + yaml_print_node("inject", NULL, 1, false); + + for (size_t i = 0; i < jb->unmatched.injections.used; i++) { + yaml_print_node("key", jb->unmatched.injections.keys[i].key.key, 2, true); + yaml_print_node("value", jb->unmatched.injections.keys[i].value.pattern, 3, false); + } + } + } +} diff --git a/collectors/log2journal/log2journal.c b/collectors/log2journal/log2journal.c new file mode 100644 index 000000000..c3204939c --- /dev/null +++ b/collectors/log2journal/log2journal.c @@ -0,0 +1,569 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "log2journal.h" + +// ---------------------------------------------------------------------------- + +const char journal_key_characters_map[256] = { + // control characters + [0] = '\0', [1] = '_', [2] = '_', [3] = '_', [4] = '_', [5] = '_', [6] = '_', [7] = '_', + [8] = '_', [9] = '_', [10] = '_', [11] = '_', [12] = '_', [13] = '_', [14] = '_', [15] = '_', + [16] = '_', [17] = '_', [18] = '_', [19] = '_', [20] = '_', [21] = '_', [22] = '_', [23] = '_', + [24] = '_', [25] = '_', [26] = '_', [27] = '_', [28] = '_', [29] = '_', [30] = '_', [31] = '_', + + // symbols + [' '] = '_', ['!'] = '_', ['"'] = '_', ['#'] = '_', ['$'] = '_', ['%'] = '_', ['&'] = '_', ['\''] = '_', + ['('] = '_', [')'] = '_', ['*'] = '_', ['+'] = '_', [','] = '_', ['-'] = '_', ['.'] = '_', ['/'] = '_', + + // numbers + ['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7', + ['8'] = '8', ['9'] = '9', + + // symbols + [':'] = '_', [';'] = '_', ['<'] = '_', ['='] = '_', ['>'] = '_', ['?'] = '_', ['@'] = '_', + + // capitals + ['A'] = 'A', ['B'] = 'B', ['C'] = 'C', ['D'] = 'D', ['E'] = 'E', ['F'] = 'F', ['G'] = 'G', ['H'] = 'H', + ['I'] = 'I', ['J'] = 'J', ['K'] = 'K', ['L'] = 'L', ['M'] = 'M', ['N'] = 'N', ['O'] = 'O', ['P'] = 'P', + ['Q'] = 'Q', ['R'] = 'R', ['S'] = 'S', ['T'] = 'T', ['U'] = 'U', ['V'] = 'V', ['W'] = 'W', ['X'] = 'X', + ['Y'] = 'Y', ['Z'] = 'Z', + + // symbols + ['['] = '_', ['\\'] = '_', [']'] = '_', ['^'] = '_', ['_'] = '_', ['`'] = '_', + + // lower to upper + ['a'] = 'A', ['b'] = 'B', ['c'] = 'C', ['d'] = 'D', ['e'] = 'E', ['f'] = 'F', ['g'] = 'G', ['h'] = 'H', + ['i'] = 'I', ['j'] = 'J', ['k'] = 'K', ['l'] = 'L', ['m'] = 'M', ['n'] = 'N', ['o'] = 'O', ['p'] = 'P', + ['q'] = 'Q', ['r'] = 'R', ['s'] = 'S', ['t'] = 'T', ['u'] = 'U', ['v'] = 'V', ['w'] = 'W', ['x'] = 'X', + ['y'] = 'Y', ['z'] = 'Z', + + // symbols + ['{'] = '_', ['|'] = '_', ['}'] = '_', ['~'] = '_', [127] = '_', // Delete (DEL) + + // Extended ASCII characters (128-255) set to underscore + [128] = '_', [129] = '_', [130] = '_', [131] = '_', [132] = '_', [133] = '_', [134] = '_', [135] = '_', + [136] = '_', [137] = '_', [138] = '_', [139] = '_', [140] = '_', [141] = '_', [142] = '_', [143] = '_', + [144] = '_', [145] = '_', [146] = '_', [147] = '_', [148] = '_', [149] = '_', [150] = '_', [151] = '_', + [152] = '_', [153] = '_', [154] = '_', [155] = '_', [156] = '_', [157] = '_', [158] = '_', [159] = '_', + [160] = '_', [161] = '_', [162] = '_', [163] = '_', [164] = '_', [165] = '_', [166] = '_', [167] = '_', + [168] = '_', [169] = '_', [170] = '_', [171] = '_', [172] = '_', [173] = '_', [174] = '_', [175] = '_', + [176] = '_', [177] = '_', [178] = '_', [179] = '_', [180] = '_', [181] = '_', [182] = '_', [183] = '_', + [184] = '_', [185] = '_', [186] = '_', [187] = '_', [188] = '_', [189] = '_', [190] = '_', [191] = '_', + [192] = '_', [193] = '_', [194] = '_', [195] = '_', [196] = '_', [197] = '_', [198] = '_', [199] = '_', + [200] = '_', [201] = '_', [202] = '_', [203] = '_', [204] = '_', [205] = '_', [206] = '_', [207] = '_', + [208] = '_', [209] = '_', [210] = '_', [211] = '_', [212] = '_', [213] = '_', [214] = '_', [215] = '_', + [216] = '_', [217] = '_', [218] = '_', [219] = '_', [220] = '_', [221] = '_', [222] = '_', [223] = '_', + [224] = '_', [225] = '_', [226] = '_', [227] = '_', [228] = '_', [229] = '_', [230] = '_', [231] = '_', + [232] = '_', [233] = '_', [234] = '_', [235] = '_', [236] = '_', [237] = '_', [238] = '_', [239] = '_', + [240] = '_', [241] = '_', [242] = '_', [243] = '_', [244] = '_', [245] = '_', [246] = '_', [247] = '_', + [248] = '_', [249] = '_', [250] = '_', [251] = '_', [252] = '_', [253] = '_', [254] = '_', [255] = '_', +}; + +// ---------------------------------------------------------------------------- + +static inline HASHED_KEY *get_key_from_hashtable(LOG_JOB *jb, HASHED_KEY *k) { + if(k->flags & HK_HASHTABLE_ALLOCATED) + return k; + + if(!k->hashtable_ptr) { + HASHED_KEY *ht_key; + SIMPLE_HASHTABLE_SLOT_KEY *slot = simple_hashtable_get_slot_KEY(&jb->hashtable, k->hash, true); + if((ht_key = SIMPLE_HASHTABLE_SLOT_DATA(slot))) { + if(!(ht_key->flags & HK_COLLISION_CHECKED)) { + ht_key->flags |= HK_COLLISION_CHECKED; + + if(strcmp(ht_key->key, k->key) != 0) + log2stderr("Hashtable collision detected on key '%s' (hash %lx) and '%s' (hash %lx). " + "Please file a bug report.", ht_key->key, (unsigned long) ht_key->hash, k->key + , (unsigned long) k->hash + ); + } + } + else { + ht_key = callocz(1, sizeof(HASHED_KEY)); + ht_key->key = strdupz(k->key); + ht_key->len = k->len; + ht_key->hash = k->hash; + ht_key->flags = HK_HASHTABLE_ALLOCATED; + + simple_hashtable_set_slot_KEY(&jb->hashtable, slot, ht_key->hash, ht_key); + } + + k->hashtable_ptr = ht_key; + } + + return k->hashtable_ptr; +} + +static inline HASHED_KEY *get_key_from_hashtable_with_char_ptr(LOG_JOB *jb, const char *key) { + HASHED_KEY find = { + .key = key, + .len = strlen(key), + }; + find.hash = XXH3_64bits(key, find.len); + + return get_key_from_hashtable(jb, &find); +} + +// ---------------------------------------------------------------------------- + +static inline void validate_key(LOG_JOB *jb __maybe_unused, HASHED_KEY *k) { + if(k->len > JOURNAL_MAX_KEY_LEN) + log2stderr("WARNING: key '%s' has length %zu, which is more than %zu, the max systemd-journal allows", + k->key, (size_t)k->len, (size_t)JOURNAL_MAX_KEY_LEN); + + for(size_t i = 0; i < k->len ;i++) { + char c = k->key[i]; + + if((c < 'A' || c > 'Z') && !isdigit(c) && c != '_') { + log2stderr("WARNING: key '%s' contains characters that are not allowed by systemd-journal.", k->key); + break; + } + } + + if(isdigit(k->key[0])) + log2stderr("WARNING: key '%s' starts with a digit and may not be accepted by systemd-journal.", k->key); + + if(k->key[0] == '_') + log2stderr("WARNING: key '%s' starts with an underscore, which makes it a systemd-journal trusted field. " + "Such fields are accepted by systemd-journal-remote, but not by systemd-journald.", k->key); +} + +// ---------------------------------------------------------------------------- + +static inline size_t replace_evaluate_to_buffer(LOG_JOB *jb, HASHED_KEY *k __maybe_unused, REPLACE_PATTERN *rp, char *dst, size_t dst_size) { + size_t remaining = dst_size; + char *copy_to = dst; + + for(REPLACE_NODE *node = rp->nodes; node != NULL && remaining > 1; node = node->next) { + if(node->is_variable) { + if(hashed_keys_match(&node->name, &jb->line.key)) { + size_t copied = copy_to_buffer(copy_to, remaining, jb->line.trimmed, jb->line.trimmed_len); + copy_to += copied; + remaining -= copied; + } + else { + HASHED_KEY *ktmp = get_key_from_hashtable_with_char_ptr(jb, node->name.key); + if(ktmp->value.len) { + size_t copied = copy_to_buffer(copy_to, remaining, ktmp->value.txt, ktmp->value.len); + copy_to += copied; + remaining -= copied; + } + } + } + else { + size_t copied = copy_to_buffer(copy_to, remaining, node->name.key, node->name.len); + copy_to += copied; + remaining -= copied; + } + } + + return copy_to - dst; +} + +static inline void replace_evaluate(LOG_JOB *jb, HASHED_KEY *k, REPLACE_PATTERN *rp) { + HASHED_KEY *ht_key = get_key_from_hashtable(jb, k); + + // set it to empty value + k->value.len = 0; + + for(REPLACE_NODE *node = rp->nodes; node != NULL; node = node->next) { + if(node->is_variable) { + if(hashed_keys_match(&node->name, &jb->line.key)) + txt_expand_and_append(&ht_key->value, jb->line.trimmed, jb->line.trimmed_len); + + else { + HASHED_KEY *ktmp = get_key_from_hashtable_with_char_ptr(jb, node->name.key); + if(ktmp->value.len) + txt_expand_and_append(&ht_key->value, ktmp->value.txt, ktmp->value.len); + } + } + else + txt_expand_and_append(&ht_key->value, node->name.key, node->name.len); + } +} + +static inline void replace_evaluate_from_pcre2(LOG_JOB *jb, HASHED_KEY *k, REPLACE_PATTERN *rp, SEARCH_PATTERN *sp) { + assert(k->flags & HK_HASHTABLE_ALLOCATED); + + // set the temporary TEXT to zero length + jb->rewrites.tmp.len = 0; + + PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(sp->match_data); + + // Iterate through the linked list of replacement nodes + for(REPLACE_NODE *node = rp->nodes; node != NULL; node = node->next) { + if(node->is_variable) { + int group_number = pcre2_substring_number_from_name( + sp->re, (PCRE2_SPTR) node->name.key); + + if(group_number >= 0) { + PCRE2_SIZE start_offset = ovector[2 * group_number]; + PCRE2_SIZE end_offset = ovector[2 * group_number + 1]; + PCRE2_SIZE length = end_offset - start_offset; + + txt_expand_and_append(&jb->rewrites.tmp, k->value.txt + start_offset, length); + } + else { + if(hashed_keys_match(&node->name, &jb->line.key)) + txt_expand_and_append(&jb->rewrites.tmp, jb->line.trimmed, jb->line.trimmed_len); + + else { + HASHED_KEY *ktmp = get_key_from_hashtable_with_char_ptr(jb, node->name.key); + if(ktmp->value.len) + txt_expand_and_append(&jb->rewrites.tmp, ktmp->value.txt, ktmp->value.len); + } + } + } + else { + txt_expand_and_append(&jb->rewrites.tmp, node->name.key, node->name.len); + } + } + + // swap the values of the temporary TEXT and the key value + TEXT tmp = k->value; + k->value = jb->rewrites.tmp; + jb->rewrites.tmp = tmp; +} + +static inline bool rewrite_conditions_satisfied(LOG_JOB *jb, HASHED_KEY *k, REWRITE *rw) { + assert(k->flags & HK_HASHTABLE_ALLOCATED); + + if(rw->flags & RW_MATCH_PCRE2) { + return search_pattern_matches(&rw->match_pcre2, k->value.txt, k->value.len); + } + else if(rw->flags & RW_MATCH_NON_EMPTY) { + char buffer[2]; // we don't need a big buffer - we just check if anything is written + if(replace_evaluate_to_buffer(jb, k, &rw->match_non_empty, buffer, sizeof(buffer))) + // it copied something + return true; + else + // it copied nothing + return false; + } + else + // no conditions + return true; +} + +// ---------------------------------------------------------------------------- + +static inline HASHED_KEY *rename_key(LOG_JOB *jb, HASHED_KEY *k) { + if(!(k->flags & HK_RENAMES_CHECKED) || k->flags & HK_HAS_RENAMES) { + k->flags |= HK_RENAMES_CHECKED; + + for(size_t i = 0; i < jb->renames.used; i++) { + RENAME *rn = &jb->renames.array[i]; + + if(hashed_keys_match(&rn->old_key, k)) { + k->flags |= HK_HAS_RENAMES; + + return get_key_from_hashtable(jb, &rn->new_key); + } + } + } + + return k; +} + +// ---------------------------------------------------------------------------- + +static inline void send_key_value_constant(LOG_JOB *jb __maybe_unused, HASHED_KEY *key, const char *value, size_t len) { + HASHED_KEY *ht_key = get_key_from_hashtable(jb, key); + + txt_replace(&ht_key->value, value, len); + ht_key->flags |= HK_VALUE_FROM_LOG; + + // fprintf(stderr, "SET %s=%.*s\n", ht_key->key, (int)ht_key->value.len, ht_key->value.txt); +} + +static inline void send_key_value_error(LOG_JOB *jb, HASHED_KEY *key, const char *format, ...) __attribute__ ((format(__printf__, 3, 4))); +static inline void send_key_value_error(LOG_JOB *jb, HASHED_KEY *key, const char *format, ...) { + HASHED_KEY *ht_key = get_key_from_hashtable(jb, key); + + printf("%s=", ht_key->key); + va_list args; + va_start(args, format); + vprintf(format, args); + va_end(args); + printf("\n"); +} + +inline void log_job_send_extracted_key_value(LOG_JOB *jb, const char *key, const char *value, size_t len) { + HASHED_KEY *ht_key = get_key_from_hashtable_with_char_ptr(jb, key); + HASHED_KEY *nk = rename_key(jb, ht_key); + txt_replace(&nk->value, value, len); + ht_key->flags |= HK_VALUE_FROM_LOG; + +// fprintf(stderr, "SET %s=%.*s\n", ht_key->key, (int)ht_key->value.len, ht_key->value.txt); +} + +static inline void log_job_process_rewrites(LOG_JOB *jb) { + for(size_t i = 0; i < jb->rewrites.used ;i++) { + REWRITE *rw = &jb->rewrites.array[i]; + + HASHED_KEY *k = get_key_from_hashtable(jb, &rw->key); + + if(!(rw->flags & RW_INJECT) && !(k->flags & HK_VALUE_FROM_LOG) && !k->value.len) + continue; + + if(!(k->flags & HK_VALUE_REWRITTEN) && rewrite_conditions_satisfied(jb, k, rw)) { + if(rw->flags & RW_MATCH_PCRE2) + replace_evaluate_from_pcre2(jb, k, &rw->value, &rw->match_pcre2); + else + replace_evaluate(jb, k, &rw->value); + + if(!(rw->flags & RW_DONT_STOP)) + k->flags |= HK_VALUE_REWRITTEN; + +// fprintf(stderr, "REWRITE %s=%.*s\n", k->key, (int)k->value.len, k->value.txt); + } + } +} + +static inline void send_all_fields(LOG_JOB *jb) { + SIMPLE_HASHTABLE_SORTED_FOREACH_READ_ONLY(&jb->hashtable, kptr, HASHED_KEY, _KEY) { + HASHED_KEY *k = SIMPLE_HASHTABLE_SORTED_FOREACH_READ_ONLY_VALUE(kptr); + + if(k->value.len) { + // the key exists and has some value + + if(!(k->flags & HK_FILTERED)) { + k->flags |= HK_FILTERED; + + bool included = jb->filter.include.re ? search_pattern_matches(&jb->filter.include, k->key, k->len) : true; + bool excluded = jb->filter.exclude.re ? search_pattern_matches(&jb->filter.exclude, k->key, k->len) : false; + + if(included && !excluded) + k->flags |= HK_FILTERED_INCLUDED; + else + k->flags &= ~HK_FILTERED_INCLUDED; + + // log some error if the key does not comply to journal standards + validate_key(jb, k); + } + + if(k->flags & HK_FILTERED_INCLUDED) + printf("%s=%.*s\n", k->key, (int)k->value.len, k->value.txt); + + // reset it for the next round + k->value.txt[0] = '\0'; + k->value.len = 0; + } + + k->flags &= ~(HK_VALUE_REWRITTEN | HK_VALUE_FROM_LOG); + } +} + +// ---------------------------------------------------------------------------- +// injection of constant fields + +static void select_which_injections_should_be_injected_on_unmatched(LOG_JOB *jb) { + // mark all injections to be added to unmatched logs + for(size_t i = 0; i < jb->injections.used ; i++) + jb->injections.keys[i].on_unmatched = true; + + if(jb->injections.used && jb->unmatched.injections.used) { + // we have both injections and injections on unmatched + + // we find all the injections that are also configured as injections on unmatched, + // and we disable them, so that the output will not have the same key twice + + for(size_t i = 0; i < jb->injections.used ;i++) { + for(size_t u = 0; u < jb->unmatched.injections.used ; u++) { + if(strcmp(jb->injections.keys[i].key.key, jb->unmatched.injections.keys[u].key.key) == 0) + jb->injections.keys[i].on_unmatched = false; + } + } + } +} + + +static inline void jb_finalize_injections(LOG_JOB *jb, bool line_is_matched) { + for (size_t j = 0; j < jb->injections.used; j++) { + if(!line_is_matched && !jb->injections.keys[j].on_unmatched) + continue; + + INJECTION *inj = &jb->injections.keys[j]; + + replace_evaluate(jb, &inj->key, &inj->value); + } +} + +// ---------------------------------------------------------------------------- +// filename injection + +static inline void jb_inject_filename(LOG_JOB *jb) { + if (jb->filename.key.key && jb->filename.current.len) + send_key_value_constant(jb, &jb->filename.key, jb->filename.current.txt, jb->filename.current.len); +} + +static inline bool jb_switched_filename(LOG_JOB *jb, const char *line, size_t len) { + // IMPORTANT: + // Return TRUE when the caller should skip this line (because it is ours). + // Unfortunately, we have to consume empty lines too. + + // IMPORTANT: + // filename may not be NULL terminated and have more data than the filename. + + if (!len) { + jb->filename.last_line_was_empty = true; + return true; + } + + // Check if it's a log file change line + if (jb->filename.last_line_was_empty && line[0] == '=' && strncmp(line, "==> ", 4) == 0) { + const char *start = line + 4; + const char *end = strstr(line, " <=="); + while (*start == ' ') start++; + if (*start != '\n' && *start != '\0' && end) { + txt_replace(&jb->filename.current, start, end - start); + return true; + } + } + + jb->filename.last_line_was_empty = false; + return false; +} + +static inline bool jb_send_unmatched_line(LOG_JOB *jb, const char *line) { + if (!jb->unmatched.key.key) + return false; + + // we are sending errors to systemd-journal + send_key_value_error(jb, &jb->unmatched.key, "Parsing error on: %s", line); + + for (size_t j = 0; j < jb->unmatched.injections.used; j++) { + INJECTION *inj = &jb->unmatched.injections.keys[j]; + + replace_evaluate(jb, &inj->key, &inj->value); + } + + return true; +} + +// ---------------------------------------------------------------------------- +// running a job + +static char *get_next_line(LOG_JOB *jb __maybe_unused, char *buffer, size_t size, size_t *line_length) { + if(!fgets(buffer, (int)size, stdin)) { + *line_length = 0; + return NULL; + } + + char *line = buffer; + size_t len = strlen(line); + + // remove trailing newlines and spaces + while(len > 1 && (line[len - 1] == '\n' || isspace(line[len - 1]))) + line[--len] = '\0'; + + // skip leading spaces + while(isspace(*line)) { + line++; + len--; + } + + *line_length = len; + return line; +} + +int log_job_run(LOG_JOB *jb) { + select_which_injections_should_be_injected_on_unmatched(jb); + + PCRE2_STATE *pcre2 = NULL; + LOG_JSON_STATE *json = NULL; + LOGFMT_STATE *logfmt = NULL; + + if(strcmp(jb->pattern, "json") == 0) { + json = json_parser_create(jb); + // never fails + } + else if(strcmp(jb->pattern, "logfmt") == 0) { + logfmt = logfmt_parser_create(jb); + // never fails + } + else if(strcmp(jb->pattern, "none") != 0) { + pcre2 = pcre2_parser_create(jb); + if(pcre2_has_error(pcre2)) { + log2stderr("%s", pcre2_parser_error(pcre2)); + pcre2_parser_destroy(pcre2); + return 1; + } + } + + jb->line.buffer = mallocz(MAX_LINE_LENGTH + 1); + jb->line.size = MAX_LINE_LENGTH + 1; + jb->line.trimmed_len = 0; + jb->line.trimmed = jb->line.buffer; + + while ((jb->line.trimmed = get_next_line(jb, (char *)jb->line.buffer, jb->line.size, &jb->line.trimmed_len))) { + const char *line = jb->line.trimmed; + size_t len = jb->line.trimmed_len; + + if(jb_switched_filename(jb, line, len)) + continue; + + bool line_is_matched = true; + + if(json) + line_is_matched = json_parse_document(json, line); + else if(logfmt) + line_is_matched = logfmt_parse_document(logfmt, line); + else if(pcre2) + line_is_matched = pcre2_parse_document(pcre2, line, len); + + if(!line_is_matched) { + if(json) + log2stderr("%s", json_parser_error(json)); + else if(logfmt) + log2stderr("%s", logfmt_parser_error(logfmt)); + else if(pcre2) + log2stderr("%s", pcre2_parser_error(pcre2)); + + if(!jb_send_unmatched_line(jb, line)) + // just logging to stderr, not sending unmatched lines + continue; + } + + jb_inject_filename(jb); + jb_finalize_injections(jb, line_is_matched); + + log_job_process_rewrites(jb); + send_all_fields(jb); + printf("\n"); + fflush(stdout); + } + + if(json) + json_parser_destroy(json); + + else if(logfmt) + logfmt_parser_destroy(logfmt); + + else if(pcre2) + pcre2_parser_destroy(pcre2); + + freez((void *)jb->line.buffer); + + return 0; +} + +// ---------------------------------------------------------------------------- + +int main(int argc, char *argv[]) { + LOG_JOB log_job; + + log_job_init(&log_job); + + if(!log_job_command_line_parse_parameters(&log_job, argc, argv)) + exit(1); + + if(log_job.show_config) + log_job_configuration_to_yaml(&log_job); + + int ret = log_job_run(&log_job); + + log_job_cleanup(&log_job); + return ret; +} diff --git a/collectors/log2journal/log2journal.d/default.yaml b/collectors/log2journal/log2journal.d/default.yaml new file mode 100644 index 000000000..d41efc4ab --- /dev/null +++ b/collectors/log2journal/log2journal.d/default.yaml @@ -0,0 +1,15 @@ +pattern: none + +filename: + key: LOG_FILENAME + +inject: + - key: MESSAGE + value: '${LINE}' # a special variable that resolves to the whole line read from the log + + - key: PRIORITY + value: 6 # Valid PRIORITIES: 0=emerg, 1=alert, 2=crit, 3=error, 4=warn, 5=notice, 6=info, 7=debug + + - key: SYSLOG_IDENTIFIER + value: log2journal # the name of the application sending the logs + diff --git a/collectors/log2journal/log2journal.d/nginx-combined.yaml b/collectors/log2journal/log2journal.d/nginx-combined.yaml new file mode 100644 index 000000000..003c774d7 --- /dev/null +++ b/collectors/log2journal/log2journal.d/nginx-combined.yaml @@ -0,0 +1,91 @@ +# Netdata log2journal Configuration +# The following parses nginx log files using the combined format. + +# The PCRE2 pattern to match log entries and give names to the fields. +# The journal will have these names, so follow their rules. You can +# initiate an extended PCRE2 pattern by starting the pattern with (?x) +pattern: | + (?x) # Enable PCRE2 extended mode + ^ + (?<NGINX_REMOTE_ADDR>[^ ]+) \s - \s # NGINX_REMOTE_ADDR + (?<NGINX_REMOTE_USER>[^ ]+) \s # NGINX_REMOTE_USER + \[ + (?<NGINX_TIME_LOCAL>[^\]]+) # NGINX_TIME_LOCAL + \] + \s+ " + (?<NGINX_REQUEST> + (?<NGINX_REQUEST_METHOD>[A-Z]+) \s+ # NGINX_METHOD + (?<NGINX_REQUEST_URI>[^ ]+) \s+ + (?<NGINX_SERVER_PROTOCOL>[^"]+) + ) + " \s+ + (?<NGINX_STATUS>\d+) \s+ # NGINX_STATUS + (?<NGINX_BODY_BYTES_SENT>\d+) \s+ # NGINX_BODY_BYTES_SENT + "(?<NGINX_HTTP_REFERER>[^"]*)" \s+ # NGINX_HTTP_REFERER + "(?<NGINX_HTTP_USER_AGENT>[^"]*)" # NGINX_HTTP_USER_AGENT + +# When log2journal can detect the filename of each log entry (tail gives it +# only when it tails multiple files), this key will be used to send the +# filename to the journals. +filename: + key: NGINX_LOG_FILENAME + +rename: + - new_key: MESSAGE + old_key: NGINX_REQUEST + +# Inject constant fields into the journal logs. +inject: + - key: SYSLOG_IDENTIFIER + value: nginx-log + + # inject PRIORITY is a duplicate of NGINX_STATUS + - key: PRIORITY + value: '${NGINX_STATUS}' + + # Inject NGINX_STATUS_FAMILY is a duplicate of NGINX_STATUS + - key: NGINX_STATUS_FAMILY + value: '${NGINX_STATUS}' + +# Rewrite the value of fields (including the duplicated ones). +# The search pattern can have named groups, and the replace pattern can use +# them as ${name}. +rewrite: + # PRIORITY is a duplicate of NGINX_STATUS + # Valid PRIORITIES: 0=emerg, 1=alert, 2=crit, 3=error, 4=warn, 5=notice, 6=info, 7=debug + - key: PRIORITY + match: '^[123]' + value: 6 + + - key: PRIORITY + match: '^4' + value: 5 + + - key: PRIORITY + match: '^5' + value: 3 + + - key: PRIORITY + match: '.*' + value: 4 + + # NGINX_STATUS_FAMILY is a duplicate of NGINX_STATUS + - key: NGINX_STATUS_FAMILY + match: '^(?<first_digit>[1-5])' + value: '${first_digit}xx' + + - key: NGINX_STATUS_FAMILY + match: '.*' + value: 'UNKNOWN' + +# Control what to do when input logs do not match the main PCRE2 pattern. +unmatched: + # The journal key to log the PCRE2 error message to. + # Set this to MESSAGE, so you to see the error in the log. + key: MESSAGE + + # Inject static fields to the unmatched entries. + # Set PRIORITY=1 (alert) to help you spot unmatched entries in the logs. + inject: + - key: PRIORITY + value: 1 diff --git a/collectors/log2journal/log2journal.d/nginx-json.yaml b/collectors/log2journal/log2journal.d/nginx-json.yaml new file mode 100644 index 000000000..7fdc4be58 --- /dev/null +++ b/collectors/log2journal/log2journal.d/nginx-json.yaml @@ -0,0 +1,164 @@ +# For all nginx variables, check this: +# https://nginx.org/en/docs/http/ngx_http_core_module.html#var_connection_requests + +pattern: json + +prefix: NGINX_ + +# When log2journal can detect the filename of each log entry (tail gives it +# only when it tails multiple files), this key will be used to send the +# filename to the journals. +filename: + key: NGINX_LOG_FILENAME + +filter: + exclude: '^(NGINX_BINARY_REMOTE_ADDR)$' + +rename: + - new_key: MESSAGE + old_key: NGINX_REQUEST + + # args is an alias for query_string + - new_key: NGINX_QUERY_STRING + old_key: NGINX_ARGS + + # document_uri is an alias for uri + - new_key: NGINX_URI + old_key: NGINX_DOCUMENT_URI + + # is_args states if the request had a query string or not + - new_key: NGINX_HAS_QUERY_STRING + old_key: NGINX_IS_ARGS + + # msec is the timestamp in seconds, with fractional digits for milliseconds + - new_key: NGINX_TIMESTAMP_SEC + old_key: NGINX_MSEC + + # nginx_version is already prefixed with nginx, let's remove one of them + - new_key: NGINX_VERSION + old_key: NGINX_NGINX_VERSION + + # pipe states if the request was pipelined or not + - new_key: NGINX_PIPELINED + old_key: NGINX_PIPE + + # rename numeric TLVs to their names + - new_key: NGINX_PROXY_PROTOCOL_TLV_ALPN + old_key: NGINX_PROXY_PROTOCOL_TLV_0X01 + - new_key: NGINX_PROXY_PROTOCOL_TLV_AUTHORITY + old_key: NGINX_PROXY_PROTOCOL_TLV_0X02 + - new_key: NGINX_PROXY_PROTOCOL_TLV_UNIQUE_ID + old_key: NGINX_PROXY_PROTOCOL_TLV_0X05 + - new_key: NGINX_PROXY_PROTOCOL_TLV_SSL + old_key: NGINX_PROXY_PROTOCOL_TLV_0X20 + - new_key: NGINX_PROXY_PROTOCOL_TLV_NETNS + old_key: NGINX_PROXY_PROTOCOL_TLV_0X30 + + # rename numeric SSL TLVs to their names + - new_key: NGINX_PROXY_PROTOCOL_TLV_SSL_VERSION + old_key: NGINX_PROXY_PROTOCOL_TLV_SSL_0X21 + - new_key: NGINX_PROXY_PROTOCOL_TLV_SSL_CN + old_key: NGINX_PROXY_PROTOCOL_TLV_SSL_0X22 + - new_key: NGINX_PROXY_PROTOCOL_TLV_SSL_CIPHER + old_key: NGINX_PROXY_PROTOCOL_TLV_SSL_0X23 + - new_key: NGINX_PROXY_PROTOCOL_TLV_SSL_SIG_ALG + old_key: NGINX_PROXY_PROTOCOL_TLV_SSL_0X24 + - new_key: NGINX_PROXY_PROTOCOL_TLV_SSL_KEY_ALG + old_key: NGINX_PROXY_PROTOCOL_TLV_SSL_0X25 + +# Inject constant fields into the journal logs. +inject: + - key: SYSLOG_IDENTIFIER + value: nginx-log + + # inject PRIORITY is a duplicate of NGINX_STATUS + - key: PRIORITY + value: '${NGINX_STATUS}' + + # Inject NGINX_STATUS_FAMILY is a duplicate of NGINX_STATUS + - key: NGINX_STATUS_FAMILY + value: '${NGINX_STATUS}' + + +# Rewrite the value of fields (including the duplicated ones). +# The search pattern can have named groups, and the replace pattern can use +# them as ${name}. +rewrite: + # a ? means it has query string, everything else means it does not + - key: NGINX_HAS_QUERY_STRING + match: '^\?$' + value: yes + - key: NGINX_HAS_QUERY_STRING + match: '.*' + value: no + + # 'on' means it was HTTPS, everything else means it was not + - key: NGINX_HTTPS + match: '^on$' + value: yes + - key: NGINX_HTTPS + match: '.*' + value: no + + # 'p' means it was pipelined, everything else means it was not + - key: NGINX_PIPELINED + match: '^p$' + value: yes + - key: NGINX_PIPELINED + match: '.*' + value: no + + # zero means client sent a certificate and it was verified, non-zero means otherwise + - key: NGINX_PROXY_PROTOCOL_TLV_SSL_VERIFY + match: '^0$' + value: yes + - key: NGINX_PROXY_PROTOCOL_TLV_SSL_VERIFY + match: '.*' + value: no + + # 'OK' means request completed, everything else means it didn't + - key: NGINX_REQUEST_COMPLETION + match: '^OK$' + value: 'completed' + - key: NGINX_REQUEST_COMPLETION + match: '.*' + value: 'not completed' + + # PRIORTY is a duplicate of NGINX_STATUS + # Valid PRIORITIES: 0=emerg, 1=alert, 2=crit, 3=error, 4=warn, 5=notice, 6=info, 7=debug + - key: PRIORITY + match: '^[123]' + value: 6 + + - key: PRIORITY + match: '^4' + value: 5 + + - key: PRIORITY + match: '^5' + value: 3 + + - key: PRIORITY + match: '.*' + value: 4 + + # NGINX_STATUS_FAMILY is a duplicate of NGINX_STATUS + - key: NGINX_STATUS_FAMILY + match: '^(?<first_digit>[1-5])' + value: '${first_digit}xx' + + - key: NGINX_STATUS_FAMILY + match: '.*' + value: 'UNKNOWN' + +# Control what to do when input logs do not match the main PCRE2 pattern. +unmatched: + # The journal key to log the PCRE2 error message to. + # Set this to MESSAGE, so you to see the error in the log. + key: MESSAGE + + # Inject static fields to the unmatched entries. + # Set PRIORITY=1 (alert) to help you spot unmatched entries in the logs. + inject: + - key: PRIORITY + value: 1 diff --git a/collectors/log2journal/log2journal.h b/collectors/log2journal/log2journal.h new file mode 100644 index 000000000..834a5b135 --- /dev/null +++ b/collectors/log2journal/log2journal.h @@ -0,0 +1,501 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_LOG2JOURNAL_H +#define NETDATA_LOG2JOURNAL_H + +// only for PACKAGE_VERSION +#include "config.h" + +#include <stdio.h> +#include <stdlib.h> +#include <dirent.h> +#include <string.h> +#include <stdbool.h> +#include <string.h> +#include <ctype.h> +#include <math.h> +#include <stdarg.h> +#include <assert.h> + +// ---------------------------------------------------------------------------- +// logging + +// enable the compiler to check for printf like errors on our log2stderr() function +static inline void log2stderr(const char *format, ...) __attribute__ ((format(__printf__, 1, 2))); +static inline void log2stderr(const char *format, ...) { + va_list args; + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + fprintf(stderr, "\n"); +} + +// ---------------------------------------------------------------------------- +// allocation functions abstraction + +static inline void *mallocz(size_t size) { + void *ptr = malloc(size); + if (!ptr) { + log2stderr("Fatal Error: Memory allocation failed. Requested size: %zu bytes.", size); + exit(EXIT_FAILURE); + } + return ptr; +} + +static inline void *callocz(size_t elements, size_t size) { + void *ptr = calloc(elements, size); + if (!ptr) { + log2stderr("Fatal Error: Memory allocation failed. Requested size: %zu bytes.", elements * size); + exit(EXIT_FAILURE); + } + return ptr; +} + +static inline void *reallocz(void *ptr, size_t size) { + void *new_ptr = realloc(ptr, size); + if (!new_ptr) { + log2stderr("Fatal Error: Memory reallocation failed. Requested size: %zu bytes.", size); + exit(EXIT_FAILURE); + } + return new_ptr; +} + +static inline char *strdupz(const char *s) { + char *ptr = strdup(s); + if (!ptr) { + log2stderr("Fatal Error: Memory allocation failed in strdup."); + exit(EXIT_FAILURE); + } + return ptr; +} + +static inline char *strndupz(const char *s, size_t n) { + char *ptr = strndup(s, n); + if (!ptr) { + log2stderr("Fatal Error: Memory allocation failed in strndup. Requested size: %zu bytes.", n); + exit(EXIT_FAILURE); + } + return ptr; +} + +static inline void freez(void *ptr) { + if (ptr) + free(ptr); +} + +// ---------------------------------------------------------------------------- + +#define XXH_INLINE_ALL +#include "../../libnetdata/xxhash.h" + +#define PCRE2_CODE_UNIT_WIDTH 8 +#include <pcre2.h> + +#ifdef HAVE_LIBYAML +#include <yaml.h> +#endif + +// ---------------------------------------------------------------------------- +// hashtable for HASHED_KEY + +// cleanup hashtable defines +#undef SIMPLE_HASHTABLE_SORT_FUNCTION +#undef SIMPLE_HASHTABLE_VALUE_TYPE +#undef SIMPLE_HASHTABLE_NAME +#undef NETDATA_SIMPLE_HASHTABLE_H + +struct hashed_key; +static inline int compare_keys(struct hashed_key *k1, struct hashed_key *k2); +#define SIMPLE_HASHTABLE_SORT_FUNCTION compare_keys +#define SIMPLE_HASHTABLE_VALUE_TYPE struct hashed_key +#define SIMPLE_HASHTABLE_NAME _KEY +#include "../../libnetdata/simple_hashtable.h" + +// ---------------------------------------------------------------------------- + +#define MAX_OUTPUT_KEYS 1024 +#define MAX_LINE_LENGTH (1024 * 1024) +#define MAX_INJECTIONS (MAX_OUTPUT_KEYS / 2) +#define MAX_REWRITES (MAX_OUTPUT_KEYS / 2) +#define MAX_RENAMES (MAX_OUTPUT_KEYS / 2) + +#define JOURNAL_MAX_KEY_LEN 64 // according to systemd-journald +#define JOURNAL_MAX_VALUE_LEN (48 * 1024) // according to systemd-journald + +#define LOG2JOURNAL_CONFIG_PATH LIBCONFIG_DIR "/log2journal.d" + +// ---------------------------------------------------------------------------- +// character conversion for journal keys + +extern const char journal_key_characters_map[256]; + +// ---------------------------------------------------------------------------- +// copy to buffer, while ensuring there is no buffer overflow + +static inline size_t copy_to_buffer(char *dst, size_t dst_size, const char *src, size_t src_len) { + if(dst_size < 2) { + if(dst_size == 1) + *dst = '\0'; + + return 0; + } + + if(src_len <= dst_size - 1) { + memcpy(dst, src, src_len); + dst[src_len] = '\0'; + return src_len; + } + else { + memcpy(dst, src, dst_size - 1); + dst[dst_size - 1] = '\0'; + return dst_size - 1; + } +} + +// ---------------------------------------------------------------------------- +// A dynamically sized, reusable text buffer, +// allowing us to be fast (no allocations during iterations) while having the +// smallest possible allocations. + +typedef struct txt { + char *txt; + uint32_t size; + uint32_t len; +} TEXT; + +static inline void txt_cleanup(TEXT *t) { + if(!t) + return; + + if(t->txt) + freez(t->txt); + + t->txt = NULL; + t->size = 0; + t->len = 0; +} + +static inline void txt_replace(TEXT *t, const char *s, size_t len) { + if(!s || !*s || len == 0) { + s = ""; + len = 0; + } + + if(len + 1 <= t->size) { + // the existing value allocation, fits our value + + memcpy(t->txt, s, len); + t->txt[len] = '\0'; + t->len = len; + } + else { + // no existing value allocation, or too small for our value + // cleanup and increase the buffer + + txt_cleanup(t); + + t->txt = strndupz(s, len); + t->size = len + 1; + t->len = len; + } +} + +static inline void txt_expand_and_append(TEXT *t, const char *s, size_t len) { + if(len + 1 > (t->size - t->len)) { + size_t new_size = t->len + len + 1; + if(new_size < t->size * 2) + new_size = t->size * 2; + + t->txt = reallocz(t->txt, new_size); + t->size = new_size; + } + + char *copy_to = &t->txt[t->len]; + memcpy(copy_to, s, len); + copy_to[len] = '\0'; + t->len += len; +} + +// ---------------------------------------------------------------------------- + +typedef enum __attribute__((__packed__)) { + HK_NONE = 0, + + // permanent flags - they are set once to optimize various decisions and lookups + + HK_HASHTABLE_ALLOCATED = (1 << 0), // this is key object allocated in the hashtable + // objects that do not have this, have a pointer to a key in the hashtable + // objects that have this, value a value allocated + + HK_FILTERED = (1 << 1), // we checked once if this key in filtered + HK_FILTERED_INCLUDED = (1 << 2), // the result of the filtering was to include it in the output + + HK_COLLISION_CHECKED = (1 << 3), // we checked once for collision check of this key + + HK_RENAMES_CHECKED = (1 << 4), // we checked once if there are renames on this key + HK_HAS_RENAMES = (1 << 5), // and we found there is a rename rule related to it + + // ephemeral flags - they are unset at the end of each log line + + HK_VALUE_FROM_LOG = (1 << 14), // the value of this key has been read from the log (or from injection, duplication) + HK_VALUE_REWRITTEN = (1 << 15), // the value of this key has been rewritten due to one of our rewrite rules + +} HASHED_KEY_FLAGS; + +typedef struct hashed_key { + const char *key; + uint32_t len; + HASHED_KEY_FLAGS flags; + XXH64_hash_t hash; + union { + struct hashed_key *hashtable_ptr; // HK_HASHTABLE_ALLOCATED is not set + TEXT value; // HK_HASHTABLE_ALLOCATED is set + }; +} HASHED_KEY; + +static inline void hashed_key_cleanup(HASHED_KEY *k) { + if(k->key) { + freez((void *)k->key); + k->key = NULL; + } + + if(k->flags & HK_HASHTABLE_ALLOCATED) + txt_cleanup(&k->value); + else + k->hashtable_ptr = NULL; +} + +static inline void hashed_key_set(HASHED_KEY *k, const char *name) { + hashed_key_cleanup(k); + + k->key = strdupz(name); + k->len = strlen(k->key); + k->hash = XXH3_64bits(k->key, k->len); + k->flags = HK_NONE; +} + +static inline void hashed_key_len_set(HASHED_KEY *k, const char *name, size_t len) { + hashed_key_cleanup(k); + + k->key = strndupz(name, len); + k->len = len; + k->hash = XXH3_64bits(k->key, k->len); + k->flags = HK_NONE; +} + +static inline bool hashed_keys_match(HASHED_KEY *k1, HASHED_KEY *k2) { + return ((k1 == k2) || (k1->hash == k2->hash && strcmp(k1->key, k2->key) == 0)); +} + +static inline int compare_keys(struct hashed_key *k1, struct hashed_key *k2) { + return strcmp(k1->key, k2->key); +} + +// ---------------------------------------------------------------------------- + +typedef struct search_pattern { + const char *pattern; + pcre2_code *re; + pcre2_match_data *match_data; + TEXT error; +} SEARCH_PATTERN; + +void search_pattern_cleanup(SEARCH_PATTERN *sp); +bool search_pattern_set(SEARCH_PATTERN *sp, const char *search_pattern, size_t search_pattern_len); + +static inline bool search_pattern_matches(SEARCH_PATTERN *sp, const char *value, size_t value_len) { + return pcre2_match(sp->re, (PCRE2_SPTR)value, value_len, 0, 0, sp->match_data, NULL) >= 0; +} + +// ---------------------------------------------------------------------------- + +typedef struct replacement_node { + HASHED_KEY name; + bool is_variable; + bool logged_error; + + struct replacement_node *next; +} REPLACE_NODE; + +void replace_node_free(REPLACE_NODE *rpn); + +typedef struct replace_pattern { + const char *pattern; + REPLACE_NODE *nodes; + bool has_variables; +} REPLACE_PATTERN; + +void replace_pattern_cleanup(REPLACE_PATTERN *rp); +bool replace_pattern_set(REPLACE_PATTERN *rp, const char *pattern); + +// ---------------------------------------------------------------------------- + +typedef struct injection { + bool on_unmatched; + HASHED_KEY key; + REPLACE_PATTERN value; +} INJECTION; + +void injection_cleanup(INJECTION *inj); + +// ---------------------------------------------------------------------------- + +typedef struct key_rename { + HASHED_KEY new_key; + HASHED_KEY old_key; +} RENAME; + +void rename_cleanup(RENAME *rn); + +// ---------------------------------------------------------------------------- + +typedef enum __attribute__((__packed__)) { + RW_NONE = 0, + RW_MATCH_PCRE2 = (1 << 1), // a rewrite rule + RW_MATCH_NON_EMPTY = (1 << 2), // a rewrite rule + RW_DONT_STOP = (1 << 3), + RW_INJECT = (1 << 4), +} RW_FLAGS; + +typedef struct key_rewrite { + RW_FLAGS flags; + HASHED_KEY key; + union { + SEARCH_PATTERN match_pcre2; + REPLACE_PATTERN match_non_empty; + }; + REPLACE_PATTERN value; +} REWRITE; + +void rewrite_cleanup(REWRITE *rw); + +// ---------------------------------------------------------------------------- +// A job configuration and runtime structures + +typedef struct log_job { + bool show_config; + + const char *pattern; + const char *prefix; + + SIMPLE_HASHTABLE_KEY hashtable; + + struct { + const char *buffer; + const char *trimmed; + size_t trimmed_len; + size_t size; + HASHED_KEY key; + } line; + + struct { + SEARCH_PATTERN include; + SEARCH_PATTERN exclude; + } filter; + + struct { + bool last_line_was_empty; + HASHED_KEY key; + TEXT current; + } filename; + + struct { + uint32_t used; + INJECTION keys[MAX_INJECTIONS]; + } injections; + + struct { + HASHED_KEY key; + struct { + uint32_t used; + INJECTION keys[MAX_INJECTIONS]; + } injections; + } unmatched; + + struct { + uint32_t used; + REWRITE array[MAX_REWRITES]; + TEXT tmp; + } rewrites; + + struct { + uint32_t used; + RENAME array[MAX_RENAMES]; + } renames; +} LOG_JOB; + +// initialize a log job +void log_job_init(LOG_JOB *jb); + +// free all resources consumed by the log job +void log_job_cleanup(LOG_JOB *jb); + +// ---------------------------------------------------------------------------- + +// the entry point to send key value pairs to the output +// this implements the pipeline of processing renames, rewrites and duplications +void log_job_send_extracted_key_value(LOG_JOB *jb, const char *key, const char *value, size_t len); + +// ---------------------------------------------------------------------------- +// configuration related + +// management of configuration to set settings +bool log_job_filename_key_set(LOG_JOB *jb, const char *key, size_t key_len); +bool log_job_key_prefix_set(LOG_JOB *jb, const char *prefix, size_t prefix_len); +bool log_job_pattern_set(LOG_JOB *jb, const char *pattern, size_t pattern_len); +bool log_job_injection_add(LOG_JOB *jb, const char *key, size_t key_len, const char *value, size_t value_len, bool unmatched); +bool log_job_rewrite_add(LOG_JOB *jb, const char *key, RW_FLAGS flags, const char *search_pattern, const char *replace_pattern); +bool log_job_rename_add(LOG_JOB *jb, const char *new_key, size_t new_key_len, const char *old_key, size_t old_key_len); +bool log_job_include_pattern_set(LOG_JOB *jb, const char *pattern, size_t pattern_len); +bool log_job_exclude_pattern_set(LOG_JOB *jb, const char *pattern, size_t pattern_len); + +// entry point to parse command line parameters +bool log_job_command_line_parse_parameters(LOG_JOB *jb, int argc, char **argv); +void log_job_command_line_help(const char *name); + +// ---------------------------------------------------------------------------- +// YAML configuration related + +#ifdef HAVE_LIBYAML +bool yaml_parse_file(const char *config_file_path, LOG_JOB *jb); +bool yaml_parse_config(const char *config_name, LOG_JOB *jb); +#endif + +void log_job_configuration_to_yaml(LOG_JOB *jb); + +// ---------------------------------------------------------------------------- +// JSON parser + +typedef struct log_json_state LOG_JSON_STATE; +LOG_JSON_STATE *json_parser_create(LOG_JOB *jb); +void json_parser_destroy(LOG_JSON_STATE *js); +const char *json_parser_error(LOG_JSON_STATE *js); +bool json_parse_document(LOG_JSON_STATE *js, const char *txt); +void json_test(void); + +size_t parse_surrogate(const char *s, char *d, size_t *remaining); + +// ---------------------------------------------------------------------------- +// logfmt parser + +typedef struct logfmt_state LOGFMT_STATE; +LOGFMT_STATE *logfmt_parser_create(LOG_JOB *jb); +void logfmt_parser_destroy(LOGFMT_STATE *lfs); +const char *logfmt_parser_error(LOGFMT_STATE *lfs); +bool logfmt_parse_document(LOGFMT_STATE *js, const char *txt); +void logfmt_test(void); + +// ---------------------------------------------------------------------------- +// pcre2 parser + +typedef struct pcre2_state PCRE2_STATE; +PCRE2_STATE *pcre2_parser_create(LOG_JOB *jb); +void pcre2_parser_destroy(PCRE2_STATE *pcre2); +const char *pcre2_parser_error(PCRE2_STATE *pcre2); +bool pcre2_parse_document(PCRE2_STATE *pcre2, const char *txt, size_t len); +bool pcre2_has_error(PCRE2_STATE *pcre2); +void pcre2_test(void); + +void pcre2_get_error_in_buffer(char *msg, size_t msg_len, int rc, int pos); + +#endif //NETDATA_LOG2JOURNAL_H diff --git a/collectors/log2journal/tests.d/default.output b/collectors/log2journal/tests.d/default.output new file mode 100644 index 000000000..ef17cb2c7 --- /dev/null +++ b/collectors/log2journal/tests.d/default.output @@ -0,0 +1,20 @@ +MESSAGE=key1=value01 key2=value02 key3=value03 key4=value04 +PRIORITY=6 +SYSLOG_IDENTIFIER=log2journal + +MESSAGE=key1=value11 key2=value12 key3=value13 key4= +PRIORITY=6 +SYSLOG_IDENTIFIER=log2journal + +MESSAGE=key1=value21 key2=value22 key3=value23 key4=value24 +PRIORITY=6 +SYSLOG_IDENTIFIER=log2journal + +MESSAGE=key1=value31 key2=value32 key3=value33 key4= +PRIORITY=6 +SYSLOG_IDENTIFIER=log2journal + +MESSAGE=key1=value41 key2=value42 key3=value43 key4=value44 +PRIORITY=6 +SYSLOG_IDENTIFIER=log2journal + diff --git a/collectors/log2journal/tests.d/full.output b/collectors/log2journal/tests.d/full.output new file mode 100644 index 000000000..074092d4e --- /dev/null +++ b/collectors/log2journal/tests.d/full.output @@ -0,0 +1,77 @@ +pattern: | + (?x) # Enable PCRE2 extended mode + ^ + (?<NGINX_REMOTE_ADDR>[^ ]+) \s - \s # NGINX_REMOTE_ADDR + (?<NGINX_REMOTE_USER>[^ ]+) \s # NGINX_REMOTE_USER + \[ + (?<NGINX_TIME_LOCAL>[^\]]+) # NGINX_TIME_LOCAL + \] + \s+ " + (?<MESSAGE> + (?<NGINX_METHOD>[A-Z]+) \s+ # NGINX_METHOD + (?<NGINX_URL>[^ ]+) \s+ + HTTP/(?<NGINX_HTTP_VERSION>[^"]+) + ) + " \s+ + (?<NGINX_STATUS>\d+) \s+ # NGINX_STATUS + (?<NGINX_BODY_BYTES_SENT>\d+) \s+ # NGINX_BODY_BYTES_SENT + "(?<NGINX_HTTP_REFERER>[^"]*)" \s+ # NGINX_HTTP_REFERER + "(?<NGINX_HTTP_USER_AGENT>[^"]*)" # NGINX_HTTP_USER_AGENT + +prefix: NGINX_ + +filename: + key: NGINX_LOG_FILENAME + +filter: + include: '.*' + exclude: '.*HELLO.*WORLD.*' + +rename: + - new_key: TEST1 + old_key: TEST2 + - new_key: TEST3 + old_key: TEST4 + +inject: + - key: SYSLOG_IDENTIFIER + value: nginx-log + - key: SYSLOG_IDENTIFIER2 + value: nginx-log2 + - key: PRIORITY + value: '${NGINX_STATUS}' + - key: NGINX_STATUS_FAMILY + value: '${NGINX_STATUS}${NGINX_METHOD}' + +rewrite: + - key: PRIORITY + value: '${NGINX_STATUS}' + inject: yes + stop: no + - key: PRIORITY + match: '^[123]' + value: 6 + - key: PRIORITY + match: '^4' + value: 5 + - key: PRIORITY + match: '^5' + value: 3 + - key: PRIORITY + match: '.*' + value: 4 + - key: NGINX_STATUS_FAMILY + match: '^(?<first_digit>[1-5])' + value: '${first_digit}xx' + - key: NGINX_STATUS_FAMILY + match: '.*' + value: UNKNOWN + +unmatched: + key: MESSAGE + + inject: + - key: PRIORITY + value: 1 + - key: PRIORITY2 + value: 2 diff --git a/collectors/log2journal/tests.d/full.yaml b/collectors/log2journal/tests.d/full.yaml new file mode 100644 index 000000000..86cafb5a2 --- /dev/null +++ b/collectors/log2journal/tests.d/full.yaml @@ -0,0 +1,76 @@ +pattern: | + (?x) # Enable PCRE2 extended mode + ^ + (?<NGINX_REMOTE_ADDR>[^ ]+) \s - \s # NGINX_REMOTE_ADDR + (?<NGINX_REMOTE_USER>[^ ]+) \s # NGINX_REMOTE_USER + \[ + (?<NGINX_TIME_LOCAL>[^\]]+) # NGINX_TIME_LOCAL + \] + \s+ " + (?<MESSAGE> + (?<NGINX_METHOD>[A-Z]+) \s+ # NGINX_METHOD + (?<NGINX_URL>[^ ]+) \s+ + HTTP/(?<NGINX_HTTP_VERSION>[^"]+) + ) + " \s+ + (?<NGINX_STATUS>\d+) \s+ # NGINX_STATUS + (?<NGINX_BODY_BYTES_SENT>\d+) \s+ # NGINX_BODY_BYTES_SENT + "(?<NGINX_HTTP_REFERER>[^"]*)" \s+ # NGINX_HTTP_REFERER + "(?<NGINX_HTTP_USER_AGENT>[^"]*)" # NGINX_HTTP_USER_AGENT + +prefix: NGINX_ + +filename: + key: NGINX_LOG_FILENAME + +filter: + include: '.*' + exclude: '.*HELLO.*WORLD.*' + +rename: + - new_key: TEST1 + old_key: TEST2 + - new_key: TEST3 + old_key: TEST4 + +inject: + - key: SYSLOG_IDENTIFIER + value: 'nginx-log' + - key: SYSLOG_IDENTIFIER2 + value: 'nginx-log2' + - key: PRIORITY + value: '${NGINX_STATUS}' + - key: NGINX_STATUS_FAMILY + value: '${NGINX_STATUS}${NGINX_METHOD}' + +rewrite: + - key: "PRIORITY" + value: "${NGINX_STATUS}" + inject: yes + stop: no + - key: "PRIORITY" + match: "^[123]" + value: 6 + - key: "PRIORITY" + match: "^4" + value: 5 + - key: "PRIORITY" + match: "^5" + value: 3 + - key: "PRIORITY" + match: ".*" + value: 4 + - key: "NGINX_STATUS_FAMILY" + match: "^(?<first_digit>[1-5])" + value: "${first_digit}xx" + - key: "NGINX_STATUS_FAMILY" + match: ".*" + value: "UNKNOWN" + +unmatched: + key: MESSAGE + inject: + - key: PRIORITY + value: 1 + - key: PRIORITY2 + value: 2 diff --git a/collectors/log2journal/tests.d/json-exclude.output b/collectors/log2journal/tests.d/json-exclude.output new file mode 100644 index 000000000..a8f6f83e6 --- /dev/null +++ b/collectors/log2journal/tests.d/json-exclude.output @@ -0,0 +1,153 @@ +ARRAY2_0=1 +ARRAY2_1=-2.345 +ARRAY2_2=Array Element +ARRAY2_3=true +ARRAY2_4=false +ARRAY2_5=null +ARRAY2_6_BOOLEANFALSE=false +ARRAY2_6_BOOLEANTRUE=true +ARRAY2_6_FLOATNEGATIVE=-0.123 +ARRAY2_6_FLOATPOSITIVE=0.987 +ARRAY2_6_NULLVALUE=null +ARRAY2_6_NUMERICNEGATIVE=-456 +ARRAY2_6_NUMERICPOSITIVE=123 +ARRAY2_6_SCIENTIFICFLOATNEGATIVE=-1.5e-2 +ARRAY2_6_SCIENTIFICINTPOSITIVE=6e4 +ARRAY2_6_SCIENTIFICSMALLPOSITIVE=5e-5 +ARRAY2_6_STRING=Nested Object in Array2 +ARRAY2_7_BOOLEANFALSE=false +ARRAY2_7_BOOLEANTRUE=true +ARRAY2_7_FLOATNEGATIVE=-2.71828 +ARRAY2_7_FLOATPOSITIVE=3.14159 +ARRAY2_7_NULLVALUE=null +ARRAY2_7_NUMERICNEGATIVE=-123 +ARRAY2_7_NUMERICPOSITIVE=42 +ARRAY2_7_SCIENTIFICFLOATNEGATIVE=-2.5e-3 +ARRAY2_7_SCIENTIFICINTPOSITIVE=1e5 +ARRAY2_7_SCIENTIFICSMALLPOSITIVE=1e-4 +ARRAY2_7_STRING=Array Element with Object in Array2 +BOOLEANFALSE=false +BOOLEANTRUE=true +FLOATNEGATIVE=-2.71828 +FLOATPOSITIVE=3.14159 +NULLVALUE=null +NUMERICNEGATIVE=-123 +NUMERICPOSITIVE=42 +OBJECT_BOOLEANFALSE=false +OBJECT_BOOLEANTRUE=true +OBJECT_FLOATNEGATIVE=-0.123 +OBJECT_FLOATPOSITIVE=0.987 +OBJECT_NULLVALUE=null +OBJECT_NUMERICNEGATIVE=-456 +OBJECT_NUMERICPOSITIVE=123 +OBJECT_SCIENTIFICFLOATNEGATIVE=-1.5e-2 +OBJECT_SCIENTIFICINTPOSITIVE=6e4 +OBJECT_SCIENTIFICSMALLPOSITIVE=5e-5 +OBJECT_STRING=Nested Object +SCIENTIFICFLOATNEGATIVE=-2.5e-3 +SCIENTIFICINTPOSITIVE=1e5 +SCIENTIFICSMALLPOSITIVE=1e-4 +STRING=Hello, World! + +ARRAY2_0=1 +ARRAY2_1=-2.345 +ARRAY2_2=Array Element +ARRAY2_3=true +ARRAY2_4=false +ARRAY2_5=null +ARRAY2_6_BOOLEANFALSE=false +ARRAY2_6_BOOLEANTRUE=true +ARRAY2_6_FLOATNEGATIVE=-0.123 +ARRAY2_6_FLOATPOSITIVE=0.987 +ARRAY2_6_NULLVALUE=null +ARRAY2_6_NUMERICNEGATIVE=-456 +ARRAY2_6_NUMERICPOSITIVE=123 +ARRAY2_6_SCIENTIFICFLOATNEGATIVE=-1.5e-2 +ARRAY2_6_SCIENTIFICINTPOSITIVE=6e4 +ARRAY2_6_SCIENTIFICSMALLPOSITIVE=5e-5 +ARRAY2_6_STRING=Nested Object in Array2 +ARRAY2_7_BOOLEANFALSE=false +ARRAY2_7_BOOLEANTRUE=true +ARRAY2_7_FLOATNEGATIVE=-2.71828 +ARRAY2_7_FLOATPOSITIVE=3.14159 +ARRAY2_7_NULLVALUE=null +ARRAY2_7_NUMERICNEGATIVE=-123 +ARRAY2_7_NUMERICPOSITIVE=42 +ARRAY2_7_SCIENTIFICFLOATNEGATIVE=-2.5e-3 +ARRAY2_7_SCIENTIFICINTPOSITIVE=1e5 +ARRAY2_7_SCIENTIFICSMALLPOSITIVE=1e-4 +ARRAY2_7_STRING=Array Element with Object in Array2 +BOOLEANFALSE=false +BOOLEANTRUE=true +FLOATNEGATIVE=-2.71828 +FLOATPOSITIVE=3.14159 +NULLVALUE=null +NUMERICNEGATIVE=-123 +NUMERICPOSITIVE=42 +OBJECT_BOOLEANFALSE=false +OBJECT_BOOLEANTRUE=true +OBJECT_FLOATNEGATIVE=-0.123 +OBJECT_FLOATPOSITIVE=0.987 +OBJECT_NULLVALUE=null +OBJECT_NUMERICNEGATIVE=-456 +OBJECT_NUMERICPOSITIVE=123 +OBJECT_SCIENTIFICFLOATNEGATIVE=-1.5e-2 +OBJECT_SCIENTIFICINTPOSITIVE=6e4 +OBJECT_SCIENTIFICSMALLPOSITIVE=5e-5 +OBJECT_STRING=Nested Object +SCIENTIFICFLOATNEGATIVE=-2.5e-3 +SCIENTIFICINTPOSITIVE=1e5 +SCIENTIFICSMALLPOSITIVE=1e-4 +STRING=Hello, World! + +ARRAY2_0=1 +ARRAY2_1=-2.345 +ARRAY2_2=Array Element +ARRAY2_3=true +ARRAY2_4=false +ARRAY2_5=null +ARRAY2_6_BOOLEANFALSE=false +ARRAY2_6_BOOLEANTRUE=true +ARRAY2_6_FLOATNEGATIVE=-0.123 +ARRAY2_6_FLOATPOSITIVE=0.987 +ARRAY2_6_NULLVALUE=null +ARRAY2_6_NUMERICNEGATIVE=-456 +ARRAY2_6_NUMERICPOSITIVE=123 +ARRAY2_6_SCIENTIFICFLOATNEGATIVE=-1.5e-2 +ARRAY2_6_SCIENTIFICINTPOSITIVE=6e4 +ARRAY2_6_SCIENTIFICSMALLPOSITIVE=5e-5 +ARRAY2_6_STRING=Nested Object in Array2 +ARRAY2_7_BOOLEANFALSE=false +ARRAY2_7_BOOLEANTRUE=true +ARRAY2_7_FLOATNEGATIVE=-2.71828 +ARRAY2_7_FLOATPOSITIVE=3.14159 +ARRAY2_7_NULLVALUE=null +ARRAY2_7_NUMERICNEGATIVE=-123 +ARRAY2_7_NUMERICPOSITIVE=42 +ARRAY2_7_SCIENTIFICFLOATNEGATIVE=-2.5e-3 +ARRAY2_7_SCIENTIFICINTPOSITIVE=1e5 +ARRAY2_7_SCIENTIFICSMALLPOSITIVE=1e-4 +ARRAY2_7_STRING=Array Element with Object in Array2 +BOOLEANFALSE=false +BOOLEANTRUE=true +FLOATNEGATIVE=-2.71828 +FLOATPOSITIVE=3.14159 +NULLVALUE=null +NUMERICNEGATIVE=-123 +NUMERICPOSITIVE=42 +OBJECT_BOOLEANFALSE=false +OBJECT_BOOLEANTRUE=true +OBJECT_FLOATNEGATIVE=-0.123 +OBJECT_FLOATPOSITIVE=0.987 +OBJECT_NULLVALUE=null +OBJECT_NUMERICNEGATIVE=-456 +OBJECT_NUMERICPOSITIVE=123 +OBJECT_SCIENTIFICFLOATNEGATIVE=-1.5e-2 +OBJECT_SCIENTIFICINTPOSITIVE=6e4 +OBJECT_SCIENTIFICSMALLPOSITIVE=5e-5 +OBJECT_STRING=Nested Object +SCIENTIFICFLOATNEGATIVE=-2.5e-3 +SCIENTIFICINTPOSITIVE=1e5 +SCIENTIFICSMALLPOSITIVE=1e-4 +STRING=Hello, World! + diff --git a/collectors/log2journal/tests.d/json-include.output b/collectors/log2journal/tests.d/json-include.output new file mode 100644 index 000000000..326c58da2 --- /dev/null +++ b/collectors/log2journal/tests.d/json-include.output @@ -0,0 +1,54 @@ +OBJECT_ARRAY_0=1 +OBJECT_ARRAY_1=-2 +OBJECT_ARRAY_2=3 +OBJECT_ARRAY_3=Nested Array +OBJECT_ARRAY_4=true +OBJECT_ARRAY_5=null +OBJECT_BOOLEANFALSE=false +OBJECT_BOOLEANTRUE=true +OBJECT_FLOATNEGATIVE=-0.123 +OBJECT_FLOATPOSITIVE=0.987 +OBJECT_NULLVALUE=null +OBJECT_NUMERICNEGATIVE=-456 +OBJECT_NUMERICPOSITIVE=123 +OBJECT_SCIENTIFICFLOATNEGATIVE=-1.5e-2 +OBJECT_SCIENTIFICINTPOSITIVE=6e4 +OBJECT_SCIENTIFICSMALLPOSITIVE=5e-5 +OBJECT_STRING=Nested Object + +OBJECT_ARRAY_0=1 +OBJECT_ARRAY_1=-2 +OBJECT_ARRAY_2=3 +OBJECT_ARRAY_3=Nested Array +OBJECT_ARRAY_4=true +OBJECT_ARRAY_5=null +OBJECT_BOOLEANFALSE=false +OBJECT_BOOLEANTRUE=true +OBJECT_FLOATNEGATIVE=-0.123 +OBJECT_FLOATPOSITIVE=0.987 +OBJECT_NULLVALUE=null +OBJECT_NUMERICNEGATIVE=-456 +OBJECT_NUMERICPOSITIVE=123 +OBJECT_SCIENTIFICFLOATNEGATIVE=-1.5e-2 +OBJECT_SCIENTIFICINTPOSITIVE=6e4 +OBJECT_SCIENTIFICSMALLPOSITIVE=5e-5 +OBJECT_STRING=Nested Object + +OBJECT_ARRAY_0=1 +OBJECT_ARRAY_1=-2 +OBJECT_ARRAY_2=3 +OBJECT_ARRAY_3=Nested Array +OBJECT_ARRAY_4=true +OBJECT_ARRAY_5=null +OBJECT_BOOLEANFALSE=false +OBJECT_BOOLEANTRUE=true +OBJECT_FLOATNEGATIVE=-0.123 +OBJECT_FLOATPOSITIVE=0.987 +OBJECT_NULLVALUE=null +OBJECT_NUMERICNEGATIVE=-456 +OBJECT_NUMERICPOSITIVE=123 +OBJECT_SCIENTIFICFLOATNEGATIVE=-1.5e-2 +OBJECT_SCIENTIFICINTPOSITIVE=6e4 +OBJECT_SCIENTIFICSMALLPOSITIVE=5e-5 +OBJECT_STRING=Nested Object + diff --git a/collectors/log2journal/tests.d/json.log b/collectors/log2journal/tests.d/json.log new file mode 100644 index 000000000..3f1334960 --- /dev/null +++ b/collectors/log2journal/tests.d/json.log @@ -0,0 +1,3 @@ +{ "numericPositive": 42, "numericNegative": -123, "floatPositive": 3.14159, "floatNegative": -2.71828, "scientificIntPositive": 1e5, "scientificFloatNegative": -2.5e-3, "scientificSmallPositive": 1e-4, "booleanTrue": true, "booleanFalse": false, "string": "Hello, World!", "nullValue": null, "object": { "numericPositive": 123, "numericNegative": -456, "floatPositive": 0.987, "floatNegative": -0.123, "scientificIntPositive": 6e4, "scientificFloatNegative": -1.5e-2, "scientificSmallPositive": 5e-5, "booleanTrue": true, "booleanFalse": false, "string": "Nested Object", "nullValue": null, "array": [1, -2, 3, "Nested Array", true, null] }, "array": [ 1, -2.345, "Array Element", true, false, null, { "numericPositive": 987, "numericNegative": -654, "string": "Nested Object in Array", "array": [null, false, true] }, { "numericPositive": 42, "numericNegative": -123, "floatPositive": 3.14159, "floatNegative": -2.71828, "scientificIntPositive": 1e5, "scientificFloatNegative": -2.5e-3, "scientificSmallPositive": 1e-4, "booleanTrue": true, "booleanFalse": false, "string": "Array Element with Object", "nullValue": null, "array": [1, -2, 3, "Nested Array in Object", true, null] } ], "array2": [ 1, -2.345, "Array Element", true, false, null, { "numericPositive": 123, "numericNegative": -456, "floatPositive": 0.987, "floatNegative": -0.123, "scientificIntPositive": 6e4, "scientificFloatNegative": -1.5e-2, "scientificSmallPositive": 5e-5, "booleanTrue": true, "booleanFalse": false, "string": "Nested Object in Array2", "nullValue": null, "array": [1, -2, 3, "Nested Array in Object2", true, null] }, { "numericPositive": 42, "numericNegative": -123, "floatPositive": 3.14159, "floatNegative": -2.71828, "scientificIntPositive": 1e5, "scientificFloatNegative": -2.5e-3, "scientificSmallPositive": 1e-4, "booleanTrue": true, "booleanFalse": false, "string": "Array Element with Object in Array2", "nullValue": null, "array": [1, -2, 3, "Nested Array in Object2", true, null]}]} +{ "numericPositive": 42, "numericNegative": -123, "floatPositive": 3.14159, "floatNegative": -2.71828, "scientificIntPositive": 1e5, "scientificFloatNegative": -2.5e-3, "scientificSmallPositive": 1e-4, "booleanTrue": true, "booleanFalse": false, "string": "Hello, World!", "nullValue": null, "object": { "numericPositive": 123, "numericNegative": -456, "floatPositive": 0.987, "floatNegative": -0.123, "scientificIntPositive": 6e4, "scientificFloatNegative": -1.5e-2, "scientificSmallPositive": 5e-5, "booleanTrue": true, "booleanFalse": false, "string": "Nested Object", "nullValue": null, "array": [1, -2, 3, "Nested Array", true, null] }, "array": [ 1, -2.345, "Array Element", true, false, null, { "numericPositive": 987, "numericNegative": -654, "string": "Nested Object in Array", "array": [null, false, true] }, { "numericPositive": 42, "numericNegative": -123, "floatPositive": 3.14159, "floatNegative": -2.71828, "scientificIntPositive": 1e5, "scientificFloatNegative": -2.5e-3, "scientificSmallPositive": 1e-4, "booleanTrue": true, "booleanFalse": false, "string": "Array Element with Object", "nullValue": null, "array": [1, -2, 3, "Nested Array in Object", true, null] } ], "array2": [ 1, -2.345, "Array Element", true, false, null, { "numericPositive": 123, "numericNegative": -456, "floatPositive": 0.987, "floatNegative": -0.123, "scientificIntPositive": 6e4, "scientificFloatNegative": -1.5e-2, "scientificSmallPositive": 5e-5, "booleanTrue": true, "booleanFalse": false, "string": "Nested Object in Array2", "nullValue": null, "array": [1, -2, 3, "Nested Array in Object2", true, null] }, { "numericPositive": 42, "numericNegative": -123, "floatPositive": 3.14159, "floatNegative": -2.71828, "scientificIntPositive": 1e5, "scientificFloatNegative": -2.5e-3, "scientificSmallPositive": 1e-4, "booleanTrue": true, "booleanFalse": false, "string": "Array Element with Object in Array2", "nullValue": null, "array": [1, -2, 3, "Nested Array in Object2", true, null]}]} +{ "numericPositive": 42, "numericNegative": -123, "floatPositive": 3.14159, "floatNegative": -2.71828, "scientificIntPositive": 1e5, "scientificFloatNegative": -2.5e-3, "scientificSmallPositive": 1e-4, "booleanTrue": true, "booleanFalse": false, "string": "Hello, World!", "nullValue": null, "object": { "numericPositive": 123, "numericNegative": -456, "floatPositive": 0.987, "floatNegative": -0.123, "scientificIntPositive": 6e4, "scientificFloatNegative": -1.5e-2, "scientificSmallPositive": 5e-5, "booleanTrue": true, "booleanFalse": false, "string": "Nested Object", "nullValue": null, "array": [1, -2, 3, "Nested Array", true, null] }, "array": [ 1, -2.345, "Array Element", true, false, null, { "numericPositive": 987, "numericNegative": -654, "string": "Nested Object in Array", "array": [null, false, true] }, { "numericPositive": 42, "numericNegative": -123, "floatPositive": 3.14159, "floatNegative": -2.71828, "scientificIntPositive": 1e5, "scientificFloatNegative": -2.5e-3, "scientificSmallPositive": 1e-4, "booleanTrue": true, "booleanFalse": false, "string": "Array Element with Object", "nullValue": null, "array": [1, -2, 3, "Nested Array in Object", true, null] } ], "array2": [ 1, -2.345, "Array Element", true, false, null, { "numericPositive": 123, "numericNegative": -456, "floatPositive": 0.987, "floatNegative": -0.123, "scientificIntPositive": 6e4, "scientificFloatNegative": -1.5e-2, "scientificSmallPositive": 5e-5, "booleanTrue": true, "booleanFalse": false, "string": "Nested Object in Array2", "nullValue": null, "array": [1, -2, 3, "Nested Array in Object2", true, null] }, { "numericPositive": 42, "numericNegative": -123, "floatPositive": 3.14159, "floatNegative": -2.71828, "scientificIntPositive": 1e5, "scientificFloatNegative": -2.5e-3, "scientificSmallPositive": 1e-4, "booleanTrue": true, "booleanFalse": false, "string": "Array Element with Object in Array2", "nullValue": null, "array": [1, -2, 3, "Nested Array in Object2", true, null]}]} diff --git a/collectors/log2journal/tests.d/json.output b/collectors/log2journal/tests.d/json.output new file mode 100644 index 000000000..83499cc55 --- /dev/null +++ b/collectors/log2journal/tests.d/json.output @@ -0,0 +1,294 @@ +ARRAY2_0=1 +ARRAY2_1=-2.345 +ARRAY2_2=Array Element +ARRAY2_3=true +ARRAY2_4=false +ARRAY2_5=null +ARRAY2_6_ARRAY_0=1 +ARRAY2_6_ARRAY_1=-2 +ARRAY2_6_ARRAY_2=3 +ARRAY2_6_ARRAY_3=Nested Array in Object2 +ARRAY2_6_ARRAY_4=true +ARRAY2_6_ARRAY_5=null +ARRAY2_6_BOOLEANFALSE=false +ARRAY2_6_BOOLEANTRUE=true +ARRAY2_6_FLOATNEGATIVE=-0.123 +ARRAY2_6_FLOATPOSITIVE=0.987 +ARRAY2_6_NULLVALUE=null +ARRAY2_6_NUMERICNEGATIVE=-456 +ARRAY2_6_NUMERICPOSITIVE=123 +ARRAY2_6_SCIENTIFICFLOATNEGATIVE=-1.5e-2 +ARRAY2_6_SCIENTIFICINTPOSITIVE=6e4 +ARRAY2_6_SCIENTIFICSMALLPOSITIVE=5e-5 +ARRAY2_6_STRING=Nested Object in Array2 +ARRAY2_7_ARRAY_0=1 +ARRAY2_7_ARRAY_1=-2 +ARRAY2_7_ARRAY_2=3 +ARRAY2_7_ARRAY_3=Nested Array in Object2 +ARRAY2_7_ARRAY_4=true +ARRAY2_7_ARRAY_5=null +ARRAY2_7_BOOLEANFALSE=false +ARRAY2_7_BOOLEANTRUE=true +ARRAY2_7_FLOATNEGATIVE=-2.71828 +ARRAY2_7_FLOATPOSITIVE=3.14159 +ARRAY2_7_NULLVALUE=null +ARRAY2_7_NUMERICNEGATIVE=-123 +ARRAY2_7_NUMERICPOSITIVE=42 +ARRAY2_7_SCIENTIFICFLOATNEGATIVE=-2.5e-3 +ARRAY2_7_SCIENTIFICINTPOSITIVE=1e5 +ARRAY2_7_SCIENTIFICSMALLPOSITIVE=1e-4 +ARRAY2_7_STRING=Array Element with Object in Array2 +ARRAY_0=1 +ARRAY_1=-2.345 +ARRAY_2=Array Element +ARRAY_3=true +ARRAY_4=false +ARRAY_5=null +ARRAY_6_ARRAY_0=null +ARRAY_6_ARRAY_1=false +ARRAY_6_ARRAY_2=true +ARRAY_6_NUMERICNEGATIVE=-654 +ARRAY_6_NUMERICPOSITIVE=987 +ARRAY_6_STRING=Nested Object in Array +ARRAY_7_ARRAY_0=1 +ARRAY_7_ARRAY_1=-2 +ARRAY_7_ARRAY_2=3 +ARRAY_7_ARRAY_3=Nested Array in Object +ARRAY_7_ARRAY_4=true +ARRAY_7_ARRAY_5=null +ARRAY_7_BOOLEANFALSE=false +ARRAY_7_BOOLEANTRUE=true +ARRAY_7_FLOATNEGATIVE=-2.71828 +ARRAY_7_FLOATPOSITIVE=3.14159 +ARRAY_7_NULLVALUE=null +ARRAY_7_NUMERICNEGATIVE=-123 +ARRAY_7_NUMERICPOSITIVE=42 +ARRAY_7_SCIENTIFICFLOATNEGATIVE=-2.5e-3 +ARRAY_7_SCIENTIFICINTPOSITIVE=1e5 +ARRAY_7_SCIENTIFICSMALLPOSITIVE=1e-4 +ARRAY_7_STRING=Array Element with Object +BOOLEANFALSE=false +BOOLEANTRUE=true +FLOATNEGATIVE=-2.71828 +FLOATPOSITIVE=3.14159 +NULLVALUE=null +NUMERICNEGATIVE=-123 +NUMERICPOSITIVE=42 +OBJECT_ARRAY_0=1 +OBJECT_ARRAY_1=-2 +OBJECT_ARRAY_2=3 +OBJECT_ARRAY_3=Nested Array +OBJECT_ARRAY_4=true +OBJECT_ARRAY_5=null +OBJECT_BOOLEANFALSE=false +OBJECT_BOOLEANTRUE=true +OBJECT_FLOATNEGATIVE=-0.123 +OBJECT_FLOATPOSITIVE=0.987 +OBJECT_NULLVALUE=null +OBJECT_NUMERICNEGATIVE=-456 +OBJECT_NUMERICPOSITIVE=123 +OBJECT_SCIENTIFICFLOATNEGATIVE=-1.5e-2 +OBJECT_SCIENTIFICINTPOSITIVE=6e4 +OBJECT_SCIENTIFICSMALLPOSITIVE=5e-5 +OBJECT_STRING=Nested Object +SCIENTIFICFLOATNEGATIVE=-2.5e-3 +SCIENTIFICINTPOSITIVE=1e5 +SCIENTIFICSMALLPOSITIVE=1e-4 +STRING=Hello, World! + +ARRAY2_0=1 +ARRAY2_1=-2.345 +ARRAY2_2=Array Element +ARRAY2_3=true +ARRAY2_4=false +ARRAY2_5=null +ARRAY2_6_ARRAY_0=1 +ARRAY2_6_ARRAY_1=-2 +ARRAY2_6_ARRAY_2=3 +ARRAY2_6_ARRAY_3=Nested Array in Object2 +ARRAY2_6_ARRAY_4=true +ARRAY2_6_ARRAY_5=null +ARRAY2_6_BOOLEANFALSE=false +ARRAY2_6_BOOLEANTRUE=true +ARRAY2_6_FLOATNEGATIVE=-0.123 +ARRAY2_6_FLOATPOSITIVE=0.987 +ARRAY2_6_NULLVALUE=null +ARRAY2_6_NUMERICNEGATIVE=-456 +ARRAY2_6_NUMERICPOSITIVE=123 +ARRAY2_6_SCIENTIFICFLOATNEGATIVE=-1.5e-2 +ARRAY2_6_SCIENTIFICINTPOSITIVE=6e4 +ARRAY2_6_SCIENTIFICSMALLPOSITIVE=5e-5 +ARRAY2_6_STRING=Nested Object in Array2 +ARRAY2_7_ARRAY_0=1 +ARRAY2_7_ARRAY_1=-2 +ARRAY2_7_ARRAY_2=3 +ARRAY2_7_ARRAY_3=Nested Array in Object2 +ARRAY2_7_ARRAY_4=true +ARRAY2_7_ARRAY_5=null +ARRAY2_7_BOOLEANFALSE=false +ARRAY2_7_BOOLEANTRUE=true +ARRAY2_7_FLOATNEGATIVE=-2.71828 +ARRAY2_7_FLOATPOSITIVE=3.14159 +ARRAY2_7_NULLVALUE=null +ARRAY2_7_NUMERICNEGATIVE=-123 +ARRAY2_7_NUMERICPOSITIVE=42 +ARRAY2_7_SCIENTIFICFLOATNEGATIVE=-2.5e-3 +ARRAY2_7_SCIENTIFICINTPOSITIVE=1e5 +ARRAY2_7_SCIENTIFICSMALLPOSITIVE=1e-4 +ARRAY2_7_STRING=Array Element with Object in Array2 +ARRAY_0=1 +ARRAY_1=-2.345 +ARRAY_2=Array Element +ARRAY_3=true +ARRAY_4=false +ARRAY_5=null +ARRAY_6_ARRAY_0=null +ARRAY_6_ARRAY_1=false +ARRAY_6_ARRAY_2=true +ARRAY_6_NUMERICNEGATIVE=-654 +ARRAY_6_NUMERICPOSITIVE=987 +ARRAY_6_STRING=Nested Object in Array +ARRAY_7_ARRAY_0=1 +ARRAY_7_ARRAY_1=-2 +ARRAY_7_ARRAY_2=3 +ARRAY_7_ARRAY_3=Nested Array in Object +ARRAY_7_ARRAY_4=true +ARRAY_7_ARRAY_5=null +ARRAY_7_BOOLEANFALSE=false +ARRAY_7_BOOLEANTRUE=true +ARRAY_7_FLOATNEGATIVE=-2.71828 +ARRAY_7_FLOATPOSITIVE=3.14159 +ARRAY_7_NULLVALUE=null +ARRAY_7_NUMERICNEGATIVE=-123 +ARRAY_7_NUMERICPOSITIVE=42 +ARRAY_7_SCIENTIFICFLOATNEGATIVE=-2.5e-3 +ARRAY_7_SCIENTIFICINTPOSITIVE=1e5 +ARRAY_7_SCIENTIFICSMALLPOSITIVE=1e-4 +ARRAY_7_STRING=Array Element with Object +BOOLEANFALSE=false +BOOLEANTRUE=true +FLOATNEGATIVE=-2.71828 +FLOATPOSITIVE=3.14159 +NULLVALUE=null +NUMERICNEGATIVE=-123 +NUMERICPOSITIVE=42 +OBJECT_ARRAY_0=1 +OBJECT_ARRAY_1=-2 +OBJECT_ARRAY_2=3 +OBJECT_ARRAY_3=Nested Array +OBJECT_ARRAY_4=true +OBJECT_ARRAY_5=null +OBJECT_BOOLEANFALSE=false +OBJECT_BOOLEANTRUE=true +OBJECT_FLOATNEGATIVE=-0.123 +OBJECT_FLOATPOSITIVE=0.987 +OBJECT_NULLVALUE=null +OBJECT_NUMERICNEGATIVE=-456 +OBJECT_NUMERICPOSITIVE=123 +OBJECT_SCIENTIFICFLOATNEGATIVE=-1.5e-2 +OBJECT_SCIENTIFICINTPOSITIVE=6e4 +OBJECT_SCIENTIFICSMALLPOSITIVE=5e-5 +OBJECT_STRING=Nested Object +SCIENTIFICFLOATNEGATIVE=-2.5e-3 +SCIENTIFICINTPOSITIVE=1e5 +SCIENTIFICSMALLPOSITIVE=1e-4 +STRING=Hello, World! + +ARRAY2_0=1 +ARRAY2_1=-2.345 +ARRAY2_2=Array Element +ARRAY2_3=true +ARRAY2_4=false +ARRAY2_5=null +ARRAY2_6_ARRAY_0=1 +ARRAY2_6_ARRAY_1=-2 +ARRAY2_6_ARRAY_2=3 +ARRAY2_6_ARRAY_3=Nested Array in Object2 +ARRAY2_6_ARRAY_4=true +ARRAY2_6_ARRAY_5=null +ARRAY2_6_BOOLEANFALSE=false +ARRAY2_6_BOOLEANTRUE=true +ARRAY2_6_FLOATNEGATIVE=-0.123 +ARRAY2_6_FLOATPOSITIVE=0.987 +ARRAY2_6_NULLVALUE=null +ARRAY2_6_NUMERICNEGATIVE=-456 +ARRAY2_6_NUMERICPOSITIVE=123 +ARRAY2_6_SCIENTIFICFLOATNEGATIVE=-1.5e-2 +ARRAY2_6_SCIENTIFICINTPOSITIVE=6e4 +ARRAY2_6_SCIENTIFICSMALLPOSITIVE=5e-5 +ARRAY2_6_STRING=Nested Object in Array2 +ARRAY2_7_ARRAY_0=1 +ARRAY2_7_ARRAY_1=-2 +ARRAY2_7_ARRAY_2=3 +ARRAY2_7_ARRAY_3=Nested Array in Object2 +ARRAY2_7_ARRAY_4=true +ARRAY2_7_ARRAY_5=null +ARRAY2_7_BOOLEANFALSE=false +ARRAY2_7_BOOLEANTRUE=true +ARRAY2_7_FLOATNEGATIVE=-2.71828 +ARRAY2_7_FLOATPOSITIVE=3.14159 +ARRAY2_7_NULLVALUE=null +ARRAY2_7_NUMERICNEGATIVE=-123 +ARRAY2_7_NUMERICPOSITIVE=42 +ARRAY2_7_SCIENTIFICFLOATNEGATIVE=-2.5e-3 +ARRAY2_7_SCIENTIFICINTPOSITIVE=1e5 +ARRAY2_7_SCIENTIFICSMALLPOSITIVE=1e-4 +ARRAY2_7_STRING=Array Element with Object in Array2 +ARRAY_0=1 +ARRAY_1=-2.345 +ARRAY_2=Array Element +ARRAY_3=true +ARRAY_4=false +ARRAY_5=null +ARRAY_6_ARRAY_0=null +ARRAY_6_ARRAY_1=false +ARRAY_6_ARRAY_2=true +ARRAY_6_NUMERICNEGATIVE=-654 +ARRAY_6_NUMERICPOSITIVE=987 +ARRAY_6_STRING=Nested Object in Array +ARRAY_7_ARRAY_0=1 +ARRAY_7_ARRAY_1=-2 +ARRAY_7_ARRAY_2=3 +ARRAY_7_ARRAY_3=Nested Array in Object +ARRAY_7_ARRAY_4=true +ARRAY_7_ARRAY_5=null +ARRAY_7_BOOLEANFALSE=false +ARRAY_7_BOOLEANTRUE=true +ARRAY_7_FLOATNEGATIVE=-2.71828 +ARRAY_7_FLOATPOSITIVE=3.14159 +ARRAY_7_NULLVALUE=null +ARRAY_7_NUMERICNEGATIVE=-123 +ARRAY_7_NUMERICPOSITIVE=42 +ARRAY_7_SCIENTIFICFLOATNEGATIVE=-2.5e-3 +ARRAY_7_SCIENTIFICINTPOSITIVE=1e5 +ARRAY_7_SCIENTIFICSMALLPOSITIVE=1e-4 +ARRAY_7_STRING=Array Element with Object +BOOLEANFALSE=false +BOOLEANTRUE=true +FLOATNEGATIVE=-2.71828 +FLOATPOSITIVE=3.14159 +NULLVALUE=null +NUMERICNEGATIVE=-123 +NUMERICPOSITIVE=42 +OBJECT_ARRAY_0=1 +OBJECT_ARRAY_1=-2 +OBJECT_ARRAY_2=3 +OBJECT_ARRAY_3=Nested Array +OBJECT_ARRAY_4=true +OBJECT_ARRAY_5=null +OBJECT_BOOLEANFALSE=false +OBJECT_BOOLEANTRUE=true +OBJECT_FLOATNEGATIVE=-0.123 +OBJECT_FLOATPOSITIVE=0.987 +OBJECT_NULLVALUE=null +OBJECT_NUMERICNEGATIVE=-456 +OBJECT_NUMERICPOSITIVE=123 +OBJECT_SCIENTIFICFLOATNEGATIVE=-1.5e-2 +OBJECT_SCIENTIFICINTPOSITIVE=6e4 +OBJECT_SCIENTIFICSMALLPOSITIVE=5e-5 +OBJECT_STRING=Nested Object +SCIENTIFICFLOATNEGATIVE=-2.5e-3 +SCIENTIFICINTPOSITIVE=1e5 +SCIENTIFICSMALLPOSITIVE=1e-4 +STRING=Hello, World! + diff --git a/collectors/log2journal/tests.d/logfmt.log b/collectors/log2journal/tests.d/logfmt.log new file mode 100644 index 000000000..e55a83bbb --- /dev/null +++ b/collectors/log2journal/tests.d/logfmt.log @@ -0,0 +1,5 @@ +key1=value01 key2=value02 key3=value03 key4=value04 +key1=value11 key2=value12 key3=value13 key4= +key1=value21 key2=value22 key3=value23 key4=value24 +key1=value31 key2=value32 key3=value33 key4= +key1=value41 key2=value42 key3=value43 key4=value44 diff --git a/collectors/log2journal/tests.d/logfmt.output b/collectors/log2journal/tests.d/logfmt.output new file mode 100644 index 000000000..4291c9665 --- /dev/null +++ b/collectors/log2journal/tests.d/logfmt.output @@ -0,0 +1,37 @@ +INJECTED=Key INJECTED had value 'value01 - value02' and now has this, but only on the first row of the log. +KEY1=value01 +KEY2=value02 +KEY3=value03 +KEY4=value04 +SIMPLE_INJECTION=An unset variable looks like '', while the value of KEY2 is 'value02' +YET_ANOTHER_INJECTION=value01 - value02 - Key INJECTED had value 'value01 - value02' and now has this, but only on the first row of the log. - this should work because inject is yes + +INJECTED=value11 - value12 +KEY1=value11 +KEY2=value12 +KEY3=value13 +SIMPLE_INJECTION=An unset variable looks like '', while the value of KEY2 is 'value12' +YET_ANOTHER_INJECTION=value11 - value12 - value11 - value12 - this should work because inject is yes + +INJECTED=KEY4 has the value 'value24'; it is not empty, so INJECTED has been rewritten. +KEY1=value21 +KEY2=value22 +KEY3=value23 +KEY4=value24 +SIMPLE_INJECTION=An unset variable looks like '', while the value of KEY2 is 'value22' +YET_ANOTHER_INJECTION=value21 - value22 - KEY4 has the value 'value24'; it is not empty, so INJECTED has been rewritten. - this should work because inject is yes + +INJECTED=value31 - value32 +KEY1=value31 +KEY2=value32 +KEY3=value33 +YET_ANOTHER_INJECTION=value31 - value32 - value31 - value32 - this should work because inject is yes + +INJECTED=KEY4 has the value 'value44'; it is not empty, so INJECTED has been rewritten. +KEY1=value41 +KEY2=value42 +KEY3=value43 +KEY4=value44 +SIMPLE_INJECTION=An unset variable looks like '', while the value of KEY2 is 'value42' +YET_ANOTHER_INJECTION=value41 - value42 - KEY4 has the value 'value44'; it is not empty, so INJECTED has been rewritten. - this should work because inject is yes + diff --git a/collectors/log2journal/tests.d/logfmt.yaml b/collectors/log2journal/tests.d/logfmt.yaml new file mode 100644 index 000000000..91e93a71e --- /dev/null +++ b/collectors/log2journal/tests.d/logfmt.yaml @@ -0,0 +1,34 @@ +pattern: logfmt + +inject: + - key: SIMPLE_INJECTION + value: "An unset variable looks like '${this}', while the value of KEY2 is '${KEY2}'" + +rewrite: + - key: INJECTED + value: "${KEY1} - ${KEY2}" + inject: yes + stop: no + + - key: INJECTED + match: '^value01' + value: "Key INJECTED had value '${INJECTED}' and now has this, but only on the first row of the log." + + - key: INJECTED + not_empty: "${KEY4}" + value: "KEY4 has the value '${KEY4}'; it is not empty, so INJECTED has been rewritten." + + - key: INJECTED + match: '^KEY4 has the value' + value: "This value should not appear in the logs, because the previous one matched and stopped the pipeline." + + - key: ANOTHER_INJECTION + value: "${KEY1} - ${KEY2} - ${INJECTED} - should not work because inject is not true amd ANOTHER_INJECTION is not in the log file." + + - key: YET_ANOTHER_INJECTION + value: "${KEY1} - ${KEY2} - ${INJECTED} - this should work because inject is yes" + inject: yes + + - key: SIMPLE_INJECTION + match: "KEY2 is 'value32'" + value: "" # empty, so SIMPLE_INJECTION should not be available on row 3 diff --git a/collectors/log2journal/tests.d/nginx-combined.log b/collectors/log2journal/tests.d/nginx-combined.log new file mode 100644 index 000000000..b0faa81e9 --- /dev/null +++ b/collectors/log2journal/tests.d/nginx-combined.log @@ -0,0 +1,14 @@ +2a02:169:1210::2000 - - [30/Nov/2023:19:35:27 +0000] "GET /api/v1/data?chart=system.net&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775349 HTTP/1.1" 200 4844 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36" +2a02:169:1210::2000 - - [30/Nov/2023:19:35:27 +0000] "OPTIONS /api/v1/data?chart=netdata.clients&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775358 HTTP/1.1" 200 29 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36" +2a02:169:1210::2000 - - [30/Nov/2023:19:35:27 +0000] "OPTIONS /api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=out&_=1701372775359 HTTP/1.1" 200 29 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36" +2a02:169:1210::2000 - - [30/Nov/2023:19:35:27 +0000] "OPTIONS /api/v1/data?chart=netdata.requests&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775357 HTTP/1.1" 200 29 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36" +127.0.0.1 - - [30/Nov/2023:19:35:28 +0000] "GET /stub_status HTTP/1.1" 200 120 "-" "Go-http-client/1.1" +2a02:169:1210::2000 - - [30/Nov/2023:19:35:28 +0000] "GET /api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=out&_=1701372775359 HTTP/1.1" 200 1918 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36" +2a02:169:1210::2000 - - [30/Nov/2023:19:35:28 +0000] "GET /api/v1/data?chart=netdata.requests&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775357 HTTP/1.1" 200 1632 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36" +2a02:169:1210::2000 - - [30/Nov/2023:19:35:28 +0000] "GET /api/v1/data?chart=netdata.clients&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775358 HTTP/1.1" 200 588 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36" +2a02:169:1210::2000 - - [30/Nov/2023:19:35:28 +0000] "OPTIONS /api/v1/data?chart=system.cpu&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775360 HTTP/1.1" 200 29 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36" +2a02:169:1210::2000 - - [30/Nov/2023:19:35:28 +0000] "OPTIONS /api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=in&_=1701372775361 HTTP/1.1" 200 29 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36" +2a02:169:1210::2000 - - [30/Nov/2023:19:35:28 +0000] "GET /api/v1/data?chart=system.cpu&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775360 HTTP/1.1" 200 6085 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36" +2a02:169:1210::2000 - - [30/Nov/2023:19:35:28 +0000] "GET /api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=in&_=1701372775361 HTTP/1.1" 200 1918 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36" +2a02:169:1210::2000 - - [30/Nov/2023:19:35:28 +0000] "OPTIONS /api/v1/data?chart=system.io&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775362 HTTP/1.1" 200 29 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36" +2a02:169:1210::2000 - - [30/Nov/2023:19:35:28 +0000] "GET /api/v1/data?chart=system.io&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775362 HTTP/1.1" 200 3503 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36" diff --git a/collectors/log2journal/tests.d/nginx-combined.output b/collectors/log2journal/tests.d/nginx-combined.output new file mode 100644 index 000000000..07fd11014 --- /dev/null +++ b/collectors/log2journal/tests.d/nginx-combined.output @@ -0,0 +1,210 @@ +MESSAGE=GET /api/v1/data?chart=system.net&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775349 HTTP/1.1 +NGINX_BODY_BYTES_SENT=4844 +NGINX_HTTP_REFERER=http://192.168.69.5:19999/ +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36 +NGINX_REMOTE_ADDR=2a02:169:1210::2000 +NGINX_REMOTE_USER=- +NGINX_REQUEST_METHOD=GET +NGINX_REQUEST_URI=/api/v1/data?chart=system.net&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775349 +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=200 +NGINX_STATUS_FAMILY=2xx +NGINX_TIME_LOCAL=30/Nov/2023:19:35:27 +0000 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=OPTIONS /api/v1/data?chart=netdata.clients&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775358 HTTP/1.1 +NGINX_BODY_BYTES_SENT=29 +NGINX_HTTP_REFERER=http://192.168.69.5:19999/ +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36 +NGINX_REMOTE_ADDR=2a02:169:1210::2000 +NGINX_REMOTE_USER=- +NGINX_REQUEST_METHOD=OPTIONS +NGINX_REQUEST_URI=/api/v1/data?chart=netdata.clients&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775358 +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=200 +NGINX_STATUS_FAMILY=2xx +NGINX_TIME_LOCAL=30/Nov/2023:19:35:27 +0000 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=OPTIONS /api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=out&_=1701372775359 HTTP/1.1 +NGINX_BODY_BYTES_SENT=29 +NGINX_HTTP_REFERER=http://192.168.69.5:19999/ +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36 +NGINX_REMOTE_ADDR=2a02:169:1210::2000 +NGINX_REMOTE_USER=- +NGINX_REQUEST_METHOD=OPTIONS +NGINX_REQUEST_URI=/api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=out&_=1701372775359 +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=200 +NGINX_STATUS_FAMILY=2xx +NGINX_TIME_LOCAL=30/Nov/2023:19:35:27 +0000 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=OPTIONS /api/v1/data?chart=netdata.requests&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775357 HTTP/1.1 +NGINX_BODY_BYTES_SENT=29 +NGINX_HTTP_REFERER=http://192.168.69.5:19999/ +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36 +NGINX_REMOTE_ADDR=2a02:169:1210::2000 +NGINX_REMOTE_USER=- +NGINX_REQUEST_METHOD=OPTIONS +NGINX_REQUEST_URI=/api/v1/data?chart=netdata.requests&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775357 +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=200 +NGINX_STATUS_FAMILY=2xx +NGINX_TIME_LOCAL=30/Nov/2023:19:35:27 +0000 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=GET /stub_status HTTP/1.1 +NGINX_BODY_BYTES_SENT=120 +NGINX_HTTP_REFERER=- +NGINX_HTTP_USER_AGENT=Go-http-client/1.1 +NGINX_REMOTE_ADDR=127.0.0.1 +NGINX_REMOTE_USER=- +NGINX_REQUEST_METHOD=GET +NGINX_REQUEST_URI=/stub_status +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=200 +NGINX_STATUS_FAMILY=2xx +NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=GET /api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=out&_=1701372775359 HTTP/1.1 +NGINX_BODY_BYTES_SENT=1918 +NGINX_HTTP_REFERER=http://192.168.69.5:19999/ +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36 +NGINX_REMOTE_ADDR=2a02:169:1210::2000 +NGINX_REMOTE_USER=- +NGINX_REQUEST_METHOD=GET +NGINX_REQUEST_URI=/api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=out&_=1701372775359 +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=200 +NGINX_STATUS_FAMILY=2xx +NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=GET /api/v1/data?chart=netdata.requests&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775357 HTTP/1.1 +NGINX_BODY_BYTES_SENT=1632 +NGINX_HTTP_REFERER=http://192.168.69.5:19999/ +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36 +NGINX_REMOTE_ADDR=2a02:169:1210::2000 +NGINX_REMOTE_USER=- +NGINX_REQUEST_METHOD=GET +NGINX_REQUEST_URI=/api/v1/data?chart=netdata.requests&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775357 +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=200 +NGINX_STATUS_FAMILY=2xx +NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=GET /api/v1/data?chart=netdata.clients&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775358 HTTP/1.1 +NGINX_BODY_BYTES_SENT=588 +NGINX_HTTP_REFERER=http://192.168.69.5:19999/ +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36 +NGINX_REMOTE_ADDR=2a02:169:1210::2000 +NGINX_REMOTE_USER=- +NGINX_REQUEST_METHOD=GET +NGINX_REQUEST_URI=/api/v1/data?chart=netdata.clients&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775358 +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=200 +NGINX_STATUS_FAMILY=2xx +NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=OPTIONS /api/v1/data?chart=system.cpu&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775360 HTTP/1.1 +NGINX_BODY_BYTES_SENT=29 +NGINX_HTTP_REFERER=http://192.168.69.5:19999/ +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36 +NGINX_REMOTE_ADDR=2a02:169:1210::2000 +NGINX_REMOTE_USER=- +NGINX_REQUEST_METHOD=OPTIONS +NGINX_REQUEST_URI=/api/v1/data?chart=system.cpu&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775360 +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=200 +NGINX_STATUS_FAMILY=2xx +NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=OPTIONS /api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=in&_=1701372775361 HTTP/1.1 +NGINX_BODY_BYTES_SENT=29 +NGINX_HTTP_REFERER=http://192.168.69.5:19999/ +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36 +NGINX_REMOTE_ADDR=2a02:169:1210::2000 +NGINX_REMOTE_USER=- +NGINX_REQUEST_METHOD=OPTIONS +NGINX_REQUEST_URI=/api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=in&_=1701372775361 +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=200 +NGINX_STATUS_FAMILY=2xx +NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=GET /api/v1/data?chart=system.cpu&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775360 HTTP/1.1 +NGINX_BODY_BYTES_SENT=6085 +NGINX_HTTP_REFERER=http://192.168.69.5:19999/ +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36 +NGINX_REMOTE_ADDR=2a02:169:1210::2000 +NGINX_REMOTE_USER=- +NGINX_REQUEST_METHOD=GET +NGINX_REQUEST_URI=/api/v1/data?chart=system.cpu&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775360 +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=200 +NGINX_STATUS_FAMILY=2xx +NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=GET /api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=in&_=1701372775361 HTTP/1.1 +NGINX_BODY_BYTES_SENT=1918 +NGINX_HTTP_REFERER=http://192.168.69.5:19999/ +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36 +NGINX_REMOTE_ADDR=2a02:169:1210::2000 +NGINX_REMOTE_USER=- +NGINX_REQUEST_METHOD=GET +NGINX_REQUEST_URI=/api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=in&_=1701372775361 +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=200 +NGINX_STATUS_FAMILY=2xx +NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=OPTIONS /api/v1/data?chart=system.io&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775362 HTTP/1.1 +NGINX_BODY_BYTES_SENT=29 +NGINX_HTTP_REFERER=http://192.168.69.5:19999/ +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36 +NGINX_REMOTE_ADDR=2a02:169:1210::2000 +NGINX_REMOTE_USER=- +NGINX_REQUEST_METHOD=OPTIONS +NGINX_REQUEST_URI=/api/v1/data?chart=system.io&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775362 +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=200 +NGINX_STATUS_FAMILY=2xx +NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=GET /api/v1/data?chart=system.io&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775362 HTTP/1.1 +NGINX_BODY_BYTES_SENT=3503 +NGINX_HTTP_REFERER=http://192.168.69.5:19999/ +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36 +NGINX_REMOTE_ADDR=2a02:169:1210::2000 +NGINX_REMOTE_USER=- +NGINX_REQUEST_METHOD=GET +NGINX_REQUEST_URI=/api/v1/data?chart=system.io&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775362 +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=200 +NGINX_STATUS_FAMILY=2xx +NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + diff --git a/collectors/log2journal/tests.d/nginx-json.log b/collectors/log2journal/tests.d/nginx-json.log new file mode 100644 index 000000000..7e2b5d5f5 --- /dev/null +++ b/collectors/log2journal/tests.d/nginx-json.log @@ -0,0 +1,9 @@ +{"msec":"1644997905.123","connection":12345,"connection_requests":5,"pid":9876,"request_id":"8f3ebc1e38fbb92f","request_length":345,"remote_addr":"192.168.1.100","remote_user":"john_doe","remote_port":54321,"time_local":"19/Feb/2023:14:15:05 +0000","request":"GET /index.html HTTP/1.1","request_uri":"/index.html?param=value","args":"param=value","status":200,"body_bytes_sent":5432,"bytes_sent":6543,"http_referer":"https://example.com","http_user_agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64)","http_x_forwarded_for":"192.168.1.50, 10.0.0.1","host":"example.com","request_time":0.123,"upstream":"10.0.0.2:8080","upstream_connect_time":0.045,"upstream_header_time":0.020,"upstream_response_time":0.058,"upstream_response_length":7890,"upstream_cache_status":"MISS","ssl_protocol":"TLSv1.2","ssl_cipher":"AES256-SHA256","scheme":"https","request_method":"GET","server_protocol":"HTTP/1.1","pipe":".","gzip_ratio":"2.1","http_cf_ray":"abc123def456","geoip_country_code":"US"} +{"msec":"1644997910.789","connection":54321,"connection_requests":10,"pid":5432,"request_id":"4a7bca5e19d3f8e7","request_length":432,"remote_addr":"10.0.0.3","remote_user":"","remote_port":12345,"time_local":"19/Feb/2023:14:15:10 +0000","request":"POST /api/update HTTP/1.1","request_uri":"/api/update","args":"","status":204,"body_bytes_sent":0,"bytes_sent":123,"http_referer":"","http_user_agent":"curl/7.68.0","http_x_forwarded_for":"","host":"api.example.com","request_time":0.032,"upstream":"backend-server-1:8080","upstream_connect_time":0.012,"upstream_header_time":0.020,"upstream_response_time":0.010,"upstream_response_length":0,"upstream_cache_status":"","ssl_protocol":"","ssl_cipher":"","scheme":"http","request_method":"POST","server_protocol":"HTTP/1.1","pipe":"p","gzip_ratio":"","http_cf_ray":"","geoip_country_code":""} +{"msec":"1644997920.456","connection":98765,"connection_requests":15,"pid":1234,"request_id":"63f8ad2c3e1b4090","request_length":567,"remote_addr":"2001:0db8:85a3:0000:0000:8a2e:0370:7334","remote_user":"alice","remote_port":6789,"time_local":"19/Feb/2023:14:15:20 +0000","request":"GET /page?param1=value1¶m2=value2 HTTP/2.0","request_uri":"/page?param1=value1¶m2=value2","args":"param1=value1¶m2=value2","status":404,"body_bytes_sent":0,"bytes_sent":0,"http_referer":"","http_user_agent":"Mozilla/5.0 (Linux; Android 10; Pixel 3)","http_x_forwarded_for":"","host":"example.org","request_time":0.045,"upstream":"","upstream_connect_time":0.0,"upstream_header_time":0.0,"upstream_response_time":0.0,"upstream_response_length":0,"upstream_cache_status":"","ssl_protocol":"","ssl_cipher":"","scheme":"https","request_method":"GET","server_protocol":"HTTP/2.0","pipe":".","gzip_ratio":"","http_cf_ray":"","geoip_country_code":"GB"} +{"msec":"1644997930.987","connection":123,"connection_requests":3,"pid":5678,"request_id":"9e632a5b24c18f76","request_length":234,"remote_addr":"192.168.0.1","remote_user":"jane_doe","remote_port":9876,"time_local":"19/Feb/2023:14:15:30 +0000","request":"PUT /api/update HTTP/1.1","request_uri":"/api/update","args":"","status":500,"body_bytes_sent":543,"bytes_sent":876,"http_referer":"https://example.com/page","http_user_agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64)","http_x_forwarded_for":"","host":"api.example.com","request_time":0.123,"upstream":"backend-server-2:8080","upstream_connect_time":0.045,"upstream_header_time":0.020,"upstream_response_time":0.058,"upstream_response_length":7890,"upstream_cache_status":"HIT","ssl_protocol":"TLSv1.2","ssl_cipher":"AES256-SHA256","scheme":"https","request_method":"PUT","server_protocol":"HTTP/1.1","pipe":"p","gzip_ratio":"1.8","http_cf_ray":"xyz789abc123","geoip_country_code":"CA"} +{"msec":"1644997940.234","connection":9876,"connection_requests":8,"pid":4321,"request_id":"1b6c59c8aef7d24a","request_length":456,"remote_addr":"203.0.113.1","remote_user":"","remote_port":5432,"time_local":"19/Feb/2023:14:15:40 +0000","request":"DELETE /api/resource HTTP/2.0","request_uri":"/api/resource","args":"","status":204,"body_bytes_sent":0,"bytes_sent":123,"http_referer":"","http_user_agent":"curl/7.68.0","http_x_forwarded_for":"","host":"api.example.com","request_time":0.032,"upstream":"backend-server-1:8080","upstream_connect_time":0.012,"upstream_header_time":0.020,"upstream_response_time":0.010,"upstream_response_length":0,"upstream_cache_status":"","ssl_protocol":"","ssl_cipher":"","scheme":"http","request_method":"DELETE","server_protocol":"HTTP/2.0","pipe":".","gzip_ratio":"","http_cf_ray":"","geoip_country_code":""} +{"msec":"1644997950.789","connection":5432,"connection_requests":12,"pid":6543,"request_id":"72692d781d0b8a4f","request_length":789,"remote_addr":"198.51.100.2","remote_user":"bob","remote_port":8765,"time_local":"19/Feb/2023:14:15:50 +0000","request":"GET /profile?user=bob HTTP/1.1","request_uri":"/profile?user=bob","args":"user=bob","status":200,"body_bytes_sent":1234,"bytes_sent":2345,"http_referer":"","http_user_agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64)","http_x_forwarded_for":"","host":"example.com","request_time":0.065,"upstream":"10.0.0.2:8080","upstream_connect_time":0.045,"upstream_header_time":0.020,"upstream_response_time":0.058,"upstream_response_length":7890,"upstream_cache_status":"MISS","ssl_protocol":"TLSv1.3","ssl_cipher":"AES128-GCM-SHA256","scheme":"https","request_method":"GET","server_protocol":"HTTP/1.1","pipe":"p","gzip_ratio":"","http_cf_ray":"","geoip_country_code":"US"} +{"msec":"1644997960.321","connection":65432,"connection_requests":7,"pid":7890,"request_id":"c3e158d41e75a9d7","request_length":321,"remote_addr":"203.0.113.2","remote_user":"","remote_port":9876,"time_local":"19/Feb/2023:14:15:60 +0000","request":"GET /dashboard HTTP/2.0","request_uri":"/dashboard","args":"","status":301,"body_bytes_sent":0,"bytes_sent":123,"http_referer":"","http_user_agent":"Mozilla/5.0 (Linux; Android 10; Pixel 3)","http_x_forwarded_for":"","host":"dashboard.example.org","request_time":0.032,"upstream":"","upstream_connect_time":0.0,"upstream_header_time":0.0,"upstream_response_time":0.0,"upstream_response_length":0,"upstream_cache_status":"","ssl_protocol":"","ssl_cipher":"","scheme":"https","request_method":"GET","server_protocol":"HTTP/2.0","pipe":".","gzip_ratio":"","http_cf_ray":"","geoip_country_code":""} +{"msec":"1644997970.555","connection":8765,"connection_requests":9,"pid":8765,"request_id":"f9f6e8235de54af4","request_length":654,"remote_addr":"10.0.0.4","remote_user":"","remote_port":12345,"time_local":"19/Feb/2023:14:15:70 +0000","request":"POST /submit-form HTTP/1.1","request_uri":"/submit-form","args":"","status":201,"body_bytes_sent":876,"bytes_sent":987,"http_referer":"","http_user_agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64)","http_x_forwarded_for":"","host":"example.com","request_time":0.045,"upstream":"backend-server-3:8080","upstream_connect_time":0.012,"upstream_header_time":0.020,"upstream_response_time":0.010,"upstream_response_length":0,"upstream_cache_status":"","ssl_protocol":"","ssl_cipher":"","scheme":"http","request_method":"POST","server_protocol":"HTTP/1.1","pipe":"p","gzip_ratio":"","http_cf_ray":"","geoip_country_code":""} +{"msec":"1644997980.987","connection":23456,"connection_requests":6,"pid":3456,"request_id":"2ec3e8859e7a406c","request_length":432,"remote_addr":"198.51.100.3","remote_user":"mary","remote_port":5678,"time_local":"19/Feb/2023:14:15:80 +0000","request":"GET /contact HTTP/1.1","request_uri":"/contact","args":"","status":404,"body_bytes_sent":0,"bytes_sent":0,"http_referer":"","http_user_agent":"Mozilla/5.0 (Linux; Android 10; Pixel 3)","http_x_forwarded_for":"","host":"example.org","request_time":0.032,"upstream":"","upstream_connect_time":0.0,"upstream_header_time":0.0,"upstream_response_time":0.0,"upstream_response_length":0,"upstream_cache_status":"","ssl_protocol":"","ssl_cipher":"","scheme":"https","request_method":"GET","server_protocol":"HTTP/1.1","pipe":".","gzip_ratio":"","http_cf_ray":"","geoip_country_code":"FR"} diff --git a/collectors/log2journal/tests.d/nginx-json.output b/collectors/log2journal/tests.d/nginx-json.output new file mode 100644 index 000000000..e7db9dcbd --- /dev/null +++ b/collectors/log2journal/tests.d/nginx-json.output @@ -0,0 +1,296 @@ +MESSAGE=GET /index.html HTTP/1.1 +NGINX_BODY_BYTES_SENT=5432 +NGINX_BYTES_SENT=6543 +NGINX_CONNECTION=12345 +NGINX_CONNECTION_REQUESTS=5 +NGINX_GEOIP_COUNTRY_CODE=US +NGINX_GZIP_RATIO=2.1 +NGINX_HOST=example.com +NGINX_HTTP_CF_RAY=abc123def456 +NGINX_HTTP_REFERER=https://example.com +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; Win64; x64) +NGINX_HTTP_X_FORWARDED_FOR=192.168.1.50, 10.0.0.1 +NGINX_PID=9876 +NGINX_PIPELINED=no +NGINX_QUERY_STRING=param=value +NGINX_REMOTE_ADDR=192.168.1.100 +NGINX_REMOTE_PORT=54321 +NGINX_REMOTE_USER=john_doe +NGINX_REQUEST_ID=8f3ebc1e38fbb92f +NGINX_REQUEST_LENGTH=345 +NGINX_REQUEST_METHOD=GET +NGINX_REQUEST_TIME=0.123 +NGINX_REQUEST_URI=/index.html?param=value +NGINX_SCHEME=https +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_SSL_CIPHER=AES256-SHA256 +NGINX_SSL_PROTOCOL=TLSv1.2 +NGINX_STATUS=200 +NGINX_STATUS_FAMILY=2xx +NGINX_TIMESTAMP_SEC=1644997905.123 +NGINX_TIME_LOCAL=19/Feb/2023:14:15:05 +0000 +NGINX_UPSTREAM=10.0.0.2:8080 +NGINX_UPSTREAM_CACHE_STATUS=MISS +NGINX_UPSTREAM_CONNECT_TIME=0.045 +NGINX_UPSTREAM_HEADER_TIME=0.020 +NGINX_UPSTREAM_RESPONSE_LENGTH=7890 +NGINX_UPSTREAM_RESPONSE_TIME=0.058 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=POST /api/update HTTP/1.1 +NGINX_BODY_BYTES_SENT=0 +NGINX_BYTES_SENT=123 +NGINX_CONNECTION=54321 +NGINX_CONNECTION_REQUESTS=10 +NGINX_HOST=api.example.com +NGINX_HTTP_USER_AGENT=curl/7.68.0 +NGINX_PID=5432 +NGINX_PIPELINED=yes +NGINX_REMOTE_ADDR=10.0.0.3 +NGINX_REMOTE_PORT=12345 +NGINX_REQUEST_ID=4a7bca5e19d3f8e7 +NGINX_REQUEST_LENGTH=432 +NGINX_REQUEST_METHOD=POST +NGINX_REQUEST_TIME=0.032 +NGINX_REQUEST_URI=/api/update +NGINX_SCHEME=http +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=204 +NGINX_STATUS_FAMILY=2xx +NGINX_TIMESTAMP_SEC=1644997910.789 +NGINX_TIME_LOCAL=19/Feb/2023:14:15:10 +0000 +NGINX_UPSTREAM=backend-server-1:8080 +NGINX_UPSTREAM_CONNECT_TIME=0.012 +NGINX_UPSTREAM_HEADER_TIME=0.020 +NGINX_UPSTREAM_RESPONSE_LENGTH=0 +NGINX_UPSTREAM_RESPONSE_TIME=0.010 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=GET /page?param1=value1¶m2=value2 HTTP/2.0 +NGINX_BODY_BYTES_SENT=0 +NGINX_BYTES_SENT=0 +NGINX_CONNECTION=98765 +NGINX_CONNECTION_REQUESTS=15 +NGINX_GEOIP_COUNTRY_CODE=GB +NGINX_HOST=example.org +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (Linux; Android 10; Pixel 3) +NGINX_PID=1234 +NGINX_PIPELINED=no +NGINX_QUERY_STRING=param1=value1¶m2=value2 +NGINX_REMOTE_ADDR=2001:0db8:85a3:0000:0000:8a2e:0370:7334 +NGINX_REMOTE_PORT=6789 +NGINX_REMOTE_USER=alice +NGINX_REQUEST_ID=63f8ad2c3e1b4090 +NGINX_REQUEST_LENGTH=567 +NGINX_REQUEST_METHOD=GET +NGINX_REQUEST_TIME=0.045 +NGINX_REQUEST_URI=/page?param1=value1¶m2=value2 +NGINX_SCHEME=https +NGINX_SERVER_PROTOCOL=HTTP/2.0 +NGINX_STATUS=404 +NGINX_STATUS_FAMILY=4xx +NGINX_TIMESTAMP_SEC=1644997920.456 +NGINX_TIME_LOCAL=19/Feb/2023:14:15:20 +0000 +NGINX_UPSTREAM_CONNECT_TIME=0.0 +NGINX_UPSTREAM_HEADER_TIME=0.0 +NGINX_UPSTREAM_RESPONSE_LENGTH=0 +NGINX_UPSTREAM_RESPONSE_TIME=0.0 +PRIORITY=5 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=PUT /api/update HTTP/1.1 +NGINX_BODY_BYTES_SENT=543 +NGINX_BYTES_SENT=876 +NGINX_CONNECTION=123 +NGINX_CONNECTION_REQUESTS=3 +NGINX_GEOIP_COUNTRY_CODE=CA +NGINX_GZIP_RATIO=1.8 +NGINX_HOST=api.example.com +NGINX_HTTP_CF_RAY=xyz789abc123 +NGINX_HTTP_REFERER=https://example.com/page +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; Win64; x64) +NGINX_PID=5678 +NGINX_PIPELINED=yes +NGINX_REMOTE_ADDR=192.168.0.1 +NGINX_REMOTE_PORT=9876 +NGINX_REMOTE_USER=jane_doe +NGINX_REQUEST_ID=9e632a5b24c18f76 +NGINX_REQUEST_LENGTH=234 +NGINX_REQUEST_METHOD=PUT +NGINX_REQUEST_TIME=0.123 +NGINX_REQUEST_URI=/api/update +NGINX_SCHEME=https +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_SSL_CIPHER=AES256-SHA256 +NGINX_SSL_PROTOCOL=TLSv1.2 +NGINX_STATUS=500 +NGINX_STATUS_FAMILY=5xx +NGINX_TIMESTAMP_SEC=1644997930.987 +NGINX_TIME_LOCAL=19/Feb/2023:14:15:30 +0000 +NGINX_UPSTREAM=backend-server-2:8080 +NGINX_UPSTREAM_CACHE_STATUS=HIT +NGINX_UPSTREAM_CONNECT_TIME=0.045 +NGINX_UPSTREAM_HEADER_TIME=0.020 +NGINX_UPSTREAM_RESPONSE_LENGTH=7890 +NGINX_UPSTREAM_RESPONSE_TIME=0.058 +PRIORITY=3 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=DELETE /api/resource HTTP/2.0 +NGINX_BODY_BYTES_SENT=0 +NGINX_BYTES_SENT=123 +NGINX_CONNECTION=9876 +NGINX_CONNECTION_REQUESTS=8 +NGINX_HOST=api.example.com +NGINX_HTTP_USER_AGENT=curl/7.68.0 +NGINX_PID=4321 +NGINX_PIPELINED=no +NGINX_REMOTE_ADDR=203.0.113.1 +NGINX_REMOTE_PORT=5432 +NGINX_REQUEST_ID=1b6c59c8aef7d24a +NGINX_REQUEST_LENGTH=456 +NGINX_REQUEST_METHOD=DELETE +NGINX_REQUEST_TIME=0.032 +NGINX_REQUEST_URI=/api/resource +NGINX_SCHEME=http +NGINX_SERVER_PROTOCOL=HTTP/2.0 +NGINX_STATUS=204 +NGINX_STATUS_FAMILY=2xx +NGINX_TIMESTAMP_SEC=1644997940.234 +NGINX_TIME_LOCAL=19/Feb/2023:14:15:40 +0000 +NGINX_UPSTREAM=backend-server-1:8080 +NGINX_UPSTREAM_CONNECT_TIME=0.012 +NGINX_UPSTREAM_HEADER_TIME=0.020 +NGINX_UPSTREAM_RESPONSE_LENGTH=0 +NGINX_UPSTREAM_RESPONSE_TIME=0.010 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=GET /profile?user=bob HTTP/1.1 +NGINX_BODY_BYTES_SENT=1234 +NGINX_BYTES_SENT=2345 +NGINX_CONNECTION=5432 +NGINX_CONNECTION_REQUESTS=12 +NGINX_GEOIP_COUNTRY_CODE=US +NGINX_HOST=example.com +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; Win64; x64) +NGINX_PID=6543 +NGINX_PIPELINED=yes +NGINX_QUERY_STRING=user=bob +NGINX_REMOTE_ADDR=198.51.100.2 +NGINX_REMOTE_PORT=8765 +NGINX_REMOTE_USER=bob +NGINX_REQUEST_ID=72692d781d0b8a4f +NGINX_REQUEST_LENGTH=789 +NGINX_REQUEST_METHOD=GET +NGINX_REQUEST_TIME=0.065 +NGINX_REQUEST_URI=/profile?user=bob +NGINX_SCHEME=https +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_SSL_CIPHER=AES128-GCM-SHA256 +NGINX_SSL_PROTOCOL=TLSv1.3 +NGINX_STATUS=200 +NGINX_STATUS_FAMILY=2xx +NGINX_TIMESTAMP_SEC=1644997950.789 +NGINX_TIME_LOCAL=19/Feb/2023:14:15:50 +0000 +NGINX_UPSTREAM=10.0.0.2:8080 +NGINX_UPSTREAM_CACHE_STATUS=MISS +NGINX_UPSTREAM_CONNECT_TIME=0.045 +NGINX_UPSTREAM_HEADER_TIME=0.020 +NGINX_UPSTREAM_RESPONSE_LENGTH=7890 +NGINX_UPSTREAM_RESPONSE_TIME=0.058 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=GET /dashboard HTTP/2.0 +NGINX_BODY_BYTES_SENT=0 +NGINX_BYTES_SENT=123 +NGINX_CONNECTION=65432 +NGINX_CONNECTION_REQUESTS=7 +NGINX_HOST=dashboard.example.org +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (Linux; Android 10; Pixel 3) +NGINX_PID=7890 +NGINX_PIPELINED=no +NGINX_REMOTE_ADDR=203.0.113.2 +NGINX_REMOTE_PORT=9876 +NGINX_REQUEST_ID=c3e158d41e75a9d7 +NGINX_REQUEST_LENGTH=321 +NGINX_REQUEST_METHOD=GET +NGINX_REQUEST_TIME=0.032 +NGINX_REQUEST_URI=/dashboard +NGINX_SCHEME=https +NGINX_SERVER_PROTOCOL=HTTP/2.0 +NGINX_STATUS=301 +NGINX_STATUS_FAMILY=3xx +NGINX_TIMESTAMP_SEC=1644997960.321 +NGINX_TIME_LOCAL=19/Feb/2023:14:15:60 +0000 +NGINX_UPSTREAM_CONNECT_TIME=0.0 +NGINX_UPSTREAM_HEADER_TIME=0.0 +NGINX_UPSTREAM_RESPONSE_LENGTH=0 +NGINX_UPSTREAM_RESPONSE_TIME=0.0 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=POST /submit-form HTTP/1.1 +NGINX_BODY_BYTES_SENT=876 +NGINX_BYTES_SENT=987 +NGINX_CONNECTION=8765 +NGINX_CONNECTION_REQUESTS=9 +NGINX_HOST=example.com +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; Win64; x64) +NGINX_PID=8765 +NGINX_PIPELINED=yes +NGINX_REMOTE_ADDR=10.0.0.4 +NGINX_REMOTE_PORT=12345 +NGINX_REQUEST_ID=f9f6e8235de54af4 +NGINX_REQUEST_LENGTH=654 +NGINX_REQUEST_METHOD=POST +NGINX_REQUEST_TIME=0.045 +NGINX_REQUEST_URI=/submit-form +NGINX_SCHEME=http +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=201 +NGINX_STATUS_FAMILY=2xx +NGINX_TIMESTAMP_SEC=1644997970.555 +NGINX_TIME_LOCAL=19/Feb/2023:14:15:70 +0000 +NGINX_UPSTREAM=backend-server-3:8080 +NGINX_UPSTREAM_CONNECT_TIME=0.012 +NGINX_UPSTREAM_HEADER_TIME=0.020 +NGINX_UPSTREAM_RESPONSE_LENGTH=0 +NGINX_UPSTREAM_RESPONSE_TIME=0.010 +PRIORITY=6 +SYSLOG_IDENTIFIER=nginx-log + +MESSAGE=GET /contact HTTP/1.1 +NGINX_BODY_BYTES_SENT=0 +NGINX_BYTES_SENT=0 +NGINX_CONNECTION=23456 +NGINX_CONNECTION_REQUESTS=6 +NGINX_GEOIP_COUNTRY_CODE=FR +NGINX_HOST=example.org +NGINX_HTTP_USER_AGENT=Mozilla/5.0 (Linux; Android 10; Pixel 3) +NGINX_PID=3456 +NGINX_PIPELINED=no +NGINX_REMOTE_ADDR=198.51.100.3 +NGINX_REMOTE_PORT=5678 +NGINX_REMOTE_USER=mary +NGINX_REQUEST_ID=2ec3e8859e7a406c +NGINX_REQUEST_LENGTH=432 +NGINX_REQUEST_METHOD=GET +NGINX_REQUEST_TIME=0.032 +NGINX_REQUEST_URI=/contact +NGINX_SCHEME=https +NGINX_SERVER_PROTOCOL=HTTP/1.1 +NGINX_STATUS=404 +NGINX_STATUS_FAMILY=4xx +NGINX_TIMESTAMP_SEC=1644997980.987 +NGINX_TIME_LOCAL=19/Feb/2023:14:15:80 +0000 +NGINX_UPSTREAM_CONNECT_TIME=0.0 +NGINX_UPSTREAM_HEADER_TIME=0.0 +NGINX_UPSTREAM_RESPONSE_LENGTH=0 +NGINX_UPSTREAM_RESPONSE_TIME=0.0 +PRIORITY=5 +SYSLOG_IDENTIFIER=nginx-log + diff --git a/collectors/log2journal/tests.sh b/collectors/log2journal/tests.sh new file mode 100755 index 000000000..402438866 --- /dev/null +++ b/collectors/log2journal/tests.sh @@ -0,0 +1,148 @@ +#!/usr/bin/env bash + +if [ -f "${PWD}/log2journal" ]; then + log2journal_bin="${PWD}/log2journal" +else + log2journal_bin="$(which log2journal)" +fi + +[ -z "${log2journal_bin}" ] && echo >&2 "Cannot find log2journal binary" && exit 1 +echo >&2 "Using: ${log2journal_bin}" + +script_dir=$(dirname "$(readlink -f "$0")") +tests="${script_dir}/tests.d" + +if [ ! -d "${tests}" ]; then + echo >&2 "tests directory '${tests}' is not found." + exit 1 +fi + +# Create a random directory name in /tmp +tmp=$(mktemp -d /tmp/script_temp.XXXXXXXXXX) + +# Function to clean up the temporary directory on exit +cleanup() { + echo "Cleaning up..." + rm -rf "$tmp" +} + +# Register the cleanup function to run on script exit +trap cleanup EXIT + +# Change to the temporary directory +cd "$tmp" || exit 1 + +# ----------------------------------------------------------------------------- + +test_log2journal_config() { + local in="${1}" + local out="${2}" + shift 2 + + [ -f output ] && rm output + + printf >&2 "running: " + printf >&2 "%q " "${log2journal_bin}" "${@}" + printf >&2 "\n" + + "${log2journal_bin}" <"${in}" "${@}" >output 2>&1 + ret=$? + + [ $ret -ne 0 ] && echo >&2 "${log2journal_bin} exited with code: $ret" && cat output && exit 1 + + diff --ignore-all-space "${out}" output + [ $? -ne -0 ] && echo >&2 "${log2journal_bin} output does not match!" && exit 1 + + echo >&2 "OK" + echo >&2 + + return 0 +} + +# test yaml parsing +echo >&2 +echo >&2 "Testing full yaml config parsing..." +test_log2journal_config /dev/null "${tests}/full.output" -f "${tests}/full.yaml" --show-config || exit 1 + +echo >&2 "Testing command line parsing..." +test_log2journal_config /dev/null "${tests}/full.output" --show-config \ + --prefix=NGINX_ \ + --filename-key NGINX_LOG_FILENAME \ + --inject SYSLOG_IDENTIFIER=nginx-log \ + --inject=SYSLOG_IDENTIFIER2=nginx-log2 \ + --inject 'PRIORITY=${NGINX_STATUS}' \ + --inject='NGINX_STATUS_FAMILY=${NGINX_STATUS}${NGINX_METHOD}' \ + --rewrite 'PRIORITY=//${NGINX_STATUS}/inject,dont-stop' \ + --rewrite "PRIORITY=/^[123]/6" \ + --rewrite='PRIORITY=|^4|5' \ + '--rewrite=PRIORITY=-^5-3' \ + --rewrite "PRIORITY=;.*;4" \ + --rewrite 'NGINX_STATUS_FAMILY=|^(?<first_digit>[1-5])|${first_digit}xx' \ + --rewrite 'NGINX_STATUS_FAMILY=|.*|UNKNOWN' \ + --rename TEST1=TEST2 \ + --rename=TEST3=TEST4 \ + --unmatched-key MESSAGE \ + --inject-unmatched PRIORITY=1 \ + --inject-unmatched=PRIORITY2=2 \ + --include=".*" \ + --exclude ".*HELLO.*WORLD.*" \ + '(?x) # Enable PCRE2 extended mode + ^ + (?<NGINX_REMOTE_ADDR>[^ ]+) \s - \s # NGINX_REMOTE_ADDR + (?<NGINX_REMOTE_USER>[^ ]+) \s # NGINX_REMOTE_USER + \[ + (?<NGINX_TIME_LOCAL>[^\]]+) # NGINX_TIME_LOCAL + \] + \s+ " + (?<MESSAGE> + (?<NGINX_METHOD>[A-Z]+) \s+ # NGINX_METHOD + (?<NGINX_URL>[^ ]+) \s+ + HTTP/(?<NGINX_HTTP_VERSION>[^"]+) + ) + " \s+ + (?<NGINX_STATUS>\d+) \s+ # NGINX_STATUS + (?<NGINX_BODY_BYTES_SENT>\d+) \s+ # NGINX_BODY_BYTES_SENT + "(?<NGINX_HTTP_REFERER>[^"]*)" \s+ # NGINX_HTTP_REFERER + "(?<NGINX_HTTP_USER_AGENT>[^"]*)" # NGINX_HTTP_USER_AGENT' \ + || exit 1 + +# ----------------------------------------------------------------------------- + +test_log2journal() { + local n="${1}" + local in="${2}" + local out="${3}" + shift 3 + + printf >&2 "running test No ${n}: " + printf >&2 "%q " "${log2journal_bin}" "${@}" + printf >&2 "\n" + echo >&2 "using as input : ${in}" + echo >&2 "expecting output: ${out}" + + [ -f output ] && rm output + + "${log2journal_bin}" <"${in}" "${@}" >output 2>&1 + ret=$? + + [ $ret -ne 0 ] && echo >&2 "${log2journal_bin} exited with code: $ret" && cat output && exit 1 + + diff "${out}" output + [ $? -ne -0 ] && echo >&2 "${log2journal_bin} output does not match! - here is what we got:" && cat output && exit 1 + + echo >&2 "OK" + echo >&2 + + return 0 +} + +echo >&2 +echo >&2 "Testing parsing and output..." + +test_log2journal 1 "${tests}/json.log" "${tests}/json.output" json +test_log2journal 2 "${tests}/json.log" "${tests}/json-include.output" json --include "OBJECT" +test_log2journal 3 "${tests}/json.log" "${tests}/json-exclude.output" json --exclude "ARRAY[^2]" +test_log2journal 4 "${tests}/nginx-json.log" "${tests}/nginx-json.output" -f "${script_dir}/log2journal.d/nginx-json.yaml" +test_log2journal 5 "${tests}/nginx-combined.log" "${tests}/nginx-combined.output" -f "${script_dir}/log2journal.d/nginx-combined.yaml" +test_log2journal 6 "${tests}/logfmt.log" "${tests}/logfmt.output" -f "${tests}/logfmt.yaml" +test_log2journal 7 "${tests}/logfmt.log" "${tests}/default.output" -f "${script_dir}/log2journal.d/default.yaml" diff --git a/collectors/macos.plugin/integrations/macos.md b/collectors/macos.plugin/integrations/macos.md index e5c54c3dc..5128a5a77 100644 --- a/collectors/macos.plugin/integrations/macos.md +++ b/collectors/macos.plugin/integrations/macos.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/macos.plug sidebar_label: "macOS" learn_status: "Published" learn_rel_path: "Data Collection/macOS Systems" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -207,44 +208,44 @@ There are three sections in the file which you can configure: | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| enable load average | Enable or disable monitoring of load average metrics (load1, load5, load15). | yes | False | -| system swap | Enable or disable monitoring of system swap metrics (free, used). | yes | False | -| bandwidth | Enable or disable monitoring of network bandwidth metrics (received, sent). | yes | False | -| ipv4 TCP packets | Enable or disable monitoring of IPv4 TCP total packets metrics (received, sent). | yes | False | -| ipv4 TCP errors | Enable or disable monitoring of IPv4 TCP packets metrics (Input Errors, Checksum, Retransmission segments). | yes | False | -| ipv4 TCP handshake issues | Enable or disable monitoring of IPv4 TCP handshake metrics (Established Resets, Active Opens, Passive Opens, Attempt Fails). | yes | False | -| ECN packets | Enable or disable monitoring of ECN statistics metrics (InCEPkts, InNoECTPkts). | auto | False | -| TCP SYN cookies | Enable or disable monitoring of TCP SYN cookies metrics (received, sent, failed). | auto | False | -| TCP out-of-order queue | Enable or disable monitoring of TCP out-of-order queue metrics (inqueue). | auto | False | -| TCP connection aborts | Enable or disable monitoring of TCP connection aborts metrics (Bad Data, User closed, No memory, Timeout). | auto | False | -| ipv4 UDP packets | Enable or disable monitoring of ipv4 UDP packets metrics (sent, received.). | yes | False | -| ipv4 UDP errors | Enable or disable monitoring of ipv4 UDP errors metrics (Recieved Buffer error, Input Errors, No Ports, IN Checksum Errors, Ignore Multi). | yes | False | -| ipv4 icmp packets | Enable or disable monitoring of IPv4 ICMP packets metrics (sent, received, in error, OUT error, IN Checksum error). | yes | False | -| ipv4 icmp messages | Enable or disable monitoring of ipv4 ICMP messages metrics (I/O messages, I/O Errors, In Checksum). | yes | False | -| ipv4 packets | Enable or disable monitoring of ipv4 packets metrics (received, sent, forwarded, delivered). | yes | False | -| ipv4 fragments sent | Enable or disable monitoring of IPv4 fragments sent metrics (ok, fails, creates). | yes | False | -| ipv4 fragments assembly | Enable or disable monitoring of IPv4 fragments assembly metrics (ok, failed, all). | yes | False | -| ipv4 errors | Enable or disable monitoring of IPv4 errors metrics (I/O discard, I/O HDR errors, In Addr errors, In Unknown protos, OUT No Routes). | yes | False | -| ipv6 packets | Enable or disable monitoring of IPv6 packets metrics (received, sent, forwarded, delivered). | auto | False | -| ipv6 fragments sent | Enable or disable monitoring of IPv6 fragments sent metrics (ok, failed, all). | auto | False | -| ipv6 fragments assembly | Enable or disable monitoring of IPv6 fragments assembly metrics (ok, failed, timeout, all). | auto | False | -| ipv6 errors | Enable or disable monitoring of IPv6 errors metrics (I/O Discards, In Hdr Errors, In Addr Errors, In Truncaedd Packets, I/O No Routes). | auto | False | -| icmp | Enable or disable monitoring of ICMP metrics (sent, received). | auto | False | -| icmp redirects | Enable or disable monitoring of ICMP redirects metrics (received, sent). | auto | False | -| icmp errors | Enable or disable monitoring of ICMP metrics (I/O Errors, In Checksums, In Destination Unreachable, In Packet too big, In Time Exceeds, In Parm Problem, Out Dest Unreachable, Out Timee Exceeds, Out Parm Problems.). | auto | False | -| icmp echos | Enable or disable monitoring of ICMP echos metrics (I/O Echos, I/O Echo Reply). | auto | False | -| icmp router | Enable or disable monitoring of ICMP router metrics (I/O Solicits, I/O Advertisements). | auto | False | -| icmp neighbor | Enable or disable monitoring of ICMP neighbor metrics (I/O Solicits, I/O Advertisements). | auto | False | -| icmp types | Enable or disable monitoring of ICMP types metrics (I/O Type1, I/O Type128, I/O Type129, Out Type133, Out Type135, In Type136, Out Type145). | auto | False | -| space usage for all disks | Enable or disable monitoring of space usage for all disks metrics (available, used, reserved for root). | yes | False | -| inodes usage for all disks | Enable or disable monitoring of inodes usage for all disks metrics (available, used, reserved for root). | yes | False | -| bandwidth | Enable or disable monitoring of bandwidth metrics (received, sent). | yes | False | -| system uptime | Enable or disable monitoring of system uptime metrics (uptime). | yes | False | -| cpu utilization | Enable or disable monitoring of CPU utilization metrics (user, nice, system, idel). | yes | False | -| system ram | Enable or disable monitoring of system RAM metrics (Active, Wired, throttled, compressor, inactive, purgeable, speculative, free). | yes | False | -| swap i/o | Enable or disable monitoring of SWAP I/O metrics (I/O Swap). | yes | False | -| memory page faults | Enable or disable monitoring of memory page faults metrics (memory, cow, I/O page, compress, decompress, zero fill, reactivate, purge). | yes | False | -| disk i/o | Enable or disable monitoring of disk I/O metrics (In, Out). | yes | False | +| enable load average | Enable or disable monitoring of load average metrics (load1, load5, load15). | yes | no | +| system swap | Enable or disable monitoring of system swap metrics (free, used). | yes | no | +| bandwidth | Enable or disable monitoring of network bandwidth metrics (received, sent). | yes | no | +| ipv4 TCP packets | Enable or disable monitoring of IPv4 TCP total packets metrics (received, sent). | yes | no | +| ipv4 TCP errors | Enable or disable monitoring of IPv4 TCP packets metrics (Input Errors, Checksum, Retransmission segments). | yes | no | +| ipv4 TCP handshake issues | Enable or disable monitoring of IPv4 TCP handshake metrics (Established Resets, Active Opens, Passive Opens, Attempt Fails). | yes | no | +| ECN packets | Enable or disable monitoring of ECN statistics metrics (InCEPkts, InNoECTPkts). | auto | no | +| TCP SYN cookies | Enable or disable monitoring of TCP SYN cookies metrics (received, sent, failed). | auto | no | +| TCP out-of-order queue | Enable or disable monitoring of TCP out-of-order queue metrics (inqueue). | auto | no | +| TCP connection aborts | Enable or disable monitoring of TCP connection aborts metrics (Bad Data, User closed, No memory, Timeout). | auto | no | +| ipv4 UDP packets | Enable or disable monitoring of ipv4 UDP packets metrics (sent, received.). | yes | no | +| ipv4 UDP errors | Enable or disable monitoring of ipv4 UDP errors metrics (Recieved Buffer error, Input Errors, No Ports, IN Checksum Errors, Ignore Multi). | yes | no | +| ipv4 icmp packets | Enable or disable monitoring of IPv4 ICMP packets metrics (sent, received, in error, OUT error, IN Checksum error). | yes | no | +| ipv4 icmp messages | Enable or disable monitoring of ipv4 ICMP messages metrics (I/O messages, I/O Errors, In Checksum). | yes | no | +| ipv4 packets | Enable or disable monitoring of ipv4 packets metrics (received, sent, forwarded, delivered). | yes | no | +| ipv4 fragments sent | Enable or disable monitoring of IPv4 fragments sent metrics (ok, fails, creates). | yes | no | +| ipv4 fragments assembly | Enable or disable monitoring of IPv4 fragments assembly metrics (ok, failed, all). | yes | no | +| ipv4 errors | Enable or disable monitoring of IPv4 errors metrics (I/O discard, I/O HDR errors, In Addr errors, In Unknown protos, OUT No Routes). | yes | no | +| ipv6 packets | Enable or disable monitoring of IPv6 packets metrics (received, sent, forwarded, delivered). | auto | no | +| ipv6 fragments sent | Enable or disable monitoring of IPv6 fragments sent metrics (ok, failed, all). | auto | no | +| ipv6 fragments assembly | Enable or disable monitoring of IPv6 fragments assembly metrics (ok, failed, timeout, all). | auto | no | +| ipv6 errors | Enable or disable monitoring of IPv6 errors metrics (I/O Discards, In Hdr Errors, In Addr Errors, In Truncaedd Packets, I/O No Routes). | auto | no | +| icmp | Enable or disable monitoring of ICMP metrics (sent, received). | auto | no | +| icmp redirects | Enable or disable monitoring of ICMP redirects metrics (received, sent). | auto | no | +| icmp errors | Enable or disable monitoring of ICMP metrics (I/O Errors, In Checksums, In Destination Unreachable, In Packet too big, In Time Exceeds, In Parm Problem, Out Dest Unreachable, Out Timee Exceeds, Out Parm Problems.). | auto | no | +| icmp echos | Enable or disable monitoring of ICMP echos metrics (I/O Echos, I/O Echo Reply). | auto | no | +| icmp router | Enable or disable monitoring of ICMP router metrics (I/O Solicits, I/O Advertisements). | auto | no | +| icmp neighbor | Enable or disable monitoring of ICMP neighbor metrics (I/O Solicits, I/O Advertisements). | auto | no | +| icmp types | Enable or disable monitoring of ICMP types metrics (I/O Type1, I/O Type128, I/O Type129, Out Type133, Out Type135, In Type136, Out Type145). | auto | no | +| space usage for all disks | Enable or disable monitoring of space usage for all disks metrics (available, used, reserved for root). | yes | no | +| inodes usage for all disks | Enable or disable monitoring of inodes usage for all disks metrics (available, used, reserved for root). | yes | no | +| bandwidth | Enable or disable monitoring of bandwidth metrics (received, sent). | yes | no | +| system uptime | Enable or disable monitoring of system uptime metrics (uptime). | yes | no | +| cpu utilization | Enable or disable monitoring of CPU utilization metrics (user, nice, system, idel). | yes | no | +| system ram | Enable or disable monitoring of system RAM metrics (Active, Wired, throttled, compressor, inactive, purgeable, speculative, free). | yes | no | +| swap i/o | Enable or disable monitoring of SWAP I/O metrics (I/O Swap). | yes | no | +| memory page faults | Enable or disable monitoring of memory page faults metrics (memory, cow, I/O page, compress, decompress, zero fill, reactivate, purge). | yes | no | +| disk i/o | Enable or disable monitoring of disk I/O metrics (In, Out). | yes | no | </details> diff --git a/collectors/macos.plugin/macos_fw.c b/collectors/macos.plugin/macos_fw.c index ca06f428e..75ef386b9 100644 --- a/collectors/macos.plugin/macos_fw.c +++ b/collectors/macos.plugin/macos_fw.c @@ -435,7 +435,7 @@ int do_macos_iokit(int update_every, usec_t dt) { if (likely(do_space)) { st = rrdset_find_active_bytype_localhost("disk_space", mntbuf[i].f_mntonname); if (unlikely(!st)) { - snprintfz(title, 4096, "Disk Space Usage for %s [%s]", mntbuf[i].f_mntonname, mntbuf[i].f_mntfromname); + snprintfz(title, sizeof(title) - 1, "Disk Space Usage for %s [%s]", mntbuf[i].f_mntonname, mntbuf[i].f_mntfromname); st = rrdset_create_localhost( "disk_space" , mntbuf[i].f_mntonname @@ -467,7 +467,7 @@ int do_macos_iokit(int update_every, usec_t dt) { if (likely(do_inodes)) { st = rrdset_find_active_bytype_localhost("disk_inodes", mntbuf[i].f_mntonname); if (unlikely(!st)) { - snprintfz(title, 4096, "Disk Files (inodes) Usage for %s [%s]", mntbuf[i].f_mntonname, mntbuf[i].f_mntfromname); + snprintfz(title, sizeof(title) - 1, "Disk Files (inodes) Usage for %s [%s]", mntbuf[i].f_mntonname, mntbuf[i].f_mntfromname); st = rrdset_create_localhost( "disk_inodes" , mntbuf[i].f_mntonname diff --git a/collectors/nfacct.plugin/integrations/netfilter.md b/collectors/nfacct.plugin/integrations/netfilter.md index 616e29e97..831b6fb5b 100644 --- a/collectors/nfacct.plugin/integrations/netfilter.md +++ b/collectors/nfacct.plugin/integrations/netfilter.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/nfacct.plu sidebar_label: "Netfilter" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Firewall" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -120,8 +121,8 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 1 | False | -| command options | Additinal parameters for collector | | False | +| update every | Data collection frequency. | 1 | no | +| command options | Additinal parameters for collector | | no | </details> diff --git a/collectors/nfacct.plugin/plugin_nfacct.c b/collectors/nfacct.plugin/plugin_nfacct.c index a788d1a03..2863cd7eb 100644 --- a/collectors/nfacct.plugin/plugin_nfacct.c +++ b/collectors/nfacct.plugin/plugin_nfacct.c @@ -747,22 +747,8 @@ void nfacct_signals() } int main(int argc, char **argv) { - stderror = stderr; clocks_init(); - - // ------------------------------------------------------------------------ - // initialization of netdata plugin - - program_name = "nfacct.plugin"; - - // disable syslog - error_log_syslog = 0; - - // set errors flood protection to 100 logs per hour - error_log_errors_per_period = 100; - error_log_throttle_period = 3600; - - log_set_global_severity_for_external_plugins(); + nd_log_initialize_for_external_plugins("nfacct.plugin"); // ------------------------------------------------------------------------ // parse command line parameters diff --git a/collectors/perf.plugin/integrations/cpu_performance.md b/collectors/perf.plugin/integrations/cpu_performance.md index a4adeb1ca..d3c316d2e 100644 --- a/collectors/perf.plugin/integrations/cpu_performance.md +++ b/collectors/perf.plugin/integrations/cpu_performance.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/perf.plugi sidebar_label: "CPU performance" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -151,8 +152,8 @@ You can get the available options running: | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 1 | False | -| command options | Command options that specify charts shown by plugin. `cycles`, `instructions`, `branch`, `cache`, `bus`, `stalled`, `migrations`, `alignment`, `emulation`, `L1D`, `L1D-prefetch`, `L1I`, `LL`, `DTLB`, `ITLB`, `PBU`. | 1 | True | +| update every | Data collection frequency. | 1 | no | +| command options | Command options that specify charts shown by plugin. `cycles`, `instructions`, `branch`, `cache`, `bus`, `stalled`, `migrations`, `alignment`, `emulation`, `L1D`, `L1D-prefetch`, `L1I`, `LL`, `DTLB`, `ITLB`, `PBU`. | 1 | yes | </details> diff --git a/collectors/perf.plugin/perf_plugin.c b/collectors/perf.plugin/perf_plugin.c index 31dae03e5..fe3b04daa 100644 --- a/collectors/perf.plugin/perf_plugin.c +++ b/collectors/perf.plugin/perf_plugin.c @@ -1283,22 +1283,8 @@ void parse_command_line(int argc, char **argv) { } int main(int argc, char **argv) { - stderror = stderr; clocks_init(); - - // ------------------------------------------------------------------------ - // initialization of netdata plugin - - program_name = "perf.plugin"; - - // disable syslog - error_log_syslog = 0; - - // set errors flood protection to 100 logs per hour - error_log_errors_per_period = 100; - error_log_throttle_period = 3600; - - log_set_global_severity_for_external_plugins(); + nd_log_initialize_for_external_plugins("perf.plugin"); parse_command_line(argc, argv); diff --git a/collectors/plugins.d/gperf-config.txt b/collectors/plugins.d/gperf-config.txt index a1d0c51ba..bad51367c 100644 --- a/collectors/plugins.d/gperf-config.txt +++ b/collectors/plugins.d/gperf-config.txt @@ -12,46 +12,47 @@ PARSER_KEYWORD; # # Plugins Only Keywords # -FLUSH, 97, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 1 -DISABLE, 98, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 2 -EXIT, 99, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 3 -HOST, 71, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 4 -HOST_DEFINE, 72, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 5 -HOST_DEFINE_END, 73, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 6 -HOST_LABEL, 74, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 7 +FLUSH, 97, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 1 +DISABLE, 98, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 2 +EXIT, 99, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 3 +HOST, 71, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 4 +HOST_DEFINE, 72, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 5 +HOST_DEFINE_END, 73, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 6 +HOST_LABEL, 74, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 7 # # Common keywords # -BEGIN, 12, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 8 -CHART, 32, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 9 -CLABEL, 34, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 10 -CLABEL_COMMIT, 35, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 11 -DIMENSION, 31, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 12 -END, 13, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 13 -FUNCTION, 41, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 14 -FUNCTION_RESULT_BEGIN, 42, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 15 -LABEL, 51, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 16 -OVERWRITE, 52, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 17 -SET, 11, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 18 -VARIABLE, 53, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 19 -DYNCFG_ENABLE, 101, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 20 -DYNCFG_REGISTER_MODULE, 102, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 21 -DYNCFG_REGISTER_JOB, 103, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 22 -REPORT_JOB_STATUS, 110, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 23 -DELETE_JOB, 111, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 24 +BEGIN, 12, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 8 +CHART, 32, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 9 +CLABEL, 34, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 10 +CLABEL_COMMIT, 35, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 11 +DIMENSION, 31, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 12 +END, 13, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 13 +FUNCTION, 41, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 14 +FUNCTION_RESULT_BEGIN, 42, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 15 +LABEL, 51, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 16 +OVERWRITE, 52, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 17 +SET, 11, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 18 +VARIABLE, 53, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 19 +DYNCFG_ENABLE, 101, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 20 +DYNCFG_REGISTER_MODULE, 102, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 21 +DYNCFG_REGISTER_JOB, 103, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 22 +DYNCFG_RESET, 104, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 23 +REPORT_JOB_STATUS, 110, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 24 +DELETE_JOB, 111, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 25 # # Streaming only keywords # -CLAIMED_ID, 61, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 25 -BEGIN2, 2, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 26 -SET2, 1, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 27 -END2, 3, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 28 +CLAIMED_ID, 61, PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 26 +BEGIN2, 2, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 27 +SET2, 1, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 28 +END2, 3, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 29 # # Streaming Replication keywords # -CHART_DEFINITION_END, 33, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 29 -RBEGIN, 22, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 30 -RDSTATE, 23, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 31 -REND, 25, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 32 -RSET, 21, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 33 -RSSTATE, 24, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 34 +CHART_DEFINITION_END, 33, PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 30 +RBEGIN, 22, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 31 +RDSTATE, 23, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 32 +REND, 25, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 33 +RSET, 21, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 34 +RSSTATE, 24, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 35 diff --git a/collectors/plugins.d/gperf-hashtable.h b/collectors/plugins.d/gperf-hashtable.h index 5bbf9fa98..b327d8d6d 100644 --- a/collectors/plugins.d/gperf-hashtable.h +++ b/collectors/plugins.d/gperf-hashtable.h @@ -30,12 +30,12 @@ #endif -#define GPERF_PARSER_TOTAL_KEYWORDS 34 +#define GPERF_PARSER_TOTAL_KEYWORDS 35 #define GPERF_PARSER_MIN_WORD_LENGTH 3 #define GPERF_PARSER_MAX_WORD_LENGTH 22 #define GPERF_PARSER_MIN_HASH_VALUE 3 -#define GPERF_PARSER_MAX_HASH_VALUE 36 -/* maximum key range = 34, duplicates = 0 */ +#define GPERF_PARSER_MAX_HASH_VALUE 47 +/* maximum key range = 45, duplicates = 0 */ #ifdef __GNUC__ __inline @@ -49,32 +49,32 @@ gperf_keyword_hash_function (register const char *str, register size_t len) { static unsigned char asso_values[] = { - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 12, 28, 5, 2, 0, - 0, 37, 3, 13, 37, 37, 14, 37, 0, 2, - 37, 37, 1, 3, 37, 6, 10, 37, 32, 2, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37 + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 11, 18, 0, 0, 0, + 6, 48, 9, 0, 48, 48, 20, 48, 0, 8, + 48, 48, 1, 12, 48, 20, 18, 48, 2, 0, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48 }; return len + asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]]; } @@ -83,73 +83,79 @@ static PARSER_KEYWORD gperf_keywords[] = { {(char*)0}, {(char*)0}, {(char*)0}, #line 30 "gperf-config.txt" - {"END", 13, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 13}, -#line 48 "gperf-config.txt" - {"END2", 3, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 28}, + {"END", 13, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 13}, +#line 49 "gperf-config.txt" + {"END2", 3, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 29}, +#line 56 "gperf-config.txt" + {"REND", 25, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 33}, +#line 17 "gperf-config.txt" + {"EXIT", 99, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 3}, +#line 16 "gperf-config.txt" + {"DISABLE", 98, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 2}, #line 55 "gperf-config.txt" - {"REND", 25, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 32}, + {"RDSTATE", 23, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 32}, +#line 29 "gperf-config.txt" + {"DIMENSION", 31, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 12}, +#line 42 "gperf-config.txt" + {"DELETE_JOB", 111, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 25}, + {(char*)0}, +#line 40 "gperf-config.txt" + {"DYNCFG_RESET", 104, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 23}, +#line 37 "gperf-config.txt" + {"DYNCFG_ENABLE", 101, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 20}, +#line 26 "gperf-config.txt" + {"CHART", 32, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 9}, #line 35 "gperf-config.txt" - {"SET", 11, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 18}, -#line 47 "gperf-config.txt" - {"SET2", 1, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 27}, -#line 56 "gperf-config.txt" - {"RSET", 21, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 33}, -#line 18 "gperf-config.txt" - {"HOST", 71, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 4}, -#line 54 "gperf-config.txt" - {"RDSTATE", 23, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 31}, + {"SET", 11, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 18}, +#line 48 "gperf-config.txt" + {"SET2", 1, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 28}, #line 57 "gperf-config.txt" - {"RSSTATE", 24, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 34}, + {"RSET", 21, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 34}, #line 41 "gperf-config.txt" - {"DELETE_JOB", 111, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 24}, -#line 26 "gperf-config.txt" - {"CHART", 32, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 9}, -#line 31 "gperf-config.txt" - {"FUNCTION", 41, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 14}, + {"REPORT_JOB_STATUS", 110, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 24}, +#line 39 "gperf-config.txt" + {"DYNCFG_REGISTER_JOB", 103, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 22}, +#line 58 "gperf-config.txt" + {"RSSTATE", 24, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 35}, +#line 18 "gperf-config.txt" + {"HOST", 71, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 4}, +#line 38 "gperf-config.txt" + {"DYNCFG_REGISTER_MODULE", 102, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 21}, +#line 25 "gperf-config.txt" + {"BEGIN", 12, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 8}, +#line 47 "gperf-config.txt" + {"BEGIN2", 2, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 27}, +#line 54 "gperf-config.txt" + {"RBEGIN", 22, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 31}, +#line 27 "gperf-config.txt" + {"CLABEL", 34, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 10}, #line 21 "gperf-config.txt" - {"HOST_LABEL", 74, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 7}, + {"HOST_LABEL", 74, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 7}, #line 19 "gperf-config.txt" - {"HOST_DEFINE", 72, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 5}, -#line 37 "gperf-config.txt" - {"DYNCFG_ENABLE", 101, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 20}, -#line 40 "gperf-config.txt" - {"REPORT_JOB_STATUS", 110, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 23}, + {"HOST_DEFINE", 72, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 5}, +#line 53 "gperf-config.txt" + {"CHART_DEFINITION_END", 33, PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 30}, +#line 46 "gperf-config.txt" + {"CLAIMED_ID", 61, PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 26}, #line 15 "gperf-config.txt" - {"FLUSH", 97, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 1}, + {"FLUSH", 97, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 1}, #line 20 "gperf-config.txt" - {"HOST_DEFINE_END", 73, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 6}, + {"HOST_DEFINE_END", 73, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 6}, +#line 28 "gperf-config.txt" + {"CLABEL_COMMIT", 35, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 11}, +#line 31 "gperf-config.txt" + {"FUNCTION", 41, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 14}, #line 34 "gperf-config.txt" - {"OVERWRITE", 52, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 17}, -#line 16 "gperf-config.txt" - {"DISABLE", 98, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 2}, -#line 39 "gperf-config.txt" - {"DYNCFG_REGISTER_JOB", 103, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 22}, -#line 29 "gperf-config.txt" - {"DIMENSION", 31, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 12}, -#line 27 "gperf-config.txt" - {"CLABEL", 34, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 10}, -#line 38 "gperf-config.txt" - {"DYNCFG_REGISTER_MODULE", 102, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 21}, -#line 32 "gperf-config.txt" - {"FUNCTION_RESULT_BEGIN", 42, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 15}, -#line 52 "gperf-config.txt" - {"CHART_DEFINITION_END", 33, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 29}, -#line 45 "gperf-config.txt" - {"CLAIMED_ID", 61, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 25}, -#line 36 "gperf-config.txt" - {"VARIABLE", 53, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 19}, + {"OVERWRITE", 52, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 17}, #line 33 "gperf-config.txt" - {"LABEL", 51, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 16}, -#line 28 "gperf-config.txt" - {"CLABEL_COMMIT", 35, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 11}, -#line 25 "gperf-config.txt" - {"BEGIN", 12, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 8}, -#line 46 "gperf-config.txt" - {"BEGIN2", 2, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 26}, -#line 53 "gperf-config.txt" - {"RBEGIN", 22, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 30}, -#line 17 "gperf-config.txt" - {"EXIT", 99, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 3} + {"LABEL", 51, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 16}, +#line 36 "gperf-config.txt" + {"VARIABLE", 53, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 19}, + {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, + {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, + {(char*)0}, +#line 32 "gperf-config.txt" + {"FUNCTION_RESULT_BEGIN", 42, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 15} }; PARSER_KEYWORD * diff --git a/collectors/plugins.d/local_listeners.c b/collectors/plugins.d/local_listeners.c index a39de7974..f2c5e688b 100644 --- a/collectors/plugins.d/local_listeners.c +++ b/collectors/plugins.d/local_listeners.c @@ -338,25 +338,59 @@ bool read_proc_net_x(const char *filename, PROC_NET_PROTOCOLS protocol) { } // ---------------------------------------------------------------------------- - -int main(int argc __maybe_unused, char **argv __maybe_unused) { +typedef struct { + bool read_tcp; + bool read_tcp6; + bool read_udp; + bool read_udp6; +} CommandLineArguments; + +int main(int argc, char **argv) { char path[FILENAME_MAX + 1]; hashTable_key_inode_port_value = createHashTable(); netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX"); if(!netdata_configured_host_prefix) netdata_configured_host_prefix = ""; - snprintfz(path, FILENAME_MAX, "%s/proc/net/tcp", netdata_configured_host_prefix); - read_proc_net_x(path, PROC_NET_PROTOCOL_TCP); + CommandLineArguments args = {.read_tcp = false, .read_tcp6 = false, .read_udp = false, .read_udp6 = false}; + + for (int i = 1; i < argc; i++) { + if (strcmp("tcp", argv[i]) == 0) { + args.read_tcp = true; + continue; + } else if (strcmp("tcp6", argv[i]) == 0) { + args.read_tcp6 = true; + continue; + } else if (strcmp("udp", argv[i]) == 0) { + args.read_udp = true; + continue; + } else if (strcmp("udp6", argv[i]) == 0) { + args.read_udp6 = true; + continue; + } + } + + bool read_all_files = (!args.read_tcp && !args.read_tcp6 && !args.read_udp && !args.read_udp6); - snprintfz(path, FILENAME_MAX, "%s/proc/net/udp", netdata_configured_host_prefix); - read_proc_net_x(path, PROC_NET_PROTOCOL_UDP); + if (read_all_files || args.read_tcp) { + snprintfz(path, FILENAME_MAX, "%s/proc/net/tcp", netdata_configured_host_prefix); + read_proc_net_x(path, PROC_NET_PROTOCOL_TCP); + } - snprintfz(path, FILENAME_MAX, "%s/proc/net/tcp6", netdata_configured_host_prefix); - read_proc_net_x(path, PROC_NET_PROTOCOL_TCP6); + if (read_all_files || args.read_udp) { + snprintfz(path, FILENAME_MAX, "%s/proc/net/udp", netdata_configured_host_prefix); + read_proc_net_x(path, PROC_NET_PROTOCOL_UDP); + } - snprintfz(path, FILENAME_MAX, "%s/proc/net/udp6", netdata_configured_host_prefix); - read_proc_net_x(path, PROC_NET_PROTOCOL_UDP6); + if (read_all_files || args.read_tcp6) { + snprintfz(path, FILENAME_MAX, "%s/proc/net/tcp6", netdata_configured_host_prefix); + read_proc_net_x(path, PROC_NET_PROTOCOL_TCP6); + } + + if (read_all_files || args.read_udp6) { + snprintfz(path, FILENAME_MAX, "%s/proc/net/udp6", netdata_configured_host_prefix); + read_proc_net_x(path, PROC_NET_PROTOCOL_UDP6); + } snprintfz(path, FILENAME_MAX, "%s/proc", netdata_configured_host_prefix); find_all_sockets_in_proc(path); diff --git a/collectors/plugins.d/plugins_d.c b/collectors/plugins.d/plugins_d.c index 08c26a198..20061ad29 100644 --- a/collectors/plugins.d/plugins_d.c +++ b/collectors/plugins.d/plugins_d.c @@ -47,8 +47,7 @@ static inline bool plugin_is_running(struct plugind *cd) { return ret; } -static void pluginsd_worker_thread_cleanup(void *arg) -{ +static void pluginsd_worker_thread_cleanup(void *arg) { struct plugind *cd = (struct plugind *)arg; worker_unregister(); @@ -143,41 +142,64 @@ static void *pluginsd_worker_thread(void *arg) { netdata_thread_cleanup_push(pluginsd_worker_thread_cleanup, arg); - struct plugind *cd = (struct plugind *)arg; - plugin_set_running(cd); + { + struct plugind *cd = (struct plugind *) arg; + plugin_set_running(cd); - size_t count = 0; + size_t count = 0; - while (service_running(SERVICE_COLLECTORS)) { - FILE *fp_child_input = NULL; - FILE *fp_child_output = netdata_popen(cd->cmd, &cd->unsafe.pid, &fp_child_input); + while(service_running(SERVICE_COLLECTORS)) { + FILE *fp_child_input = NULL; + FILE *fp_child_output = netdata_popen(cd->cmd, &cd->unsafe.pid, &fp_child_input); - if (unlikely(!fp_child_input || !fp_child_output)) { - netdata_log_error("PLUGINSD: 'host:%s', cannot popen(\"%s\", \"r\").", rrdhost_hostname(cd->host), cd->cmd); - break; - } + if(unlikely(!fp_child_input || !fp_child_output)) { + netdata_log_error("PLUGINSD: 'host:%s', cannot popen(\"%s\", \"r\").", + rrdhost_hostname(cd->host), cd->cmd); + break; + } - netdata_log_info("PLUGINSD: 'host:%s' connected to '%s' running on pid %d", - rrdhost_hostname(cd->host), cd->fullfilename, cd->unsafe.pid); + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "PLUGINSD: 'host:%s' connected to '%s' running on pid %d", + rrdhost_hostname(cd->host), + cd->fullfilename, cd->unsafe.pid); - count = pluginsd_process(cd->host, cd, fp_child_input, fp_child_output, 0); + const char *plugin = strrchr(cd->fullfilename, '/'); + if(plugin) + plugin++; + else + plugin = cd->fullfilename; - netdata_log_info("PLUGINSD: 'host:%s', '%s' (pid %d) disconnected after %zu successful data collections (ENDs).", - rrdhost_hostname(cd->host), cd->fullfilename, cd->unsafe.pid, count); + char module[100]; + snprintfz(module, sizeof(module), "plugins.d[%s]", plugin); + ND_LOG_STACK lgs[] = { + ND_LOG_FIELD_TXT(NDF_MODULE, module), + ND_LOG_FIELD_TXT(NDF_NIDL_NODE, rrdhost_hostname(cd->host)), + ND_LOG_FIELD_TXT(NDF_SRC_TRANSPORT, "pluginsd"), + ND_LOG_FIELD_END(), + }; + ND_LOG_STACK_PUSH(lgs); - killpid(cd->unsafe.pid); + count = pluginsd_process(cd->host, cd, fp_child_input, fp_child_output, 0); - int worker_ret_code = netdata_pclose(fp_child_input, fp_child_output, cd->unsafe.pid); + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "PLUGINSD: 'host:%s', '%s' (pid %d) disconnected after %zu successful data collections (ENDs).", + rrdhost_hostname(cd->host), cd->fullfilename, cd->unsafe.pid, count); - if (likely(worker_ret_code == 0)) - pluginsd_worker_thread_handle_success(cd); - else - pluginsd_worker_thread_handle_error(cd, worker_ret_code); + killpid(cd->unsafe.pid); - cd->unsafe.pid = 0; - if (unlikely(!plugin_is_enabled(cd))) - break; - } + int worker_ret_code = netdata_pclose(fp_child_input, fp_child_output, cd->unsafe.pid); + + if(likely(worker_ret_code == 0)) + pluginsd_worker_thread_handle_success(cd); + else + pluginsd_worker_thread_handle_error(cd, worker_ret_code); + + cd->unsafe.pid = 0; + + if(unlikely(!plugin_is_enabled(cd))) + break; + } + } netdata_thread_cleanup_pop(1); return NULL; @@ -217,6 +239,13 @@ void *pluginsd_main(void *ptr) // disable some plugins by default config_get_boolean(CONFIG_SECTION_PLUGINS, "slabinfo", CONFIG_BOOLEAN_NO); + config_get_boolean(CONFIG_SECTION_PLUGINS, "logs-management", +#if defined(LOGS_MANAGEMENT_DEV_MODE) + CONFIG_BOOLEAN_YES +#else + CONFIG_BOOLEAN_NO +#endif + ); // it crashes (both threads) on Alpine after we made it multi-threaded // works with "--device /dev/ipmi0", but this is not default // see https://github.com/netdata/netdata/pull/15564 for details diff --git a/collectors/plugins.d/plugins_d.h b/collectors/plugins.d/plugins_d.h index 7c5df4168..37c70f7e3 100644 --- a/collectors/plugins.d/plugins_d.h +++ b/collectors/plugins.d/plugins_d.h @@ -16,14 +16,11 @@ #define PLUGINSD_KEYWORD_DYNCFG_ENABLE "DYNCFG_ENABLE" #define PLUGINSD_KEYWORD_DYNCFG_REGISTER_MODULE "DYNCFG_REGISTER_MODULE" #define PLUGINSD_KEYWORD_DYNCFG_REGISTER_JOB "DYNCFG_REGISTER_JOB" +#define PLUGINSD_KEYWORD_DYNCFG_RESET "DYNCFG_RESET" #define PLUGINSD_KEYWORD_REPORT_JOB_STATUS "REPORT_JOB_STATUS" #define PLUGINSD_KEYWORD_DELETE_JOB "DELETE_JOB" -#define PLUGINSD_LINE_MAX_SSL_READ 512 - -#define PLUGINSD_MAX_WORDS 20 - #define PLUGINSD_MAX_DIRECTORIES 20 extern char *plugin_directories[PLUGINSD_MAX_DIRECTORIES]; diff --git a/collectors/plugins.d/pluginsd_parser.c b/collectors/plugins.d/pluginsd_parser.c index 2e69c7da5..3b47c6c0f 100644 --- a/collectors/plugins.d/pluginsd_parser.c +++ b/collectors/plugins.d/pluginsd_parser.c @@ -4,8 +4,8 @@ #define LOG_FUNCTIONS false -#define SERVING_STREAMING(parser) (parser->repertoire == PARSER_INIT_STREAMING) -#define SERVING_PLUGINSD(parser) (parser->repertoire == PARSER_INIT_PLUGINSD) +#define SERVING_STREAMING(parser) ((parser)->repertoire == PARSER_INIT_STREAMING) +#define SERVING_PLUGINSD(parser) ((parser)->repertoire == PARSER_INIT_PLUGINSD) static ssize_t send_to_plugin(const char *txt, void *data) { PARSER *parser = data; @@ -13,6 +13,11 @@ static ssize_t send_to_plugin(const char *txt, void *data) { if(!txt || !*txt) return 0; +#ifdef ENABLE_H2O + if(parser->h2o_ctx) + return h2o_stream_write(parser->h2o_ctx, txt, strlen(txt)); +#endif + errno = 0; spinlock_lock(&parser->writer.spinlock); ssize_t bytes = -1; @@ -110,23 +115,6 @@ static inline bool pluginsd_unlock_rrdset_data_collection(PARSER *parser) { return false; } -void pluginsd_rrdset_cleanup(RRDSET *st) { - spinlock_lock(&st->pluginsd.spinlock); - - for(size_t i = 0; i < st->pluginsd.size ; i++) { - rrddim_acquired_release(st->pluginsd.rda[i]); // can be NULL - st->pluginsd.rda[i] = NULL; - } - - freez(st->pluginsd.rda); - st->pluginsd.collector_tid = 0; - st->pluginsd.rda = NULL; - st->pluginsd.size = 0; - st->pluginsd.pos = 0; - - spinlock_unlock(&st->pluginsd.spinlock); -} - static inline void pluginsd_unlock_previous_scope_chart(PARSER *parser, const char *keyword, bool stale) { if(unlikely(pluginsd_unlock_rrdset_data_collection(parser))) { if(stale) @@ -150,7 +138,12 @@ static inline void pluginsd_unlock_previous_scope_chart(PARSER *parser, const ch static inline void pluginsd_clear_scope_chart(PARSER *parser, const char *keyword) { pluginsd_unlock_previous_scope_chart(parser, keyword, true); + + if(parser->user.cleanup_slots && parser->user.st) + rrdset_pluginsd_receive_unslot(parser->user.st); + parser->user.st = NULL; + parser->user.cleanup_slots = false; } static inline bool pluginsd_set_scope_chart(PARSER *parser, RRDSET *st, const char *keyword) { @@ -160,11 +153,12 @@ static inline bool pluginsd_set_scope_chart(PARSER *parser, RRDSET *st, const ch if(unlikely(old_collector_tid)) { if(old_collector_tid != my_collector_tid) { - error_limit_static_global_var(erl, 1, 0); - error_limit(&erl, "PLUGINSD: keyword %s: 'host:%s/chart:%s' is collected twice (my tid %d, other collector tid %d)", - keyword ? keyword : "UNKNOWN", - rrdhost_hostname(st->rrdhost), rrdset_id(st), - my_collector_tid, old_collector_tid); + nd_log_limit_static_global_var(erl, 1, 0); + nd_log_limit(&erl, NDLS_COLLECTORS, NDLP_WARNING, + "PLUGINSD: keyword %s: 'host:%s/chart:%s' is collected twice (my tid %d, other collector tid %d)", + keyword ? keyword : "UNKNOWN", + rrdhost_hostname(st->rrdhost), rrdset_id(st), + my_collector_tid, old_collector_tid); return false; } @@ -176,61 +170,141 @@ static inline bool pluginsd_set_scope_chart(PARSER *parser, RRDSET *st, const ch pluginsd_clear_scope_chart(parser, keyword); - size_t dims = dictionary_entries(st->rrddim_root_index); - if(unlikely(st->pluginsd.size < dims)) { - st->pluginsd.rda = reallocz(st->pluginsd.rda, dims * sizeof(RRDDIM_ACQUIRED *)); + st->pluginsd.pos = 0; + parser->user.st = st; + parser->user.cleanup_slots = false; + + return true; +} + +static inline void pluginsd_rrddim_put_to_slot(PARSER *parser, RRDSET *st, RRDDIM *rd, ssize_t slot, bool obsolete) { + size_t wanted_size = st->pluginsd.size; + + if(slot >= 1) { + st->pluginsd.dims_with_slots = true; + wanted_size = slot; + } + else { + st->pluginsd.dims_with_slots = false; + wanted_size = dictionary_entries(st->rrddim_root_index); + } + + if(wanted_size > st->pluginsd.size) { + st->pluginsd.prd_array = reallocz(st->pluginsd.prd_array, wanted_size * sizeof(struct pluginsd_rrddim)); // initialize the empty slots - for(ssize_t i = (ssize_t)dims - 1; i >= (ssize_t)st->pluginsd.size ;i--) - st->pluginsd.rda[i] = NULL; + for(ssize_t i = (ssize_t) wanted_size - 1; i >= (ssize_t) st->pluginsd.size; i--) { + st->pluginsd.prd_array[i].rda = NULL; + st->pluginsd.prd_array[i].rd = NULL; + st->pluginsd.prd_array[i].id = NULL; + } - st->pluginsd.size = dims; + st->pluginsd.size = wanted_size; } - st->pluginsd.pos = 0; - parser->user.st = st; + if(st->pluginsd.dims_with_slots) { + struct pluginsd_rrddim *prd = &st->pluginsd.prd_array[slot - 1]; - return true; + if(prd->rd != rd) { + prd->rda = rrddim_find_and_acquire(st, string2str(rd->id)); + prd->rd = rrddim_acquired_to_rrddim(prd->rda); + prd->id = string2str(prd->rd->id); + } + + if(obsolete) + parser->user.cleanup_slots = true; + } } -static inline RRDDIM *pluginsd_acquire_dimension(RRDHOST *host, RRDSET *st, const char *dimension, const char *cmd) { +static inline RRDDIM *pluginsd_acquire_dimension(RRDHOST *host, RRDSET *st, const char *dimension, ssize_t slot, const char *cmd) { if (unlikely(!dimension || !*dimension)) { netdata_log_error("PLUGINSD: 'host:%s/chart:%s' got a %s, without a dimension.", rrdhost_hostname(host), rrdset_id(st), cmd); return NULL; } - if(unlikely(st->pluginsd.pos >= st->pluginsd.size)) - st->pluginsd.pos = 0; + if (unlikely(!st->pluginsd.size)) { + netdata_log_error("PLUGINSD: 'host:%s/chart:%s' got a %s, but the chart has no dimensions.", + rrdhost_hostname(host), rrdset_id(st), cmd); + return NULL; + } + + struct pluginsd_rrddim *prd; + RRDDIM *rd; + + if(likely(st->pluginsd.dims_with_slots)) { + // caching with slots + + if(unlikely(slot < 1 || slot > st->pluginsd.size)) { + netdata_log_error("PLUGINSD: 'host:%s/chart:%s' got a %s with slot %zd, but slots in the range [1 - %u] are expected.", + rrdhost_hostname(host), rrdset_id(st), cmd, slot, st->pluginsd.size); + return NULL; + } - RRDDIM_ACQUIRED *rda = st->pluginsd.rda[st->pluginsd.pos]; + prd = &st->pluginsd.prd_array[slot - 1]; - if(likely(rda)) { - RRDDIM *rd = rrddim_acquired_to_rrddim(rda); - if (likely(rd && string_strcmp(rd->id, dimension) == 0)) { - // we found a cached RDA - st->pluginsd.pos++; + rd = prd->rd; + if(likely(rd)) { +#ifdef NETDATA_INTERNAL_CHECKS + if(strcmp(prd->id, dimension) != 0) { + ssize_t t; + for(t = 0; t < st->pluginsd.size ;t++) { + if (strcmp(st->pluginsd.prd_array[t].id, dimension) == 0) + break; + } + if(t >= st->pluginsd.size) + t = -1; + + internal_fatal(true, + "PLUGINSD: expected to find dimension '%s' on slot %zd, but found '%s', " + "the right slot is %zd", + dimension, slot, prd->id, t); + } +#endif return rd; } - else { - // the collector is sending dimensions in a different order - // release the previous one, to reuse this slot - rrddim_acquired_release(rda); - st->pluginsd.rda[st->pluginsd.pos] = NULL; + } + else { + // caching without slots + + if(unlikely(st->pluginsd.pos >= st->pluginsd.size)) + st->pluginsd.pos = 0; + + prd = &st->pluginsd.prd_array[st->pluginsd.pos++]; + + rd = prd->rd; + if(likely(rd)) { + const char *id = prd->id; + + if(strcmp(id, dimension) == 0) { + // we found it cached + return rd; + } + else { + // the cached one is not good for us + rrddim_acquired_release(prd->rda); + prd->rda = NULL; + prd->rd = NULL; + prd->id = NULL; + } } } - rda = rrddim_find_and_acquire(st, dimension); + // we need to find the dimension and set it to prd + + RRDDIM_ACQUIRED *rda = rrddim_find_and_acquire(st, dimension); if (unlikely(!rda)) { netdata_log_error("PLUGINSD: 'host:%s/chart:%s/dim:%s' got a %s but dimension does not exist.", - rrdhost_hostname(host), rrdset_id(st), dimension, cmd); + rrdhost_hostname(host), rrdset_id(st), dimension, cmd); return NULL; } - st->pluginsd.rda[st->pluginsd.pos++] = rda; + prd->rda = rda; + prd->rd = rd = rrddim_acquired_to_rrddim(rda); + prd->id = string2str(rd->id); - return rrddim_acquired_to_rrddim(rda); + return rd; } static inline RRDSET *pluginsd_find_chart(RRDHOST *host, const char *chart, const char *cmd) { @@ -248,20 +322,89 @@ static inline RRDSET *pluginsd_find_chart(RRDHOST *host, const char *chart, cons return st; } +static inline ssize_t pluginsd_parse_rrd_slot(char **words, size_t num_words) { + ssize_t slot = -1; + char *id = get_word(words, num_words, 1); + if(id && id[0] == PLUGINSD_KEYWORD_SLOT[0] && id[1] == PLUGINSD_KEYWORD_SLOT[1] && + id[2] == PLUGINSD_KEYWORD_SLOT[2] && id[3] == PLUGINSD_KEYWORD_SLOT[3] && id[4] == ':') { + slot = (ssize_t) str2ull_encoded(&id[5]); + if(slot < 0) slot = 0; // to make the caller increment its idx of the words + } + + return slot; +} + +static inline void pluginsd_rrdset_cache_put_to_slot(PARSER *parser, RRDSET *st, ssize_t slot, bool obsolete) { + // clean possible old cached data + rrdset_pluginsd_receive_unslot(st); + + if(unlikely(slot < 1 || slot >= INT32_MAX)) + return; + + RRDHOST *host = st->rrdhost; + + if(unlikely((size_t)slot > host->rrdpush.receive.pluginsd_chart_slots.size)) { + spinlock_lock(&host->rrdpush.receive.pluginsd_chart_slots.spinlock); + size_t old_slots = host->rrdpush.receive.pluginsd_chart_slots.size; + size_t new_slots = (old_slots < PLUGINSD_MIN_RRDSET_POINTERS_CACHE) ? PLUGINSD_MIN_RRDSET_POINTERS_CACHE : old_slots * 2; + + if(new_slots < (size_t)slot) + new_slots = slot; + + host->rrdpush.receive.pluginsd_chart_slots.array = + reallocz(host->rrdpush.receive.pluginsd_chart_slots.array, new_slots * sizeof(RRDSET *)); + + for(size_t i = old_slots; i < new_slots ;i++) + host->rrdpush.receive.pluginsd_chart_slots.array[i] = NULL; + + host->rrdpush.receive.pluginsd_chart_slots.size = new_slots; + spinlock_unlock(&host->rrdpush.receive.pluginsd_chart_slots.spinlock); + } + + host->rrdpush.receive.pluginsd_chart_slots.array[slot - 1] = st; + st->pluginsd.last_slot = (int32_t)slot - 1; + parser->user.cleanup_slots = obsolete; +} + +static inline RRDSET *pluginsd_rrdset_cache_get_from_slot(PARSER *parser, RRDHOST *host, const char *id, ssize_t slot, const char *keyword) { + if(unlikely(slot < 1 || (size_t)slot > host->rrdpush.receive.pluginsd_chart_slots.size)) + return pluginsd_find_chart(host, id, keyword); + + RRDSET *st = host->rrdpush.receive.pluginsd_chart_slots.array[slot - 1]; + + if(!st) { + st = pluginsd_find_chart(host, id, keyword); + if(st) + pluginsd_rrdset_cache_put_to_slot(parser, st, slot, rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)); + } + else { + internal_fatal(string_strcmp(st->id, id) != 0, + "PLUGINSD: wrong chart in slot %zd, expected '%s', found '%s'", + slot - 1, id, string2str(st->id)); + } + + return st; +} + static inline PARSER_RC PLUGINSD_DISABLE_PLUGIN(PARSER *parser, const char *keyword, const char *msg) { parser->user.enabled = 0; if(keyword && msg) { - error_limit_static_global_var(erl, 1, 0); - error_limit(&erl, "PLUGINSD: keyword %s: %s", keyword, msg); + nd_log_limit_static_global_var(erl, 1, 0); + nd_log_limit(&erl, NDLS_COLLECTORS, NDLP_INFO, + "PLUGINSD: keyword %s: %s", keyword, msg); } return PARSER_RC_ERROR; } static inline PARSER_RC pluginsd_set(char **words, size_t num_words, PARSER *parser) { - char *dimension = get_word(words, num_words, 1); - char *value = get_word(words, num_words, 2); + int idx = 1; + ssize_t slot = pluginsd_parse_rrd_slot(words, num_words); + if(slot >= 0) idx++; + + char *dimension = get_word(words, num_words, idx++); + char *value = get_word(words, num_words, idx++); RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_SET); if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); @@ -269,7 +412,7 @@ static inline PARSER_RC pluginsd_set(char **words, size_t num_words, PARSER *par RRDSET *st = pluginsd_require_scope_chart(parser, PLUGINSD_KEYWORD_SET, PLUGINSD_KEYWORD_CHART); if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, PLUGINSD_KEYWORD_SET); + RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, slot, PLUGINSD_KEYWORD_SET); if(!rd) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); st->pluginsd.set = true; @@ -285,13 +428,17 @@ static inline PARSER_RC pluginsd_set(char **words, size_t num_words, PARSER *par } static inline PARSER_RC pluginsd_begin(char **words, size_t num_words, PARSER *parser) { - char *id = get_word(words, num_words, 1); - char *microseconds_txt = get_word(words, num_words, 2); + int idx = 1; + ssize_t slot = pluginsd_parse_rrd_slot(words, num_words); + if(slot >= 0) idx++; + + char *id = get_word(words, num_words, idx++); + char *microseconds_txt = get_word(words, num_words, idx++); RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_BEGIN); if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - RRDSET *st = pluginsd_find_chart(host, id, PLUGINSD_KEYWORD_BEGIN); + RRDSET *st = pluginsd_rrdset_cache_get_from_slot(parser, host, id, slot, PLUGINSD_KEYWORD_BEGIN); if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); if(!pluginsd_set_scope_chart(parser, st, PLUGINSD_KEYWORD_BEGIN)) @@ -332,8 +479,9 @@ static inline PARSER_RC pluginsd_begin(char **words, size_t num_words, PARSER *p } static inline PARSER_RC pluginsd_end(char **words, size_t num_words, PARSER *parser) { - UNUSED(words); - UNUSED(num_words); + char *tv_sec = get_word(words, num_words, 1); + char *tv_usec = get_word(words, num_words, 2); + char *pending_rrdset_next = get_word(words, num_words, 3); RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_END); if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); @@ -347,9 +495,15 @@ static inline PARSER_RC pluginsd_end(char **words, size_t num_words, PARSER *par pluginsd_clear_scope_chart(parser, PLUGINSD_KEYWORD_END); parser->user.data_collections_count++; - struct timeval now; - now_realtime_timeval(&now); - rrdset_timed_done(st, now, /* pending_rrdset_next = */ false); + struct timeval tv = { + .tv_sec = (tv_sec && *tv_sec) ? str2ll(tv_sec, NULL) : 0, + .tv_usec = (tv_usec && *tv_usec) ? str2ll(tv_usec, NULL) : 0 + }; + + if(!tv.tv_sec) + now_realtime_timeval(&tv); + + rrdset_timed_done(st, tv, pending_rrdset_next && *pending_rrdset_next ? true : false); return PARSER_RC_OK; } @@ -419,30 +573,29 @@ static inline PARSER_RC pluginsd_host_define_end(char **words __maybe_unused, si return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_HOST_DEFINE_END, "missing initialization, send " PLUGINSD_KEYWORD_HOST_DEFINE " before this"); RRDHOST *host = rrdhost_find_or_create( - string2str(parser->user.host_define.hostname), - string2str(parser->user.host_define.hostname), - parser->user.host_define.machine_guid_str, - "Netdata Virtual Host 1.0", - netdata_configured_timezone, - netdata_configured_abbrev_timezone, - netdata_configured_utc_offset, - NULL, - program_name, - program_version, - default_rrd_update_every, - default_rrd_history_entries, - default_rrd_memory_mode, - default_health_enabled, - default_rrdpush_enabled, - default_rrdpush_destination, - default_rrdpush_api_key, - default_rrdpush_send_charts_matching, - default_rrdpush_enable_replication, - default_rrdpush_seconds_to_replicate, - default_rrdpush_replication_step, - rrdhost_labels_to_system_info(parser->user.host_define.rrdlabels), - false - ); + string2str(parser->user.host_define.hostname), + string2str(parser->user.host_define.hostname), + parser->user.host_define.machine_guid_str, + "Netdata Virtual Host 1.0", + netdata_configured_timezone, + netdata_configured_abbrev_timezone, + netdata_configured_utc_offset, + NULL, + program_name, + program_version, + default_rrd_update_every, + default_rrd_history_entries, + default_rrd_memory_mode, + default_health_enabled, + default_rrdpush_enabled, + default_rrdpush_destination, + default_rrdpush_api_key, + default_rrdpush_send_charts_matching, + default_rrdpush_enable_replication, + default_rrdpush_seconds_to_replicate, + default_rrdpush_replication_step, + rrdhost_labels_to_system_info(parser->user.host_define.rrdlabels), + false); rrdhost_option_set(host, RRDHOST_OPTION_VIRTUAL_HOST); @@ -492,18 +645,22 @@ static inline PARSER_RC pluginsd_chart(char **words, size_t num_words, PARSER *p RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_CHART); if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - char *type = get_word(words, num_words, 1); - char *name = get_word(words, num_words, 2); - char *title = get_word(words, num_words, 3); - char *units = get_word(words, num_words, 4); - char *family = get_word(words, num_words, 5); - char *context = get_word(words, num_words, 6); - char *chart = get_word(words, num_words, 7); - char *priority_s = get_word(words, num_words, 8); - char *update_every_s = get_word(words, num_words, 9); - char *options = get_word(words, num_words, 10); - char *plugin = get_word(words, num_words, 11); - char *module = get_word(words, num_words, 12); + int idx = 1; + ssize_t slot = pluginsd_parse_rrd_slot(words, num_words); + if(slot >= 0) idx++; + + char *type = get_word(words, num_words, idx++); + char *name = get_word(words, num_words, idx++); + char *title = get_word(words, num_words, idx++); + char *units = get_word(words, num_words, idx++); + char *family = get_word(words, num_words, idx++); + char *context = get_word(words, num_words, idx++); + char *chart = get_word(words, num_words, idx++); + char *priority_s = get_word(words, num_words, idx++); + char *update_every_s = get_word(words, num_words, idx++); + char *options = get_word(words, num_words, idx++); + char *plugin = get_word(words, num_words, idx++); + char *module = get_word(words, num_words, idx++); // parse the id from type char *id = NULL; @@ -570,14 +727,15 @@ static inline PARSER_RC pluginsd_chart(char **words, size_t num_words, PARSER *p module, priority, update_every, chart_type); + bool obsolete = false; if (likely(st)) { if (options && *options) { if (strstr(options, "obsolete")) { - pluginsd_rrdset_cleanup(st); - rrdset_is_obsolete(st); + rrdset_is_obsolete___safe_from_collector_thread(st); + obsolete = true; } else - rrdset_isnot_obsolete(st); + rrdset_isnot_obsolete___safe_from_collector_thread(st); if (strstr(options, "detail")) rrdset_flag_set(st, RRDSET_FLAG_DETAIL); @@ -595,13 +753,15 @@ static inline PARSER_RC pluginsd_chart(char **words, size_t num_words, PARSER *p rrdset_flag_clear(st, RRDSET_FLAG_STORE_FIRST); } else { - rrdset_isnot_obsolete(st); + rrdset_isnot_obsolete___safe_from_collector_thread(st); rrdset_flag_clear(st, RRDSET_FLAG_DETAIL); rrdset_flag_clear(st, RRDSET_FLAG_STORE_FIRST); } if(!pluginsd_set_scope_chart(parser, st, PLUGINSD_KEYWORD_CHART)) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); + + pluginsd_rrdset_cache_put_to_slot(parser, st, slot, obsolete); } else pluginsd_clear_scope_chart(parser, PLUGINSD_KEYWORD_CHART); @@ -652,12 +812,16 @@ static inline PARSER_RC pluginsd_chart_definition_end(char **words, size_t num_w } static inline PARSER_RC pluginsd_dimension(char **words, size_t num_words, PARSER *parser) { - char *id = get_word(words, num_words, 1); - char *name = get_word(words, num_words, 2); - char *algorithm = get_word(words, num_words, 3); - char *multiplier_s = get_word(words, num_words, 4); - char *divisor_s = get_word(words, num_words, 5); - char *options = get_word(words, num_words, 6); + int idx = 1; + ssize_t slot = pluginsd_parse_rrd_slot(words, num_words); + if(slot >= 0) idx++; + + char *id = get_word(words, num_words, idx++); + char *name = get_word(words, num_words, idx++); + char *algorithm = get_word(words, num_words, idx++); + char *multiplier_s = get_word(words, num_words, idx++); + char *divisor_s = get_word(words, num_words, idx++); + char *options = get_word(words, num_words, idx++); RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_DIMENSION); if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); @@ -696,11 +860,14 @@ static inline PARSER_RC pluginsd_dimension(char **words, size_t num_words, PARSE int unhide_dimension = 1; rrddim_option_clear(rd, RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS); + bool obsolete = false; if (options && *options) { - if (strstr(options, "obsolete") != NULL) - rrddim_is_obsolete(st, rd); + if (strstr(options, "obsolete") != NULL) { + obsolete = true; + rrddim_is_obsolete___safe_from_collector_thread(st, rd); + } else - rrddim_isnot_obsolete(st, rd); + rrddim_isnot_obsolete___safe_from_collector_thread(st, rd); unhide_dimension = !strstr(options, "hidden"); @@ -708,8 +875,9 @@ static inline PARSER_RC pluginsd_dimension(char **words, size_t num_words, PARSE rrddim_option_set(rd, RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS); if (strstr(options, "nooverflow") != NULL) rrddim_option_set(rd, RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS); - } else - rrddim_isnot_obsolete(st, rd); + } + else + rrddim_isnot_obsolete___safe_from_collector_thread(st, rd); bool should_update_dimension = false; @@ -727,6 +895,8 @@ static inline PARSER_RC pluginsd_dimension(char **words, size_t num_words, PARSE rrdhost_flag_set(rd->rrdset->rrdhost, RRDHOST_FLAG_METADATA_UPDATE); } + pluginsd_rrddim_put_to_slot(parser, st, rd, slot, obsolete); + return PARSER_RC_OK; } @@ -759,7 +929,7 @@ static void inflight_functions_insert_callback(const DICTIONARY_ITEM *item, void const char *transaction = dictionary_acquired_item_name(item); char buffer[2048 + 1]; - snprintfz(buffer, 2048, "%s %s %d \"%s\"\n", + snprintfz(buffer, sizeof(buffer) - 1, "%s %s %d \"%s\"\n", pf->payload ? "FUNCTION_PAYLOAD" : "FUNCTION", transaction, pf->timeout, @@ -932,7 +1102,7 @@ void pluginsd_function_cancel(void *data) { internal_error(true, "PLUGINSD: sending function cancellation to plugin for transaction '%s'", transaction); char buffer[2048 + 1]; - snprintfz(buffer, 2048, "%s %s\n", + snprintfz(buffer, sizeof(buffer) - 1, "%s %s\n", PLUGINSD_KEYWORD_FUNCTION_CANCEL, transaction); @@ -947,7 +1117,8 @@ void pluginsd_function_cancel(void *data) { dfe_done(t); if(sent <= 0) - netdata_log_error("PLUGINSD: FUNCTION_CANCEL request didn't match any pending function requests in pluginsd.d."); + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "PLUGINSD: FUNCTION_CANCEL request didn't match any pending function requests in pluginsd.d."); } // this is the function that is called from @@ -1247,6 +1418,15 @@ static inline PARSER_RC pluginsd_label(char **words, size_t num_words, PARSER *p if(unlikely(!(parser->user.new_host_labels))) parser->user.new_host_labels = rrdlabels_create(); + if (strcmp(name,HOST_LABEL_IS_EPHEMERAL) == 0) { + int is_ephemeral = appconfig_test_boolean_value((char *) value); + if (is_ephemeral) { + RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_LABEL); + if (likely(host)) + rrdhost_option_set(host, RRDHOST_OPTION_EPHEMERAL_HOST); + } + } + rrdlabels_add(parser->user.new_host_labels, name, store, str2l(label_source)); if (allocated_store) @@ -1265,6 +1445,8 @@ static inline PARSER_RC pluginsd_overwrite(char **words __maybe_unused, size_t n host->rrdlabels = rrdlabels_create(); rrdlabels_migrate_to_these(host->rrdlabels, parser->user.new_host_labels); + if (rrdhost_option_check(host, RRDHOST_OPTION_EPHEMERAL_HOST)) + rrdlabels_add(host->rrdlabels, HOST_LABEL_IS_EPHEMERAL, "true", RRDLABEL_SRC_CONFIG); rrdhost_flag_set(host, RRDHOST_FLAG_METADATA_LABELS | RRDHOST_FLAG_METADATA_UPDATE); rrdlabels_destroy(parser->user.new_host_labels); @@ -1311,16 +1493,21 @@ static inline PARSER_RC pluginsd_clabel_commit(char **words __maybe_unused, size rrdset_flag_set(st, RRDSET_FLAG_METADATA_UPDATE); rrdhost_flag_set(st->rrdhost, RRDHOST_FLAG_METADATA_UPDATE); - rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + rrdset_metadata_updated(st); + parser->user.chart_rrdlabels_linked_temporarily = NULL; return PARSER_RC_OK; } static inline PARSER_RC pluginsd_replay_begin(char **words, size_t num_words, PARSER *parser) { - char *id = get_word(words, num_words, 1); - char *start_time_str = get_word(words, num_words, 2); - char *end_time_str = get_word(words, num_words, 3); - char *child_now_str = get_word(words, num_words, 4); + int idx = 1; + ssize_t slot = pluginsd_parse_rrd_slot(words, num_words); + if(slot >= 0) idx++; + + char *id = get_word(words, num_words, idx++); + char *start_time_str = get_word(words, num_words, idx++); + char *end_time_str = get_word(words, num_words, idx++); + char *child_now_str = get_word(words, num_words, idx++); RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_REPLAY_BEGIN); if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); @@ -1329,7 +1516,7 @@ static inline PARSER_RC pluginsd_replay_begin(char **words, size_t num_words, PA if (likely(!id || !*id)) st = pluginsd_require_scope_chart(parser, PLUGINSD_KEYWORD_REPLAY_BEGIN, PLUGINSD_KEYWORD_REPLAY_BEGIN); else - st = pluginsd_find_chart(host, id, PLUGINSD_KEYWORD_REPLAY_BEGIN); + st = pluginsd_rrdset_cache_get_from_slot(parser, host, id, slot, PLUGINSD_KEYWORD_REPLAY_BEGIN); if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); @@ -1444,9 +1631,13 @@ static inline SN_FLAGS pluginsd_parse_storage_number_flags(const char *flags_str } static inline PARSER_RC pluginsd_replay_set(char **words, size_t num_words, PARSER *parser) { - char *dimension = get_word(words, num_words, 1); - char *value_str = get_word(words, num_words, 2); - char *flags_str = get_word(words, num_words, 3); + int idx = 1; + ssize_t slot = pluginsd_parse_rrd_slot(words, num_words); + if(slot >= 0) idx++; + + char *dimension = get_word(words, num_words, idx++); + char *value_str = get_word(words, num_words, idx++); + char *flags_str = get_word(words, num_words, idx++); RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_REPLAY_SET); if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); @@ -1455,15 +1646,16 @@ static inline PARSER_RC pluginsd_replay_set(char **words, size_t num_words, PARS if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); if(!parser->user.replay.rset_enabled) { - error_limit_static_thread_var(erl, 1, 0); - error_limit(&erl, "PLUGINSD: 'host:%s/chart:%s' got a %s but it is disabled by %s errors", - rrdhost_hostname(host), rrdset_id(st), PLUGINSD_KEYWORD_REPLAY_SET, PLUGINSD_KEYWORD_REPLAY_BEGIN); + nd_log_limit_static_thread_var(erl, 1, 0); + nd_log_limit(&erl, NDLS_COLLECTORS, NDLP_ERR, + "PLUGINSD: 'host:%s/chart:%s' got a %s but it is disabled by %s errors", + rrdhost_hostname(host), rrdset_id(st), PLUGINSD_KEYWORD_REPLAY_SET, PLUGINSD_KEYWORD_REPLAY_BEGIN); // we have to return OK here return PARSER_RC_OK; } - RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, PLUGINSD_KEYWORD_REPLAY_SET); + RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, slot, PLUGINSD_KEYWORD_REPLAY_SET); if(!rd) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); st->pluginsd.set = true; @@ -1504,8 +1696,10 @@ static inline PARSER_RC pluginsd_replay_set(char **words, size_t num_words, PARS rd->collector.counter++; } else { - error_limit_static_global_var(erl, 1, 0); - error_limit(&erl, "PLUGINSD: 'host:%s/chart:%s/dim:%s' has the ARCHIVED flag set, but it is replicated. Ignoring data.", + nd_log_limit_static_global_var(erl, 1, 0); + nd_log_limit(&erl, NDLS_COLLECTORS, NDLP_WARNING, + "PLUGINSD: 'host:%s/chart:%s/dim:%s' has the ARCHIVED flag set, but it is replicated. " + "Ignoring data.", rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_name(rd)); } } @@ -1517,11 +1711,15 @@ static inline PARSER_RC pluginsd_replay_rrddim_collection_state(char **words, si if(parser->user.replay.rset_enabled == false) return PARSER_RC_OK; - char *dimension = get_word(words, num_words, 1); - char *last_collected_ut_str = get_word(words, num_words, 2); - char *last_collected_value_str = get_word(words, num_words, 3); - char *last_calculated_value_str = get_word(words, num_words, 4); - char *last_stored_value_str = get_word(words, num_words, 5); + int idx = 1; + ssize_t slot = pluginsd_parse_rrd_slot(words, num_words); + if(slot >= 0) idx++; + + char *dimension = get_word(words, num_words, idx++); + char *last_collected_ut_str = get_word(words, num_words, idx++); + char *last_collected_value_str = get_word(words, num_words, idx++); + char *last_calculated_value_str = get_word(words, num_words, idx++); + char *last_stored_value_str = get_word(words, num_words, idx++); RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE); if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); @@ -1535,7 +1733,7 @@ static inline PARSER_RC pluginsd_replay_rrddim_collection_state(char **words, si st->pluginsd.set = false; } - RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE); + RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, slot, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE); if(!rd) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); usec_t dim_last_collected_ut = (usec_t)rd->collector.last_collected_time.tv_sec * USEC_PER_SEC + (usec_t)rd->collector.last_collected_time.tv_usec; @@ -1699,10 +1897,14 @@ static inline PARSER_RC pluginsd_replay_end(char **words, size_t num_words, PARS static inline PARSER_RC pluginsd_begin_v2(char **words, size_t num_words, PARSER *parser) { timing_init(); - char *id = get_word(words, num_words, 1); - char *update_every_str = get_word(words, num_words, 2); - char *end_time_str = get_word(words, num_words, 3); - char *wall_clock_time_str = get_word(words, num_words, 4); + int idx = 1; + ssize_t slot = pluginsd_parse_rrd_slot(words, num_words); + if(slot >= 0) idx++; + + char *id = get_word(words, num_words, idx++); + char *update_every_str = get_word(words, num_words, idx++); + char *end_time_str = get_word(words, num_words, idx++); + char *wall_clock_time_str = get_word(words, num_words, idx++); if(unlikely(!id || !update_every_str || !end_time_str || !wall_clock_time_str)) return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_BEGIN_V2, "missing parameters"); @@ -1712,14 +1914,15 @@ static inline PARSER_RC pluginsd_begin_v2(char **words, size_t num_words, PARSER timing_step(TIMING_STEP_BEGIN2_PREPARE); - RRDSET *st = pluginsd_find_chart(host, id, PLUGINSD_KEYWORD_BEGIN_V2); + RRDSET *st = pluginsd_rrdset_cache_get_from_slot(parser, host, id, slot, PLUGINSD_KEYWORD_BEGIN_V2); + if(unlikely(!st)) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); if(!pluginsd_set_scope_chart(parser, st, PLUGINSD_KEYWORD_BEGIN_V2)) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE))) - rrdset_isnot_obsolete(st); + rrdset_isnot_obsolete___safe_from_collector_thread(st); timing_step(TIMING_STEP_BEGIN2_FIND_CHART); @@ -1759,9 +1962,12 @@ static inline PARSER_RC pluginsd_begin_v2(char **words, size_t num_words, PARSER parser->user.v2.stream_buffer = rrdset_push_metric_initialize(parser->user.st, wall_clock_time); if(parser->user.v2.stream_buffer.v2 && parser->user.v2.stream_buffer.wb) { - // check if receiver and sender have the same number parsing capabilities + // check receiver capabilities bool can_copy = stream_has_capability(&parser->user, STREAM_CAP_IEEE754) == stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_IEEE754); - NUMBER_ENCODING encoding = stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_HEX; + + // check sender capabilities + bool with_slots = stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_SLOTS) ? true : false; + NUMBER_ENCODING integer_encoding = stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_HEX; BUFFER *wb = parser->user.v2.stream_buffer.wb; @@ -1770,28 +1976,35 @@ static inline PARSER_RC pluginsd_begin_v2(char **words, size_t num_words, PARSER if(unlikely(parser->user.v2.stream_buffer.begin_v2_added)) buffer_fast_strcat(wb, PLUGINSD_KEYWORD_END_V2 "\n", sizeof(PLUGINSD_KEYWORD_END_V2) - 1 + 1); - buffer_fast_strcat(wb, PLUGINSD_KEYWORD_BEGIN_V2 " '", sizeof(PLUGINSD_KEYWORD_BEGIN_V2) - 1 + 2); + buffer_fast_strcat(wb, PLUGINSD_KEYWORD_BEGIN_V2, sizeof(PLUGINSD_KEYWORD_BEGIN_V2) - 1); + + if(with_slots) { + buffer_fast_strcat(wb, " "PLUGINSD_KEYWORD_SLOT":", sizeof(PLUGINSD_KEYWORD_SLOT) - 1 + 2); + buffer_print_uint64_encoded(wb, integer_encoding, st->rrdpush.sender.chart_slot); + } + + buffer_fast_strcat(wb, " '", 2); buffer_fast_strcat(wb, rrdset_id(st), string_strlen(st->id)); buffer_fast_strcat(wb, "' ", 2); if(can_copy) buffer_strcat(wb, update_every_str); else - buffer_print_uint64_encoded(wb, encoding, update_every); + buffer_print_uint64_encoded(wb, integer_encoding, update_every); buffer_fast_strcat(wb, " ", 1); if(can_copy) buffer_strcat(wb, end_time_str); else - buffer_print_uint64_encoded(wb, encoding, end_time); + buffer_print_uint64_encoded(wb, integer_encoding, end_time); buffer_fast_strcat(wb, " ", 1); if(can_copy) buffer_strcat(wb, wall_clock_time_str); else - buffer_print_uint64_encoded(wb, encoding, wall_clock_time); + buffer_print_uint64_encoded(wb, integer_encoding, wall_clock_time); buffer_fast_strcat(wb, "\n", 1); @@ -1824,10 +2037,14 @@ static inline PARSER_RC pluginsd_begin_v2(char **words, size_t num_words, PARSER static inline PARSER_RC pluginsd_set_v2(char **words, size_t num_words, PARSER *parser) { timing_init(); - char *dimension = get_word(words, num_words, 1); - char *collected_str = get_word(words, num_words, 2); - char *value_str = get_word(words, num_words, 3); - char *flags_str = get_word(words, num_words, 4); + int idx = 1; + ssize_t slot = pluginsd_parse_rrd_slot(words, num_words); + if(slot >= 0) idx++; + + char *dimension = get_word(words, num_words, idx++); + char *collected_str = get_word(words, num_words, idx++); + char *value_str = get_word(words, num_words, idx++); + char *flags_str = get_word(words, num_words, idx++); if(unlikely(!dimension || !collected_str || !value_str || !flags_str)) return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_SET_V2, "missing parameters"); @@ -1840,13 +2057,13 @@ static inline PARSER_RC pluginsd_set_v2(char **words, size_t num_words, PARSER * timing_step(TIMING_STEP_SET2_PREPARE); - RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, PLUGINSD_KEYWORD_SET_V2); + RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, slot, PLUGINSD_KEYWORD_SET_V2); if(unlikely(!rd)) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); st->pluginsd.set = true; if(unlikely(rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE | RRDDIM_FLAG_ARCHIVED))) - rrddim_isnot_obsolete(st, rd); + rrddim_isnot_obsolete___safe_from_collector_thread(st, rd); timing_step(TIMING_STEP_SET2_LOOKUP_DIMENSION); @@ -1892,12 +2109,22 @@ static inline PARSER_RC pluginsd_set_v2(char **words, size_t num_words, PARSER * if(parser->user.v2.stream_buffer.v2 && parser->user.v2.stream_buffer.begin_v2_added && parser->user.v2.stream_buffer.wb) { // check if receiver and sender have the same number parsing capabilities bool can_copy = stream_has_capability(&parser->user, STREAM_CAP_IEEE754) == stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_IEEE754); + + // check the sender capabilities + bool with_slots = stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_SLOTS) ? true : false; NUMBER_ENCODING integer_encoding = stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_HEX; NUMBER_ENCODING doubles_encoding = stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_DECIMAL; BUFFER *wb = parser->user.v2.stream_buffer.wb; buffer_need_bytes(wb, 1024); - buffer_fast_strcat(wb, PLUGINSD_KEYWORD_SET_V2 " '", sizeof(PLUGINSD_KEYWORD_SET_V2) - 1 + 2); + buffer_fast_strcat(wb, PLUGINSD_KEYWORD_SET_V2, sizeof(PLUGINSD_KEYWORD_SET_V2) - 1); + + if(with_slots) { + buffer_fast_strcat(wb, " "PLUGINSD_KEYWORD_SLOT":", sizeof(PLUGINSD_KEYWORD_SLOT) - 1 + 2); + buffer_print_uint64_encoded(wb, integer_encoding, rd->rrdpush.sender.dim_slot); + } + + buffer_fast_strcat(wb, " '", 2); buffer_fast_strcat(wb, rrddim_id(rd), string_strlen(rd->id)); buffer_fast_strcat(wb, "' ", 2); if(can_copy) @@ -1978,13 +2205,27 @@ static inline PARSER_RC pluginsd_end_v2(char **words __maybe_unused, size_t num_ // ------------------------------------------------------------------------ // cleanup RRDSET / RRDDIM - RRDDIM *rd; - rrddim_foreach_read(rd, st) { - rd->collector.calculated_value = 0; - rd->collector.collected_value = 0; - rrddim_clear_updated(rd); + if(likely(st->pluginsd.dims_with_slots)) { + for(size_t i = 0; i < st->pluginsd.size ;i++) { + RRDDIM *rd = st->pluginsd.prd_array[i].rd; + + if(!rd) + continue; + + rd->collector.calculated_value = 0; + rd->collector.collected_value = 0; + rrddim_clear_updated(rd); + } + } + else { + RRDDIM *rd; + rrddim_foreach_read(rd, st){ + rd->collector.calculated_value = 0; + rd->collector.collected_value = 0; + rrddim_clear_updated(rd); + } + rrddim_foreach_done(rd); } - rrddim_foreach_done(rd); // ------------------------------------------------------------------------ // reset state @@ -2438,24 +2679,35 @@ static inline PARSER_RC pluginsd_register_module(char **words __maybe_unused, si } static inline PARSER_RC pluginsd_register_job_common(char **words __maybe_unused, size_t num_words __maybe_unused, PARSER *parser __maybe_unused, const char *plugin_name) { - if (atol(words[3]) < 0) - return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_JOB, "invalid flags"); - dyncfg_job_flg_t flags = atol(words[3]); + const char *module_name = words[0]; + const char *job_name = words[1]; + const char *job_type_str = words[2]; + const char *flags_str = words[3]; + + long f = str2l(flags_str); + + if (f < 0) + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_JOB, "invalid flags received"); + + dyncfg_job_flg_t flags = f; + if (SERVING_PLUGINSD(parser)) flags |= JOB_FLG_PLUGIN_PUSHED; else flags |= JOB_FLG_STREAMING_PUSHED; - enum job_type job_type = str2job_type(words[2]); + enum job_type job_type = dyncfg_str2job_type(job_type_str); if (job_type == JOB_TYPE_UNKNOWN) return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_JOB, "unknown job type"); + if (SERVING_PLUGINSD(parser) && job_type == JOB_TYPE_USER) return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_JOB, "plugins cannot push jobs of type \"user\" (this is allowed only in streaming)"); - if (register_job(parser->user.host->configurable_plugins, plugin_name, words[0], words[1], job_type, flags, 0)) // ignore existing is off as this is explicitly called register job + if (register_job(parser->user.host->configurable_plugins, plugin_name, module_name, job_name, job_type, flags, 0)) // ignore existing is off as this is explicitly called register job return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_JOB, "error registering job"); - rrdpush_send_dyncfg_reg_job(parser->user.host, plugin_name, words[0], words[1], job_type, flags); + rrdpush_send_dyncfg_reg_job(parser->user.host, plugin_name, module_name, job_name, job_type, flags); + return PARSER_RC_OK; } @@ -2474,6 +2726,25 @@ static inline PARSER_RC pluginsd_register_job(char **words __maybe_unused, size_ return pluginsd_register_job_common(&words[2], num_words - 2, parser, words[1]); } +static inline PARSER_RC pluginsd_dyncfg_reset(char **words __maybe_unused, size_t num_words __maybe_unused, PARSER *parser __maybe_unused) { + if (unlikely(num_words != (SERVING_PLUGINSD(parser) ? 1 : 2))) + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_RESET, SERVING_PLUGINSD(parser) ? "expected 0 parameters" : "expected 1 parameter: plugin_name"); + + if (SERVING_PLUGINSD(parser)) { + unregister_plugin(parser->user.host->configurable_plugins, parser->user.cd->cfg_dict_item); + rrdpush_send_dyncfg_reset(parser->user.host, parser->user.cd->configuration->name); + parser->user.cd->configuration = NULL; + } else { + const DICTIONARY_ITEM *di = dictionary_get_and_acquire_item(parser->user.host->configurable_plugins, words[1]); + if (unlikely(di == NULL)) + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_RESET, "plugin not found"); + unregister_plugin(parser->user.host->configurable_plugins, di); + rrdpush_send_dyncfg_reset(parser->user.host, words[1]); + } + + return PARSER_RC_OK; +} + static inline PARSER_RC pluginsd_job_status_common(char **words, size_t num_words, PARSER *parser, const char *plugin_name) { int state = str2i(words[3]); @@ -2482,7 +2753,7 @@ static inline PARSER_RC pluginsd_job_status_common(char **words, size_t num_word return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_REPORT_JOB_STATUS, "unknown job status"); char *message = NULL; - if (num_words == 5) + if (num_words == 5 && strlen(words[4]) > 0) message = words[4]; const DICTIONARY_ITEM *plugin_item; @@ -2584,70 +2855,49 @@ static inline PARSER_RC streaming_claimed_id(char **words, size_t num_words, PAR // ---------------------------------------------------------------------------- -static inline bool buffered_reader_read(struct buffered_reader *reader, int fd) { -#ifdef NETDATA_INTERNAL_CHECKS - if(reader->read_buffer[reader->read_len] != '\0') - fatal("%s(): read_buffer does not start with zero", __FUNCTION__ ); -#endif - - ssize_t bytes_read = read(fd, reader->read_buffer + reader->read_len, sizeof(reader->read_buffer) - reader->read_len - 1); - if(unlikely(bytes_read <= 0)) - return false; - - reader->read_len += bytes_read; - reader->read_buffer[reader->read_len] = '\0'; - - return true; -} - -static inline bool buffered_reader_read_timeout(struct buffered_reader *reader, int fd, int timeout_ms) { - errno = 0; - struct pollfd fds[1]; +void pluginsd_process_thread_cleanup(void *ptr) { + PARSER *parser = (PARSER *)ptr; - fds[0].fd = fd; - fds[0].events = POLLIN; + pluginsd_cleanup_v2(parser); + pluginsd_host_define_cleanup(parser); - int ret = poll(fds, 1, timeout_ms); + rrd_collector_finished(); - if (ret > 0) { - /* There is data to read */ - if (fds[0].revents & POLLIN) - return buffered_reader_read(reader, fd); +#ifdef NETDATA_LOG_STREAM_RECEIVE + if(parser->user.stream_log_fp) { + fclose(parser->user.stream_log_fp); + parser->user.stream_log_fp = NULL; + } +#endif - else if(fds[0].revents & POLLERR) { - netdata_log_error("PARSER: read failed: POLLERR."); - return false; - } - else if(fds[0].revents & POLLHUP) { - netdata_log_error("PARSER: read failed: POLLHUP."); - return false; - } - else if(fds[0].revents & POLLNVAL) { - netdata_log_error("PARSER: read failed: POLLNVAL."); - return false; - } + parser_destroy(parser); +} - netdata_log_error("PARSER: poll() returned positive number, but POLLIN|POLLERR|POLLHUP|POLLNVAL are not set."); - return false; - } - else if (ret == 0) { - netdata_log_error("PARSER: timeout while waiting for data."); +bool parser_reconstruct_node(BUFFER *wb, void *ptr) { + PARSER *parser = ptr; + if(!parser || !parser->user.host) return false; - } - netdata_log_error("PARSER: poll() failed with code %d.", ret); - return false; + buffer_strcat(wb, rrdhost_hostname(parser->user.host)); + return true; } -void pluginsd_process_thread_cleanup(void *ptr) { - PARSER *parser = (PARSER *)ptr; +bool parser_reconstruct_instance(BUFFER *wb, void *ptr) { + PARSER *parser = ptr; + if(!parser || !parser->user.st) + return false; - pluginsd_cleanup_v2(parser); - pluginsd_host_define_cleanup(parser); + buffer_strcat(wb, rrdset_name(parser->user.st)); + return true; +} - rrd_collector_finished(); +bool parser_reconstruct_context(BUFFER *wb, void *ptr) { + PARSER *parser = ptr; + if(!parser || !parser->user.st) + return false; - parser_destroy(parser); + buffer_strcat(wb, string2str(parser->user.st->context)); + return true; } inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp_plugin_input, FILE *fp_plugin_output, int trust_durations) @@ -2697,33 +2947,51 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp_plugi // so, parser needs to be allocated before pushing it netdata_thread_cleanup_push(pluginsd_process_thread_cleanup, parser); - buffered_reader_init(&parser->reader); - BUFFER *buffer = buffer_create(sizeof(parser->reader.read_buffer) + 2, NULL); - while(likely(service_running(SERVICE_COLLECTORS))) { - if (unlikely(!buffered_reader_next_line(&parser->reader, buffer))) { - if(unlikely(!buffered_reader_read_timeout(&parser->reader, fileno((FILE *)parser->fp_input), 2 * 60 * MSEC_PER_SEC))) - break; - - continue; - } - - if(unlikely(parser_action(parser, buffer->buffer))) - break; - - buffer->len = 0; - buffer->buffer[0] = '\0'; - } - buffer_free(buffer); - - cd->unsafe.enabled = parser->user.enabled; - count = parser->user.data_collections_count; - - if (likely(count)) { - cd->successful_collections += count; - cd->serial_failures = 0; - } - else - cd->serial_failures++; + { + ND_LOG_STACK lgs[] = { + ND_LOG_FIELD_CB(NDF_REQUEST, line_splitter_reconstruct_line, &parser->line), + ND_LOG_FIELD_CB(NDF_NIDL_NODE, parser_reconstruct_node, parser), + ND_LOG_FIELD_CB(NDF_NIDL_INSTANCE, parser_reconstruct_instance, parser), + ND_LOG_FIELD_CB(NDF_NIDL_CONTEXT, parser_reconstruct_context, parser), + ND_LOG_FIELD_END(), + }; + ND_LOG_STACK_PUSH(lgs); + + buffered_reader_init(&parser->reader); + BUFFER *buffer = buffer_create(sizeof(parser->reader.read_buffer) + 2, NULL); + while(likely(service_running(SERVICE_COLLECTORS))) { + + if(unlikely(!buffered_reader_next_line(&parser->reader, buffer))) { + buffered_reader_ret_t ret = buffered_reader_read_timeout( + &parser->reader, + fileno((FILE *) parser->fp_input), + 2 * 60 * MSEC_PER_SEC, true + ); + + if(unlikely(ret != BUFFERED_READER_READ_OK)) + break; + + continue; + } + + if(unlikely(parser_action(parser, buffer->buffer))) + break; + + buffer->len = 0; + buffer->buffer[0] = '\0'; + } + buffer_free(buffer); + + cd->unsafe.enabled = parser->user.enabled; + count = parser->user.data_collections_count; + + if(likely(count)) { + cd->successful_collections += count; + cd->serial_failures = 0; + } + else + cd->serial_failures++; + } // free parser with the pop function netdata_thread_cleanup_pop(1); @@ -2855,6 +3123,9 @@ PARSER_RC parser_execute(PARSER *parser, PARSER_KEYWORD *keyword, char **words, case 103: return pluginsd_register_job(words, num_words, parser); + case 104: + return pluginsd_dyncfg_reset(words, num_words, parser); + case 110: return pluginsd_job_status(words, num_words, parser); diff --git a/collectors/plugins.d/pluginsd_parser.h b/collectors/plugins.d/pluginsd_parser.h index 74767569b..1fce9a89a 100644 --- a/collectors/plugins.d/pluginsd_parser.h +++ b/collectors/plugins.d/pluginsd_parser.h @@ -11,8 +11,11 @@ #define WORKER_RECEIVER_JOB_REPLICATION_COMPLETION (WORKER_PARSER_FIRST_JOB - 3) // this controls the max response size of a function -#define PLUGINSD_MAX_DEFERRED_SIZE (20 * 1024 * 1024) +#define PLUGINSD_MAX_DEFERRED_SIZE (100 * 1024 * 1024) +#define PLUGINSD_MIN_RRDSET_POINTERS_CACHE 1024 + +#define HOST_LABEL_IS_EPHEMERAL "_is_ephemeral" // PARSER return codes typedef enum __attribute__ ((__packed__)) parser_rc { PARSER_RC_OK, // Callback was successful, go on @@ -28,6 +31,7 @@ typedef enum __attribute__ ((__packed__)) parser_input_type { typedef enum __attribute__ ((__packed__)) { PARSER_INIT_PLUGINSD = (1 << 1), PARSER_INIT_STREAMING = (1 << 2), + PARSER_REP_METADATA = (1 << 3), } PARSER_REPERTOIRE; struct parser; @@ -41,6 +45,7 @@ typedef struct parser_keyword { } PARSER_KEYWORD; typedef struct parser_user_object { + bool cleanup_slots; RRDSET *st; RRDHOST *host; void *opaque; @@ -51,6 +56,11 @@ typedef struct parser_user_object { size_t data_collections_count; int enabled; +#ifdef NETDATA_LOG_STREAM_RECEIVE + FILE *stream_log_fp; + PARSER_REPERTOIRE stream_log_repertoire; +#endif + STREAM_CAPABILITIES capabilities; // receiver capabilities struct { @@ -88,17 +98,21 @@ typedef struct parser { PARSER_REPERTOIRE repertoire; uint32_t flags; int fd; // Socket - size_t line; FILE *fp_input; // Input source e.g. stream FILE *fp_output; // Stream to send commands to plugin #ifdef ENABLE_HTTPS NETDATA_SSL *ssl_output; #endif +#ifdef ENABLE_H2O + void *h2o_ctx; // if set we use h2o_stream functions to send data +#endif PARSER_USER_OBJECT user; // User defined structure to hold extra state between calls struct buffered_reader reader; + struct line_splitter line; + PARSER_KEYWORD *keyword; struct { const char *end_keyword; @@ -150,8 +164,17 @@ static inline PARSER_KEYWORD *parser_find_keyword(PARSER *parser, const char *co return NULL; } +bool parser_reconstruct_node(BUFFER *wb, void *ptr); +bool parser_reconstruct_instance(BUFFER *wb, void *ptr); +bool parser_reconstruct_context(BUFFER *wb, void *ptr); + static inline int parser_action(PARSER *parser, char *input) { - parser->line++; +#ifdef NETDATA_LOG_STREAM_RECEIVE + static __thread char line[PLUGINSD_LINE_MAX + 1]; + strncpyz(line, input, sizeof(line) - 1); +#endif + + parser->line.count++; if(unlikely(parser->flags & PARSER_DEFER_UNTIL_KEYWORD)) { char command[100 + 1]; @@ -183,18 +206,25 @@ static inline int parser_action(PARSER *parser, char *input) { return 0; } - static __thread char *words[PLUGINSD_MAX_WORDS]; - size_t num_words = quoted_strings_splitter_pluginsd(input, words, PLUGINSD_MAX_WORDS); - const char *command = get_word(words, num_words, 0); + parser->line.num_words = quoted_strings_splitter_pluginsd(input, parser->line.words, PLUGINSD_MAX_WORDS); + const char *command = get_word(parser->line.words, parser->line.num_words, 0); - if(unlikely(!command)) + if(unlikely(!command)) { + line_splitter_reset(&parser->line); return 0; + } PARSER_RC rc; - PARSER_KEYWORD *t = parser_find_keyword(parser, command); - if(likely(t)) { - worker_is_busy(t->worker_job_id); - rc = parser_execute(parser, t, words, num_words); + parser->keyword = parser_find_keyword(parser, command); + if(likely(parser->keyword)) { + worker_is_busy(parser->keyword->worker_job_id); + +#ifdef NETDATA_LOG_STREAM_RECEIVE + if(parser->user.stream_log_fp && parser->keyword->repertoire & parser->user.stream_log_repertoire) + fprintf(parser->user.stream_log_fp, "%s", line); +#endif + + rc = parser_execute(parser, parser->keyword, parser->line.words, parser->line.num_words); // rc = (*t->func)(words, num_words, parser); worker_is_idle(); } @@ -202,22 +232,13 @@ static inline int parser_action(PARSER *parser, char *input) { rc = PARSER_RC_ERROR; if(rc == PARSER_RC_ERROR) { - BUFFER *wb = buffer_create(PLUGINSD_LINE_MAX, NULL); - for(size_t i = 0; i < num_words ;i++) { - if(i) buffer_fast_strcat(wb, " ", 1); - - buffer_fast_strcat(wb, "\"", 1); - const char *s = get_word(words, num_words, i); - buffer_strcat(wb, s?s:""); - buffer_fast_strcat(wb, "\"", 1); - } - + CLEAN_BUFFER *wb = buffer_create(PLUGINSD_LINE_MAX, NULL); + line_splitter_reconstruct_line(wb, &parser->line); netdata_log_error("PLUGINSD: parser_action('%s') failed on line %zu: { %s } (quotes added to show parsing)", - command, parser->line, buffer_tostring(wb)); - - buffer_free(wb); + command, parser->line.count, buffer_tostring(wb)); } + line_splitter_reset(&parser->line); return (rc == PARSER_RC_ERROR || rc == PARSER_RC_STOP); } diff --git a/collectors/proc.plugin/integrations/amd_gpu.md b/collectors/proc.plugin/integrations/amd_gpu.md index c9964dbb7..e85cce221 100644 --- a/collectors/proc.plugin/integrations/amd_gpu.md +++ b/collectors/proc.plugin/integrations/amd_gpu.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "AMD GPU" learn_status: "Published" learn_rel_path: "Data Collection/Hardware Devices and Sensors" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/btrfs.md b/collectors/proc.plugin/integrations/btrfs.md index 7c0764cf0..5f994c841 100644 --- a/collectors/proc.plugin/integrations/btrfs.md +++ b/collectors/proc.plugin/integrations/btrfs.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "BTRFS" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Filesystem/BTRFS" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/conntrack.md b/collectors/proc.plugin/integrations/conntrack.md index 543aafc16..b38f6b508 100644 --- a/collectors/proc.plugin/integrations/conntrack.md +++ b/collectors/proc.plugin/integrations/conntrack.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "Conntrack" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Firewall" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/disk_statistics.md b/collectors/proc.plugin/integrations/disk_statistics.md index fc2ce5b08..8f7448c39 100644 --- a/collectors/proc.plugin/integrations/disk_statistics.md +++ b/collectors/proc.plugin/integrations/disk_statistics.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "Disk Statistics" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Disk" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/entropy.md b/collectors/proc.plugin/integrations/entropy.md index debf2e75e..8432a1f96 100644 --- a/collectors/proc.plugin/integrations/entropy.md +++ b/collectors/proc.plugin/integrations/entropy.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "Entropy" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/System" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/infiniband.md b/collectors/proc.plugin/integrations/infiniband.md index 6ebefe73e..6cb5fdc53 100644 --- a/collectors/proc.plugin/integrations/infiniband.md +++ b/collectors/proc.plugin/integrations/infiniband.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "InfiniBand" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Network" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/inter_process_communication.md b/collectors/proc.plugin/integrations/inter_process_communication.md index b36b02d3b..55708a4b0 100644 --- a/collectors/proc.plugin/integrations/inter_process_communication.md +++ b/collectors/proc.plugin/integrations/inter_process_communication.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "Inter Process Communication" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/IPC" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/interrupts.md b/collectors/proc.plugin/integrations/interrupts.md index 756324163..1b85fb767 100644 --- a/collectors/proc.plugin/integrations/interrupts.md +++ b/collectors/proc.plugin/integrations/interrupts.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "Interrupts" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/CPU" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/ip_virtual_server.md b/collectors/proc.plugin/integrations/ip_virtual_server.md index 22f43544e..5c7afd2eb 100644 --- a/collectors/proc.plugin/integrations/ip_virtual_server.md +++ b/collectors/proc.plugin/integrations/ip_virtual_server.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "IP Virtual Server" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Network" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/ipv6_socket_statistics.md b/collectors/proc.plugin/integrations/ipv6_socket_statistics.md index bf0fbaa00..2c1ee2721 100644 --- a/collectors/proc.plugin/integrations/ipv6_socket_statistics.md +++ b/collectors/proc.plugin/integrations/ipv6_socket_statistics.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "IPv6 Socket Statistics" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Network" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/kernel_same-page_merging.md b/collectors/proc.plugin/integrations/kernel_same-page_merging.md index bed7891bd..336f0feaf 100644 --- a/collectors/proc.plugin/integrations/kernel_same-page_merging.md +++ b/collectors/proc.plugin/integrations/kernel_same-page_merging.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "Kernel Same-Page Merging" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Memory" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/md_raid.md b/collectors/proc.plugin/integrations/md_raid.md index ef78b8269..34a4840bb 100644 --- a/collectors/proc.plugin/integrations/md_raid.md +++ b/collectors/proc.plugin/integrations/md_raid.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "MD RAID" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Disk" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/memory_modules_dimms.md b/collectors/proc.plugin/integrations/memory_modules_dimms.md index dc59fe5fc..351c6fcd7 100644 --- a/collectors/proc.plugin/integrations/memory_modules_dimms.md +++ b/collectors/proc.plugin/integrations/memory_modules_dimms.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "Memory modules (DIMMs)" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Memory" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/memory_statistics.md b/collectors/proc.plugin/integrations/memory_statistics.md index 712b4b5e8..52f1bf530 100644 --- a/collectors/proc.plugin/integrations/memory_statistics.md +++ b/collectors/proc.plugin/integrations/memory_statistics.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "Memory Statistics" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Memory" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/memory_usage.md b/collectors/proc.plugin/integrations/memory_usage.md index 0eef72b12..141bd29ad 100644 --- a/collectors/proc.plugin/integrations/memory_usage.md +++ b/collectors/proc.plugin/integrations/memory_usage.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "Memory Usage" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Memory" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/network_interfaces.md b/collectors/proc.plugin/integrations/network_interfaces.md index 0d26b5b66..0cfd56fae 100644 --- a/collectors/proc.plugin/integrations/network_interfaces.md +++ b/collectors/proc.plugin/integrations/network_interfaces.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "Network interfaces" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Network" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/network_statistics.md b/collectors/proc.plugin/integrations/network_statistics.md index f43da8339..726fd9d61 100644 --- a/collectors/proc.plugin/integrations/network_statistics.md +++ b/collectors/proc.plugin/integrations/network_statistics.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "Network statistics" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Network" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/nfs_client.md b/collectors/proc.plugin/integrations/nfs_client.md index 696e0c0d6..db5847714 100644 --- a/collectors/proc.plugin/integrations/nfs_client.md +++ b/collectors/proc.plugin/integrations/nfs_client.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "NFS Client" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Filesystem/NFS" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/nfs_server.md b/collectors/proc.plugin/integrations/nfs_server.md index ddbf03f90..0c906b4d8 100644 --- a/collectors/proc.plugin/integrations/nfs_server.md +++ b/collectors/proc.plugin/integrations/nfs_server.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "NFS Server" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Filesystem/NFS" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/non-uniform_memory_access.md b/collectors/proc.plugin/integrations/non-uniform_memory_access.md index 58b96a3e7..6f495fb79 100644 --- a/collectors/proc.plugin/integrations/non-uniform_memory_access.md +++ b/collectors/proc.plugin/integrations/non-uniform_memory_access.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "Non-Uniform Memory Access" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Memory" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/page_types.md b/collectors/proc.plugin/integrations/page_types.md index 7f84182de..b228629b6 100644 --- a/collectors/proc.plugin/integrations/page_types.md +++ b/collectors/proc.plugin/integrations/page_types.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "Page types" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Memory" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/power_supply.md b/collectors/proc.plugin/integrations/power_supply.md index 4980f845b..9a474e82a 100644 --- a/collectors/proc.plugin/integrations/power_supply.md +++ b/collectors/proc.plugin/integrations/power_supply.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "Power Supply" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Power Supply" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/pressure_stall_information.md b/collectors/proc.plugin/integrations/pressure_stall_information.md index e590a8d38..53f4aa050 100644 --- a/collectors/proc.plugin/integrations/pressure_stall_information.md +++ b/collectors/proc.plugin/integrations/pressure_stall_information.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "Pressure Stall Information" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Pressure" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/sctp_statistics.md b/collectors/proc.plugin/integrations/sctp_statistics.md index ad9c26bf5..15c0d424d 100644 --- a/collectors/proc.plugin/integrations/sctp_statistics.md +++ b/collectors/proc.plugin/integrations/sctp_statistics.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "SCTP Statistics" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Network" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/socket_statistics.md b/collectors/proc.plugin/integrations/socket_statistics.md index 2c59f9883..d8ef26647 100644 --- a/collectors/proc.plugin/integrations/socket_statistics.md +++ b/collectors/proc.plugin/integrations/socket_statistics.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "Socket statistics" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Network" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/softirq_statistics.md b/collectors/proc.plugin/integrations/softirq_statistics.md index 56cf9ab5c..f966cf971 100644 --- a/collectors/proc.plugin/integrations/softirq_statistics.md +++ b/collectors/proc.plugin/integrations/softirq_statistics.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "SoftIRQ statistics" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/CPU" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/softnet_statistics.md b/collectors/proc.plugin/integrations/softnet_statistics.md index 84ac5ac88..58e6cf6e5 100644 --- a/collectors/proc.plugin/integrations/softnet_statistics.md +++ b/collectors/proc.plugin/integrations/softnet_statistics.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "Softnet Statistics" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Network" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/synproxy.md b/collectors/proc.plugin/integrations/synproxy.md index 04169773b..2db17ef6f 100644 --- a/collectors/proc.plugin/integrations/synproxy.md +++ b/collectors/proc.plugin/integrations/synproxy.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "Synproxy" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Firewall" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/system_load_average.md b/collectors/proc.plugin/integrations/system_load_average.md index caff72737..6e986d90c 100644 --- a/collectors/proc.plugin/integrations/system_load_average.md +++ b/collectors/proc.plugin/integrations/system_load_average.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "System Load Average" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/System" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/system_statistics.md b/collectors/proc.plugin/integrations/system_statistics.md index 2932dd8d2..f3df1a19a 100644 --- a/collectors/proc.plugin/integrations/system_statistics.md +++ b/collectors/proc.plugin/integrations/system_statistics.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "System statistics" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/System" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/system_uptime.md b/collectors/proc.plugin/integrations/system_uptime.md index 7eedd4313..0954c0642 100644 --- a/collectors/proc.plugin/integrations/system_uptime.md +++ b/collectors/proc.plugin/integrations/system_uptime.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "System Uptime" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/System" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/wireless_network_interfaces.md b/collectors/proc.plugin/integrations/wireless_network_interfaces.md index 57375b975..a8d2406ee 100644 --- a/collectors/proc.plugin/integrations/wireless_network_interfaces.md +++ b/collectors/proc.plugin/integrations/wireless_network_interfaces.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "Wireless network interfaces" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Network" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/zfs_adaptive_replacement_cache.md b/collectors/proc.plugin/integrations/zfs_adaptive_replacement_cache.md index d62d12ee6..c200ba673 100644 --- a/collectors/proc.plugin/integrations/zfs_adaptive_replacement_cache.md +++ b/collectors/proc.plugin/integrations/zfs_adaptive_replacement_cache.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "ZFS Adaptive Replacement Cache" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Filesystem/ZFS" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/zfs_pools.md b/collectors/proc.plugin/integrations/zfs_pools.md index b913572e3..2985d39b0 100644 --- a/collectors/proc.plugin/integrations/zfs_pools.md +++ b/collectors/proc.plugin/integrations/zfs_pools.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "ZFS Pools" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Filesystem/ZFS" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/integrations/zram.md b/collectors/proc.plugin/integrations/zram.md index 0bcda3eaf..111b17c62 100644 --- a/collectors/proc.plugin/integrations/zram.md +++ b/collectors/proc.plugin/integrations/zram.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/proc.plugi sidebar_label: "ZRAM" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Memory" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> diff --git a/collectors/proc.plugin/ipc.c b/collectors/proc.plugin/ipc.c index b166deba6..204977bdf 100644 --- a/collectors/proc.plugin/ipc.c +++ b/collectors/proc.plugin/ipc.c @@ -451,8 +451,8 @@ int do_ipc(int update_every, usec_t dt) { msq->found = 0; } else { - rrddim_is_obsolete(st_msq_messages, msq->rd_messages); - rrddim_is_obsolete(st_msq_bytes, msq->rd_bytes); + rrddim_is_obsolete___safe_from_collector_thread(st_msq_messages, msq->rd_messages); + rrddim_is_obsolete___safe_from_collector_thread(st_msq_bytes, msq->rd_bytes); // remove message queue from the linked list if(!msq_prev) @@ -480,19 +480,19 @@ int do_ipc(int update_every, usec_t dt) { if(unlikely(dimensions_num > dimensions_limit)) { collector_info("Message queue statistics has been disabled"); collector_info("There are %lld dimensions in memory but limit was set to %lld", dimensions_num, dimensions_limit); - rrdset_is_obsolete(st_msq_messages); - rrdset_is_obsolete(st_msq_bytes); + rrdset_is_obsolete___safe_from_collector_thread(st_msq_messages); + rrdset_is_obsolete___safe_from_collector_thread(st_msq_bytes); st_msq_messages = NULL; st_msq_bytes = NULL; do_msg = CONFIG_BOOLEAN_NO; } else if(unlikely(!message_queue_root)) { collector_info("Making chart %s (%s) obsolete since it does not have any dimensions", rrdset_name(st_msq_messages), rrdset_id(st_msq_messages)); - rrdset_is_obsolete(st_msq_messages); + rrdset_is_obsolete___safe_from_collector_thread(st_msq_messages); st_msq_messages = NULL; collector_info("Making chart %s (%s) obsolete since it does not have any dimensions", rrdset_name(st_msq_bytes), rrdset_id(st_msq_bytes)); - rrdset_is_obsolete(st_msq_bytes); + rrdset_is_obsolete___safe_from_collector_thread(st_msq_bytes); st_msq_bytes = NULL; } } diff --git a/collectors/proc.plugin/plugin_proc.c b/collectors/proc.plugin/plugin_proc.c index fbcaa614a..3f11aaf6c 100644 --- a/collectors/proc.plugin/plugin_proc.c +++ b/collectors/proc.plugin/plugin_proc.c @@ -138,10 +138,18 @@ static bool is_lxcfs_proc_mounted() { return false; } +static bool log_proc_module(BUFFER *wb, void *data) { + struct proc_module *pm = data; + buffer_sprintf(wb, "proc.plugin[%s]", pm->name); + return true; +} + void *proc_main(void *ptr) { worker_register("PROC"); + rrd_collector_started(); + if (config_get_boolean("plugin:proc", "/proc/net/dev", CONFIG_BOOLEAN_YES)) { netdev_thread = mallocz(sizeof(netdata_thread_t)); netdata_log_debug(D_SYSTEM, "Starting thread %s.", THREAD_NETDEV_NAME); @@ -151,46 +159,56 @@ void *proc_main(void *ptr) netdata_thread_cleanup_push(proc_main_cleanup, ptr); - config_get_boolean("plugin:proc", "/proc/pagetypeinfo", CONFIG_BOOLEAN_NO); + { + config_get_boolean("plugin:proc", "/proc/pagetypeinfo", CONFIG_BOOLEAN_NO); - // check the enabled status for each module - int i; - for (i = 0; proc_modules[i].name; i++) { - struct proc_module *pm = &proc_modules[i]; + // check the enabled status for each module + int i; + for(i = 0; proc_modules[i].name; i++) { + struct proc_module *pm = &proc_modules[i]; - pm->enabled = config_get_boolean("plugin:proc", pm->name, CONFIG_BOOLEAN_YES); - pm->rd = NULL; + pm->enabled = config_get_boolean("plugin:proc", pm->name, CONFIG_BOOLEAN_YES); + pm->rd = NULL; - worker_register_job_name(i, proc_modules[i].dim); - } + worker_register_job_name(i, proc_modules[i].dim); + } - usec_t step = localhost->rrd_update_every * USEC_PER_SEC; - heartbeat_t hb; - heartbeat_init(&hb); + usec_t step = localhost->rrd_update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); - inside_lxc_container = is_lxcfs_proc_mounted(); + inside_lxc_container = is_lxcfs_proc_mounted(); - while (service_running(SERVICE_COLLECTORS)) { - worker_is_idle(); - usec_t hb_dt = heartbeat_next(&hb, step); +#define LGS_MODULE_ID 0 - if (unlikely(!service_running(SERVICE_COLLECTORS))) - break; + ND_LOG_STACK lgs[] = { + [LGS_MODULE_ID] = ND_LOG_FIELD_TXT(NDF_MODULE, "proc.plugin"), + ND_LOG_FIELD_END(), + }; + ND_LOG_STACK_PUSH(lgs); - for (i = 0; proc_modules[i].name; i++) { - if (unlikely(!service_running(SERVICE_COLLECTORS))) - break; + while(service_running(SERVICE_COLLECTORS)) { + worker_is_idle(); + usec_t hb_dt = heartbeat_next(&hb, step); - struct proc_module *pm = &proc_modules[i]; - if (unlikely(!pm->enabled)) - continue; + if(unlikely(!service_running(SERVICE_COLLECTORS))) + break; - netdata_log_debug(D_PROCNETDEV_LOOP, "PROC calling %s.", pm->name); + for(i = 0; proc_modules[i].name; i++) { + if(unlikely(!service_running(SERVICE_COLLECTORS))) + break; - worker_is_busy(i); - pm->enabled = !pm->func(localhost->rrd_update_every, hb_dt); - } - } + struct proc_module *pm = &proc_modules[i]; + if(unlikely(!pm->enabled)) + continue; + + worker_is_busy(i); + lgs[LGS_MODULE_ID] = ND_LOG_FIELD_CB(NDF_MODULE, log_proc_module, pm); + pm->enabled = !pm->func(localhost->rrd_update_every, hb_dt); + lgs[LGS_MODULE_ID] = ND_LOG_FIELD_TXT(NDF_MODULE, "proc.plugin"); + } + } + } netdata_thread_cleanup_pop(1); return NULL; diff --git a/collectors/proc.plugin/plugin_proc.h b/collectors/proc.plugin/plugin_proc.h index a0ddd76c4..e4fc105ba 100644 --- a/collectors/proc.plugin/plugin_proc.h +++ b/collectors/proc.plugin/plugin_proc.h @@ -59,7 +59,8 @@ void netdev_rename_device_add( const char *container_device, const char *container_name, RRDLABELS *labels, - const char *ctx_prefix); + const char *ctx_prefix, + const DICTIONARY_ITEM *cgroup_netdev_link); void netdev_rename_device_del(const char *host_device); diff --git a/collectors/proc.plugin/proc_diskstats.c b/collectors/proc.plugin/proc_diskstats.c index e65c42212..475d90835 100644 --- a/collectors/proc.plugin/proc_diskstats.c +++ b/collectors/proc.plugin/proc_diskstats.c @@ -6,6 +6,8 @@ #define PLUGIN_PROC_MODULE_DISKSTATS_NAME "/proc/diskstats" #define CONFIG_SECTION_PLUGIN_PROC_DISKSTATS "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_DISKSTATS_NAME +#define RRDFUNCTIONS_DISKSTATS_HELP "View block device statistics" + #define DISK_TYPE_UNKNOWN 0 #define DISK_TYPE_PHYSICAL 1 #define DISK_TYPE_PARTITION 2 @@ -14,6 +16,8 @@ #define DEFAULT_PREFERRED_IDS "*" #define DEFAULT_EXCLUDED_DISKS "loop* ram*" +static netdata_mutex_t diskstats_dev_mutex = NETDATA_MUTEX_INITIALIZER; + static struct disk { char *disk; // the name of the disk (sda, sdb, etc, after being looked up) char *device; // the device of the disk (before being looked up) @@ -28,6 +32,9 @@ static struct disk { int sector_size; int type; + bool excluded; + bool function_ready; + char *mount_point; char *chart_id; @@ -168,7 +175,7 @@ static struct disk { struct disk *next; } *disk_root = NULL; -#define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete(st); (st) = NULL; } } while(st) +#define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete___safe_from_collector_thread(st); (st) = NULL; } } while(st) // static char *path_to_get_hw_sector_size = NULL; // static char *path_to_get_hw_sector_size_partitions = NULL; @@ -359,7 +366,10 @@ static inline int get_disk_name_from_path(const char *path, char *result, size_t DIR *dir = opendir(path); if (!dir) { - collector_error("DEVICE-MAPPER ('%s', %lu:%lu): Cannot open directory '%s'.", disk, major, minor, path); + if (errno == ENOENT) + nd_log_collector(NDLP_DEBUG, "DEVICE-MAPPER ('%s', %lu:%lu): Cannot open directory '%s': no such file or directory.", disk, major, minor, path); + else + collector_error("DEVICE-MAPPER ('%s', %lu:%lu): Cannot open directory '%s'.", disk, major, minor, path); goto failed; } @@ -490,7 +500,7 @@ static inline bool ends_with(const char *str, const char *suffix) { static inline char *get_disk_by_id(char *device) { char pathname[256 + 1]; - snprintfz(pathname, 256, "%s/by-id", path_to_dev_disk); + snprintfz(pathname, sizeof(pathname) - 1, "%s/by-id", path_to_dev_disk); struct dirent *entry; DIR *dp = opendir(pathname); @@ -536,21 +546,25 @@ static inline char *get_disk_model(char *device) { char path[256 + 1]; char buffer[256 + 1]; - snprintfz(path, 256, "%s/%s/device/model", path_to_sys_block, device); + snprintfz(path, sizeof(path) - 1, "%s/%s/device/model", path_to_sys_block, device); if(read_file(path, buffer, 256) != 0) { - snprintfz(path, 256, "%s/%s/device/name", path_to_sys_block, device); + snprintfz(path, sizeof(path) - 1, "%s/%s/device/name", path_to_sys_block, device); if(read_file(path, buffer, 256) != 0) return NULL; } - return strdupz(buffer); + char *clean = trim(buffer); + if (!clean) + return NULL; + + return strdupz(clean); } static inline char *get_disk_serial(char *device) { char path[256 + 1]; char buffer[256 + 1]; - snprintfz(path, 256, "%s/%s/device/serial", path_to_sys_block, device); + snprintfz(path, sizeof(path) - 1, "%s/%s/device/serial", path_to_sys_block, device); if(read_file(path, buffer, 256) != 0) return NULL; @@ -582,13 +596,17 @@ static inline char *get_disk_serial(char *device) { static void get_disk_config(struct disk *d) { int def_enable = global_enable_new_disks_detected_at_runtime; - if(def_enable != CONFIG_BOOLEAN_NO && (simple_pattern_matches(excluded_disks, d->device) || simple_pattern_matches(excluded_disks, d->disk))) + if(def_enable != CONFIG_BOOLEAN_NO && (simple_pattern_matches(excluded_disks, d->device) || simple_pattern_matches(excluded_disks, d->disk))) { + d->excluded = true; def_enable = CONFIG_BOOLEAN_NO; + } char var_name[4096 + 1]; snprintfz(var_name, 4096, CONFIG_SECTION_PLUGIN_PROC_DISKSTATS ":%s", d->disk); - def_enable = config_get_boolean_ondemand(var_name, "enable", def_enable); + if (config_exists(var_name, "enable")) + def_enable = config_get_boolean_ondemand(var_name, "enable", def_enable); + if(unlikely(def_enable == CONFIG_BOOLEAN_NO)) { // the user does not want any metrics for this disk d->do_io = CONFIG_BOOLEAN_NO; @@ -640,7 +658,8 @@ static void get_disk_config(struct disk *d) { // def_performance // check the user configuration (this will also show our 'on demand' decision) - def_performance = config_get_boolean_ondemand(var_name, "enable performance metrics", def_performance); + if (config_exists(var_name, "enable performance metrics")) + def_performance = config_get_boolean_ondemand(var_name, "enable performance metrics", def_performance); int ddo_io = CONFIG_BOOLEAN_NO, ddo_ops = CONFIG_BOOLEAN_NO, @@ -663,21 +682,44 @@ static void get_disk_config(struct disk *d) { ddo_ext = global_do_ext, ddo_backlog = global_do_backlog, ddo_bcache = global_do_bcache; + } else { + d->excluded = true; } - d->do_io = config_get_boolean_ondemand(var_name, "bandwidth", ddo_io); - d->do_ops = config_get_boolean_ondemand(var_name, "operations", ddo_ops); - d->do_mops = config_get_boolean_ondemand(var_name, "merged operations", ddo_mops); - d->do_iotime = config_get_boolean_ondemand(var_name, "i/o time", ddo_iotime); - d->do_qops = config_get_boolean_ondemand(var_name, "queued operations", ddo_qops); - d->do_util = config_get_boolean_ondemand(var_name, "utilization percentage", ddo_util); - d->do_ext = config_get_boolean_ondemand(var_name, "extended operations", ddo_ext); - d->do_backlog = config_get_boolean_ondemand(var_name, "backlog", ddo_backlog); - - if(d->device_is_bcache) - d->do_bcache = config_get_boolean_ondemand(var_name, "bcache", ddo_bcache); - else + d->do_io = ddo_io; + d->do_ops = ddo_ops; + d->do_mops = ddo_mops; + d->do_iotime = ddo_iotime; + d->do_qops = ddo_qops; + d->do_util = ddo_util; + d->do_ext = ddo_ext; + d->do_backlog = ddo_backlog; + + if (config_exists(var_name, "bandwidth")) + d->do_io = config_get_boolean_ondemand(var_name, "bandwidth", ddo_io); + if (config_exists(var_name, "operations")) + d->do_ops = config_get_boolean_ondemand(var_name, "operations", ddo_ops); + if (config_exists(var_name, "merged operations")) + d->do_mops = config_get_boolean_ondemand(var_name, "merged operations", ddo_mops); + if (config_exists(var_name, "i/o time")) + d->do_iotime = config_get_boolean_ondemand(var_name, "i/o time", ddo_iotime); + if (config_exists(var_name, "queued operations")) + d->do_qops = config_get_boolean_ondemand(var_name, "queued operations", ddo_qops); + if (config_exists(var_name, "utilization percentage")) + d->do_util = config_get_boolean_ondemand(var_name, "utilization percentage", ddo_util); + if (config_exists(var_name, "extended operations")) + d->do_ext = config_get_boolean_ondemand(var_name, "extended operations", ddo_ext); + if (config_exists(var_name, "backlog")) + d->do_backlog = config_get_boolean_ondemand(var_name, "backlog", ddo_backlog); + + d->do_bcache = ddo_bcache; + + if (d->device_is_bcache) { + if (config_exists(var_name, "bcache")) + d->do_bcache = config_get_boolean_ondemand(var_name, "bcache", ddo_bcache); + } else { d->do_bcache = 0; + } } } @@ -702,6 +744,8 @@ static struct disk *get_disk(unsigned long major, unsigned long minor, char *dis // create a new disk structure d = (struct disk *)callocz(1, sizeof(struct disk)); + d->excluded = false; + d->function_ready = false; d->disk = get_disk_name(major, minor, disk); d->device = strdupz(disk); d->disk_by_id = get_disk_by_id(disk); @@ -963,35 +1007,399 @@ static struct disk *get_disk(unsigned long major, unsigned long minor, char *dis } get_disk_config(d); + return d; } +static const char *get_disk_type_string(int disk_type) { + switch (disk_type) { + case DISK_TYPE_PHYSICAL: + return "physical"; + case DISK_TYPE_PARTITION: + return "partition"; + case DISK_TYPE_VIRTUAL: + return "virtual"; + default: + return "unknown"; + } +} + static void add_labels_to_disk(struct disk *d, RRDSET *st) { rrdlabels_add(st->rrdlabels, "device", d->disk, RRDLABEL_SRC_AUTO); rrdlabels_add(st->rrdlabels, "mount_point", d->mount_point, RRDLABEL_SRC_AUTO); rrdlabels_add(st->rrdlabels, "id", d->disk_by_id, RRDLABEL_SRC_AUTO); rrdlabels_add(st->rrdlabels, "model", d->model, RRDLABEL_SRC_AUTO); rrdlabels_add(st->rrdlabels, "serial", d->serial, RRDLABEL_SRC_AUTO); -// rrdlabels_add(st->rrdlabels, "rotational", d->rotational ? "true" : "false", RRDLABEL_SRC_AUTO); -// rrdlabels_add(st->rrdlabels, "removable", d->removable ? "true" : "false", RRDLABEL_SRC_AUTO); + rrdlabels_add(st->rrdlabels, "device_type", get_disk_type_string(d->type), RRDLABEL_SRC_AUTO); +} - switch (d->type) { - default: - case DISK_TYPE_UNKNOWN: - rrdlabels_add(st->rrdlabels, "device_type", "unknown", RRDLABEL_SRC_AUTO); - break; +static int diskstats_function_block_devices(BUFFER *wb, int timeout __maybe_unused, const char *function __maybe_unused, + void *collector_data __maybe_unused, + rrd_function_result_callback_t result_cb, void *result_cb_data, + rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data, + rrd_function_register_canceller_cb_t register_canceller_cb __maybe_unused, + void *register_canceller_cb_data __maybe_unused) { + + buffer_flush(wb); + wb->content_type = CT_APPLICATION_JSON; + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT); + + buffer_json_member_add_string(wb, "hostname", rrdhost_hostname(localhost)); + buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); + buffer_json_member_add_string(wb, "type", "table"); + buffer_json_member_add_time_t(wb, "update_every", 1); + buffer_json_member_add_string(wb, "help", RRDFUNCTIONS_DISKSTATS_HELP); + buffer_json_member_add_array(wb, "data"); + + double max_io_reads = 0.0; + double max_io_writes = 0.0; + double max_io = 0.0; + double max_backlog_time = 0.0; + double max_busy_time = 0.0; + double max_busy_perc = 0.0; + double max_iops_reads = 0.0; + double max_iops_writes = 0.0; + double max_iops_time_reads = 0.0; + double max_iops_time_writes = 0.0; + double max_iops_avg_time_read = 0.0; + double max_iops_avg_time_write = 0.0; + double max_iops_avg_size_read = 0.0; + double max_iops_avg_size_write = 0.0; + + netdata_mutex_lock(&diskstats_dev_mutex); + + for (struct disk *d = disk_root; d; d = d->next) { + if (unlikely(!d->function_ready)) + continue; - case DISK_TYPE_PHYSICAL: - rrdlabels_add(st->rrdlabels, "device_type", "physical", RRDLABEL_SRC_AUTO); - break; + buffer_json_add_array_item_array(wb); + + buffer_json_add_array_item_string(wb, d->device); + buffer_json_add_array_item_string(wb, get_disk_type_string(d->type)); + buffer_json_add_array_item_string(wb, d->disk_by_id); + buffer_json_add_array_item_string(wb, d->model); + buffer_json_add_array_item_string(wb, d->serial); + + // IO + double io_reads = rrddim_get_last_stored_value(d->rd_io_reads, &max_io_reads, 1024.0); + double io_writes = rrddim_get_last_stored_value(d->rd_io_writes, &max_io_writes, 1024.0); + double io_total = NAN; + if (!isnan(io_reads) && !isnan(io_writes)) { + io_total = io_reads + io_writes; + max_io = MAX(max_io, io_total); + } + // Backlog and Busy Time + double busy_perc = rrddim_get_last_stored_value(d->rd_util_utilization, &max_busy_perc, 1); + double busy_time = rrddim_get_last_stored_value(d->rd_busy_busy, &max_busy_time, 1); + double backlog_time = rrddim_get_last_stored_value(d->rd_backlog_backlog, &max_backlog_time, 1); + // IOPS + double iops_reads = rrddim_get_last_stored_value(d->rd_ops_reads, &max_iops_reads, 1); + double iops_writes = rrddim_get_last_stored_value(d->rd_ops_writes, &max_iops_writes, 1); + // IO Time + double iops_time_reads = rrddim_get_last_stored_value(d->rd_iotime_reads, &max_iops_time_reads, 1); + double iops_time_writes = rrddim_get_last_stored_value(d->rd_iotime_writes, &max_iops_time_writes, 1); + // Avg IO Time + double iops_avg_time_read = rrddim_get_last_stored_value(d->rd_await_reads, &max_iops_avg_time_read, 1); + double iops_avg_time_write = rrddim_get_last_stored_value(d->rd_await_writes, &max_iops_avg_time_write, 1); + // Avg IO Size + double iops_avg_size_read = rrddim_get_last_stored_value(d->rd_avgsz_reads, &max_iops_avg_size_read, 1); + double iops_avg_size_write = rrddim_get_last_stored_value(d->rd_avgsz_writes, &max_iops_avg_size_write, 1); + + + buffer_json_add_array_item_double(wb, io_reads); + buffer_json_add_array_item_double(wb, io_writes); + buffer_json_add_array_item_double(wb, io_total); + buffer_json_add_array_item_double(wb, busy_perc); + buffer_json_add_array_item_double(wb, busy_time); + buffer_json_add_array_item_double(wb, backlog_time); + buffer_json_add_array_item_double(wb, iops_reads); + buffer_json_add_array_item_double(wb, iops_writes); + buffer_json_add_array_item_double(wb, iops_time_reads); + buffer_json_add_array_item_double(wb, iops_time_writes); + buffer_json_add_array_item_double(wb, iops_avg_time_read); + buffer_json_add_array_item_double(wb, iops_avg_time_write); + buffer_json_add_array_item_double(wb, iops_avg_size_read); + buffer_json_add_array_item_double(wb, iops_avg_size_write); + + // End + buffer_json_array_close(wb); + } - case DISK_TYPE_PARTITION: - rrdlabels_add(st->rrdlabels, "device_type", "partition", RRDLABEL_SRC_AUTO); - break; + netdata_mutex_unlock(&diskstats_dev_mutex); - case DISK_TYPE_VIRTUAL: - rrdlabels_add(st->rrdlabels, "device_type", "virtual", RRDLABEL_SRC_AUTO); - break; + buffer_json_array_close(wb); // data + buffer_json_member_add_object(wb, "columns"); + { + size_t field_id = 0; + + buffer_rrdf_table_add_field(wb, field_id++, "Device", "Device Name", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_STICKY, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Type", "Device Type", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_UNIQUE_KEY, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "ID", "Device ID", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_UNIQUE_KEY, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Model", "Device Model", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_UNIQUE_KEY, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Serial", "Device Serial Number", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_UNIQUE_KEY, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "Read", "Data Read from Device", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "MiB", max_io_reads, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Written", "Data Writen to Device", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "MiB", max_io_writes, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Total", "Data Transferred to and from Device", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "MiB", max_io, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_NONE, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "Busy%", "Disk Busy Percentage", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "%", max_busy_perc, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Busy", "Disk Busy Time", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "milliseconds", max_busy_time, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Backlog", "Disk Backlog", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "milliseconds", max_backlog_time, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "Reads", "Completed Read Operations", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "ops", max_iops_reads, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Writes", "Completed Write Operations", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "ops", max_iops_writes, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "ReadsTime", "Read Operations Time", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "milliseconds", max_iops_time_reads, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "WritesTime", "Write Operations Time", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "milliseconds", max_iops_time_writes, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "ReadAvgTime", "Average Read Operation Service Time", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "milliseconds", max_iops_avg_time_read, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "WriteAvgTime", "Average Write Operation Service Time", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "milliseconds", max_iops_avg_time_write, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "ReadAvgSz", "Average Read Operation Size", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "KiB", max_iops_avg_size_read, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + buffer_rrdf_table_add_field(wb, field_id++, "WriteAvgSz", "Average Write Operation Size", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "KiB", max_iops_avg_size_write, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + } + + buffer_json_object_close(wb); // columns + buffer_json_member_add_string(wb, "default_sort_column", "Total"); + + buffer_json_member_add_object(wb, "charts"); + { + buffer_json_member_add_object(wb, "IO"); + { + buffer_json_member_add_string(wb, "name", "IO"); + buffer_json_member_add_string(wb, "type", "stacked-bar"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "Read"); + buffer_json_add_array_item_string(wb, "Written"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "Busy"); + { + buffer_json_member_add_string(wb, "name", "Busy"); + buffer_json_member_add_string(wb, "type", "stacked-bar"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "Busy"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); // charts + + buffer_json_member_add_array(wb, "default_charts"); + { + buffer_json_add_array_item_array(wb); + buffer_json_add_array_item_string(wb, "IO"); + buffer_json_add_array_item_string(wb, "Device"); + buffer_json_array_close(wb); + + buffer_json_add_array_item_array(wb); + buffer_json_add_array_item_string(wb, "Busy"); + buffer_json_add_array_item_string(wb, "Device"); + buffer_json_array_close(wb); + } + buffer_json_array_close(wb); + + buffer_json_member_add_object(wb, "group_by"); + { + buffer_json_member_add_object(wb, "Type"); + { + buffer_json_member_add_string(wb, "name", "Type"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "Type"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); // group_by + + buffer_json_member_add_time_t(wb, "expires", now_realtime_sec() + 1); + buffer_json_finalize(wb); + + int response = HTTP_RESP_OK; + if(is_cancelled_cb && is_cancelled_cb(is_cancelled_cb_data)) { + buffer_flush(wb); + response = HTTP_RESP_CLIENT_CLOSED_REQUEST; + } + + if(result_cb) + result_cb(wb, response, result_cb_data); + + return response; +} + +static void diskstats_cleanup_disks() { + struct disk *d = disk_root, *last = NULL; + while (d) { + if (unlikely(global_cleanup_removed_disks && !d->updated)) { + struct disk *t = d; + + rrdset_obsolete_and_pointer_null(d->st_avgsz); + rrdset_obsolete_and_pointer_null(d->st_ext_avgsz); + rrdset_obsolete_and_pointer_null(d->st_await); + rrdset_obsolete_and_pointer_null(d->st_ext_await); + rrdset_obsolete_and_pointer_null(d->st_backlog); + rrdset_obsolete_and_pointer_null(d->st_busy); + rrdset_obsolete_and_pointer_null(d->st_io); + rrdset_obsolete_and_pointer_null(d->st_ext_io); + rrdset_obsolete_and_pointer_null(d->st_iotime); + rrdset_obsolete_and_pointer_null(d->st_ext_iotime); + rrdset_obsolete_and_pointer_null(d->st_mops); + rrdset_obsolete_and_pointer_null(d->st_ext_mops); + rrdset_obsolete_and_pointer_null(d->st_ops); + rrdset_obsolete_and_pointer_null(d->st_ext_ops); + rrdset_obsolete_and_pointer_null(d->st_qops); + rrdset_obsolete_and_pointer_null(d->st_svctm); + rrdset_obsolete_and_pointer_null(d->st_util); + rrdset_obsolete_and_pointer_null(d->st_bcache); + rrdset_obsolete_and_pointer_null(d->st_bcache_bypass); + rrdset_obsolete_and_pointer_null(d->st_bcache_rates); + rrdset_obsolete_and_pointer_null(d->st_bcache_size); + rrdset_obsolete_and_pointer_null(d->st_bcache_usage); + rrdset_obsolete_and_pointer_null(d->st_bcache_hit_ratio); + rrdset_obsolete_and_pointer_null(d->st_bcache_cache_allocations); + rrdset_obsolete_and_pointer_null(d->st_bcache_cache_read_races); + + if (d == disk_root) { + disk_root = d = d->next; + last = NULL; + } else if (last) { + last->next = d = d->next; + } + + freez(t->bcache_filename_dirty_data); + freez(t->bcache_filename_writeback_rate); + freez(t->bcache_filename_cache_congested); + freez(t->bcache_filename_cache_available_percent); + freez(t->bcache_filename_stats_five_minute_cache_hit_ratio); + freez(t->bcache_filename_stats_hour_cache_hit_ratio); + freez(t->bcache_filename_stats_day_cache_hit_ratio); + freez(t->bcache_filename_stats_total_cache_hit_ratio); + freez(t->bcache_filename_stats_total_cache_hits); + freez(t->bcache_filename_stats_total_cache_misses); + freez(t->bcache_filename_stats_total_cache_miss_collisions); + freez(t->bcache_filename_stats_total_cache_bypass_hits); + freez(t->bcache_filename_stats_total_cache_bypass_misses); + freez(t->bcache_filename_stats_total_cache_readaheads); + freez(t->bcache_filename_cache_read_races); + freez(t->bcache_filename_cache_io_errors); + freez(t->bcache_filename_priority_stats); + + freez(t->disk); + freez(t->device); + freez(t->disk_by_id); + freez(t->model); + freez(t->serial); + freez(t->mount_point); + freez(t->chart_id); + freez(t); + } else { + d->updated = 0; + last = d; + d = d->next; + } } } @@ -1080,12 +1488,20 @@ int do_proc_diskstats(int update_every, usec_t dt) { ff = procfile_readall(ff); if(unlikely(!ff)) return 0; // we return 0, so that we will retry to open it next time + static bool add_func = true; + if (add_func) { + rrd_function_add(localhost, NULL, "block-devices", 10, RRDFUNCTIONS_DISKSTATS_HELP, true, diskstats_function_block_devices, NULL); + add_func = false; + } + size_t lines = procfile_lines(ff), l; collected_number system_read_kb = 0, system_write_kb = 0; int do_dc_stats = 0, do_fl_stats = 0; + netdata_mutex_lock(&diskstats_dev_mutex); + for(l = 0; l < lines ;l++) { // -------------------------------------------------------------------------- // Read parameters @@ -1210,7 +1626,6 @@ int do_proc_diskstats(int update_every, usec_t dt) { // -------------------------------------------------------------------------- // Do performance metrics - if(d->do_io == CONFIG_BOOLEAN_YES || (d->do_io == CONFIG_BOOLEAN_AUTO && (readsectors || writesectors || discardsectors || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { @@ -2056,8 +2471,13 @@ int do_proc_diskstats(int update_every, usec_t dt) { rrdset_done(d->st_bcache_bypass); } } + + d->function_ready = !d->excluded; } + diskstats_cleanup_disks(); + + netdata_mutex_unlock(&diskstats_dev_mutex); // update the system total I/O if(global_do_io == CONFIG_BOOLEAN_YES || (global_do_io == CONFIG_BOOLEAN_AUTO && @@ -2091,80 +2511,5 @@ int do_proc_diskstats(int update_every, usec_t dt) { rrdset_done(st_io); } - // cleanup removed disks - - struct disk *d = disk_root, *last = NULL; - while(d) { - if(unlikely(global_cleanup_removed_disks && !d->updated)) { - struct disk *t = d; - - rrdset_obsolete_and_pointer_null(d->st_avgsz); - rrdset_obsolete_and_pointer_null(d->st_ext_avgsz); - rrdset_obsolete_and_pointer_null(d->st_await); - rrdset_obsolete_and_pointer_null(d->st_ext_await); - rrdset_obsolete_and_pointer_null(d->st_backlog); - rrdset_obsolete_and_pointer_null(d->st_busy); - rrdset_obsolete_and_pointer_null(d->st_io); - rrdset_obsolete_and_pointer_null(d->st_ext_io); - rrdset_obsolete_and_pointer_null(d->st_iotime); - rrdset_obsolete_and_pointer_null(d->st_ext_iotime); - rrdset_obsolete_and_pointer_null(d->st_mops); - rrdset_obsolete_and_pointer_null(d->st_ext_mops); - rrdset_obsolete_and_pointer_null(d->st_ops); - rrdset_obsolete_and_pointer_null(d->st_ext_ops); - rrdset_obsolete_and_pointer_null(d->st_qops); - rrdset_obsolete_and_pointer_null(d->st_svctm); - rrdset_obsolete_and_pointer_null(d->st_util); - rrdset_obsolete_and_pointer_null(d->st_bcache); - rrdset_obsolete_and_pointer_null(d->st_bcache_bypass); - rrdset_obsolete_and_pointer_null(d->st_bcache_rates); - rrdset_obsolete_and_pointer_null(d->st_bcache_size); - rrdset_obsolete_and_pointer_null(d->st_bcache_usage); - rrdset_obsolete_and_pointer_null(d->st_bcache_hit_ratio); - rrdset_obsolete_and_pointer_null(d->st_bcache_cache_allocations); - rrdset_obsolete_and_pointer_null(d->st_bcache_cache_read_races); - - if(d == disk_root) { - disk_root = d = d->next; - last = NULL; - } - else if(last) { - last->next = d = d->next; - } - - freez(t->bcache_filename_dirty_data); - freez(t->bcache_filename_writeback_rate); - freez(t->bcache_filename_cache_congested); - freez(t->bcache_filename_cache_available_percent); - freez(t->bcache_filename_stats_five_minute_cache_hit_ratio); - freez(t->bcache_filename_stats_hour_cache_hit_ratio); - freez(t->bcache_filename_stats_day_cache_hit_ratio); - freez(t->bcache_filename_stats_total_cache_hit_ratio); - freez(t->bcache_filename_stats_total_cache_hits); - freez(t->bcache_filename_stats_total_cache_misses); - freez(t->bcache_filename_stats_total_cache_miss_collisions); - freez(t->bcache_filename_stats_total_cache_bypass_hits); - freez(t->bcache_filename_stats_total_cache_bypass_misses); - freez(t->bcache_filename_stats_total_cache_readaheads); - freez(t->bcache_filename_cache_read_races); - freez(t->bcache_filename_cache_io_errors); - freez(t->bcache_filename_priority_stats); - - freez(t->disk); - freez(t->device); - freez(t->disk_by_id); - freez(t->model); - freez(t->serial); - freez(t->mount_point); - freez(t->chart_id); - freez(t); - } - else { - d->updated = 0; - last = d; - d = d->next; - } - } - return 0; } diff --git a/collectors/proc.plugin/proc_interrupts.c b/collectors/proc.plugin/proc_interrupts.c index 9a20700a3..37071b22f 100644 --- a/collectors/proc.plugin/proc_interrupts.c +++ b/collectors/proc.plugin/proc_interrupts.c @@ -201,10 +201,10 @@ int do_proc_interrupts(int update_every, usec_t dt) { for(c = 0; c < cpus ;c++) { if(unlikely(!core_st[c])) { char id[50+1]; - snprintfz(id, 50, "cpu%d_interrupts", c); + snprintfz(id, sizeof(id) - 1, "cpu%d_interrupts", c); char title[100+1]; - snprintfz(title, 100, "CPU Interrupts"); + snprintfz(title, sizeof(title) - 1, "CPU Interrupts"); core_st[c] = rrdset_create_localhost( "cpu" , id @@ -221,7 +221,7 @@ int do_proc_interrupts(int update_every, usec_t dt) { ); char core[50+1]; - snprintfz(core, 50, "cpu%d", c); + snprintfz(core, sizeof(core) - 1, "cpu%d", c); rrdlabels_add(core_st[c]->rrdlabels, "cpu", core, RRDLABEL_SRC_AUTO); } diff --git a/collectors/proc.plugin/proc_mdstat.c b/collectors/proc.plugin/proc_mdstat.c index c3d1793cb..3857d9ec4 100644 --- a/collectors/proc.plugin/proc_mdstat.c +++ b/collectors/proc.plugin/proc_mdstat.c @@ -70,10 +70,10 @@ static inline void make_chart_obsolete(char *name, const char *id_modifier) RRDSET *st = NULL; if (likely(name && id_modifier)) { - snprintfz(id, 50, "mdstat.%s_%s", name, id_modifier); + snprintfz(id, sizeof(id) - 1, "mdstat.%s_%s", name, id_modifier); st = rrdset_find_active_byname_localhost(id); if (likely(st)) - rrdset_is_obsolete(st); + rrdset_is_obsolete___safe_from_collector_thread(st); } } @@ -409,7 +409,7 @@ int do_proc_mdstat(int update_every, usec_t dt) update_every, RRDSET_TYPE_LINE); - rrdset_isnot_obsolete(st_mdstat_health); + rrdset_isnot_obsolete___safe_from_collector_thread(st_mdstat_health); } if (!redundant_num) { @@ -438,10 +438,10 @@ int do_proc_mdstat(int update_every, usec_t dt) if (likely(raid->redundant)) { if (likely(do_disks)) { - snprintfz(id, 50, "%s_disks", raid->name); + snprintfz(id, sizeof(id) - 1, "%s_disks", raid->name); if (unlikely(!raid->st_disks && !(raid->st_disks = rrdset_find_active_byname_localhost(id)))) { - snprintfz(family, 50, "%s (%s)", raid->name, raid->level); + snprintfz(family, sizeof(family) - 1, "%s (%s)", raid->name, raid->level); raid->st_disks = rrdset_create_localhost( "mdstat", @@ -457,7 +457,7 @@ int do_proc_mdstat(int update_every, usec_t dt) update_every, RRDSET_TYPE_STACKED); - rrdset_isnot_obsolete(raid->st_disks); + rrdset_isnot_obsolete___safe_from_collector_thread(raid->st_disks); add_labels_to_mdstat(raid, raid->st_disks); } @@ -473,10 +473,10 @@ int do_proc_mdstat(int update_every, usec_t dt) } if (likely(do_mismatch)) { - snprintfz(id, 50, "%s_mismatch", raid->name); + snprintfz(id, sizeof(id) - 1, "%s_mismatch", raid->name); if (unlikely(!raid->st_mismatch_cnt && !(raid->st_mismatch_cnt = rrdset_find_active_byname_localhost(id)))) { - snprintfz(family, 50, "%s (%s)", raid->name, raid->level); + snprintfz(family, sizeof(family) - 1, "%s (%s)", raid->name, raid->level); raid->st_mismatch_cnt = rrdset_create_localhost( "mdstat", @@ -492,7 +492,7 @@ int do_proc_mdstat(int update_every, usec_t dt) update_every, RRDSET_TYPE_LINE); - rrdset_isnot_obsolete(raid->st_mismatch_cnt); + rrdset_isnot_obsolete___safe_from_collector_thread(raid->st_mismatch_cnt); add_labels_to_mdstat(raid, raid->st_mismatch_cnt); } @@ -505,10 +505,10 @@ int do_proc_mdstat(int update_every, usec_t dt) } if (likely(do_operations)) { - snprintfz(id, 50, "%s_operation", raid->name); + snprintfz(id, sizeof(id) - 1, "%s_operation", raid->name); if (unlikely(!raid->st_operation && !(raid->st_operation = rrdset_find_active_byname_localhost(id)))) { - snprintfz(family, 50, "%s (%s)", raid->name, raid->level); + snprintfz(family, sizeof(family) - 1, "%s (%s)", raid->name, raid->level); raid->st_operation = rrdset_create_localhost( "mdstat", @@ -524,7 +524,7 @@ int do_proc_mdstat(int update_every, usec_t dt) update_every, RRDSET_TYPE_LINE); - rrdset_isnot_obsolete(raid->st_operation); + rrdset_isnot_obsolete___safe_from_collector_thread(raid->st_operation); add_labels_to_mdstat(raid, raid->st_operation); } @@ -544,9 +544,9 @@ int do_proc_mdstat(int update_every, usec_t dt) rrddim_set_by_pointer(raid->st_operation, raid->rd_reshape, raid->reshape); rrdset_done(raid->st_operation); - snprintfz(id, 50, "%s_finish", raid->name); + snprintfz(id, sizeof(id) - 1, "%s_finish", raid->name); if (unlikely(!raid->st_finish && !(raid->st_finish = rrdset_find_active_byname_localhost(id)))) { - snprintfz(family, 50, "%s (%s)", raid->name, raid->level); + snprintfz(family, sizeof(family) - 1, "%s (%s)", raid->name, raid->level); raid->st_finish = rrdset_create_localhost( "mdstat", @@ -561,7 +561,7 @@ int do_proc_mdstat(int update_every, usec_t dt) NETDATA_CHART_PRIO_MDSTAT_FINISH + raid_idx * 10, update_every, RRDSET_TYPE_LINE); - rrdset_isnot_obsolete(raid->st_finish); + rrdset_isnot_obsolete___safe_from_collector_thread(raid->st_finish); add_labels_to_mdstat(raid, raid->st_finish); } @@ -572,9 +572,9 @@ int do_proc_mdstat(int update_every, usec_t dt) rrddim_set_by_pointer(raid->st_finish, raid->rd_finish_in, raid->finish_in); rrdset_done(raid->st_finish); - snprintfz(id, 50, "%s_speed", raid->name); + snprintfz(id, sizeof(id) - 1, "%s_speed", raid->name); if (unlikely(!raid->st_speed && !(raid->st_speed = rrdset_find_active_byname_localhost(id)))) { - snprintfz(family, 50, "%s (%s)", raid->name, raid->level); + snprintfz(family, sizeof(family) - 1, "%s (%s)", raid->name, raid->level); raid->st_speed = rrdset_create_localhost( "mdstat", @@ -590,7 +590,7 @@ int do_proc_mdstat(int update_every, usec_t dt) update_every, RRDSET_TYPE_LINE); - rrdset_isnot_obsolete(raid->st_speed); + rrdset_isnot_obsolete___safe_from_collector_thread(raid->st_speed); add_labels_to_mdstat(raid, raid->st_speed); } @@ -603,10 +603,10 @@ int do_proc_mdstat(int update_every, usec_t dt) } } else { if (likely(do_nonredundant)) { - snprintfz(id, 50, "%s_availability", raid->name); + snprintfz(id, sizeof(id) - 1, "%s_availability", raid->name); if (unlikely(!raid->st_nonredundant && !(raid->st_nonredundant = rrdset_find_active_localhost(id)))) { - snprintfz(family, 50, "%s (%s)", raid->name, raid->level); + snprintfz(family, sizeof(family) - 1, "%s (%s)", raid->name, raid->level); raid->st_nonredundant = rrdset_create_localhost( "mdstat", @@ -622,7 +622,7 @@ int do_proc_mdstat(int update_every, usec_t dt) update_every, RRDSET_TYPE_LINE); - rrdset_isnot_obsolete(raid->st_nonredundant); + rrdset_isnot_obsolete___safe_from_collector_thread(raid->st_nonredundant); add_labels_to_mdstat(raid, raid->st_nonredundant); } diff --git a/collectors/proc.plugin/proc_net_dev.c b/collectors/proc.plugin/proc_net_dev.c index 8539c7725..b39f39683 100644 --- a/collectors/proc.plugin/proc_net_dev.c +++ b/collectors/proc.plugin/proc_net_dev.c @@ -5,16 +5,35 @@ #define PLUGIN_PROC_MODULE_NETDEV_NAME "/proc/net/dev" #define CONFIG_SECTION_PLUGIN_PROC_NETDEV "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_NETDEV_NAME +#define RRDFUNCTIONS_NETDEV_HELP "View network interface statistics" + #define STATE_LENGTH_MAX 32 #define READ_RETRY_PERIOD 60 // seconds +void cgroup_netdev_reset_all(void); +void cgroup_netdev_release(const DICTIONARY_ITEM *link); +const void *cgroup_netdev_dup(const DICTIONARY_ITEM *link); +void cgroup_netdev_add_bandwidth(const DICTIONARY_ITEM *link, NETDATA_DOUBLE received, NETDATA_DOUBLE sent); + enum { NETDEV_DUPLEX_UNKNOWN, NETDEV_DUPLEX_HALF, NETDEV_DUPLEX_FULL }; +static const char *get_duplex_string(int duplex) +{ + switch (duplex) { + case NETDEV_DUPLEX_FULL: + return "full"; + case NETDEV_DUPLEX_HALF: + return "half"; + default: + return "unknown"; + } +} + enum { NETDEV_OPERSTATE_UNKNOWN, NETDEV_OPERSTATE_NOTPRESENT, @@ -44,6 +63,26 @@ static inline int get_operstate(char *operstate) return NETDEV_OPERSTATE_UNKNOWN; } +static const char *get_operstate_string(int operstate) +{ + switch (operstate) { + case NETDEV_OPERSTATE_UP: + return "up"; + case NETDEV_OPERSTATE_DOWN: + return "down"; + case NETDEV_OPERSTATE_NOTPRESENT: + return "notpresent"; + case NETDEV_OPERSTATE_LOWERLAYERDOWN: + return "lowerlayerdown"; + case NETDEV_OPERSTATE_TESTING: + return "testing"; + case NETDEV_OPERSTATE_DORMANT: + return "dormant"; + default: + return "unknown"; + } +} + // ---------------------------------------------------------------------------- // netdev list @@ -58,6 +97,8 @@ static struct netdev { int enabled; int updated; + bool function_ready; + time_t discover_time; int carrier_file_exists; @@ -208,6 +249,8 @@ static struct netdev { char *filename_carrier; char *filename_mtu; + const DICTIONARY_ITEM *cgroup_netdev_link; + struct netdev *next; } *netdev_root = NULL, *netdev_last_used = NULL; @@ -216,18 +259,18 @@ static size_t netdev_added = 0, netdev_found = 0; // ---------------------------------------------------------------------------- static void netdev_charts_release(struct netdev *d) { - if(d->st_bandwidth) rrdset_is_obsolete(d->st_bandwidth); - if(d->st_packets) rrdset_is_obsolete(d->st_packets); - if(d->st_errors) rrdset_is_obsolete(d->st_errors); - if(d->st_drops) rrdset_is_obsolete(d->st_drops); - if(d->st_fifo) rrdset_is_obsolete(d->st_fifo); - if(d->st_compressed) rrdset_is_obsolete(d->st_compressed); - if(d->st_events) rrdset_is_obsolete(d->st_events); - if(d->st_speed) rrdset_is_obsolete(d->st_speed); - if(d->st_duplex) rrdset_is_obsolete(d->st_duplex); - if(d->st_operstate) rrdset_is_obsolete(d->st_operstate); - if(d->st_carrier) rrdset_is_obsolete(d->st_carrier); - if(d->st_mtu) rrdset_is_obsolete(d->st_mtu); + if(d->st_bandwidth) rrdset_is_obsolete___safe_from_collector_thread(d->st_bandwidth); + if(d->st_packets) rrdset_is_obsolete___safe_from_collector_thread(d->st_packets); + if(d->st_errors) rrdset_is_obsolete___safe_from_collector_thread(d->st_errors); + if(d->st_drops) rrdset_is_obsolete___safe_from_collector_thread(d->st_drops); + if(d->st_fifo) rrdset_is_obsolete___safe_from_collector_thread(d->st_fifo); + if(d->st_compressed) rrdset_is_obsolete___safe_from_collector_thread(d->st_compressed); + if(d->st_events) rrdset_is_obsolete___safe_from_collector_thread(d->st_events); + if(d->st_speed) rrdset_is_obsolete___safe_from_collector_thread(d->st_speed); + if(d->st_duplex) rrdset_is_obsolete___safe_from_collector_thread(d->st_duplex); + if(d->st_operstate) rrdset_is_obsolete___safe_from_collector_thread(d->st_operstate); + if(d->st_carrier) rrdset_is_obsolete___safe_from_collector_thread(d->st_carrier); + if(d->st_mtu) rrdset_is_obsolete___safe_from_collector_thread(d->st_mtu); d->st_bandwidth = NULL; d->st_compressed = NULL; @@ -326,6 +369,7 @@ static void netdev_free(struct netdev *d) { netdev_charts_release(d); netdev_free_chart_strings(d); rrdlabels_destroy(d->chart_labels); + cgroup_netdev_release(d->cgroup_netdev_link); freez((void *)d->name); freez((void *)d->filename_speed); @@ -352,11 +396,14 @@ static struct netdev_rename { int processed; + const DICTIONARY_ITEM *cgroup_netdev_link; + struct netdev_rename *next; } *netdev_rename_root = NULL; static int netdev_pending_renames = 0; static netdata_mutex_t netdev_rename_mutex = NETDATA_MUTEX_INITIALIZER; +static netdata_mutex_t netdev_dev_mutex = NETDATA_MUTEX_INITIALIZER; static struct netdev_rename *netdev_rename_find(const char *host_device, uint32_t hash) { struct netdev_rename *r; @@ -374,7 +421,8 @@ void netdev_rename_device_add( const char *container_device, const char *container_name, RRDLABELS *labels, - const char *ctx_prefix) + const char *ctx_prefix, + const DICTIONARY_ITEM *cgroup_netdev_link) { netdata_mutex_lock(&netdev_rename_mutex); @@ -391,6 +439,8 @@ void netdev_rename_device_add( r->hash = hash; r->next = netdev_rename_root; r->processed = 0; + r->cgroup_netdev_link = cgroup_netdev_link; + netdev_rename_root = r; netdev_pending_renames++; collector_info("CGROUP: registered network interface rename for '%s' as '%s' under '%s'", r->host_device, r->container_device, r->container_name); @@ -406,6 +456,8 @@ void netdev_rename_device_add( rrdlabels_migrate_to_these(r->chart_labels, labels); r->processed = 0; + r->cgroup_netdev_link = cgroup_netdev_link; + netdev_pending_renames++; collector_info("CGROUP: altered network interface rename for '%s' as '%s' under '%s'", r->host_device, r->container_device, r->container_name); } @@ -438,6 +490,7 @@ void netdev_rename_device_del(const char *host_device) { freez((void *) r->container_device); freez((void *) r->ctx_prefix); rrdlabels_destroy(r->chart_labels); + cgroup_netdev_release(r->cgroup_netdev_link); freez((void *) r); break; } @@ -451,6 +504,7 @@ static inline void netdev_rename_cgroup(struct netdev *d, struct netdev_rename * netdev_charts_release(d); netdev_free_chart_strings(d); + d->cgroup_netdev_link = cgroup_netdev_dup(r->cgroup_netdev_link); char buffer[RRD_ID_LENGTH_MAX + 1]; @@ -521,6 +575,7 @@ static inline void netdev_rename_cgroup(struct netdev *d, struct netdev_rename * d->chart_family = strdupz("net"); rrdlabels_copy(d->chart_labels, r->chart_labels); + rrdlabels_add(d->chart_labels, "container_device", r->container_device, RRDLABEL_SRC_AUTO); d->priority = NETDATA_CHART_PRIO_CGROUP_NET_IFACE; d->flipped = 1; @@ -554,6 +609,319 @@ static inline void netdev_rename_all_lock(void) { } // ---------------------------------------------------------------------------- + +int netdev_function_net_interfaces(BUFFER *wb, int timeout __maybe_unused, const char *function __maybe_unused, + void *collector_data __maybe_unused, + rrd_function_result_callback_t result_cb, void *result_cb_data, + rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data, + rrd_function_register_canceller_cb_t register_canceller_cb __maybe_unused, + void *register_canceller_cb_data __maybe_unused) { + + buffer_flush(wb); + wb->content_type = CT_APPLICATION_JSON; + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT); + + buffer_json_member_add_string(wb, "hostname", rrdhost_hostname(localhost)); + buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); + buffer_json_member_add_string(wb, "type", "table"); + buffer_json_member_add_time_t(wb, "update_every", 1); + buffer_json_member_add_string(wb, "help", RRDFUNCTIONS_NETDEV_HELP); + buffer_json_member_add_array(wb, "data"); + + double max_traffic_rx = 0.0; + double max_traffic_tx = 0.0; + double max_traffic = 0.0; + double max_packets_rx = 0.0; + double max_packets_tx = 0.0; + double max_mcast_rx = 0.0; + double max_drops_rx = 0.0; + double max_drops_tx = 0.0; + + netdata_mutex_lock(&netdev_dev_mutex); + + RRDDIM *rd = NULL; + + for (struct netdev *d = netdev_root; d != netdev_last_used; d = d->next) { + if (unlikely(!d->function_ready)) + continue; + + buffer_json_add_array_item_array(wb); + + buffer_json_add_array_item_string(wb, d->name); + + buffer_json_add_array_item_string(wb, d->virtual ? "virtual" : "physical"); + buffer_json_add_array_item_string(wb, d->flipped ? "cgroup" : "host"); + buffer_json_add_array_item_string(wb, d->carrier == 1 ? "up" : "down"); + buffer_json_add_array_item_string(wb, get_operstate_string(d->operstate)); + buffer_json_add_array_item_string(wb, get_duplex_string(d->duplex)); + buffer_json_add_array_item_double(wb, d->speed > 0 ? d->speed : NAN); + buffer_json_add_array_item_double(wb, d->mtu > 0 ? d->mtu : NAN); + + rd = d->flipped ? d->rd_tbytes : d->rd_rbytes; + double traffic_rx = rrddim_get_last_stored_value(rd, &max_traffic_rx, 1000.0); + rd = d->flipped ? d->rd_rbytes : d->rd_tbytes; + double traffic_tx = rrddim_get_last_stored_value(rd, &max_traffic_tx, 1000.0); + + rd = d->flipped ? d->rd_tpackets : d->rd_rpackets; + double packets_rx = rrddim_get_last_stored_value(rd, &max_packets_rx, 1000.0); + rd = d->flipped ? d->rd_rpackets : d->rd_tpackets; + double packets_tx = rrddim_get_last_stored_value(rd, &max_packets_tx, 1000.0); + + double mcast_rx = rrddim_get_last_stored_value(d->rd_rmulticast, &max_mcast_rx, 1000.0); + + rd = d->flipped ? d->rd_tdrops : d->rd_rdrops; + double drops_rx = rrddim_get_last_stored_value(rd, &max_drops_rx, 1.0); + rd = d->flipped ? d->rd_rdrops : d->rd_tdrops; + double drops_tx = rrddim_get_last_stored_value(rd, &max_drops_tx, 1.0); + + // FIXME: "traffic" (total) is needed only for default_sorting + // can be removed when default_sorting will accept multiple columns (sum) + double traffic = NAN; + if (!isnan(traffic_rx) && !isnan(traffic_tx)) { + traffic = traffic_rx + traffic_tx; + max_traffic = MAX(max_traffic, traffic); + } + + + buffer_json_add_array_item_double(wb, traffic_rx); + buffer_json_add_array_item_double(wb, traffic_tx); + buffer_json_add_array_item_double(wb, traffic); + buffer_json_add_array_item_double(wb, packets_rx); + buffer_json_add_array_item_double(wb, packets_tx); + buffer_json_add_array_item_double(wb, mcast_rx); + buffer_json_add_array_item_double(wb, drops_rx); + buffer_json_add_array_item_double(wb, drops_tx); + + buffer_json_add_array_item_object(wb); + { + buffer_json_member_add_string(wb, "severity", drops_rx + drops_tx > 0 ? "warning" : "normal"); + } + buffer_json_object_close(wb); + + buffer_json_array_close(wb); + } + + netdata_mutex_unlock(&netdev_dev_mutex); + + buffer_json_array_close(wb); // data + buffer_json_member_add_object(wb, "columns"); + { + size_t field_id = 0; + + buffer_rrdf_table_add_field(wb, field_id++, "Interface", "Network Interface Name", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_STICKY, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "Type", "Network Interface Type", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "UsedBy", "Indicates whether the network interface is used by a cgroup or by the host system", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "PhState", "Current Physical State", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OpState", "Current Operational State", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_UNIQUE_KEY, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "Duplex", "Current Duplex Mode", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_UNIQUE_KEY, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "Speed", "Current Link Speed", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, "Mbit", NAN, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_UNIQUE_KEY, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "MTU", "Maximum Transmission Unit", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, "Octets", NAN, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_UNIQUE_KEY, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "In", "Traffic Received", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "Mbit", max_traffic_rx, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "Out", "Traffic Sent", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "Mbit", max_traffic_tx, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "Total", "Traffic Received and Sent", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "Mbit", max_traffic, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_NONE, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "PktsIn", "Received Packets", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "Kpps", max_packets_rx, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "PktsOut", "Sent Packets", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "Kpps", max_packets_tx, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "McastIn", "Multicast Received Packets", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "Kpps", max_mcast_rx, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_NONE, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "DropsIn", "Dropped Inbound Packets", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "Drops", max_drops_rx, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "DropsOut", "Dropped Outbound Packets", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "Drops", max_drops_tx, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + buffer_rrdf_table_add_field( + wb, field_id++, + "rowOptions", "rowOptions", + RRDF_FIELD_TYPE_NONE, + RRDR_FIELD_VISUAL_ROW_OPTIONS, + RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN, + RRDF_FIELD_SORT_FIXED, + NULL, + RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_DUMMY, + NULL); + } + + buffer_json_object_close(wb); // columns + buffer_json_member_add_string(wb, "default_sort_column", "Total"); + + buffer_json_member_add_object(wb, "charts"); + { + buffer_json_member_add_object(wb, "Traffic"); + { + buffer_json_member_add_string(wb, "name", "Traffic"); + buffer_json_member_add_string(wb, "type", "stacked-bar"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "In"); + buffer_json_add_array_item_string(wb, "Out"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "Packets"); + { + buffer_json_member_add_string(wb, "name", "Packets"); + buffer_json_member_add_string(wb, "type", "stacked-bar"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "PktsIn"); + buffer_json_add_array_item_string(wb, "PktsOut"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); // charts + + buffer_json_member_add_array(wb, "default_charts"); + { + buffer_json_add_array_item_array(wb); + buffer_json_add_array_item_string(wb, "Traffic"); + buffer_json_add_array_item_string(wb, "Interface"); + buffer_json_array_close(wb); + + buffer_json_add_array_item_array(wb); + buffer_json_add_array_item_string(wb, "Traffic"); + buffer_json_add_array_item_string(wb, "Type"); + buffer_json_array_close(wb); + } + buffer_json_array_close(wb); + + buffer_json_member_add_object(wb, "group_by"); + { + buffer_json_member_add_object(wb, "Type"); + { + buffer_json_member_add_string(wb, "name", "Type"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "Type"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "UsedBy"); + { + buffer_json_member_add_string(wb, "name", "UsedBy"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "UsedBy"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); // group_by + + buffer_json_member_add_time_t(wb, "expires", now_realtime_sec() + 1); + buffer_json_finalize(wb); + + int response = HTTP_RESP_OK; + if(is_cancelled_cb && is_cancelled_cb(is_cancelled_cb_data)) { + buffer_flush(wb); + response = HTTP_RESP_CLIENT_CLOSED_REQUEST; + } + + if(result_cb) + result_cb(wb, response, result_cb_data); + + return response; +} + // netdev data collection static void netdev_cleanup() { @@ -615,6 +983,7 @@ static struct netdev *get_netdev(const char *name) { d->hash = simple_hash(d->name); d->len = strlen(d->name); d->chart_labels = rrdlabels_create(); + d->function_ready = false; d->chart_type_net_bytes = strdupz("net"); d->chart_type_net_compressed = strdupz("net_compressed"); @@ -779,56 +1148,88 @@ int do_proc_net_dev(int update_every, usec_t dt) { if(d->enabled) d->enabled = !simple_pattern_matches(disabled_list, d->name); - char buffer[FILENAME_MAX + 1]; + char buf[FILENAME_MAX + 1]; + snprintfz(buf, FILENAME_MAX, path_to_sys_devices_virtual_net, d->name); - snprintfz(buffer, FILENAME_MAX, path_to_sys_devices_virtual_net, d->name); - if (likely(access(buffer, R_OK) == 0)) { - d->virtual = 1; - rrdlabels_add(d->chart_labels, "interface_type", "virtual", RRDLABEL_SRC_AUTO); - } - else { + d->virtual = likely(access(buf, R_OK) == 0) ? 1 : 0; + + // At least on Proxmox inside LXC: eth0 is virtual. + // Virtual interfaces are not taken into account in system.net calculations + if (inside_lxc_container && d->virtual && strncmp(d->name, "eth", 3) == 0) d->virtual = 0; + + if (d->virtual) + rrdlabels_add(d->chart_labels, "interface_type", "virtual", RRDLABEL_SRC_AUTO); + else rrdlabels_add(d->chart_labels, "interface_type", "real", RRDLABEL_SRC_AUTO); - } + rrdlabels_add(d->chart_labels, "device", name, RRDLABEL_SRC_AUTO); if(likely(!d->virtual)) { // set the filename to get the interface speed - snprintfz(buffer, FILENAME_MAX, path_to_sys_class_net_speed, d->name); - d->filename_speed = strdupz(buffer); + snprintfz(buf, FILENAME_MAX, path_to_sys_class_net_speed, d->name); + d->filename_speed = strdupz(buf); - snprintfz(buffer, FILENAME_MAX, path_to_sys_class_net_duplex, d->name); - d->filename_duplex = strdupz(buffer); + snprintfz(buf, FILENAME_MAX, path_to_sys_class_net_duplex, d->name); + d->filename_duplex = strdupz(buf); } - snprintfz(buffer, FILENAME_MAX, path_to_sys_class_net_operstate, d->name); - d->filename_operstate = strdupz(buffer); + snprintfz(buf, FILENAME_MAX, path_to_sys_class_net_operstate, d->name); + d->filename_operstate = strdupz(buf); - snprintfz(buffer, FILENAME_MAX, path_to_sys_class_net_carrier, d->name); - d->filename_carrier = strdupz(buffer); + snprintfz(buf, FILENAME_MAX, path_to_sys_class_net_carrier, d->name); + d->filename_carrier = strdupz(buf); - snprintfz(buffer, FILENAME_MAX, path_to_sys_class_net_mtu, d->name); - d->filename_mtu = strdupz(buffer); + snprintfz(buf, FILENAME_MAX, path_to_sys_class_net_mtu, d->name); + d->filename_mtu = strdupz(buf); - snprintfz(buffer, FILENAME_MAX, "plugin:proc:/proc/net/dev:%s", d->name); - d->enabled = config_get_boolean_ondemand(buffer, "enabled", d->enabled); - d->virtual = config_get_boolean(buffer, "virtual", d->virtual); + snprintfz(buf, FILENAME_MAX, "plugin:proc:/proc/net/dev:%s", d->name); + + if (config_exists(buf, "enabled")) + d->enabled = config_get_boolean_ondemand(buf, "enabled", d->enabled); + if (config_exists(buf, "virtual")) + d->virtual = config_get_boolean(buf, "virtual", d->virtual); if(d->enabled == CONFIG_BOOLEAN_NO) continue; - d->do_bandwidth = config_get_boolean_ondemand(buffer, "bandwidth", do_bandwidth); - d->do_packets = config_get_boolean_ondemand(buffer, "packets", do_packets); - d->do_errors = config_get_boolean_ondemand(buffer, "errors", do_errors); - d->do_drops = config_get_boolean_ondemand(buffer, "drops", do_drops); - d->do_fifo = config_get_boolean_ondemand(buffer, "fifo", do_fifo); - d->do_compressed = config_get_boolean_ondemand(buffer, "compressed", do_compressed); - d->do_events = config_get_boolean_ondemand(buffer, "events", do_events); - d->do_speed = config_get_boolean_ondemand(buffer, "speed", do_speed); - d->do_duplex = config_get_boolean_ondemand(buffer, "duplex", do_duplex); - d->do_operstate = config_get_boolean_ondemand(buffer, "operstate", do_operstate); - d->do_carrier = config_get_boolean_ondemand(buffer, "carrier", do_carrier); - d->do_mtu = config_get_boolean_ondemand(buffer, "mtu", do_mtu); + d->do_bandwidth = do_bandwidth; + d->do_packets = do_packets; + d->do_errors = do_errors; + d->do_drops = do_drops; + d->do_fifo = do_fifo; + d->do_compressed = do_compressed; + d->do_events = do_events; + d->do_speed = do_speed; + d->do_duplex = do_duplex; + d->do_operstate = do_operstate; + d->do_carrier = do_carrier; + d->do_mtu = do_mtu; + + if (config_exists(buf, "bandwidth")) + d->do_bandwidth = config_get_boolean_ondemand(buf, "bandwidth", do_bandwidth); + if (config_exists(buf, "packets")) + d->do_packets = config_get_boolean_ondemand(buf, "packets", do_packets); + if (config_exists(buf, "errors")) + d->do_errors = config_get_boolean_ondemand(buf, "errors", do_errors); + if (config_exists(buf, "drops")) + d->do_drops = config_get_boolean_ondemand(buf, "drops", do_drops); + if (config_exists(buf, "fifo")) + d->do_fifo = config_get_boolean_ondemand(buf, "fifo", do_fifo); + if (config_exists(buf, "compressed")) + d->do_compressed = config_get_boolean_ondemand(buf, "compressed", do_compressed); + if (config_exists(buf, "events")) + d->do_events = config_get_boolean_ondemand(buf, "events", do_events); + if (config_exists(buf, "speed")) + d->do_speed = config_get_boolean_ondemand(buf, "speed", do_speed); + if (config_exists(buf, "duplex")) + d->do_duplex = config_get_boolean_ondemand(buf, "duplex", do_duplex); + if (config_exists(buf, "operstate")) + d->do_operstate = config_get_boolean_ondemand(buf, "operstate", do_operstate); + if (config_exists(buf, "carrier")) + d->do_carrier = config_get_boolean_ondemand(buf, "carrier", do_carrier); + if (config_exists(buf, "mtu")) + d->do_mtu = config_get_boolean_ondemand(buf, "mtu", do_mtu); } if(unlikely(!d->enabled)) @@ -1008,6 +1409,11 @@ int do_proc_net_dev(int update_every, usec_t dt) { rrddim_set_by_pointer(d->st_bandwidth, d->rd_tbytes, (collected_number)d->tbytes); rrdset_done(d->st_bandwidth); + if(d->cgroup_netdev_link) + cgroup_netdev_add_bandwidth(d->cgroup_netdev_link, + d->flipped ? d->rd_tbytes->collector.last_stored_value : -d->rd_rbytes->collector.last_stored_value, + d->flipped ? -d->rd_rbytes->collector.last_stored_value : d->rd_tbytes->collector.last_stored_value); + // update the interface speed if(d->filename_speed) { if(unlikely(!d->chart_var_speed)) { @@ -1462,6 +1868,8 @@ int do_proc_net_dev(int update_every, usec_t dt) { rrddim_set_by_pointer(d->st_events, d->rd_tcarrier, (collected_number)d->tcarrier); rrdset_done(d->st_events); } + + d->function_ready = true; } if(do_bandwidth == CONFIG_BOOLEAN_YES || (do_bandwidth == CONFIG_BOOLEAN_AUTO && @@ -1518,6 +1926,9 @@ void *netdev_main(void *ptr) netdata_thread_cleanup_push(netdev_main_cleanup, ptr); + rrd_collector_started(); + rrd_function_add(localhost, NULL, "network-interfaces", 10, RRDFUNCTIONS_NETDEV_HELP, true, netdev_function_net_interfaces, NULL); + usec_t step = localhost->rrd_update_every * USEC_PER_SEC; heartbeat_t hb; heartbeat_init(&hb); @@ -1529,11 +1940,17 @@ void *netdev_main(void *ptr) if (unlikely(!service_running(SERVICE_COLLECTORS))) break; + cgroup_netdev_reset_all(); + worker_is_busy(0); + + netdata_mutex_lock(&netdev_dev_mutex); if(do_proc_net_dev(localhost->rrd_update_every, hb_dt)) break; + netdata_mutex_unlock(&netdev_dev_mutex); } netdata_thread_cleanup_pop(1); + return NULL; } diff --git a/collectors/proc.plugin/proc_net_softnet_stat.c b/collectors/proc.plugin/proc_net_softnet_stat.c index dfd372b2a..2f01b8859 100644 --- a/collectors/proc.plugin/proc_net_softnet_stat.c +++ b/collectors/proc.plugin/proc_net_softnet_stat.c @@ -111,12 +111,12 @@ int do_proc_net_softnet_stat(int update_every, usec_t dt) { if(do_per_core) { for(l = 0; l < lines ;l++) { char id[50+1]; - snprintfz(id, 50, "cpu%zu_softnet_stat", l); + snprintfz(id, sizeof(id) - 1,"cpu%zu_softnet_stat", l); st = rrdset_find_active_bytype_localhost("cpu", id); if(unlikely(!st)) { char title[100+1]; - snprintfz(title, 100, "CPU softnet_stat"); + snprintfz(title, sizeof(title) - 1, "CPU softnet_stat"); st = rrdset_create_localhost( "cpu" diff --git a/collectors/proc.plugin/proc_net_wireless.c b/collectors/proc.plugin/proc_net_wireless.c index 08ab2eada..c7efa3335 100644 --- a/collectors/proc.plugin/proc_net_wireless.c +++ b/collectors/proc.plugin/proc_net_wireless.c @@ -85,12 +85,13 @@ static struct netwireless { static void netwireless_free_st(struct netwireless *wireless_dev) { - if (wireless_dev->st_status) rrdset_is_obsolete(wireless_dev->st_status); - if (wireless_dev->st_link) rrdset_is_obsolete(wireless_dev->st_link); - if (wireless_dev->st_level) rrdset_is_obsolete(wireless_dev->st_level); - if (wireless_dev->st_noise) rrdset_is_obsolete(wireless_dev->st_noise); - if (wireless_dev->st_discarded_packets) rrdset_is_obsolete(wireless_dev->st_discarded_packets); - if (wireless_dev->st_missed_beacon) rrdset_is_obsolete(wireless_dev->st_missed_beacon); + if (wireless_dev->st_status) rrdset_is_obsolete___safe_from_collector_thread(wireless_dev->st_status); + if (wireless_dev->st_link) rrdset_is_obsolete___safe_from_collector_thread(wireless_dev->st_link); + if (wireless_dev->st_level) rrdset_is_obsolete___safe_from_collector_thread(wireless_dev->st_level); + if (wireless_dev->st_noise) rrdset_is_obsolete___safe_from_collector_thread(wireless_dev->st_noise); + if (wireless_dev->st_discarded_packets) + rrdset_is_obsolete___safe_from_collector_thread(wireless_dev->st_discarded_packets); + if (wireless_dev->st_missed_beacon) rrdset_is_obsolete___safe_from_collector_thread(wireless_dev->st_missed_beacon); wireless_dev->st_status = NULL; wireless_dev->st_link = NULL; diff --git a/collectors/proc.plugin/proc_pagetypeinfo.c b/collectors/proc.plugin/proc_pagetypeinfo.c index e5318ce8f..fc5496c63 100644 --- a/collectors/proc.plugin/proc_pagetypeinfo.c +++ b/collectors/proc.plugin/proc_pagetypeinfo.c @@ -211,7 +211,7 @@ int do_proc_pagetypeinfo(int update_every, usec_t dt) { ); for (o = 0; o < pageorders_cnt; o++) { char id[3+1]; - snprintfz(id, 3, "%lu", o); + snprintfz(id, sizeof(id) - 1, "%lu", o); char name[20+1]; dim_name(name, o, pagesize); @@ -234,7 +234,7 @@ int do_proc_pagetypeinfo(int update_every, usec_t dt) { // "pagetype Node" + NUMA-NodeId + ZoneName + TypeName char setid[13+1+2+1+MAX_ZONETYPE_NAME+1+MAX_PAGETYPE_NAME+1]; - snprintfz(setid, 13+1+2+1+MAX_ZONETYPE_NAME+1+MAX_PAGETYPE_NAME, "pagetype_Node%d_%s_%s", pgl->node, pgl->zone, pgl->type); + snprintfz(setid, sizeof(setid) - 1, "pagetype_Node%d_%s_%s", pgl->node, pgl->zone, pgl->type); // Skip explicitly refused charts if (simple_pattern_matches(filter_types, setid)) @@ -260,14 +260,14 @@ int do_proc_pagetypeinfo(int update_every, usec_t dt) { ); char node[50+1]; - snprintfz(node, 50, "node%d", pgl->node); + snprintfz(node, sizeof(node) - 1, "node%d", pgl->node); rrdlabels_add(st_nodezonetype[p]->rrdlabels, "node_id", node, RRDLABEL_SRC_AUTO); rrdlabels_add(st_nodezonetype[p]->rrdlabels, "node_zone", pgl->zone, RRDLABEL_SRC_AUTO); rrdlabels_add(st_nodezonetype[p]->rrdlabels, "node_type", pgl->type, RRDLABEL_SRC_AUTO); for (o = 0; o < pageorders_cnt; o++) { char dimid[3+1]; - snprintfz(dimid, 3, "%lu", o); + snprintfz(dimid, sizeof(dimid) - 1, "%lu", o); char dimname[20+1]; dim_name(dimname, o, pagesize); diff --git a/collectors/proc.plugin/proc_softirqs.c b/collectors/proc.plugin/proc_softirqs.c index ccf46cb8a..5f0502f66 100644 --- a/collectors/proc.plugin/proc_softirqs.c +++ b/collectors/proc.plugin/proc_softirqs.c @@ -197,10 +197,10 @@ int do_proc_softirqs(int update_every, usec_t dt) { if (unlikely(core_sum == 0)) continue; // try next core char id[50 + 1]; - snprintfz(id, 50, "cpu%d_softirqs", c); + snprintfz(id, sizeof(id) - 1, "cpu%d_softirqs", c); char title[100 + 1]; - snprintfz(title, 100, "CPU softirqs"); + snprintfz(title, sizeof(title) - 1, "CPU softirqs"); core_st[c] = rrdset_create_localhost( "cpu" @@ -218,7 +218,7 @@ int do_proc_softirqs(int update_every, usec_t dt) { ); char core[50+1]; - snprintfz(core, 50, "cpu%d", c); + snprintfz(core, sizeof(core) - 1, "cpu%d", c); rrdlabels_add(core_st[c]->rrdlabels, "cpu", core, RRDLABEL_SRC_AUTO); } diff --git a/collectors/proc.plugin/proc_spl_kstat_zfs.c b/collectors/proc.plugin/proc_spl_kstat_zfs.c index 428ef0d32..27178b60f 100644 --- a/collectors/proc.plugin/proc_spl_kstat_zfs.c +++ b/collectors/proc.plugin/proc_spl_kstat_zfs.c @@ -240,7 +240,7 @@ DICTIONARY *zfs_pools = NULL; void disable_zfs_pool_state(struct zfs_pool *pool) { if (pool->st) - rrdset_is_obsolete(pool->st); + rrdset_is_obsolete___safe_from_collector_thread(pool->st); pool->st = NULL; @@ -335,7 +335,10 @@ int do_proc_spl_kstat_zfs_pool_state(int update_every, usec_t dt) if (likely(do_zfs_pool_state)) { DIR *dir = opendir(dirname); if (unlikely(!dir)) { - collector_error("Cannot read directory '%s'", dirname); + if (errno == ENOENT) + collector_info("Cannot read directory '%s'", dirname); + else + collector_error("Cannot read directory '%s'", dirname); return 1; } diff --git a/collectors/proc.plugin/proc_stat.c b/collectors/proc.plugin/proc_stat.c index a4f76796b..84160f22f 100644 --- a/collectors/proc.plugin/proc_stat.c +++ b/collectors/proc.plugin/proc_stat.c @@ -1038,7 +1038,7 @@ int do_proc_stat(int update_every, usec_t dt) { ); char corebuf[50+1]; - snprintfz(corebuf, 50, "cpu%zu", core); + snprintfz(corebuf, sizeof(corebuf) - 1, "cpu%zu", core); rrdlabels_add(cpuidle_charts[core].st->rrdlabels, "cpu", corebuf, RRDLABEL_SRC_AUTO); char cpuidle_dim_id[RRD_ID_LENGTH_MAX + 1]; diff --git a/collectors/proc.plugin/sys_block_zram.c b/collectors/proc.plugin/sys_block_zram.c index f9166ace0..dac7cac0f 100644 --- a/collectors/proc.plugin/sys_block_zram.c +++ b/collectors/proc.plugin/sys_block_zram.c @@ -3,7 +3,7 @@ #include "plugin_proc.h" #define PLUGIN_PROC_MODULE_ZRAM_NAME "/sys/block/zram" -#define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete(st); (st) = NULL; } } while(st) +#define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete___safe_from_collector_thread(st); (st) = NULL; } } while(st) typedef struct mm_stat { unsigned long long orig_data_size; diff --git a/collectors/proc.plugin/sys_class_drm.c b/collectors/proc.plugin/sys_class_drm.c index 284662cf6..3ed1fb875 100644 --- a/collectors/proc.plugin/sys_class_drm.c +++ b/collectors/proc.plugin/sys_class_drm.c @@ -648,13 +648,17 @@ static int read_clk_freq_file(procfile **p_ff, const char *const pathname, colle *p_ff = procfile_open(pathname, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); if(unlikely(!*p_ff)) return -2; } - + if(unlikely(NULL == (*p_ff = procfile_readall(*p_ff)))) return -3; for(size_t l = 0; l < procfile_lines(*p_ff) ; l++) { + char *str_with_units = NULL; + if((*p_ff)->lines->lines[l].words >= 3 && !strcmp(procfile_lineword((*p_ff), l, 2), "*")) //format: X: collected_number * + str_with_units = procfile_lineword((*p_ff), l, 1); + else if ((*p_ff)->lines->lines[l].words == 2 && !strcmp(procfile_lineword((*p_ff), l, 1), "*")) //format: collected_number * + str_with_units = procfile_lineword((*p_ff), l, 0); - if((*p_ff)->lines->lines[l].words >= 3 && !strcmp(procfile_lineword((*p_ff), l, 2), "*")){ - char *str_with_units = procfile_lineword((*p_ff), l, 1); + if (str_with_units) { char *delim = strchr(str_with_units, 'M'); char str_without_units[10]; memcpy(str_without_units, str_with_units, delim - str_with_units); @@ -707,7 +711,7 @@ static int do_rrd_util_gpu(struct card *const c){ else { collector_error("Cannot read util_gpu for %s: [%s]", c->pathname, c->id.marketing_name); freez((void *) c->pathname_util_gpu); - rrdset_is_obsolete(c->st_util_gpu); + rrdset_is_obsolete___safe_from_collector_thread(c->st_util_gpu); return 1; } } @@ -721,7 +725,7 @@ static int do_rrd_util_mem(struct card *const c){ else { collector_error("Cannot read util_mem for %s: [%s]", c->pathname, c->id.marketing_name); freez((void *) c->pathname_util_mem); - rrdset_is_obsolete(c->st_util_mem); + rrdset_is_obsolete___safe_from_collector_thread(c->st_util_mem); return 1; } } @@ -735,7 +739,7 @@ static int do_rrd_clk_gpu(struct card *const c){ else { collector_error("Cannot read clk_gpu for %s: [%s]", c->pathname, c->id.marketing_name); freez((void *) c->pathname_clk_gpu); - rrdset_is_obsolete(c->st_clk_gpu); + rrdset_is_obsolete___safe_from_collector_thread(c->st_clk_gpu); return 1; } } @@ -749,7 +753,7 @@ static int do_rrd_clk_mem(struct card *const c){ else { collector_error("Cannot read clk_mem for %s: [%s]", c->pathname, c->id.marketing_name); freez((void *) c->pathname_clk_mem); - rrdset_is_obsolete(c->st_clk_mem); + rrdset_is_obsolete___safe_from_collector_thread(c->st_clk_mem); return 1; } } @@ -771,8 +775,8 @@ static int do_rrd_vram(struct card *const c){ collector_error("Cannot read used_vram for %s: [%s]", c->pathname, c->id.marketing_name); freez((void *) c->pathname_mem_used_vram); freez((void *) c->pathname_mem_total_vram); - rrdset_is_obsolete(c->st_mem_usage_perc_vram); - rrdset_is_obsolete(c->st_mem_usage_vram); + rrdset_is_obsolete___safe_from_collector_thread(c->st_mem_usage_perc_vram); + rrdset_is_obsolete___safe_from_collector_thread(c->st_mem_usage_vram); return 1; } } @@ -794,8 +798,8 @@ static int do_rrd_vis_vram(struct card *const c){ collector_error("Cannot read used_vis_vram for %s: [%s]", c->pathname, c->id.marketing_name); freez((void *) c->pathname_mem_used_vis_vram); freez((void *) c->pathname_mem_total_vis_vram); - rrdset_is_obsolete(c->st_mem_usage_perc_vis_vram); - rrdset_is_obsolete(c->st_mem_usage_vis_vram); + rrdset_is_obsolete___safe_from_collector_thread(c->st_mem_usage_perc_vis_vram); + rrdset_is_obsolete___safe_from_collector_thread(c->st_mem_usage_vis_vram); return 1; } } @@ -817,8 +821,8 @@ static int do_rrd_gtt(struct card *const c){ collector_error("Cannot read used_gtt for %s: [%s]", c->pathname, c->id.marketing_name); freez((void *) c->pathname_mem_used_gtt); freez((void *) c->pathname_mem_total_gtt); - rrdset_is_obsolete(c->st_mem_usage_perc_gtt); - rrdset_is_obsolete(c->st_mem_usage_gtt); + rrdset_is_obsolete___safe_from_collector_thread(c->st_mem_usage_perc_gtt); + rrdset_is_obsolete___safe_from_collector_thread(c->st_mem_usage_gtt); return 1; } } diff --git a/collectors/proc.plugin/sys_class_power_supply.c b/collectors/proc.plugin/sys_class_power_supply.c index 8687ecb55..3f793b9c6 100644 --- a/collectors/proc.plugin/sys_class_power_supply.c +++ b/collectors/proc.plugin/sys_class_power_supply.c @@ -65,7 +65,7 @@ void power_supply_free(struct power_supply *ps) { // free capacity structure if(likely(ps->capacity)) { - if(likely(ps->capacity->st)) rrdset_is_obsolete(ps->capacity->st); + if(likely(ps->capacity->st)) rrdset_is_obsolete___safe_from_collector_thread(ps->capacity->st); freez(ps->capacity->filename); if(likely(ps->capacity->fd != -1)) close(ps->capacity->fd); files_num--; @@ -89,7 +89,7 @@ void power_supply_free(struct power_supply *ps) { } // free properties - if(likely(pr->st)) rrdset_is_obsolete(pr->st); + if(likely(pr->st)) rrdset_is_obsolete___safe_from_collector_thread(pr->st); freez(pr->name); freez(pr->title); freez(pr->units); diff --git a/collectors/proc.plugin/sys_devices_pci_aer.c b/collectors/proc.plugin/sys_devices_pci_aer.c index 296195182..563ebf051 100644 --- a/collectors/proc.plugin/sys_devices_pci_aer.c +++ b/collectors/proc.plugin/sys_devices_pci_aer.c @@ -240,8 +240,8 @@ int do_proc_sys_devices_pci_aer(int update_every, usec_t dt __maybe_unused) { continue; if(!a->st) { - const char *title; - const char *context; + const char *title = ""; + const char *context = ""; switch(a->type) { case AER_DEV_NONFATAL: diff --git a/collectors/proc.plugin/sys_fs_btrfs.c b/collectors/proc.plugin/sys_fs_btrfs.c index da89411bd..f1d6fe720 100644 --- a/collectors/proc.plugin/sys_fs_btrfs.c +++ b/collectors/proc.plugin/sys_fs_btrfs.c @@ -196,8 +196,8 @@ static inline int collect_btrfs_commits_stats(BTRFS_NODE *node, int update_every static inline void btrfs_free_commits_stats(BTRFS_NODE *node){ if(node->st_commits){ - rrdset_is_obsolete(node->st_commits); - rrdset_is_obsolete(node->st_commit_timings); + rrdset_is_obsolete___safe_from_collector_thread(node->st_commits); + rrdset_is_obsolete___safe_from_collector_thread(node->st_commit_timings); } freez(node->commit_stats_filename); node->commit_stats_filename = NULL; @@ -211,7 +211,7 @@ static inline void btrfs_free_disk(BTRFS_DISK *d) { static inline void btrfs_free_device(BTRFS_DEVICE *d) { if(d->st_error_stats) - rrdset_is_obsolete(d->st_error_stats); + rrdset_is_obsolete___safe_from_collector_thread(d->st_error_stats); freez(d->error_stats_filename); freez(d); } @@ -220,16 +220,16 @@ static inline void btrfs_free_node(BTRFS_NODE *node) { // collector_info("BTRFS: destroying '%s'", node->id); if(node->st_allocation_disks) - rrdset_is_obsolete(node->st_allocation_disks); + rrdset_is_obsolete___safe_from_collector_thread(node->st_allocation_disks); if(node->st_allocation_data) - rrdset_is_obsolete(node->st_allocation_data); + rrdset_is_obsolete___safe_from_collector_thread(node->st_allocation_data); if(node->st_allocation_metadata) - rrdset_is_obsolete(node->st_allocation_metadata); + rrdset_is_obsolete___safe_from_collector_thread(node->st_allocation_metadata); if(node->st_allocation_system) - rrdset_is_obsolete(node->st_allocation_system); + rrdset_is_obsolete___safe_from_collector_thread(node->st_allocation_system); freez(node->allocation_data_bytes_used_filename); freez(node->allocation_data_total_bytes_filename); @@ -392,14 +392,14 @@ static inline int find_btrfs_devices(BTRFS_NODE *node, const char *path) { continue; } - collector_info("BTRFS: device found '%s'", de->d_name); + // internal_error("BTRFS: device found '%s'", de->d_name); // -------------------------------------------------------------------- // search for it for(d = node->devices ; d ; d = d->next) { if(str2ll(de->d_name, NULL) == d->id){ - collector_info("BTRFS: existing device id '%d'", d->id); + // collector_info("BTRFS: existing device id '%d'", d->id); break; } } @@ -411,11 +411,11 @@ static inline int find_btrfs_devices(BTRFS_NODE *node, const char *path) { d = callocz(sizeof(BTRFS_DEVICE), 1); d->id = str2ll(de->d_name, NULL); - collector_info("BTRFS: new device with id '%d'", d->id); + // collector_info("BTRFS: new device with id '%d'", d->id); snprintfz(filename, FILENAME_MAX, "%s/%d/error_stats", path, d->id); d->error_stats_filename = strdupz(filename); - collector_info("BTRFS: error_stats_filename '%s'", filename); + // collector_info("BTRFS: error_stats_filename '%s'", filename); // link it d->next = node->devices; @@ -795,7 +795,7 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) { snprintfz(id, RRD_ID_LENGTH_MAX, "disk_%s", node->id); snprintfz(name, RRD_ID_LENGTH_MAX, "disk_%s", node->label); - snprintfz(title, 200, "BTRFS Physical Disk Allocation"); + snprintfz(title, sizeof(title) - 1, "BTRFS Physical Disk Allocation"); netdata_fix_chart_id(id); netdata_fix_chart_name(name); @@ -854,7 +854,7 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) { snprintfz(id, RRD_ID_LENGTH_MAX, "data_%s", node->id); snprintfz(name, RRD_ID_LENGTH_MAX, "data_%s", node->label); - snprintfz(title, 200, "BTRFS Data Allocation"); + snprintfz(title, sizeof(title) - 1, "BTRFS Data Allocation"); netdata_fix_chart_id(id); netdata_fix_chart_name(name); @@ -898,7 +898,7 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) { snprintfz(id, RRD_ID_LENGTH_MAX, "metadata_%s", node->id); snprintfz(name, RRD_ID_LENGTH_MAX, "metadata_%s", node->label); - snprintfz(title, 200, "BTRFS Metadata Allocation"); + snprintfz(title, sizeof(title) - 1, "BTRFS Metadata Allocation"); netdata_fix_chart_id(id); netdata_fix_chart_name(name); @@ -944,7 +944,7 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) { snprintfz(id, RRD_ID_LENGTH_MAX, "system_%s", node->id); snprintfz(name, RRD_ID_LENGTH_MAX, "system_%s", node->label); - snprintfz(title, 200, "BTRFS System Allocation"); + snprintfz(title, sizeof(title) - 1, "BTRFS System Allocation"); netdata_fix_chart_id(id); netdata_fix_chart_name(name); @@ -988,7 +988,7 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) { snprintfz(id, RRD_ID_LENGTH_MAX, "commits_%s", node->id); snprintfz(name, RRD_ID_LENGTH_MAX, "commits_%s", node->label); - snprintfz(title, 200, "BTRFS Commits"); + snprintfz(title, sizeof(title) - 1, "BTRFS Commits"); netdata_fix_chart_id(id); netdata_fix_chart_name(name); @@ -1021,7 +1021,7 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) { snprintfz(id, RRD_ID_LENGTH_MAX, "commits_perc_time_%s", node->id); snprintfz(name, RRD_ID_LENGTH_MAX, "commits_perc_time_%s", node->label); - snprintfz(title, 200, "BTRFS Commits Time Share"); + snprintfz(title, sizeof(title) - 1, "BTRFS Commits Time Share"); netdata_fix_chart_id(id); netdata_fix_chart_name(name); @@ -1055,7 +1055,7 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) { snprintfz(id, RRD_ID_LENGTH_MAX, "commit_timings_%s", node->id); snprintfz(name, RRD_ID_LENGTH_MAX, "commit_timings_%s", node->label); - snprintfz(title, 200, "BTRFS Commit Timings"); + snprintfz(title, sizeof(title) - 1, "BTRFS Commit Timings"); netdata_fix_chart_id(id); netdata_fix_chart_name(name); @@ -1101,7 +1101,7 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) { snprintfz(id, RRD_ID_LENGTH_MAX, "device_errors_dev%d_%s", d->id, node->id); snprintfz(name, RRD_ID_LENGTH_MAX, "device_errors_dev%d_%s", d->id, node->label); - snprintfz(title, 200, "BTRFS Device Errors"); + snprintfz(title, sizeof(title) - 1, "BTRFS Device Errors"); netdata_fix_chart_id(id); netdata_fix_chart_name(name); diff --git a/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py b/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py index bb59d88e1..1995ad681 100644 --- a/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py +++ b/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py @@ -87,7 +87,7 @@ def find_pds(d): elif row.startswith('Temperature'): v = row.split(':')[-1].split()[0] pd.temperature = v - elif row.startswith('NCQ status'): + elif row.startswith(('NCQ status', 'Device Phy')) or not row: if pd.id and pd.state and pd.smart_warnings: pds.append(pd) pd = PD() diff --git a/collectors/python.d.plugin/adaptec_raid/integrations/adaptecraid.md b/collectors/python.d.plugin/adaptec_raid/integrations/adaptecraid.md index 59e359d0d..13d22ba54 100644 --- a/collectors/python.d.plugin/adaptec_raid/integrations/adaptecraid.md +++ b/collectors/python.d.plugin/adaptec_raid/integrations/adaptecraid.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "AdaptecRAID" learn_status: "Published" learn_rel_path: "Data Collection/Storage, Mount Points and Filesystems" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -150,10 +151,10 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | </details> diff --git a/collectors/python.d.plugin/alarms/integrations/netdata_agent_alarms.md b/collectors/python.d.plugin/alarms/integrations/netdata_agent_alarms.md index 95e4a4a3b..9fb69878a 100644 --- a/collectors/python.d.plugin/alarms/integrations/netdata_agent_alarms.md +++ b/collectors/python.d.plugin/alarms/integrations/netdata_agent_alarms.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Netdata Agent alarms" learn_status: "Published" learn_rel_path: "Data Collection/Other" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -110,17 +111,17 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| url | Netdata agent alarms endpoint to collect from. Can be local or remote so long as reachable by agent. | http://127.0.0.1:19999/api/v1/alarms?all | True | -| status_map | Mapping of alarm status to integer number that will be the metric value collected. | {"CLEAR": 0, "WARNING": 1, "CRITICAL": 2} | True | -| collect_alarm_values | set to true to include a chart with calculated alarm values over time. | False | True | -| alarm_status_chart_type | define the type of chart for plotting status over time e.g. 'line' or 'stacked'. | line | True | -| alarm_contains_words | A "," separated list of words you want to filter alarm names for. For example 'cpu,load' would filter for only alarms with "cpu" or "load" in alarm name. Default includes all. | | True | -| alarm_excludes_words | A "," separated list of words you want to exclude based on alarm name. For example 'cpu,load' would exclude all alarms with "cpu" or "load" in alarm name. Default excludes None. | | True | -| update_every | Sets the default data collection frequency. | 10 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | +| url | Netdata agent alarms endpoint to collect from. Can be local or remote so long as reachable by agent. | http://127.0.0.1:19999/api/v1/alarms?all | yes | +| status_map | Mapping of alarm status to integer number that will be the metric value collected. | {"CLEAR": 0, "WARNING": 1, "CRITICAL": 2} | yes | +| collect_alarm_values | set to true to include a chart with calculated alarm values over time. | no | yes | +| alarm_status_chart_type | define the type of chart for plotting status over time e.g. 'line' or 'stacked'. | line | yes | +| alarm_contains_words | A "," separated list of words you want to filter alarm names for. For example 'cpu,load' would filter for only alarms with "cpu" or "load" in alarm name. Default includes all. | | yes | +| alarm_excludes_words | A "," separated list of words you want to exclude based on alarm name. For example 'cpu,load' would exclude all alarms with "cpu" or "load" in alarm name. Default excludes None. | | yes | +| update_every | Sets the default data collection frequency. | 10 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | </details> diff --git a/collectors/python.d.plugin/am2320/integrations/am2320.md b/collectors/python.d.plugin/am2320/integrations/am2320.md index 9b41a8fd6..72b351eb5 100644 --- a/collectors/python.d.plugin/am2320/integrations/am2320.md +++ b/collectors/python.d.plugin/am2320/integrations/am2320.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "AM2320" learn_status: "Published" learn_rel_path: "Data Collection/Hardware Devices and Sensors" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -130,11 +131,11 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | </details> diff --git a/collectors/python.d.plugin/beanstalk/integrations/beanstalk.md b/collectors/python.d.plugin/beanstalk/integrations/beanstalk.md index cf2f0dac1..5095c0c28 100644 --- a/collectors/python.d.plugin/beanstalk/integrations/beanstalk.md +++ b/collectors/python.d.plugin/beanstalk/integrations/beanstalk.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Beanstalk" learn_status: "Published" learn_rel_path: "Data Collection/Message Brokers" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -141,13 +142,13 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | -| host | IP or URL to a beanstalk service. | 127.0.0.1 | False | -| port | Port to the IP or URL to a beanstalk service. | 11300 | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | +| host | IP or URL to a beanstalk service. | 127.0.0.1 | no | +| port | Port to the IP or URL to a beanstalk service. | 11300 | no | </details> diff --git a/collectors/python.d.plugin/bind_rndc/integrations/isc_bind_rndc.md b/collectors/python.d.plugin/bind_rndc/integrations/isc_bind_rndc.md index cc847272d..163f8282c 100644 --- a/collectors/python.d.plugin/bind_rndc/integrations/isc_bind_rndc.md +++ b/collectors/python.d.plugin/bind_rndc/integrations/isc_bind_rndc.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "ISC Bind (RNDC)" learn_status: "Published" learn_rel_path: "Data Collection/DNS and DHCP Servers" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -163,12 +164,12 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | -| named_stats_path | Path to the named stats, after being dumped by `nrdc` | /var/log/bind/named.stats | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | +| named_stats_path | Path to the named stats, after being dumped by `nrdc` | /var/log/bind/named.stats | no | </details> diff --git a/collectors/python.d.plugin/boinc/integrations/boinc.md b/collectors/python.d.plugin/boinc/integrations/boinc.md index 961f79537..d6874d455 100644 --- a/collectors/python.d.plugin/boinc/integrations/boinc.md +++ b/collectors/python.d.plugin/boinc/integrations/boinc.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "BOINC" learn_status: "Published" learn_rel_path: "Data Collection/Distributed Computing Systems" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -123,14 +124,14 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | -| hostname | Define a hostname where boinc is running. | localhost | False | -| port | The port of boinc RPC interface. | | False | -| password | Provide a password to connect to a boinc RPC interface. | | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | +| hostname | Define a hostname where boinc is running. | localhost | no | +| port | The port of boinc RPC interface. | | no | +| password | Provide a password to connect to a boinc RPC interface. | | no | </details> diff --git a/collectors/python.d.plugin/ceph/integrations/ceph.md b/collectors/python.d.plugin/ceph/integrations/ceph.md index 051121148..cfda01fbe 100644 --- a/collectors/python.d.plugin/ceph/integrations/ceph.md +++ b/collectors/python.d.plugin/ceph/integrations/ceph.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Ceph" learn_status: "Published" learn_rel_path: "Data Collection/Storage, Mount Points and Filesystems" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -139,14 +140,14 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | -| config_file | Ceph config file | | True | -| keyring_file | Ceph keyring file. netdata user must be added into ceph group and keyring file must be read group permission. | | True | -| rados_id | A rados user id to use for connecting to the Ceph cluster. | admin | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | +| config_file | Ceph config file | | yes | +| keyring_file | Ceph keyring file. netdata user must be added into ceph group and keyring file must be read group permission. | | yes | +| rados_id | A rados user id to use for connecting to the Ceph cluster. | admin | no | </details> diff --git a/collectors/python.d.plugin/changefinder/integrations/python.d_changefinder.md b/collectors/python.d.plugin/changefinder/integrations/python.d_changefinder.md index 2265d9620..c338c9374 100644 --- a/collectors/python.d.plugin/changefinder/integrations/python.d_changefinder.md +++ b/collectors/python.d.plugin/changefinder/integrations/python.d_changefinder.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "python.d changefinder" learn_status: "Published" learn_rel_path: "Data Collection/Other" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -144,15 +145,15 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| charts_regex | what charts to pull data for - A regex like `system\..*/` or `system\..*/apps.cpu/apps.mem` etc. | system\..* | True | -| charts_to_exclude | charts to exclude, useful if you would like to exclude some specific charts. note: should be a ',' separated string like 'chart.name,chart.name'. | | False | -| mode | get ChangeFinder scores 'per_dim' or 'per_chart'. | per_chart | True | -| cf_r | default parameters that can be passed to the changefinder library. | 0.5 | False | -| cf_order | default parameters that can be passed to the changefinder library. | 1 | False | -| cf_smooth | default parameters that can be passed to the changefinder library. | 15 | False | -| cf_threshold | the percentile above which scores will be flagged. | 99 | False | -| n_score_samples | the number of recent scores to use when calculating the percentile of the changefinder score. | 14400 | False | -| show_scores | set to true if you also want to chart the percentile scores in addition to the flags. (mainly useful for debugging or if you want to dive deeper on how the scores are evolving over time) | False | False | +| charts_regex | what charts to pull data for - A regex like `system\..*/` or `system\..*/apps.cpu/apps.mem` etc. | system\..* | yes | +| charts_to_exclude | charts to exclude, useful if you would like to exclude some specific charts. note: should be a ',' separated string like 'chart.name,chart.name'. | | no | +| mode | get ChangeFinder scores 'per_dim' or 'per_chart'. | per_chart | yes | +| cf_r | default parameters that can be passed to the changefinder library. | 0.5 | no | +| cf_order | default parameters that can be passed to the changefinder library. | 1 | no | +| cf_smooth | default parameters that can be passed to the changefinder library. | 15 | no | +| cf_threshold | the percentile above which scores will be flagged. | 99 | no | +| n_score_samples | the number of recent scores to use when calculating the percentile of the changefinder score. | 14400 | no | +| show_scores | set to true if you also want to chart the percentile scores in addition to the flags. (mainly useful for debugging or if you want to dive deeper on how the scores are evolving over time) | no | no | </details> diff --git a/collectors/python.d.plugin/dovecot/integrations/dovecot.md b/collectors/python.d.plugin/dovecot/integrations/dovecot.md index 4057a5b6c..4e7952765 100644 --- a/collectors/python.d.plugin/dovecot/integrations/dovecot.md +++ b/collectors/python.d.plugin/dovecot/integrations/dovecot.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Dovecot" learn_status: "Published" learn_rel_path: "Data Collection/Mail Servers" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -123,14 +124,14 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | -| socket | Use this socket to communicate with Devcot | /var/run/dovecot/stats | False | -| host | Instead of using a socket, you can point the collector to an ip for devcot statistics. | | False | -| port | Used in combination with host, configures the port devcot listens to. | | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | +| socket | Use this socket to communicate with Devcot | /var/run/dovecot/stats | no | +| host | Instead of using a socket, you can point the collector to an ip for devcot statistics. | | no | +| port | Used in combination with host, configures the port devcot listens to. | | no | </details> diff --git a/collectors/python.d.plugin/example/integrations/example_collector.md b/collectors/python.d.plugin/example/integrations/example_collector.md index 44b405a7d..7dded67ba 100644 --- a/collectors/python.d.plugin/example/integrations/example_collector.md +++ b/collectors/python.d.plugin/example/integrations/example_collector.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Example collector" learn_status: "Published" learn_rel_path: "Data Collection/Other" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -110,14 +111,14 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| num_lines | The number of lines to create. | 4 | False | -| lower | The lower bound of numbers to randomly sample from. | 0 | False | -| upper | The upper bound of numbers to randomly sample from. | 100 | False | -| update_every | Sets the default data collection frequency. | 1 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | +| num_lines | The number of lines to create. | 4 | no | +| lower | The lower bound of numbers to randomly sample from. | 0 | no | +| upper | The upper bound of numbers to randomly sample from. | 100 | no | +| update_every | Sets the default data collection frequency. | 1 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | </details> diff --git a/collectors/python.d.plugin/exim/integrations/exim.md b/collectors/python.d.plugin/exim/integrations/exim.md index 328d17870..f0ae33d3e 100644 --- a/collectors/python.d.plugin/exim/integrations/exim.md +++ b/collectors/python.d.plugin/exim/integrations/exim.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Exim" learn_status: "Published" learn_rel_path: "Data Collection/Mail Servers" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -129,12 +130,12 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | -| command | Path and command to the `exim` binary | exim -bpc | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | +| command | Path and command to the `exim` binary | exim -bpc | no | </details> diff --git a/collectors/python.d.plugin/fail2ban/integrations/fail2ban.md b/collectors/python.d.plugin/fail2ban/integrations/fail2ban.md index 64bfe21ba..a7116be5e 100644 --- a/collectors/python.d.plugin/fail2ban/integrations/fail2ban.md +++ b/collectors/python.d.plugin/fail2ban/integrations/fail2ban.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Fail2ban" learn_status: "Published" learn_rel_path: "Data Collection/Authentication and Authorization" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -149,15 +150,15 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| log_path | path to fail2ban.log. | /var/log/fail2ban.log | False | -| conf_path | path to jail.local/jail.conf. | /etc/fail2ban/jail.local | False | -| conf_dir | path to jail.d/. | /etc/fail2ban/jail.d/ | False | -| exclude | jails you want to exclude from autodetection. | | False | -| update_every | Sets the default data collection frequency. | 1 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | +| log_path | path to fail2ban.log. | /var/log/fail2ban.log | no | +| conf_path | path to jail.local/jail.conf. | /etc/fail2ban/jail.local | no | +| conf_dir | path to jail.d/. | /etc/fail2ban/jail.d/ | no | +| exclude | jails you want to exclude from autodetection. | | no | +| update_every | Sets the default data collection frequency. | 1 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | </details> diff --git a/collectors/python.d.plugin/gearman/integrations/gearman.md b/collectors/python.d.plugin/gearman/integrations/gearman.md index f988e7448..3923d1401 100644 --- a/collectors/python.d.plugin/gearman/integrations/gearman.md +++ b/collectors/python.d.plugin/gearman/integrations/gearman.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Gearman" learn_status: "Published" learn_rel_path: "Data Collection/Distributed Computing Systems" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -129,16 +130,16 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | -| host | URL or IP where gearman is running. | localhost | False | -| port | Port of URL or IP where gearman is running. | 4730 | False | -| tls | Use tls to connect to gearman. | false | False | -| cert | Provide a certificate file if needed to connect to a TLS gearman instance. | | False | -| key | Provide a key file if needed to connect to a TLS gearman instance. | | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | +| host | URL or IP where gearman is running. | localhost | no | +| port | Port of URL or IP where gearman is running. | 4730 | no | +| tls | Use tls to connect to gearman. | false | no | +| cert | Provide a certificate file if needed to connect to a TLS gearman instance. | | no | +| key | Provide a key file if needed to connect to a TLS gearman instance. | | no | </details> diff --git a/collectors/python.d.plugin/go_expvar/integrations/go_applications_expvar.md b/collectors/python.d.plugin/go_expvar/integrations/go_applications_expvar.md index be4db4b70..8d61fa2ae 100644 --- a/collectors/python.d.plugin/go_expvar/integrations/go_applications_expvar.md +++ b/collectors/python.d.plugin/go_expvar/integrations/go_applications_expvar.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Go applications (EXPVAR)" learn_status: "Published" learn_rel_path: "Data Collection/APM" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -195,16 +196,16 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | -| url | the URL and port of the expvar endpoint. Please include the whole path of the endpoint, as the expvar handler can be installed in a non-standard location. | | True | -| user | If the URL is password protected, this is the username to use. | | False | -| pass | If the URL is password protected, this is the password to use. | | False | -| collect_memstats | Enables charts for Go runtime's memory statistics. | | False | -| extra_charts | Defines extra data/charts to monitor, please see the example below. | | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | +| url | the URL and port of the expvar endpoint. Please include the whole path of the endpoint, as the expvar handler can be installed in a non-standard location. | | yes | +| user | If the URL is password protected, this is the username to use. | | no | +| pass | If the URL is password protected, this is the password to use. | | no | +| collect_memstats | Enables charts for Go runtime's memory statistics. | | no | +| extra_charts | Defines extra data/charts to monitor, please see the example below. | | no | </details> diff --git a/collectors/python.d.plugin/haproxy/haproxy.chart.py b/collectors/python.d.plugin/haproxy/haproxy.chart.py index 6f94c9a07..f412febb7 100644 --- a/collectors/python.d.plugin/haproxy/haproxy.chart.py +++ b/collectors/python.d.plugin/haproxy/haproxy.chart.py @@ -44,6 +44,7 @@ ORDER = [ 'bctime', 'health_sup', 'health_sdown', + 'health_smaint', 'health_bdown', 'health_idle' ] @@ -167,6 +168,10 @@ CHARTS = { 'options': [None, 'Backend Servers In UP State', 'health servers', 'health', 'haproxy_hs.up', 'line'], 'lines': [] }, + 'health_smaint': { + 'options': [None, 'Backend Servers In MAINT State', 'maintenance servers', 'health', 'haproxy_hs.maint', 'line'], + 'lines': [] + }, 'health_bdown': { 'options': [None, 'Is Backend Failed?', 'boolean', 'health', 'haproxy_hb.down', 'line'], 'lines': [] @@ -267,6 +272,8 @@ class Service(UrlService, SocketService): if server_status(server, name, 'UP')]) stat_data['hsdown_' + idx] = len([server for server in self.data['servers'] if server_status(server, name, 'DOWN')]) + stat_data['hsmaint_' + idx] = len([server for server in self.data['servers'] + if server_status(server, name, 'MAINT')]) stat_data['hbdown_' + idx] = 1 if backend.get('status') == 'DOWN' else 0 for metric in BACKEND_METRICS: stat_data['_'.join(['backend', metric, idx])] = backend.get(metric) or 0 @@ -321,6 +328,7 @@ class Service(UrlService, SocketService): BACKEND_METRICS[metric]['divisor']]) self.definitions['health_sup']['lines'].append(['hsup_' + idx, name, 'absolute']) self.definitions['health_sdown']['lines'].append(['hsdown_' + idx, name, 'absolute']) + self.definitions['health_smaint']['lines'].append(['hsmaint_' + idx, name, 'absolute']) self.definitions['health_bdown']['lines'].append(['hbdown_' + idx, name, 'absolute']) @@ -352,7 +360,7 @@ def parse_data_(data): def server_status(server, backend_name, status='DOWN'): - return server.get('# pxname') == backend_name and server.get('status') == status + return server.get('# pxname') == backend_name and server.get('status').partition(' ')[0] == status def url_remove_params(url): diff --git a/collectors/python.d.plugin/hddtemp/integrations/hdd_temperature.md b/collectors/python.d.plugin/hddtemp/integrations/hdd_temperature.md index 29512bba3..4a1504f07 100644 --- a/collectors/python.d.plugin/hddtemp/integrations/hdd_temperature.md +++ b/collectors/python.d.plugin/hddtemp/integrations/hdd_temperature.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "HDD temperature" learn_status: "Published" learn_rel_path: "Data Collection/Hardware Devices and Sensors" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -121,14 +122,14 @@ By default this collector will try to autodetect disks (autodetection works only | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 1 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | local | False | -| devices | Array of desired disks to detect, in case their name doesn't start with `sd`. | | False | -| host | The IP or HOSTNAME to connect to. | localhost | True | -| port | The port to connect to. | 7634 | False | +| update_every | Sets the default data collection frequency. | 1 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | local | no | +| devices | Array of desired disks to detect, in case their name doesn't start with `sd`. | | no | +| host | The IP or HOSTNAME to connect to. | localhost | yes | +| port | The port to connect to. | 7634 | no | </details> diff --git a/collectors/python.d.plugin/hpssa/integrations/hp_smart_storage_arrays.md b/collectors/python.d.plugin/hpssa/integrations/hp_smart_storage_arrays.md index 8ec7a5c5c..d46cc9065 100644 --- a/collectors/python.d.plugin/hpssa/integrations/hp_smart_storage_arrays.md +++ b/collectors/python.d.plugin/hpssa/integrations/hp_smart_storage_arrays.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "HP Smart Storage Arrays" learn_status: "Published" learn_rel_path: "Data Collection/Storage, Mount Points and Filesystems" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -152,13 +153,13 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | -| ssacli_path | Path to the `ssacli` command line utility. Configure this if `ssacli` is not in the $PATH | | False | -| use_sudo | Whether or not to use `sudo` to execute `ssacli` | True | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | +| ssacli_path | Path to the `ssacli` command line utility. Configure this if `ssacli` is not in the $PATH | | no | +| use_sudo | Whether or not to use `sudo` to execute `ssacli` | True | no | </details> diff --git a/collectors/python.d.plugin/icecast/integrations/icecast.md b/collectors/python.d.plugin/icecast/integrations/icecast.md index 06c317864..12d7d59ee 100644 --- a/collectors/python.d.plugin/icecast/integrations/icecast.md +++ b/collectors/python.d.plugin/icecast/integrations/icecast.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Icecast" learn_status: "Published" learn_rel_path: "Data Collection/Media Services" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -112,14 +113,14 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | -| url | The URL (and port) to the icecast server. Needs to also include `/status-json.xsl` | http://localhost:8443/status-json.xsl | False | -| user | Username to use to connect to `url` if it's password protected. | | False | -| pass | Password to use to connect to `url` if it's password protected. | | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | +| url | The URL (and port) to the icecast server. Needs to also include `/status-json.xsl` | http://localhost:8443/status-json.xsl | no | +| user | Username to use to connect to `url` if it's password protected. | | no | +| pass | Password to use to connect to `url` if it's password protected. | | no | </details> diff --git a/collectors/python.d.plugin/ipfs/integrations/ipfs.md b/collectors/python.d.plugin/ipfs/integrations/ipfs.md index c43c27b34..77dc745aa 100644 --- a/collectors/python.d.plugin/ipfs/integrations/ipfs.md +++ b/collectors/python.d.plugin/ipfs/integrations/ipfs.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "IPFS" learn_status: "Published" learn_rel_path: "Data Collection/Storage, Mount Points and Filesystems" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -121,14 +122,14 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | The JOB's name as it will appear at the dashboard (by default is the job_name) | job_name | False | -| url | URL to the IPFS API | no | True | -| repoapi | Collect repo metrics. | no | False | -| pinapi | Set status of IPFS pinned object polling. | no | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | The JOB's name as it will appear at the dashboard (by default is the job_name) | job_name | no | +| url | URL to the IPFS API | no | yes | +| repoapi | Collect repo metrics. | no | no | +| pinapi | Set status of IPFS pinned object polling. | no | no | </details> diff --git a/collectors/python.d.plugin/litespeed/integrations/litespeed.md b/collectors/python.d.plugin/litespeed/integrations/litespeed.md index 511c112e9..87f2d0b12 100644 --- a/collectors/python.d.plugin/litespeed/integrations/litespeed.md +++ b/collectors/python.d.plugin/litespeed/integrations/litespeed.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Litespeed" learn_status: "Published" learn_rel_path: "Data Collection/Web Servers and Web Proxies" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -117,12 +118,12 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | -| path | Use a different path than the default, where the lightspeed stats files reside. | /tmp/lshttpd/ | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | +| path | Use a different path than the default, where the lightspeed stats files reside. | /tmp/lshttpd/ | no | </details> diff --git a/collectors/python.d.plugin/megacli/integrations/megacli.md b/collectors/python.d.plugin/megacli/integrations/megacli.md index bb3bdf6f2..0c4af78a9 100644 --- a/collectors/python.d.plugin/megacli/integrations/megacli.md +++ b/collectors/python.d.plugin/megacli/integrations/megacli.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "MegaCLI" learn_status: "Published" learn_rel_path: "Data Collection/Storage, Mount Points and Filesystems" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -165,11 +166,11 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| do_battery | default is no. Battery stats (adds additional call to megacli `megacli -AdpBbuCmd -a0`). | no | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| do_battery | default is no. Battery stats (adds additional call to megacli `megacli -AdpBbuCmd -a0`). | no | no | </details> diff --git a/collectors/python.d.plugin/megacli/megacli.chart.py b/collectors/python.d.plugin/megacli/megacli.chart.py index ef35ff63f..8222092a8 100644 --- a/collectors/python.d.plugin/megacli/megacli.chart.py +++ b/collectors/python.d.plugin/megacli/megacli.chart.py @@ -91,7 +91,7 @@ def battery_charts(bats): RE_ADAPTER = re.compile( - r'Adapter #([0-9]+) State(?:\s+)?: ([a-zA-Z]+)' + r'Adapter #([0-9]+) State(?:\s+)?: ([a-zA-Z ]+)' ) RE_VD = re.compile( @@ -124,14 +124,14 @@ def find_batteries(d): class Adapter: def __init__(self, n, state): self.id = n - self.state = int(state == 'Degraded') + # TODO: Rewrite all of this + self.state = int(state in ("Partially Degraded", "Degraded", "Failed")) def data(self): return { 'adapter_{0}_degraded'.format(self.id): self.state, } - class PD: def __init__(self, n, media_err, predict_fail): self.id = n diff --git a/collectors/python.d.plugin/memcached/integrations/memcached.md b/collectors/python.d.plugin/memcached/integrations/memcached.md index 012758304..113b86c8c 100644 --- a/collectors/python.d.plugin/memcached/integrations/memcached.md +++ b/collectors/python.d.plugin/memcached/integrations/memcached.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Memcached" learn_status: "Published" learn_rel_path: "Data Collection/Databases" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -130,13 +131,13 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| host | the host to connect to. | 127.0.0.1 | False | -| port | the port to connect to. | 11211 | False | -| update_every | Sets the default data collection frequency. | 10 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | +| host | the host to connect to. | 127.0.0.1 | no | +| port | the port to connect to. | 11211 | no | +| update_every | Sets the default data collection frequency. | 10 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | </details> diff --git a/collectors/python.d.plugin/monit/integrations/monit.md b/collectors/python.d.plugin/monit/integrations/monit.md index ecf522f84..18219141d 100644 --- a/collectors/python.d.plugin/monit/integrations/monit.md +++ b/collectors/python.d.plugin/monit/integrations/monit.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Monit" learn_status: "Published" learn_rel_path: "Data Collection/Synthetic Checks" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -122,14 +123,14 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 1 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | local | False | -| url | The URL to fetch Monit's metrics. | http://localhost:2812 | True | -| user | Username in case the URL is password protected. | | False | -| pass | Password in case the URL is password protected. | | False | +| update_every | Sets the default data collection frequency. | 1 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | local | no | +| url | The URL to fetch Monit's metrics. | http://localhost:2812 | yes | +| user | Username in case the URL is password protected. | | no | +| pass | Password in case the URL is password protected. | | no | </details> diff --git a/collectors/python.d.plugin/nsd/integrations/name_server_daemon.md b/collectors/python.d.plugin/nsd/integrations/name_server_daemon.md index 8ed86bdf9..0e66c44eb 100644 --- a/collectors/python.d.plugin/nsd/integrations/name_server_daemon.md +++ b/collectors/python.d.plugin/nsd/integrations/name_server_daemon.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Name Server Daemon" learn_status: "Published" learn_rel_path: "Data Collection/DNS and DHCP Servers" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -146,12 +147,12 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 30 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | -| command | The command to run | nsd-control stats_noreset | False | +| update_every | Sets the default data collection frequency. | 30 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | +| command | The command to run | nsd-control stats_noreset | no | </details> diff --git a/collectors/python.d.plugin/openldap/integrations/openldap.md b/collectors/python.d.plugin/openldap/integrations/openldap.md index 375132edb..a9480a490 100644 --- a/collectors/python.d.plugin/openldap/integrations/openldap.md +++ b/collectors/python.d.plugin/openldap/integrations/openldap.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "OpenLDAP" learn_status: "Published" learn_rel_path: "Data Collection/Authentication and Authorization" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -152,19 +153,19 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | -| username | The bind user with right to access monitor statistics | | True | -| password | The password for the binded user | | True | -| server | The listening address of the LDAP server. In case of TLS, use the hostname which the certificate is published for. | | True | -| port | The listening port of the LDAP server. Change to 636 port in case of TLS connection. | 389 | True | -| use_tls | Make True if a TLS connection is used over ldaps:// | False | False | -| use_start_tls | Make True if a TLS connection is used over ldap:// | False | False | -| cert_check | False if you want to ignore certificate check | True | True | -| timeout | Seconds to timeout if no connection exist | | True | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | +| username | The bind user with right to access monitor statistics | | yes | +| password | The password for the binded user | | yes | +| server | The listening address of the LDAP server. In case of TLS, use the hostname which the certificate is published for. | | yes | +| port | The listening port of the LDAP server. Change to 636 port in case of TLS connection. | 389 | yes | +| use_tls | Make True if a TLS connection is used over ldaps:// | no | no | +| use_start_tls | Make True if a TLS connection is used over ldap:// | no | no | +| cert_check | False if you want to ignore certificate check | True | yes | +| timeout | Seconds to timeout if no connection exist | | yes | </details> diff --git a/collectors/python.d.plugin/oracledb/integrations/oracle_db.md b/collectors/python.d.plugin/oracledb/integrations/oracle_db.md index cb6637e8a..30557c021 100644 --- a/collectors/python.d.plugin/oracledb/integrations/oracle_db.md +++ b/collectors/python.d.plugin/oracledb/integrations/oracle_db.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Oracle DB" learn_status: "Published" learn_rel_path: "Data Collection/Databases" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -160,15 +161,15 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| user | The username for the user account. | no | True | -| password | The password for the user account. | no | True | -| server | The IP address or hostname (and port) of the Oracle Database Server. | no | True | -| service | The Oracle Database service name. To view the services available on your server run this query, `select SERVICE_NAME from gv$session where sid in (select sid from V$MYSTAT)`. | no | True | -| protocol | one of the strings "tcp" or "tcps" indicating whether to use unencrypted network traffic or encrypted network traffic | no | True | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| user | The username for the user account. | no | yes | +| password | The password for the user account. | no | yes | +| server | The IP address or hostname (and port) of the Oracle Database Server. | no | yes | +| service | The Oracle Database service name. To view the services available on your server run this query, `select SERVICE_NAME from gv$session where sid in (select sid from V$MYSTAT)`. | no | yes | +| protocol | one of the strings "tcp" or "tcps" indicating whether to use unencrypted network traffic or encrypted network traffic | no | yes | </details> diff --git a/collectors/python.d.plugin/pandas/integrations/pandas.md b/collectors/python.d.plugin/pandas/integrations/pandas.md index d5da2f262..83c5c66b1 100644 --- a/collectors/python.d.plugin/pandas/integrations/pandas.md +++ b/collectors/python.d.plugin/pandas/integrations/pandas.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Pandas" learn_status: "Published" learn_rel_path: "Data Collection/Generic Data Collection" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -132,19 +133,19 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| chart_configs | an array of chart configuration dictionaries | [] | True | -| chart_configs.name | name of the chart to be displayed in the dashboard. | None | True | -| chart_configs.title | title of the chart to be displayed in the dashboard. | None | True | -| chart_configs.family | [family](https://github.com/netdata/netdata/blob/master/docs/cloud/visualize/interact-new-charts.md#families) of the chart to be displayed in the dashboard. | None | True | -| chart_configs.context | [context](https://github.com/netdata/netdata/blob/master/docs/cloud/visualize/interact-new-charts.md#contexts) of the chart to be displayed in the dashboard. | None | True | -| chart_configs.type | the type of the chart to be displayed in the dashboard. | None | True | -| chart_configs.units | the units of the chart to be displayed in the dashboard. | None | True | -| chart_configs.df_steps | a series of pandas operations (one per line) that each returns a dataframe. | None | True | -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | +| chart_configs | an array of chart configuration dictionaries | [] | yes | +| chart_configs.name | name of the chart to be displayed in the dashboard. | None | yes | +| chart_configs.title | title of the chart to be displayed in the dashboard. | None | yes | +| chart_configs.family | [family](https://github.com/netdata/netdata/blob/master/docs/cloud/visualize/interact-new-charts.md#families) of the chart to be displayed in the dashboard. | None | yes | +| chart_configs.context | [context](https://github.com/netdata/netdata/blob/master/docs/cloud/visualize/interact-new-charts.md#contexts) of the chart to be displayed in the dashboard. | None | yes | +| chart_configs.type | the type of the chart to be displayed in the dashboard. | None | yes | +| chart_configs.units | the units of the chart to be displayed in the dashboard. | None | yes | +| chart_configs.df_steps | a series of pandas operations (one per line) that each returns a dataframe. | None | yes | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | </details> diff --git a/collectors/python.d.plugin/postfix/integrations/postfix.md b/collectors/python.d.plugin/postfix/integrations/postfix.md index 7113d7ddd..2bb99922c 100644 --- a/collectors/python.d.plugin/postfix/integrations/postfix.md +++ b/collectors/python.d.plugin/postfix/integrations/postfix.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Postfix" learn_status: "Published" learn_rel_path: "Data Collection/Mail Servers" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -108,11 +109,11 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 1 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | +| update_every | Sets the default data collection frequency. | 1 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | </details> diff --git a/collectors/python.d.plugin/puppet/integrations/puppet.md b/collectors/python.d.plugin/puppet/integrations/puppet.md index be68749a3..ca190b576 100644 --- a/collectors/python.d.plugin/puppet/integrations/puppet.md +++ b/collectors/python.d.plugin/puppet/integrations/puppet.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Puppet" learn_status: "Published" learn_rel_path: "Data Collection/CICD Platforms" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -121,16 +122,16 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| url | HTTP or HTTPS URL, exact Fully Qualified Domain Name of the node should be used. | https://fqdn.example.com:8081 | True | -| tls_verify | Control HTTPS server certificate verification. | False | False | -| tls_ca_file | Optional CA (bundle) file to use | | False | -| tls_cert_file | Optional client certificate file | | False | -| tls_key_file | Optional client key file | | False | -| update_every | Sets the default data collection frequency. | 30 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | +| url | HTTP or HTTPS URL, exact Fully Qualified Domain Name of the node should be used. | https://fqdn.example.com:8081 | yes | +| tls_verify | Control HTTPS server certificate verification. | False | no | +| tls_ca_file | Optional CA (bundle) file to use | | no | +| tls_cert_file | Optional client certificate file | | no | +| tls_key_file | Optional client key file | | no | +| update_every | Sets the default data collection frequency. | 30 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | </details> diff --git a/collectors/python.d.plugin/python.d.plugin.in b/collectors/python.d.plugin/python.d.plugin.in index bc171e032..86fea209c 100644 --- a/collectors/python.d.plugin/python.d.plugin.in +++ b/collectors/python.d.plugin/python.d.plugin.in @@ -222,8 +222,11 @@ class ModuleConfig: def __init__(self, name, config=None): self.name = name self.config = config or OrderedDict() + self.is_stock = False def load(self, abs_path): + if not IS_ATTY: + self.is_stock = abs_path.startswith(DIRS.modules_stock_config) self.config.update(load_config(abs_path) or dict()) def defaults(self): @@ -242,6 +245,7 @@ class ModuleConfig: config = OrderedDict() config.update(job_config) config['job_name'] = job_name + config['__is_stock'] = self.is_stock for k, v in self.defaults().items(): config.setdefault(k, v) @@ -309,7 +313,8 @@ class JobsConfigsBuilder: return None configs = config.create_jobs() - self.log.info("[{0}] built {1} job(s) configs".format(module_name, len(configs))) + if not config.is_stock: + self.log.info("[{0}] built {1} job(s) configs".format(module_name, len(configs))) self.apply_defaults(configs, self.module_defaults) self.apply_defaults(configs, self.job_defaults) @@ -338,6 +343,7 @@ class Job(threading.Thread): self.autodetection_retry = config['autodetection_retry'] self.checks = self.inf self.job = None + self.is_stock = config.get('__is_stock', False) self.status = JOB_STATUS_INIT def is_inited(self): @@ -350,8 +356,14 @@ class Job(threading.Thread): return self.job.name def check(self): + if self.is_stock: + self.job.logger.mute() + ok = self.job.check() + + self.job.logger.unmute() self.checks -= self.checks != self.inf and not ok + return ok def create(self): @@ -503,7 +515,6 @@ class FileLockRegistry: name = "docker" + name[7:] return name - def register(self, name): name = self.rename(name) if name in self.locks: @@ -685,12 +696,14 @@ class Plugin: try: ok = job.check() except Exception as error: - self.log.warning("{0}[{1}] : unhandled exception on check : {2}, skipping the job".format( - job.module_name, job.real_name, repr(error))) + if not job.is_stock: + self.log.warning("{0}[{1}] : unhandled exception on check : {2}, skipping the job".format( + job.module_name, job.real_name, repr(error))) job.status = JOB_STATUS_DROPPED continue if not ok: - self.log.info('{0}[{1}] : check failed'.format(job.module_name, job.real_name)) + if not job.is_stock: + self.log.info('{0}[{1}] : check failed'.format(job.module_name, job.real_name)) job.status = JOB_STATUS_RECOVERING if job.need_to_recheck() else JOB_STATUS_DROPPED continue self.log.info('{0}[{1}] : check success'.format(job.module_name, job.real_name)) @@ -876,7 +889,7 @@ def main(): cmd = parse_command_line() log = PythonDLogger() - level = os.getenv('NETDATA_LOG_SEVERITY_LEVEL') or str() + level = os.getenv('NETDATA_LOG_LEVEL') or str() level = level.lower() if level == 'debug': log.logger.severity = 'DEBUG' diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py index a7acc23b6..3f122e1d9 100644 --- a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py +++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py @@ -8,7 +8,7 @@ import os from bases.charts import Charts, ChartError, create_runtime_chart from bases.collection import safe_print -from bases.loggers import PythonDLimitedLogger +from bases.loggers import PythonDLogger from third_party.monotonic import monotonic from time import sleep, time @@ -62,7 +62,7 @@ def clean_module_name(name): return name -class SimpleService(PythonDLimitedLogger, object): +class SimpleService(PythonDLogger, object): """ Prototype of Service class. Implemented basic functionality to run jobs by `python.d.plugin` @@ -73,7 +73,7 @@ class SimpleService(PythonDLimitedLogger, object): :param configuration: <dict> :param name: <str> """ - PythonDLimitedLogger.__init__(self) + PythonDLogger.__init__(self) self.configuration = configuration self.order = list() self.definitions = dict() diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py index 1faf036a4..76129d376 100644 --- a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py +++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py @@ -6,8 +6,6 @@ import urllib3 -from distutils.version import StrictVersion as version - from bases.FrameworkServices.SimpleService import SimpleService try: @@ -15,28 +13,11 @@ try: except AttributeError: pass -# https://github.com/urllib3/urllib3/blob/master/CHANGES.rst#19-2014-07-04 -# New retry logic and urllib3.util.retry.Retry configuration object. (Issue https://github.com/urllib3/urllib3/pull/326) -URLLIB3_MIN_REQUIRED_VERSION = '1.9' URLLIB3_VERSION = urllib3.__version__ URLLIB3 = 'urllib3' - -def version_check(): - if version(URLLIB3_VERSION) >= version(URLLIB3_MIN_REQUIRED_VERSION): - return - - err = '{0} version: {1}, minimum required version: {2}, please upgrade'.format( - URLLIB3, - URLLIB3_VERSION, - URLLIB3_MIN_REQUIRED_VERSION, - ) - raise Exception(err) - - class UrlService(SimpleService): def __init__(self, configuration=None, name=None): - version_check() SimpleService.__init__(self, configuration=configuration, name=name) self.debug("{0} version: {1}".format(URLLIB3, URLLIB3_VERSION)) self.url = self.configuration.get('url') diff --git a/collectors/python.d.plugin/python_modules/bases/loggers.py b/collectors/python.d.plugin/python_modules/bases/loggers.py index 47f196a6d..7ae8ab0c1 100644 --- a/collectors/python.d.plugin/python_modules/bases/loggers.py +++ b/collectors/python.d.plugin/python_modules/bases/loggers.py @@ -4,6 +4,8 @@ # SPDX-License-Identifier: GPL-3.0-or-later import logging +import os +import stat import traceback from sys import exc_info @@ -15,39 +17,46 @@ except ImportError: from bases.collection import on_try_except_finally, unicode_str +LOGGING_LEVELS = { + 'CRITICAL': 50, + 'ERROR': 40, + 'WARNING': 30, + 'INFO': 20, + 'DEBUG': 10, + 'NOTSET': 0, +} -LOGGING_LEVELS = {'CRITICAL': 50, - 'ERROR': 40, - 'WARNING': 30, - 'INFO': 20, - 'DEBUG': 10, - 'NOTSET': 0} -DEFAULT_LOG_LINE_FORMAT = '%(asctime)s: %(name)s %(levelname)s : %(message)s' -DEFAULT_LOG_TIME_FORMAT = '%Y-%m-%d %H:%M:%S' +def is_stderr_connected_to_journal(): + journal_stream = os.environ.get("JOURNAL_STREAM") + if not journal_stream: + return False -PYTHON_D_LOG_LINE_FORMAT = '%(asctime)s: %(name)s %(levelname)s: %(module_name)s[%(job_name)s] : %(message)s' -PYTHON_D_LOG_NAME = 'python.d' + colon_index = journal_stream.find(":") + if colon_index <= 0: + return False + device, inode = journal_stream[:colon_index], journal_stream[colon_index + 1:] -def limiter(log_max_count=30, allowed_in_seconds=60): - def on_decorator(func): + try: + device_number, inode_number = os.fstat(2)[stat.ST_DEV], os.fstat(2)[stat.ST_INO] + except OSError: + return False - def on_call(*args): - current_time = args[0]._runtime_counters.start_mono - lc = args[0]._logger_counters + return str(device_number) == device and str(inode_number) == inode - if lc.logged and lc.logged % log_max_count == 0: - if current_time - lc.time_to_compare <= allowed_in_seconds: - lc.dropped += 1 - return - lc.time_to_compare = current_time - lc.logged += 1 - func(*args) +is_journal = is_stderr_connected_to_journal() + +DEFAULT_LOG_LINE_FORMAT = '%(asctime)s: %(name)s %(levelname)s : %(message)s' +PYTHON_D_LOG_LINE_FORMAT = '%(asctime)s: %(name)s %(levelname)s: %(module_name)s[%(job_name)s] : %(message)s' + +if is_journal: + DEFAULT_LOG_LINE_FORMAT = '%(name)s %(levelname)s : %(message)s' + PYTHON_D_LOG_LINE_FORMAT = '%(name)s %(levelname)s: %(module_name)s[%(job_name)s] : %(message)s ' - return on_call - return on_decorator +DEFAULT_LOG_TIME_FORMAT = '%Y-%m-%d %H:%M:%S' +PYTHON_D_LOG_NAME = 'python.d' def add_traceback(func): @@ -66,27 +75,16 @@ def add_traceback(func): return on_call -class LoggerCounters: - def __init__(self): - self.logged = 0 - self.dropped = 0 - self.time_to_compare = time() - - def __repr__(self): - return 'LoggerCounter(logged: {logged}, dropped: {dropped})'.format(logged=self.logged, - dropped=self.dropped) - - class BaseLogger(object): - def __init__(self, logger_name, log_fmt=DEFAULT_LOG_LINE_FORMAT, date_fmt=DEFAULT_LOG_TIME_FORMAT, - handler=logging.StreamHandler): - """ - :param logger_name: <str> - :param log_fmt: <str> - :param date_fmt: <str> - :param handler: <logging handler> - """ + def __init__( + self, + logger_name, + log_fmt=DEFAULT_LOG_LINE_FORMAT, + date_fmt=DEFAULT_LOG_TIME_FORMAT, + handler=logging.StreamHandler, + ): self.logger = logging.getLogger(logger_name) + self._muted = False if not self.has_handlers(): self.severity = 'INFO' self.logger.addHandler(handler()) @@ -96,11 +94,6 @@ class BaseLogger(object): return '<Logger: {name})>'.format(name=self.logger.name) def set_formatter(self, fmt, date_fmt=DEFAULT_LOG_TIME_FORMAT): - """ - :param fmt: <str> - :param date_fmt: <str> - :return: - """ if self.has_handlers(): self.logger.handlers[0].setFormatter(logging.Formatter(fmt=fmt, datefmt=date_fmt)) @@ -113,43 +106,48 @@ class BaseLogger(object): @severity.setter def severity(self, level): - """ - :param level: <str> or <int> - :return: - """ if level in LOGGING_LEVELS: self.logger.setLevel(LOGGING_LEVELS[level]) + def _log(self, level, *msg, **kwargs): + if not self._muted: + self.logger.log(level, ' '.join(map(unicode_str, msg)), **kwargs) + def debug(self, *msg, **kwargs): - self.logger.debug(' '.join(map(unicode_str, msg)), **kwargs) + self._log(logging.DEBUG, *msg, **kwargs) def info(self, *msg, **kwargs): - self.logger.info(' '.join(map(unicode_str, msg)), **kwargs) + self._log(logging.INFO, *msg, **kwargs) def warning(self, *msg, **kwargs): - self.logger.warning(' '.join(map(unicode_str, msg)), **kwargs) + self._log(logging.WARN, *msg, **kwargs) def error(self, *msg, **kwargs): - self.logger.error(' '.join(map(unicode_str, msg)), **kwargs) + self._log(logging.ERROR, *msg, **kwargs) - def alert(self, *msg, **kwargs): - self.logger.critical(' '.join(map(unicode_str, msg)), **kwargs) + def alert(self, *msg, **kwargs): + self._log(logging.CRITICAL, *msg, **kwargs) @on_try_except_finally(on_finally=(exit, 1)) def fatal(self, *msg, **kwargs): - self.logger.critical(' '.join(map(unicode_str, msg)), **kwargs) + self._log(logging.CRITICAL, *msg, **kwargs) + + def mute(self): + self._muted = True + + def unmute(self): + self._muted = False class PythonDLogger(object): - def __init__(self, logger_name=PYTHON_D_LOG_NAME, log_fmt=PYTHON_D_LOG_LINE_FORMAT): - """ - :param logger_name: <str> - :param log_fmt: <str> - """ + def __init__( + self, + logger_name=PYTHON_D_LOG_NAME, + log_fmt=PYTHON_D_LOG_LINE_FORMAT, + ): self.logger = BaseLogger(logger_name, log_fmt=log_fmt) self.module_name = 'plugin' self.job_name = 'main' - self._logger_counters = LoggerCounters() _LOG_TRACEBACK = False @@ -162,45 +160,39 @@ class PythonDLogger(object): PythonDLogger._LOG_TRACEBACK = value def debug(self, *msg): - self.logger.debug(*msg, extra={'module_name': self.module_name, - 'job_name': self.job_name or self.module_name}) + self.logger.debug(*msg, extra={ + 'module_name': self.module_name, + 'job_name': self.job_name or self.module_name, + }) def info(self, *msg): - self.logger.info(*msg, extra={'module_name': self.module_name, - 'job_name': self.job_name or self.module_name}) + self.logger.info(*msg, extra={ + 'module_name': self.module_name, + 'job_name': self.job_name or self.module_name, + }) def warning(self, *msg): - self.logger.warning(*msg, extra={'module_name': self.module_name, - 'job_name': self.job_name or self.module_name}) + self.logger.warning(*msg, extra={ + 'module_name': self.module_name, + 'job_name': self.job_name or self.module_name, + }) @add_traceback def error(self, *msg): - self.logger.error(*msg, extra={'module_name': self.module_name, - 'job_name': self.job_name or self.module_name}) + self.logger.error(*msg, extra={ + 'module_name': self.module_name, + 'job_name': self.job_name or self.module_name, + }) @add_traceback def alert(self, *msg): - self.logger.alert(*msg, extra={'module_name': self.module_name, - 'job_name': self.job_name or self.module_name}) + self.logger.alert(*msg, extra={ + 'module_name': self.module_name, + 'job_name': self.job_name or self.module_name, + }) def fatal(self, *msg): - self.logger.fatal(*msg, extra={'module_name': self.module_name, - 'job_name': self.job_name or self.module_name}) - - -class PythonDLimitedLogger(PythonDLogger): - @limiter() - def info(self, *msg): - PythonDLogger.info(self, *msg) - - @limiter() - def warning(self, *msg): - PythonDLogger.warning(self, *msg) - - @limiter() - def error(self, *msg): - PythonDLogger.error(self, *msg) - - @limiter() - def alert(self, *msg): - PythonDLogger.alert(self, *msg) + self.logger.fatal(*msg, extra={ + 'module_name': self.module_name, + 'job_name': self.job_name or self.module_name, + }) diff --git a/collectors/python.d.plugin/rethinkdbs/integrations/rethinkdb.md b/collectors/python.d.plugin/rethinkdbs/integrations/rethinkdb.md index c0b2cfbfd..ab51c0514 100644 --- a/collectors/python.d.plugin/rethinkdbs/integrations/rethinkdb.md +++ b/collectors/python.d.plugin/rethinkdbs/integrations/rethinkdb.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "RethinkDB" learn_status: "Published" learn_rel_path: "Data Collection/Databases" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -130,16 +131,16 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | -| host | Hostname or ip of the RethinkDB server. | localhost | False | -| port | Port to connect to the RethinkDB server. | 28015 | False | -| user | The username to use to connect to the RethinkDB server. | admin | False | -| password | The password to use to connect to the RethinkDB server. | | False | -| timeout | Set a connect timeout to the RethinkDB server. | 2 | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | +| host | Hostname or ip of the RethinkDB server. | localhost | no | +| port | Port to connect to the RethinkDB server. | 28015 | no | +| user | The username to use to connect to the RethinkDB server. | admin | no | +| password | The password to use to connect to the RethinkDB server. | | no | +| timeout | Set a connect timeout to the RethinkDB server. | 2 | no | </details> diff --git a/collectors/python.d.plugin/retroshare/integrations/retroshare.md b/collectors/python.d.plugin/retroshare/integrations/retroshare.md index 753a218c1..4fc003c6f 100644 --- a/collectors/python.d.plugin/retroshare/integrations/retroshare.md +++ b/collectors/python.d.plugin/retroshare/integrations/retroshare.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "RetroShare" learn_status: "Published" learn_rel_path: "Data Collection/Media Services" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -120,12 +121,12 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | -| url | The URL to the RetroShare Web UI. | http://localhost:9090 | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | +| url | The URL to the RetroShare Web UI. | http://localhost:9090 | no | </details> diff --git a/collectors/python.d.plugin/riakkv/integrations/riakkv.md b/collectors/python.d.plugin/riakkv/integrations/riakkv.md index f83def446..2e8279bc3 100644 --- a/collectors/python.d.plugin/riakkv/integrations/riakkv.md +++ b/collectors/python.d.plugin/riakkv/integrations/riakkv.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "RiakKV" learn_status: "Published" learn_rel_path: "Data Collection/Databases" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -150,11 +151,11 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| url | The url of the server | no | True | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| url | The url of the server | no | yes | </details> diff --git a/collectors/python.d.plugin/samba/integrations/samba.md b/collectors/python.d.plugin/samba/integrations/samba.md index 5638c6d94..1bd1664ee 100644 --- a/collectors/python.d.plugin/samba/integrations/samba.md +++ b/collectors/python.d.plugin/samba/integrations/samba.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Samba" learn_status: "Published" learn_rel_path: "Data Collection/Storage, Mount Points and Filesystems" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -166,10 +167,10 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | </details> diff --git a/collectors/python.d.plugin/sensors/integrations/linux_sensors_lm-sensors.md b/collectors/python.d.plugin/sensors/integrations/linux_sensors_lm-sensors.md index c807d6b3e..e426c8c83 100644 --- a/collectors/python.d.plugin/sensors/integrations/linux_sensors_lm-sensors.md +++ b/collectors/python.d.plugin/sensors/integrations/linux_sensors_lm-sensors.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Linux Sensors (lm-sensors)" learn_status: "Published" learn_rel_path: "Data Collection/Hardware Devices and Sensors" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -122,11 +123,11 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| types | The types of sensors to collect. | temperature, fan, voltage, current, power, energy, humidity | True | -| update_every | Sets the default data collection frequency. | 1 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | +| types | The types of sensors to collect. | temperature, fan, voltage, current, power, energy, humidity | yes | +| update_every | Sets the default data collection frequency. | 1 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | </details> diff --git a/collectors/python.d.plugin/smartd_log/integrations/s.m.a.r.t..md b/collectors/python.d.plugin/smartd_log/integrations/s.m.a.r.t..md index a943f8704..5c5b569e9 100644 --- a/collectors/python.d.plugin/smartd_log/integrations/s.m.a.r.t..md +++ b/collectors/python.d.plugin/smartd_log/integrations/s.m.a.r.t..md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "S.M.A.R.T." learn_status: "Published" learn_rel_path: "Data Collection/Hardware Devices and Sensors" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -168,14 +169,14 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| log_path | path to smartd log files. | /var/log/smartd | True | -| exclude_disks | Space-separated patterns. If the pattern is in the drive name, the module will not collect data for it. | | False | -| age | Time in minutes since the last dump to file. | 30 | False | -| update_every | Sets the default data collection frequency. | 1 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | +| log_path | path to smartd log files. | /var/log/smartd | yes | +| exclude_disks | Space-separated patterns. If the pattern is in the drive name, the module will not collect data for it. | | no | +| age | Time in minutes since the last dump to file. | 30 | no | +| update_every | Sets the default data collection frequency. | 1 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | </details> diff --git a/collectors/python.d.plugin/smartd_log/smartd_log.chart.py b/collectors/python.d.plugin/smartd_log/smartd_log.chart.py index dc4e95dec..a896164df 100644 --- a/collectors/python.d.plugin/smartd_log/smartd_log.chart.py +++ b/collectors/python.d.plugin/smartd_log/smartd_log.chart.py @@ -39,6 +39,7 @@ ATTR171 = '171' ATTR172 = '172' ATTR173 = '173' ATTR174 = '174' +ATTR177 = '177' ATTR180 = '180' ATTR183 = '183' ATTR190 = '190' @@ -50,6 +51,8 @@ ATTR199 = '199' ATTR202 = '202' ATTR206 = '206' ATTR233 = '233' +ATTR241 = '241' +ATTR242 = '242' ATTR249 = '249' ATTR_READ_ERR_COR = 'read-total-err-corrected' ATTR_READ_ERR_UNC = 'read-total-unc-errors' @@ -114,6 +117,8 @@ ORDER = [ 'offline_uncorrectable_sector_count', 'percent_lifetime_used', 'media_wearout_indicator', + 'total_lbas_written', + 'total_lbas_read', ] CHARTS = { @@ -329,7 +334,7 @@ CHARTS = { 'media_wearout_indicator': { 'options': [None, 'Media Wearout Indicator', 'percentage', 'wear', 'smartd_log.media_wearout_indicator', 'line'], 'lines': [], - 'attrs': [ATTR233], + 'attrs': [ATTR233, ATTR177], 'algo': ABSOLUTE, }, 'nand_writes_1gib': { @@ -338,6 +343,18 @@ CHARTS = { 'attrs': [ATTR249], 'algo': ABSOLUTE, }, + 'total_lbas_written': { + 'options': [None, 'Total LBAs Written', 'sectors', 'wear', 'smartd_log.total_lbas_written', 'line'], + 'lines': [], + 'attrs': [ATTR241], + 'algo': ABSOLUTE, + }, + 'total_lbas_read': { + 'options': [None, 'Total LBAs Read', 'sectors', 'wear', 'smartd_log.total_lbas_read', 'line'], + 'lines': [], + 'attrs': [ATTR242], + 'algo': ABSOLUTE, + }, } # NOTE: 'parse_temp' decodes ATA 194 raw value. Not heavily tested. Written by @Ferroin @@ -519,6 +536,7 @@ def ata_attribute_factory(value): elif name in [ ATTR1, ATTR7, + ATTR177, ATTR202, ATTR206, ATTR233, diff --git a/collectors/python.d.plugin/spigotmc/integrations/spigotmc.md b/collectors/python.d.plugin/spigotmc/integrations/spigotmc.md index af330bdd1..55ec8fa22 100644 --- a/collectors/python.d.plugin/spigotmc/integrations/spigotmc.md +++ b/collectors/python.d.plugin/spigotmc/integrations/spigotmc.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "SpigotMC" learn_status: "Published" learn_rel_path: "Data Collection/Gaming" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -121,14 +122,14 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 1 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | -| host | The host's IP to connect to. | localhost | True | -| port | The port the remote console is listening on. | 25575 | True | -| password | Remote console password if any. | | False | +| update_every | Sets the default data collection frequency. | 1 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | +| host | The host's IP to connect to. | localhost | yes | +| port | The port the remote console is listening on. | 25575 | yes | +| password | Remote console password if any. | | no | </details> diff --git a/collectors/python.d.plugin/squid/integrations/squid.md b/collectors/python.d.plugin/squid/integrations/squid.md index 484d8706c..6599826da 100644 --- a/collectors/python.d.plugin/squid/integrations/squid.md +++ b/collectors/python.d.plugin/squid/integrations/squid.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Squid" learn_status: "Published" learn_rel_path: "Data Collection/Web Servers and Web Proxies" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -117,14 +118,14 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 1 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | local | False | -| host | The host to connect to. | | True | -| port | The port to connect to. | | True | -| request | The URL to request from Squid. | | True | +| update_every | Sets the default data collection frequency. | 1 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | local | no | +| host | The host to connect to. | | yes | +| port | The port to connect to. | | yes | +| request | The URL to request from Squid. | | yes | </details> diff --git a/collectors/python.d.plugin/tomcat/integrations/tomcat.md b/collectors/python.d.plugin/tomcat/integrations/tomcat.md index 8210835c1..883f29dd3 100644 --- a/collectors/python.d.plugin/tomcat/integrations/tomcat.md +++ b/collectors/python.d.plugin/tomcat/integrations/tomcat.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Tomcat" learn_status: "Published" learn_rel_path: "Data Collection/Web Servers and Web Proxies" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -120,14 +121,14 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| url | The URL of the Tomcat server's status endpoint. Always add the suffix ?XML=true. | no | True | -| user | A valid user with read permission to access the /manager/status endpoint of the server. Required if the endpoint is password protected | no | False | -| pass | A valid password for the user in question. Required if the endpoint is password protected | no | False | -| connector_name | The connector component that communicates with a web connector via the AJP protocol, e.g ajp-bio-8009 | | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| url | The URL of the Tomcat server's status endpoint. Always add the suffix ?XML=true. | no | yes | +| user | A valid user with read permission to access the /manager/status endpoint of the server. Required if the endpoint is password protected | no | no | +| pass | A valid password for the user in question. Required if the endpoint is password protected | no | no | +| connector_name | The connector component that communicates with a web connector via the AJP protocol, e.g ajp-bio-8009 | | no | </details> diff --git a/collectors/python.d.plugin/tor/integrations/tor.md b/collectors/python.d.plugin/tor/integrations/tor.md index f5c0026af..0e57fa793 100644 --- a/collectors/python.d.plugin/tor/integrations/tor.md +++ b/collectors/python.d.plugin/tor/integrations/tor.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Tor" learn_status: "Published" learn_rel_path: "Data Collection/VPNs" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -122,14 +123,14 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | -| control_addr | Tor control IP address | 127.0.0.1 | False | -| control_port | Tor control port. Can be either a tcp port, or a path to a socket file. | 9051 | False | -| password | Tor control password | | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | +| control_addr | Tor control IP address | 127.0.0.1 | no | +| control_port | Tor control port. Can be either a tcp port, or a path to a socket file. | 9051 | no | +| password | Tor control password | | no | </details> diff --git a/collectors/python.d.plugin/uwsgi/integrations/uwsgi.md b/collectors/python.d.plugin/uwsgi/integrations/uwsgi.md index 309265789..af58608bd 100644 --- a/collectors/python.d.plugin/uwsgi/integrations/uwsgi.md +++ b/collectors/python.d.plugin/uwsgi/integrations/uwsgi.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "uWSGI" learn_status: "Published" learn_rel_path: "Data Collection/Web Servers and Web Proxies" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -122,14 +123,14 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | The JOB's name as it will appear at the dashboard (by default is the job_name) | job_name | False | -| socket | The 'path/to/uwsgistats.sock' | no | False | -| host | The host to connect to | no | False | -| port | The port to connect to | no | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | The JOB's name as it will appear at the dashboard (by default is the job_name) | job_name | no | +| socket | The 'path/to/uwsgistats.sock' | no | no | +| host | The host to connect to | no | no | +| port | The port to connect to | no | no | </details> diff --git a/collectors/python.d.plugin/varnish/integrations/varnish.md b/collectors/python.d.plugin/varnish/integrations/varnish.md index 142875f4b..da74dcf8f 100644 --- a/collectors/python.d.plugin/varnish/integrations/varnish.md +++ b/collectors/python.d.plugin/varnish/integrations/varnish.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "Varnish" learn_status: "Published" learn_rel_path: "Data Collection/Web Servers and Web Proxies" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -161,12 +162,12 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| instance_name | the name of the varnishd instance to get logs from. If not specified, the local host name is used. | | True | -| update_every | Sets the default data collection frequency. | 10 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | +| instance_name | the name of the varnishd instance to get logs from. If not specified, the local host name is used. | | yes | +| update_every | Sets the default data collection frequency. | 10 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | </details> diff --git a/collectors/python.d.plugin/w1sensor/integrations/1-wire_sensors.md b/collectors/python.d.plugin/w1sensor/integrations/1-wire_sensors.md index 39987743e..fe3c05ba6 100644 --- a/collectors/python.d.plugin/w1sensor/integrations/1-wire_sensors.md +++ b/collectors/python.d.plugin/w1sensor/integrations/1-wire_sensors.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "1-Wire Sensors" learn_status: "Published" learn_rel_path: "Data Collection/Hardware Devices and Sensors" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -114,12 +115,12 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | False | -| name_<1-Wire id> | This allows associating a human readable name with a sensor's 1-Wire identifier. | | False | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | +| name_<1-Wire id> | This allows associating a human readable name with a sensor's 1-Wire identifier. | | no | </details> diff --git a/collectors/python.d.plugin/zscores/integrations/python.d_zscores.md b/collectors/python.d.plugin/zscores/integrations/python.d_zscores.md index 1ebe865f0..9d7d1c3d5 100644 --- a/collectors/python.d.plugin/zscores/integrations/python.d_zscores.md +++ b/collectors/python.d.plugin/zscores/integrations/python.d_zscores.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/python.d.p sidebar_label: "python.d zscores" learn_status: "Published" learn_rel_path: "Data Collection/Other" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -123,20 +124,20 @@ Every configuration JOB starts with a `job_name` value which will appear in the | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| charts_regex | what charts to pull data for - A regex like `system\..*/` or `system\..*/apps.cpu/apps.mem` etc. | system\..* | True | -| train_secs | length of time (in seconds) to base calculations off for mean and stddev. | 14400 | True | -| offset_secs | offset (in seconds) preceding latest data to ignore when calculating mean and stddev. | 300 | True | -| train_every_n | recalculate the mean and stddev every n steps of the collector. | 900 | True | -| z_smooth_n | smooth the z score (to reduce sensitivity to spikes) by averaging it over last n values. | 15 | True | -| z_clip | cap absolute value of zscore (before smoothing) for better stability. | 10 | True | -| z_abs | set z_abs: 'true' to make all zscores be absolute values only. | true | True | -| burn_in | burn in period in which to initially calculate mean and stddev on every step. | 2 | True | -| mode | mode can be to get a zscore 'per_dim' or 'per_chart'. | per_chart | True | -| per_chart_agg | per_chart_agg is how you aggregate from dimension to chart when mode='per_chart'. | mean | True | -| update_every | Sets the default data collection frequency. | 5 | False | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | False | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | False | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | False | +| charts_regex | what charts to pull data for - A regex like `system\..*/` or `system\..*/apps.cpu/apps.mem` etc. | system\..* | yes | +| train_secs | length of time (in seconds) to base calculations off for mean and stddev. | 14400 | yes | +| offset_secs | offset (in seconds) preceding latest data to ignore when calculating mean and stddev. | 300 | yes | +| train_every_n | recalculate the mean and stddev every n steps of the collector. | 900 | yes | +| z_smooth_n | smooth the z score (to reduce sensitivity to spikes) by averaging it over last n values. | 15 | yes | +| z_clip | cap absolute value of zscore (before smoothing) for better stability. | 10 | yes | +| z_abs | set z_abs: 'true' to make all zscores be absolute values only. | true | yes | +| burn_in | burn in period in which to initially calculate mean and stddev on every step. | 2 | yes | +| mode | mode can be to get a zscore 'per_dim' or 'per_chart'. | per_chart | yes | +| per_chart_agg | per_chart_agg is how you aggregate from dimension to chart when mode='per_chart'. | mean | yes | +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | </details> diff --git a/collectors/slabinfo.plugin/integrations/linux_kernel_slab_allocator_statistics.md b/collectors/slabinfo.plugin/integrations/linux_kernel_slab_allocator_statistics.md index 54ccf605f..ce8115270 100644 --- a/collectors/slabinfo.plugin/integrations/linux_kernel_slab_allocator_statistics.md +++ b/collectors/slabinfo.plugin/integrations/linux_kernel_slab_allocator_statistics.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/slabinfo.p sidebar_label: "Linux kernel SLAB allocator statistics" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Kernel" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -120,7 +121,7 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| Enable plugin | As described above plugin is disabled by default, this option is used to enable plugin. | no | True | +| Enable plugin | As described above plugin is disabled by default, this option is used to enable plugin. | no | yes | </details> diff --git a/collectors/slabinfo.plugin/slabinfo.c b/collectors/slabinfo.plugin/slabinfo.c index 366cba643..9b9119a6e 100644 --- a/collectors/slabinfo.plugin/slabinfo.c +++ b/collectors/slabinfo.plugin/slabinfo.c @@ -336,14 +336,11 @@ void usage(void) { } int main(int argc, char **argv) { - stderror = stderr; clocks_init(); + nd_log_initialize_for_external_plugins("slabinfo.plugin"); program_name = argv[0]; program_version = "0.1"; - error_log_syslog = 0; - - log_set_global_severity_for_external_plugins(); int update_every = 1, i, n, freq = 0; diff --git a/collectors/statsd.plugin/README.md b/collectors/statsd.plugin/README.md index d80849dba..e3c8f9f81 100644 --- a/collectors/statsd.plugin/README.md +++ b/collectors/statsd.plugin/README.md @@ -173,8 +173,8 @@ You can find the configuration at `/etc/netdata/netdata.conf`: # update every (flushInterval) = 1 # udp messages to process at once = 10 # create private charts for metrics matching = * - # max private charts allowed = 200 # max private charts hard limit = 1000 + # cleanup obsolete charts after secs = 0 # private charts memory mode = save # private charts history = 3996 # histograms and timers percentile (percentThreshold) = 95.00000 @@ -234,13 +234,11 @@ The default behavior is to use the same settings as the rest of the Netdata Agen - `private charts memory mode` - `private charts history` -### Optimize private metric charts visualization and storage +### Optimize private metric charts storage -If you have thousands of metrics, each with its own private chart, you may notice that your web browser becomes slow when you view the Netdata dashboard (this is a web browser issue we need to address at the Netdata UI). So, Netdata has a protection to stop creating charts when `max private charts allowed = 200` (soft limit) is reached. +For optimization reasons, Netdata imposes a hard limit on private metric charts. The limit is set via the `max private charts hard limit` setting (which defaults to 1000 charts). Metrics above this hard limit are still collected, but they can only be used in synthetic charts (once a metric is added to chart, it will be sent to backend servers too). -The metrics above this soft limit are still processed by Netdata, can be used in synthetic charts and will be available to be sent to backend time-series databases, up to `max private charts hard limit = 1000`. So, between 200 and 1000 charts, Netdata will still generate charts, but they will automatically be created with `memory mode = none` (Netdata will not maintain a database for them). These metrics will be sent to backend time series databases, if the backend configuration is set to `as collected`. - -Metrics above the hard limit are still collected, but they can only be used in synthetic charts (once a metric is added to chart, it will be sent to backend servers too). +If you have many ephemeral metrics collected (i.e. that you collect values for a certain amount of time), you can set the configuration option `set charts as obsolete after secs`. Setting a value in seconds here, means that Netdata will mark those metrics (and their private charts) as obsolete after the specified time has passed since the last sent metric value. Those charts will later be deleted according to the setting in `cleanup obsolete charts after secs`. Setting `set charts as obsolete after secs` to 0 (which is also the default value) will disable this functionality. Example private charts (automatically generated without any configuration): diff --git a/collectors/statsd.plugin/statsd.c b/collectors/statsd.plugin/statsd.c index 5422d2905..9cc3a9d97 100644 --- a/collectors/statsd.plugin/statsd.c +++ b/collectors/statsd.plugin/statsd.c @@ -95,6 +95,7 @@ typedef enum __attribute__((packed)) statsd_metric_options { STATSD_METRIC_OPTION_USEFUL = 0x00000080, // set when the charting thread finds the metric useful (i.e. used in a chart) STATSD_METRIC_OPTION_COLLECTION_FULL_LOGGED = 0x00000100, // set when the collection is full for this metric STATSD_METRIC_OPTION_UPDATED_CHART_METADATA = 0x00000200, // set when the private chart metadata have been updated via tags + STATSD_METRIC_OPTION_OBSOLETE = 0x00004000, // set when the metric is obsoleted } STATS_METRIC_OPTIONS; typedef enum __attribute__((packed)) statsd_metric_type { @@ -117,6 +118,7 @@ typedef struct statsd_metric { // metadata about data collection collected_number events; // the number of times this metric has been collected (never resets) uint32_t count; // the number of times this metric has been collected since the last flush + time_t last_collected; // timestamp of the last incoming value // the actual collected data union { @@ -268,6 +270,7 @@ static struct statsd { collected_number decimal_detail; uint32_t private_charts; uint32_t max_private_charts_hard; + uint32_t set_obsolete_after; STATSD_APP *apps; uint32_t recvmmsg_size; @@ -476,6 +479,16 @@ static inline int value_is_zinit(const char *value) { #define is_metric_checked(m) ((m)->options & STATSD_METRIC_OPTION_CHECKED) #define is_metric_useful_for_collection(m) (!is_metric_checked(m) || ((m)->options & STATSD_METRIC_OPTION_USEFUL)) +static inline void metric_update_counters_and_obsoletion(STATSD_METRIC *m) { + m->events++; + m->count++; + m->last_collected = now_realtime_sec(); + if (m->st && unlikely(rrdset_flag_check(m->st, RRDSET_FLAG_OBSOLETE))) { + rrdset_isnot_obsolete___safe_from_collector_thread(m->st); + m->options &= ~STATSD_METRIC_OPTION_OBSOLETE; + } +} + static inline void statsd_process_gauge(STATSD_METRIC *m, const char *value, const char *sampling) { if(!is_metric_useful_for_collection(m)) return; @@ -498,8 +511,7 @@ static inline void statsd_process_gauge(STATSD_METRIC *m, const char *value, con else m->gauge.value = statsd_parse_float(value, 1.0); - m->events++; - m->count++; + metric_update_counters_and_obsoletion(m); } } @@ -516,8 +528,7 @@ static inline void statsd_process_counter_or_meter(STATSD_METRIC *m, const char else { m->counter.value += llrintndd((NETDATA_DOUBLE) statsd_parse_int(value, 1) / statsd_parse_sampling_rate(sampling)); - m->events++; - m->count++; + metric_update_counters_and_obsoletion(m); } } @@ -559,8 +570,7 @@ static inline void statsd_process_histogram_or_timer(STATSD_METRIC *m, const cha m->histogram.ext->values[m->histogram.ext->used++] = v; } - m->events++; - m->count++; + metric_update_counters_and_obsoletion(m); } } @@ -597,8 +607,7 @@ static inline void statsd_process_set(STATSD_METRIC *m, const char *value) { #else dictionary_set(m->set.dict, value, NULL, 0); #endif - m->events++; - m->count++; + metric_update_counters_and_obsoletion(m); } } @@ -630,8 +639,7 @@ static inline void statsd_process_dictionary(STATSD_METRIC *m, const char *value } t->count++; - m->events++; - m->count++; + metric_update_counters_and_obsoletion(m); } } @@ -1627,6 +1635,9 @@ static inline RRDSET *statsd_private_rrdset_create( static inline void statsd_private_chart_gauge(STATSD_METRIC *m) { netdata_log_debug(D_STATSD, "updating private chart for gauge metric '%s'", m->name); + if(m->st && unlikely(rrdset_flag_check(m->st, RRDSET_FLAG_OBSOLETE))) + return; + if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; @@ -1667,6 +1678,9 @@ static inline void statsd_private_chart_gauge(STATSD_METRIC *m) { static inline void statsd_private_chart_counter_or_meter(STATSD_METRIC *m, const char *dim, const char *family) { netdata_log_debug(D_STATSD, "updating private chart for %s metric '%s'", dim, m->name); + if(m->st && unlikely(rrdset_flag_check(m->st, RRDSET_FLAG_OBSOLETE))) + return; + if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; @@ -1707,6 +1721,9 @@ static inline void statsd_private_chart_counter_or_meter(STATSD_METRIC *m, const static inline void statsd_private_chart_set(STATSD_METRIC *m) { netdata_log_debug(D_STATSD, "updating private chart for set metric '%s'", m->name); + if(m->st && unlikely(rrdset_flag_check(m->st, RRDSET_FLAG_OBSOLETE))) + return; + if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; @@ -1747,6 +1764,9 @@ static inline void statsd_private_chart_set(STATSD_METRIC *m) { static inline void statsd_private_chart_dictionary(STATSD_METRIC *m) { netdata_log_debug(D_STATSD, "updating private chart for dictionary metric '%s'", m->name); + if(m->st && unlikely(rrdset_flag_check(m->st, RRDSET_FLAG_OBSOLETE))) + return; + if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; @@ -1790,6 +1810,9 @@ static inline void statsd_private_chart_dictionary(STATSD_METRIC *m) { static inline void statsd_private_chart_timer_or_histogram(STATSD_METRIC *m, const char *dim, const char *family, const char *units) { netdata_log_debug(D_STATSD, "updating private chart for %s metric '%s'", dim, m->name); + if(m->st && unlikely(rrdset_flag_check(m->st, RRDSET_FLAG_OBSOLETE))) + return; + if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; @@ -1842,6 +1865,16 @@ static inline void statsd_private_chart_timer_or_histogram(STATSD_METRIC *m, con // -------------------------------------------------------------------------------------------------------------------- // statsd flush metrics +static inline void metric_check_obsoletion(STATSD_METRIC *m) { + if(statsd.set_obsolete_after && + !rrdset_flag_check(m->st, RRDSET_FLAG_OBSOLETE) && + m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && + m->last_collected + statsd.set_obsolete_after < now_realtime_sec()) { + rrdset_is_obsolete___safe_from_collector_thread(m->st); + m->options |= STATSD_METRIC_OPTION_OBSOLETE; + } +} + static inline void statsd_flush_gauge(STATSD_METRIC *m) { netdata_log_debug(D_STATSD, "flushing gauge metric '%s'", m->name); @@ -1855,6 +1888,8 @@ static inline void statsd_flush_gauge(STATSD_METRIC *m) { if(unlikely(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && (updated || !(m->options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED)))) statsd_private_chart_gauge(m); + + metric_check_obsoletion(m); } static inline void statsd_flush_counter_or_meter(STATSD_METRIC *m, const char *dim, const char *family) { @@ -1870,6 +1905,8 @@ static inline void statsd_flush_counter_or_meter(STATSD_METRIC *m, const char *d if(unlikely(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && (updated || !(m->options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED)))) statsd_private_chart_counter_or_meter(m, dim, family); + + metric_check_obsoletion(m); } static inline void statsd_flush_counter(STATSD_METRIC *m) { @@ -1896,6 +1933,8 @@ static inline void statsd_flush_set(STATSD_METRIC *m) { if(unlikely(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && (updated || !(m->options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED)))) statsd_private_chart_set(m); + + metric_check_obsoletion(m); } static inline void statsd_flush_dictionary(STATSD_METRIC *m) { @@ -1924,6 +1963,8 @@ static inline void statsd_flush_dictionary(STATSD_METRIC *m) { dictionary_entries(m->dictionary.dict)); } } + + metric_check_obsoletion(m); } static inline void statsd_flush_timer_or_histogram(STATSD_METRIC *m, const char *dim, const char *family, const char *units) { @@ -1977,6 +2018,8 @@ static inline void statsd_flush_timer_or_histogram(STATSD_METRIC *m, const char if(unlikely(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && (updated || !(m->options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED)))) statsd_private_chart_timer_or_histogram(m, dim, family, units); + + metric_check_obsoletion(m); } static inline void statsd_flush_timer(STATSD_METRIC *m) { @@ -2283,7 +2326,7 @@ static inline void statsd_flush_index_metrics(STATSD_INDEX *index, void (*flush_ if(unlikely(is_metric_checked(m))) break; if(unlikely(!(m->options & STATSD_METRIC_OPTION_CHECKED_IN_APPS))) { - netdata_log_access("NEW STATSD METRIC '%s': '%s'", statsd_metric_type_string(m->type), m->name); + nd_log(NDLS_ACCESS, NDLP_DEBUG, "NEW STATSD METRIC '%s': '%s'", statsd_metric_type_string(m->type), m->name); check_if_metric_is_for_app(index, m); m->options |= STATSD_METRIC_OPTION_CHECKED_IN_APPS; } @@ -2326,8 +2369,20 @@ static inline void statsd_flush_index_metrics(STATSD_INDEX *index, void (*flush_ dfe_done(m); // flush all the useful metrics - for(m = index->first_useful; m ; m = m->next_useful) { + STATSD_METRIC *m_prev; + for(m_prev = m = index->first_useful; m ; m = m->next_useful) { flush_metric(m); + if (m->options & STATSD_METRIC_OPTION_OBSOLETE) { + if (m == index->first_useful) + index->first_useful = m->next_useful; + else + m_prev->next_useful = m->next_useful; + dictionary_del(index->dict, m->name); + index->useful--; + index->metrics--; + statsd.private_charts--; + } else + m_prev = m; } } @@ -2447,6 +2502,7 @@ void *statsd_main(void *ptr) { config_get(CONFIG_SECTION_STATSD, "create private charts for metrics matching", "*"), NULL, SIMPLE_PATTERN_EXACT, true); statsd.max_private_charts_hard = (size_t)config_get_number(CONFIG_SECTION_STATSD, "max private charts hard limit", (long long)statsd.max_private_charts_hard); + statsd.set_obsolete_after = (size_t)config_get_number(CONFIG_SECTION_STATSD, "set charts as obsolete after secs", (long long)statsd.set_obsolete_after); statsd.decimal_detail = (collected_number)config_get_number(CONFIG_SECTION_STATSD, "decimal detail", (long long int)statsd.decimal_detail); statsd.tcp_idle_timeout = (size_t) config_get_number(CONFIG_SECTION_STATSD, "disconnect idle tcp clients after seconds", (long long int)statsd.tcp_idle_timeout); statsd.private_charts_hidden = (unsigned int)config_get_boolean(CONFIG_SECTION_STATSD, "private charts hidden", statsd.private_charts_hidden); @@ -2458,7 +2514,7 @@ void *statsd_main(void *ptr) { } { char buffer[314 + 1]; - snprintfz(buffer, 314, "%0.1f%%", statsd.histogram_percentile); + snprintfz(buffer, sizeof(buffer) - 1, "%0.1f%%", statsd.histogram_percentile); statsd.histogram_percentile_str = strdupz(buffer); } diff --git a/collectors/systemd-journal.plugin/Makefile.am b/collectors/systemd-journal.plugin/Makefile.am index fd8f4ab21..48f667c1b 100644 --- a/collectors/systemd-journal.plugin/Makefile.am +++ b/collectors/systemd-journal.plugin/Makefile.am @@ -5,6 +5,11 @@ MAINTAINERCLEANFILES = $(srcdir)/Makefile.in dist_noinst_DATA = \ README.md \ + systemd-journal-self-signed-certs.sh \ + forward_secure_sealing.md \ + active_journal_centralization_guide_no_encryption.md \ + passive_journal_centralization_guide_no_encryption.md \ + passive_journal_centralization_guide_self_signed_certs.md \ $(NULL) dist_libconfig_DATA = \ diff --git a/collectors/systemd-journal.plugin/README.md b/collectors/systemd-journal.plugin/README.md index 51aa1b7cd..c3c639045 100644 --- a/collectors/systemd-journal.plugin/README.md +++ b/collectors/systemd-journal.plugin/README.md @@ -40,31 +40,34 @@ For more information check [this discussion](https://github.com/netdata/netdata/ The following are limitations related to the availability of the plugin: -- This plugin is not available when Netdata is installed in a container. The problem is that `libsystemd` is not - available in Alpine Linux (there is a `libsystemd`, but it is a dummy that returns failure on all calls). We plan to - change this, by shipping Netdata containers based on Debian. +- Netdata versions prior to 1.44 shipped in a docker container do not include this plugin. + The problem is that `libsystemd` is not available in Alpine Linux (there is a `libsystemd`, but it is a dummy that + returns failure on all calls). Starting with Netdata version 1.44, Netdata containers use a Debian base image + making this plugin available when Netdata is running in a container. - For the same reason (lack of `systemd` support for Alpine Linux), the plugin is not available on `static` builds of - Netdata (which are based on `muslc`, not `glibc`). + Netdata (which are based on `muslc`, not `glibc`). If your Netdata is installed in `/opt/netdata` you most likely have + a static build of Netdata. - On old systemd systems (like Centos 7), the plugin runs always in "full data query" mode, which makes it slower. The reason, is that systemd API is missing some important calls we need to use the field indexes of `systemd` journal. However, when running in this mode, the plugin offers also negative matches on the data (like filtering for all logs that do not have set some field), and this is the reason "full data query" mode is also offered as an option even on newer versions of `systemd`. -To use the plugin, install one of our native distribution packages, or install it from source. - #### `systemd` journal features The following are limitations related to the features of `systemd` journal: -- This plugin does not support binary field values. `systemd` journal has the ability to assign fields with binary data. - This plugin assumes all fields contain text values (text in this context includes numbers). +- This plugin assumes that binary field values are text fields with newlines in them. `systemd-journal` has the ability + to support binary fields, without specifying the nature of the binary data. However, binary fields are commonly used + to store log entries that include multiple lines of text. The plugin treats all binary fields are multi-line text. - This plugin does not support multiple values per field for any given log entry. `systemd` journal has the ability to accept the same field key, multiple times, with multiple values on a single log entry. This plugin will present the last value and ignore the others for this log entry. -- This plugin will only read journal files located in `/var/log/journal` or `/run/log/journal`. `systemd-remote` has the +- This plugin will only read journal files located in `/var/log/journal` or `/run/log/journal`. `systemd-journal-remote` has the ability to store journal files anywhere (user configured). If journal files are not located in `/var/log/journal` - or `/run/log/journal` (and any of their subdirectories), the plugin will not find them. + or `/run/log/journal` (and any of their subdirectories), the plugin will not find them. A simple solution is to link + the other directories somewhere inside `/var/log/journal`. The plugin will pick them up, even if a sub-directory of + `/var/log/journal` is a link to a directory outside `/var/log/journal`. Other than the above, this plugin supports all features of `systemd` journals. @@ -125,8 +128,8 @@ Usually `remote` journals are named by the IP of the server sending these logs. extracts these IPs and performs a reverse DNS lookup to find their hostnames. When this is successful, `remote` journals are named by the hostnames of the origin servers. -For information about configuring a journals' centralization server, -check [this FAQ item](#how-do-i-configure-a-journals-centralization-server). +For information about configuring a journal centralization server, +check [this FAQ item](#how-do-i-configure-a-journal-centralization-server). ## Journal Fields @@ -153,6 +156,7 @@ The plugin automatically enriches certain fields to make them more user-friendly - `_GID`, `OBJECT_GID`: the local group database is consulted to annotate them with group names. - `_CAP_EFFECTIVE`: the encoded value is annotated with a human-readable list of the linux capabilities. - `_SOURCE_REALTIME_TIMESTAMP`: the numeric value is annotated with human-readable datetime in UTC. +- `MESSAGE_ID`: for the known `MESSAGE_ID`s, the value is replaced with the well known name of the event. The values of all other fields are presented as found in the journals. @@ -237,6 +241,11 @@ Full text search is combined with the selected filters. The text box accepts asterisks `*` as wildcards. So, `a*b*c` means match anything that contains `a`, then `b` and then `c` with anything between them. +Spaces are treated as OR expressions. So that `a*b c*d` means `a*b OR c*d`. + +Negative expressions are supported, by prefixing any string with `!`. Example: `!systemd *` means match anything that +does not contain `systemd` on any of its fields. + ## Query performance Journal files are designed to be accessed by multiple readers and one writer, concurrently. @@ -278,9 +287,9 @@ multiple journal files, over long time-frames. During the development of this plugin, we submitted, to `systemd`, a number of patches to improve `journalctl` performance by a factor of 14: -- https://github.com/systemd/systemd/pull/29365 -- https://github.com/systemd/systemd/pull/29366 -- https://github.com/systemd/systemd/pull/29261 +- <https://github.com/systemd/systemd/pull/29365> +- <https://github.com/systemd/systemd/pull/29366> +- <https://github.com/systemd/systemd/pull/29261> However, even after these patches are merged, `journalctl` will still be 2x slower than this Netdata plugin, on multi-journal queries. @@ -290,13 +299,85 @@ the Netdata plugin queries each file individually and it then it merges the resu This is transparent, thanks to the `facets` library in `libnetdata` that handles on-the-fly indexing, filtering, and searching of any dataset, independently of its source. +## Performance at scale + +On busy logs servers, or when querying long timeframes that match millions of log entries, the plugin has a sampling +algorithm to allow it respond promptly. It works like this: + +1. The latest 500k log entries are queried in full, evaluating all the fields of every single log entry. This evaluation + allows counting the unique values per field, updating the counters next to each value at the filters section of the + dashboard. +2. When the latest 500k log entries have been processed and there are more data to read, the plugin divides evenly 500k + more log entries to the number of journal files matched by the query. So, it will continue to evaluate all the fields + of all log entries, up to the budget per file, aiming to fully query 1 million log entries in total. +3. When the budget is hit for a given file, the plugin continues to scan log entries, but this time it does not evaluate + the fields and their values, so the counters per field and value are not updated. These unsampled log entries are + shown in the histogram with the label `[unsampled]`. +4. The plugin continues to count `[unsampled]` entries until as many as sampled entries have been evaluated and at least + 1% of the journal file has been processed. +5. When the `[unsampled]` budget is exhausted, the plugin stops processing the journal file and based on the processing + completed so far and the number of entries in the journal file, it estimates the remaining number of log entries in + that file. This is shown as `[estimated]` at the histogram. +6. In systemd versions 254 or later, the plugin fetches the unique sequence number of each log entry and calculates the + the percentage of the file matched by the query, versus the total number of the log entries in the journal file. +7. In systemd versions prior to 254, the plugin estimates the number of entries the journal file contributes to the + query, using the amount of log entries matched it vs. the total duration the log file has entries for. + +The above allow the plugin to respond promptly even when the number of log entries in the journal files is several +dozens millions, while providing accurate estimations of the log entries over time at the histogram and enough counters +at the fields filtering section to help users get an overview of the whole timeframe. + +The fact that the latest 500k log entries and 1% of all journal files (which are spread over time) have been fully +evaluated, including counting the number of appearances for each field value, the plugin usually provides an accurate +representation of the whole timeframe. + +Keep in mind that although the plugin is quite effective and responds promptly when there are hundreds of journal files +matching a query, response times may be longer when there are several thousands of smaller files. systemd versions 254+ +attempt to solve this problem by allowing `systemd-journal-remote` to create larger files. However, for systemd +versions prior to 254, `systemd-journal-remote` creates files of up to 32MB each, which when running very busy +journals centralization servers aggregating several thousands of log entries per second, the number of files can grow +to several dozens of thousands quickly. In such setups, the plugin should ideally skip processing journal files +entirely, relying solely on the estimations of the sequence of files each file is part of. However, this has not been +implemented yet. To improve the query performance in such setups, the user has to query smaller timeframes. + +Another optimization taking place in huge journal centralization points, is the initial scan of the database. The plugin +needs to know the list of all journal files available, including the details of the first and the last message in each +of them. When there are several thousands of files in a directory (like it usually happens in `/var/log/journal/remote`), +directory listing and examination of each file can take a considerable amount of time (even `ls -l` takes minutes). +To work around this problem, the plugin uses `inotify` to receive file updates immediately and scans the library from +the newest to the oldest file, allowing the user interface to work immediately after startup, for the most recent +timeframes. + +### Best practices for better performance + +systemd-journal has been designed **first to be reliable** and then to be fast. It includes several mechanisms to ensure +minimal data loss under all conditions (e.g. disk corruption, tampering, forward secure sealing) and despite the fact +that it utilizes several techniques to require minimal disk footprint (like deduplication of log entries, linking of +values and fields, compression) the disk footprint of journal files remains significantly higher compared to other log +management solutions. + +The higher disk footprint results in higher disk I/O during querying, since a lot more data have to read from disk to +evaluate a query. Query performance at scale can greatly benefit by utilizing a compressed filesystem (ext4, btrfs, zfs) +to store systemd-journal files. + +systemd-journal files are cached by the operating system. There is no database server to serve queries. Each file is +opened and the query runs by directly accessing the data in it. + +Therefore systemd-journal relies on the caching layer of the operating system to optimize query performance. The more +RAM the system has, although it will not be reported as `used` (it will be reported as `cache`), the faster the queries +will get. The first time a timeframe is accessed the query performance will be slower, but further queries on the same +timeframe will be significantly faster since journal data are now cached in memory. + +So, on busy logs centralization systems, queries performance can be improved significantly by using a compressed +filesystem for storing the journal files, and higher amounts of RAM. + ## Configuration and maintenance This Netdata plugin does not require any configuration or maintenance. ## FAQ -### Can I use this plugin on journals' centralization servers? +### Can I use this plugin on journal centralization servers? Yes. You can centralize your logs using `systemd-journal-remote`, and then install Netdata on this logs centralization server to explore the logs of all your infrastructure. @@ -304,7 +385,7 @@ on this logs centralization server to explore the logs of all your infrastructur This plugin will automatically provide multi-node views of your logs and also give you the ability to combine the logs of multiple servers, as you see fit. -Check [configuring a logs centralization server](#configuring-a-journals-centralization-server). +Check [configuring a logs centralization server](#how-do-i-configure-a-journal-centralization-server). ### Can I use this plugin from a parent Netdata? @@ -364,7 +445,7 @@ Yes. It is simple, fast and the software to do it is already in your systems. For application and system logs, `systemd` journal is ideal and the visibility you can get by centralizing your system logs and the use of this Netdata plugin, is unparalleled. -### How do I configure a journals' centralization server? +### How do I configure a journal centralization server? A short summary to get journal server running can be found below. There are two strategies you can apply, when it comes down to a centralized server for `systemd` journal logs. @@ -374,294 +455,13 @@ There are two strategies you can apply, when it comes down to a centralized serv For more options and reference to documentation, check `man systemd-journal-remote` and `man systemd-journal-upload`. -#### _passive_ journals' centralization without encryption - -> ℹ️ _passive_ is a journal server that waits for clients to push their metrics to it. - -> ⚠️ **IMPORTANT** -> These instructions will copy your logs to a central server, without any encryption or authorization. -> DO NOT USE THIS ON NON-TRUSTED NETWORKS. - -##### _passive_ server, without encryption - -On the centralization server install `systemd-journal-remote`: - -```sh -# change this according to your distro -sudo apt-get install systemd-journal-remote -``` - -Make sure the journal transfer protocol is `http`: - -```sh -sudo cp /lib/systemd/system/systemd-journal-remote.service /etc/systemd/system/ - -# edit it to make sure it says: -# --listen-http=-3 -# not: -# --listen-https=-3 -sudo nano /etc/systemd/system/systemd-journal-remote.service - -# reload systemd -sudo systemctl daemon-reload -``` - -Optionally, if you want to change the port (the default is `19532`), edit `systemd-journal-remote.socket` - -```sh -# edit the socket file -sudo systemctl edit systemd-journal-remote.socket -``` - -and add the following lines into the instructed place, and choose your desired port; save and exit. - -```sh -[Socket] -ListenStream=<DESIRED_PORT> -``` - -Finally, enable it, so that it will start automatically upon receiving a connection: - -``` -# enable systemd-journal-remote -sudo systemctl enable --now systemd-journal-remote.socket -sudo systemctl enable systemd-journal-remote.service -``` - -`systemd-journal-remote` is now listening for incoming journals from remote hosts. - -##### _passive_ client, without encryption - -On the clients, install `systemd-journal-remote`: - -```sh -# change this according to your distro -sudo apt-get install systemd-journal-remote -``` - -Edit `/etc/systemd/journal-upload.conf` and set the IP address and the port of the server, like so: - -``` -[Upload] -URL=http://centralization.server.ip:19532 -``` - -Edit `systemd-journal-upload`, and add `Restart=always` to make sure the client will keep trying to push logs, even if the server is temporarily not there, like this: - -```sh -sudo systemctl edit systemd-journal-upload -``` - -At the top, add: - -``` -[Service] -Restart=always -``` - -Enable and start `systemd-journal-upload`, like this: - -```sh -sudo systemctl enable systemd-journal-upload -sudo systemctl start systemd-journal-upload -``` - -##### verify it works - -To verify the central server is receiving logs, run this on the central server: - -```sh -sudo ls -l /var/log/journal/remote/ -``` - -You should see new files from the client's IP. - -Also, `systemctl status systemd-journal-remote` should show something like this: - -``` -systemd-journal-remote.service - Journal Remote Sink Service - Loaded: loaded (/etc/systemd/system/systemd-journal-remote.service; indirect; preset: disabled) - Active: active (running) since Sun 2023-10-15 14:29:46 EEST; 2h 24min ago -TriggeredBy: ● systemd-journal-remote.socket - Docs: man:systemd-journal-remote(8) - man:journal-remote.conf(5) - Main PID: 2118153 (systemd-journal) - Status: "Processing requests..." - Tasks: 1 (limit: 154152) - Memory: 2.2M - CPU: 71ms - CGroup: /system.slice/systemd-journal-remote.service - └─2118153 /usr/lib/systemd/systemd-journal-remote --listen-http=-3 --output=/var/log/journal/remote/ -``` - -Note the `status: "Processing requests..."` and the PID under `CGroup`. - -On the client `systemctl status systemd-journal-upload` should show something like this: - -``` -● systemd-journal-upload.service - Journal Remote Upload Service - Loaded: loaded (/lib/systemd/system/systemd-journal-upload.service; enabled; vendor preset: disabled) - Drop-In: /etc/systemd/system/systemd-journal-upload.service.d - └─override.conf - Active: active (running) since Sun 2023-10-15 10:39:04 UTC; 3h 17min ago - Docs: man:systemd-journal-upload(8) - Main PID: 4169 (systemd-journal) - Status: "Processing input..." - Tasks: 1 (limit: 13868) - Memory: 3.5M - CPU: 1.081s - CGroup: /system.slice/systemd-journal-upload.service - └─4169 /lib/systemd/systemd-journal-upload --save-state -``` - -Note the `Status: "Processing input..."` and the PID under `CGroup`. - -#### _passive_ journals' centralization with encryption using self-signed certificates - -> ℹ️ _passive_ is a journal server that waits for clients to push their metrics to it. +#### _passive_ journal centralization without encryption -##### _passive_ server, with encryption and self-singed certificates +If you want to setup your own passive journal centralization setup without encryption, [check out guide on it](https://github.com/netdata/netdata/blob/master/collectors/systemd-journal.plugin/passive_journal_centralization_guide_no_encryption.md). -On the centralization server install `systemd-journal-remote` and `openssl`: - -```sh -# change this according to your distro -sudo apt-get install systemd-journal-remote openssl -``` - -Make sure the journal transfer protocol is `https`: - -```sh -sudo cp /lib/systemd/system/systemd-journal-remote.service /etc/systemd/system/ - -# edit it to make sure it says: -# --listen-https=-3 -# not: -# --listen-http=-3 -sudo nano /etc/systemd/system/systemd-journal-remote.service - -# reload systemd -sudo systemctl daemon-reload -``` - -Optionally, if you want to change the port (the default is `19532`), edit `systemd-journal-remote.socket` - -```sh -# edit the socket file -sudo systemctl edit systemd-journal-remote.socket -``` - -and add the following lines into the instructed place, and choose your desired port; save and exit. - -```sh -[Socket] -ListenStream=<DESIRED_PORT> -``` - -Finally, enable it, so that it will start automatically upon receiving a connection: - -```sh -# enable systemd-journal-remote -sudo systemctl enable --now systemd-journal-remote.socket -sudo systemctl enable systemd-journal-remote.service -``` - -`systemd-journal-remote` is now listening for incoming journals from remote hosts. - -Use [this script](https://gist.github.com/ktsaou/d62b8a6501cf9a0da94f03cbbb71c5c7) to create a self-signed certificates authority and certificates for all your servers. - -```sh -wget -O systemd-journal-self-signed-certs.sh "https://gist.githubusercontent.com/ktsaou/d62b8a6501cf9a0da94f03cbbb71c5c7/raw/c346e61e0a66f45dc4095d254bd23917f0a01bd0/systemd-journal-self-signed-certs.sh" -chmod 755 systemd-journal-self-signed-certs.sh -``` - -Edit the script and at its top, set your settings: - -```sh -# The directory to save the generated certificates (and everything about this certificate authority). -# This is only used on the node generating the certificates (usually on the journals server). -DIR="/etc/ssl/systemd-journal-remote" - -# The journals centralization server name (the CN of the server certificate). -SERVER="server-hostname" - -# All the DNS names or IPs this server is reachable at (the certificate will include them). -# Journal clients can use any of them to connect to this server. -# systemd-journal-upload validates its URL= hostname, against this list. -SERVER_ALIASES=("DNS:server-hostname1" "DNS:server-hostname2" "IP:1.2.3.4" "IP:10.1.1.1" "IP:172.16.1.1") - -# All the names of the journal clients that will be sending logs to the server (the CNs of their certificates). -# These names are used by systemd-journal-remote to name the journal files in /var/log/journal/remote/. -# Also the remote hosts will be presented using these names on Netdata dashboards. -CLIENTS=("vm1" "vm2" "vm3" "add_as_may_as_needed") -``` - -Then run the script: - -```sh -sudo ./systemd-journal-self-signed-certs.sh -``` - -The script will create the directory `/etc/ssl/systemd-journal-remote` and in it you will find all the certificates needed. - -There will also be files named `runme-on-XXX.sh`. There will be 1 script for the server and 1 script for each of the clients. You can copy and paste (or `scp`) these scripts on your server and each of your clients and run them as root: - -```sh -scp /etc/ssl/systemd-journal-remote/runme-on-XXX.sh XXX:/tmp/ -``` - -Once the above is done, `ssh` to each server/client and do: - -```sh -sudo bash /tmp/runme-on-XXX.sh -``` - -The scripts install the needed certificates, fix their file permissions to be accessible by systemd-journal-remote/upload, change `/etc/systemd/journal-remote.conf` (on the server) or `/etc/systemd/journal-upload.conf` on the clients and restart the relevant services. - - -##### _passive_ client, with encryption and self-singed certificates - -On the clients, install `systemd-journal-remote`: - -```sh -# change this according to your distro -sudo apt-get install systemd-journal-remote -``` - -Edit `/etc/systemd/journal-upload.conf` and set the IP address and the port of the server, like so: - -``` -[Upload] -URL=https://centralization.server.ip:19532 -``` - -Make sure that `centralization.server.ip` is one of the `SERVER_ALIASES` when you created the certificates. - -Edit `systemd-journal-upload`, and add `Restart=always` to make sure the client will keep trying to push logs, even if the server is temporarily not there, like this: - -```sh -sudo systemctl edit systemd-journal-upload -``` - -At the top, add: - -``` -[Service] -Restart=always -``` - -Enable and start `systemd-journal-upload`, like this: - -```sh -sudo systemctl enable systemd-journal-upload -``` - -Copy the relevant `runme-on-XXX.sh` script as described on server setup and run it: - -```sh -sudo bash /tmp/runme-on-XXX.sh -``` +#### _passive_ journal centralization with encryption using self-signed certificates +If you want to setup your own passive journal centralization setup using self-signed certificates for encryption, [check out guide on it](https://github.com/netdata/netdata/blob/master/collectors/systemd-journal.plugin/passive_journal_centralization_guide_self_signed_certs.md). #### Limitations when using a logs centralization server @@ -670,4 +470,3 @@ As of this writing `namespaces` support by `systemd` is limited: - Docker containers cannot log to namespaces. Check [this issue](https://github.com/moby/moby/issues/41879). - `systemd-journal-upload` automatically uploads `system` and `user` journals, but not `namespaces` journals. For this you need to spawn a `systemd-journal-upload` per namespace. - diff --git a/collectors/systemd-journal.plugin/active_journal_centralization_guide_no_encryption.md b/collectors/systemd-journal.plugin/active_journal_centralization_guide_no_encryption.md new file mode 100644 index 000000000..cbed1e81e --- /dev/null +++ b/collectors/systemd-journal.plugin/active_journal_centralization_guide_no_encryption.md @@ -0,0 +1,126 @@ +# Active journal source without encryption + +This page will guide you through creating an active journal source without the use of encryption. + +Once you enable an active journal source on a server, `systemd-journal-gatewayd` will expose an REST API on TCP port 19531. This API can be used for querying the logs, exporting the logs, or monitoring new log entries, remotely. + +> ⚠️ **IMPORTANT**<br/> +> These instructions will expose your logs to the network, without any encryption or authorization.<br/> +> DO NOT USE THIS ON NON-TRUSTED NETWORKS. + +## Configuring an active journal source + +On the server you want to expose their logs, install `systemd-journal-gateway`. + +```bash +# change this according to your distro +sudo apt-get install systemd-journal-gateway +``` + +Optionally, if you want to change the port (the default is `19531`), edit `systemd-journal-gatewayd.socket` + +```bash +# edit the socket file +sudo systemctl edit systemd-journal-gatewayd.socket +``` + +and add the following lines into the instructed place, and choose your desired port; save and exit. + +```bash +[Socket] +ListenStream=<DESIRED_PORT> +``` + +Finally, enable it, so that it will start automatically upon receiving a connection: + +```bash +# enable systemd-journal-remote +sudo systemctl daemon-reload +sudo systemctl enable --now systemd-journal-gatewayd.socket +``` + +## Using the active journal source + +### Simple Logs Explorer + +`systemd-journal-gateway` provides a simple HTML5 application to browse the logs. + +To use it, open your web browser and navigate to: + +``` +http://server.ip:19531/browse +``` + +A simple page like this will be presented: + +![image](https://github.com/netdata/netdata/assets/2662304/4da88bf8-6398-468b-a359-68db0c9ad419) + +### Use it with `curl` + +`man systemd-journal-gatewayd` documents the supported API methods and provides examples to query the API using `curl` commands. + +### Copying the logs to a central journals server + +`systemd-journal-remote` has the ability to query instances of `systemd-journal-gatewayd` to fetch their logs, so that the central server fetches the logs, instead of waiting for the individual servers to push their logs to it. + +However, this kind of logs centralization has a key problem: **there is no guarantee that there will be no gaps in the logs replicated**. Theoretically, the REST API of `systemd-journal-gatewayd` supports querying past data, and `systemd-journal-remote` could keep track of the state of replication and automatically continue from the point it stopped last time. But it does not. So, currently the best logs centralization option is to use a **passive** centralization, where the clients push their logs to the server. + +Given these limitations, if you still want to configure an **active** journals centralization, this is what you need to do: + +On the centralization server install `systemd-journal-remote`: + +```bash +# change this according to your distro +sudo apt-get install systemd-journal-remote +``` + +Then, copy `systemd-journal-remote.service` to configure it for querying the active source: + +```bash +# replace "clientX" with the name of the active client node +sudo cp /lib/systemd/system/systemd-journal-remote.service /etc/systemd/system/systemd-journal-remote-clientX.service + +# edit it to make sure it the ExecStart line is like this: +# ExecStart=/usr/lib/systemd/systemd-journal-remote --url http://clientX:19531/entries?follow +sudo nano /etc/systemd/system/systemd-journal-remote-clientX.service + +# reload systemd +sudo systemctl daemon-reload +``` + +```bash +# enable systemd-journal-remote +sudo systemctl enable --now systemd-journal-remote-clientX.service +``` + +You can repeat this process to create as many `systemd-journal-remote` services, as the active source you have. + +## Verify it works + +To verify the central server is receiving logs, run this on the central server: + +```bash +sudo ls -l /var/log/journal/remote/ +``` + +You should see new files from the client's hostname or IP. + +Also, any of the new service files (`systemctl status systemd-journal-clientX`) should show something like this: + +```bash +● systemd-journal-clientX.service - Fetching systemd journal logs from 192.168.2.146 + Loaded: loaded (/etc/systemd/system/systemd-journal-clientX.service; enabled; preset: disabled) + Drop-In: /usr/lib/systemd/system/service.d + └─10-timeout-abort.conf + Active: active (running) since Wed 2023-10-18 07:35:52 EEST; 23min ago + Main PID: 77959 (systemd-journal) + Tasks: 2 (limit: 6928) + Memory: 7.7M + CPU: 518ms + CGroup: /system.slice/systemd-journal-clientX.service + ├─77959 /usr/lib/systemd/systemd-journal-remote --url "http://192.168.2.146:19531/entries?follow" + └─77962 curl "-HAccept: application/vnd.fdo.journal" --silent --show-error "http://192.168.2.146:19531/entries?follow" + +Oct 18 07:35:52 systemd-journal-server systemd[1]: Started systemd-journal-clientX.service - Fetching systemd journal logs from 192.168.2.146. +Oct 18 07:35:52 systemd-journal-server systemd-journal-remote[77959]: Spawning curl http://192.168.2.146:19531/entries?follow... +``` diff --git a/collectors/systemd-journal.plugin/forward_secure_sealing.md b/collectors/systemd-journal.plugin/forward_secure_sealing.md new file mode 100644 index 000000000..b41570d68 --- /dev/null +++ b/collectors/systemd-journal.plugin/forward_secure_sealing.md @@ -0,0 +1,80 @@ +# Forward Secure Sealing (FSS) in Systemd-Journal + +Forward Secure Sealing (FSS) is a feature in the systemd journal designed to detect log file tampering. +Given that attackers often try to hide their actions by modifying or deleting log file entries, +FSS provides administrators with a mechanism to identify any such unauthorized alterations. + +## Importance +Logs are a crucial component of system monitoring and auditing. Ensuring their integrity means administrators can trust +the data, detect potential breaches, and trace actions back to their origins. Traditional methods to maintain this +integrity involve writing logs to external systems or printing them out. While these methods are effective, they are +not foolproof. FSS offers a more streamlined approach, allowing for log verification directly on the local system. + +## How FSS Works +FSS operates by "sealing" binary logs at regular intervals. This seal is a cryptographic operation, ensuring that any +tampering with the logs prior to the sealing can be detected. If an attacker modifies logs before they are sealed, +these changes become a permanent part of the sealed record, highlighting any malicious activity. + +The technology behind FSS is based on "Forward Secure Pseudo Random Generators" (FSPRG), a concept stemming from +academic research. + +Two keys are central to FSS: + +- **Sealing Key**: Kept on the system, used to seal the logs. +- **Verification Key**: Stored securely off-system, used to verify the sealed logs. + +Every so often, the sealing key is regenerated in a non-reversible process, ensuring that old keys are obsolete and the +latest logs are sealed with a fresh key. The off-site verification key can regenerate any past sealing key, allowing +administrators to verify older seals. If logs are tampered with, verification will fail, alerting administrators to the +breach. + +## Enabling FSS +To enable FSS, use the following command: + +```bash +journalctl --setup-keys +``` + +By default, systemd will seal the logs every 15 minutes. However, this interval can be adjusted using a flag during key +generation. For example, to seal logs every 10 seconds: + +```bash +journalctl --setup-keys --interval=10s +``` + +## Verifying Journals +After enabling FSS, you can verify the integrity of your logs using the verification key: + +```bash +journalctl --verify +``` + +If any discrepancies are found, you'll be alerted, indicating potential tampering. + +## Disabling FSS +Should you wish to disable FSS: + +**Delete the Sealing Key**: This stops new log entries from being sealed. + +```bash +journalctl --rotate +``` + +**Rotate and Prune the Journals**: This will start a new unsealed journal and can remove old sealed journals. + +```bash +journalctl --vacuum-time=1s +``` + + +**Adjust Systemd Configuration (Optional)**: If you've made changes to facilitate FSS in `/etc/systemd/journald.conf`, +consider reverting or adjusting those. Restart the systemd-journald service afterward: + +```bash +systemctl restart systemd-journald +``` + +## Conclusion +FSS is a significant advancement in maintaining log integrity. While not a replacement for all traditional integrity +methods, it offers a valuable tool in the battle against unauthorized log tampering. By integrating FSS into your log +management strategy, you ensure a more transparent, reliable, and tamper-evident logging system. diff --git a/collectors/systemd-journal.plugin/passive_journal_centralization_guide_no_encryption.md b/collectors/systemd-journal.plugin/passive_journal_centralization_guide_no_encryption.md new file mode 100644 index 000000000..b70c22033 --- /dev/null +++ b/collectors/systemd-journal.plugin/passive_journal_centralization_guide_no_encryption.md @@ -0,0 +1,150 @@ +# Passive journal centralization without encryption + +This page will guide you through creating a passive journal centralization setup without the use of encryption. + +Once you centralize your infrastructure logs to a server, Netdata will automatically detects all the logs from all servers and organize them in sources. +With the setup described in this document, journal files are identified by the IPs of the clients sending the logs. Netdata will automatically do +reverse DNS lookups to find the names of the server and name the sources on the dashboard accordingly. + +A _passive_ journal server waits for clients to push their metrics to it, so in this setup we will: + +1. configure `systemd-journal-remote` on the server, to listen for incoming connections. +2. configure `systemd-journal-upload` on the clients, to push their logs to the server. + +> ⚠️ **IMPORTANT**<br/> +> These instructions will copy your logs to a central server, without any encryption or authorization.<br/> +> DO NOT USE THIS ON NON-TRUSTED NETWORKS. + +## Server configuration + +On the centralization server install `systemd-journal-remote`: + +```bash +# change this according to your distro +sudo apt-get install systemd-journal-remote +``` + +Make sure the journal transfer protocol is `http`: + +```bash +sudo cp /lib/systemd/system/systemd-journal-remote.service /etc/systemd/system/ + +# edit it to make sure it says: +# --listen-http=-3 +# not: +# --listen-https=-3 +sudo nano /etc/systemd/system/systemd-journal-remote.service + +# reload systemd +sudo systemctl daemon-reload +``` + +Optionally, if you want to change the port (the default is `19532`), edit `systemd-journal-remote.socket` + +```bash +# edit the socket file +sudo systemctl edit systemd-journal-remote.socket +``` + +and add the following lines into the instructed place, and choose your desired port; save and exit. + +```bash +[Socket] +ListenStream=<DESIRED_PORT> +``` + +Finally, enable it, so that it will start automatically upon receiving a connection: + +```bash +# enable systemd-journal-remote +sudo systemctl enable --now systemd-journal-remote.socket +sudo systemctl enable systemd-journal-remote.service +``` + +`systemd-journal-remote` is now listening for incoming journals from remote hosts. + +## Client configuration + +On the clients, install `systemd-journal-remote` (it includes `systemd-journal-upload`): + +```bash +# change this according to your distro +sudo apt-get install systemd-journal-remote +``` + +Edit `/etc/systemd/journal-upload.conf` and set the IP address and the port of the server, like so: + +```conf +[Upload] +URL=http://centralization.server.ip:19532 +``` + +Edit `systemd-journal-upload`, and add `Restart=always` to make sure the client will keep trying to push logs, even if the server is temporarily not there, like this: + +```bash +sudo systemctl edit systemd-journal-upload +``` + +At the top, add: + +```conf +[Service] +Restart=always +``` + +Enable and start `systemd-journal-upload`, like this: + +```bash +sudo systemctl enable systemd-journal-upload +sudo systemctl start systemd-journal-upload +``` + +## Verify it works + +To verify the central server is receiving logs, run this on the central server: + +```bash +sudo ls -l /var/log/journal/remote/ +``` + +You should see new files from the client's IP. + +Also, `systemctl status systemd-journal-remote` should show something like this: + +```bash +systemd-journal-remote.service - Journal Remote Sink Service + Loaded: loaded (/etc/systemd/system/systemd-journal-remote.service; indirect; preset: disabled) + Active: active (running) since Sun 2023-10-15 14:29:46 EEST; 2h 24min ago +TriggeredBy: ● systemd-journal-remote.socket + Docs: man:systemd-journal-remote(8) + man:journal-remote.conf(5) + Main PID: 2118153 (systemd-journal) + Status: "Processing requests..." + Tasks: 1 (limit: 154152) + Memory: 2.2M + CPU: 71ms + CGroup: /system.slice/systemd-journal-remote.service + └─2118153 /usr/lib/systemd/systemd-journal-remote --listen-http=-3 --output=/var/log/journal/remote/ +``` + +Note the `status: "Processing requests..."` and the PID under `CGroup`. + +On the client `systemctl status systemd-journal-upload` should show something like this: + +```bash +● systemd-journal-upload.service - Journal Remote Upload Service + Loaded: loaded (/lib/systemd/system/systemd-journal-upload.service; enabled; vendor preset: disabled) + Drop-In: /etc/systemd/system/systemd-journal-upload.service.d + └─override.conf + Active: active (running) since Sun 2023-10-15 10:39:04 UTC; 3h 17min ago + Docs: man:systemd-journal-upload(8) + Main PID: 4169 (systemd-journal) + Status: "Processing input..." + Tasks: 1 (limit: 13868) + Memory: 3.5M + CPU: 1.081s + CGroup: /system.slice/systemd-journal-upload.service + └─4169 /lib/systemd/systemd-journal-upload --save-state +``` + +Note the `Status: "Processing input..."` and the PID under `CGroup`. diff --git a/collectors/systemd-journal.plugin/passive_journal_centralization_guide_self_signed_certs.md b/collectors/systemd-journal.plugin/passive_journal_centralization_guide_self_signed_certs.md new file mode 100644 index 000000000..722d1ceae --- /dev/null +++ b/collectors/systemd-journal.plugin/passive_journal_centralization_guide_self_signed_certs.md @@ -0,0 +1,250 @@ +# Passive journal centralization with encryption using self-signed certificates + +This page will guide you through creating a **passive** journal centralization setup using **self-signed certificates** for encryption and authorization. + +Once you centralize your infrastructure logs to a server, Netdata will automatically detect all the logs from all servers and organize them in sources. With the setup described in this document, on recent systemd versions, Netdata will automatically name all remote sources using the names of the clients, as they are described at their certificates (on older versions, the names will be IPs or reverse DNS lookups of the IPs). + +A **passive** journal server waits for clients to push their metrics to it, so in this setup we will: + +1. configure a certificates authority and issue self-signed certificates for your servers. +2. configure `systemd-journal-remote` on the server, to listen for incoming connections. +3. configure `systemd-journal-upload` on the clients, to push their logs to the server. + +Keep in mind that the authorization involved works like this: + +1. The server (`systemd-journal-remote`) validates that the client (`systemd-journal-upload`) uses a trusted certificate (a certificate issued by the same certificate authority as its own). + So, **the server will accept logs from any client having a valid certificate**. +2. The client (`systemd-journal-upload`) validates that the receiver (`systemd-journal-remote`) uses a trusted certificate (like the server does) and it also checks that the hostname or IP of the URL specified to its configuration, matches one of the names or IPs of the server it gets connected to. So, **the client does a validation that it connected to the right server**, using the URL hostname against the names and IPs of the server on its certificate. + +This means, that if both certificates are issued by the same certificate authority, only the client can potentially reject the server. + +## Self-signed certificates + +To simplify the process of creating and managing self-signed certificates, we have created [this bash script](https://github.com/netdata/netdata/blob/master/collectors/systemd-journal.plugin/systemd-journal-self-signed-certs.sh). + +This helps to also automate the distribution of the certificates to your servers (it generates a new bash script for each of your servers, which includes everything required, including the certificates). + +We suggest to keep this script and all the involved certificates at the journals centralization server, in the directory `/etc/ssl/systemd-journal`, so that you can make future changes as required. If you prefer to keep the certificate authority and all the certificates at a more secure location, just use the script on that location. + +On the server that will issue the certificates (usually the centralizaton server), do the following: + +```bash +# install systemd-journal-remote to add the users and groups required and openssl for the certs +# change this according to your distro +sudo apt-get install systemd-journal-remote openssl + +# download the script and make it executable +curl >systemd-journal-self-signed-certs.sh "https://raw.githubusercontent.com/netdata/netdata/master/collectors/systemd-journal.plugin/systemd-journal-self-signed-certs.sh" +chmod 750 systemd-journal-self-signed-certs.sh +``` + +To create certificates for your servers, run this: + +```bash +sudo ./systemd-journal-self-signed-certs.sh "server1" "DNS:hostname1" "IP:10.0.0.1" +``` + +Where: + + - `server1` is the canonical name of the server. On newer systemd version, this name will be used by `systemd-journal-remote` and Netdata when you view the logs on the dashboard. + - `DNS:hostname1` is a DNS name that the server is reachable at. Add `"DNS:xyz"` multiple times to define multiple DNS names for the server. + - `IP:10.0.0.1` is an IP that the server is reachable at. Add `"IP:xyz"` multiple times to define multiple IPs for the server. + +Repeat this process to create the certificates for all your servers. You can add servers as required, at any time in the future. + +Existing certificates are never re-generated. Typically certificates need to be revoked and new ones to be issued. But `systemd-journal-remote` tools do not support handling revocations. So, the only option you have to re-issue a certificate is to delete its files in `/etc/ssl/systemd-journal` and run the script again to create a new one. + +Once you run the script of each of your servers, in `/etc/ssl/systemd-journal` you will find shell scripts named `runme-on-XXX.sh`, where `XXX` are the canonical names of your servers. + +These `runme-on-XXX.sh` include everything to install the certificates, fix their file permissions to be accessible by `systemd-journal-remote` and `systemd-journal-upload`, and update `/etc/systemd/journal-remote.conf` and `/etc/systemd/journal-upload.conf`. + +You can copy and paste (or `scp`) these scripts on your server and each of your clients: + +```bash +sudo scp /etc/ssl/systemd-journal/runme-on-XXX.sh XXX:/tmp/ +``` + +For the rest of this guide, we assume that you have copied the right `runme-on-XXX.sh` at the `/tmp` of all the servers for which you issued certificates. + +### note about certificates file permissions + +It is worth noting that `systemd-journal` certificates need to be owned by `systemd-journal-remote:systemd-journal`. + +Both the user `systemd-journal-remote` and the group `systemd-journal` are automatically added by the `systemd-journal-remote` package. However, `systemd-journal-upload` (and `systemd-journal-gatewayd` - that is not used in this guide) use dynamic users. Thankfully they are added to the `systemd-journal` remote group. + +So, by having the certificates owned by `systemd-journal-remote:systemd-journal`, satisfies both `systemd-journal-remote` which is not in the `systemd-journal` group, and `systemd-journal-upload` (and `systemd-journal-gatewayd`) which use dynamic users. + +You don't need to do anything about it (the scripts take care of everything), but it is worth noting how this works. + +## Server configuration + +On the centralization server install `systemd-journal-remote`: + +```bash +# change this according to your distro +sudo apt-get install systemd-journal-remote +``` + +Make sure the journal transfer protocol is `https`: + +```bash +sudo cp /lib/systemd/system/systemd-journal-remote.service /etc/systemd/system/ + +# edit it to make sure it says: +# --listen-https=-3 +# not: +# --listen-http=-3 +sudo nano /etc/systemd/system/systemd-journal-remote.service + +# reload systemd +sudo systemctl daemon-reload +``` + +Optionally, if you want to change the port (the default is `19532`), edit `systemd-journal-remote.socket` + +```bash +# edit the socket file +sudo systemctl edit systemd-journal-remote.socket +``` + +and add the following lines into the instructed place, and choose your desired port; save and exit. + +```bash +[Socket] +ListenStream=<DESIRED_PORT> +``` + +Next, run the `runme-on-XXX.sh` script on the server: + +```bash +# if you run the certificate authority on the server: +sudo /etc/ssl/systemd-journal/runme-on-XXX.sh + +# if you run the certificate authority elsewhere, +# assuming you have coped the runme-on-XXX.sh script (as described above): +sudo bash /tmp/runme-on-XXX.sh +``` + +This will install the certificates in `/etc/ssl/systemd-journal`, set the right file permissions, and update `/etc/systemd/journal-remote.conf` and `/etc/systemd/journal-upload.conf` to use the right certificate files. + +Finally, enable it, so that it will start automatically upon receiving a connection: + +```bash +# enable systemd-journal-remote +sudo systemctl enable --now systemd-journal-remote.socket +sudo systemctl enable systemd-journal-remote.service +``` + +`systemd-journal-remote` is now listening for incoming journals from remote hosts. + +> When done, remember to `rm /tmp/runme-on-*.sh` to make sure your certificates are secure. + +## Client configuration + +On the clients, install `systemd-journal-remote` (it includes `systemd-journal-upload`): + +```bash +# change this according to your distro +sudo apt-get install systemd-journal-remote +``` + +Edit `/etc/systemd/journal-upload.conf` and set the IP address and the port of the server, like so: + +```conf +[Upload] +URL=https://centralization.server.ip:19532 +``` + +Make sure that `centralization.server.ip` is one of the `DNS:` or `IP:` parameters you defined when you created the centralization server certificates. If it is not, the client may reject to connect. + +Next, edit `systemd-journal-upload.service`, and add `Restart=always` to make sure the client will keep trying to push logs, even if the server is temporarily not there, like this: + +```bash +sudo systemctl edit systemd-journal-upload.service +``` + +At the top, add: + +```conf +[Service] +Restart=always +``` + +Enable `systemd-journal-upload.service`, like this: + +```bash +sudo systemctl enable systemd-journal-upload.service +``` + +Assuming that you have in `/tmp` the relevant `runme-on-XXX.sh` script for this client, run: + +```bash +sudo bash /tmp/runme-on-XXX.sh +``` + +This will install the certificates in `/etc/ssl/systemd-journal`, set the right file permissions, and update `/etc/systemd/journal-remote.conf` and `/etc/systemd/journal-upload.conf` to use the right certificate files. + +Finally, restart `systemd-journal-upload.service`: + +```bash +sudo systemctl restart systemd-journal-upload.service +``` + +The client should now be pushing logs to the central server. + +> When done, remember to `rm /tmp/runme-on-*.sh` to make sure your certificates are secure. + +Here it is in action, in Netdata: + +![2023-10-18 16-23-05](https://github.com/netdata/netdata/assets/2662304/83bec232-4770-455b-8f1c-46b5de5f93a2) + + +## Verify it works + +To verify the central server is receiving logs, run this on the central server: + +```bash +sudo ls -l /var/log/journal/remote/ +``` + +Depending on the `systemd` version you use, you should see new files from the clients' canonical names (as defined at their certificates) or IPs. + +Also, `systemctl status systemd-journal-remote` should show something like this: + +```bash +systemd-journal-remote.service - Journal Remote Sink Service + Loaded: loaded (/etc/systemd/system/systemd-journal-remote.service; indirect; preset: disabled) + Active: active (running) since Sun 2023-10-15 14:29:46 EEST; 2h 24min ago +TriggeredBy: ● systemd-journal-remote.socket + Docs: man:systemd-journal-remote(8) + man:journal-remote.conf(5) + Main PID: 2118153 (systemd-journal) + Status: "Processing requests..." + Tasks: 1 (limit: 154152) + Memory: 2.2M + CPU: 71ms + CGroup: /system.slice/systemd-journal-remote.service + └─2118153 /usr/lib/systemd/systemd-journal-remote --listen-https=-3 --output=/var/log/journal/remote/ +``` + +Note the `status: "Processing requests..."` and the PID under `CGroup`. + +On the client `systemctl status systemd-journal-upload` should show something like this: + +```bash +● systemd-journal-upload.service - Journal Remote Upload Service + Loaded: loaded (/lib/systemd/system/systemd-journal-upload.service; enabled; vendor preset: disabled) + Drop-In: /etc/systemd/system/systemd-journal-upload.service.d + └─override.conf + Active: active (running) since Sun 2023-10-15 10:39:04 UTC; 3h 17min ago + Docs: man:systemd-journal-upload(8) + Main PID: 4169 (systemd-journal) + Status: "Processing input..." + Tasks: 1 (limit: 13868) + Memory: 3.5M + CPU: 1.081s + CGroup: /system.slice/systemd-journal-upload.service + └─4169 /lib/systemd/systemd-journal-upload --save-state +``` + +Note the `Status: "Processing input..."` and the PID under `CGroup`. diff --git a/collectors/systemd-journal.plugin/systemd-internals.h b/collectors/systemd-journal.plugin/systemd-internals.h new file mode 100644 index 000000000..e1ae44d4f --- /dev/null +++ b/collectors/systemd-journal.plugin/systemd-internals.h @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_COLLECTORS_SYSTEMD_INTERNALS_H +#define NETDATA_COLLECTORS_SYSTEMD_INTERNALS_H + +#include "collectors/all.h" +#include "libnetdata/libnetdata.h" + +#include <linux/capability.h> +#include <systemd/sd-journal.h> +#include <syslog.h> + +#define SYSTEMD_JOURNAL_FUNCTION_DESCRIPTION "View, search and analyze systemd journal entries." +#define SYSTEMD_JOURNAL_FUNCTION_NAME "systemd-journal" +#define SYSTEMD_JOURNAL_DEFAULT_TIMEOUT 60 +#define SYSTEMD_JOURNAL_ENABLE_ESTIMATIONS_FILE_PERCENTAGE 0.01 +#define SYSTEMD_JOURNAL_EXECUTE_WATCHER_PENDING_EVERY_MS 250 +#define SYSTEMD_JOURNAL_ALL_FILES_SCAN_EVERY_USEC (5 * 60 * USEC_PER_SEC) + +#define SYSTEMD_UNITS_FUNCTION_DESCRIPTION "View the status of systemd units" +#define SYSTEMD_UNITS_FUNCTION_NAME "systemd-list-units" +#define SYSTEMD_UNITS_DEFAULT_TIMEOUT 30 + +extern __thread size_t fstat_thread_calls; +extern __thread size_t fstat_thread_cached_responses; +void fstat_cache_enable_on_thread(void); +void fstat_cache_disable_on_thread(void); + +extern netdata_mutex_t stdout_mutex; + +typedef enum { + ND_SD_JOURNAL_NO_FILE_MATCHED, + ND_SD_JOURNAL_FAILED_TO_OPEN, + ND_SD_JOURNAL_FAILED_TO_SEEK, + ND_SD_JOURNAL_TIMED_OUT, + ND_SD_JOURNAL_OK, + ND_SD_JOURNAL_NOT_MODIFIED, + ND_SD_JOURNAL_CANCELLED, +} ND_SD_JOURNAL_STATUS; + +typedef enum { + SDJF_NONE = 0, + SDJF_ALL = (1 << 0), + SDJF_LOCAL_ALL = (1 << 1), + SDJF_REMOTE_ALL = (1 << 2), + SDJF_LOCAL_SYSTEM = (1 << 3), + SDJF_LOCAL_USER = (1 << 4), + SDJF_LOCAL_NAMESPACE = (1 << 5), + SDJF_LOCAL_OTHER = (1 << 6), +} SD_JOURNAL_FILE_SOURCE_TYPE; + +struct journal_file { + const char *filename; + size_t filename_len; + STRING *source; + SD_JOURNAL_FILE_SOURCE_TYPE source_type; + usec_t file_last_modified_ut; + usec_t msg_first_ut; + usec_t msg_last_ut; + size_t size; + bool logged_failure; + bool logged_journalctl_failure; + usec_t max_journal_vs_realtime_delta_ut; + + usec_t last_scan_monotonic_ut; + usec_t last_scan_header_vs_last_modified_ut; + + uint64_t first_seqnum; + uint64_t last_seqnum; + sd_id128_t first_writer_id; + sd_id128_t last_writer_id; + + uint64_t messages_in_file; +}; + +#define SDJF_SOURCE_ALL_NAME "all" +#define SDJF_SOURCE_LOCAL_NAME "all-local-logs" +#define SDJF_SOURCE_LOCAL_SYSTEM_NAME "all-local-system-logs" +#define SDJF_SOURCE_LOCAL_USERS_NAME "all-local-user-logs" +#define SDJF_SOURCE_LOCAL_OTHER_NAME "all-uncategorized" +#define SDJF_SOURCE_NAMESPACES_NAME "all-local-namespaces" +#define SDJF_SOURCE_REMOTES_NAME "all-remote-systems" + +#define ND_SD_JOURNAL_OPEN_FLAGS (0) + +#define JOURNAL_VS_REALTIME_DELTA_DEFAULT_UT (5 * USEC_PER_SEC) // assume always 5 seconds latency +#define JOURNAL_VS_REALTIME_DELTA_MAX_UT (2 * 60 * USEC_PER_SEC) // up to 2 minutes latency + +extern DICTIONARY *journal_files_registry; +extern DICTIONARY *used_hashes_registry; +extern DICTIONARY *function_query_status_dict; +extern DICTIONARY *boot_ids_to_first_ut; + +int journal_file_dict_items_backward_compar(const void *a, const void *b); +int journal_file_dict_items_forward_compar(const void *a, const void *b); +void buffer_json_journal_versions(BUFFER *wb); +void available_journal_file_sources_to_json_array(BUFFER *wb); +bool journal_files_completed_once(void); +void journal_files_registry_update(void); +void journal_directory_scan_recursively(DICTIONARY *files, DICTIONARY *dirs, const char *dirname, int depth); + +FACET_ROW_SEVERITY syslog_priority_to_facet_severity(FACETS *facets, FACET_ROW *row, void *data); + +void netdata_systemd_journal_dynamic_row_id(FACETS *facets, BUFFER *json_array, FACET_ROW_KEY_VALUE *rkv, FACET_ROW *row, void *data); +void netdata_systemd_journal_transform_priority(FACETS *facets, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope, void *data); +void netdata_systemd_journal_transform_syslog_facility(FACETS *facets, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope, void *data); +void netdata_systemd_journal_transform_errno(FACETS *facets, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope, void *data); +void netdata_systemd_journal_transform_boot_id(FACETS *facets, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope, void *data); +void netdata_systemd_journal_transform_uid(FACETS *facets, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope, void *data); +void netdata_systemd_journal_transform_gid(FACETS *facets, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope, void *data); +void netdata_systemd_journal_transform_cap_effective(FACETS *facets, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope, void *data); +void netdata_systemd_journal_transform_timestamp_usec(FACETS *facets, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope, void *data); + +usec_t journal_file_update_annotation_boot_id(sd_journal *j, struct journal_file *jf, const char *boot_id); + +#define MAX_JOURNAL_DIRECTORIES 100 +struct journal_directory { + char *path; +}; +extern struct journal_directory journal_directories[MAX_JOURNAL_DIRECTORIES]; + +void journal_init_files_and_directories(void); +void journal_init_query_status(void); +void function_systemd_journal(const char *transaction, char *function, int timeout, bool *cancelled); +void journal_file_update_header(const char *filename, struct journal_file *jf); + +void netdata_systemd_journal_message_ids_init(void); +void netdata_systemd_journal_transform_message_id(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused); + +void *journal_watcher_main(void *arg); + +#ifdef ENABLE_SYSTEMD_DBUS +void function_systemd_units(const char *transaction, char *function, int timeout, bool *cancelled); +#endif + +static inline void send_newline_and_flush(void) { + netdata_mutex_lock(&stdout_mutex); + fprintf(stdout, "\n"); + fflush(stdout); + netdata_mutex_unlock(&stdout_mutex); +} + +static inline bool parse_journal_field(const char *data, size_t data_length, const char **key, size_t *key_length, const char **value, size_t *value_length) { + const char *k = data; + const char *equal = strchr(k, '='); + if(unlikely(!equal)) + return false; + + size_t kl = equal - k; + + const char *v = ++equal; + size_t vl = data_length - kl - 1; + + *key = k; + *key_length = kl; + *value = v; + *value_length = vl; + + return true; +} + +#endif //NETDATA_COLLECTORS_SYSTEMD_INTERNALS_H diff --git a/collectors/systemd-journal.plugin/systemd-journal-annotations.c b/collectors/systemd-journal.plugin/systemd-journal-annotations.c new file mode 100644 index 000000000..b12356110 --- /dev/null +++ b/collectors/systemd-journal.plugin/systemd-journal-annotations.c @@ -0,0 +1,719 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "systemd-internals.h" + +const char *errno_map[] = { + [1] = "1 (EPERM)", // "Operation not permitted", + [2] = "2 (ENOENT)", // "No such file or directory", + [3] = "3 (ESRCH)", // "No such process", + [4] = "4 (EINTR)", // "Interrupted system call", + [5] = "5 (EIO)", // "Input/output error", + [6] = "6 (ENXIO)", // "No such device or address", + [7] = "7 (E2BIG)", // "Argument list too long", + [8] = "8 (ENOEXEC)", // "Exec format error", + [9] = "9 (EBADF)", // "Bad file descriptor", + [10] = "10 (ECHILD)", // "No child processes", + [11] = "11 (EAGAIN)", // "Resource temporarily unavailable", + [12] = "12 (ENOMEM)", // "Cannot allocate memory", + [13] = "13 (EACCES)", // "Permission denied", + [14] = "14 (EFAULT)", // "Bad address", + [15] = "15 (ENOTBLK)", // "Block device required", + [16] = "16 (EBUSY)", // "Device or resource busy", + [17] = "17 (EEXIST)", // "File exists", + [18] = "18 (EXDEV)", // "Invalid cross-device link", + [19] = "19 (ENODEV)", // "No such device", + [20] = "20 (ENOTDIR)", // "Not a directory", + [21] = "21 (EISDIR)", // "Is a directory", + [22] = "22 (EINVAL)", // "Invalid argument", + [23] = "23 (ENFILE)", // "Too many open files in system", + [24] = "24 (EMFILE)", // "Too many open files", + [25] = "25 (ENOTTY)", // "Inappropriate ioctl for device", + [26] = "26 (ETXTBSY)", // "Text file busy", + [27] = "27 (EFBIG)", // "File too large", + [28] = "28 (ENOSPC)", // "No space left on device", + [29] = "29 (ESPIPE)", // "Illegal seek", + [30] = "30 (EROFS)", // "Read-only file system", + [31] = "31 (EMLINK)", // "Too many links", + [32] = "32 (EPIPE)", // "Broken pipe", + [33] = "33 (EDOM)", // "Numerical argument out of domain", + [34] = "34 (ERANGE)", // "Numerical result out of range", + [35] = "35 (EDEADLK)", // "Resource deadlock avoided", + [36] = "36 (ENAMETOOLONG)", // "File name too long", + [37] = "37 (ENOLCK)", // "No locks available", + [38] = "38 (ENOSYS)", // "Function not implemented", + [39] = "39 (ENOTEMPTY)", // "Directory not empty", + [40] = "40 (ELOOP)", // "Too many levels of symbolic links", + [42] = "42 (ENOMSG)", // "No message of desired type", + [43] = "43 (EIDRM)", // "Identifier removed", + [44] = "44 (ECHRNG)", // "Channel number out of range", + [45] = "45 (EL2NSYNC)", // "Level 2 not synchronized", + [46] = "46 (EL3HLT)", // "Level 3 halted", + [47] = "47 (EL3RST)", // "Level 3 reset", + [48] = "48 (ELNRNG)", // "Link number out of range", + [49] = "49 (EUNATCH)", // "Protocol driver not attached", + [50] = "50 (ENOCSI)", // "No CSI structure available", + [51] = "51 (EL2HLT)", // "Level 2 halted", + [52] = "52 (EBADE)", // "Invalid exchange", + [53] = "53 (EBADR)", // "Invalid request descriptor", + [54] = "54 (EXFULL)", // "Exchange full", + [55] = "55 (ENOANO)", // "No anode", + [56] = "56 (EBADRQC)", // "Invalid request code", + [57] = "57 (EBADSLT)", // "Invalid slot", + [59] = "59 (EBFONT)", // "Bad font file format", + [60] = "60 (ENOSTR)", // "Device not a stream", + [61] = "61 (ENODATA)", // "No data available", + [62] = "62 (ETIME)", // "Timer expired", + [63] = "63 (ENOSR)", // "Out of streams resources", + [64] = "64 (ENONET)", // "Machine is not on the network", + [65] = "65 (ENOPKG)", // "Package not installed", + [66] = "66 (EREMOTE)", // "Object is remote", + [67] = "67 (ENOLINK)", // "Link has been severed", + [68] = "68 (EADV)", // "Advertise error", + [69] = "69 (ESRMNT)", // "Srmount error", + [70] = "70 (ECOMM)", // "Communication error on send", + [71] = "71 (EPROTO)", // "Protocol error", + [72] = "72 (EMULTIHOP)", // "Multihop attempted", + [73] = "73 (EDOTDOT)", // "RFS specific error", + [74] = "74 (EBADMSG)", // "Bad message", + [75] = "75 (EOVERFLOW)", // "Value too large for defined data type", + [76] = "76 (ENOTUNIQ)", // "Name not unique on network", + [77] = "77 (EBADFD)", // "File descriptor in bad state", + [78] = "78 (EREMCHG)", // "Remote address changed", + [79] = "79 (ELIBACC)", // "Can not access a needed shared library", + [80] = "80 (ELIBBAD)", // "Accessing a corrupted shared library", + [81] = "81 (ELIBSCN)", // ".lib section in a.out corrupted", + [82] = "82 (ELIBMAX)", // "Attempting to link in too many shared libraries", + [83] = "83 (ELIBEXEC)", // "Cannot exec a shared library directly", + [84] = "84 (EILSEQ)", // "Invalid or incomplete multibyte or wide character", + [85] = "85 (ERESTART)", // "Interrupted system call should be restarted", + [86] = "86 (ESTRPIPE)", // "Streams pipe error", + [87] = "87 (EUSERS)", // "Too many users", + [88] = "88 (ENOTSOCK)", // "Socket operation on non-socket", + [89] = "89 (EDESTADDRREQ)", // "Destination address required", + [90] = "90 (EMSGSIZE)", // "Message too long", + [91] = "91 (EPROTOTYPE)", // "Protocol wrong type for socket", + [92] = "92 (ENOPROTOOPT)", // "Protocol not available", + [93] = "93 (EPROTONOSUPPORT)", // "Protocol not supported", + [94] = "94 (ESOCKTNOSUPPORT)", // "Socket type not supported", + [95] = "95 (ENOTSUP)", // "Operation not supported", + [96] = "96 (EPFNOSUPPORT)", // "Protocol family not supported", + [97] = "97 (EAFNOSUPPORT)", // "Address family not supported by protocol", + [98] = "98 (EADDRINUSE)", // "Address already in use", + [99] = "99 (EADDRNOTAVAIL)", // "Cannot assign requested address", + [100] = "100 (ENETDOWN)", // "Network is down", + [101] = "101 (ENETUNREACH)", // "Network is unreachable", + [102] = "102 (ENETRESET)", // "Network dropped connection on reset", + [103] = "103 (ECONNABORTED)", // "Software caused connection abort", + [104] = "104 (ECONNRESET)", // "Connection reset by peer", + [105] = "105 (ENOBUFS)", // "No buffer space available", + [106] = "106 (EISCONN)", // "Transport endpoint is already connected", + [107] = "107 (ENOTCONN)", // "Transport endpoint is not connected", + [108] = "108 (ESHUTDOWN)", // "Cannot send after transport endpoint shutdown", + [109] = "109 (ETOOMANYREFS)", // "Too many references: cannot splice", + [110] = "110 (ETIMEDOUT)", // "Connection timed out", + [111] = "111 (ECONNREFUSED)", // "Connection refused", + [112] = "112 (EHOSTDOWN)", // "Host is down", + [113] = "113 (EHOSTUNREACH)", // "No route to host", + [114] = "114 (EALREADY)", // "Operation already in progress", + [115] = "115 (EINPROGRESS)", // "Operation now in progress", + [116] = "116 (ESTALE)", // "Stale file handle", + [117] = "117 (EUCLEAN)", // "Structure needs cleaning", + [118] = "118 (ENOTNAM)", // "Not a XENIX named type file", + [119] = "119 (ENAVAIL)", // "No XENIX semaphores available", + [120] = "120 (EISNAM)", // "Is a named type file", + [121] = "121 (EREMOTEIO)", // "Remote I/O error", + [122] = "122 (EDQUOT)", // "Disk quota exceeded", + [123] = "123 (ENOMEDIUM)", // "No medium found", + [124] = "124 (EMEDIUMTYPE)", // "Wrong medium type", + [125] = "125 (ECANCELED)", // "Operation canceled", + [126] = "126 (ENOKEY)", // "Required key not available", + [127] = "127 (EKEYEXPIRED)", // "Key has expired", + [128] = "128 (EKEYREVOKED)", // "Key has been revoked", + [129] = "129 (EKEYREJECTED)", // "Key was rejected by service", + [130] = "130 (EOWNERDEAD)", // "Owner died", + [131] = "131 (ENOTRECOVERABLE)", // "State not recoverable", + [132] = "132 (ERFKILL)", // "Operation not possible due to RF-kill", + [133] = "133 (EHWPOISON)", // "Memory page has hardware error", +}; + +const char *linux_capabilities[] = { + [CAP_CHOWN] = "CHOWN", + [CAP_DAC_OVERRIDE] = "DAC_OVERRIDE", + [CAP_DAC_READ_SEARCH] = "DAC_READ_SEARCH", + [CAP_FOWNER] = "FOWNER", + [CAP_FSETID] = "FSETID", + [CAP_KILL] = "KILL", + [CAP_SETGID] = "SETGID", + [CAP_SETUID] = "SETUID", + [CAP_SETPCAP] = "SETPCAP", + [CAP_LINUX_IMMUTABLE] = "LINUX_IMMUTABLE", + [CAP_NET_BIND_SERVICE] = "NET_BIND_SERVICE", + [CAP_NET_BROADCAST] = "NET_BROADCAST", + [CAP_NET_ADMIN] = "NET_ADMIN", + [CAP_NET_RAW] = "NET_RAW", + [CAP_IPC_LOCK] = "IPC_LOCK", + [CAP_IPC_OWNER] = "IPC_OWNER", + [CAP_SYS_MODULE] = "SYS_MODULE", + [CAP_SYS_RAWIO] = "SYS_RAWIO", + [CAP_SYS_CHROOT] = "SYS_CHROOT", + [CAP_SYS_PTRACE] = "SYS_PTRACE", + [CAP_SYS_PACCT] = "SYS_PACCT", + [CAP_SYS_ADMIN] = "SYS_ADMIN", + [CAP_SYS_BOOT] = "SYS_BOOT", + [CAP_SYS_NICE] = "SYS_NICE", + [CAP_SYS_RESOURCE] = "SYS_RESOURCE", + [CAP_SYS_TIME] = "SYS_TIME", + [CAP_SYS_TTY_CONFIG] = "SYS_TTY_CONFIG", + [CAP_MKNOD] = "MKNOD", + [CAP_LEASE] = "LEASE", + [CAP_AUDIT_WRITE] = "AUDIT_WRITE", + [CAP_AUDIT_CONTROL] = "AUDIT_CONTROL", + [CAP_SETFCAP] = "SETFCAP", + [CAP_MAC_OVERRIDE] = "MAC_OVERRIDE", + [CAP_MAC_ADMIN] = "MAC_ADMIN", + [CAP_SYSLOG] = "SYSLOG", + [CAP_WAKE_ALARM] = "WAKE_ALARM", + [CAP_BLOCK_SUSPEND] = "BLOCK_SUSPEND", + [37 /*CAP_AUDIT_READ*/] = "AUDIT_READ", + [38 /*CAP_PERFMON*/] = "PERFMON", + [39 /*CAP_BPF*/] = "BPF", + [40 /* CAP_CHECKPOINT_RESTORE */] = "CHECKPOINT_RESTORE", +}; + +static const char *syslog_facility_to_name(int facility) { + switch (facility) { + case LOG_FAC(LOG_KERN): return "kern"; + case LOG_FAC(LOG_USER): return "user"; + case LOG_FAC(LOG_MAIL): return "mail"; + case LOG_FAC(LOG_DAEMON): return "daemon"; + case LOG_FAC(LOG_AUTH): return "auth"; + case LOG_FAC(LOG_SYSLOG): return "syslog"; + case LOG_FAC(LOG_LPR): return "lpr"; + case LOG_FAC(LOG_NEWS): return "news"; + case LOG_FAC(LOG_UUCP): return "uucp"; + case LOG_FAC(LOG_CRON): return "cron"; + case LOG_FAC(LOG_AUTHPRIV): return "authpriv"; + case LOG_FAC(LOG_FTP): return "ftp"; + case LOG_FAC(LOG_LOCAL0): return "local0"; + case LOG_FAC(LOG_LOCAL1): return "local1"; + case LOG_FAC(LOG_LOCAL2): return "local2"; + case LOG_FAC(LOG_LOCAL3): return "local3"; + case LOG_FAC(LOG_LOCAL4): return "local4"; + case LOG_FAC(LOG_LOCAL5): return "local5"; + case LOG_FAC(LOG_LOCAL6): return "local6"; + case LOG_FAC(LOG_LOCAL7): return "local7"; + default: return NULL; + } +} + +static const char *syslog_priority_to_name(int priority) { + switch (priority) { + case LOG_ALERT: return "alert"; + case LOG_CRIT: return "critical"; + case LOG_DEBUG: return "debug"; + case LOG_EMERG: return "panic"; + case LOG_ERR: return "error"; + case LOG_INFO: return "info"; + case LOG_NOTICE: return "notice"; + case LOG_WARNING: return "warning"; + default: return NULL; + } +} + +FACET_ROW_SEVERITY syslog_priority_to_facet_severity(FACETS *facets __maybe_unused, FACET_ROW *row, void *data __maybe_unused) { + // same to + // https://github.com/systemd/systemd/blob/aab9e4b2b86905a15944a1ac81e471b5b7075932/src/basic/terminal-util.c#L1501 + // function get_log_colors() + + FACET_ROW_KEY_VALUE *priority_rkv = dictionary_get(row->dict, "PRIORITY"); + if(!priority_rkv || priority_rkv->empty) + return FACET_ROW_SEVERITY_NORMAL; + + int priority = str2i(buffer_tostring(priority_rkv->wb)); + + if(priority <= LOG_ERR) + return FACET_ROW_SEVERITY_CRITICAL; + + else if (priority <= LOG_WARNING) + return FACET_ROW_SEVERITY_WARNING; + + else if(priority <= LOG_NOTICE) + return FACET_ROW_SEVERITY_NOTICE; + + else if(priority >= LOG_DEBUG) + return FACET_ROW_SEVERITY_DEBUG; + + return FACET_ROW_SEVERITY_NORMAL; +} + +static char *uid_to_username(uid_t uid, char *buffer, size_t buffer_size) { + static __thread char tmp[1024 + 1]; + struct passwd pw, *result = NULL; + + if (getpwuid_r(uid, &pw, tmp, sizeof(tmp), &result) != 0 || !result || !pw.pw_name || !(*pw.pw_name)) + snprintfz(buffer, buffer_size - 1, "%u", uid); + else + snprintfz(buffer, buffer_size - 1, "%u (%s)", uid, pw.pw_name); + + return buffer; +} + +static char *gid_to_groupname(gid_t gid, char* buffer, size_t buffer_size) { + static __thread char tmp[1024]; + struct group grp, *result = NULL; + + if (getgrgid_r(gid, &grp, tmp, sizeof(tmp), &result) != 0 || !result || !grp.gr_name || !(*grp.gr_name)) + snprintfz(buffer, buffer_size - 1, "%u", gid); + else + snprintfz(buffer, buffer_size - 1, "%u (%s)", gid, grp.gr_name); + + return buffer; +} + +void netdata_systemd_journal_transform_syslog_facility(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) { + const char *v = buffer_tostring(wb); + if(*v && isdigit(*v)) { + int facility = str2i(buffer_tostring(wb)); + const char *name = syslog_facility_to_name(facility); + if (name) { + buffer_flush(wb); + buffer_strcat(wb, name); + } + } +} + +void netdata_systemd_journal_transform_priority(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) { + if(scope == FACETS_TRANSFORM_FACET_SORT) + return; + + const char *v = buffer_tostring(wb); + if(*v && isdigit(*v)) { + int priority = str2i(buffer_tostring(wb)); + const char *name = syslog_priority_to_name(priority); + if (name) { + buffer_flush(wb); + buffer_strcat(wb, name); + } + } +} + +void netdata_systemd_journal_transform_errno(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) { + if(scope == FACETS_TRANSFORM_FACET_SORT) + return; + + const char *v = buffer_tostring(wb); + if(*v && isdigit(*v)) { + unsigned err_no = str2u(buffer_tostring(wb)); + if(err_no > 0 && err_no < sizeof(errno_map) / sizeof(*errno_map)) { + const char *name = errno_map[err_no]; + if(name) { + buffer_flush(wb); + buffer_strcat(wb, name); + } + } + } +} + +// ---------------------------------------------------------------------------- +// UID and GID transformation + +#define UID_GID_HASHTABLE_SIZE 10000 + +struct word_t2str_hashtable_entry { + struct word_t2str_hashtable_entry *next; + Word_t hash; + size_t len; + char str[]; +}; + +struct word_t2str_hashtable { + SPINLOCK spinlock; + size_t size; + struct word_t2str_hashtable_entry *hashtable[UID_GID_HASHTABLE_SIZE]; +}; + +struct word_t2str_hashtable uid_hashtable = { + .size = UID_GID_HASHTABLE_SIZE, +}; + +struct word_t2str_hashtable gid_hashtable = { + .size = UID_GID_HASHTABLE_SIZE, +}; + +struct word_t2str_hashtable_entry **word_t2str_hashtable_slot(struct word_t2str_hashtable *ht, Word_t hash) { + size_t slot = hash % ht->size; + struct word_t2str_hashtable_entry **e = &ht->hashtable[slot]; + + while(*e && (*e)->hash != hash) + e = &((*e)->next); + + return e; +} + +const char *uid_to_username_cached(uid_t uid, size_t *length) { + spinlock_lock(&uid_hashtable.spinlock); + + struct word_t2str_hashtable_entry **e = word_t2str_hashtable_slot(&uid_hashtable, uid); + if(!(*e)) { + static __thread char buf[1024]; + const char *name = uid_to_username(uid, buf, sizeof(buf)); + size_t size = strlen(name) + 1; + + *e = callocz(1, sizeof(struct word_t2str_hashtable_entry) + size); + (*e)->len = size - 1; + (*e)->hash = uid; + memcpy((*e)->str, name, size); + } + + spinlock_unlock(&uid_hashtable.spinlock); + + *length = (*e)->len; + return (*e)->str; +} + +const char *gid_to_groupname_cached(gid_t gid, size_t *length) { + spinlock_lock(&gid_hashtable.spinlock); + + struct word_t2str_hashtable_entry **e = word_t2str_hashtable_slot(&gid_hashtable, gid); + if(!(*e)) { + static __thread char buf[1024]; + const char *name = gid_to_groupname(gid, buf, sizeof(buf)); + size_t size = strlen(name) + 1; + + *e = callocz(1, sizeof(struct word_t2str_hashtable_entry) + size); + (*e)->len = size - 1; + (*e)->hash = gid; + memcpy((*e)->str, name, size); + } + + spinlock_unlock(&gid_hashtable.spinlock); + + *length = (*e)->len; + return (*e)->str; +} + +DICTIONARY *boot_ids_to_first_ut = NULL; + +void netdata_systemd_journal_transform_boot_id(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) { + const char *boot_id = buffer_tostring(wb); + if(*boot_id && isxdigit(*boot_id)) { + usec_t ut = UINT64_MAX; + usec_t *p_ut = dictionary_get(boot_ids_to_first_ut, boot_id); + if(!p_ut) { +#ifndef HAVE_SD_JOURNAL_RESTART_FIELDS + struct journal_file *jf; + dfe_start_read(journal_files_registry, jf) { + const char *files[2] = { + [0] = jf_dfe.name, + [1] = NULL, + }; + + sd_journal *j = NULL; + int r = sd_journal_open_files(&j, files, ND_SD_JOURNAL_OPEN_FLAGS); + if(r < 0 || !j) { + internal_error(true, "JOURNAL: while looking for the first timestamp of boot_id '%s', " + "sd_journal_open_files('%s') returned %d", + boot_id, jf_dfe.name, r); + continue; + } + + ut = journal_file_update_annotation_boot_id(j, jf, boot_id); + sd_journal_close(j); + } + dfe_done(jf); +#endif + } + else + ut = *p_ut; + + if(ut && ut != UINT64_MAX) { + char buffer[RFC3339_MAX_LENGTH]; + rfc3339_datetime_ut(buffer, sizeof(buffer), ut, 0, true); + + switch(scope) { + default: + case FACETS_TRANSFORM_DATA: + case FACETS_TRANSFORM_VALUE: + buffer_sprintf(wb, " (%s) ", buffer); + break; + + case FACETS_TRANSFORM_FACET: + case FACETS_TRANSFORM_FACET_SORT: + case FACETS_TRANSFORM_HISTOGRAM: + buffer_flush(wb); + buffer_sprintf(wb, "%s", buffer); + break; + } + } + } +} + +void netdata_systemd_journal_transform_uid(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) { + if(scope == FACETS_TRANSFORM_FACET_SORT) + return; + + const char *v = buffer_tostring(wb); + if(*v && isdigit(*v)) { + uid_t uid = str2i(buffer_tostring(wb)); + size_t len; + const char *name = uid_to_username_cached(uid, &len); + buffer_contents_replace(wb, name, len); + } +} + +void netdata_systemd_journal_transform_gid(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) { + if(scope == FACETS_TRANSFORM_FACET_SORT) + return; + + const char *v = buffer_tostring(wb); + if(*v && isdigit(*v)) { + gid_t gid = str2i(buffer_tostring(wb)); + size_t len; + const char *name = gid_to_groupname_cached(gid, &len); + buffer_contents_replace(wb, name, len); + } +} + +void netdata_systemd_journal_transform_cap_effective(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) { + if(scope == FACETS_TRANSFORM_FACET_SORT) + return; + + const char *v = buffer_tostring(wb); + if(*v && isdigit(*v)) { + uint64_t cap = strtoul(buffer_tostring(wb), NULL, 16); + if(cap) { + buffer_fast_strcat(wb, " (", 2); + for (size_t i = 0, added = 0; i < sizeof(linux_capabilities) / sizeof(linux_capabilities[0]); i++) { + if (linux_capabilities[i] && (cap & (1ULL << i))) { + + if (added) + buffer_fast_strcat(wb, " | ", 3); + + buffer_strcat(wb, linux_capabilities[i]); + added++; + } + } + buffer_fast_strcat(wb, ")", 1); + } + } +} + +void netdata_systemd_journal_transform_timestamp_usec(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) { + if(scope == FACETS_TRANSFORM_FACET_SORT) + return; + + const char *v = buffer_tostring(wb); + if(*v && isdigit(*v)) { + uint64_t ut = str2ull(buffer_tostring(wb), NULL); + if(ut) { + char buffer[RFC3339_MAX_LENGTH]; + rfc3339_datetime_ut(buffer, sizeof(buffer), ut, 6, true); + buffer_sprintf(wb, " (%s)", buffer); + } + } +} + +// ---------------------------------------------------------------------------- + +void netdata_systemd_journal_dynamic_row_id(FACETS *facets __maybe_unused, BUFFER *json_array, FACET_ROW_KEY_VALUE *rkv, FACET_ROW *row, void *data __maybe_unused) { + FACET_ROW_KEY_VALUE *pid_rkv = dictionary_get(row->dict, "_PID"); + const char *pid = pid_rkv ? buffer_tostring(pid_rkv->wb) : FACET_VALUE_UNSET; + + const char *identifier = NULL; + FACET_ROW_KEY_VALUE *container_name_rkv = dictionary_get(row->dict, "CONTAINER_NAME"); + if(container_name_rkv && !container_name_rkv->empty) + identifier = buffer_tostring(container_name_rkv->wb); + + if(!identifier) { + FACET_ROW_KEY_VALUE *syslog_identifier_rkv = dictionary_get(row->dict, "SYSLOG_IDENTIFIER"); + if(syslog_identifier_rkv && !syslog_identifier_rkv->empty) + identifier = buffer_tostring(syslog_identifier_rkv->wb); + + if(!identifier) { + FACET_ROW_KEY_VALUE *comm_rkv = dictionary_get(row->dict, "_COMM"); + if(comm_rkv && !comm_rkv->empty) + identifier = buffer_tostring(comm_rkv->wb); + } + } + + buffer_flush(rkv->wb); + + if(!identifier || !*identifier) + buffer_strcat(rkv->wb, FACET_VALUE_UNSET); + else if(!pid || !*pid) + buffer_sprintf(rkv->wb, "%s", identifier); + else + buffer_sprintf(rkv->wb, "%s[%s]", identifier, pid); + + buffer_json_add_array_item_string(json_array, buffer_tostring(rkv->wb)); +} + + +// ---------------------------------------------------------------------------- + +struct message_id_info { + const char *msg; +}; + +static DICTIONARY *known_journal_messages_ids = NULL; + +void netdata_systemd_journal_message_ids_init(void) { + known_journal_messages_ids = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + + struct message_id_info i = { 0 }; + i.msg = "Journal start"; dictionary_set(known_journal_messages_ids, "f77379a8490b408bbe5f6940505a777b", &i, sizeof(i)); + i.msg = "Journal stop"; dictionary_set(known_journal_messages_ids, "d93fb3c9c24d451a97cea615ce59c00b", &i, sizeof(i)); + i.msg = "Journal dropped"; dictionary_set(known_journal_messages_ids, "a596d6fe7bfa4994828e72309e95d61e", &i, sizeof(i)); + i.msg = "Journal missed"; dictionary_set(known_journal_messages_ids, "e9bf28e6e834481bb6f48f548ad13606", &i, sizeof(i)); + i.msg = "Journal usage"; dictionary_set(known_journal_messages_ids, "ec387f577b844b8fa948f33cad9a75e6", &i, sizeof(i)); + i.msg = "Coredump"; dictionary_set(known_journal_messages_ids, "fc2e22bc6ee647b6b90729ab34a250b1", &i, sizeof(i)); + i.msg = "Truncated core"; dictionary_set(known_journal_messages_ids, "5aadd8e954dc4b1a8c954d63fd9e1137", &i, sizeof(i)); + i.msg = "Backtrace"; dictionary_set(known_journal_messages_ids, "1f4e0a44a88649939aaea34fc6da8c95", &i, sizeof(i)); + i.msg = "Session start"; dictionary_set(known_journal_messages_ids, "8d45620c1a4348dbb17410da57c60c66", &i, sizeof(i)); + i.msg = "Session stop"; dictionary_set(known_journal_messages_ids, "3354939424b4456d9802ca8333ed424a", &i, sizeof(i)); + i.msg = "Seat start"; dictionary_set(known_journal_messages_ids, "fcbefc5da23d428093f97c82a9290f7b", &i, sizeof(i)); + i.msg = "Seat stop"; dictionary_set(known_journal_messages_ids, "e7852bfe46784ed0accde04bc864c2d5", &i, sizeof(i)); + i.msg = "Machine start"; dictionary_set(known_journal_messages_ids, "24d8d4452573402496068381a6312df2", &i, sizeof(i)); + i.msg = "Machine stop"; dictionary_set(known_journal_messages_ids, "58432bd3bace477cb514b56381b8a758", &i, sizeof(i)); + i.msg = "Time change"; dictionary_set(known_journal_messages_ids, "c7a787079b354eaaa9e77b371893cd27", &i, sizeof(i)); + i.msg = "Timezone change"; dictionary_set(known_journal_messages_ids, "45f82f4aef7a4bbf942ce861d1f20990", &i, sizeof(i)); + i.msg = "Tainted"; dictionary_set(known_journal_messages_ids, "50876a9db00f4c40bde1a2ad381c3a1b", &i, sizeof(i)); + i.msg = "Startup finished"; dictionary_set(known_journal_messages_ids, "b07a249cd024414a82dd00cd181378ff", &i, sizeof(i)); + i.msg = "User startup finished"; dictionary_set(known_journal_messages_ids, "eed00a68ffd84e31882105fd973abdd1", &i, sizeof(i)); + i.msg = "Sleep start"; dictionary_set(known_journal_messages_ids, "6bbd95ee977941e497c48be27c254128", &i, sizeof(i)); + i.msg = "Sleep stop"; dictionary_set(known_journal_messages_ids, "8811e6df2a8e40f58a94cea26f8ebf14", &i, sizeof(i)); + i.msg = "Shutdown"; dictionary_set(known_journal_messages_ids, "98268866d1d54a499c4e98921d93bc40", &i, sizeof(i)); + i.msg = "Factory reset"; dictionary_set(known_journal_messages_ids, "c14aaf76ec284a5fa1f105f88dfb061c", &i, sizeof(i)); + i.msg = "Crash exit"; dictionary_set(known_journal_messages_ids, "d9ec5e95e4b646aaaea2fd05214edbda", &i, sizeof(i)); + i.msg = "Crash failed"; dictionary_set(known_journal_messages_ids, "3ed0163e868a4417ab8b9e210407a96c", &i, sizeof(i)); + i.msg = "Crash freeze"; dictionary_set(known_journal_messages_ids, "645c735537634ae0a32b15a7c6cba7d4", &i, sizeof(i)); + i.msg = "Crash no coredump"; dictionary_set(known_journal_messages_ids, "5addb3a06a734d3396b794bf98fb2d01", &i, sizeof(i)); + i.msg = "Crash no fork"; dictionary_set(known_journal_messages_ids, "5c9e98de4ab94c6a9d04d0ad793bd903", &i, sizeof(i)); + i.msg = "Crash unknown signal"; dictionary_set(known_journal_messages_ids, "5e6f1f5e4db64a0eaee3368249d20b94", &i, sizeof(i)); + i.msg = "Crash systemd signal"; dictionary_set(known_journal_messages_ids, "83f84b35ee264f74a3896a9717af34cb", &i, sizeof(i)); + i.msg = "Crash process signal"; dictionary_set(known_journal_messages_ids, "3a73a98baf5b4b199929e3226c0be783", &i, sizeof(i)); + i.msg = "Crash waitpid failed"; dictionary_set(known_journal_messages_ids, "2ed18d4f78ca47f0a9bc25271c26adb4", &i, sizeof(i)); + i.msg = "Crash coredump failed"; dictionary_set(known_journal_messages_ids, "56b1cd96f24246c5b607666fda952356", &i, sizeof(i)); + i.msg = "Crash coredump pid"; dictionary_set(known_journal_messages_ids, "4ac7566d4d7548f4981f629a28f0f829", &i, sizeof(i)); + i.msg = "Crash shell fork failed"; dictionary_set(known_journal_messages_ids, "38e8b1e039ad469291b18b44c553a5b7", &i, sizeof(i)); + i.msg = "Crash execle failed"; dictionary_set(known_journal_messages_ids, "872729b47dbe473eb768ccecd477beda", &i, sizeof(i)); + i.msg = "Selinux failed"; dictionary_set(known_journal_messages_ids, "658a67adc1c940b3b3316e7e8628834a", &i, sizeof(i)); + i.msg = "Battery low warning"; dictionary_set(known_journal_messages_ids, "e6f456bd92004d9580160b2207555186", &i, sizeof(i)); + i.msg = "Battery low poweroff"; dictionary_set(known_journal_messages_ids, "267437d33fdd41099ad76221cc24a335", &i, sizeof(i)); + i.msg = "Core mainloop failed"; dictionary_set(known_journal_messages_ids, "79e05b67bc4545d1922fe47107ee60c5", &i, sizeof(i)); + i.msg = "Core no xdgdir path"; dictionary_set(known_journal_messages_ids, "dbb136b10ef4457ba47a795d62f108c9", &i, sizeof(i)); + i.msg = "Core capability bounding user"; dictionary_set(known_journal_messages_ids, "ed158c2df8884fa584eead2d902c1032", &i, sizeof(i)); + i.msg = "Core capability bounding"; dictionary_set(known_journal_messages_ids, "42695b500df048298bee37159caa9f2e", &i, sizeof(i)); + i.msg = "Core disable privileges"; dictionary_set(known_journal_messages_ids, "bfc2430724ab44499735b4f94cca9295", &i, sizeof(i)); + i.msg = "Core start target failed"; dictionary_set(known_journal_messages_ids, "59288af523be43a28d494e41e26e4510", &i, sizeof(i)); + i.msg = "Core isolate target failed"; dictionary_set(known_journal_messages_ids, "689b4fcc97b4486ea5da92db69c9e314", &i, sizeof(i)); + i.msg = "Core fd set failed"; dictionary_set(known_journal_messages_ids, "5ed836f1766f4a8a9fc5da45aae23b29", &i, sizeof(i)); + i.msg = "Core pid1 environment"; dictionary_set(known_journal_messages_ids, "6a40fbfbd2ba4b8db02fb40c9cd090d7", &i, sizeof(i)); + i.msg = "Core manager allocate"; dictionary_set(known_journal_messages_ids, "0e54470984ac419689743d957a119e2e", &i, sizeof(i)); + i.msg = "Smack failed write"; dictionary_set(known_journal_messages_ids, "d67fa9f847aa4b048a2ae33535331adb", &i, sizeof(i)); + i.msg = "Shutdown error"; dictionary_set(known_journal_messages_ids, "af55a6f75b544431b72649f36ff6d62c", &i, sizeof(i)); + i.msg = "Valgrind helper fork"; dictionary_set(known_journal_messages_ids, "d18e0339efb24a068d9c1060221048c2", &i, sizeof(i)); + i.msg = "Unit starting"; dictionary_set(known_journal_messages_ids, "7d4958e842da4a758f6c1cdc7b36dcc5", &i, sizeof(i)); + i.msg = "Unit started"; dictionary_set(known_journal_messages_ids, "39f53479d3a045ac8e11786248231fbf", &i, sizeof(i)); + i.msg = "Unit failed"; dictionary_set(known_journal_messages_ids, "be02cf6855d2428ba40df7e9d022f03d", &i, sizeof(i)); + i.msg = "Unit stopping"; dictionary_set(known_journal_messages_ids, "de5b426a63be47a7b6ac3eaac82e2f6f", &i, sizeof(i)); + i.msg = "Unit stopped"; dictionary_set(known_journal_messages_ids, "9d1aaa27d60140bd96365438aad20286", &i, sizeof(i)); + i.msg = "Unit reloading"; dictionary_set(known_journal_messages_ids, "d34d037fff1847e6ae669a370e694725", &i, sizeof(i)); + i.msg = "Unit reloaded"; dictionary_set(known_journal_messages_ids, "7b05ebc668384222baa8881179cfda54", &i, sizeof(i)); + i.msg = "Unit restart scheduled"; dictionary_set(known_journal_messages_ids, "5eb03494b6584870a536b337290809b3", &i, sizeof(i)); + i.msg = "Unit resources"; dictionary_set(known_journal_messages_ids, "ae8f7b866b0347b9af31fe1c80b127c0", &i, sizeof(i)); + i.msg = "Unit success"; dictionary_set(known_journal_messages_ids, "7ad2d189f7e94e70a38c781354912448", &i, sizeof(i)); + i.msg = "Unit skipped"; dictionary_set(known_journal_messages_ids, "0e4284a0caca4bfc81c0bb6786972673", &i, sizeof(i)); + i.msg = "Unit failure result"; dictionary_set(known_journal_messages_ids, "d9b373ed55a64feb8242e02dbe79a49c", &i, sizeof(i)); + i.msg = "Spawn failed"; dictionary_set(known_journal_messages_ids, "641257651c1b4ec9a8624d7a40a9e1e7", &i, sizeof(i)); + i.msg = "Unit process exit"; dictionary_set(known_journal_messages_ids, "98e322203f7a4ed290d09fe03c09fe15", &i, sizeof(i)); + i.msg = "Forward syslog missed"; dictionary_set(known_journal_messages_ids, "0027229ca0644181a76c4e92458afa2e", &i, sizeof(i)); + i.msg = "Overmounting"; dictionary_set(known_journal_messages_ids, "1dee0369c7fc4736b7099b38ecb46ee7", &i, sizeof(i)); + i.msg = "Unit oomd kill"; dictionary_set(known_journal_messages_ids, "d989611b15e44c9dbf31e3c81256e4ed", &i, sizeof(i)); + i.msg = "Unit out of memory"; dictionary_set(known_journal_messages_ids, "fe6faa94e7774663a0da52717891d8ef", &i, sizeof(i)); + i.msg = "Lid opened"; dictionary_set(known_journal_messages_ids, "b72ea4a2881545a0b50e200e55b9b06f", &i, sizeof(i)); + i.msg = "Lid closed"; dictionary_set(known_journal_messages_ids, "b72ea4a2881545a0b50e200e55b9b070", &i, sizeof(i)); + i.msg = "System docked"; dictionary_set(known_journal_messages_ids, "f5f416b862074b28927a48c3ba7d51ff", &i, sizeof(i)); + i.msg = "System undocked"; dictionary_set(known_journal_messages_ids, "51e171bd585248568110144c517cca53", &i, sizeof(i)); + i.msg = "Power key"; dictionary_set(known_journal_messages_ids, "b72ea4a2881545a0b50e200e55b9b071", &i, sizeof(i)); + i.msg = "Power key long press"; dictionary_set(known_journal_messages_ids, "3e0117101eb243c1b9a50db3494ab10b", &i, sizeof(i)); + i.msg = "Reboot key"; dictionary_set(known_journal_messages_ids, "9fa9d2c012134ec385451ffe316f97d0", &i, sizeof(i)); + i.msg = "Reboot key long press"; dictionary_set(known_journal_messages_ids, "f1c59a58c9d943668965c337caec5975", &i, sizeof(i)); + i.msg = "Suspend key"; dictionary_set(known_journal_messages_ids, "b72ea4a2881545a0b50e200e55b9b072", &i, sizeof(i)); + i.msg = "Suspend key long press"; dictionary_set(known_journal_messages_ids, "bfdaf6d312ab4007bc1fe40a15df78e8", &i, sizeof(i)); + i.msg = "Hibernate key"; dictionary_set(known_journal_messages_ids, "b72ea4a2881545a0b50e200e55b9b073", &i, sizeof(i)); + i.msg = "Hibernate key long press"; dictionary_set(known_journal_messages_ids, "167836df6f7f428e98147227b2dc8945", &i, sizeof(i)); + i.msg = "Invalid configuration"; dictionary_set(known_journal_messages_ids, "c772d24e9a884cbeb9ea12625c306c01", &i, sizeof(i)); + i.msg = "Dnssec failure"; dictionary_set(known_journal_messages_ids, "1675d7f172174098b1108bf8c7dc8f5d", &i, sizeof(i)); + i.msg = "Dnssec trust anchor revoked"; dictionary_set(known_journal_messages_ids, "4d4408cfd0d144859184d1e65d7c8a65", &i, sizeof(i)); + i.msg = "Dnssec downgrade"; dictionary_set(known_journal_messages_ids, "36db2dfa5a9045e1bd4af5f93e1cf057", &i, sizeof(i)); + i.msg = "Unsafe user name"; dictionary_set(known_journal_messages_ids, "b61fdac612e94b9182285b998843061f", &i, sizeof(i)); + i.msg = "Mount point path not suitable"; dictionary_set(known_journal_messages_ids, "1b3bb94037f04bbf81028e135a12d293", &i, sizeof(i)); + i.msg = "Device path not suitable"; dictionary_set(known_journal_messages_ids, "010190138f494e29a0ef6669749531aa", &i, sizeof(i)); + i.msg = "Nobody user unsuitable"; dictionary_set(known_journal_messages_ids, "b480325f9c394a7b802c231e51a2752c", &i, sizeof(i)); + i.msg = "Systemd udev settle deprecated"; dictionary_set(known_journal_messages_ids, "1c0454c1bd2241e0ac6fefb4bc631433", &i, sizeof(i)); + i.msg = "Time sync"; dictionary_set(known_journal_messages_ids, "7c8a41f37b764941a0e1780b1be2f037", &i, sizeof(i)); + i.msg = "Time bump"; dictionary_set(known_journal_messages_ids, "7db73c8af0d94eeb822ae04323fe6ab6", &i, sizeof(i)); + i.msg = "Shutdown scheduled"; dictionary_set(known_journal_messages_ids, "9e7066279dc8403da79ce4b1a69064b2", &i, sizeof(i)); + i.msg = "Shutdown canceled"; dictionary_set(known_journal_messages_ids, "249f6fb9e6e2428c96f3f0875681ffa3", &i, sizeof(i)); + i.msg = "TPM pcr extend"; dictionary_set(known_journal_messages_ids, "3f7d5ef3e54f4302b4f0b143bb270cab", &i, sizeof(i)); + i.msg = "Memory trim"; dictionary_set(known_journal_messages_ids, "f9b0be465ad540d0850ad32172d57c21", &i, sizeof(i)); + i.msg = "Sysv generator deprecated"; dictionary_set(known_journal_messages_ids, "a8fa8dacdb1d443e9503b8be367a6adb", &i, sizeof(i)); + + // gnome + // https://gitlab.gnome.org/GNOME/gnome-session/-/blob/main/gnome-session/gsm-manager.c + i.msg = "Gnome SM startup succeeded"; dictionary_set(known_journal_messages_ids, "0ce153587afa4095832d233c17a88001", &i, sizeof(i)); + i.msg = "Gnome SM unrecoverable failure"; dictionary_set(known_journal_messages_ids, "10dd2dc188b54a5e98970f56499d1f73", &i, sizeof(i)); + + // gnome-shell + // https://gitlab.gnome.org/GNOME/gnome-shell/-/blob/main/js/ui/main.js#L56 + i.msg = "Gnome shell started";dictionary_set(known_journal_messages_ids, "f3ea493c22934e26811cd62abe8e203a", &i, sizeof(i)); + + // flathub + // https://docs.flatpak.org/de/latest/flatpak-command-reference.html + i.msg = "Flatpak cache"; dictionary_set(known_journal_messages_ids, "c7b39b1e006b464599465e105b361485", &i, sizeof(i)); + + // ??? + i.msg = "Flathub pulls"; dictionary_set(known_journal_messages_ids, "75ba3deb0af041a9a46272ff85d9e73e", &i, sizeof(i)); + i.msg = "Flathub pull errors"; dictionary_set(known_journal_messages_ids, "f02bce89a54e4efab3a94a797d26204a", &i, sizeof(i)); + + // ?? + i.msg = "Boltd starting"; dictionary_set(known_journal_messages_ids, "dd11929c788e48bdbb6276fb5f26b08a", &i, sizeof(i)); + + // Netdata + i.msg = "Netdata connection from child"; dictionary_set(known_journal_messages_ids, "ed4cdb8f1beb4ad3b57cb3cae2d162fa", &i, sizeof(i)); + i.msg = "Netdata connection to parent"; dictionary_set(known_journal_messages_ids, "6e2e3839067648968b646045dbf28d66", &i, sizeof(i)); + i.msg = "Netdata alert transition"; dictionary_set(known_journal_messages_ids, "9ce0cb58ab8b44df82c4bf1ad9ee22de", &i, sizeof(i)); + i.msg = "Netdata alert notification"; dictionary_set(known_journal_messages_ids, "6db0018e83e34320ae2a659d78019fb7", &i, sizeof(i)); +} + +void netdata_systemd_journal_transform_message_id(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) { + const char *message_id = buffer_tostring(wb); + struct message_id_info *i = dictionary_get(known_journal_messages_ids, message_id); + + if(!i) + return; + + switch(scope) { + default: + case FACETS_TRANSFORM_DATA: + case FACETS_TRANSFORM_VALUE: + buffer_sprintf(wb, " (%s)", i->msg); + break; + + case FACETS_TRANSFORM_FACET: + case FACETS_TRANSFORM_FACET_SORT: + case FACETS_TRANSFORM_HISTOGRAM: + buffer_flush(wb); + buffer_strcat(wb, i->msg); + break; + } +} + +// ---------------------------------------------------------------------------- + +static void netdata_systemd_journal_rich_message(FACETS *facets __maybe_unused, BUFFER *json_array, FACET_ROW_KEY_VALUE *rkv, FACET_ROW *row __maybe_unused, void *data __maybe_unused) { + buffer_json_add_array_item_object(json_array); + buffer_json_member_add_string(json_array, "value", buffer_tostring(rkv->wb)); + buffer_json_object_close(json_array); +} diff --git a/collectors/systemd-journal.plugin/systemd-journal-files.c b/collectors/systemd-journal.plugin/systemd-journal-files.c new file mode 100644 index 000000000..56496df22 --- /dev/null +++ b/collectors/systemd-journal.plugin/systemd-journal-files.c @@ -0,0 +1,857 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "systemd-internals.h" + +#define SYSTEMD_JOURNAL_MAX_SOURCE_LEN 64 +#define VAR_LOG_JOURNAL_MAX_DEPTH 10 + +struct journal_directory journal_directories[MAX_JOURNAL_DIRECTORIES] = { 0 }; +DICTIONARY *journal_files_registry = NULL; +DICTIONARY *used_hashes_registry = NULL; + +static usec_t systemd_journal_session = 0; + +void buffer_json_journal_versions(BUFFER *wb) { + buffer_json_member_add_object(wb, "versions"); + { + buffer_json_member_add_uint64(wb, "sources", + systemd_journal_session + dictionary_version(journal_files_registry)); + } + buffer_json_object_close(wb); +} + +static bool journal_sd_id128_parse(const char *in, sd_id128_t *ret) { + while(isspace(*in)) + in++; + + char uuid[33]; + strncpyz(uuid, in, 32); + uuid[32] = '\0'; + + if(strlen(uuid) == 32) { + sd_id128_t read; + if(sd_id128_from_string(uuid, &read) == 0) { + *ret = read; + return true; + } + } + + return false; +} + +static void journal_file_get_header_from_journalctl(const char *filename, struct journal_file *jf) { + // unfortunately, our capabilities are not inheritted by journalctl + // so, it fails to give us the information we need. + + bool read_writer = false, read_head = false, read_tail = false; + + char cmd[FILENAME_MAX * 2]; + snprintfz(cmd, sizeof(cmd), "journalctl --header --file '%s'", filename); + CLEAN_BUFFER *wb = run_command_and_get_output_to_buffer(cmd, 1024); + if(wb) { + const char *s = buffer_tostring(wb); + + const char *sequential_id_header = "Sequential Number ID:"; + const char *sequential_id_data = strcasestr(s, sequential_id_header); + if(sequential_id_data) { + sequential_id_data += strlen(sequential_id_header); + if(journal_sd_id128_parse(sequential_id_data, &jf->first_writer_id)) + read_writer = true; + } + + const char *head_sequential_number_header = "Head sequential number:"; + const char *head_sequential_number_data = strcasestr(s, head_sequential_number_header); + if(head_sequential_number_data) { + head_sequential_number_data += strlen(head_sequential_number_header); + + while(isspace(*head_sequential_number_data)) + head_sequential_number_data++; + + if(isdigit(*head_sequential_number_data)) { + jf->first_seqnum = strtoul(head_sequential_number_data, NULL, 10); + if(jf->first_seqnum) + read_head = true; + } + } + + const char *tail_sequential_number_header = "Tail sequential number:"; + const char *tail_sequential_number_data = strcasestr(s, tail_sequential_number_header); + if(tail_sequential_number_data) { + tail_sequential_number_data += strlen(tail_sequential_number_header); + + while(isspace(*tail_sequential_number_data)) + tail_sequential_number_data++; + + if(isdigit(*tail_sequential_number_data)) { + jf->last_seqnum = strtoul(tail_sequential_number_data, NULL, 10); + if(jf->last_seqnum) + read_tail = true; + } + } + + if(read_head && read_tail && jf->last_seqnum > jf->first_seqnum) + jf->messages_in_file = jf->last_seqnum - jf->first_seqnum; + } + + if(!jf->logged_journalctl_failure && (!read_head || !read_head || !read_tail)) { + + nd_log(NDLS_COLLECTORS, NDLP_NOTICE, + "Failed to read %s%s%s from journalctl's output on filename '%s', using the command: %s", + read_writer?"":"writer id,", + read_head?"":"head id,", + read_tail?"":"tail id,", + filename, cmd); + + jf->logged_journalctl_failure = true; + } +} + +usec_t journal_file_update_annotation_boot_id(sd_journal *j, struct journal_file *jf, const char *boot_id) { + usec_t ut = UINT64_MAX; + int r; + + char m[100]; + size_t len = snprintfz(m, sizeof(m), "_BOOT_ID=%s", boot_id); + + sd_journal_flush_matches(j); + + r = sd_journal_add_match(j, m, len); + if(r < 0) { + errno = -r; + internal_error(true, + "JOURNAL: while looking for the first timestamp of boot_id '%s', " + "sd_journal_add_match('%s') on file '%s' returned %d", + boot_id, m, jf->filename, r); + return UINT64_MAX; + } + + r = sd_journal_seek_head(j); + if(r < 0) { + errno = -r; + internal_error(true, + "JOURNAL: while looking for the first timestamp of boot_id '%s', " + "sd_journal_seek_head() on file '%s' returned %d", + boot_id, jf->filename, r); + return UINT64_MAX; + } + + r = sd_journal_next(j); + if(r < 0) { + errno = -r; + internal_error(true, + "JOURNAL: while looking for the first timestamp of boot_id '%s', " + "sd_journal_next() on file '%s' returned %d", + boot_id, jf->filename, r); + return UINT64_MAX; + } + + r = sd_journal_get_realtime_usec(j, &ut); + if(r < 0 || !ut || ut == UINT64_MAX) { + errno = -r; + internal_error(r != -EADDRNOTAVAIL, + "JOURNAL: while looking for the first timestamp of boot_id '%s', " + "sd_journal_get_realtime_usec() on file '%s' returned %d", + boot_id, jf->filename, r); + return UINT64_MAX; + } + + if(ut && ut != UINT64_MAX) { + dictionary_set(boot_ids_to_first_ut, boot_id, &ut, sizeof(ut)); + return ut; + } + + return UINT64_MAX; +} + +static void journal_file_get_boot_id_annotations(sd_journal *j __maybe_unused, struct journal_file *jf __maybe_unused) { +#ifdef HAVE_SD_JOURNAL_RESTART_FIELDS + sd_journal_flush_matches(j); + + int r = sd_journal_query_unique(j, "_BOOT_ID"); + if (r < 0) { + errno = -r; + internal_error(true, + "JOURNAL: while querying for the unique _BOOT_ID values, " + "sd_journal_query_unique() on file '%s' returned %d", + jf->filename, r); + errno = -r; + return; + } + + const void *data = NULL; + size_t data_length; + + DICTIONARY *dict = dictionary_create(DICT_OPTION_SINGLE_THREADED); + + SD_JOURNAL_FOREACH_UNIQUE(j, data, data_length) { + const char *key, *value; + size_t key_length, value_length; + + if(!parse_journal_field(data, data_length, &key, &key_length, &value, &value_length)) + continue; + + if(value_length != 32) + continue; + + char buf[33]; + memcpy(buf, value, 32); + buf[32] = '\0'; + + dictionary_set(dict, buf, NULL, 0); + } + + void *nothing; + dfe_start_read(dict, nothing){ + journal_file_update_annotation_boot_id(j, jf, nothing_dfe.name); + } + dfe_done(nothing); + + dictionary_destroy(dict); +#endif +} + +void journal_file_update_header(const char *filename, struct journal_file *jf) { + if(jf->last_scan_header_vs_last_modified_ut == jf->file_last_modified_ut) + return; + + fstat_cache_enable_on_thread(); + + const char *files[2] = { + [0] = filename, + [1] = NULL, + }; + + sd_journal *j = NULL; + if(sd_journal_open_files(&j, files, ND_SD_JOURNAL_OPEN_FLAGS) < 0 || !j) { + netdata_log_error("JOURNAL: cannot open file '%s' to update msg_ut", filename); + fstat_cache_disable_on_thread(); + + if(!jf->logged_failure) { + netdata_log_error("cannot open journal file '%s', using file timestamps to understand time-frame.", filename); + jf->logged_failure = true; + } + + jf->msg_first_ut = 0; + jf->msg_last_ut = jf->file_last_modified_ut; + jf->last_scan_header_vs_last_modified_ut = jf->file_last_modified_ut; + return; + } + + usec_t first_ut = 0, last_ut = 0; + uint64_t first_seqnum = 0, last_seqnum = 0; + sd_id128_t first_writer_id = SD_ID128_NULL, last_writer_id = SD_ID128_NULL; + + if(sd_journal_seek_head(j) < 0 || sd_journal_next(j) < 0 || sd_journal_get_realtime_usec(j, &first_ut) < 0 || !first_ut) { + internal_error(true, "cannot find the timestamp of the first message in '%s'", filename); + first_ut = 0; + } +#ifdef HAVE_SD_JOURNAL_GET_SEQNUM + else { + if(sd_journal_get_seqnum(j, &first_seqnum, &first_writer_id) < 0 || !first_seqnum) { + internal_error(true, "cannot find the first seqnums of the first message in '%s'", filename); + first_seqnum = 0; + memset(&first_writer_id, 0, sizeof(first_writer_id)); + } + } +#endif + + if(sd_journal_seek_tail(j) < 0 || sd_journal_previous(j) < 0 || sd_journal_get_realtime_usec(j, &last_ut) < 0 || !last_ut) { + internal_error(true, "cannot find the timestamp of the last message in '%s'", filename); + last_ut = jf->file_last_modified_ut; + } +#ifdef HAVE_SD_JOURNAL_GET_SEQNUM + else { + if(sd_journal_get_seqnum(j, &last_seqnum, &last_writer_id) < 0 || !last_seqnum) { + internal_error(true, "cannot find the last seqnums of the first message in '%s'", filename); + last_seqnum = 0; + memset(&last_writer_id, 0, sizeof(last_writer_id)); + } + } +#endif + + if(first_ut > last_ut) { + internal_error(true, "timestamps are flipped in file '%s'", filename); + usec_t t = first_ut; + first_ut = last_ut; + last_ut = t; + } + + if(!first_seqnum || !first_ut) { + // extract these from the filename - if possible + + const char *at = strchr(filename, '@'); + if(at) { + const char *dash_seqnum = strchr(at + 1, '-'); + if(dash_seqnum) { + const char *dash_first_msg_ut = strchr(dash_seqnum + 1, '-'); + if(dash_first_msg_ut) { + const char *dot_journal = strstr(dash_first_msg_ut + 1, ".journal"); + if(dot_journal) { + if(dash_seqnum - at - 1 == 32 && + dash_first_msg_ut - dash_seqnum - 1 == 16 && + dot_journal - dash_first_msg_ut - 1 == 16) { + sd_id128_t writer; + if(journal_sd_id128_parse(at + 1, &writer)) { + char *endptr = NULL; + uint64_t seqnum = strtoul(dash_seqnum + 1, &endptr, 16); + if(endptr == dash_first_msg_ut) { + uint64_t ts = strtoul(dash_first_msg_ut + 1, &endptr, 16); + if(endptr == dot_journal) { + first_seqnum = seqnum; + first_writer_id = writer; + first_ut = ts; + } + } + } + } + } + } + } + } + } + + jf->first_seqnum = first_seqnum; + jf->last_seqnum = last_seqnum; + + jf->first_writer_id = first_writer_id; + jf->last_writer_id = last_writer_id; + + jf->msg_first_ut = first_ut; + jf->msg_last_ut = last_ut; + + if(!jf->msg_last_ut) + jf->msg_last_ut = jf->file_last_modified_ut; + + if(last_seqnum > first_seqnum) { + if(!sd_id128_equal(first_writer_id, last_writer_id)) { + jf->messages_in_file = 0; + nd_log(NDLS_COLLECTORS, NDLP_NOTICE, + "The writers of the first and the last message in file '%s' differ." + , filename); + } + else + jf->messages_in_file = last_seqnum - first_seqnum + 1; + } + else + jf->messages_in_file = 0; + +// if(!jf->messages_in_file) +// journal_file_get_header_from_journalctl(filename, jf); + + journal_file_get_boot_id_annotations(j, jf); + sd_journal_close(j); + fstat_cache_disable_on_thread(); + + jf->last_scan_header_vs_last_modified_ut = jf->file_last_modified_ut; + + nd_log(NDLS_COLLECTORS, NDLP_DEBUG, + "Journal file header updated '%s'", + jf->filename); +} + +static STRING *string_strdupz_source(const char *s, const char *e, size_t max_len, const char *prefix) { + char buf[max_len]; + size_t len; + char *dst = buf; + + if(prefix) { + len = strlen(prefix); + memcpy(buf, prefix, len); + dst = &buf[len]; + max_len -= len; + } + + len = e - s; + if(len >= max_len) + len = max_len - 1; + memcpy(dst, s, len); + dst[len] = '\0'; + buf[max_len - 1] = '\0'; + + for(size_t i = 0; buf[i] ;i++) + if(!isalnum(buf[i]) && buf[i] != '-' && buf[i] != '.' && buf[i] != ':') + buf[i] = '_'; + + return string_strdupz(buf); +} + +static void files_registry_insert_cb(const DICTIONARY_ITEM *item, void *value, void *data __maybe_unused) { + struct journal_file *jf = value; + jf->filename = dictionary_acquired_item_name(item); + jf->filename_len = strlen(jf->filename); + jf->source_type = SDJF_ALL; + + // based on the filename + // decide the source to show to the user + const char *s = strrchr(jf->filename, '/'); + if(s) { + if(strstr(jf->filename, "/remote/")) { + jf->source_type |= SDJF_REMOTE_ALL; + + if(strncmp(s, "/remote-", 8) == 0) { + s = &s[8]; // skip "/remote-" + + char *e = strchr(s, '@'); + if(!e) + e = strstr(s, ".journal"); + + if(e) { + const char *d = s; + for(; d < e && (isdigit(*d) || *d == '.' || *d == ':') ; d++) ; + if(d == e) { + // a valid IP address + char ip[e - s + 1]; + memcpy(ip, s, e - s); + ip[e - s] = '\0'; + char buf[SYSTEMD_JOURNAL_MAX_SOURCE_LEN]; + if(ip_to_hostname(ip, buf, sizeof(buf))) + jf->source = string_strdupz_source(buf, &buf[strlen(buf)], SYSTEMD_JOURNAL_MAX_SOURCE_LEN, "remote-"); + else { + internal_error(true, "Cannot find the hostname for IP '%s'", ip); + jf->source = string_strdupz_source(s, e, SYSTEMD_JOURNAL_MAX_SOURCE_LEN, "remote-"); + } + } + else + jf->source = string_strdupz_source(s, e, SYSTEMD_JOURNAL_MAX_SOURCE_LEN, "remote-"); + } + } + } + else { + jf->source_type |= SDJF_LOCAL_ALL; + + const char *t = s - 1; + while(t >= jf->filename && *t != '.' && *t != '/') + t--; + + if(t >= jf->filename && *t == '.') { + jf->source_type |= SDJF_LOCAL_NAMESPACE; + jf->source = string_strdupz_source(t + 1, s, SYSTEMD_JOURNAL_MAX_SOURCE_LEN, "namespace-"); + } + else if(strncmp(s, "/system", 7) == 0) + jf->source_type |= SDJF_LOCAL_SYSTEM; + + else if(strncmp(s, "/user", 5) == 0) + jf->source_type |= SDJF_LOCAL_USER; + + else + jf->source_type |= SDJF_LOCAL_OTHER; + } + } + else + jf->source_type |= SDJF_LOCAL_ALL | SDJF_LOCAL_OTHER; + + jf->msg_last_ut = jf->file_last_modified_ut; + + nd_log(NDLS_COLLECTORS, NDLP_DEBUG, + "Journal file added to the journal files registry: '%s'", + jf->filename); +} + +static bool files_registry_conflict_cb(const DICTIONARY_ITEM *item, void *old_value, void *new_value, void *data __maybe_unused) { + struct journal_file *jf = old_value; + struct journal_file *njf = new_value; + + if(njf->last_scan_monotonic_ut > jf->last_scan_monotonic_ut) + jf->last_scan_monotonic_ut = njf->last_scan_monotonic_ut; + + if(njf->file_last_modified_ut > jf->file_last_modified_ut) { + jf->file_last_modified_ut = njf->file_last_modified_ut; + jf->size = njf->size; + + jf->msg_last_ut = jf->file_last_modified_ut; + + nd_log(NDLS_COLLECTORS, NDLP_DEBUG, + "Journal file updated to the journal files registry '%s'", + jf->filename); + } + + return false; +} + +struct journal_file_source { + usec_t first_ut; + usec_t last_ut; + size_t count; + uint64_t size; +}; + +static void human_readable_size_ib(uint64_t size, char *dst, size_t dst_len) { + if(size > 1024ULL * 1024 * 1024 * 1024) + snprintfz(dst, dst_len, "%0.2f TiB", (double)size / 1024.0 / 1024.0 / 1024.0 / 1024.0); + else if(size > 1024ULL * 1024 * 1024) + snprintfz(dst, dst_len, "%0.2f GiB", (double)size / 1024.0 / 1024.0 / 1024.0); + else if(size > 1024ULL * 1024) + snprintfz(dst, dst_len, "%0.2f MiB", (double)size / 1024.0 / 1024.0); + else if(size > 1024ULL) + snprintfz(dst, dst_len, "%0.2f KiB", (double)size / 1024.0); + else + snprintfz(dst, dst_len, "%"PRIu64" B", size); +} + +#define print_duration(dst, dst_len, pos, remaining, duration, one, many, printed) do { \ + if((remaining) > (duration)) { \ + uint64_t _count = (remaining) / (duration); \ + uint64_t _rem = (remaining) - (_count * (duration)); \ + (pos) += snprintfz(&(dst)[pos], (dst_len) - (pos), "%s%s%"PRIu64" %s", (printed) ? ", " : "", _rem ? "" : "and ", _count, _count > 1 ? (many) : (one)); \ + (remaining) = _rem; \ + (printed) = true; \ + } \ +} while(0) + +static void human_readable_duration_s(time_t duration_s, char *dst, size_t dst_len) { + if(duration_s < 0) + duration_s = -duration_s; + + size_t pos = 0; + dst[0] = 0 ; + + bool printed = false; + print_duration(dst, dst_len, pos, duration_s, 86400 * 365, "year", "years", printed); + print_duration(dst, dst_len, pos, duration_s, 86400 * 30, "month", "months", printed); + print_duration(dst, dst_len, pos, duration_s, 86400 * 1, "day", "days", printed); + print_duration(dst, dst_len, pos, duration_s, 3600 * 1, "hour", "hours", printed); + print_duration(dst, dst_len, pos, duration_s, 60 * 1, "min", "mins", printed); + print_duration(dst, dst_len, pos, duration_s, 1, "sec", "secs", printed); +} + +static int journal_file_to_json_array_cb(const DICTIONARY_ITEM *item, void *entry, void *data) { + struct journal_file_source *jfs = entry; + BUFFER *wb = data; + + const char *name = dictionary_acquired_item_name(item); + + buffer_json_add_array_item_object(wb); + { + char size_for_humans[100]; + human_readable_size_ib(jfs->size, size_for_humans, sizeof(size_for_humans)); + + char duration_for_humans[1024]; + human_readable_duration_s((time_t)((jfs->last_ut - jfs->first_ut) / USEC_PER_SEC), + duration_for_humans, sizeof(duration_for_humans)); + + char info[1024]; + snprintfz(info, sizeof(info), "%zu files, with a total size of %s, covering %s", + jfs->count, size_for_humans, duration_for_humans); + + buffer_json_member_add_string(wb, "id", name); + buffer_json_member_add_string(wb, "name", name); + buffer_json_member_add_string(wb, "pill", size_for_humans); + buffer_json_member_add_string(wb, "info", info); + } + buffer_json_object_close(wb); // options object + + return 1; +} + +static bool journal_file_merge_sizes(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value , void *data __maybe_unused) { + struct journal_file_source *jfs = old_value, *njfs = new_value; + jfs->count += njfs->count; + jfs->size += njfs->size; + + if(njfs->first_ut && njfs->first_ut < jfs->first_ut) + jfs->first_ut = njfs->first_ut; + + if(njfs->last_ut && njfs->last_ut > jfs->last_ut) + jfs->last_ut = njfs->last_ut; + + return false; +} + +void available_journal_file_sources_to_json_array(BUFFER *wb) { + DICTIONARY *dict = dictionary_create(DICT_OPTION_SINGLE_THREADED|DICT_OPTION_NAME_LINK_DONT_CLONE|DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_register_conflict_callback(dict, journal_file_merge_sizes, NULL); + + struct journal_file_source t = { 0 }; + + struct journal_file *jf; + dfe_start_read(journal_files_registry, jf) { + t.first_ut = jf->msg_first_ut; + t.last_ut = jf->msg_last_ut; + t.count = 1; + t.size = jf->size; + + dictionary_set(dict, SDJF_SOURCE_ALL_NAME, &t, sizeof(t)); + + if(jf->source_type & SDJF_LOCAL_ALL) + dictionary_set(dict, SDJF_SOURCE_LOCAL_NAME, &t, sizeof(t)); + if(jf->source_type & SDJF_LOCAL_SYSTEM) + dictionary_set(dict, SDJF_SOURCE_LOCAL_SYSTEM_NAME, &t, sizeof(t)); + if(jf->source_type & SDJF_LOCAL_USER) + dictionary_set(dict, SDJF_SOURCE_LOCAL_USERS_NAME, &t, sizeof(t)); + if(jf->source_type & SDJF_LOCAL_OTHER) + dictionary_set(dict, SDJF_SOURCE_LOCAL_OTHER_NAME, &t, sizeof(t)); + if(jf->source_type & SDJF_LOCAL_NAMESPACE) + dictionary_set(dict, SDJF_SOURCE_NAMESPACES_NAME, &t, sizeof(t)); + if(jf->source_type & SDJF_REMOTE_ALL) + dictionary_set(dict, SDJF_SOURCE_REMOTES_NAME, &t, sizeof(t)); + if(jf->source) + dictionary_set(dict, string2str(jf->source), &t, sizeof(t)); + } + dfe_done(jf); + + dictionary_sorted_walkthrough_read(dict, journal_file_to_json_array_cb, wb); + + dictionary_destroy(dict); +} + +static void files_registry_delete_cb(const DICTIONARY_ITEM *item, void *value, void *data __maybe_unused) { + struct journal_file *jf = value; (void)jf; + const char *filename = dictionary_acquired_item_name(item); (void)filename; + + internal_error(true, "removed journal file '%s'", filename); + string_freez(jf->source); +} + +void journal_directory_scan_recursively(DICTIONARY *files, DICTIONARY *dirs, const char *dirname, int depth) { + static const char *ext = ".journal"; + static const ssize_t ext_len = sizeof(".journal") - 1; + + if (depth > VAR_LOG_JOURNAL_MAX_DEPTH) + return; + + DIR *dir; + struct dirent *entry; + char full_path[FILENAME_MAX]; + + // Open the directory. + if ((dir = opendir(dirname)) == NULL) { + if(errno != ENOENT && errno != ENOTDIR) + netdata_log_error("Cannot opendir() '%s'", dirname); + return; + } + + bool existing = false; + bool *found = dictionary_set(dirs, dirname, &existing, sizeof(existing)); + if(*found) return; + *found = true; + + // Read each entry in the directory. + while ((entry = readdir(dir)) != NULL) { + if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) + continue; + + ssize_t len = snprintfz(full_path, sizeof(full_path), "%s/%s", dirname, entry->d_name); + + if (entry->d_type == DT_DIR) { + journal_directory_scan_recursively(files, dirs, full_path, depth++); + } + else if (entry->d_type == DT_REG && len > ext_len && strcmp(full_path + len - ext_len, ext) == 0) { + if(files) + dictionary_set(files, full_path, NULL, 0); + + send_newline_and_flush(); + } + else if (entry->d_type == DT_LNK) { + struct stat info; + if (stat(full_path, &info) == -1) + continue; + + if (S_ISDIR(info.st_mode)) { + // The symbolic link points to a directory + char resolved_path[FILENAME_MAX + 1]; + if (realpath(full_path, resolved_path) != NULL) { + journal_directory_scan_recursively(files, dirs, resolved_path, depth++); + } + } + else if(S_ISREG(info.st_mode) && len > ext_len && strcmp(full_path + len - ext_len, ext) == 0) { + if(files) + dictionary_set(files, full_path, NULL, 0); + + send_newline_and_flush(); + } + } + } + + closedir(dir); +} + +static size_t journal_files_scans = 0; +bool journal_files_completed_once(void) { + return journal_files_scans > 0; +} + +int filenames_compar(const void *a, const void *b) { + const char *p1 = *(const char **)a; + const char *p2 = *(const char **)b; + + const char *at1 = strchr(p1, '@'); + const char *at2 = strchr(p2, '@'); + + if(!at1 && at2) + return -1; + + if(at1 && !at2) + return 1; + + if(!at1 && !at2) + return strcmp(p1, p2); + + const char *dash1 = strrchr(at1, '-'); + const char *dash2 = strrchr(at2, '-'); + + if(!dash1 || !dash2) + return strcmp(p1, p2); + + uint64_t ts1 = strtoul(dash1 + 1, NULL, 16); + uint64_t ts2 = strtoul(dash2 + 1, NULL, 16); + + if(ts1 > ts2) + return -1; + + if(ts1 < ts2) + return 1; + + return -strcmp(p1, p2); +} + +void journal_files_registry_update(void) { + static SPINLOCK spinlock = NETDATA_SPINLOCK_INITIALIZER; + + if(spinlock_trylock(&spinlock)) { + usec_t scan_monotonic_ut = now_monotonic_usec(); + + DICTIONARY *files = dictionary_create(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE); + DICTIONARY *dirs = dictionary_create(DICT_OPTION_SINGLE_THREADED|DICT_OPTION_DONT_OVERWRITE_VALUE); + + for(unsigned i = 0; i < MAX_JOURNAL_DIRECTORIES; i++) { + if(!journal_directories[i].path) break; + journal_directory_scan_recursively(files, dirs, journal_directories[i].path, 0); + } + + const char **array = mallocz(sizeof(const char *) * dictionary_entries(files)); + size_t used = 0; + + void *x; + dfe_start_read(files, x) { + if(used >= dictionary_entries(files)) continue; + array[used++] = x_dfe.name; + } + dfe_done(x); + + qsort(array, used, sizeof(const char *), filenames_compar); + + for(size_t i = 0; i < used ;i++) { + const char *full_path = array[i]; + + struct stat info; + if (stat(full_path, &info) == -1) + continue; + + struct journal_file t = { + .file_last_modified_ut = info.st_mtim.tv_sec * USEC_PER_SEC + info.st_mtim.tv_nsec / NSEC_PER_USEC, + .last_scan_monotonic_ut = scan_monotonic_ut, + .size = info.st_size, + .max_journal_vs_realtime_delta_ut = JOURNAL_VS_REALTIME_DELTA_DEFAULT_UT, + }; + struct journal_file *jf = dictionary_set(journal_files_registry, full_path, &t, sizeof(t)); + journal_file_update_header(jf->filename, jf); + } + freez(array); + dictionary_destroy(files); + dictionary_destroy(dirs); + + struct journal_file *jf; + dfe_start_write(journal_files_registry, jf){ + if(jf->last_scan_monotonic_ut < scan_monotonic_ut) + dictionary_del(journal_files_registry, jf_dfe.name); + } + dfe_done(jf); + + journal_files_scans++; + spinlock_unlock(&spinlock); + + internal_error(true, + "Journal library scan completed in %.3f ms", + (double)(now_monotonic_usec() - scan_monotonic_ut) / (double)USEC_PER_MS); + } +} + +// ---------------------------------------------------------------------------- + +int journal_file_dict_items_backward_compar(const void *a, const void *b) { + const DICTIONARY_ITEM **ad = (const DICTIONARY_ITEM **)a, **bd = (const DICTIONARY_ITEM **)b; + struct journal_file *jfa = dictionary_acquired_item_value(*ad); + struct journal_file *jfb = dictionary_acquired_item_value(*bd); + + // compare the last message timestamps + if(jfa->msg_last_ut < jfb->msg_last_ut) + return 1; + + if(jfa->msg_last_ut > jfb->msg_last_ut) + return -1; + + // compare the file last modification timestamps + if(jfa->file_last_modified_ut < jfb->file_last_modified_ut) + return 1; + + if(jfa->file_last_modified_ut > jfb->file_last_modified_ut) + return -1; + + // compare the first message timestamps + if(jfa->msg_first_ut < jfb->msg_first_ut) + return 1; + + if(jfa->msg_first_ut > jfb->msg_first_ut) + return -1; + + return 0; +} + +int journal_file_dict_items_forward_compar(const void *a, const void *b) { + return -journal_file_dict_items_backward_compar(a, b); +} + +static bool boot_id_conflict_cb(const DICTIONARY_ITEM *item, void *old_value, void *new_value, void *data __maybe_unused) { + usec_t *old_usec = old_value; + usec_t *new_usec = new_value; + + if(*new_usec < *old_usec) { + *old_usec = *new_usec; + return true; + } + + return false; +} + +void journal_init_files_and_directories(void) { + unsigned d = 0; + + // ------------------------------------------------------------------------ + // setup the journal directories + + journal_directories[d++].path = strdupz("/run/log/journal"); + journal_directories[d++].path = strdupz("/var/log/journal"); + + if(*netdata_configured_host_prefix) { + char path[PATH_MAX]; + snprintfz(path, sizeof(path), "%s/var/log/journal", netdata_configured_host_prefix); + journal_directories[d++].path = strdupz(path); + snprintfz(path, sizeof(path), "%s/run/log/journal", netdata_configured_host_prefix); + journal_directories[d++].path = strdupz(path); + } + + // terminate the list + journal_directories[d].path = NULL; + + // ------------------------------------------------------------------------ + // initialize the used hashes files registry + + used_hashes_registry = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + + systemd_journal_session = (now_realtime_usec() / USEC_PER_SEC) * USEC_PER_SEC; + + journal_files_registry = dictionary_create_advanced( + DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + NULL, sizeof(struct journal_file)); + + dictionary_register_insert_callback(journal_files_registry, files_registry_insert_cb, NULL); + dictionary_register_delete_callback(journal_files_registry, files_registry_delete_cb, NULL); + dictionary_register_conflict_callback(journal_files_registry, files_registry_conflict_cb, NULL); + + boot_ids_to_first_ut = dictionary_create_advanced( + DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + NULL, sizeof(usec_t)); + + dictionary_register_conflict_callback(boot_ids_to_first_ut, boot_id_conflict_cb, NULL); + +} diff --git a/collectors/systemd-journal.plugin/systemd-journal-fstat.c b/collectors/systemd-journal.plugin/systemd-journal-fstat.c new file mode 100644 index 000000000..45ea78174 --- /dev/null +++ b/collectors/systemd-journal.plugin/systemd-journal-fstat.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "systemd-internals.h" + + +// ---------------------------------------------------------------------------- +// fstat64 overloading to speed up libsystemd +// https://github.com/systemd/systemd/pull/29261 + +#include <dlfcn.h> +#include <sys/stat.h> + +#define FSTAT_CACHE_MAX 1024 +struct fdstat64_cache_entry { + bool enabled; + bool updated; + int err_no; + struct stat64 stat; + int ret; + size_t cached_count; + size_t session; +}; + +struct fdstat64_cache_entry fstat64_cache[FSTAT_CACHE_MAX] = {0 }; +__thread size_t fstat_thread_calls = 0; +__thread size_t fstat_thread_cached_responses = 0; +static __thread bool enable_thread_fstat = false; +static __thread size_t fstat_caching_thread_session = 0; +static size_t fstat_caching_global_session = 0; + +void fstat_cache_enable_on_thread(void) { + fstat_caching_thread_session = __atomic_add_fetch(&fstat_caching_global_session, 1, __ATOMIC_ACQUIRE); + enable_thread_fstat = true; +} + +void fstat_cache_disable_on_thread(void) { + fstat_caching_thread_session = __atomic_add_fetch(&fstat_caching_global_session, 1, __ATOMIC_RELEASE); + enable_thread_fstat = false; +} + +int fstat64(int fd, struct stat64 *buf) { + static int (*real_fstat)(int, struct stat64 *) = NULL; + if (!real_fstat) + real_fstat = dlsym(RTLD_NEXT, "fstat64"); + + fstat_thread_calls++; + + if(fd >= 0 && fd < FSTAT_CACHE_MAX) { + if(enable_thread_fstat && fstat64_cache[fd].session != fstat_caching_thread_session) { + fstat64_cache[fd].session = fstat_caching_thread_session; + fstat64_cache[fd].enabled = true; + fstat64_cache[fd].updated = false; + } + + if(fstat64_cache[fd].enabled && fstat64_cache[fd].updated && fstat64_cache[fd].session == fstat_caching_thread_session) { + fstat_thread_cached_responses++; + errno = fstat64_cache[fd].err_no; + *buf = fstat64_cache[fd].stat; + fstat64_cache[fd].cached_count++; + return fstat64_cache[fd].ret; + } + } + + int ret = real_fstat(fd, buf); + + if(fd >= 0 && fd < FSTAT_CACHE_MAX && fstat64_cache[fd].enabled && fstat64_cache[fd].session == fstat_caching_thread_session) { + fstat64_cache[fd].ret = ret; + fstat64_cache[fd].updated = true; + fstat64_cache[fd].err_no = errno; + fstat64_cache[fd].stat = *buf; + } + + return ret; +} diff --git a/collectors/systemd-journal.plugin/systemd-journal-self-signed-certs.sh b/collectors/systemd-journal.plugin/systemd-journal-self-signed-certs.sh new file mode 100755 index 000000000..ada735f1f --- /dev/null +++ b/collectors/systemd-journal.plugin/systemd-journal-self-signed-certs.sh @@ -0,0 +1,267 @@ +#!/usr/bin/env bash + +me="${0}" +dst="/etc/ssl/systemd-journal" + +show_usage() { + cat <<EOFUSAGE + +${me} [options] server_name alias1 alias2 ... + +server_name + the canonical name of the server on the certificates + +aliasN + a hostname or IP this server is reachable with + DNS names should be like DNS:hostname + IPs should be like IP:1.2.3.4 + Any number of aliases are accepted per server + +options can be: + + -h, --help + show this message + + -d, --directory DIRECTORY + change the default certificates install dir + default: ${dst} + +EOFUSAGE +} + +while [ ! -z "${1}" ]; do + case "${1}" in + -h|--help) + show_usage + exit 0 + ;; + + -d|--directory) + dst="${2}" + echo >&2 "directory set to: ${dst}" + shift + ;; + + *) + break 2 + ;; + esac + + shift +done + +if [ -z "${1}" ]; then + show_usage + exit 1 +fi + + +# Define a regular expression pattern for a valid canonical name +valid_canonical_name_pattern="^[a-zA-Z0-9][a-zA-Z0-9.-]+$" + +# Check if ${1} matches the pattern +if [[ ! "${1}" =~ ${valid_canonical_name_pattern} ]]; then + echo "Certificate name '${1}' is not valid." + exit 1 +fi + +# ----------------------------------------------------------------------------- +# Create the CA + +# stop on all errors +set -e + +if [ $UID -ne 0 ] +then + echo >&2 "Hey! sudo me: sudo ${me}" + exit 1 +fi + +if ! getent group systemd-journal >/dev/null 2>&1; then + echo >&2 "Missing system group: systemd-journal. Did you install systemd-journald?" + exit 1 +fi + +if ! getent passwd systemd-journal-remote >/dev/null 2>&1; then + echo >&2 "Missing system user: systemd-journal-remote. Did you install systemd-journal-remote?" + exit 1 +fi + +if [ ! -d "${dst}" ] +then + mkdir -p "${dst}" + chown systemd-journal-remote:systemd-journal "${dst}" + chmod 750 "${dst}" +fi + +cd "${dst}" + +test ! -f ca.conf && cat >ca.conf <<EOF +[ ca ] +default_ca = CA_default +[ CA_default ] +new_certs_dir = . +certificate = ca.pem +database = ./index +private_key = ca.key +serial = ./serial +default_days = 3650 +default_md = default +policy = policy_anything +[ policy_anything ] +countryName = optional +stateOrProvinceName = optional +localityName = optional +organizationName = optional +organizationalUnitName = optional +commonName = supplied +emailAddress = optional +EOF + +test ! -f index && touch index +test ! -f serial && echo 0001 >serial + +if [ ! -f ca.pem -o ! -f ca.key ]; then + echo >&2 "Generating ca.pem ..." + + openssl req -newkey rsa:2048 -days 3650 -x509 -nodes -out ca.pem -keyout ca.key -subj "/CN=systemd-journal-remote-ca/" + chown systemd-journal-remote:systemd-journal ca.pem + chmod 0640 ca.pem +fi + +# ----------------------------------------------------------------------------- +# Create a server certificate + +generate_server_certificate() { + local cn="${1}"; shift + + if [ ! -f "${cn}.pem" -o ! -f "${cn}.key" ]; then + if [ -z "${*}" ]; then + echo >"${cn}.conf" + else + echo "subjectAltName = $(echo "${@}" | tr " " ",")" >"${cn}.conf" + fi + + echo >&2 "Generating server: ${cn}.pem and ${cn}.key ..." + + openssl req -newkey rsa:2048 -nodes -out "${cn}.csr" -keyout "${cn}.key" -subj "/CN=${cn}/" + openssl ca -batch -config ca.conf -notext -in "${cn}.csr" -out "${cn}.pem" -extfile "${cn}.conf" + else + echo >&2 "certificates for ${cn} are already available." + fi + + chown systemd-journal-remote:systemd-journal "${cn}.pem" "${cn}.key" + chmod 0640 "${cn}.pem" "${cn}.key" +} + + +# ----------------------------------------------------------------------------- +# Create a script to install the certificate on each server + +generate_install_script() { + local cn="${1}" + local dst="/etc/ssl/systemd-journal" + + cat >"runme-on-${cn}.sh" <<EOFC1 +#!/usr/bin/env bash + +# stop on all errors +set -e + +if [ \$UID -ne 0 ]; then + echo >&2 "Hey! sudo me: sudo \${0}" + exit 1 +fi + +# make sure the systemd-journal group exists +# all certificates will be owned by this group +if ! getent group systemd-journal >/dev/null 2>&1; then + echo >&2 "Missing system group: systemd-journal. Did you install systemd-journald?" + exit 1 +fi + +if ! getent passwd systemd-journal-remote >/dev/null 2>&1; then + echo >&2 "Missing system user: systemd-journal-remote. Did you install systemd-journal-remote?" + exit 1 +fi + +if [ ! -d ${dst} ]; then + echo >&2 "creating directory: ${dst}" + mkdir -p "${dst}" +fi +chown systemd-journal-remote:systemd-journal "${dst}" +chmod 750 "${dst}" +cd "${dst}" + +echo >&2 "saving trusted certificate file as: ${dst}/ca.pem" +cat >ca.pem <<EOFCAPEM +$(cat ca.pem) +EOFCAPEM + +chown systemd-journal-remote:systemd-journal ca.pem +chmod 0640 ca.pem + +echo >&2 "saving server ${cn} certificate file as: ${dst}/${cn}.pem" +cat >"${cn}.pem" <<EOFSERPEM +$(cat "${cn}.pem") +EOFSERPEM + +chown systemd-journal-remote:systemd-journal "${cn}.pem" +chmod 0640 "${cn}.pem" + +echo >&2 "saving server ${cn} key file as: ${dst}/${cn}.key" +cat >"${cn}.key" <<EOFSERKEY +$(cat "${cn}.key") +EOFSERKEY + +chown systemd-journal-remote:systemd-journal "${cn}.key" +chmod 0640 "${cn}.key" + +for cfg in /etc/systemd/journal-remote.conf /etc/systemd/journal-upload.conf +do + if [ -f \${cfg} ]; then + # keep a backup of the file + test ! -f \${cfg}.orig && cp \${cfg} \${cfg}.orig + + # fix its contents + echo >&2 "updating the certificates in \${cfg}" + sed -i "s|^#\\?\\s*ServerKeyFile=.*$|ServerKeyFile=${dst}/${cn}.key|" \${cfg} + sed -i "s|^#\\?\\s*ServerCertificateFile=.*$|ServerCertificateFile=${dst}/${cn}.pem|" \${cfg} + sed -i "s|^#\\?\\s*TrustedCertificateFile=.*$|TrustedCertificateFile=${dst}/ca.pem|" \${cfg} + fi +done + +echo >&2 "certificates installed - you may need to restart services to active them" +echo >&2 +echo >&2 "If this is a central server:" +echo >&2 "# systemctl restart systemd-journal-remote.socket" +echo >&2 +echo >&2 "If this is a passive client:" +echo >&2 "# systemctl restart systemd-journal-upload.service" +echo >&2 +echo >&2 "If this is an active client:" +echo >&2 "# systemctl restart systemd-journal-gateway.socket" +EOFC1 + + chmod 0700 "runme-on-${cn}.sh" +} + +# ----------------------------------------------------------------------------- +# Create the client certificates + +generate_server_certificate "${@}" +generate_install_script "${1}" + + +# Set ANSI escape code for colors +yellow_color="\033[1;33m" +green_color="\033[0;32m" +# Reset ANSI color after the message +reset_color="\033[0m" + + +echo >&2 -e "use this script to install it on ${1}: ${yellow_color}$(ls ${dst}/runme-on-${1}.sh)${reset_color}" +echo >&2 "copy it to your server ${1}, like this:" +echo >&2 -e "# ${green_color}scp ${dst}/runme-on-${1}.sh ${1}:/tmp/${reset_color}" +echo >&2 "and then run it on that server to install the certificates" +echo >&2 diff --git a/collectors/systemd-journal.plugin/systemd-journal-watcher.c b/collectors/systemd-journal.plugin/systemd-journal-watcher.c new file mode 100644 index 000000000..ed41f6247 --- /dev/null +++ b/collectors/systemd-journal.plugin/systemd-journal-watcher.c @@ -0,0 +1,379 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "systemd-internals.h" +#include <sys/inotify.h> + +#define EVENT_SIZE (sizeof(struct inotify_event)) +#define INITIAL_WATCHES 256 + +#define WATCH_FOR (IN_CREATE | IN_MODIFY | IN_DELETE | IN_DELETE_SELF | IN_MOVED_FROM | IN_MOVED_TO | IN_UNMOUNT) + +typedef struct watch_entry { + int slot; + + int wd; // Watch descriptor + char *path; // Dynamically allocated path + + struct watch_entry *next; // for the free list +} WatchEntry; + +typedef struct { + WatchEntry *watchList; + WatchEntry *freeList; + int watchCount; + int watchListSize; + + size_t errors; + + DICTIONARY *pending; +} Watcher; + +static WatchEntry *get_slot(Watcher *watcher) { + WatchEntry *t; + + if (watcher->freeList != NULL) { + t = watcher->freeList; + watcher->freeList = t->next; + t->next = NULL; + return t; + } + + if (watcher->watchCount == watcher->watchListSize) { + watcher->watchListSize *= 2; + watcher->watchList = reallocz(watcher->watchList, watcher->watchListSize * sizeof(WatchEntry)); + } + + watcher->watchList[watcher->watchCount] = (WatchEntry){ + .slot = watcher->watchCount, + .wd = -1, + .path = NULL, + .next = NULL, + }; + t = &watcher->watchList[watcher->watchCount]; + watcher->watchCount++; + + return t; +} + +static void free_slot(Watcher *watcher, WatchEntry *t) { + t->wd = -1; + freez(t->path); + t->path = NULL; + + // link it to the free list + t->next = watcher->freeList; + watcher->freeList = t; +} + +static int add_watch(Watcher *watcher, int inotifyFd, const char *path) { + WatchEntry *t = get_slot(watcher); + + t->wd = inotify_add_watch(inotifyFd, path, WATCH_FOR); + if (t->wd == -1) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "JOURNAL WATCHER: cannot watch directory: '%s'", + path); + + free_slot(watcher, t); + + struct stat info; + if(stat(path, &info) == 0 && S_ISDIR(info.st_mode)) { + // the directory exists, but we failed to add the watch + // increase errors + watcher->errors++; + } + } + else { + t->path = strdupz(path); + + nd_log(NDLS_COLLECTORS, NDLP_DEBUG, + "JOURNAL WATCHER: watching directory: '%s'", + path); + + } + return t->wd; +} + +static void remove_watch(Watcher *watcher, int inotifyFd, int wd) { + int i; + for (i = 0; i < watcher->watchCount; ++i) { + if (watcher->watchList[i].wd == wd) { + + nd_log(NDLS_COLLECTORS, NDLP_DEBUG, + "JOURNAL WATCHER: removing watch from directory: '%s'", + watcher->watchList[i].path); + + inotify_rm_watch(inotifyFd, watcher->watchList[i].wd); + free_slot(watcher, &watcher->watchList[i]); + return; + } + } + + nd_log(NDLS_COLLECTORS, NDLP_WARNING, + "JOURNAL WATCHER: cannot find directory watch %d to remove.", + wd); +} + +static void free_watches(Watcher *watcher, int inotifyFd) { + for (int i = 0; i < watcher->watchCount; ++i) { + if (watcher->watchList[i].wd != -1) { + inotify_rm_watch(inotifyFd, watcher->watchList[i].wd); + free_slot(watcher, &watcher->watchList[i]); + } + } + freez(watcher->watchList); + watcher->watchList = NULL; + + dictionary_destroy(watcher->pending); + watcher->pending = NULL; +} + +static char* get_path_from_wd(Watcher *watcher, int wd) { + for (int i = 0; i < watcher->watchCount; ++i) { + if (watcher->watchList[i].wd == wd) + return watcher->watchList[i].path; + } + return NULL; +} + +static bool is_directory_watched(Watcher *watcher, const char *path) { + for (int i = 0; i < watcher->watchCount; ++i) { + if (watcher->watchList[i].wd != -1 && strcmp(watcher->watchList[i].path, path) == 0) { + return true; + } + } + return false; +} + +static void watch_directory_and_subdirectories(Watcher *watcher, int inotifyFd, const char *basePath) { + DICTIONARY *dirs = dictionary_create(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE); + + journal_directory_scan_recursively(NULL, dirs, basePath, 0); + + void *x; + dfe_start_read(dirs, x) { + const char *dirname = x_dfe.name; + // Check if this directory is already being watched + if (!is_directory_watched(watcher, dirname)) { + add_watch(watcher, inotifyFd, dirname); + } + } + dfe_done(x); + + dictionary_destroy(dirs); +} + +static bool is_subpath(const char *path, const char *subpath) { + // Use strncmp to compare the paths + if (strncmp(path, subpath, strlen(path)) == 0) { + // Ensure that the next character is a '/' or '\0' + char next_char = subpath[strlen(path)]; + return next_char == '/' || next_char == '\0'; + } + + return false; +} + +void remove_directory_watch(Watcher *watcher, int inotifyFd, const char *dirPath) { + for (int i = 0; i < watcher->watchCount; ++i) { + WatchEntry *t = &watcher->watchList[i]; + if (t->wd != -1 && is_subpath(t->path, dirPath)) { + inotify_rm_watch(inotifyFd, t->wd); + free_slot(watcher, t); + } + } + + struct journal_file *jf; + dfe_start_write(journal_files_registry, jf) { + if(is_subpath(jf->filename, dirPath)) + dictionary_del(journal_files_registry, jf->filename); + } + dfe_done(jf); + + dictionary_garbage_collect(journal_files_registry); +} + +void process_event(Watcher *watcher, int inotifyFd, struct inotify_event *event) { + if(!event->len) { + nd_log(NDLS_COLLECTORS, NDLP_NOTICE + , "JOURNAL WATCHER: received event with mask %u and len %u (this is zero) for path: '%s' - ignoring it." + , event->mask, event->len, event->name); + return; + } + + char *dirPath = get_path_from_wd(watcher, event->wd); + if(!dirPath) { + nd_log(NDLS_COLLECTORS, NDLP_NOTICE, + "JOURNAL WATCHER: received event with mask %u and len %u for path: '%s' - " + "but we can't find its watch descriptor - ignoring it." + , event->mask, event->len, event->name); + return; + } + + if(event->mask & IN_DELETE_SELF) { + remove_watch(watcher, inotifyFd, event->wd); + return; + } + + static __thread char fullPath[PATH_MAX]; + snprintfz(fullPath, sizeof(fullPath), "%s/%s", dirPath, event->name); + // fullPath contains the full path to the file + + size_t len = strlen(event->name); + + if(event->mask & IN_ISDIR) { + if (event->mask & (IN_DELETE | IN_MOVED_FROM)) { + // A directory is deleted or moved out + nd_log(NDLS_COLLECTORS, NDLP_DEBUG, + "JOURNAL WATCHER: Directory deleted or moved out: '%s'", + fullPath); + + // Remove the watch - implement this function based on how you manage your watches + remove_directory_watch(watcher, inotifyFd, fullPath); + } + else if (event->mask & (IN_CREATE | IN_MOVED_TO)) { + // A new directory is created or moved in + nd_log(NDLS_COLLECTORS, NDLP_DEBUG, + "JOURNAL WATCHER: New directory created or moved in: '%s'", + fullPath); + + // Start watching the new directory - recursive watch + watch_directory_and_subdirectories(watcher, inotifyFd, fullPath); + } + else + nd_log(NDLS_COLLECTORS, NDLP_WARNING, + "JOURNAL WATCHER: Received unhandled event with mask %u for directory '%s'", + event->mask, fullPath); + } + else if(len > sizeof(".journal") - 1 && strcmp(&event->name[len - (sizeof(".journal") - 1)], ".journal") == 0) { + // It is a file that ends in .journal + // add it to our pending list + dictionary_set(watcher->pending, fullPath, NULL, 0); + } + else + nd_log(NDLS_COLLECTORS, NDLP_DEBUG, + "JOURNAL WATCHER: ignoring event with mask %u for file '%s'", + event->mask, fullPath); +} + +static void process_pending(Watcher *watcher) { + void *x; + dfe_start_write(watcher->pending, x) { + struct stat info; + const char *fullPath = x_dfe.name; + + if(stat(fullPath, &info) != 0) { + nd_log(NDLS_COLLECTORS, NDLP_DEBUG, + "JOURNAL WATCHER: file '%s' no longer exists, removing it from the registry", + fullPath); + + dictionary_del(journal_files_registry, fullPath); + } + else if(S_ISREG(info.st_mode)) { + nd_log(NDLS_COLLECTORS, NDLP_DEBUG, + "JOURNAL WATCHER: file '%s' has been added/updated, updating the registry", + fullPath); + + struct journal_file t = { + .file_last_modified_ut = info.st_mtim.tv_sec * USEC_PER_SEC + + info.st_mtim.tv_nsec / NSEC_PER_USEC, + .last_scan_monotonic_ut = now_monotonic_usec(), + .size = info.st_size, + .max_journal_vs_realtime_delta_ut = JOURNAL_VS_REALTIME_DELTA_DEFAULT_UT, + }; + struct journal_file *jf = dictionary_set(journal_files_registry, fullPath, &t, sizeof(t)); + journal_file_update_header(jf->filename, jf); + } + + dictionary_del(watcher->pending, fullPath); + } + dfe_done(x); + + dictionary_garbage_collect(watcher->pending); +} + +void *journal_watcher_main(void *arg __maybe_unused) { + while(1) { + Watcher watcher = { + .watchList = mallocz(INITIAL_WATCHES * sizeof(WatchEntry)), + .freeList = NULL, + .watchCount = 0, + .watchListSize = INITIAL_WATCHES, + .pending = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE|DICT_OPTION_SINGLE_THREADED), + .errors = 0, + }; + + int inotifyFd = inotify_init(); + if (inotifyFd < 0) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "inotify_init() failed."); + free_watches(&watcher, inotifyFd); + return NULL; + } + + for (unsigned i = 0; i < MAX_JOURNAL_DIRECTORIES; i++) { + if (!journal_directories[i].path) break; + watch_directory_and_subdirectories(&watcher, inotifyFd, journal_directories[i].path); + } + + usec_t last_headers_update_ut = now_monotonic_usec(); + struct buffered_reader reader; + while (1) { + buffered_reader_ret_t rc = buffered_reader_read_timeout( + &reader, inotifyFd, SYSTEMD_JOURNAL_EXECUTE_WATCHER_PENDING_EVERY_MS, false); + + if (rc != BUFFERED_READER_READ_OK && rc != BUFFERED_READER_READ_POLL_TIMEOUT) { + nd_log(NDLS_COLLECTORS, NDLP_CRIT, + "JOURNAL WATCHER: cannot read inotify events, buffered_reader_read_timeout() returned %d - " + "restarting the watcher.", + rc); + break; + } + + if(rc == BUFFERED_READER_READ_OK) { + bool unmount_event = false; + + ssize_t i = 0; + while (i < reader.read_len) { + struct inotify_event *event = (struct inotify_event *) &reader.read_buffer[i]; + + if(event->mask & IN_UNMOUNT) { + unmount_event = true; + break; + } + + process_event(&watcher, inotifyFd, event); + i += (ssize_t)EVENT_SIZE + event->len; + } + + reader.read_buffer[0] = '\0'; + reader.read_len = 0; + reader.pos = 0; + + if(unmount_event) + break; + } + + usec_t ut = now_monotonic_usec(); + if (dictionary_entries(watcher.pending) && (rc == BUFFERED_READER_READ_POLL_TIMEOUT || + last_headers_update_ut + (SYSTEMD_JOURNAL_EXECUTE_WATCHER_PENDING_EVERY_MS * USEC_PER_MS) <= ut)) { + process_pending(&watcher); + last_headers_update_ut = ut; + } + + if(watcher.errors) { + nd_log(NDLS_COLLECTORS, NDLP_NOTICE, + "JOURNAL WATCHER: there were errors in setting up inotify watches - restarting the watcher."); + } + } + + close(inotifyFd); + free_watches(&watcher, inotifyFd); + + // this will scan the directories and cleanup the registry + journal_files_registry_update(); + + sleep_usec(5 * USEC_PER_SEC); + } + + return NULL; +} diff --git a/collectors/systemd-journal.plugin/systemd-journal.c b/collectors/systemd-journal.plugin/systemd-journal.c index 877371120..f812b2161 100644 --- a/collectors/systemd-journal.plugin/systemd-journal.c +++ b/collectors/systemd-journal.plugin/systemd-journal.c @@ -5,13 +5,7 @@ * GPL v3+ */ -#include "collectors/all.h" -#include "libnetdata/libnetdata.h" -#include "libnetdata/required_dummies.h" - -#include <linux/capability.h> -#include <systemd/sd-journal.h> -#include <syslog.h> +#include "systemd-internals.h" /* * TODO @@ -20,95 +14,17 @@ * */ - -// ---------------------------------------------------------------------------- -// fstat64 overloading to speed up libsystemd -// https://github.com/systemd/systemd/pull/29261 - -#define ND_SD_JOURNAL_OPEN_FLAGS (0) - -#include <dlfcn.h> -#include <sys/stat.h> - -#define FSTAT_CACHE_MAX 1024 -struct fdstat64_cache_entry { - bool enabled; - bool updated; - int err_no; - struct stat64 stat; - int ret; - size_t cached_count; - size_t session; -}; - -struct fdstat64_cache_entry fstat64_cache[FSTAT_CACHE_MAX] = {0 }; -static __thread size_t fstat_thread_calls = 0; -static __thread size_t fstat_thread_cached_responses = 0; -static __thread bool enable_thread_fstat = false; -static __thread size_t fstat_caching_thread_session = 0; -static size_t fstat_caching_global_session = 0; - -static void fstat_cache_enable_on_thread(void) { - fstat_caching_thread_session = __atomic_add_fetch(&fstat_caching_global_session, 1, __ATOMIC_ACQUIRE); - enable_thread_fstat = true; -} - -static void fstat_cache_disable_on_thread(void) { - fstat_caching_thread_session = __atomic_add_fetch(&fstat_caching_global_session, 1, __ATOMIC_RELEASE); - enable_thread_fstat = false; -} - -int fstat64(int fd, struct stat64 *buf) { - static int (*real_fstat)(int, struct stat64 *) = NULL; - if (!real_fstat) - real_fstat = dlsym(RTLD_NEXT, "fstat64"); - - fstat_thread_calls++; - - if(fd >= 0 && fd < FSTAT_CACHE_MAX) { - if(enable_thread_fstat && fstat64_cache[fd].session != fstat_caching_thread_session) { - fstat64_cache[fd].session = fstat_caching_thread_session; - fstat64_cache[fd].enabled = true; - fstat64_cache[fd].updated = false; - } - - if(fstat64_cache[fd].enabled && fstat64_cache[fd].updated && fstat64_cache[fd].session == fstat_caching_thread_session) { - fstat_thread_cached_responses++; - errno = fstat64_cache[fd].err_no; - *buf = fstat64_cache[fd].stat; - fstat64_cache[fd].cached_count++; - return fstat64_cache[fd].ret; - } - } - - int ret = real_fstat(fd, buf); - - if(fd >= 0 && fd < FSTAT_CACHE_MAX && fstat64_cache[fd].enabled) { - fstat64_cache[fd].ret = ret; - fstat64_cache[fd].updated = true; - fstat64_cache[fd].err_no = errno; - fstat64_cache[fd].stat = *buf; - fstat64_cache[fd].session = fstat_caching_thread_session; - } - - return ret; -} - -// ---------------------------------------------------------------------------- - #define FACET_MAX_VALUE_LENGTH 8192 -#define SYSTEMD_JOURNAL_MAX_SOURCE_LEN 64 #define SYSTEMD_JOURNAL_FUNCTION_DESCRIPTION "View, search and analyze systemd journal entries." #define SYSTEMD_JOURNAL_FUNCTION_NAME "systemd-journal" #define SYSTEMD_JOURNAL_DEFAULT_TIMEOUT 60 -#define SYSTEMD_JOURNAL_MAX_PARAMS 100 +#define SYSTEMD_JOURNAL_MAX_PARAMS 1000 #define SYSTEMD_JOURNAL_DEFAULT_QUERY_DURATION (1 * 3600) #define SYSTEMD_JOURNAL_DEFAULT_ITEMS_PER_QUERY 200 -#define SYSTEMD_JOURNAL_WORKER_THREADS 5 - -#define JOURNAL_VS_REALTIME_DELTA_DEFAULT_UT (5 * USEC_PER_SEC) // assume always 5 seconds latency -#define JOURNAL_VS_REALTIME_DELTA_MAX_UT (2 * 60 * USEC_PER_SEC) // up to 2 minutes latency +#define SYSTEMD_JOURNAL_DEFAULT_ITEMS_SAMPLING 1000000 +#define SYSTEMD_JOURNAL_SAMPLING_SLOTS 1000 +#define SYSTEMD_JOURNAL_SAMPLING_RECALIBRATE 10000 #define JOURNAL_PARAMETER_HELP "help" #define JOURNAL_PARAMETER_AFTER "after" @@ -128,6 +44,7 @@ int fstat64(int fd, struct stat64 *buf) { #define JOURNAL_PARAMETER_SLICE "slice" #define JOURNAL_PARAMETER_DELTA "delta" #define JOURNAL_PARAMETER_TAIL "tail" +#define JOURNAL_PARAMETER_SAMPLING "sampling" #define JOURNAL_KEY_ND_JOURNAL_FILE "ND_JOURNAL_FILE" #define JOURNAL_KEY_ND_JOURNAL_PROCESS "ND_JOURNAL_PROCESS" @@ -138,7 +55,8 @@ int fstat64(int fd, struct stat64 *buf) { #define SYSTEMD_ALWAYS_VISIBLE_KEYS NULL #define SYSTEMD_KEYS_EXCLUDED_FROM_FACETS \ - "*MESSAGE*" \ + "!MESSAGE_ID" \ + "|*MESSAGE*" \ "|*_RAW" \ "|*_USEC" \ "|*_NSEC" \ @@ -153,7 +71,7 @@ int fstat64(int fd, struct stat64 *buf) { /* --- USER JOURNAL FIELDS --- */ \ \ /* "|MESSAGE" */ \ - /* "|MESSAGE_ID" */ \ + "|MESSAGE_ID" \ "|PRIORITY" \ "|CODE_FILE" \ /* "|CODE_LINE" */ \ @@ -247,33 +165,22 @@ int fstat64(int fd, struct stat64 *buf) { "|IMAGE_NAME" /* undocumented */ \ /* "|CONTAINER_PARTIAL_MESSAGE" */ \ \ + \ + /* --- NETDATA --- */ \ + \ + "|ND_NIDL_NODE" \ + "|ND_NIDL_CONTEXT" \ + "|ND_LOG_SOURCE" \ + /*"|ND_MODULE" */ \ + "|ND_ALERT_NAME" \ + "|ND_ALERT_CLASS" \ + "|ND_ALERT_COMPONENT" \ + "|ND_ALERT_TYPE" \ + \ "" -static netdata_mutex_t stdout_mutex = NETDATA_MUTEX_INITIALIZER; -static bool plugin_should_exit = false; - // ---------------------------------------------------------------------------- -typedef enum { - ND_SD_JOURNAL_NO_FILE_MATCHED, - ND_SD_JOURNAL_FAILED_TO_OPEN, - ND_SD_JOURNAL_FAILED_TO_SEEK, - ND_SD_JOURNAL_TIMED_OUT, - ND_SD_JOURNAL_OK, - ND_SD_JOURNAL_NOT_MODIFIED, - ND_SD_JOURNAL_CANCELLED, -} ND_SD_JOURNAL_STATUS; - -typedef enum { - SDJF_ALL = 0, - SDJF_LOCAL = (1 << 0), - SDJF_REMOTE = (1 << 1), - SDJF_SYSTEM = (1 << 2), - SDJF_USER = (1 << 3), - SDJF_NAMESPACE = (1 << 4), - SDJF_OTHER = (1 << 5), -} SD_JOURNAL_FILE_SOURCE_TYPE; - typedef struct function_query_status { bool *cancelled; // a pointer to the cancelling boolean usec_t stop_monotonic_ut; @@ -282,7 +189,7 @@ typedef struct function_query_status { // request SD_JOURNAL_FILE_SOURCE_TYPE source_type; - STRING *source; + SIMPLE_PATTERN *sources; usec_t after_ut; usec_t before_ut; @@ -298,13 +205,50 @@ typedef struct function_query_status { bool tail; bool data_only; bool slice; + size_t sampling; size_t filters; usec_t last_modified; const char *query; const char *histogram; + struct { + usec_t start_ut; // the starting time of the query - we start from this + usec_t stop_ut; // the ending time of the query - we stop at this + usec_t first_msg_ut; + + sd_id128_t first_msg_writer; + uint64_t first_msg_seqnum; + } query_file; + + struct { + uint32_t enable_after_samples; + uint32_t slots; + uint32_t sampled; + uint32_t unsampled; + uint32_t estimated; + } samples; + + struct { + uint32_t enable_after_samples; + uint32_t every; + uint32_t skipped; + uint32_t recalibrate; + uint32_t sampled; + uint32_t unsampled; + uint32_t estimated; + } samples_per_file; + + struct { + usec_t start_ut; + usec_t end_ut; + usec_t step_ut; + uint32_t enable_after_samples; + uint32_t sampled[SYSTEMD_JOURNAL_SAMPLING_SLOTS]; + uint32_t unsampled[SYSTEMD_JOURNAL_SAMPLING_SLOTS]; + } samples_per_time_slot; + // per file progress info - size_t cached_count; + // size_t cached_count; // progress statistics usec_t matches_setup_ut; @@ -315,20 +259,6 @@ typedef struct function_query_status { size_t file_working; } FUNCTION_QUERY_STATUS; -struct journal_file { - const char *filename; - size_t filename_len; - STRING *source; - SD_JOURNAL_FILE_SOURCE_TYPE source_type; - usec_t file_last_modified_ut; - usec_t msg_first_ut; - usec_t msg_last_ut; - usec_t last_scan_ut; - size_t size; - bool logged_failure; - usec_t max_journal_vs_realtime_delta_ut; -}; - static void log_fqs(FUNCTION_QUERY_STATUS *fqs, const char *msg) { netdata_log_error("ERROR: %s, on query " "timeframe [%"PRIu64" - %"PRIu64"], " @@ -359,25 +289,369 @@ static inline bool netdata_systemd_journal_seek_to(sd_journal *j, usec_t timesta #define JD_SOURCE_REALTIME_TIMESTAMP "_SOURCE_REALTIME_TIMESTAMP" -static inline bool parse_journal_field(const char *data, size_t data_length, const char **key, size_t *key_length, const char **value, size_t *value_length) { - const char *k = data; - const char *equal = strchr(k, '='); - if(unlikely(!equal)) - return false; +// ---------------------------------------------------------------------------- +// sampling support + +static void sampling_query_init(FUNCTION_QUERY_STATUS *fqs, FACETS *facets) { + if(!fqs->sampling) + return; - size_t kl = equal - k; + if(!fqs->slice) { + // the user is doing a full data query + // disable sampling + fqs->sampling = 0; + return; + } - const char *v = ++equal; - size_t vl = data_length - kl - 1; + if(fqs->data_only) { + // the user is doing a data query + // disable sampling + fqs->sampling = 0; + return; + } - *key = k; - *key_length = kl; - *value = v; - *value_length = vl; + if(!fqs->files_matched) { + // no files have been matched + // disable sampling + fqs->sampling = 0; + return; + } - return true; + fqs->samples.slots = facets_histogram_slots(facets); + if(fqs->samples.slots < 2) fqs->samples.slots = 2; + if(fqs->samples.slots > SYSTEMD_JOURNAL_SAMPLING_SLOTS) + fqs->samples.slots = SYSTEMD_JOURNAL_SAMPLING_SLOTS; + + if(!fqs->after_ut || !fqs->before_ut || fqs->after_ut >= fqs->before_ut) { + // we don't have enough information for sampling + fqs->sampling = 0; + return; + } + + usec_t delta = fqs->before_ut - fqs->after_ut; + usec_t step = delta / facets_histogram_slots(facets) - 1; + if(step < 1) step = 1; + + fqs->samples_per_time_slot.start_ut = fqs->after_ut; + fqs->samples_per_time_slot.end_ut = fqs->before_ut; + fqs->samples_per_time_slot.step_ut = step; + + // the minimum number of rows to enable sampling + fqs->samples.enable_after_samples = fqs->sampling / 2; + + size_t files_matched = fqs->files_matched; + if(!files_matched) + files_matched = 1; + + // the minimum number of rows per file to enable sampling + fqs->samples_per_file.enable_after_samples = (fqs->sampling / 4) / files_matched; + if(fqs->samples_per_file.enable_after_samples < fqs->entries) + fqs->samples_per_file.enable_after_samples = fqs->entries; + + // the minimum number of rows per time slot to enable sampling + fqs->samples_per_time_slot.enable_after_samples = (fqs->sampling / 4) / fqs->samples.slots; + if(fqs->samples_per_time_slot.enable_after_samples < fqs->entries) + fqs->samples_per_time_slot.enable_after_samples = fqs->entries; +} + +static void sampling_file_init(FUNCTION_QUERY_STATUS *fqs, struct journal_file *jf __maybe_unused) { + fqs->samples_per_file.sampled = 0; + fqs->samples_per_file.unsampled = 0; + fqs->samples_per_file.estimated = 0; + fqs->samples_per_file.every = 0; + fqs->samples_per_file.skipped = 0; + fqs->samples_per_file.recalibrate = 0; +} + +static size_t sampling_file_lines_scanned_so_far(FUNCTION_QUERY_STATUS *fqs) { + size_t sampled = fqs->samples_per_file.sampled + fqs->samples_per_file.unsampled; + if(!sampled) sampled = 1; + return sampled; +} + +static void sampling_running_file_query_overlapping_timeframe_ut( + FUNCTION_QUERY_STATUS *fqs, struct journal_file *jf, FACETS_ANCHOR_DIRECTION direction, + usec_t msg_ut, usec_t *after_ut, usec_t *before_ut) { + + // find the overlap of the query and file timeframes + // taking into account the first message we encountered + + usec_t oldest_ut, newest_ut; + if(direction == FACETS_ANCHOR_DIRECTION_FORWARD) { + // the first message we know (oldest) + oldest_ut = fqs->query_file.first_msg_ut ? fqs->query_file.first_msg_ut : jf->msg_first_ut; + if(!oldest_ut) oldest_ut = fqs->query_file.start_ut; + + if(jf->msg_last_ut) + newest_ut = MIN(fqs->query_file.stop_ut, jf->msg_last_ut); + else if(jf->file_last_modified_ut) + newest_ut = MIN(fqs->query_file.stop_ut, jf->file_last_modified_ut); + else + newest_ut = fqs->query_file.stop_ut; + + if(msg_ut < oldest_ut) + oldest_ut = msg_ut - 1; + } + else /* BACKWARD */ { + // the latest message we know (newest) + newest_ut = fqs->query_file.first_msg_ut ? fqs->query_file.first_msg_ut : jf->msg_last_ut; + if(!newest_ut) newest_ut = fqs->query_file.start_ut; + + if(jf->msg_first_ut) + oldest_ut = MAX(fqs->query_file.stop_ut, jf->msg_first_ut); + else + oldest_ut = fqs->query_file.stop_ut; + + if(newest_ut < msg_ut) + newest_ut = msg_ut + 1; + } + + *after_ut = oldest_ut; + *before_ut = newest_ut; +} + +static double sampling_running_file_query_progress_by_time(FUNCTION_QUERY_STATUS *fqs, struct journal_file *jf, + FACETS_ANCHOR_DIRECTION direction, usec_t msg_ut) { + + usec_t after_ut, before_ut, elapsed_ut; + sampling_running_file_query_overlapping_timeframe_ut(fqs, jf, direction, msg_ut, &after_ut, &before_ut); + + if(direction == FACETS_ANCHOR_DIRECTION_FORWARD) + elapsed_ut = msg_ut - after_ut; + else + elapsed_ut = before_ut - msg_ut; + + usec_t total_ut = before_ut - after_ut; + double progress = (double)elapsed_ut / (double)total_ut; + + return progress; +} + +static usec_t sampling_running_file_query_remaining_time(FUNCTION_QUERY_STATUS *fqs, struct journal_file *jf, + FACETS_ANCHOR_DIRECTION direction, usec_t msg_ut, + usec_t *total_time_ut, usec_t *remaining_start_ut, + usec_t *remaining_end_ut) { + usec_t after_ut, before_ut; + sampling_running_file_query_overlapping_timeframe_ut(fqs, jf, direction, msg_ut, &after_ut, &before_ut); + + // since we have a timestamp in msg_ut + // this timestamp can extend the overlap + if(msg_ut <= after_ut) + after_ut = msg_ut - 1; + + if(msg_ut >= before_ut) + before_ut = msg_ut + 1; + + // return the remaining duration + usec_t remaining_from_ut, remaining_to_ut; + if(direction == FACETS_ANCHOR_DIRECTION_FORWARD) { + remaining_from_ut = msg_ut; + remaining_to_ut = before_ut; + } + else { + remaining_from_ut = after_ut; + remaining_to_ut = msg_ut; + } + + usec_t remaining_ut = remaining_to_ut - remaining_from_ut; + + if(total_time_ut) + *total_time_ut = (before_ut > after_ut) ? before_ut - after_ut : 1; + + if(remaining_start_ut) + *remaining_start_ut = remaining_from_ut; + + if(remaining_end_ut) + *remaining_end_ut = remaining_to_ut; + + return remaining_ut; +} + +static size_t sampling_running_file_query_estimate_remaining_lines_by_time(FUNCTION_QUERY_STATUS *fqs, + struct journal_file *jf, + FACETS_ANCHOR_DIRECTION direction, + usec_t msg_ut) { + size_t scanned_lines = sampling_file_lines_scanned_so_far(fqs); + + // Calculate the proportion of time covered + usec_t total_time_ut, remaining_start_ut, remaining_end_ut; + usec_t remaining_time_ut = sampling_running_file_query_remaining_time(fqs, jf, direction, msg_ut, &total_time_ut, + &remaining_start_ut, &remaining_end_ut); + if (total_time_ut == 0) total_time_ut = 1; + + double proportion_by_time = (double) (total_time_ut - remaining_time_ut) / (double) total_time_ut; + + if (proportion_by_time == 0 || proportion_by_time > 1.0 || !isfinite(proportion_by_time)) + proportion_by_time = 1.0; + + // Estimate the total number of lines in the file + size_t expected_matching_logs_by_time = (size_t)((double)scanned_lines / proportion_by_time); + + if(jf->messages_in_file && expected_matching_logs_by_time > jf->messages_in_file) + expected_matching_logs_by_time = jf->messages_in_file; + + // Calculate the estimated number of remaining lines + size_t remaining_logs_by_time = expected_matching_logs_by_time - scanned_lines; + if (remaining_logs_by_time < 1) remaining_logs_by_time = 1; + +// nd_log(NDLS_COLLECTORS, NDLP_INFO, +// "JOURNAL ESTIMATION: '%s' " +// "scanned_lines=%zu [sampled=%zu, unsampled=%zu, estimated=%zu], " +// "file [%"PRIu64" - %"PRIu64", duration %"PRId64", known lines in file %zu], " +// "query [%"PRIu64" - %"PRIu64", duration %"PRId64"], " +// "first message read from the file at %"PRIu64", current message at %"PRIu64", " +// "proportion of time %.2f %%, " +// "expected total lines in file %zu, " +// "remaining lines %zu, " +// "remaining time %"PRIu64" [%"PRIu64" - %"PRIu64", duration %"PRId64"]" +// , jf->filename +// , scanned_lines, fqs->samples_per_file.sampled, fqs->samples_per_file.unsampled, fqs->samples_per_file.estimated +// , jf->msg_first_ut, jf->msg_last_ut, jf->msg_last_ut - jf->msg_first_ut, jf->messages_in_file +// , fqs->query_file.start_ut, fqs->query_file.stop_ut, fqs->query_file.stop_ut - fqs->query_file.start_ut +// , fqs->query_file.first_msg_ut, msg_ut +// , proportion_by_time * 100.0 +// , expected_matching_logs_by_time +// , remaining_logs_by_time +// , remaining_time_ut, remaining_start_ut, remaining_end_ut, remaining_end_ut - remaining_start_ut +// ); + + return remaining_logs_by_time; +} + +static size_t sampling_running_file_query_estimate_remaining_lines(sd_journal *j, FUNCTION_QUERY_STATUS *fqs, struct journal_file *jf, FACETS_ANCHOR_DIRECTION direction, usec_t msg_ut) { + size_t expected_matching_logs_by_seqnum = 0; + double proportion_by_seqnum = 0.0; + size_t remaining_logs_by_seqnum = 0; + +#ifdef HAVE_SD_JOURNAL_GET_SEQNUM + uint64_t current_msg_seqnum; + sd_id128_t current_msg_writer; + if(!fqs->query_file.first_msg_seqnum || sd_journal_get_seqnum(j, ¤t_msg_seqnum, ¤t_msg_writer) < 0) { + fqs->query_file.first_msg_seqnum = 0; + fqs->query_file.first_msg_writer = SD_ID128_NULL; + } + else if(jf->messages_in_file) { + size_t scanned_lines = sampling_file_lines_scanned_so_far(fqs); + + double proportion_of_all_lines_so_far; + if(direction == FACETS_ANCHOR_DIRECTION_FORWARD) + proportion_of_all_lines_so_far = (double)scanned_lines / (double)(current_msg_seqnum - jf->first_seqnum); + else + proportion_of_all_lines_so_far = (double)scanned_lines / (double)(jf->last_seqnum - current_msg_seqnum); + + if(proportion_of_all_lines_so_far > 1.0) + proportion_of_all_lines_so_far = 1.0; + + expected_matching_logs_by_seqnum = (size_t)(proportion_of_all_lines_so_far * (double)jf->messages_in_file); + + proportion_by_seqnum = (double)scanned_lines / (double)expected_matching_logs_by_seqnum; + + if (proportion_by_seqnum == 0 || proportion_by_seqnum > 1.0 || !isfinite(proportion_by_seqnum)) + proportion_by_seqnum = 1.0; + + remaining_logs_by_seqnum = expected_matching_logs_by_seqnum - scanned_lines; + if(!remaining_logs_by_seqnum) remaining_logs_by_seqnum = 1; + } +#endif + + if(remaining_logs_by_seqnum) + return remaining_logs_by_seqnum; + + return sampling_running_file_query_estimate_remaining_lines_by_time(fqs, jf, direction, msg_ut); +} + +static void sampling_decide_file_sampling_every(sd_journal *j, FUNCTION_QUERY_STATUS *fqs, struct journal_file *jf, FACETS_ANCHOR_DIRECTION direction, usec_t msg_ut) { + size_t files_matched = fqs->files_matched; + if(!files_matched) files_matched = 1; + + size_t remaining_lines = sampling_running_file_query_estimate_remaining_lines(j, fqs, jf, direction, msg_ut); + size_t wanted_samples = (fqs->sampling / 2) / files_matched; + if(!wanted_samples) wanted_samples = 1; + + fqs->samples_per_file.every = remaining_lines / wanted_samples; + + if(fqs->samples_per_file.every < 1) + fqs->samples_per_file.every = 1; +} + +typedef enum { + SAMPLING_STOP_AND_ESTIMATE = -1, + SAMPLING_FULL = 0, + SAMPLING_SKIP_FIELDS = 1, +} sampling_t; + +static inline sampling_t is_row_in_sample(sd_journal *j, FUNCTION_QUERY_STATUS *fqs, struct journal_file *jf, usec_t msg_ut, FACETS_ANCHOR_DIRECTION direction, bool candidate_to_keep) { + if(!fqs->sampling || candidate_to_keep) + return SAMPLING_FULL; + + if(unlikely(msg_ut < fqs->samples_per_time_slot.start_ut)) + msg_ut = fqs->samples_per_time_slot.start_ut; + if(unlikely(msg_ut > fqs->samples_per_time_slot.end_ut)) + msg_ut = fqs->samples_per_time_slot.end_ut; + + size_t slot = (msg_ut - fqs->samples_per_time_slot.start_ut) / fqs->samples_per_time_slot.step_ut; + if(slot >= fqs->samples.slots) + slot = fqs->samples.slots - 1; + + bool should_sample = false; + + if(fqs->samples.sampled < fqs->samples.enable_after_samples || + fqs->samples_per_file.sampled < fqs->samples_per_file.enable_after_samples || + fqs->samples_per_time_slot.sampled[slot] < fqs->samples_per_time_slot.enable_after_samples) + should_sample = true; + + else if(fqs->samples_per_file.recalibrate >= SYSTEMD_JOURNAL_SAMPLING_RECALIBRATE || !fqs->samples_per_file.every) { + // this is the first to be unsampled for this file + sampling_decide_file_sampling_every(j, fqs, jf, direction, msg_ut); + fqs->samples_per_file.recalibrate = 0; + should_sample = true; + } + else { + // we sample 1 every fqs->samples_per_file.every + if(fqs->samples_per_file.skipped >= fqs->samples_per_file.every) { + fqs->samples_per_file.skipped = 0; + should_sample = true; + } + else + fqs->samples_per_file.skipped++; + } + + if(should_sample) { + fqs->samples.sampled++; + fqs->samples_per_file.sampled++; + fqs->samples_per_time_slot.sampled[slot]++; + + return SAMPLING_FULL; + } + + fqs->samples_per_file.recalibrate++; + + fqs->samples.unsampled++; + fqs->samples_per_file.unsampled++; + fqs->samples_per_time_slot.unsampled[slot]++; + + if(fqs->samples_per_file.unsampled > fqs->samples_per_file.sampled) { + double progress_by_time = sampling_running_file_query_progress_by_time(fqs, jf, direction, msg_ut); + + if(progress_by_time > SYSTEMD_JOURNAL_ENABLE_ESTIMATIONS_FILE_PERCENTAGE) + return SAMPLING_STOP_AND_ESTIMATE; + } + + return SAMPLING_SKIP_FIELDS; +} + +static void sampling_update_running_query_file_estimates(FACETS *facets, sd_journal *j, FUNCTION_QUERY_STATUS *fqs, struct journal_file *jf, usec_t msg_ut, FACETS_ANCHOR_DIRECTION direction) { + usec_t total_time_ut, remaining_start_ut, remaining_end_ut; + sampling_running_file_query_remaining_time(fqs, jf, direction, msg_ut, &total_time_ut, &remaining_start_ut, + &remaining_end_ut); + size_t remaining_lines = sampling_running_file_query_estimate_remaining_lines(j, fqs, jf, direction, msg_ut); + facets_update_estimations(facets, remaining_start_ut, remaining_end_ut, remaining_lines); + fqs->samples.estimated += remaining_lines; + fqs->samples_per_file.estimated += remaining_lines; } +// ---------------------------------------------------------------------------- + static inline size_t netdata_systemd_journal_process_row(sd_journal *j, FACETS *facets, struct journal_file *jf, usec_t *msg_ut) { const void *data; size_t length, bytes = 0; @@ -454,11 +728,15 @@ ND_SD_JOURNAL_STATUS netdata_systemd_journal_query_backward( usec_t stop_ut = (fqs->data_only && fqs->anchor.stop_ut) ? fqs->anchor.stop_ut : fqs->after_ut; bool stop_when_full = (fqs->data_only && !fqs->anchor.stop_ut); + fqs->query_file.start_ut = start_ut; + fqs->query_file.stop_ut = stop_ut; + if(!netdata_systemd_journal_seek_to(j, start_ut)) return ND_SD_JOURNAL_FAILED_TO_SEEK; size_t errors_no_timestamp = 0; - usec_t earliest_msg_ut = 0; + usec_t latest_msg_ut = 0; // the biggest timestamp we have seen so far + usec_t first_msg_ut = 0; // the first message we got from the db size_t row_counter = 0, last_row_counter = 0, rows_useful = 0; size_t bytes = 0, last_bytes = 0; @@ -475,44 +753,68 @@ ND_SD_JOURNAL_STATUS netdata_systemd_journal_query_backward( continue; } - if(unlikely(msg_ut > earliest_msg_ut)) - earliest_msg_ut = msg_ut; - if (unlikely(msg_ut > start_ut)) continue; if (unlikely(msg_ut < stop_ut)) break; - bytes += netdata_systemd_journal_process_row(j, facets, jf, &msg_ut); + if(unlikely(msg_ut > latest_msg_ut)) + latest_msg_ut = msg_ut; - // make sure each line gets a unique timestamp - if(unlikely(msg_ut >= last_usec_from && msg_ut <= last_usec_to)) - msg_ut = --last_usec_from; - else - last_usec_from = last_usec_to = msg_ut; - - if(facets_row_finished(facets, msg_ut)) - rows_useful++; - - row_counter++; - if(unlikely((row_counter % FUNCTION_DATA_ONLY_CHECK_EVERY_ROWS) == 0 && - stop_when_full && - facets_rows(facets) >= fqs->entries)) { - // stop the data only query - usec_t oldest = facets_row_oldest_ut(facets); - if(oldest && msg_ut < (oldest - anchor_delta)) - break; + if(unlikely(!first_msg_ut)) { + first_msg_ut = msg_ut; + fqs->query_file.first_msg_ut = msg_ut; + +#ifdef HAVE_SD_JOURNAL_GET_SEQNUM + if(sd_journal_get_seqnum(j, &fqs->query_file.first_msg_seqnum, &fqs->query_file.first_msg_writer) < 0) { + fqs->query_file.first_msg_seqnum = 0; + fqs->query_file.first_msg_writer = SD_ID128_NULL; + } +#endif } - if(unlikely(row_counter % FUNCTION_PROGRESS_EVERY_ROWS == 0)) { - FUNCTION_PROGRESS_UPDATE_ROWS(fqs->rows_read, row_counter - last_row_counter); - last_row_counter = row_counter; + sampling_t sample = is_row_in_sample(j, fqs, jf, msg_ut, + FACETS_ANCHOR_DIRECTION_BACKWARD, + facets_row_candidate_to_keep(facets, msg_ut)); + + if(sample == SAMPLING_FULL) { + bytes += netdata_systemd_journal_process_row(j, facets, jf, &msg_ut); + + // make sure each line gets a unique timestamp + if(unlikely(msg_ut >= last_usec_from && msg_ut <= last_usec_to)) + msg_ut = --last_usec_from; + else + last_usec_from = last_usec_to = msg_ut; + + if(facets_row_finished(facets, msg_ut)) + rows_useful++; + + row_counter++; + if(unlikely((row_counter % FUNCTION_DATA_ONLY_CHECK_EVERY_ROWS) == 0 && + stop_when_full && + facets_rows(facets) >= fqs->entries)) { + // stop the data only query + usec_t oldest = facets_row_oldest_ut(facets); + if(oldest && msg_ut < (oldest - anchor_delta)) + break; + } + + if(unlikely(row_counter % FUNCTION_PROGRESS_EVERY_ROWS == 0)) { + FUNCTION_PROGRESS_UPDATE_ROWS(fqs->rows_read, row_counter - last_row_counter); + last_row_counter = row_counter; - FUNCTION_PROGRESS_UPDATE_BYTES(fqs->bytes_read, bytes - last_bytes); - last_bytes = bytes; + FUNCTION_PROGRESS_UPDATE_BYTES(fqs->bytes_read, bytes - last_bytes); + last_bytes = bytes; - status = check_stop(fqs->cancelled, &fqs->stop_monotonic_ut); + status = check_stop(fqs->cancelled, &fqs->stop_monotonic_ut); + } + } + else if(sample == SAMPLING_SKIP_FIELDS) + facets_row_finished_unsampled(facets, msg_ut); + else { + sampling_update_running_query_file_estimates(facets, j, fqs, jf, msg_ut, FACETS_ANCHOR_DIRECTION_BACKWARD); + break; } } @@ -524,8 +826,8 @@ ND_SD_JOURNAL_STATUS netdata_systemd_journal_query_backward( if(errors_no_timestamp) netdata_log_error("SYSTEMD-JOURNAL: %zu lines did not have timestamps", errors_no_timestamp); - if(earliest_msg_ut > fqs->last_modified) - fqs->last_modified = earliest_msg_ut; + if(latest_msg_ut > fqs->last_modified) + fqs->last_modified = latest_msg_ut; return status; } @@ -540,11 +842,15 @@ ND_SD_JOURNAL_STATUS netdata_systemd_journal_query_forward( usec_t stop_ut = ((fqs->data_only && fqs->anchor.stop_ut) ? fqs->anchor.stop_ut : fqs->before_ut) + anchor_delta; bool stop_when_full = (fqs->data_only && !fqs->anchor.stop_ut); + fqs->query_file.start_ut = start_ut; + fqs->query_file.stop_ut = stop_ut; + if(!netdata_systemd_journal_seek_to(j, start_ut)) return ND_SD_JOURNAL_FAILED_TO_SEEK; size_t errors_no_timestamp = 0; - usec_t earliest_msg_ut = 0; + usec_t latest_msg_ut = 0; // the biggest timestamp we have seen so far + usec_t first_msg_ut = 0; // the first message we got from the db size_t row_counter = 0, last_row_counter = 0, rows_useful = 0; size_t bytes = 0, last_bytes = 0; @@ -561,44 +867,61 @@ ND_SD_JOURNAL_STATUS netdata_systemd_journal_query_forward( continue; } - if(likely(msg_ut > earliest_msg_ut)) - earliest_msg_ut = msg_ut; - if (unlikely(msg_ut < start_ut)) continue; if (unlikely(msg_ut > stop_ut)) break; - bytes += netdata_systemd_journal_process_row(j, facets, jf, &msg_ut); + if(likely(msg_ut > latest_msg_ut)) + latest_msg_ut = msg_ut; - // make sure each line gets a unique timestamp - if(unlikely(msg_ut >= last_usec_from && msg_ut <= last_usec_to)) - msg_ut = ++last_usec_to; - else - last_usec_from = last_usec_to = msg_ut; - - if(facets_row_finished(facets, msg_ut)) - rows_useful++; - - row_counter++; - if(unlikely((row_counter % FUNCTION_DATA_ONLY_CHECK_EVERY_ROWS) == 0 && - stop_when_full && - facets_rows(facets) >= fqs->entries)) { - // stop the data only query - usec_t newest = facets_row_newest_ut(facets); - if(newest && msg_ut > (newest + anchor_delta)) - break; + if(unlikely(!first_msg_ut)) { + first_msg_ut = msg_ut; + fqs->query_file.first_msg_ut = msg_ut; } - if(unlikely(row_counter % FUNCTION_PROGRESS_EVERY_ROWS == 0)) { - FUNCTION_PROGRESS_UPDATE_ROWS(fqs->rows_read, row_counter - last_row_counter); - last_row_counter = row_counter; + sampling_t sample = is_row_in_sample(j, fqs, jf, msg_ut, + FACETS_ANCHOR_DIRECTION_FORWARD, + facets_row_candidate_to_keep(facets, msg_ut)); + + if(sample == SAMPLING_FULL) { + bytes += netdata_systemd_journal_process_row(j, facets, jf, &msg_ut); - FUNCTION_PROGRESS_UPDATE_BYTES(fqs->bytes_read, bytes - last_bytes); - last_bytes = bytes; + // make sure each line gets a unique timestamp + if(unlikely(msg_ut >= last_usec_from && msg_ut <= last_usec_to)) + msg_ut = ++last_usec_to; + else + last_usec_from = last_usec_to = msg_ut; + + if(facets_row_finished(facets, msg_ut)) + rows_useful++; + + row_counter++; + if(unlikely((row_counter % FUNCTION_DATA_ONLY_CHECK_EVERY_ROWS) == 0 && + stop_when_full && + facets_rows(facets) >= fqs->entries)) { + // stop the data only query + usec_t newest = facets_row_newest_ut(facets); + if(newest && msg_ut > (newest + anchor_delta)) + break; + } + + if(unlikely(row_counter % FUNCTION_PROGRESS_EVERY_ROWS == 0)) { + FUNCTION_PROGRESS_UPDATE_ROWS(fqs->rows_read, row_counter - last_row_counter); + last_row_counter = row_counter; + + FUNCTION_PROGRESS_UPDATE_BYTES(fqs->bytes_read, bytes - last_bytes); + last_bytes = bytes; - status = check_stop(fqs->cancelled, &fqs->stop_monotonic_ut); + status = check_stop(fqs->cancelled, &fqs->stop_monotonic_ut); + } + } + else if(sample == SAMPLING_SKIP_FIELDS) + facets_row_finished_unsampled(facets, msg_ut); + else { + sampling_update_running_query_file_estimates(facets, j, fqs, jf, msg_ut, FACETS_ANCHOR_DIRECTION_FORWARD); + break; } } @@ -610,8 +933,8 @@ ND_SD_JOURNAL_STATUS netdata_systemd_journal_query_forward( if(errors_no_timestamp) netdata_log_error("SYSTEMD-JOURNAL: %zu lines did not have timestamps", errors_no_timestamp); - if(earliest_msg_ut > fqs->last_modified) - fqs->last_modified = earliest_msg_ut; + if(latest_msg_ut > fqs->last_modified) + fqs->last_modified = latest_msg_ut; return status; } @@ -723,6 +1046,7 @@ static ND_SD_JOURNAL_STATUS netdata_systemd_journal_query_one_file( }; if(sd_journal_open_files(&j, paths, ND_SD_JOURNAL_OPEN_FLAGS) < 0 || !j) { + netdata_log_error("JOURNAL: cannot open file '%s' for query", filename); fstat_cache_disable_on_thread(); return ND_SD_JOURNAL_FAILED_TO_OPEN; } @@ -756,432 +1080,18 @@ static ND_SD_JOURNAL_STATUS netdata_systemd_journal_query_one_file( return status; } -// ---------------------------------------------------------------------------- -// journal files registry - -#define VAR_LOG_JOURNAL_MAX_DEPTH 10 -#define MAX_JOURNAL_DIRECTORIES 100 - -struct journal_directory { - char *path; - bool logged_failure; -}; - -static struct journal_directory journal_directories[MAX_JOURNAL_DIRECTORIES] = { 0 }; -static DICTIONARY *journal_files_registry = NULL; -static DICTIONARY *used_hashes_registry = NULL; - -static usec_t systemd_journal_session = 0; - -static void buffer_json_journal_versions(BUFFER *wb) { - buffer_json_member_add_object(wb, "versions"); - { - buffer_json_member_add_uint64(wb, "sources", - systemd_journal_session + dictionary_version(journal_files_registry)); - } - buffer_json_object_close(wb); -} - -static void journal_file_update_msg_ut(const char *filename, struct journal_file *jf) { - fstat_cache_enable_on_thread(); - - const char *files[2] = { - [0] = filename, - [1] = NULL, - }; - - sd_journal *j = NULL; - if(sd_journal_open_files(&j, files, ND_SD_JOURNAL_OPEN_FLAGS) < 0 || !j) { - fstat_cache_disable_on_thread(); - - if(!jf->logged_failure) { - netdata_log_error("cannot open journal file '%s', using file timestamps to understand time-frame.", filename); - jf->logged_failure = true; - } - - jf->msg_first_ut = 0; - jf->msg_last_ut = jf->file_last_modified_ut; - return; - } - - usec_t first_ut = 0, last_ut = 0; - - if(sd_journal_seek_head(j) < 0 || sd_journal_next(j) < 0 || sd_journal_get_realtime_usec(j, &first_ut) < 0 || !first_ut) { - internal_error(true, "cannot find the timestamp of the first message in '%s'", filename); - first_ut = 0; - } - - if(sd_journal_seek_tail(j) < 0 || sd_journal_previous(j) < 0 || sd_journal_get_realtime_usec(j, &last_ut) < 0 || !last_ut) { - internal_error(true, "cannot find the timestamp of the last message in '%s'", filename); - last_ut = jf->file_last_modified_ut; - } - - sd_journal_close(j); - fstat_cache_disable_on_thread(); - - if(first_ut > last_ut) { - internal_error(true, "timestamps are flipped in file '%s'", filename); - usec_t t = first_ut; - first_ut = last_ut; - last_ut = t; - } - - jf->msg_first_ut = first_ut; - jf->msg_last_ut = last_ut; -} - -static STRING *string_strdupz_source(const char *s, const char *e, size_t max_len, const char *prefix) { - char buf[max_len]; - size_t len; - char *dst = buf; - - if(prefix) { - len = strlen(prefix); - memcpy(buf, prefix, len); - dst = &buf[len]; - max_len -= len; - } - - len = e - s; - if(len >= max_len) - len = max_len - 1; - memcpy(dst, s, len); - dst[len] = '\0'; - buf[max_len - 1] = '\0'; - - for(size_t i = 0; buf[i] ;i++) - if(!isalnum(buf[i]) && buf[i] != '-' && buf[i] != '.' && buf[i] != ':') - buf[i] = '_'; - - return string_strdupz(buf); -} - -static void files_registry_insert_cb(const DICTIONARY_ITEM *item, void *value, void *data __maybe_unused) { - struct journal_file *jf = value; - jf->filename = dictionary_acquired_item_name(item); - jf->filename_len = strlen(jf->filename); - - // based on the filename - // decide the source to show to the user - const char *s = strrchr(jf->filename, '/'); - if(s) { - if(strstr(jf->filename, "/remote/")) - jf->source_type = SDJF_REMOTE; - else { - const char *t = s - 1; - while(t >= jf->filename && *t != '.' && *t != '/') - t--; - - if(t >= jf->filename && *t == '.') { - jf->source_type = SDJF_NAMESPACE; - jf->source = string_strdupz_source(t + 1, s, SYSTEMD_JOURNAL_MAX_SOURCE_LEN, "namespace-"); - } - else - jf->source_type = SDJF_LOCAL; - } - - if(strncmp(s, "/system", 7) == 0) - jf->source_type |= SDJF_SYSTEM; - - else if(strncmp(s, "/user", 5) == 0) - jf->source_type |= SDJF_USER; - - else if(strncmp(s, "/remote-", 8) == 0) { - jf->source_type |= SDJF_REMOTE; - - s = &s[8]; // skip "/remote-" - - char *e = strchr(s, '@'); - if(!e) - e = strstr(s, ".journal"); - - if(e) { - const char *d = s; - for(; d < e && (isdigit(*d) || *d == '.' || *d == ':') ; d++) ; - if(d == e) { - // a valid IP address - char ip[e - s + 1]; - memcpy(ip, s, e - s); - ip[e - s] = '\0'; - char buf[SYSTEMD_JOURNAL_MAX_SOURCE_LEN]; - if(ip_to_hostname(ip, buf, sizeof(buf))) - jf->source = string_strdupz_source(buf, &buf[strlen(buf)], SYSTEMD_JOURNAL_MAX_SOURCE_LEN, "remote-"); - else { - internal_error(true, "Cannot find the hostname for IP '%s'", ip); - jf->source = string_strdupz_source(s, e, SYSTEMD_JOURNAL_MAX_SOURCE_LEN, "remote-"); - } - } - else - jf->source = string_strdupz_source(s, e, SYSTEMD_JOURNAL_MAX_SOURCE_LEN, "remote-"); - } - else - jf->source_type |= SDJF_OTHER; - } - else - jf->source_type |= SDJF_OTHER; - } - else - jf->source_type = SDJF_LOCAL | SDJF_OTHER; - - journal_file_update_msg_ut(jf->filename, jf); - - internal_error(true, - "found journal file '%s', type %d, source '%s', " - "file modified: %"PRIu64", " - "msg {first: %"PRIu64", last: %"PRIu64"}", - jf->filename, jf->source_type, jf->source ? string2str(jf->source) : "<unset>", - jf->file_last_modified_ut, - jf->msg_first_ut, jf->msg_last_ut); -} - -static bool files_registry_conflict_cb(const DICTIONARY_ITEM *item, void *old_value, void *new_value, void *data __maybe_unused) { - struct journal_file *jf = old_value; - struct journal_file *njf = new_value; - - if(njf->last_scan_ut > jf->last_scan_ut) - jf->last_scan_ut = njf->last_scan_ut; - - if(njf->file_last_modified_ut > jf->file_last_modified_ut) { - jf->file_last_modified_ut = njf->file_last_modified_ut; - jf->size = njf->size; - - const char *filename = dictionary_acquired_item_name(item); - journal_file_update_msg_ut(filename, jf); - -// internal_error(true, -// "updated journal file '%s', type %d, " -// "file modified: %"PRIu64", " -// "msg {first: %"PRIu64", last: %"PRIu64"}", -// filename, jf->source_type, -// jf->file_last_modified_ut, -// jf->msg_first_ut, jf->msg_last_ut); - } - - return false; -} - -#define SDJF_SOURCE_ALL_NAME "all" -#define SDJF_SOURCE_LOCAL_NAME "all-local-logs" -#define SDJF_SOURCE_LOCAL_SYSTEM_NAME "all-local-system-logs" -#define SDJF_SOURCE_LOCAL_USERS_NAME "all-local-user-logs" -#define SDJF_SOURCE_LOCAL_OTHER_NAME "all-uncategorized" -#define SDJF_SOURCE_NAMESPACES_NAME "all-local-namespaces" -#define SDJF_SOURCE_REMOTES_NAME "all-remote-systems" - -struct journal_file_source { - usec_t first_ut; - usec_t last_ut; - size_t count; - uint64_t size; -}; - -static void human_readable_size_ib(uint64_t size, char *dst, size_t dst_len) { - if(size > 1024ULL * 1024 * 1024 * 1024) - snprintfz(dst, dst_len, "%0.2f TiB", (double)size / 1024.0 / 1024.0 / 1024.0 / 1024.0); - else if(size > 1024ULL * 1024 * 1024) - snprintfz(dst, dst_len, "%0.2f GiB", (double)size / 1024.0 / 1024.0 / 1024.0); - else if(size > 1024ULL * 1024) - snprintfz(dst, dst_len, "%0.2f MiB", (double)size / 1024.0 / 1024.0); - else if(size > 1024ULL) - snprintfz(dst, dst_len, "%0.2f KiB", (double)size / 1024.0); - else - snprintfz(dst, dst_len, "%"PRIu64" B", size); -} - -#define print_duration(dst, dst_len, pos, remaining, duration, one, many, printed) do { \ - if((remaining) > (duration)) { \ - uint64_t _count = (remaining) / (duration); \ - uint64_t _rem = (remaining) - (_count * (duration)); \ - (pos) += snprintfz(&(dst)[pos], (dst_len) - (pos), "%s%s%"PRIu64" %s", (printed) ? ", " : "", _rem ? "" : "and ", _count, _count > 1 ? (many) : (one)); \ - (remaining) = _rem; \ - (printed) = true; \ - } \ -} while(0) - -static void human_readable_duration_s(time_t duration_s, char *dst, size_t dst_len) { - if(duration_s < 0) - duration_s = -duration_s; - - size_t pos = 0; - dst[0] = 0 ; - - bool printed = false; - print_duration(dst, dst_len, pos, duration_s, 86400 * 365, "year", "years", printed); - print_duration(dst, dst_len, pos, duration_s, 86400 * 30, "month", "months", printed); - print_duration(dst, dst_len, pos, duration_s, 86400 * 1, "day", "days", printed); - print_duration(dst, dst_len, pos, duration_s, 3600 * 1, "hour", "hours", printed); - print_duration(dst, dst_len, pos, duration_s, 60 * 1, "min", "mins", printed); - print_duration(dst, dst_len, pos, duration_s, 1, "sec", "secs", printed); -} - -static int journal_file_to_json_array_cb(const DICTIONARY_ITEM *item, void *entry, void *data) { - struct journal_file_source *jfs = entry; - BUFFER *wb = data; - - const char *name = dictionary_acquired_item_name(item); - - buffer_json_add_array_item_object(wb); - { - char size_for_humans[100]; - human_readable_size_ib(jfs->size, size_for_humans, sizeof(size_for_humans)); - - char duration_for_humans[1024]; - human_readable_duration_s((time_t)((jfs->last_ut - jfs->first_ut) / USEC_PER_SEC), - duration_for_humans, sizeof(duration_for_humans)); - - char info[1024]; - snprintfz(info, sizeof(info), "%zu files, with a total size of %s, covering %s", - jfs->count, size_for_humans, duration_for_humans); - - buffer_json_member_add_string(wb, "id", name); - buffer_json_member_add_string(wb, "name", name); - buffer_json_member_add_string(wb, "pill", size_for_humans); - buffer_json_member_add_string(wb, "info", info); - } - buffer_json_object_close(wb); // options object - - return 1; -} - -static bool journal_file_merge_sizes(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value , void *data __maybe_unused) { - struct journal_file_source *jfs = old_value, *njfs = new_value; - jfs->count += njfs->count; - jfs->size += njfs->size; - - if(njfs->first_ut && njfs->first_ut < jfs->first_ut) - jfs->first_ut = njfs->first_ut; - - if(njfs->last_ut && njfs->last_ut > jfs->last_ut) - jfs->last_ut = njfs->last_ut; - - return false; -} - -static void available_journal_file_sources_to_json_array(BUFFER *wb) { - DICTIONARY *dict = dictionary_create(DICT_OPTION_SINGLE_THREADED|DICT_OPTION_NAME_LINK_DONT_CLONE|DICT_OPTION_DONT_OVERWRITE_VALUE); - dictionary_register_conflict_callback(dict, journal_file_merge_sizes, NULL); - - struct journal_file_source t = { 0 }; - - struct journal_file *jf; - dfe_start_read(journal_files_registry, jf) { - t.first_ut = jf->msg_first_ut; - t.last_ut = jf->msg_last_ut; - t.count = 1; - t.size = jf->size; - - dictionary_set(dict, SDJF_SOURCE_ALL_NAME, &t, sizeof(t)); - - if((jf->source_type & (SDJF_LOCAL)) == (SDJF_LOCAL)) - dictionary_set(dict, SDJF_SOURCE_LOCAL_NAME, &t, sizeof(t)); - if((jf->source_type & (SDJF_LOCAL | SDJF_SYSTEM)) == (SDJF_LOCAL | SDJF_SYSTEM)) - dictionary_set(dict, SDJF_SOURCE_LOCAL_SYSTEM_NAME, &t, sizeof(t)); - if((jf->source_type & (SDJF_LOCAL | SDJF_USER)) == (SDJF_LOCAL | SDJF_USER)) - dictionary_set(dict, SDJF_SOURCE_LOCAL_USERS_NAME, &t, sizeof(t)); - if((jf->source_type & (SDJF_LOCAL | SDJF_OTHER)) == (SDJF_LOCAL | SDJF_OTHER)) - dictionary_set(dict, SDJF_SOURCE_LOCAL_OTHER_NAME, &t, sizeof(t)); - if((jf->source_type & (SDJF_NAMESPACE)) == (SDJF_NAMESPACE)) - dictionary_set(dict, SDJF_SOURCE_NAMESPACES_NAME, &t, sizeof(t)); - if((jf->source_type & (SDJF_REMOTE)) == (SDJF_REMOTE)) - dictionary_set(dict, SDJF_SOURCE_REMOTES_NAME, &t, sizeof(t)); - if(jf->source) - dictionary_set(dict, string2str(jf->source), &t, sizeof(t)); - } - dfe_done(jf); - - dictionary_sorted_walkthrough_read(dict, journal_file_to_json_array_cb, wb); - - dictionary_destroy(dict); -} - -static void files_registry_delete_cb(const DICTIONARY_ITEM *item, void *value, void *data __maybe_unused) { - struct journal_file *jf = value; (void)jf; - const char *filename = dictionary_acquired_item_name(item); (void)filename; - - string_freez(jf->source); - internal_error(true, "removed journal file '%s'", filename); -} - -void journal_directory_scan(const char *dirname, int depth, usec_t last_scan_ut) { - static const char *ext = ".journal"; - static const size_t ext_len = sizeof(".journal") - 1; - - if (depth > VAR_LOG_JOURNAL_MAX_DEPTH) - return; - - DIR *dir; - struct dirent *entry; - struct stat info; - char absolute_path[FILENAME_MAX]; - - // Open the directory. - if ((dir = opendir(dirname)) == NULL) { - if(errno != ENOENT && errno != ENOTDIR) - netdata_log_error("Cannot opendir() '%s'", dirname); - return; - } - - // Read each entry in the directory. - while ((entry = readdir(dir)) != NULL) { - snprintfz(absolute_path, sizeof(absolute_path), "%s/%s", dirname, entry->d_name); - if (stat(absolute_path, &info) != 0) { - netdata_log_error("Failed to stat() '%s", absolute_path); - continue; - } - - if (S_ISDIR(info.st_mode)) { - // If entry is a directory, call traverse recursively. - if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) - journal_directory_scan(absolute_path, depth + 1, last_scan_ut); - - } - else if (S_ISREG(info.st_mode)) { - // If entry is a regular file, check if it ends with .journal. - char *filename = entry->d_name; - size_t len = strlen(filename); - - if (len > ext_len && strcmp(filename + len - ext_len, ext) == 0) { - struct journal_file t = { - .file_last_modified_ut = info.st_mtim.tv_sec * USEC_PER_SEC + info.st_mtim.tv_nsec / NSEC_PER_USEC, - .last_scan_ut = last_scan_ut, - .size = info.st_size, - .max_journal_vs_realtime_delta_ut = JOURNAL_VS_REALTIME_DELTA_DEFAULT_UT, - }; - dictionary_set(journal_files_registry, absolute_path, &t, sizeof(t)); - } - } - } - - closedir(dir); -} - -static void journal_files_registry_update() { - usec_t scan_ut = now_monotonic_usec(); - - for(unsigned i = 0; i < MAX_JOURNAL_DIRECTORIES ;i++) { - if(!journal_directories[i].path) - break; - - journal_directory_scan(journal_directories[i].path, 0, scan_ut); - } - - struct journal_file *jf; - dfe_start_write(journal_files_registry, jf) { - if(jf->last_scan_ut < scan_ut) - dictionary_del(journal_files_registry, jf_dfe.name); - } - dfe_done(jf); -} - -// ---------------------------------------------------------------------------- - static bool jf_is_mine(struct journal_file *jf, FUNCTION_QUERY_STATUS *fqs) { - if((fqs->source_type == SDJF_ALL || (jf->source_type & fqs->source_type) == fqs->source_type) && - (!fqs->source || fqs->source == jf->source)) { + if((fqs->source_type == SDJF_NONE && !fqs->sources) || (jf->source_type & fqs->source_type) || + (fqs->sources && simple_pattern_matches(fqs->sources, string2str(jf->source)))) { + + if(!jf->msg_last_ut || !jf->msg_last_ut) + // the file is not scanned yet, or the timestamps have not been updated, + // so we don't know if it can contribute or not - let's add it. + return true; usec_t anchor_delta = JOURNAL_VS_REALTIME_DELTA_MAX_UT; - usec_t first_ut = jf->msg_first_ut; + usec_t first_ut = jf->msg_first_ut - anchor_delta; usec_t last_ut = jf->msg_last_ut + anchor_delta; if(last_ut >= fqs->after_ut && first_ut <= fqs->before_ut) @@ -1191,30 +1101,6 @@ static bool jf_is_mine(struct journal_file *jf, FUNCTION_QUERY_STATUS *fqs) { return false; } -static int journal_file_dict_items_backward_compar(const void *a, const void *b) { - const DICTIONARY_ITEM **ad = (const DICTIONARY_ITEM **)a, **bd = (const DICTIONARY_ITEM **)b; - struct journal_file *jfa = dictionary_acquired_item_value(*ad); - struct journal_file *jfb = dictionary_acquired_item_value(*bd); - - if(jfa->msg_last_ut < jfb->msg_last_ut) - return 1; - - if(jfa->msg_last_ut > jfb->msg_last_ut) - return -1; - - if(jfa->msg_first_ut < jfb->msg_first_ut) - return 1; - - if(jfa->msg_first_ut > jfb->msg_first_ut) - return -1; - - return 0; -} - -static int journal_file_dict_items_forward_compar(const void *a, const void *b) { - return -journal_file_dict_items_backward_compar(a, b); -} - static int netdata_systemd_journal_query(BUFFER *wb, FACETS *facets, FUNCTION_QUERY_STATUS *fqs) { ND_SD_JOURNAL_STATUS status = ND_SD_JOURNAL_NO_FILE_MATCHED; struct journal_file *jf; @@ -1260,8 +1146,12 @@ static int netdata_systemd_journal_query(BUFFER *wb, FACETS *facets, FUNCTION_QU } bool partial = false; - usec_t started_ut; - usec_t ended_ut = now_monotonic_usec(); + usec_t query_started_ut = now_monotonic_usec(); + usec_t started_ut = query_started_ut; + usec_t ended_ut = started_ut; + usec_t duration_ut = 0, max_duration_ut = 0; + + sampling_query_init(fqs, facets); buffer_json_member_add_array(wb, "_journal_files"); for(size_t f = 0; f < files_used ;f++) { @@ -1271,8 +1161,19 @@ static int netdata_systemd_journal_query(BUFFER *wb, FACETS *facets, FUNCTION_QU if(!jf_is_mine(jf, fqs)) continue; + started_ut = ended_ut; + + // do not even try to do the query if we expect it to pass the timeout + if(ended_ut > (query_started_ut + (fqs->stop_monotonic_ut - query_started_ut) * 3 / 4) && + ended_ut + max_duration_ut * 2 >= fqs->stop_monotonic_ut) { + + partial = true; + status = ND_SD_JOURNAL_TIMED_OUT; + break; + } + fqs->file_working++; - fqs->cached_count = 0; + // fqs->cached_count = 0; size_t fs_calls = fstat_thread_calls; size_t fs_cached = fstat_thread_cached_responses; @@ -1281,8 +1182,22 @@ static int netdata_systemd_journal_query(BUFFER *wb, FACETS *facets, FUNCTION_QU size_t bytes_read = fqs->bytes_read; size_t matches_setup_ut = fqs->matches_setup_ut; + sampling_file_init(fqs, jf); + ND_SD_JOURNAL_STATUS tmp_status = netdata_systemd_journal_query_one_file(filename, wb, facets, jf, fqs); +// nd_log(NDLS_COLLECTORS, NDLP_INFO, +// "JOURNAL ESTIMATION FINAL: '%s' " +// "total lines %zu [sampled=%zu, unsampled=%zu, estimated=%zu], " +// "file [%"PRIu64" - %"PRIu64", duration %"PRId64", known lines in file %zu], " +// "query [%"PRIu64" - %"PRIu64", duration %"PRId64"], " +// , jf->filename +// , fqs->samples_per_file.sampled + fqs->samples_per_file.unsampled + fqs->samples_per_file.estimated +// , fqs->samples_per_file.sampled, fqs->samples_per_file.unsampled, fqs->samples_per_file.estimated +// , jf->msg_first_ut, jf->msg_last_ut, jf->msg_last_ut - jf->msg_first_ut, jf->messages_in_file +// , fqs->query_file.start_ut, fqs->query_file.stop_ut, fqs->query_file.stop_ut - fqs->query_file.start_ut +// ); + rows_useful = fqs->rows_useful - rows_useful; rows_read = fqs->rows_read - rows_read; bytes_read = fqs->bytes_read - bytes_read; @@ -1290,9 +1205,11 @@ static int netdata_systemd_journal_query(BUFFER *wb, FACETS *facets, FUNCTION_QU fs_calls = fstat_thread_calls - fs_calls; fs_cached = fstat_thread_cached_responses - fs_cached; - started_ut = ended_ut; ended_ut = now_monotonic_usec(); - usec_t duration_ut = ended_ut - started_ut; + duration_ut = ended_ut - started_ut; + + if(duration_ut > max_duration_ut) + max_duration_ut = duration_ut; buffer_json_add_array_item_object(wb); // journal file { @@ -1315,6 +1232,16 @@ static int netdata_systemd_journal_query(BUFFER *wb, FACETS *facets, FUNCTION_QU buffer_json_member_add_uint64(wb, "duration_matches_ut", matches_setup_ut); buffer_json_member_add_uint64(wb, "fstat_query_calls", fs_calls); buffer_json_member_add_uint64(wb, "fstat_query_cached_responses", fs_cached); + + if(fqs->sampling) { + buffer_json_member_add_object(wb, "_sampling"); + { + buffer_json_member_add_uint64(wb, "sampled", fqs->samples_per_file.sampled); + buffer_json_member_add_uint64(wb, "unsampled", fqs->samples_per_file.unsampled); + buffer_json_member_add_uint64(wb, "estimated", fqs->samples_per_file.estimated); + } + buffer_json_object_close(wb); // _sampling + } } buffer_json_object_close(wb); // journal file @@ -1384,6 +1311,64 @@ static int netdata_systemd_journal_query(BUFFER *wb, FACETS *facets, FUNCTION_QU buffer_json_member_add_boolean(wb, "partial", partial); buffer_json_member_add_string(wb, "type", "table"); + // build a message for the query + if(!fqs->data_only) { + CLEAN_BUFFER *msg = buffer_create(0, NULL); + CLEAN_BUFFER *msg_description = buffer_create(0, NULL); + ND_LOG_FIELD_PRIORITY msg_priority = NDLP_INFO; + + if(!journal_files_completed_once()) { + buffer_strcat(msg, "Journals are still being scanned. "); + buffer_strcat(msg_description + , "LIBRARY SCAN: The journal files are still being scanned, you are probably viewing incomplete data. "); + msg_priority = NDLP_WARNING; + } + + if(partial) { + buffer_strcat(msg, "Query timed-out, incomplete data. "); + buffer_strcat(msg_description + , "QUERY TIMEOUT: The query timed out and may not include all the data of the selected window. "); + msg_priority = NDLP_WARNING; + } + + if(fqs->samples.estimated || fqs->samples.unsampled) { + double percent = (double) (fqs->samples.sampled * 100.0 / + (fqs->samples.estimated + fqs->samples.unsampled + fqs->samples.sampled)); + buffer_sprintf(msg, "%.2f%% real data", percent); + buffer_sprintf(msg_description, "ACTUAL DATA: The filters counters reflect %0.2f%% of the data. ", percent); + msg_priority = MIN(msg_priority, NDLP_NOTICE); + } + + if(fqs->samples.unsampled) { + double percent = (double) (fqs->samples.unsampled * 100.0 / + (fqs->samples.estimated + fqs->samples.unsampled + fqs->samples.sampled)); + buffer_sprintf(msg, ", %.2f%% unsampled", percent); + buffer_sprintf(msg_description + , "UNSAMPLED DATA: %0.2f%% of the events exist and have been counted, but their values have not been evaluated, so they are not included in the filters counters. " + , percent); + msg_priority = MIN(msg_priority, NDLP_NOTICE); + } + + if(fqs->samples.estimated) { + double percent = (double) (fqs->samples.estimated * 100.0 / + (fqs->samples.estimated + fqs->samples.unsampled + fqs->samples.sampled)); + buffer_sprintf(msg, ", %.2f%% estimated", percent); + buffer_sprintf(msg_description + , "ESTIMATED DATA: The query selected a large amount of data, so to avoid delaying too much, the presented data are estimated by %0.2f%%. " + , percent); + msg_priority = MIN(msg_priority, NDLP_NOTICE); + } + + buffer_json_member_add_object(wb, "message"); + if(buffer_tostring(msg)) { + buffer_json_member_add_string(wb, "title", buffer_tostring(msg)); + buffer_json_member_add_string(wb, "description", buffer_tostring(msg_description)); + buffer_json_member_add_string(wb, "status", nd_log_id2priority(msg_priority)); + } + // else send an empty object if there is nothing to tell + buffer_json_object_close(wb); // message + } + if(!fqs->data_only) { buffer_json_member_add_time_t(wb, "update_every", 1); buffer_json_member_add_string(wb, "help", SYSTEMD_JOURNAL_FUNCTION_DESCRIPTION); @@ -1403,6 +1388,17 @@ static int netdata_systemd_journal_query(BUFFER *wb, FACETS *facets, FUNCTION_QU buffer_json_member_add_uint64(wb, "cached", fstat_thread_cached_responses); } buffer_json_object_close(wb); // _fstat_caching + + if(fqs->sampling) { + buffer_json_member_add_object(wb, "_sampling"); + { + buffer_json_member_add_uint64(wb, "sampled", fqs->samples.sampled); + buffer_json_member_add_uint64(wb, "unsampled", fqs->samples.unsampled); + buffer_json_member_add_uint64(wb, "estimated", fqs->samples.estimated); + } + buffer_json_object_close(wb); // _sampling + } + buffer_json_finalize(wb); return HTTP_RESP_OK; @@ -1471,6 +1467,10 @@ static void netdata_systemd_journal_function_help(const char *transaction) { " The number of items to return.\n" " The default is %d.\n" "\n" + " "JOURNAL_PARAMETER_SAMPLING":ITEMS\n" + " The number of log entries to sample to estimate facets counters and histogram.\n" + " The default is %d.\n" + "\n" " "JOURNAL_PARAMETER_ANCHOR":TIMESTAMP_IN_MICROSECONDS\n" " Return items relative to this timestamp.\n" " The exact items to be returned depend on the query `"JOURNAL_PARAMETER_DIRECTION"`.\n" @@ -1511,6 +1511,7 @@ static void netdata_systemd_journal_function_help(const char *transaction) { , JOURNAL_DEFAULT_SLICE_MODE ? "true" : "false" // slice , -SYSTEMD_JOURNAL_DEFAULT_QUERY_DURATION , SYSTEMD_JOURNAL_DEFAULT_ITEMS_PER_QUERY + , SYSTEMD_JOURNAL_DEFAULT_ITEMS_SAMPLING , JOURNAL_DEFAULT_DIRECTION == FACETS_ANCHOR_DIRECTION_BACKWARD ? "backward" : "forward" ); @@ -1521,572 +1522,6 @@ static void netdata_systemd_journal_function_help(const char *transaction) { buffer_free(wb); } -const char *errno_map[] = { - [1] = "1 (EPERM)", // "Operation not permitted", - [2] = "2 (ENOENT)", // "No such file or directory", - [3] = "3 (ESRCH)", // "No such process", - [4] = "4 (EINTR)", // "Interrupted system call", - [5] = "5 (EIO)", // "Input/output error", - [6] = "6 (ENXIO)", // "No such device or address", - [7] = "7 (E2BIG)", // "Argument list too long", - [8] = "8 (ENOEXEC)", // "Exec format error", - [9] = "9 (EBADF)", // "Bad file descriptor", - [10] = "10 (ECHILD)", // "No child processes", - [11] = "11 (EAGAIN)", // "Resource temporarily unavailable", - [12] = "12 (ENOMEM)", // "Cannot allocate memory", - [13] = "13 (EACCES)", // "Permission denied", - [14] = "14 (EFAULT)", // "Bad address", - [15] = "15 (ENOTBLK)", // "Block device required", - [16] = "16 (EBUSY)", // "Device or resource busy", - [17] = "17 (EEXIST)", // "File exists", - [18] = "18 (EXDEV)", // "Invalid cross-device link", - [19] = "19 (ENODEV)", // "No such device", - [20] = "20 (ENOTDIR)", // "Not a directory", - [21] = "21 (EISDIR)", // "Is a directory", - [22] = "22 (EINVAL)", // "Invalid argument", - [23] = "23 (ENFILE)", // "Too many open files in system", - [24] = "24 (EMFILE)", // "Too many open files", - [25] = "25 (ENOTTY)", // "Inappropriate ioctl for device", - [26] = "26 (ETXTBSY)", // "Text file busy", - [27] = "27 (EFBIG)", // "File too large", - [28] = "28 (ENOSPC)", // "No space left on device", - [29] = "29 (ESPIPE)", // "Illegal seek", - [30] = "30 (EROFS)", // "Read-only file system", - [31] = "31 (EMLINK)", // "Too many links", - [32] = "32 (EPIPE)", // "Broken pipe", - [33] = "33 (EDOM)", // "Numerical argument out of domain", - [34] = "34 (ERANGE)", // "Numerical result out of range", - [35] = "35 (EDEADLK)", // "Resource deadlock avoided", - [36] = "36 (ENAMETOOLONG)", // "File name too long", - [37] = "37 (ENOLCK)", // "No locks available", - [38] = "38 (ENOSYS)", // "Function not implemented", - [39] = "39 (ENOTEMPTY)", // "Directory not empty", - [40] = "40 (ELOOP)", // "Too many levels of symbolic links", - [42] = "42 (ENOMSG)", // "No message of desired type", - [43] = "43 (EIDRM)", // "Identifier removed", - [44] = "44 (ECHRNG)", // "Channel number out of range", - [45] = "45 (EL2NSYNC)", // "Level 2 not synchronized", - [46] = "46 (EL3HLT)", // "Level 3 halted", - [47] = "47 (EL3RST)", // "Level 3 reset", - [48] = "48 (ELNRNG)", // "Link number out of range", - [49] = "49 (EUNATCH)", // "Protocol driver not attached", - [50] = "50 (ENOCSI)", // "No CSI structure available", - [51] = "51 (EL2HLT)", // "Level 2 halted", - [52] = "52 (EBADE)", // "Invalid exchange", - [53] = "53 (EBADR)", // "Invalid request descriptor", - [54] = "54 (EXFULL)", // "Exchange full", - [55] = "55 (ENOANO)", // "No anode", - [56] = "56 (EBADRQC)", // "Invalid request code", - [57] = "57 (EBADSLT)", // "Invalid slot", - [59] = "59 (EBFONT)", // "Bad font file format", - [60] = "60 (ENOSTR)", // "Device not a stream", - [61] = "61 (ENODATA)", // "No data available", - [62] = "62 (ETIME)", // "Timer expired", - [63] = "63 (ENOSR)", // "Out of streams resources", - [64] = "64 (ENONET)", // "Machine is not on the network", - [65] = "65 (ENOPKG)", // "Package not installed", - [66] = "66 (EREMOTE)", // "Object is remote", - [67] = "67 (ENOLINK)", // "Link has been severed", - [68] = "68 (EADV)", // "Advertise error", - [69] = "69 (ESRMNT)", // "Srmount error", - [70] = "70 (ECOMM)", // "Communication error on send", - [71] = "71 (EPROTO)", // "Protocol error", - [72] = "72 (EMULTIHOP)", // "Multihop attempted", - [73] = "73 (EDOTDOT)", // "RFS specific error", - [74] = "74 (EBADMSG)", // "Bad message", - [75] = "75 (EOVERFLOW)", // "Value too large for defined data type", - [76] = "76 (ENOTUNIQ)", // "Name not unique on network", - [77] = "77 (EBADFD)", // "File descriptor in bad state", - [78] = "78 (EREMCHG)", // "Remote address changed", - [79] = "79 (ELIBACC)", // "Can not access a needed shared library", - [80] = "80 (ELIBBAD)", // "Accessing a corrupted shared library", - [81] = "81 (ELIBSCN)", // ".lib section in a.out corrupted", - [82] = "82 (ELIBMAX)", // "Attempting to link in too many shared libraries", - [83] = "83 (ELIBEXEC)", // "Cannot exec a shared library directly", - [84] = "84 (EILSEQ)", // "Invalid or incomplete multibyte or wide character", - [85] = "85 (ERESTART)", // "Interrupted system call should be restarted", - [86] = "86 (ESTRPIPE)", // "Streams pipe error", - [87] = "87 (EUSERS)", // "Too many users", - [88] = "88 (ENOTSOCK)", // "Socket operation on non-socket", - [89] = "89 (EDESTADDRREQ)", // "Destination address required", - [90] = "90 (EMSGSIZE)", // "Message too long", - [91] = "91 (EPROTOTYPE)", // "Protocol wrong type for socket", - [92] = "92 (ENOPROTOOPT)", // "Protocol not available", - [93] = "93 (EPROTONOSUPPORT)", // "Protocol not supported", - [94] = "94 (ESOCKTNOSUPPORT)", // "Socket type not supported", - [95] = "95 (ENOTSUP)", // "Operation not supported", - [96] = "96 (EPFNOSUPPORT)", // "Protocol family not supported", - [97] = "97 (EAFNOSUPPORT)", // "Address family not supported by protocol", - [98] = "98 (EADDRINUSE)", // "Address already in use", - [99] = "99 (EADDRNOTAVAIL)", // "Cannot assign requested address", - [100] = "100 (ENETDOWN)", // "Network is down", - [101] = "101 (ENETUNREACH)", // "Network is unreachable", - [102] = "102 (ENETRESET)", // "Network dropped connection on reset", - [103] = "103 (ECONNABORTED)", // "Software caused connection abort", - [104] = "104 (ECONNRESET)", // "Connection reset by peer", - [105] = "105 (ENOBUFS)", // "No buffer space available", - [106] = "106 (EISCONN)", // "Transport endpoint is already connected", - [107] = "107 (ENOTCONN)", // "Transport endpoint is not connected", - [108] = "108 (ESHUTDOWN)", // "Cannot send after transport endpoint shutdown", - [109] = "109 (ETOOMANYREFS)", // "Too many references: cannot splice", - [110] = "110 (ETIMEDOUT)", // "Connection timed out", - [111] = "111 (ECONNREFUSED)", // "Connection refused", - [112] = "112 (EHOSTDOWN)", // "Host is down", - [113] = "113 (EHOSTUNREACH)", // "No route to host", - [114] = "114 (EALREADY)", // "Operation already in progress", - [115] = "115 (EINPROGRESS)", // "Operation now in progress", - [116] = "116 (ESTALE)", // "Stale file handle", - [117] = "117 (EUCLEAN)", // "Structure needs cleaning", - [118] = "118 (ENOTNAM)", // "Not a XENIX named type file", - [119] = "119 (ENAVAIL)", // "No XENIX semaphores available", - [120] = "120 (EISNAM)", // "Is a named type file", - [121] = "121 (EREMOTEIO)", // "Remote I/O error", - [122] = "122 (EDQUOT)", // "Disk quota exceeded", - [123] = "123 (ENOMEDIUM)", // "No medium found", - [124] = "124 (EMEDIUMTYPE)", // "Wrong medium type", - [125] = "125 (ECANCELED)", // "Operation canceled", - [126] = "126 (ENOKEY)", // "Required key not available", - [127] = "127 (EKEYEXPIRED)", // "Key has expired", - [128] = "128 (EKEYREVOKED)", // "Key has been revoked", - [129] = "129 (EKEYREJECTED)", // "Key was rejected by service", - [130] = "130 (EOWNERDEAD)", // "Owner died", - [131] = "131 (ENOTRECOVERABLE)", // "State not recoverable", - [132] = "132 (ERFKILL)", // "Operation not possible due to RF-kill", - [133] = "133 (EHWPOISON)", // "Memory page has hardware error", -}; - -static const char *syslog_facility_to_name(int facility) { - switch (facility) { - case LOG_FAC(LOG_KERN): return "kern"; - case LOG_FAC(LOG_USER): return "user"; - case LOG_FAC(LOG_MAIL): return "mail"; - case LOG_FAC(LOG_DAEMON): return "daemon"; - case LOG_FAC(LOG_AUTH): return "auth"; - case LOG_FAC(LOG_SYSLOG): return "syslog"; - case LOG_FAC(LOG_LPR): return "lpr"; - case LOG_FAC(LOG_NEWS): return "news"; - case LOG_FAC(LOG_UUCP): return "uucp"; - case LOG_FAC(LOG_CRON): return "cron"; - case LOG_FAC(LOG_AUTHPRIV): return "authpriv"; - case LOG_FAC(LOG_FTP): return "ftp"; - case LOG_FAC(LOG_LOCAL0): return "local0"; - case LOG_FAC(LOG_LOCAL1): return "local1"; - case LOG_FAC(LOG_LOCAL2): return "local2"; - case LOG_FAC(LOG_LOCAL3): return "local3"; - case LOG_FAC(LOG_LOCAL4): return "local4"; - case LOG_FAC(LOG_LOCAL5): return "local5"; - case LOG_FAC(LOG_LOCAL6): return "local6"; - case LOG_FAC(LOG_LOCAL7): return "local7"; - default: return NULL; - } -} - -static const char *syslog_priority_to_name(int priority) { - switch (priority) { - case LOG_ALERT: return "alert"; - case LOG_CRIT: return "critical"; - case LOG_DEBUG: return "debug"; - case LOG_EMERG: return "panic"; - case LOG_ERR: return "error"; - case LOG_INFO: return "info"; - case LOG_NOTICE: return "notice"; - case LOG_WARNING: return "warning"; - default: return NULL; - } -} - -static FACET_ROW_SEVERITY syslog_priority_to_facet_severity(FACETS *facets __maybe_unused, FACET_ROW *row, void *data __maybe_unused) { - // same to - // https://github.com/systemd/systemd/blob/aab9e4b2b86905a15944a1ac81e471b5b7075932/src/basic/terminal-util.c#L1501 - // function get_log_colors() - - FACET_ROW_KEY_VALUE *priority_rkv = dictionary_get(row->dict, "PRIORITY"); - if(!priority_rkv || priority_rkv->empty) - return FACET_ROW_SEVERITY_NORMAL; - - int priority = str2i(buffer_tostring(priority_rkv->wb)); - - if(priority <= LOG_ERR) - return FACET_ROW_SEVERITY_CRITICAL; - - else if (priority <= LOG_WARNING) - return FACET_ROW_SEVERITY_WARNING; - - else if(priority <= LOG_NOTICE) - return FACET_ROW_SEVERITY_NOTICE; - - else if(priority >= LOG_DEBUG) - return FACET_ROW_SEVERITY_DEBUG; - - return FACET_ROW_SEVERITY_NORMAL; -} - -static char *uid_to_username(uid_t uid, char *buffer, size_t buffer_size) { - static __thread char tmp[1024 + 1]; - struct passwd pw, *result = NULL; - - if (getpwuid_r(uid, &pw, tmp, sizeof(tmp), &result) != 0 || !result || !pw.pw_name || !(*pw.pw_name)) - snprintfz(buffer, buffer_size - 1, "%u", uid); - else - snprintfz(buffer, buffer_size - 1, "%u (%s)", uid, pw.pw_name); - - return buffer; -} - -static char *gid_to_groupname(gid_t gid, char* buffer, size_t buffer_size) { - static __thread char tmp[1024]; - struct group grp, *result = NULL; - - if (getgrgid_r(gid, &grp, tmp, sizeof(tmp), &result) != 0 || !result || !grp.gr_name || !(*grp.gr_name)) - snprintfz(buffer, buffer_size - 1, "%u", gid); - else - snprintfz(buffer, buffer_size - 1, "%u (%s)", gid, grp.gr_name); - - return buffer; -} - -static void netdata_systemd_journal_transform_syslog_facility(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) { - const char *v = buffer_tostring(wb); - if(*v && isdigit(*v)) { - int facility = str2i(buffer_tostring(wb)); - const char *name = syslog_facility_to_name(facility); - if (name) { - buffer_flush(wb); - buffer_strcat(wb, name); - } - } -} - -static void netdata_systemd_journal_transform_priority(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) { - if(scope == FACETS_TRANSFORM_FACET_SORT) - return; - - const char *v = buffer_tostring(wb); - if(*v && isdigit(*v)) { - int priority = str2i(buffer_tostring(wb)); - const char *name = syslog_priority_to_name(priority); - if (name) { - buffer_flush(wb); - buffer_strcat(wb, name); - } - } -} - -static void netdata_systemd_journal_transform_errno(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) { - if(scope == FACETS_TRANSFORM_FACET_SORT) - return; - - const char *v = buffer_tostring(wb); - if(*v && isdigit(*v)) { - unsigned err_no = str2u(buffer_tostring(wb)); - if(err_no > 0 && err_no < sizeof(errno_map) / sizeof(*errno_map)) { - const char *name = errno_map[err_no]; - if(name) { - buffer_flush(wb); - buffer_strcat(wb, name); - } - } - } -} - -// ---------------------------------------------------------------------------- -// UID and GID transformation - -#define UID_GID_HASHTABLE_SIZE 10000 - -struct word_t2str_hashtable_entry { - struct word_t2str_hashtable_entry *next; - Word_t hash; - size_t len; - char str[]; -}; - -struct word_t2str_hashtable { - SPINLOCK spinlock; - size_t size; - struct word_t2str_hashtable_entry *hashtable[UID_GID_HASHTABLE_SIZE]; -}; - -struct word_t2str_hashtable uid_hashtable = { - .size = UID_GID_HASHTABLE_SIZE, -}; - -struct word_t2str_hashtable gid_hashtable = { - .size = UID_GID_HASHTABLE_SIZE, -}; - -struct word_t2str_hashtable_entry **word_t2str_hashtable_slot(struct word_t2str_hashtable *ht, Word_t hash) { - size_t slot = hash % ht->size; - struct word_t2str_hashtable_entry **e = &ht->hashtable[slot]; - - while(*e && (*e)->hash != hash) - e = &((*e)->next); - - return e; -} - -const char *uid_to_username_cached(uid_t uid, size_t *length) { - spinlock_lock(&uid_hashtable.spinlock); - - struct word_t2str_hashtable_entry **e = word_t2str_hashtable_slot(&uid_hashtable, uid); - if(!(*e)) { - static __thread char buf[1024]; - const char *name = uid_to_username(uid, buf, sizeof(buf)); - size_t size = strlen(name) + 1; - - *e = callocz(1, sizeof(struct word_t2str_hashtable_entry) + size); - (*e)->len = size - 1; - (*e)->hash = uid; - memcpy((*e)->str, name, size); - } - - spinlock_unlock(&uid_hashtable.spinlock); - - *length = (*e)->len; - return (*e)->str; -} - -const char *gid_to_groupname_cached(gid_t gid, size_t *length) { - spinlock_lock(&gid_hashtable.spinlock); - - struct word_t2str_hashtable_entry **e = word_t2str_hashtable_slot(&gid_hashtable, gid); - if(!(*e)) { - static __thread char buf[1024]; - const char *name = gid_to_groupname(gid, buf, sizeof(buf)); - size_t size = strlen(name) + 1; - - *e = callocz(1, sizeof(struct word_t2str_hashtable_entry) + size); - (*e)->len = size - 1; - (*e)->hash = gid; - memcpy((*e)->str, name, size); - } - - spinlock_unlock(&gid_hashtable.spinlock); - - *length = (*e)->len; - return (*e)->str; -} - -DICTIONARY *boot_ids_to_first_ut = NULL; - -static void netdata_systemd_journal_transform_boot_id(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) { - const char *boot_id = buffer_tostring(wb); - if(*boot_id && isxdigit(*boot_id)) { - usec_t ut = UINT64_MAX; - usec_t *p_ut = dictionary_get(boot_ids_to_first_ut, boot_id); - if(!p_ut) { - struct journal_file *jf; - dfe_start_read(journal_files_registry, jf) { - const char *files[2] = { - [0] = jf_dfe.name, - [1] = NULL, - }; - - sd_journal *j = NULL; - if(sd_journal_open_files(&j, files, ND_SD_JOURNAL_OPEN_FLAGS) < 0 || !j) - continue; - - char m[100]; - size_t len = snprintfz(m, sizeof(m), "_BOOT_ID=%s", boot_id); - usec_t t_ut = 0; - if(sd_journal_add_match(j, m, len) < 0 || - sd_journal_seek_head(j) < 0 || - sd_journal_next(j) < 0 || - sd_journal_get_realtime_usec(j, &t_ut) < 0 || !t_ut) { - sd_journal_close(j); - continue; - } - - if(t_ut < ut) - ut = t_ut; - - sd_journal_close(j); - } - dfe_done(jf); - - dictionary_set(boot_ids_to_first_ut, boot_id, &ut, sizeof(ut)); - } - else - ut = *p_ut; - - if(ut != UINT64_MAX) { - time_t timestamp_sec = (time_t)(ut / USEC_PER_SEC); - struct tm tm; - char buffer[30]; - - gmtime_r(×tamp_sec, &tm); - strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", &tm); - - switch(scope) { - default: - case FACETS_TRANSFORM_DATA: - case FACETS_TRANSFORM_VALUE: - buffer_sprintf(wb, " (%s UTC) ", buffer); - break; - - case FACETS_TRANSFORM_FACET: - case FACETS_TRANSFORM_FACET_SORT: - case FACETS_TRANSFORM_HISTOGRAM: - buffer_flush(wb); - buffer_sprintf(wb, "%s UTC", buffer); - break; - } - } - } -} - -static void netdata_systemd_journal_transform_uid(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) { - if(scope == FACETS_TRANSFORM_FACET_SORT) - return; - - const char *v = buffer_tostring(wb); - if(*v && isdigit(*v)) { - uid_t uid = str2i(buffer_tostring(wb)); - size_t len; - const char *name = uid_to_username_cached(uid, &len); - buffer_contents_replace(wb, name, len); - } -} - -static void netdata_systemd_journal_transform_gid(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) { - if(scope == FACETS_TRANSFORM_FACET_SORT) - return; - - const char *v = buffer_tostring(wb); - if(*v && isdigit(*v)) { - gid_t gid = str2i(buffer_tostring(wb)); - size_t len; - const char *name = gid_to_groupname_cached(gid, &len); - buffer_contents_replace(wb, name, len); - } -} - -const char *linux_capabilities[] = { - [CAP_CHOWN] = "CHOWN", - [CAP_DAC_OVERRIDE] = "DAC_OVERRIDE", - [CAP_DAC_READ_SEARCH] = "DAC_READ_SEARCH", - [CAP_FOWNER] = "FOWNER", - [CAP_FSETID] = "FSETID", - [CAP_KILL] = "KILL", - [CAP_SETGID] = "SETGID", - [CAP_SETUID] = "SETUID", - [CAP_SETPCAP] = "SETPCAP", - [CAP_LINUX_IMMUTABLE] = "LINUX_IMMUTABLE", - [CAP_NET_BIND_SERVICE] = "NET_BIND_SERVICE", - [CAP_NET_BROADCAST] = "NET_BROADCAST", - [CAP_NET_ADMIN] = "NET_ADMIN", - [CAP_NET_RAW] = "NET_RAW", - [CAP_IPC_LOCK] = "IPC_LOCK", - [CAP_IPC_OWNER] = "IPC_OWNER", - [CAP_SYS_MODULE] = "SYS_MODULE", - [CAP_SYS_RAWIO] = "SYS_RAWIO", - [CAP_SYS_CHROOT] = "SYS_CHROOT", - [CAP_SYS_PTRACE] = "SYS_PTRACE", - [CAP_SYS_PACCT] = "SYS_PACCT", - [CAP_SYS_ADMIN] = "SYS_ADMIN", - [CAP_SYS_BOOT] = "SYS_BOOT", - [CAP_SYS_NICE] = "SYS_NICE", - [CAP_SYS_RESOURCE] = "SYS_RESOURCE", - [CAP_SYS_TIME] = "SYS_TIME", - [CAP_SYS_TTY_CONFIG] = "SYS_TTY_CONFIG", - [CAP_MKNOD] = "MKNOD", - [CAP_LEASE] = "LEASE", - [CAP_AUDIT_WRITE] = "AUDIT_WRITE", - [CAP_AUDIT_CONTROL] = "AUDIT_CONTROL", - [CAP_SETFCAP] = "SETFCAP", - [CAP_MAC_OVERRIDE] = "MAC_OVERRIDE", - [CAP_MAC_ADMIN] = "MAC_ADMIN", - [CAP_SYSLOG] = "SYSLOG", - [CAP_WAKE_ALARM] = "WAKE_ALARM", - [CAP_BLOCK_SUSPEND] = "BLOCK_SUSPEND", - [37 /*CAP_AUDIT_READ*/] = "AUDIT_READ", - [38 /*CAP_PERFMON*/] = "PERFMON", - [39 /*CAP_BPF*/] = "BPF", - [40 /* CAP_CHECKPOINT_RESTORE */] = "CHECKPOINT_RESTORE", -}; - -static void netdata_systemd_journal_transform_cap_effective(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) { - if(scope == FACETS_TRANSFORM_FACET_SORT) - return; - - const char *v = buffer_tostring(wb); - if(*v && isdigit(*v)) { - uint64_t cap = strtoul(buffer_tostring(wb), NULL, 16); - if(cap) { - buffer_fast_strcat(wb, " (", 2); - for (size_t i = 0, added = 0; i < sizeof(linux_capabilities) / sizeof(linux_capabilities[0]); i++) { - if (linux_capabilities[i] && (cap & (1ULL << i))) { - - if (added) - buffer_fast_strcat(wb, " | ", 3); - - buffer_strcat(wb, linux_capabilities[i]); - added++; - } - } - buffer_fast_strcat(wb, ")", 1); - } - } -} - -static void netdata_systemd_journal_transform_timestamp_usec(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) { - if(scope == FACETS_TRANSFORM_FACET_SORT) - return; - - const char *v = buffer_tostring(wb); - if(*v && isdigit(*v)) { - uint64_t ut = str2ull(buffer_tostring(wb), NULL); - if(ut) { - time_t timestamp_sec = ut / USEC_PER_SEC; - struct tm tm; - char buffer[30]; - - gmtime_r(×tamp_sec, &tm); - strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", &tm); - buffer_sprintf(wb, " (%s.%06llu UTC)", buffer, ut % USEC_PER_SEC); - } - } -} - -// ---------------------------------------------------------------------------- - -static void netdata_systemd_journal_dynamic_row_id(FACETS *facets __maybe_unused, BUFFER *json_array, FACET_ROW_KEY_VALUE *rkv, FACET_ROW *row, void *data __maybe_unused) { - FACET_ROW_KEY_VALUE *pid_rkv = dictionary_get(row->dict, "_PID"); - const char *pid = pid_rkv ? buffer_tostring(pid_rkv->wb) : FACET_VALUE_UNSET; - - const char *identifier = NULL; - FACET_ROW_KEY_VALUE *container_name_rkv = dictionary_get(row->dict, "CONTAINER_NAME"); - if(container_name_rkv && !container_name_rkv->empty) - identifier = buffer_tostring(container_name_rkv->wb); - - if(!identifier) { - FACET_ROW_KEY_VALUE *syslog_identifier_rkv = dictionary_get(row->dict, "SYSLOG_IDENTIFIER"); - if(syslog_identifier_rkv && !syslog_identifier_rkv->empty) - identifier = buffer_tostring(syslog_identifier_rkv->wb); - - if(!identifier) { - FACET_ROW_KEY_VALUE *comm_rkv = dictionary_get(row->dict, "_COMM"); - if(comm_rkv && !comm_rkv->empty) - identifier = buffer_tostring(comm_rkv->wb); - } - } - - buffer_flush(rkv->wb); - - if(!identifier) - buffer_strcat(rkv->wb, FACET_VALUE_UNSET); - else - buffer_sprintf(rkv->wb, "%s[%s]", identifier, pid); - - buffer_json_add_array_item_string(json_array, buffer_tostring(rkv->wb)); -} - -static void netdata_systemd_journal_rich_message(FACETS *facets __maybe_unused, BUFFER *json_array, FACET_ROW_KEY_VALUE *rkv, FACET_ROW *row __maybe_unused, void *data __maybe_unused) { - buffer_json_add_array_item_object(json_array); - buffer_json_member_add_string(json_array, "value", buffer_tostring(rkv->wb)); - buffer_json_object_close(json_array); -} - DICTIONARY *function_query_status_dict = NULL; static void function_systemd_journal_progress(BUFFER *wb, const char *transaction, const char *progress_id) { @@ -2129,7 +1564,7 @@ static void function_systemd_journal_progress(BUFFER *wb, const char *transactio buffer_json_member_add_uint64(wb, "running_duration_usec", duration_ut); buffer_json_member_add_double(wb, "progress", (double)file_working * 100.0 / (double)files_matched); char msg[1024 + 1]; - snprintfz(msg, 1024, + snprintfz(msg, sizeof(msg) - 1, "Read %zu rows (%0.0f rows/s), " "data %0.1f MB (%0.1f MB/s), " "file %zu of %zu", @@ -2147,10 +1582,9 @@ static void function_systemd_journal_progress(BUFFER *wb, const char *transactio dictionary_acquired_item_release(function_query_status_dict, item); } -static void function_systemd_journal(const char *transaction, char *function, int timeout, bool *cancelled) { +void function_systemd_journal(const char *transaction, char *function, int timeout, bool *cancelled) { fstat_thread_calls = 0; fstat_thread_cached_responses = 0; - journal_files_registry_update(); BUFFER *wb = buffer_create(0, NULL); buffer_flush(wb); @@ -2186,6 +1620,7 @@ static void function_systemd_journal(const char *transaction, char *function, in facets_accepted_param(facets, JOURNAL_PARAMETER_PROGRESS); facets_accepted_param(facets, JOURNAL_PARAMETER_DELTA); facets_accepted_param(facets, JOURNAL_PARAMETER_TAIL); + facets_accepted_param(facets, JOURNAL_PARAMETER_SAMPLING); #ifdef HAVE_SD_JOURNAL_RESTART_FIELDS facets_accepted_param(facets, JOURNAL_PARAMETER_SLICE); @@ -2196,10 +1631,10 @@ static void function_systemd_journal(const char *transaction, char *function, in facets_register_row_severity(facets, syslog_priority_to_facet_severity, NULL); facets_register_key_name(facets, "_HOSTNAME", - FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_VISIBLE | FACET_KEY_OPTION_FTS); + FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_VISIBLE); facets_register_dynamic_key_name(facets, JOURNAL_KEY_ND_JOURNAL_PROCESS, - FACET_KEY_OPTION_NEVER_FACET | FACET_KEY_OPTION_VISIBLE | FACET_KEY_OPTION_FTS, + FACET_KEY_OPTION_NEVER_FACET | FACET_KEY_OPTION_VISIBLE, netdata_systemd_journal_dynamic_row_id, NULL); facets_register_key_name(facets, "MESSAGE", @@ -2212,71 +1647,78 @@ static void function_systemd_journal(const char *transaction, char *function, in // netdata_systemd_journal_rich_message, NULL); facets_register_key_name_transformation(facets, "PRIORITY", - FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_FTS | FACET_KEY_OPTION_TRANSFORM_VIEW, + FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW | + FACET_KEY_OPTION_EXPANDED_FILTER, netdata_systemd_journal_transform_priority, NULL); facets_register_key_name_transformation(facets, "SYSLOG_FACILITY", - FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_FTS | FACET_KEY_OPTION_TRANSFORM_VIEW, + FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW | + FACET_KEY_OPTION_EXPANDED_FILTER, netdata_systemd_journal_transform_syslog_facility, NULL); facets_register_key_name_transformation(facets, "ERRNO", - FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_FTS | FACET_KEY_OPTION_TRANSFORM_VIEW, + FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW, netdata_systemd_journal_transform_errno, NULL); facets_register_key_name(facets, JOURNAL_KEY_ND_JOURNAL_FILE, FACET_KEY_OPTION_NEVER_FACET); facets_register_key_name(facets, "SYSLOG_IDENTIFIER", - FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_FTS); + FACET_KEY_OPTION_FACET); facets_register_key_name(facets, "UNIT", - FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_FTS); + FACET_KEY_OPTION_FACET); facets_register_key_name(facets, "USER_UNIT", - FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_FTS); + FACET_KEY_OPTION_FACET); + + facets_register_key_name_transformation(facets, "MESSAGE_ID", + FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW | + FACET_KEY_OPTION_EXPANDED_FILTER, + netdata_systemd_journal_transform_message_id, NULL); facets_register_key_name_transformation(facets, "_BOOT_ID", - FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_FTS | FACET_KEY_OPTION_TRANSFORM_VIEW, + FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW, netdata_systemd_journal_transform_boot_id, NULL); facets_register_key_name_transformation(facets, "_SYSTEMD_OWNER_UID", - FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_FTS | FACET_KEY_OPTION_TRANSFORM_VIEW, + FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW, netdata_systemd_journal_transform_uid, NULL); facets_register_key_name_transformation(facets, "_UID", - FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_FTS | FACET_KEY_OPTION_TRANSFORM_VIEW, + FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW, netdata_systemd_journal_transform_uid, NULL); facets_register_key_name_transformation(facets, "OBJECT_SYSTEMD_OWNER_UID", - FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_FTS | FACET_KEY_OPTION_TRANSFORM_VIEW, + FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW, netdata_systemd_journal_transform_uid, NULL); facets_register_key_name_transformation(facets, "OBJECT_UID", - FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_FTS | FACET_KEY_OPTION_TRANSFORM_VIEW, + FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW, netdata_systemd_journal_transform_uid, NULL); facets_register_key_name_transformation(facets, "_GID", - FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_FTS | FACET_KEY_OPTION_TRANSFORM_VIEW, + FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW, netdata_systemd_journal_transform_gid, NULL); facets_register_key_name_transformation(facets, "OBJECT_GID", - FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_FTS | FACET_KEY_OPTION_TRANSFORM_VIEW, + FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW, netdata_systemd_journal_transform_gid, NULL); facets_register_key_name_transformation(facets, "_CAP_EFFECTIVE", - FACET_KEY_OPTION_FTS | FACET_KEY_OPTION_TRANSFORM_VIEW, + FACET_KEY_OPTION_TRANSFORM_VIEW, netdata_systemd_journal_transform_cap_effective, NULL); facets_register_key_name_transformation(facets, "_AUDIT_LOGINUID", - FACET_KEY_OPTION_FTS | FACET_KEY_OPTION_TRANSFORM_VIEW, + FACET_KEY_OPTION_TRANSFORM_VIEW, netdata_systemd_journal_transform_uid, NULL); facets_register_key_name_transformation(facets, "OBJECT_AUDIT_LOGINUID", - FACET_KEY_OPTION_FTS | FACET_KEY_OPTION_TRANSFORM_VIEW, + FACET_KEY_OPTION_TRANSFORM_VIEW, netdata_systemd_journal_transform_uid, NULL); facets_register_key_name_transformation(facets, "_SOURCE_REALTIME_TIMESTAMP", - FACET_KEY_OPTION_FTS | FACET_KEY_OPTION_TRANSFORM_VIEW, + FACET_KEY_OPTION_TRANSFORM_VIEW, netdata_systemd_journal_transform_timestamp_usec, NULL); // ------------------------------------------------------------------------ @@ -2290,10 +1732,11 @@ static void function_systemd_journal(const char *transaction, char *function, in FACETS_ANCHOR_DIRECTION direction = JOURNAL_DEFAULT_DIRECTION; const char *query = NULL; const char *chart = NULL; - const char *source = NULL; + SIMPLE_PATTERN *sources = NULL; const char *progress_id = NULL; SD_JOURNAL_FILE_SOURCE_TYPE source_type = SDJF_ALL; size_t filters = 0; + size_t sampling = SYSTEMD_JOURNAL_DEFAULT_ITEMS_SAMPLING; buffer_json_member_add_object(wb, "_request"); @@ -2329,6 +1772,9 @@ static void function_systemd_journal(const char *transaction, char *function, in else tail = true; } + else if(strncmp(keyword, JOURNAL_PARAMETER_SAMPLING ":", sizeof(JOURNAL_PARAMETER_SAMPLING ":") - 1) == 0) { + sampling = str2ul(&keyword[sizeof(JOURNAL_PARAMETER_SAMPLING ":") - 1]); + } else if(strncmp(keyword, JOURNAL_PARAMETER_DATA_ONLY ":", sizeof(JOURNAL_PARAMETER_DATA_ONLY ":") - 1) == 0) { char *v = &keyword[sizeof(JOURNAL_PARAMETER_DATA_ONLY ":") - 1]; @@ -2352,40 +1798,67 @@ static void function_systemd_journal(const char *transaction, char *function, in progress_id = id; } else if(strncmp(keyword, JOURNAL_PARAMETER_SOURCE ":", sizeof(JOURNAL_PARAMETER_SOURCE ":") - 1) == 0) { - source = &keyword[sizeof(JOURNAL_PARAMETER_SOURCE ":") - 1]; + const char *value = &keyword[sizeof(JOURNAL_PARAMETER_SOURCE ":") - 1]; - if(strcmp(source, SDJF_SOURCE_ALL_NAME) == 0) { - source_type = SDJF_ALL; - source = NULL; - } - else if(strcmp(source, SDJF_SOURCE_LOCAL_NAME) == 0) { - source_type = SDJF_LOCAL; - source = NULL; - } - else if(strcmp(source, SDJF_SOURCE_REMOTES_NAME) == 0) { - source_type = SDJF_REMOTE; - source = NULL; - } - else if(strcmp(source, SDJF_SOURCE_NAMESPACES_NAME) == 0) { - source_type = SDJF_NAMESPACE; - source = NULL; - } - else if(strcmp(source, SDJF_SOURCE_LOCAL_SYSTEM_NAME) == 0) { - source_type = SDJF_LOCAL | SDJF_SYSTEM; - source = NULL; - } - else if(strcmp(source, SDJF_SOURCE_LOCAL_USERS_NAME) == 0) { - source_type = SDJF_LOCAL | SDJF_USER; - source = NULL; - } - else if(strcmp(source, SDJF_SOURCE_LOCAL_OTHER_NAME) == 0) { - source_type = SDJF_LOCAL | SDJF_OTHER; - source = NULL; + buffer_json_member_add_array(wb, JOURNAL_PARAMETER_SOURCE); + + BUFFER *sources_list = buffer_create(0, NULL); + + source_type = SDJF_NONE; + while(value) { + char *sep = strchr(value, ','); + if(sep) + *sep++ = '\0'; + + buffer_json_add_array_item_string(wb, value); + + if(strcmp(value, SDJF_SOURCE_ALL_NAME) == 0) { + source_type |= SDJF_ALL; + value = NULL; + } + else if(strcmp(value, SDJF_SOURCE_LOCAL_NAME) == 0) { + source_type |= SDJF_LOCAL_ALL; + value = NULL; + } + else if(strcmp(value, SDJF_SOURCE_REMOTES_NAME) == 0) { + source_type |= SDJF_REMOTE_ALL; + value = NULL; + } + else if(strcmp(value, SDJF_SOURCE_NAMESPACES_NAME) == 0) { + source_type |= SDJF_LOCAL_NAMESPACE; + value = NULL; + } + else if(strcmp(value, SDJF_SOURCE_LOCAL_SYSTEM_NAME) == 0) { + source_type |= SDJF_LOCAL_SYSTEM; + value = NULL; + } + else if(strcmp(value, SDJF_SOURCE_LOCAL_USERS_NAME) == 0) { + source_type |= SDJF_LOCAL_USER; + value = NULL; + } + else if(strcmp(value, SDJF_SOURCE_LOCAL_OTHER_NAME) == 0) { + source_type |= SDJF_LOCAL_OTHER; + value = NULL; + } + else { + // else, match the source, whatever it is + if(buffer_strlen(sources_list)) + buffer_strcat(sources_list, ","); + + buffer_strcat(sources_list, value); + } + + value = sep; } - else { - source_type = SDJF_ALL; - // else, match the source, whatever it is + + if(buffer_strlen(sources_list)) { + simple_pattern_free(sources); + sources = simple_pattern_create(buffer_tostring(sources_list), ",", SIMPLE_PATTERN_EXACT, false); } + + buffer_free(sources_list); + + buffer_json_array_close(wb); // source } else if(strncmp(keyword, JOURNAL_PARAMETER_AFTER ":", sizeof(JOURNAL_PARAMETER_AFTER ":") - 1) == 0) { after_s = str2l(&keyword[sizeof(JOURNAL_PARAMETER_AFTER ":") - 1]); @@ -2502,7 +1975,7 @@ static void function_systemd_journal(const char *transaction, char *function, in fqs->data_only = data_only; fqs->delta = (fqs->data_only) ? delta : false; fqs->tail = (fqs->data_only && fqs->if_modified_since) ? tail : false; - fqs->source = string_strdupz(source); + fqs->sources = sources; fqs->source_type = source_type; fqs->entries = last; fqs->last_modified = 0; @@ -2512,6 +1985,7 @@ static void function_systemd_journal(const char *transaction, char *function, in fqs->direction = direction; fqs->anchor.start_ut = anchor; fqs->anchor.stop_ut = 0; + fqs->sampling = sampling; if(fqs->anchor.start_ut && fqs->tail) { // a tail request @@ -2574,8 +2048,8 @@ static void function_systemd_journal(const char *transaction, char *function, in buffer_json_member_add_boolean(wb, JOURNAL_PARAMETER_PROGRESS, false); buffer_json_member_add_boolean(wb, JOURNAL_PARAMETER_DELTA, fqs->delta); buffer_json_member_add_boolean(wb, JOURNAL_PARAMETER_TAIL, fqs->tail); + buffer_json_member_add_uint64(wb, JOURNAL_PARAMETER_SAMPLING, fqs->sampling); buffer_json_member_add_string(wb, JOURNAL_PARAMETER_ID, progress_id); - buffer_json_member_add_string(wb, JOURNAL_PARAMETER_SOURCE, string2str(fqs->source)); buffer_json_member_add_uint64(wb, "source_type", fqs->source_type); buffer_json_member_add_uint64(wb, JOURNAL_PARAMETER_AFTER, fqs->after_ut / USEC_PER_SEC); buffer_json_member_add_uint64(wb, JOURNAL_PARAMETER_BEFORE, fqs->before_ut / USEC_PER_SEC); @@ -2603,7 +2077,7 @@ static void function_systemd_journal(const char *transaction, char *function, in buffer_json_member_add_string(wb, "id", "source"); buffer_json_member_add_string(wb, "name", "source"); buffer_json_member_add_string(wb, "help", "Select the SystemD Journal source to query"); - buffer_json_member_add_string(wb, "type", "select"); + buffer_json_member_add_string(wb, "type", "multiselect"); buffer_json_member_add_array(wb, "options"); { available_journal_file_sources_to_json_array(wb); @@ -2632,12 +2106,6 @@ static void function_systemd_journal(const char *transaction, char *function, in response = netdata_systemd_journal_query(wb, facets, fqs); // ------------------------------------------------------------------------ - // cleanup query params - - string_freez(fqs->source); - fqs->source = NULL; - - // ------------------------------------------------------------------------ // handle error response if(response != HTTP_RESP_OK) { @@ -2653,6 +2121,7 @@ output: netdata_mutex_unlock(&stdout_mutex); cleanup: + simple_pattern_free(sources); facets_destroy(facets); buffer_free(wb); @@ -2663,129 +2132,8 @@ cleanup: } } -// ---------------------------------------------------------------------------- - -int main(int argc __maybe_unused, char **argv __maybe_unused) { - stderror = stderr; - clocks_init(); - - program_name = "systemd-journal.plugin"; - - // disable syslog - error_log_syslog = 0; - - // set errors flood protection to 100 logs per hour - error_log_errors_per_period = 100; - error_log_throttle_period = 3600; - - log_set_global_severity_for_external_plugins(); - - netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX"); - if(verify_netdata_host_prefix() == -1) exit(1); - - // ------------------------------------------------------------------------ - // setup the journal directories - - unsigned d = 0; - - journal_directories[d++].path = strdupz("/var/log/journal"); - journal_directories[d++].path = strdupz("/run/log/journal"); - - if(*netdata_configured_host_prefix) { - char path[PATH_MAX]; - snprintfz(path, sizeof(path), "%s/var/log/journal", netdata_configured_host_prefix); - journal_directories[d++].path = strdupz(path); - snprintfz(path, sizeof(path), "%s/run/log/journal", netdata_configured_host_prefix); - journal_directories[d++].path = strdupz(path); - } - - // terminate the list - journal_directories[d].path = NULL; - - // ------------------------------------------------------------------------ - +void journal_init_query_status(void) { function_query_status_dict = dictionary_create_advanced( DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, NULL, sizeof(FUNCTION_QUERY_STATUS)); - - // ------------------------------------------------------------------------ - // initialize the used hashes files registry - - used_hashes_registry = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); - - - // ------------------------------------------------------------------------ - // initialize the journal files registry - - systemd_journal_session = (now_realtime_usec() / USEC_PER_SEC) * USEC_PER_SEC; - - journal_files_registry = dictionary_create_advanced( - DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, - NULL, sizeof(struct journal_file)); - - dictionary_register_insert_callback(journal_files_registry, files_registry_insert_cb, NULL); - dictionary_register_delete_callback(journal_files_registry, files_registry_delete_cb, NULL); - dictionary_register_conflict_callback(journal_files_registry, files_registry_conflict_cb, NULL); - - boot_ids_to_first_ut = dictionary_create_advanced( - DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, - NULL, sizeof(usec_t)); - - journal_files_registry_update(); - - - // ------------------------------------------------------------------------ - // debug - - if(argc == 2 && strcmp(argv[1], "debug") == 0) { - bool cancelled = false; - char buf[] = "systemd-journal after:-16000000 before:0 last:1"; - // char buf[] = "systemd-journal after:1695332964 before:1695937764 direction:backward last:100 slice:true source:all DHKucpqUoe1:PtVoyIuX.MU"; - // char buf[] = "systemd-journal after:1694511062 before:1694514662 anchor:1694514122024403"; - function_systemd_journal("123", buf, 600, &cancelled); - exit(1); - } - - // ------------------------------------------------------------------------ - // the event loop for functions - - struct functions_evloop_globals *wg = - functions_evloop_init(SYSTEMD_JOURNAL_WORKER_THREADS, "SDJ", &stdout_mutex, &plugin_should_exit); - - functions_evloop_add_function(wg, SYSTEMD_JOURNAL_FUNCTION_NAME, function_systemd_journal, - SYSTEMD_JOURNAL_DEFAULT_TIMEOUT); - - - // ------------------------------------------------------------------------ - - time_t started_t = now_monotonic_sec(); - - size_t iteration = 0; - usec_t step = 1000 * USEC_PER_MS; - bool tty = isatty(fileno(stderr)) == 1; - - netdata_mutex_lock(&stdout_mutex); - fprintf(stdout, PLUGINSD_KEYWORD_FUNCTION " GLOBAL \"%s\" %d \"%s\"\n", - SYSTEMD_JOURNAL_FUNCTION_NAME, SYSTEMD_JOURNAL_DEFAULT_TIMEOUT, SYSTEMD_JOURNAL_FUNCTION_DESCRIPTION); - - heartbeat_t hb; - heartbeat_init(&hb); - while(!plugin_should_exit) { - iteration++; - - netdata_mutex_unlock(&stdout_mutex); - heartbeat_next(&hb, step); - netdata_mutex_lock(&stdout_mutex); - - if(!tty) - fprintf(stdout, "\n"); - - fflush(stdout); - - time_t now = now_monotonic_sec(); - if(now - started_t > 86400) - break; - } - - exit(0); } diff --git a/collectors/systemd-journal.plugin/systemd-main.c b/collectors/systemd-journal.plugin/systemd-main.c new file mode 100644 index 000000000..a3510b0ed --- /dev/null +++ b/collectors/systemd-journal.plugin/systemd-main.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "systemd-internals.h" +#include "libnetdata/required_dummies.h" + +#define SYSTEMD_JOURNAL_WORKER_THREADS 5 + +netdata_mutex_t stdout_mutex = NETDATA_MUTEX_INITIALIZER; +static bool plugin_should_exit = false; + +int main(int argc __maybe_unused, char **argv __maybe_unused) { + clocks_init(); + netdata_thread_set_tag("SDMAIN"); + nd_log_initialize_for_external_plugins("systemd-journal.plugin"); + + netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX"); + if(verify_netdata_host_prefix(true) == -1) exit(1); + + // ------------------------------------------------------------------------ + // initialization + + netdata_systemd_journal_message_ids_init(); + journal_init_query_status(); + journal_init_files_and_directories(); + + // ------------------------------------------------------------------------ + // debug + + if(argc == 2 && strcmp(argv[1], "debug") == 0) { + journal_files_registry_update(); + + bool cancelled = false; + char buf[] = "systemd-journal after:-8640000 before:0 direction:backward last:200 data_only:false slice:true source:all"; + // char buf[] = "systemd-journal after:1695332964 before:1695937764 direction:backward last:100 slice:true source:all DHKucpqUoe1:PtVoyIuX.MU"; + // char buf[] = "systemd-journal after:1694511062 before:1694514662 anchor:1694514122024403"; + function_systemd_journal("123", buf, 600, &cancelled); +// function_systemd_units("123", "systemd-units", 600, &cancelled); + exit(1); + } +#ifdef ENABLE_SYSTEMD_DBUS + if(argc == 2 && strcmp(argv[1], "debug-units") == 0) { + bool cancelled = false; + function_systemd_units("123", "systemd-units", 600, &cancelled); + exit(1); + } +#endif + + // ------------------------------------------------------------------------ + // watcher thread + + netdata_thread_t watcher_thread; + netdata_thread_create(&watcher_thread, "SDWATCH", + NETDATA_THREAD_OPTION_DONT_LOG, journal_watcher_main, NULL); + + // ------------------------------------------------------------------------ + // the event loop for functions + + struct functions_evloop_globals *wg = + functions_evloop_init(SYSTEMD_JOURNAL_WORKER_THREADS, "SDJ", &stdout_mutex, &plugin_should_exit); + + functions_evloop_add_function(wg, SYSTEMD_JOURNAL_FUNCTION_NAME, function_systemd_journal, + SYSTEMD_JOURNAL_DEFAULT_TIMEOUT); + +#ifdef ENABLE_SYSTEMD_DBUS + functions_evloop_add_function(wg, SYSTEMD_UNITS_FUNCTION_NAME, function_systemd_units, + SYSTEMD_UNITS_DEFAULT_TIMEOUT); +#endif + + // ------------------------------------------------------------------------ + // register functions to netdata + + netdata_mutex_lock(&stdout_mutex); + + fprintf(stdout, PLUGINSD_KEYWORD_FUNCTION " GLOBAL \"%s\" %d \"%s\"\n", + SYSTEMD_JOURNAL_FUNCTION_NAME, SYSTEMD_JOURNAL_DEFAULT_TIMEOUT, SYSTEMD_JOURNAL_FUNCTION_DESCRIPTION); + +#ifdef ENABLE_SYSTEMD_DBUS + fprintf(stdout, PLUGINSD_KEYWORD_FUNCTION " GLOBAL \"%s\" %d \"%s\"\n", + SYSTEMD_UNITS_FUNCTION_NAME, SYSTEMD_UNITS_DEFAULT_TIMEOUT, SYSTEMD_UNITS_FUNCTION_DESCRIPTION); +#endif + + fflush(stdout); + netdata_mutex_unlock(&stdout_mutex); + + // ------------------------------------------------------------------------ + + usec_t step_ut = 100 * USEC_PER_MS; + usec_t send_newline_ut = 0; + usec_t since_last_scan_ut = SYSTEMD_JOURNAL_ALL_FILES_SCAN_EVERY_USEC * 2; // something big to trigger scanning at start + bool tty = isatty(fileno(stderr)) == 1; + + heartbeat_t hb; + heartbeat_init(&hb); + while(!plugin_should_exit) { + + if(since_last_scan_ut > SYSTEMD_JOURNAL_ALL_FILES_SCAN_EVERY_USEC) { + journal_files_registry_update(); + since_last_scan_ut = 0; + } + + usec_t dt_ut = heartbeat_next(&hb, step_ut); + since_last_scan_ut += dt_ut; + send_newline_ut += dt_ut; + + if(!tty && send_newline_ut > USEC_PER_SEC) { + send_newline_and_flush(); + send_newline_ut = 0; + } + } + + exit(0); +} diff --git a/collectors/systemd-journal.plugin/systemd-units.c b/collectors/systemd-journal.plugin/systemd-units.c new file mode 100644 index 000000000..dac158817 --- /dev/null +++ b/collectors/systemd-journal.plugin/systemd-units.c @@ -0,0 +1,1965 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "systemd-internals.h" + +#ifdef ENABLE_SYSTEMD_DBUS +#include <systemd/sd-bus.h> + +#define SYSTEMD_UNITS_MAX_PARAMS 10 +#define SYSTEMD_UNITS_DBUS_TYPES "(ssssssouso)" + +// ---------------------------------------------------------------------------- +// copied from systemd: string-table.h + +typedef char sd_char; +#define XCONCATENATE(x, y) x ## y +#define CONCATENATE(x, y) XCONCATENATE(x, y) + +#ifndef __COVERITY__ +# define VOID_0 ((void)0) +#else +# define VOID_0 ((void*)0) +#endif + +#define ELEMENTSOF(x) \ + (__builtin_choose_expr( \ + !__builtin_types_compatible_p(typeof(x), typeof(&*(x))), \ + sizeof(x)/sizeof((x)[0]), \ + VOID_0)) + +#define UNIQ_T(x, uniq) CONCATENATE(__unique_prefix_, CONCATENATE(x, uniq)) +#define UNIQ __COUNTER__ +#define __CMP(aq, a, bq, b) \ + ({ \ + const typeof(a) UNIQ_T(A, aq) = (a); \ + const typeof(b) UNIQ_T(B, bq) = (b); \ + UNIQ_T(A, aq) < UNIQ_T(B, bq) ? -1 : \ + UNIQ_T(A, aq) > UNIQ_T(B, bq) ? 1 : 0; \ + }) +#define CMP(a, b) __CMP(UNIQ, (a), UNIQ, (b)) + +static inline int strcmp_ptr(const sd_char *a, const sd_char *b) { + if (a && b) + return strcmp(a, b); + + return CMP(a, b); +} + +static inline bool streq_ptr(const sd_char *a, const sd_char *b) { + return strcmp_ptr(a, b) == 0; +} + +ssize_t string_table_lookup(const char * const *table, size_t len, const char *key) { + if (!key || !*key) + return -EINVAL; + + for (size_t i = 0; i < len; ++i) + if (streq_ptr(table[i], key)) + return (ssize_t) i; + + return -EINVAL; +} + +/* For basic lookup tables with strictly enumerated entries */ +#define _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,scope) \ + scope const char *name##_to_string(type i) { \ + if (i < 0 || i >= (type) ELEMENTSOF(name##_table)) \ + return NULL; \ + return name##_table[i]; \ + } + +#define _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,scope) \ + scope type name##_from_string(const char *s) { \ + return (type) string_table_lookup(name##_table, ELEMENTSOF(name##_table), s); \ + } + +#define _DEFINE_STRING_TABLE_LOOKUP(name,type,scope) \ + _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,scope) \ + _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,scope) + +#define DEFINE_STRING_TABLE_LOOKUP(name,type) _DEFINE_STRING_TABLE_LOOKUP(name,type,) + +// ---------------------------------------------------------------------------- +// copied from systemd: unit-def.h + +typedef enum UnitType { + UNIT_SERVICE, + UNIT_MOUNT, + UNIT_SWAP, + UNIT_SOCKET, + UNIT_TARGET, + UNIT_DEVICE, + UNIT_AUTOMOUNT, + UNIT_TIMER, + UNIT_PATH, + UNIT_SLICE, + UNIT_SCOPE, + _UNIT_TYPE_MAX, + _UNIT_TYPE_INVALID = -EINVAL, +} UnitType; + +typedef enum UnitLoadState { + UNIT_STUB, + UNIT_LOADED, + UNIT_NOT_FOUND, /* error condition #1: unit file not found */ + UNIT_BAD_SETTING, /* error condition #2: we couldn't parse some essential unit file setting */ + UNIT_ERROR, /* error condition #3: other "system" error, catchall for the rest */ + UNIT_MERGED, + UNIT_MASKED, + _UNIT_LOAD_STATE_MAX, + _UNIT_LOAD_STATE_INVALID = -EINVAL, +} UnitLoadState; + +typedef enum UnitActiveState { + UNIT_ACTIVE, + UNIT_RELOADING, + UNIT_INACTIVE, + UNIT_FAILED, + UNIT_ACTIVATING, + UNIT_DEACTIVATING, + UNIT_MAINTENANCE, + _UNIT_ACTIVE_STATE_MAX, + _UNIT_ACTIVE_STATE_INVALID = -EINVAL, +} UnitActiveState; + +typedef enum AutomountState { + AUTOMOUNT_DEAD, + AUTOMOUNT_WAITING, + AUTOMOUNT_RUNNING, + AUTOMOUNT_FAILED, + _AUTOMOUNT_STATE_MAX, + _AUTOMOUNT_STATE_INVALID = -EINVAL, +} AutomountState; + +typedef enum DeviceState { + DEVICE_DEAD, + DEVICE_TENTATIVE, /* mounted or swapped, but not (yet) announced by udev */ + DEVICE_PLUGGED, /* announced by udev */ + _DEVICE_STATE_MAX, + _DEVICE_STATE_INVALID = -EINVAL, +} DeviceState; + +typedef enum MountState { + MOUNT_DEAD, + MOUNT_MOUNTING, /* /usr/bin/mount is running, but the mount is not done yet. */ + MOUNT_MOUNTING_DONE, /* /usr/bin/mount is running, and the mount is done. */ + MOUNT_MOUNTED, + MOUNT_REMOUNTING, + MOUNT_UNMOUNTING, + MOUNT_REMOUNTING_SIGTERM, + MOUNT_REMOUNTING_SIGKILL, + MOUNT_UNMOUNTING_SIGTERM, + MOUNT_UNMOUNTING_SIGKILL, + MOUNT_FAILED, + MOUNT_CLEANING, + _MOUNT_STATE_MAX, + _MOUNT_STATE_INVALID = -EINVAL, +} MountState; + +typedef enum PathState { + PATH_DEAD, + PATH_WAITING, + PATH_RUNNING, + PATH_FAILED, + _PATH_STATE_MAX, + _PATH_STATE_INVALID = -EINVAL, +} PathState; + +typedef enum ScopeState { + SCOPE_DEAD, + SCOPE_START_CHOWN, + SCOPE_RUNNING, + SCOPE_ABANDONED, + SCOPE_STOP_SIGTERM, + SCOPE_STOP_SIGKILL, + SCOPE_FAILED, + _SCOPE_STATE_MAX, + _SCOPE_STATE_INVALID = -EINVAL, +} ScopeState; + +typedef enum ServiceState { + SERVICE_DEAD, + SERVICE_CONDITION, + SERVICE_START_PRE, + SERVICE_START, + SERVICE_START_POST, + SERVICE_RUNNING, + SERVICE_EXITED, /* Nothing is running anymore, but RemainAfterExit is true hence this is OK */ + SERVICE_RELOAD, /* Reloading via ExecReload= */ + SERVICE_RELOAD_SIGNAL, /* Reloading via SIGHUP requested */ + SERVICE_RELOAD_NOTIFY, /* Waiting for READY=1 after RELOADING=1 notify */ + SERVICE_STOP, /* No STOP_PRE state, instead just register multiple STOP executables */ + SERVICE_STOP_WATCHDOG, + SERVICE_STOP_SIGTERM, + SERVICE_STOP_SIGKILL, + SERVICE_STOP_POST, + SERVICE_FINAL_WATCHDOG, /* In case the STOP_POST executable needs to be aborted. */ + SERVICE_FINAL_SIGTERM, /* In case the STOP_POST executable hangs, we shoot that down, too */ + SERVICE_FINAL_SIGKILL, + SERVICE_FAILED, + SERVICE_DEAD_BEFORE_AUTO_RESTART, + SERVICE_FAILED_BEFORE_AUTO_RESTART, + SERVICE_DEAD_RESOURCES_PINNED, /* Like SERVICE_DEAD, but with pinned resources */ + SERVICE_AUTO_RESTART, + SERVICE_AUTO_RESTART_QUEUED, + SERVICE_CLEANING, + _SERVICE_STATE_MAX, + _SERVICE_STATE_INVALID = -EINVAL, +} ServiceState; + +typedef enum SliceState { + SLICE_DEAD, + SLICE_ACTIVE, + _SLICE_STATE_MAX, + _SLICE_STATE_INVALID = -EINVAL, +} SliceState; + +typedef enum SocketState { + SOCKET_DEAD, + SOCKET_START_PRE, + SOCKET_START_CHOWN, + SOCKET_START_POST, + SOCKET_LISTENING, + SOCKET_RUNNING, + SOCKET_STOP_PRE, + SOCKET_STOP_PRE_SIGTERM, + SOCKET_STOP_PRE_SIGKILL, + SOCKET_STOP_POST, + SOCKET_FINAL_SIGTERM, + SOCKET_FINAL_SIGKILL, + SOCKET_FAILED, + SOCKET_CLEANING, + _SOCKET_STATE_MAX, + _SOCKET_STATE_INVALID = -EINVAL, +} SocketState; + +typedef enum SwapState { + SWAP_DEAD, + SWAP_ACTIVATING, /* /sbin/swapon is running, but the swap not yet enabled. */ + SWAP_ACTIVATING_DONE, /* /sbin/swapon is running, and the swap is done. */ + SWAP_ACTIVE, + SWAP_DEACTIVATING, + SWAP_DEACTIVATING_SIGTERM, + SWAP_DEACTIVATING_SIGKILL, + SWAP_FAILED, + SWAP_CLEANING, + _SWAP_STATE_MAX, + _SWAP_STATE_INVALID = -EINVAL, +} SwapState; + +typedef enum TargetState { + TARGET_DEAD, + TARGET_ACTIVE, + _TARGET_STATE_MAX, + _TARGET_STATE_INVALID = -EINVAL, +} TargetState; + +typedef enum TimerState { + TIMER_DEAD, + TIMER_WAITING, + TIMER_RUNNING, + TIMER_ELAPSED, + TIMER_FAILED, + _TIMER_STATE_MAX, + _TIMER_STATE_INVALID = -EINVAL, +} TimerState; + +typedef enum FreezerState { + FREEZER_RUNNING, + FREEZER_FREEZING, + FREEZER_FROZEN, + FREEZER_THAWING, + _FREEZER_STATE_MAX, + _FREEZER_STATE_INVALID = -EINVAL, +} FreezerState; + +// ---------------------------------------------------------------------------- +// copied from systemd: unit-def.c + +static const char* const unit_type_table[_UNIT_TYPE_MAX] = { + [UNIT_SERVICE] = "service", + [UNIT_SOCKET] = "socket", + [UNIT_TARGET] = "target", + [UNIT_DEVICE] = "device", + [UNIT_MOUNT] = "mount", + [UNIT_AUTOMOUNT] = "automount", + [UNIT_SWAP] = "swap", + [UNIT_TIMER] = "timer", + [UNIT_PATH] = "path", + [UNIT_SLICE] = "slice", + [UNIT_SCOPE] = "scope", +}; + +DEFINE_STRING_TABLE_LOOKUP(unit_type, UnitType); + +static const char* const unit_load_state_table[_UNIT_LOAD_STATE_MAX] = { + [UNIT_STUB] = "stub", + [UNIT_LOADED] = "loaded", + [UNIT_NOT_FOUND] = "not-found", + [UNIT_BAD_SETTING] = "bad-setting", + [UNIT_ERROR] = "error", + [UNIT_MERGED] = "merged", + [UNIT_MASKED] = "masked" +}; + +DEFINE_STRING_TABLE_LOOKUP(unit_load_state, UnitLoadState); + +static const char* const unit_active_state_table[_UNIT_ACTIVE_STATE_MAX] = { + [UNIT_ACTIVE] = "active", + [UNIT_RELOADING] = "reloading", + [UNIT_INACTIVE] = "inactive", + [UNIT_FAILED] = "failed", + [UNIT_ACTIVATING] = "activating", + [UNIT_DEACTIVATING] = "deactivating", + [UNIT_MAINTENANCE] = "maintenance", +}; + +DEFINE_STRING_TABLE_LOOKUP(unit_active_state, UnitActiveState); + +static const char* const automount_state_table[_AUTOMOUNT_STATE_MAX] = { + [AUTOMOUNT_DEAD] = "dead", + [AUTOMOUNT_WAITING] = "waiting", + [AUTOMOUNT_RUNNING] = "running", + [AUTOMOUNT_FAILED] = "failed" +}; + +DEFINE_STRING_TABLE_LOOKUP(automount_state, AutomountState); + +static const char* const device_state_table[_DEVICE_STATE_MAX] = { + [DEVICE_DEAD] = "dead", + [DEVICE_TENTATIVE] = "tentative", + [DEVICE_PLUGGED] = "plugged", +}; + +DEFINE_STRING_TABLE_LOOKUP(device_state, DeviceState); + +static const char* const mount_state_table[_MOUNT_STATE_MAX] = { + [MOUNT_DEAD] = "dead", + [MOUNT_MOUNTING] = "mounting", + [MOUNT_MOUNTING_DONE] = "mounting-done", + [MOUNT_MOUNTED] = "mounted", + [MOUNT_REMOUNTING] = "remounting", + [MOUNT_UNMOUNTING] = "unmounting", + [MOUNT_REMOUNTING_SIGTERM] = "remounting-sigterm", + [MOUNT_REMOUNTING_SIGKILL] = "remounting-sigkill", + [MOUNT_UNMOUNTING_SIGTERM] = "unmounting-sigterm", + [MOUNT_UNMOUNTING_SIGKILL] = "unmounting-sigkill", + [MOUNT_FAILED] = "failed", + [MOUNT_CLEANING] = "cleaning", +}; + +DEFINE_STRING_TABLE_LOOKUP(mount_state, MountState); + +static const char* const path_state_table[_PATH_STATE_MAX] = { + [PATH_DEAD] = "dead", + [PATH_WAITING] = "waiting", + [PATH_RUNNING] = "running", + [PATH_FAILED] = "failed" +}; + +DEFINE_STRING_TABLE_LOOKUP(path_state, PathState); + +static const char* const scope_state_table[_SCOPE_STATE_MAX] = { + [SCOPE_DEAD] = "dead", + [SCOPE_START_CHOWN] = "start-chown", + [SCOPE_RUNNING] = "running", + [SCOPE_ABANDONED] = "abandoned", + [SCOPE_STOP_SIGTERM] = "stop-sigterm", + [SCOPE_STOP_SIGKILL] = "stop-sigkill", + [SCOPE_FAILED] = "failed", +}; + +DEFINE_STRING_TABLE_LOOKUP(scope_state, ScopeState); + +static const char* const service_state_table[_SERVICE_STATE_MAX] = { + [SERVICE_DEAD] = "dead", + [SERVICE_CONDITION] = "condition", + [SERVICE_START_PRE] = "start-pre", + [SERVICE_START] = "start", + [SERVICE_START_POST] = "start-post", + [SERVICE_RUNNING] = "running", + [SERVICE_EXITED] = "exited", + [SERVICE_RELOAD] = "reload", + [SERVICE_RELOAD_SIGNAL] = "reload-signal", + [SERVICE_RELOAD_NOTIFY] = "reload-notify", + [SERVICE_STOP] = "stop", + [SERVICE_STOP_WATCHDOG] = "stop-watchdog", + [SERVICE_STOP_SIGTERM] = "stop-sigterm", + [SERVICE_STOP_SIGKILL] = "stop-sigkill", + [SERVICE_STOP_POST] = "stop-post", + [SERVICE_FINAL_WATCHDOG] = "final-watchdog", + [SERVICE_FINAL_SIGTERM] = "final-sigterm", + [SERVICE_FINAL_SIGKILL] = "final-sigkill", + [SERVICE_FAILED] = "failed", + [SERVICE_DEAD_BEFORE_AUTO_RESTART] = "dead-before-auto-restart", + [SERVICE_FAILED_BEFORE_AUTO_RESTART] = "failed-before-auto-restart", + [SERVICE_DEAD_RESOURCES_PINNED] = "dead-resources-pinned", + [SERVICE_AUTO_RESTART] = "auto-restart", + [SERVICE_AUTO_RESTART_QUEUED] = "auto-restart-queued", + [SERVICE_CLEANING] = "cleaning", +}; + +DEFINE_STRING_TABLE_LOOKUP(service_state, ServiceState); + +static const char* const slice_state_table[_SLICE_STATE_MAX] = { + [SLICE_DEAD] = "dead", + [SLICE_ACTIVE] = "active" +}; + +DEFINE_STRING_TABLE_LOOKUP(slice_state, SliceState); + +static const char* const socket_state_table[_SOCKET_STATE_MAX] = { + [SOCKET_DEAD] = "dead", + [SOCKET_START_PRE] = "start-pre", + [SOCKET_START_CHOWN] = "start-chown", + [SOCKET_START_POST] = "start-post", + [SOCKET_LISTENING] = "listening", + [SOCKET_RUNNING] = "running", + [SOCKET_STOP_PRE] = "stop-pre", + [SOCKET_STOP_PRE_SIGTERM] = "stop-pre-sigterm", + [SOCKET_STOP_PRE_SIGKILL] = "stop-pre-sigkill", + [SOCKET_STOP_POST] = "stop-post", + [SOCKET_FINAL_SIGTERM] = "final-sigterm", + [SOCKET_FINAL_SIGKILL] = "final-sigkill", + [SOCKET_FAILED] = "failed", + [SOCKET_CLEANING] = "cleaning", +}; + +DEFINE_STRING_TABLE_LOOKUP(socket_state, SocketState); + +static const char* const swap_state_table[_SWAP_STATE_MAX] = { + [SWAP_DEAD] = "dead", + [SWAP_ACTIVATING] = "activating", + [SWAP_ACTIVATING_DONE] = "activating-done", + [SWAP_ACTIVE] = "active", + [SWAP_DEACTIVATING] = "deactivating", + [SWAP_DEACTIVATING_SIGTERM] = "deactivating-sigterm", + [SWAP_DEACTIVATING_SIGKILL] = "deactivating-sigkill", + [SWAP_FAILED] = "failed", + [SWAP_CLEANING] = "cleaning", +}; + +DEFINE_STRING_TABLE_LOOKUP(swap_state, SwapState); + +static const char* const target_state_table[_TARGET_STATE_MAX] = { + [TARGET_DEAD] = "dead", + [TARGET_ACTIVE] = "active" +}; + +DEFINE_STRING_TABLE_LOOKUP(target_state, TargetState); + +static const char* const timer_state_table[_TIMER_STATE_MAX] = { + [TIMER_DEAD] = "dead", + [TIMER_WAITING] = "waiting", + [TIMER_RUNNING] = "running", + [TIMER_ELAPSED] = "elapsed", + [TIMER_FAILED] = "failed" +}; + +DEFINE_STRING_TABLE_LOOKUP(timer_state, TimerState); + +static const char* const freezer_state_table[_FREEZER_STATE_MAX] = { + [FREEZER_RUNNING] = "running", + [FREEZER_FREEZING] = "freezing", + [FREEZER_FROZEN] = "frozen", + [FREEZER_THAWING] = "thawing", +}; + +DEFINE_STRING_TABLE_LOOKUP(freezer_state, FreezerState); + +// ---------------------------------------------------------------------------- +// our code + +typedef struct UnitAttribute { + union { + int boolean; + char *str; + uint64_t uint64; + int64_t int64; + uint32_t uint32; + int32_t int32; + double dbl; + }; +} UnitAttribute; + +struct UnitInfo; +typedef void (*attribute_handler_t)(struct UnitInfo *u, UnitAttribute *ua); + +static void update_freezer_state(struct UnitInfo *u, UnitAttribute *ua); + +struct { + const char *member; + char value_type; + + const char *show_as; + const char *info; + RRDF_FIELD_OPTIONS options; + RRDF_FIELD_FILTER filter; + + attribute_handler_t handler; +} unit_attributes[] = { + { + .member = "Type", + .value_type = SD_BUS_TYPE_STRING, + .show_as = "ServiceType", + .info = "Service Type", + .options = RRDF_FIELD_OPTS_VISIBLE, + .filter = RRDF_FIELD_FILTER_MULTISELECT, + }, { + .member = "Result", + .value_type = SD_BUS_TYPE_STRING, + .show_as = "Result", + .info = "Result", + .options = RRDF_FIELD_OPTS_VISIBLE, + .filter = RRDF_FIELD_FILTER_MULTISELECT, + }, { + .member = "UnitFileState", + .value_type = SD_BUS_TYPE_STRING, + .show_as = "Enabled", + .info = "Unit File State", + .options = RRDF_FIELD_OPTS_NONE, + .filter = RRDF_FIELD_FILTER_MULTISELECT, + }, { + .member = "UnitFilePreset", + .value_type = SD_BUS_TYPE_STRING, + .show_as = "Preset", + .info = "Unit File Preset", + .options = RRDF_FIELD_OPTS_NONE, + .filter = RRDF_FIELD_FILTER_MULTISELECT, + }, { + .member = "FreezerState", + .value_type = SD_BUS_TYPE_STRING, + .show_as = "FreezerState", + .info = "Freezer State", + .options = RRDF_FIELD_OPTS_NONE, + .filter = RRDF_FIELD_FILTER_MULTISELECT, + .handler = update_freezer_state, + }, +// { .member = "Id", .signature = "s", }, +// { .member = "LoadState", .signature = "s", }, +// { .member = "ActiveState", .signature = "s", }, +// { .member = "SubState", .signature = "s", }, +// { .member = "Description", .signature = "s", }, +// { .member = "Following", .signature = "s", }, +// { .member = "Documentation", .signature = "as", }, +// { .member = "FragmentPath", .signature = "s", }, +// { .member = "SourcePath", .signature = "s", }, +// { .member = "ControlGroup", .signature = "s", }, +// { .member = "DropInPaths", .signature = "as", }, +// { .member = "LoadError", .signature = "(ss)", }, +// { .member = "TriggeredBy", .signature = "as", }, +// { .member = "Triggers", .signature = "as", }, +// { .member = "InactiveExitTimestamp", .signature = "t", }, +// { .member = "InactiveExitTimestampMonotonic", .signature = "t", }, +// { .member = "ActiveEnterTimestamp", .signature = "t", }, +// { .member = "ActiveExitTimestamp", .signature = "t", }, +// { .member = "RuntimeMaxUSec", .signature = "t", }, +// { .member = "InactiveEnterTimestamp", .signature = "t", }, +// { .member = "NeedDaemonReload", .signature = "b", }, +// { .member = "Transient", .signature = "b", }, +// { .member = "ExecMainPID", .signature = "u", }, +// { .member = "MainPID", .signature = "u", }, +// { .member = "ControlPID", .signature = "u", }, +// { .member = "StatusText", .signature = "s", }, +// { .member = "PIDFile", .signature = "s", }, +// { .member = "StatusErrno", .signature = "i", }, +// { .member = "FileDescriptorStoreMax", .signature = "u", }, +// { .member = "NFileDescriptorStore", .signature = "u", }, +// { .member = "ExecMainStartTimestamp", .signature = "t", }, +// { .member = "ExecMainExitTimestamp", .signature = "t", }, +// { .member = "ExecMainCode", .signature = "i", }, +// { .member = "ExecMainStatus", .signature = "i", }, +// { .member = "LogNamespace", .signature = "s", }, +// { .member = "ConditionTimestamp", .signature = "t", }, +// { .member = "ConditionResult", .signature = "b", }, +// { .member = "Conditions", .signature = "a(sbbsi)", }, +// { .member = "AssertTimestamp", .signature = "t", }, +// { .member = "AssertResult", .signature = "b", }, +// { .member = "Asserts", .signature = "a(sbbsi)", }, +// { .member = "NextElapseUSecRealtime", .signature = "t", }, +// { .member = "NextElapseUSecMonotonic", .signature = "t", }, +// { .member = "NAccepted", .signature = "u", }, +// { .member = "NConnections", .signature = "u", }, +// { .member = "NRefused", .signature = "u", }, +// { .member = "Accept", .signature = "b", }, +// { .member = "Listen", .signature = "a(ss)", }, +// { .member = "SysFSPath", .signature = "s", }, +// { .member = "Where", .signature = "s", }, +// { .member = "What", .signature = "s", }, +// { .member = "MemoryCurrent", .signature = "t", }, +// { .member = "MemoryAvailable", .signature = "t", }, +// { .member = "DefaultMemoryMin", .signature = "t", }, +// { .member = "DefaultMemoryLow", .signature = "t", }, +// { .member = "DefaultStartupMemoryLow", .signature = "t", }, +// { .member = "MemoryMin", .signature = "t", }, +// { .member = "MemoryLow", .signature = "t", }, +// { .member = "StartupMemoryLow", .signature = "t", }, +// { .member = "MemoryHigh", .signature = "t", }, +// { .member = "StartupMemoryHigh", .signature = "t", }, +// { .member = "MemoryMax", .signature = "t", }, +// { .member = "StartupMemoryMax", .signature = "t", }, +// { .member = "MemorySwapMax", .signature = "t", }, +// { .member = "StartupMemorySwapMax", .signature = "t", }, +// { .member = "MemoryZSwapMax", .signature = "t", }, +// { .member = "StartupMemoryZSwapMax", .signature = "t", }, +// { .member = "MemoryLimit", .signature = "t", }, +// { .member = "CPUUsageNSec", .signature = "t", }, +// { .member = "TasksCurrent", .signature = "t", }, +// { .member = "TasksMax", .signature = "t", }, +// { .member = "IPIngressBytes", .signature = "t", }, +// { .member = "IPEgressBytes", .signature = "t", }, +// { .member = "IOReadBytes", .signature = "t", }, +// { .member = "IOWriteBytes", .signature = "t", }, +// { .member = "ExecCondition", .signature = "a(sasbttttuii)", }, +// { .member = "ExecConditionEx", .signature = "a(sasasttttuii)", }, +// { .member = "ExecStartPre", .signature = "a(sasbttttuii)", }, +// { .member = "ExecStartPreEx", .signature = "a(sasasttttuii)", }, +// { .member = "ExecStart", .signature = "a(sasbttttuii)", }, +// { .member = "ExecStartEx", .signature = "a(sasasttttuii)", }, +// { .member = "ExecStartPost", .signature = "a(sasbttttuii)", }, +// { .member = "ExecStartPostEx", .signature = "a(sasasttttuii)", }, +// { .member = "ExecReload", .signature = "a(sasbttttuii)", }, +// { .member = "ExecReloadEx", .signature = "a(sasasttttuii)", }, +// { .member = "ExecStopPre", .signature = "a(sasbttttuii)", }, +// { .member = "ExecStop", .signature = "a(sasbttttuii)", }, +// { .member = "ExecStopEx", .signature = "a(sasasttttuii)", }, +// { .member = "ExecStopPost", .signature = "a(sasbttttuii)", }, +// { .member = "ExecStopPostEx", .signature = "a(sasasttttuii)", }, +}; + +#define _UNIT_ATTRIBUTE_MAX (sizeof(unit_attributes) / sizeof(unit_attributes[0])) + +typedef struct UnitInfo { + char *id; + char *type; + char *description; + char *load_state; + char *active_state; + char *sub_state; + char *following; + char *unit_path; + uint32_t job_id; + char *job_type; + char *job_path; + + UnitType UnitType; + UnitLoadState UnitLoadState; + UnitActiveState UnitActiveState; + FreezerState FreezerState; + + union { + AutomountState AutomountState; + DeviceState DeviceState; + MountState MountState; + PathState PathState; + ScopeState ScopeState; + ServiceState ServiceState; + SliceState SliceState; + SocketState SocketState; + SwapState SwapState; + TargetState TargetState; + TimerState TimerState; + }; + + struct UnitAttribute attributes[_UNIT_ATTRIBUTE_MAX]; + + FACET_ROW_SEVERITY severity; + uint32_t prio; + + struct UnitInfo *prev, *next; +} UnitInfo; + +static void update_freezer_state(UnitInfo *u, UnitAttribute *ua) { + u->FreezerState = freezer_state_from_string(ua->str); +} + +// ---------------------------------------------------------------------------- +// common helpers + +static void log_dbus_error(int r, const char *msg) { + netdata_log_error("SYSTEMD_UNITS: %s failed with error %d (%s)", msg, r, strerror(-r)); +} + +// ---------------------------------------------------------------------------- +// attributes management + +static inline ssize_t unit_property_slot_from_string(const char *s) { + if(!s || !*s) + return -EINVAL; + + for(size_t i = 0; i < _UNIT_ATTRIBUTE_MAX ;i++) + if(streq_ptr(unit_attributes[i].member, s)) + return (ssize_t)i; + + return -EINVAL; +} + +static inline const char *unit_property_name_to_string_from_slot(ssize_t i) { + if(i >= 0 && i < (ssize_t)_UNIT_ATTRIBUTE_MAX) + return unit_attributes[i].member; + + return NULL; +} + +static inline void systemd_unit_free_property(char type, struct UnitAttribute *at) { + switch(type) { + case SD_BUS_TYPE_STRING: + case SD_BUS_TYPE_OBJECT_PATH: + freez(at->str); + at->str = NULL; + break; + + default: + break; + } +} + +static int systemd_unit_get_property(sd_bus_message *m, UnitInfo *u, const char *name) { + int r; + char type; + + r = sd_bus_message_peek_type(m, &type, NULL); + if(r < 0) { + log_dbus_error(r, "sd_bus_message_peek_type()"); + return r; + } + + ssize_t slot = unit_property_slot_from_string(name); + if(slot < 0) { + // internal_error(true, "unused attribute '%s' for unit '%s'", name, u->id); + sd_bus_message_skip(m, NULL); + return 0; + } + + systemd_unit_free_property(unit_attributes[slot].value_type, &u->attributes[slot]); + + if(unit_attributes[slot].value_type != type) { + netdata_log_error("Type of field '%s' expected to be '%c' but found '%c'. Ignoring field.", + unit_attributes[slot].member, unit_attributes[slot].value_type, type); + sd_bus_message_skip(m, NULL); + return 0; + } + + switch (type) { + case SD_BUS_TYPE_OBJECT_PATH: + case SD_BUS_TYPE_STRING: { + char *s; + + r = sd_bus_message_read_basic(m, type, &s); + if(r < 0) { + log_dbus_error(r, "sd_bus_message_read_basic()"); + return r; + } + + if(s && *s) + u->attributes[slot].str = strdupz(s); + } + break; + + case SD_BUS_TYPE_BOOLEAN: { + r = sd_bus_message_read_basic(m, type, &u->attributes[slot].boolean); + if(r < 0) { + log_dbus_error(r, "sd_bus_message_read_basic()"); + return r; + } + } + break; + + case SD_BUS_TYPE_UINT64: { + r = sd_bus_message_read_basic(m, type, &u->attributes[slot].uint64); + if(r < 0) { + log_dbus_error(r, "sd_bus_message_read_basic()"); + return r; + } + } + break; + + case SD_BUS_TYPE_INT64: { + r = sd_bus_message_read_basic(m, type, &u->attributes[slot].int64); + if(r < 0) { + log_dbus_error(r, "sd_bus_message_read_basic()"); + return r; + } + } + break; + + case SD_BUS_TYPE_UINT32: { + r = sd_bus_message_read_basic(m, type, &u->attributes[slot].uint32); + if(r < 0) { + log_dbus_error(r, "sd_bus_message_read_basic()"); + return r; + } + } + break; + + case SD_BUS_TYPE_INT32: { + r = sd_bus_message_read_basic(m, type, &u->attributes[slot].int32); + if(r < 0) { + log_dbus_error(r, "sd_bus_message_read_basic()"); + return r; + } + } + break; + + case SD_BUS_TYPE_DOUBLE: { + r = sd_bus_message_read_basic(m, type, &u->attributes[slot].dbl); + if(r < 0) { + log_dbus_error(r, "sd_bus_message_read_basic()"); + return r; + } + } + break; + + case SD_BUS_TYPE_ARRAY: { + internal_error(true, "member '%s' is an array", name); + sd_bus_message_skip(m, NULL); + return 0; + } + break; + + default: { + internal_error(true, "unknown field type '%c' for key '%s'", type, name); + sd_bus_message_skip(m, NULL); + return 0; + } + break; + } + + if(unit_attributes[slot].handler) + unit_attributes[slot].handler(u, &u->attributes[slot]); + + return 0; +} + +static int systemd_unit_get_all_properties(sd_bus *bus, UnitInfo *u) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + int r; + + r = sd_bus_call_method(bus, + "org.freedesktop.systemd1", + u->unit_path, + "org.freedesktop.DBus.Properties", + "GetAll", + &error, + &m, + "s", ""); + if (r < 0) { + log_dbus_error(r, "sd_bus_call_method(p1)"); + return r; + } + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "{sv}"); + if (r < 0) { + log_dbus_error(r, "sd_bus_message_enter_container(p2)"); + return r; + } + + int c = 0; + while ((r = sd_bus_message_enter_container(m, SD_BUS_TYPE_DICT_ENTRY, "sv")) > 0) { + const char *member, *contents; + c++; + + r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &member); + if (r < 0) { + log_dbus_error(r, "sd_bus_message_read_basic(p3)"); + return r; + } + + r = sd_bus_message_peek_type(m, NULL, &contents); + if (r < 0) { + log_dbus_error(r, "sd_bus_message_peek_type(p4)"); + return r; + } + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, contents); + if (r < 0) { + log_dbus_error(r, "sd_bus_message_enter_container(p5)"); + return r; + } + + systemd_unit_get_property(m, u, member); + + r = sd_bus_message_exit_container(m); + if(r < 0) { + log_dbus_error(r, "sd_bus_message_exit_container(p6)"); + return r; + } + + r = sd_bus_message_exit_container(m); + if(r < 0) { + log_dbus_error(r, "sd_bus_message_exit_container(p7)"); + return r; + } + } + if(r < 0) { + log_dbus_error(r, "sd_bus_message_enter_container(p8)"); + return r; + } + + r = sd_bus_message_exit_container(m); + if(r < 0) { + log_dbus_error(r, "sd_bus_message_exit_container(p9)"); + return r; + } + + return 0; +} + +static void systemd_units_get_all_properties(sd_bus *bus, UnitInfo *base) { + for(UnitInfo *u = base ; u ;u = u->next) + systemd_unit_get_all_properties(bus, u); +} + + + +// ---------------------------------------------------------------------------- +// main unit info + +int bus_parse_unit_info(sd_bus_message *message, UnitInfo *u) { + assert(message); + assert(u); + + u->type = NULL; + + int r = sd_bus_message_read( + message, + SYSTEMD_UNITS_DBUS_TYPES, + &u->id, + &u->description, + &u->load_state, + &u->active_state, + &u->sub_state, + &u->following, + &u->unit_path, + &u->job_id, + &u->job_type, + &u->job_path); + + if(r <= 0) + return r; + + char *dot; + if(u->id && (dot = strrchr(u->id, '.')) != NULL) + u->type = &dot[1]; + else + u->type = "unknown"; + + u->UnitType = unit_type_from_string(u->type); + u->UnitLoadState = unit_load_state_from_string(u->load_state); + u->UnitActiveState = unit_active_state_from_string(u->active_state); + + switch(u->UnitType) { + case UNIT_SERVICE: + u->ServiceState = service_state_from_string(u->sub_state); + break; + + case UNIT_MOUNT: + u->MountState = mount_state_from_string(u->sub_state); + break; + + case UNIT_SWAP: + u->SwapState = swap_state_from_string(u->sub_state); + break; + + case UNIT_SOCKET: + u->SocketState = socket_state_from_string(u->sub_state); + break; + + case UNIT_TARGET: + u->TargetState = target_state_from_string(u->sub_state); + break; + + case UNIT_DEVICE: + u->DeviceState = device_state_from_string(u->sub_state); + break; + + case UNIT_AUTOMOUNT: + u->AutomountState = automount_state_from_string(u->sub_state); + break; + + case UNIT_TIMER: + u->TimerState = timer_state_from_string(u->sub_state); + break; + + case UNIT_PATH: + u->PathState = path_state_from_string(u->sub_state); + break; + + case UNIT_SLICE: + u->SliceState = slice_state_from_string(u->sub_state); + break; + + case UNIT_SCOPE: + u->ScopeState = scope_state_from_string(u->sub_state); + break; + + default: + break; + } + + return r; +} + +static int hex_to_int(char c) { + if (c >= '0' && c <= '9') return c - '0'; + if (c >= 'a' && c <= 'f') return c - 'a' + 10; + if (c >= 'A' && c <= 'F') return c - 'A' + 10; + return 0; +} + +// un-escape hex sequences (\xNN) in id +static void txt_decode(char *txt) { + if(!txt || !*txt) + return; + + char *src = txt, *dst = txt; + + size_t id_len = strlen(src); + size_t s = 0, d = 0; + for(; s < id_len ; s++) { + if(src[s] == '\\' && src[s + 1] == 'x' && isxdigit(src[s + 2]) && isxdigit(src[s + 3])) { + int value = (hex_to_int(src[s + 2]) << 4) + hex_to_int(src[s + 3]); + dst[d++] = (char)value; + s += 3; + } + else + dst[d++] = src[s]; + } + dst[d] = '\0'; +} + +static UnitInfo *systemd_units_get_all(void) { + _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + + UnitInfo *base = NULL; + int r; + + r = sd_bus_default_system(&bus); + if (r < 0) { + log_dbus_error(r, "sd_bus_default_system()"); + return base; + } + + // This calls the ListUnits method of the org.freedesktop.systemd1.Manager interface + // Replace "ListUnits" with "ListUnitsFiltered" to get specific units based on filters + r = sd_bus_call_method(bus, + "org.freedesktop.systemd1", /* service to contact */ + "/org/freedesktop/systemd1", /* object path */ + "org.freedesktop.systemd1.Manager", /* interface name */ + "ListUnits", /* method name */ + &error, /* object to return error in */ + &reply, /* return message on success */ + NULL); /* input signature */ + if (r < 0) { + log_dbus_error(r, "sd_bus_call_method()"); + return base; + } + + r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, SYSTEMD_UNITS_DBUS_TYPES); + if (r < 0) { + log_dbus_error(r, "sd_bus_message_enter_container()"); + return base; + } + + UnitInfo u; + memset(&u, 0, sizeof(u)); + while ((r = bus_parse_unit_info(reply, &u)) > 0) { + UnitInfo *i = callocz(1, sizeof(u)); + *i = u; + + i->id = strdupz(u.id && *u.id ? u.id : "-"); + txt_decode(i->id); + + i->type = strdupz(u.type && *u.type ? u.type : "-"); + i->description = strdupz(u.description && *u.description ? u.description : "-"); + txt_decode(i->description); + + i->load_state = strdupz(u.load_state && *u.load_state ? u.load_state : "-"); + i->active_state = strdupz(u.active_state && *u.active_state ? u.active_state : "-"); + i->sub_state = strdupz(u.sub_state && *u.sub_state ? u.sub_state : "-"); + i->following = strdupz(u.following && *u.following ? u.following : "-"); + i->unit_path = strdupz(u.unit_path && *u.unit_path ? u.unit_path : "-"); + i->job_type = strdupz(u.job_type && *u.job_type ? u.job_type : "-"); + i->job_path = strdupz(u.job_path && *u.job_path ? u.job_path : "-"); + i->job_id = u.job_id; + + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(base, i, prev, next); + memset(&u, 0, sizeof(u)); + } + if (r < 0) { + log_dbus_error(r, "sd_bus_message_read()"); + return base; + } + + r = sd_bus_message_exit_container(reply); + if (r < 0) { + log_dbus_error(r, "sd_bus_message_exit_container()"); + return base; + } + + systemd_units_get_all_properties(bus, base); + + return base; +} + +void systemd_units_free_all(UnitInfo *base) { + while(base) { + UnitInfo *u = base; + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(base, u, prev, next); + freez((void *)u->id); + freez((void *)u->type); + freez((void *)u->description); + freez((void *)u->load_state); + freez((void *)u->active_state); + freez((void *)u->sub_state); + freez((void *)u->following); + freez((void *)u->unit_path); + freez((void *)u->job_type); + freez((void *)u->job_path); + + for(int i = 0; i < (ssize_t)_UNIT_ATTRIBUTE_MAX ;i++) + systemd_unit_free_property(unit_attributes[i].value_type, &u->attributes[i]); + + freez(u); + } +} + +// ---------------------------------------------------------------------------- + +static void netdata_systemd_units_function_help(const char *transaction) { + BUFFER *wb = buffer_create(0, NULL); + buffer_sprintf(wb, + "%s / %s\n" + "\n" + "%s\n" + "\n" + "The following parameters are supported:\n" + "\n" + " help\n" + " Shows this help message.\n" + "\n" + " info\n" + " Request initial configuration information about the plugin.\n" + " The key entity returned is the required_params array, which includes\n" + " all the available systemd journal sources.\n" + " When `info` is requested, all other parameters are ignored.\n" + "\n" + , program_name + , SYSTEMD_UNITS_FUNCTION_NAME + , SYSTEMD_UNITS_FUNCTION_DESCRIPTION + ); + + netdata_mutex_lock(&stdout_mutex); + pluginsd_function_result_to_stdout(transaction, HTTP_RESP_OK, "text/plain", now_realtime_sec() + 3600, wb); + netdata_mutex_unlock(&stdout_mutex); + + buffer_free(wb); +} + +static void netdata_systemd_units_function_info(const char *transaction) { + BUFFER *wb = buffer_create(0, NULL); + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY); + + buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); + buffer_json_member_add_string(wb, "type", "table"); + buffer_json_member_add_string(wb, "help", SYSTEMD_UNITS_FUNCTION_DESCRIPTION); + + buffer_json_finalize(wb); + netdata_mutex_lock(&stdout_mutex); + pluginsd_function_result_to_stdout(transaction, HTTP_RESP_OK, "text/plain", now_realtime_sec() + 3600, wb); + netdata_mutex_unlock(&stdout_mutex); + + buffer_free(wb); +} + +// ---------------------------------------------------------------------------- + +static void systemd_unit_priority(UnitInfo *u, size_t units) { + uint32_t prio; + + switch(u->severity) { + case FACET_ROW_SEVERITY_CRITICAL: + prio = 0; + break; + + default: + case FACET_ROW_SEVERITY_WARNING: + prio = 1; + break; + + case FACET_ROW_SEVERITY_NOTICE: + prio = 2; + break; + + case FACET_ROW_SEVERITY_NORMAL: + prio = 3; + break; + + case FACET_ROW_SEVERITY_DEBUG: + prio = 4; + break; + } + + prio = prio * (uint32_t)(_UNIT_TYPE_MAX + 1) + (uint32_t)u->UnitType; + u->prio = (prio * units) + u->prio; +} + +#define if_less(current, max, target) ({ \ + typeof(current) _wanted = (current); \ + if((current) < (target)) \ + _wanted = (target) > (max) ? (max) : (target); \ + _wanted; \ +}) + +#define if_normal(current, max, target) ({ \ + typeof(current) _wanted = (current); \ + if((current) == FACET_ROW_SEVERITY_NORMAL) \ + _wanted = (target) > (max) ? (max) : (target); \ + _wanted; \ +}) + +FACET_ROW_SEVERITY system_unit_severity(UnitInfo *u) { + FACET_ROW_SEVERITY severity, max_severity; + + switch(u->UnitLoadState) { + case UNIT_ERROR: + case UNIT_BAD_SETTING: + severity = FACET_ROW_SEVERITY_CRITICAL; + max_severity = FACET_ROW_SEVERITY_CRITICAL; + break; + + default: + severity = FACET_ROW_SEVERITY_WARNING; + max_severity = FACET_ROW_SEVERITY_CRITICAL; + break; + + case UNIT_NOT_FOUND: + severity = FACET_ROW_SEVERITY_NOTICE; + max_severity = FACET_ROW_SEVERITY_NOTICE; + break; + + case UNIT_LOADED: + severity = FACET_ROW_SEVERITY_NORMAL; + max_severity = FACET_ROW_SEVERITY_CRITICAL; + break; + + case UNIT_MERGED: + case UNIT_MASKED: + case UNIT_STUB: + severity = FACET_ROW_SEVERITY_DEBUG; + max_severity = FACET_ROW_SEVERITY_DEBUG; + break; + } + + switch(u->UnitActiveState) { + case UNIT_FAILED: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_CRITICAL); + break; + + default: + case UNIT_RELOADING: + case UNIT_ACTIVATING: + case UNIT_DEACTIVATING: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING); + break; + + case UNIT_MAINTENANCE: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_NOTICE); + break; + + case UNIT_ACTIVE: + break; + + case UNIT_INACTIVE: + severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG); + break; + } + + switch(u->FreezerState) { + default: + case FREEZER_FROZEN: + case FREEZER_FREEZING: + case FREEZER_THAWING: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING); + break; + + case FREEZER_RUNNING: + break; + } + + switch(u->UnitType) { + case UNIT_SERVICE: + switch(u->ServiceState) { + case SERVICE_FAILED: + case SERVICE_FAILED_BEFORE_AUTO_RESTART: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_CRITICAL); + break; + + default: + case SERVICE_STOP: + case SERVICE_STOP_WATCHDOG: + case SERVICE_STOP_SIGTERM: + case SERVICE_STOP_SIGKILL: + case SERVICE_STOP_POST: + case SERVICE_FINAL_WATCHDOG: + case SERVICE_FINAL_SIGTERM: + case SERVICE_FINAL_SIGKILL: + case SERVICE_AUTO_RESTART: + case SERVICE_AUTO_RESTART_QUEUED: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING); + break; + + case SERVICE_CONDITION: + case SERVICE_START_PRE: + case SERVICE_START: + case SERVICE_START_POST: + case SERVICE_RELOAD: + case SERVICE_RELOAD_SIGNAL: + case SERVICE_RELOAD_NOTIFY: + case SERVICE_DEAD_RESOURCES_PINNED: + case SERVICE_CLEANING: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_NOTICE); + break; + + case SERVICE_EXITED: + case SERVICE_RUNNING: + break; + + case SERVICE_DEAD: + case SERVICE_DEAD_BEFORE_AUTO_RESTART: + severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG); + break; + } + break; + + case UNIT_MOUNT: + switch(u->MountState) { + case MOUNT_FAILED: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_CRITICAL); + break; + + default: + case MOUNT_REMOUNTING_SIGTERM: + case MOUNT_REMOUNTING_SIGKILL: + case MOUNT_UNMOUNTING_SIGTERM: + case MOUNT_UNMOUNTING_SIGKILL: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING); + break; + + case MOUNT_MOUNTING: + case MOUNT_MOUNTING_DONE: + case MOUNT_REMOUNTING: + case MOUNT_UNMOUNTING: + case MOUNT_CLEANING: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_NOTICE); + break; + + case MOUNT_MOUNTED: + break; + + case MOUNT_DEAD: + severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG); + break; + } + break; + + case UNIT_SWAP: + switch(u->SwapState) { + case SWAP_FAILED: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_CRITICAL); + break; + + default: + case SWAP_DEACTIVATING_SIGTERM: + case SWAP_DEACTIVATING_SIGKILL: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING); + break; + + case SWAP_ACTIVATING: + case SWAP_ACTIVATING_DONE: + case SWAP_DEACTIVATING: + case SWAP_CLEANING: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_NOTICE); + break; + + case SWAP_ACTIVE: + break; + + case SWAP_DEAD: + severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG); + break; + } + break; + + case UNIT_SOCKET: + switch(u->SocketState) { + case SOCKET_FAILED: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_CRITICAL); + break; + + default: + case SOCKET_STOP_PRE_SIGTERM: + case SOCKET_STOP_PRE_SIGKILL: + case SOCKET_FINAL_SIGTERM: + case SOCKET_FINAL_SIGKILL: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING); + break; + + case SOCKET_START_PRE: + case SOCKET_START_CHOWN: + case SOCKET_START_POST: + case SOCKET_STOP_PRE: + case SOCKET_STOP_POST: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_NOTICE); + break; + + case SOCKET_RUNNING: + case SOCKET_LISTENING: + break; + + case SOCKET_DEAD: + severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG); + break; + } + break; + + case UNIT_TARGET: + switch(u->TargetState) { + default: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING); + break; + + case TARGET_ACTIVE: + break; + + case TARGET_DEAD: + severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG); + break; + } + break; + + case UNIT_DEVICE: + switch(u->DeviceState) { + default: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING); + break; + + case DEVICE_TENTATIVE: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_NOTICE); + break; + + case DEVICE_PLUGGED: + break; + + case DEVICE_DEAD: + severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG); + break; + } + break; + + case UNIT_AUTOMOUNT: + switch(u->AutomountState) { + case AUTOMOUNT_FAILED: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_CRITICAL); + break; + + default: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING); + break; + + case AUTOMOUNT_WAITING: + case AUTOMOUNT_RUNNING: + break; + + case AUTOMOUNT_DEAD: + severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG); + break; + } + break; + + case UNIT_TIMER: + switch(u->TimerState) { + case TIMER_FAILED: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_CRITICAL); + break; + + default: + case TIMER_ELAPSED: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING); + break; + + case TIMER_WAITING: + case TIMER_RUNNING: + break; + + case TIMER_DEAD: + severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG); + break; + } + break; + + case UNIT_PATH: + switch(u->PathState) { + case PATH_FAILED: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_CRITICAL); + break; + + default: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING); + break; + + case PATH_WAITING: + case PATH_RUNNING: + break; + + case PATH_DEAD: + severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG); + break; + } + break; + + case UNIT_SLICE: + switch(u->SliceState) { + default: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING); + break; + + case SLICE_ACTIVE: + break; + + case SLICE_DEAD: + severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG); + break; + } + break; + + case UNIT_SCOPE: + switch(u->ScopeState) { + case SCOPE_FAILED: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_CRITICAL); + break; + + default: + case SCOPE_STOP_SIGTERM: + case SCOPE_STOP_SIGKILL: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING); + break; + + case SCOPE_ABANDONED: + case SCOPE_START_CHOWN: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_NOTICE); + break; + + case SCOPE_RUNNING: + break; + + case SCOPE_DEAD: + severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG); + break; + } + break; + + default: + severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING); + break; + } + + u->severity = severity; + return severity; +} + +int unit_info_compar(const void *a, const void *b) { + UnitInfo *u1 = *((UnitInfo **)a); + UnitInfo *u2 = *((UnitInfo **)b); + + return strcasecmp(u1->id, u2->id); +} + +void systemd_units_assign_priority(UnitInfo *base) { + size_t units = 0, c = 0, prio = 0; + for(UnitInfo *u = base; u ; u = u->next) + units++; + + UnitInfo *array[units]; + for(UnitInfo *u = base; u ; u = u->next) + array[c++] = u; + + qsort(array, units, sizeof(UnitInfo *), unit_info_compar); + + for(c = 0; c < units ; c++) { + array[c]->prio = prio++; + system_unit_severity(array[c]); + systemd_unit_priority(array[c], units); + } +} + +void function_systemd_units(const char *transaction, char *function, int timeout, bool *cancelled) { + char *words[SYSTEMD_UNITS_MAX_PARAMS] = { NULL }; + size_t num_words = quoted_strings_splitter_pluginsd(function, words, SYSTEMD_UNITS_MAX_PARAMS); + for(int i = 1; i < SYSTEMD_UNITS_MAX_PARAMS ;i++) { + char *keyword = get_word(words, num_words, i); + if(!keyword) break; + + if(strcmp(keyword, "info") == 0) { + netdata_systemd_units_function_info(transaction); + return; + } + else if(strcmp(keyword, "help") == 0) { + netdata_systemd_units_function_help(transaction); + return; + } + } + + UnitInfo *base = systemd_units_get_all(); + systemd_units_assign_priority(base); + + BUFFER *wb = buffer_create(0, NULL); + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY); + + buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); + buffer_json_member_add_string(wb, "type", "table"); + buffer_json_member_add_time_t(wb, "update_every", 10); + buffer_json_member_add_string(wb, "help", SYSTEMD_UNITS_FUNCTION_DESCRIPTION); + buffer_json_member_add_array(wb, "data"); + + size_t count[_UNIT_ATTRIBUTE_MAX] = { 0 }; + struct UnitAttribute max[_UNIT_ATTRIBUTE_MAX]; + + for(UnitInfo *u = base; u ;u = u->next) { + buffer_json_add_array_item_array(wb); + { + buffer_json_add_array_item_string(wb, u->id); + + buffer_json_add_array_item_object(wb); + { + buffer_json_member_add_string(wb, "severity", facets_severity_to_string(u->severity)); + } + buffer_json_object_close(wb); + + buffer_json_add_array_item_string(wb, u->type); + buffer_json_add_array_item_string(wb, u->description); + buffer_json_add_array_item_string(wb, u->load_state); + buffer_json_add_array_item_string(wb, u->active_state); + buffer_json_add_array_item_string(wb, u->sub_state); + buffer_json_add_array_item_string(wb, u->following); + buffer_json_add_array_item_string(wb, u->unit_path); + buffer_json_add_array_item_uint64(wb, u->job_id); + buffer_json_add_array_item_string(wb, u->job_type); + buffer_json_add_array_item_string(wb, u->job_path); + + for(ssize_t i = 0; i < (ssize_t)_UNIT_ATTRIBUTE_MAX ;i++) { + switch(unit_attributes[i].value_type) { + case SD_BUS_TYPE_OBJECT_PATH: + case SD_BUS_TYPE_STRING: + buffer_json_add_array_item_string(wb, u->attributes[i].str && *u->attributes[i].str ? u->attributes[i].str : "-"); + break; + + case SD_BUS_TYPE_UINT64: + buffer_json_add_array_item_uint64(wb, u->attributes[i].uint64); + if(!count[i]++) max[i].uint64 = 0; + max[i].uint64 = MAX(max[i].uint64, u->attributes[i].uint64); + break; + + case SD_BUS_TYPE_UINT32: + buffer_json_add_array_item_uint64(wb, u->attributes[i].uint32); + if(!count[i]++) max[i].uint32 = 0; + max[i].uint32 = MAX(max[i].uint32, u->attributes[i].uint32); + break; + + case SD_BUS_TYPE_INT64: + buffer_json_add_array_item_uint64(wb, u->attributes[i].int64); + if(!count[i]++) max[i].uint64 = 0; + max[i].int64 = MAX(max[i].int64, u->attributes[i].int64); + break; + + case SD_BUS_TYPE_INT32: + buffer_json_add_array_item_uint64(wb, u->attributes[i].int32); + if(!count[i]++) max[i].int32 = 0; + max[i].int32 = MAX(max[i].int32, u->attributes[i].int32); + break; + + case SD_BUS_TYPE_DOUBLE: + buffer_json_add_array_item_double(wb, u->attributes[i].dbl); + if(!count[i]++) max[i].dbl = 0.0; + max[i].dbl = MAX(max[i].dbl, u->attributes[i].dbl); + break; + + case SD_BUS_TYPE_BOOLEAN: + buffer_json_add_array_item_boolean(wb, u->attributes[i].boolean); + break; + + default: + break; + } + } + + buffer_json_add_array_item_uint64(wb, u->prio); + buffer_json_add_array_item_uint64(wb, 1); // count + } + buffer_json_array_close(wb); + } + + buffer_json_array_close(wb); // data + + buffer_json_member_add_object(wb, "columns"); + { + size_t field_id = 0; + + buffer_rrdf_table_add_field(wb, field_id++, "id", "Unit ID", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_WRAP | RRDF_FIELD_OPTS_FULL_WIDTH, + NULL); + + buffer_rrdf_table_add_field( + wb, field_id++, + "rowOptions", "rowOptions", + RRDF_FIELD_TYPE_NONE, + RRDR_FIELD_VISUAL_ROW_OPTIONS, + RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN, + RRDF_FIELD_SORT_FIXED, + NULL, + RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_DUMMY, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "type", "Unit Type", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_EXPANDED_FILTER, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "description", "Unit Description", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_WRAP | RRDF_FIELD_OPTS_FULL_WIDTH, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "loadState", "Unit Load State", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_EXPANDED_FILTER, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "activeState", "Unit Active State", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_EXPANDED_FILTER, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "subState", "Unit Sub State", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_EXPANDED_FILTER, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "following", "Unit Following", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_WRAP, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "path", "Unit Path", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_WRAP | RRDF_FIELD_OPTS_FULL_WIDTH, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "jobId", "Unit Job ID", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_NONE, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "jobType", "Unit Job Type", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_NONE, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "jobPath", "Unit Job Path", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_WRAP | RRDF_FIELD_OPTS_FULL_WIDTH, + NULL); + + for(ssize_t i = 0; i < (ssize_t)_UNIT_ATTRIBUTE_MAX ;i++) { + char key[256], name[256]; + + if(unit_attributes[i].show_as) + snprintfz(key, sizeof(key), "%s", unit_attributes[i].show_as); + else + snprintfz(key, sizeof(key), "attribute%s", unit_property_name_to_string_from_slot(i)); + + if(unit_attributes[i].info) + snprintfz(name, sizeof(name), "%s", unit_attributes[i].info); + else + snprintfz(name, sizeof(name), "Attribute %s", unit_property_name_to_string_from_slot(i)); + + RRDF_FIELD_OPTIONS options = unit_attributes[i].options; + RRDF_FIELD_FILTER filter = unit_attributes[i].filter; + + switch(unit_attributes[i].value_type) { + case SD_BUS_TYPE_OBJECT_PATH: + case SD_BUS_TYPE_STRING: + buffer_rrdf_table_add_field(wb, field_id++, key, name, + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, filter, + RRDF_FIELD_OPTS_WRAP | options, + NULL); + break; + + case SD_BUS_TYPE_INT32: + case SD_BUS_TYPE_UINT32: + case SD_BUS_TYPE_INT64: + case SD_BUS_TYPE_UINT64: { + double m; + if(unit_attributes[i].value_type == SD_BUS_TYPE_UINT64) + m = (double)max[i].uint64; + else if(unit_attributes[i].value_type == SD_BUS_TYPE_INT64) + m = (double)max[i].int64; + else if(unit_attributes[i].value_type == SD_BUS_TYPE_UINT32) + m = (double)max[i].uint32; + else if(unit_attributes[i].value_type == SD_BUS_TYPE_INT32) + m = (double)max[i].int32; + + buffer_rrdf_table_add_field(wb, field_id++, key, name, + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, m, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, filter, + RRDF_FIELD_OPTS_WRAP | options, + NULL); + } + break; + + case SD_BUS_TYPE_DOUBLE: + buffer_rrdf_table_add_field(wb, field_id++, key, name, + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 2, NULL, max[i].dbl, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, filter, + RRDF_FIELD_OPTS_WRAP | options, + NULL); + break; + + case SD_BUS_TYPE_BOOLEAN: + buffer_rrdf_table_add_field(wb, field_id++, key, name, + RRDF_FIELD_TYPE_BOOLEAN, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, filter, + RRDF_FIELD_OPTS_WRAP | options, + NULL); + break; + + default: + break; + } + + } + + buffer_rrdf_table_add_field(wb, field_id++, "priority", "Priority", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_NONE, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "count", "Count", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_NONE, + NULL); + } + + buffer_json_object_close(wb); // columns + buffer_json_member_add_string(wb, "default_sort_column", "priority"); + + buffer_json_member_add_object(wb, "charts"); + { + buffer_json_member_add_object(wb, "count"); + { + buffer_json_member_add_string(wb, "name", "count"); + buffer_json_member_add_string(wb, "type", "stacked-bar"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "count"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); // charts + + buffer_json_member_add_array(wb, "default_charts"); + { + buffer_json_add_array_item_array(wb); + buffer_json_add_array_item_string(wb, "count"); + buffer_json_add_array_item_string(wb, "activeState"); + buffer_json_array_close(wb); + buffer_json_add_array_item_array(wb); + buffer_json_add_array_item_string(wb, "count"); + buffer_json_add_array_item_string(wb, "subState"); + buffer_json_array_close(wb); + } + buffer_json_array_close(wb); + + buffer_json_member_add_object(wb, "group_by"); + { + buffer_json_member_add_object(wb, "type"); + { + buffer_json_member_add_string(wb, "name", "Top Down Tree"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "type"); + buffer_json_add_array_item_string(wb, "loadState"); + buffer_json_add_array_item_string(wb, "activeState"); + buffer_json_add_array_item_string(wb, "subState"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "subState"); + { + buffer_json_member_add_string(wb, "name", "Bottom Up Tree"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "subState"); + buffer_json_add_array_item_string(wb, "activeState"); + buffer_json_add_array_item_string(wb, "loadState"); + buffer_json_add_array_item_string(wb, "type"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); // group_by + + buffer_json_member_add_time_t(wb, "expires", now_realtime_sec() + 1); + buffer_json_finalize(wb); + + netdata_mutex_lock(&stdout_mutex); + pluginsd_function_result_to_stdout(transaction, HTTP_RESP_OK, "application/json", now_realtime_sec() + 1, wb); + netdata_mutex_unlock(&stdout_mutex); + + buffer_free(wb); + systemd_units_free_all(base); +} + +#endif // ENABLE_SYSTEMD_DBUS diff --git a/collectors/tc.plugin/integrations/tc_qos_classes.md b/collectors/tc.plugin/integrations/tc_qos_classes.md index 2e013fc00..7a6650660 100644 --- a/collectors/tc.plugin/integrations/tc_qos_classes.md +++ b/collectors/tc.plugin/integrations/tc_qos_classes.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/tc.plugin/ sidebar_label: "tc QoS classes" learn_status: "Published" learn_rel_path: "Data Collection/Linux Systems/Network" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -130,8 +131,8 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| script to run to get tc values | Path to script `tc-qos-helper.sh` | usr/libexec/netdata/plugins.d/tc-qos-helper.s | False | -| enable show all classes and qdiscs for all interfaces | yes/no flag to control what data is presented. | yes | False | +| script to run to get tc values | Path to script `tc-qos-helper.sh` | usr/libexec/netdata/plugins.d/tc-qos-helper.s | no | +| enable show all classes and qdiscs for all interfaces | yes/no flag to control what data is presented. | yes | no | </details> diff --git a/collectors/tc.plugin/tc-qos-helper.sh.in b/collectors/tc.plugin/tc-qos-helper.sh.in index 0fab69eef..3298c39a3 100755 --- a/collectors/tc.plugin/tc-qos-helper.sh.in +++ b/collectors/tc.plugin/tc-qos-helper.sh.in @@ -2,54 +2,113 @@ # netdata # real-time performance and health monitoring, done right! -# (C) 2017 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2023 Netdata Inc. # SPDX-License-Identifier: GPL-3.0-or-later # # This script is a helper to allow netdata collect tc data. # tc output parsing has been implemented in C, inside netdata # This script allows setting names to dimensions. -export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin" +export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin:@sbindir_POST@" export LC_ALL=C +cmd_line="'${0}' $(printf "'%s' " "${@}")" + # ----------------------------------------------------------------------------- -# logging functions +# logging -PROGRAM_NAME="$(basename "$0")" +PROGRAM_NAME="$(basename "${0}")" PROGRAM_NAME="${PROGRAM_NAME/.plugin/}" -logdate() { - date "+%Y-%m-%d %H:%M:%S" +# these should be the same with syslog() priorities +NDLP_EMERG=0 # system is unusable +NDLP_ALERT=1 # action must be taken immediately +NDLP_CRIT=2 # critical conditions +NDLP_ERR=3 # error conditions +NDLP_WARN=4 # warning conditions +NDLP_NOTICE=5 # normal but significant condition +NDLP_INFO=6 # informational +NDLP_DEBUG=7 # debug-level messages + +# the max (numerically) log level we will log +LOG_LEVEL=$NDLP_INFO + +set_log_min_priority() { + case "${NETDATA_LOG_LEVEL,,}" in + "emerg" | "emergency") + LOG_LEVEL=$NDLP_EMERG + ;; + + "alert") + LOG_LEVEL=$NDLP_ALERT + ;; + + "crit" | "critical") + LOG_LEVEL=$NDLP_CRIT + ;; + + "err" | "error") + LOG_LEVEL=$NDLP_ERR + ;; + + "warn" | "warning") + LOG_LEVEL=$NDLP_WARN + ;; + + "notice") + LOG_LEVEL=$NDLP_NOTICE + ;; + + "info") + LOG_LEVEL=$NDLP_INFO + ;; + + "debug") + LOG_LEVEL=$NDLP_DEBUG + ;; + esac } -log() { - local status="${1}" - shift +set_log_min_priority - echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}" +log() { + local level="${1}" + shift 1 + + [[ -n "$level" && -n "$LOG_LEVEL" && "$level" -gt "$LOG_LEVEL" ]] && return + + systemd-cat-native --log-as-netdata --newline="--NEWLINE--" <<EOFLOG +INVOCATION_ID=${NETDATA_INVOCATION_ID} +SYSLOG_IDENTIFIER=${PROGRAM_NAME} +PRIORITY=${level} +THREAD_TAG=tc-qos-helper +ND_LOG_SOURCE=collector +ND_REQUEST=${cmd_line} +MESSAGE=${*//\\n/--NEWLINE--} + +EOFLOG + # AN EMPTY LINE IS NEEDED ABOVE +} +info() { + log "$NDLP_INFO" "${@}" } warning() { - log WARNING "${@}" + log "$NDLP_WARN" "${@}" } error() { - log ERROR "${@}" -} - -info() { - log INFO "${@}" + log "$NDLP_ERR" "${@}" } fatal() { - log FATAL "${@}" - exit 1 + log "$NDLP_ALERT" "${@}" + exit 1 } -debug=0 debug() { - [ $debug -eq 1 ] && log DEBUG "${@}" + log "$NDLP_DEBUG" "${@}" } # ----------------------------------------------------------------------------- diff --git a/collectors/timex.plugin/integrations/timex.md b/collectors/timex.plugin/integrations/timex.md index 80d77bc8f..754b2368c 100644 --- a/collectors/timex.plugin/integrations/timex.md +++ b/collectors/timex.plugin/integrations/timex.md @@ -4,6 +4,7 @@ meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/timex.plug sidebar_label: "Timex" learn_status: "Published" learn_rel_path: "Data Collection/System Clock and NTP" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> @@ -116,9 +117,9 @@ At least one option ('clock synchronization state', 'time offset') needs to be e | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 1 | False | -| clock synchronization state | Make chart showing system clock synchronization state. | yes | True | -| time offset | Make chart showing computed time offset between local system and reference clock | yes | True | +| update every | Data collection frequency. | 1 | no | +| clock synchronization state | Make chart showing system clock synchronization state. | yes | yes | +| time offset | Make chart showing computed time offset between local system and reference clock | yes | yes | </details> diff --git a/collectors/xenstat.plugin/README.md b/collectors/xenstat.plugin/README.md index 826e18e41..32fe4d213 120000 --- a/collectors/xenstat.plugin/README.md +++ b/collectors/xenstat.plugin/README.md @@ -1 +1 @@ -integrations/xen-xcp-ng.md
\ No newline at end of file +integrations/xen_xcp-ng.md
\ No newline at end of file diff --git a/collectors/xenstat.plugin/integrations/xen-xcp-ng.md b/collectors/xenstat.plugin/integrations/xen_xcp-ng.md index e4aea6fee..17dc8d785 100644 --- a/collectors/xenstat.plugin/integrations/xen-xcp-ng.md +++ b/collectors/xenstat.plugin/integrations/xen_xcp-ng.md @@ -1,13 +1,14 @@ <!--startmeta custom_edit_url: "https://github.com/netdata/netdata/edit/master/collectors/xenstat.plugin/README.md" meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/xenstat.plugin/metadata.yaml" -sidebar_label: "Xen/XCP-ng" +sidebar_label: "Xen XCP-ng" learn_status: "Published" learn_rel_path: "Data Collection/Containers and VMs" +most_popular: False message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" endmeta--> -# Xen/XCP-ng +# Xen XCP-ng <img src="https://netdata.cloud/img/xen.png" width="150"/> @@ -53,7 +54,7 @@ The scope defines the instance that the metric belongs to. An instance is unique -### Per Xen/XCP-ng instance +### Per Xen XCP-ng instance These metrics refer to the entire monitored application. @@ -165,7 +166,7 @@ sudo ./edit-config netdata.conf | Name | Description | Default | Required | |:----|:-----------|:-------|:--------:| -| update every | Data collection frequency. | 1 | False | +| update every | Data collection frequency. | 1 | no | </details> diff --git a/collectors/xenstat.plugin/metadata.yaml b/collectors/xenstat.plugin/metadata.yaml index 493183694..e5527dbb1 100644 --- a/collectors/xenstat.plugin/metadata.yaml +++ b/collectors/xenstat.plugin/metadata.yaml @@ -4,7 +4,7 @@ modules: plugin_name: xenstat.plugin module_name: xenstat.plugin monitored_instance: - name: Xen/XCP-ng + name: Xen XCP-ng link: "https://xenproject.org/" categories: - data-collection.containers-and-vms diff --git a/collectors/xenstat.plugin/xenstat_plugin.c b/collectors/xenstat.plugin/xenstat_plugin.c index c05d5e298..319396d43 100644 --- a/collectors/xenstat.plugin/xenstat_plugin.c +++ b/collectors/xenstat.plugin/xenstat_plugin.c @@ -920,22 +920,14 @@ static void xenstat_send_domain_metrics() { } int main(int argc, char **argv) { - stderror = stderr; clocks_init(); // ------------------------------------------------------------------------ // initialization of netdata plugin - program_name = "xenstat.plugin"; + program_name = PLUGIN_XENSTAT_NAME; - // disable syslog - error_log_syslog = 0; - - // set errors flood protection to 100 logs per hour - error_log_errors_per_period = 100; - error_log_throttle_period = 3600; - - log_set_global_severity_for_external_plugins(); + nd_log_initialize_for_external_plugins(PLUGIN_XENSTAT_NAME); // ------------------------------------------------------------------------ // parse command line parameters |