author      Daniel Baumann <daniel.baumann@progress-linux.org>    2021-05-19 12:33:27 +0000
committer   Daniel Baumann <daniel.baumann@progress-linux.org>    2021-05-19 12:33:27 +0000
commit      841395dd16f470e3c051a0a4fff5b91efc983c30 (patch)
tree        4115f6eedcddda75067130b80acaff9e51612f49 /collectors/ebpf.plugin
parent      Adding upstream version 1.30.1. (diff)
download    netdata-841395dd16f470e3c051a0a4fff5b91efc983c30.tar.xz
            netdata-841395dd16f470e3c051a0a4fff5b91efc983c30.zip
Adding upstream version 1.31.0. (upstream/1.31.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'collectors/ebpf.plugin')
-rw-r--r-- | collectors/ebpf.plugin/Makefile.am           |   1
-rw-r--r-- | collectors/ebpf.plugin/README.md             |  29
-rw-r--r-- | collectors/ebpf.plugin/ebpf.c                |  90
-rw-r--r-- | collectors/ebpf.plugin/ebpf.d.conf           |   5
-rw-r--r-- | collectors/ebpf.plugin/ebpf.d/cachestat.conf |   2
-rw-r--r-- | collectors/ebpf.plugin/ebpf.d/dcstat.conf    |  13
-rw-r--r-- | collectors/ebpf.plugin/ebpf.d/network.conf   |  11
-rw-r--r-- | collectors/ebpf.plugin/ebpf.d/process.conf   |   2
-rw-r--r-- | collectors/ebpf.plugin/ebpf.h                |   8
-rw-r--r-- | collectors/ebpf.plugin/ebpf_apps.c           |  15
-rw-r--r-- | collectors/ebpf.plugin/ebpf_apps.h           |   8
-rw-r--r-- | collectors/ebpf.plugin/ebpf_cachestat.c      |  33
-rw-r--r-- | collectors/ebpf.plugin/ebpf_cachestat.h      |   1
-rw-r--r-- | collectors/ebpf.plugin/ebpf_dcstat.c         | 603
-rw-r--r-- | collectors/ebpf.plugin/ebpf_dcstat.h         |  64
-rw-r--r-- | collectors/ebpf.plugin/ebpf_process.c        |  31
-rw-r--r-- | collectors/ebpf.plugin/ebpf_socket.c         |  53
-rw-r--r-- | collectors/ebpf.plugin/ebpf_socket.h         |  16
18 files changed, 906 insertions, 79 deletions
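One theme running through this diff is making the eBPF hash-table sizes configurable: every entry in `ebpf_modules[]` gains a `pid_map_size`, filled from the new `pid table size` option in `ebpf.d.conf`. As a rough illustration of where such a value ultimately has to be applied, the sketch below resizes a named map before the BPF object is loaded. The struct mirrors the `ebpf_local_maps_t` idea introduced in this diff, but the helper name and the use of libbpf's `bpf_map__set_max_entries()` are assumptions for illustration, not the plugin's exact code path.

```c
// Illustrative sketch only: shows where a user-configured table size could be applied.
// The struct mirrors ebpf_local_maps_t from this diff; apply_map_size() and the libbpf
// calls are assumptions, not necessarily what ebpf.plugin itself does internally.
#include <stdint.h>
#include <bpf/libbpf.h>

typedef struct local_map {
    const char *name;          // map name inside the eBPF object, e.g. "dcstat_pid"
    uint32_t internal_input;   // size compiled into the eBPF program
    uint32_t user_input;       // size requested via "pid table size"
} local_map_t;

static void apply_map_size(struct bpf_object *obj, const local_map_t *map)
{
    // Leave the compiled-in default alone unless the user asked for something else.
    if (!map->user_input || map->user_input == map->internal_input)
        return;

    struct bpf_map *m = bpf_object__find_map_by_name(obj, map->name);
    if (m)
        bpf_map__set_max_entries(m, map->user_input); // must happen before bpf_object__load()
}
```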
diff --git a/collectors/ebpf.plugin/Makefile.am b/collectors/ebpf.plugin/Makefile.am index 4fb2056fd..18b1fc6c8 100644 --- a/collectors/ebpf.plugin/Makefile.am +++ b/collectors/ebpf.plugin/Makefile.am @@ -33,6 +33,7 @@ dist_libconfig_DATA = \ dist_ebpfconfig_DATA = \ ebpf.d/ebpf_kernel_reject_list.txt \ ebpf.d/cachestat.conf \ + ebpf.d/dcstat.conf \ ebpf.d/network.conf \ ebpf.d/process.conf \ ebpf.d/sync.conf \ diff --git a/collectors/ebpf.plugin/README.md b/collectors/ebpf.plugin/README.md index 405eab875..1e593786b 100644 --- a/collectors/ebpf.plugin/README.md +++ b/collectors/ebpf.plugin/README.md @@ -123,11 +123,11 @@ To enable the collector, scroll down to the `[plugins]` section ensure the relev ebpf = yes ``` -You can also configure the eBPF collector's behavior by editing `ebpf.conf`. +You can also configure the eBPF collector's behavior by editing `ebpf.d.conf`. ```bash cd /etc/netdata/ # Replace with your Netdata configuration directory, if not /etc/netdata/ -./edit-config ebpf.conf +./edit-config ebpf.d.conf ``` ### `[global]` @@ -149,6 +149,7 @@ accepts the following values: new charts for the return of these functions, such as errors. Monitoring function returns can help in debugging software, such as failing to close file descriptors or creating zombie processes. - `update every`: Number of seconds used for eBPF to send data for Netdata. +- `pid table size`: Defines the maximum number of PIDs stored inside the application hash table. #### Integration with `apps.plugin` @@ -187,6 +188,11 @@ If you want to _disable_ the integration with `apps.plugin` along with the above apps = yes ``` +When the integration is enabled, eBPF collector allocates memory for each process running. The total + allocated memory has direct relationship with the kernel version. When the eBPF plugin is running on kernels newer than `4.15`, + it uses per-cpu maps to speed up the update of hash tables. This also implies storing data for the same PID + for each processor it runs. + #### `[ebpf programs]` The eBPF collector enables and runs the following eBPF programs by default: @@ -194,6 +200,9 @@ The eBPF collector enables and runs the following eBPF programs by default: - `cachestat`: Netdata's eBPF data collector creates charts about the memory page cache. When the integration with [`apps.plugin`](/collectors/apps.plugin/README.md) is enabled, this collector creates charts for the whole host _and_ for each application. +- `dcstat` : This eBPF program creates charts that show information about file access using directory cache. It appends + `kprobes` for `lookup_fast()` and `d_lookup()` to identify if files are inside directory cache, outside and + files are not found. - `process`: This eBPF program creates charts that show information about process creation, VFS IO, and files removed. When in `return` mode, it also creates charts showing errors when these operations are executed. - `network viewer`: This eBPF program creates charts with information about `TCP` and `UDP` functions, including the @@ -215,6 +224,7 @@ cd /etc/netdata/ # Replace with your Netdata configuration directory, if not / The following configuration files are available: - `cachestat.conf`: Configuration for the `cachestat` thread. +- `dcstat.conf`: Configuration for the `dcstat` thread. - `process.conf`: Configuration for the `process` thread. - `network.conf`: Configuration for the `network viewer` thread. This config file overwrites the global options and also lets you specify which network the eBPF collector monitors. 
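The README note above about per-CPU maps on recent kernels is worth unpacking: a lookup on a per-CPU hash map returns one element per possible CPU, so the collector has to fold them into a single value per PID before publishing, which is why memory usage grows with both PID count and CPU count. A minimal sketch of that reduction follows; the field names follow `netdata_dcstat_pid_t` from this diff, while the function name and `nprocs` handling are illustrative.

```c
// Minimal sketch of the per-CPU reduction described in the README note above.
// Field names follow netdata_dcstat_pid_t from this diff; the rest is illustrative.
#include <stdint.h>

typedef struct dcstat_pid {
    uint64_t cache_access;
    uint64_t file_system;
    uint64_t not_found;
} dcstat_pid_t;

// 'values' holds one element per possible CPU, as returned by a lookup on a
// per-CPU hash map; everything is folded into values[0] before publishing.
static void accumulate_per_cpu(dcstat_pid_t *values, int nprocs)
{
    for (int cpu = 1; cpu < nprocs; cpu++) {
        values[0].cache_access += values[cpu].cache_access;
        values[0].file_system  += values[cpu].file_system;
        values[0].not_found    += values[cpu].not_found;
    }
}
```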
@@ -347,13 +357,16 @@ mount these filesystems on startup. More information can be found in the [ftrace ## Performance -Because eBPF monitoring is complex, we are evaluating the performance of this new collector in various real-world -conditions, across various system loads, and when monitoring complex applications. +eBPF monitoring is complex and produces a large volume of metrics. We've discovered scenarios where the eBPF plugin +significantly increases kernel memory usage by several hundred MB. + +If your node is experiencing high memory usage and there is no obvious culprit to be found in the `apps.mem` chart, +consider testing for high kernel memory usage by [disabling eBPF monitoring](#configuration). Next, +[restart Netdata](/docs/configure/start-stop-restart.md) with `sudo systemctl restart netdata` to see if system +memory usage (see the `system.ram` chart) has dropped significantly. -Our [initial testing](https://github.com/netdata/netdata/issues/8195) shows the performance of the eBPF collector is -nearly identical to our [apps.plugin collector](/collectors/apps.plugin/README.md), despite collecting and displaying -much more sophisticated metrics. You can now use the eBPF to gather deeper insights without affecting the performance of -your complex applications at any load. +Beginning with `v1.31`, kernel memory usage is configurable via the [`pid table size` setting](#ebpf-load-mode) +in `ebpf.conf`. ## SELinux diff --git a/collectors/ebpf.plugin/ebpf.c b/collectors/ebpf.plugin/ebpf.c index 26dacfd3e..5cc005f30 100644 --- a/collectors/ebpf.plugin/ebpf.c +++ b/collectors/ebpf.plugin/ebpf.c @@ -77,19 +77,26 @@ pthread_cond_t collect_data_cond_var; ebpf_module_t ebpf_modules[] = { { .thread_name = "process", .config_name = "process", .enabled = 0, .start_routine = ebpf_process_thread, .update_time = 1, .global_charts = 1, .apps_charts = 1, .mode = MODE_ENTRY, - .optional = 0, .apps_routine = ebpf_process_create_apps_charts }, + .optional = 0, .apps_routine = ebpf_process_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL}, { .thread_name = "socket", .config_name = "socket", .enabled = 0, .start_routine = ebpf_socket_thread, .update_time = 1, .global_charts = 1, .apps_charts = 1, .mode = MODE_ENTRY, - .optional = 0, .apps_routine = ebpf_socket_create_apps_charts }, + .optional = 0, .apps_routine = ebpf_socket_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL}, { .thread_name = "cachestat", .config_name = "cachestat", .enabled = 0, .start_routine = ebpf_cachestat_thread, - .update_time = 1, .global_charts = 1, .apps_charts = 1, .mode = MODE_ENTRY, - .optional = 0, .apps_routine = ebpf_cachestat_create_apps_charts }, + .update_time = 1, .global_charts = 1, .apps_charts = 1, .mode = MODE_ENTRY, + .optional = 0, .apps_routine = ebpf_cachestat_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL}, { .thread_name = "sync", .config_name = "sync", .enabled = 0, .start_routine = ebpf_sync_thread, - .update_time = 1, .global_charts = 1, .apps_charts = 1, .mode = MODE_ENTRY, - .optional = 0, .apps_routine = NULL }, + .update_time = 1, .global_charts = 1, .apps_charts = 1, .mode = MODE_ENTRY, + .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL }, + { .thread_name = "dc", .config_name = "dc", .enabled = 0, .start_routine = ebpf_dcstat_thread, + .update_time = 1, .global_charts = 1, .apps_charts = 1, .mode = MODE_ENTRY, + 
.optional = 0, .apps_routine = ebpf_dcstat_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE }, { .thread_name = NULL, .enabled = 0, .start_routine = NULL, .update_time = 1, .global_charts = 0, .apps_charts = 1, .mode = MODE_ENTRY, - .optional = 0, .apps_routine = NULL }, + .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = 0, .names = NULL }, }; // Link with apps.plugin @@ -130,7 +137,23 @@ static void ebpf_exit(int sig) return; } - freez(global_process_stat); + if (ebpf_modules[EBPF_MODULE_SOCKET_IDX].enabled) { + ebpf_modules[EBPF_MODULE_SOCKET_IDX].enabled = 0; + clean_socket_apps_structures(); + freez(socket_bandwidth_curr); + } + + if (ebpf_modules[EBPF_MODULE_CACHESTAT_IDX].enabled) { + ebpf_modules[EBPF_MODULE_CACHESTAT_IDX].enabled = 0; + clean_cachestat_pid_structures(); + freez(cachestat_pid); + } + + if (ebpf_modules[EBPF_MODULE_DCSTAT_IDX].enabled) { + ebpf_modules[EBPF_MODULE_DCSTAT_IDX].enabled = 0; + clean_dcstat_pid_structures(); + freez(dcstat_pid); + } /* int ret = fork(); @@ -154,7 +177,7 @@ static void ebpf_exit(int sig) int sid = setsid(); if (sid >= 0) { debug(D_EXIT, "Wait for father %d die", getpid()); - sleep_usec(200000); // Sleep 200 miliseconds to father dies. + sleep_usec(200000); // Sleep 200 milliseconds to father dies. clean_loaded_events(); } else { error("Cannot become session id leader, so I won't try to clean kprobe_events.\n"); @@ -318,7 +341,7 @@ void write_io_chart(char *chart, char *family, char *dwrite, long long vwrite, c * @param id chart id * @param title chart title * @param units units label - * @param family group name used to attach the chart on dashaboard + * @param family group name used to attach the chart on dashboard * @param charttype chart type * @param context chart context * @param order chart order @@ -376,7 +399,7 @@ void ebpf_create_global_dimension(void *ptr, int end) * @param id chart id * @param title chart title * @param units axis label - * @param family group name used to attach the chart on dashaboard + * @param family group name used to attach the chart on dashboard * @param context chart context * @param charttype chart type * @param order order number of the specified chart @@ -572,6 +595,8 @@ void ebpf_print_help() "\n" " --cachestat or -c Enable charts related to process run time.\n" "\n" + " --dcstat or -d Enable charts related to directory cache.\n" + "\n" " --net or -n Enable network viewer charts.\n" "\n" " --process or -p Enable charts related to process run time.\n" @@ -691,7 +716,7 @@ static void read_local_addresses() } /** - * Start Ptherad Variable + * Start Pthread Variable * * This function starts all pthread variables. * @@ -764,6 +789,22 @@ static void ebpf_update_interval() } /** + * Update PID table size + * + * Update default size with value from user + */ +static void ebpf_update_table_size() +{ + int i; + uint32_t value = (uint32_t) appconfig_get_number(&collector_config, EBPF_GLOBAL_SECTION, + EBPF_CFG_PID_SIZE, ND_EBPF_DEFAULT_PID_SIZE); + for (i = 0; ebpf_modules[i].thread_name; i++) { + ebpf_modules[i].pid_map_size = value; + } +} + + +/** * Read collector values * * @param disable_apps variable to store information related to apps. 
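The new `ebpf_update_table_size()` above copies the global `pid table size` into `pid_map_size` for every module; each collector thread then calls `ebpf_update_pid_table()` (declared in `ebpf.h` later in this diff) to push that value into its own map description. The body of `ebpf_update_pid_table()` is not part of these hunks, so the following is only an assumed sketch consistent with how the callers use it.

```c
// Assumed sketch of ebpf_update_pid_table(); its real body is not shown in this diff.
// Callers such as ebpf_cachestat_thread() and ebpf_dcstat_thread() invoke it right
// after ebpf_update_module() has filled em->pid_map_size from the configuration.
void ebpf_update_pid_table(ebpf_local_maps_t *pid, ebpf_module_t *em)
{
    // Forward the per-module "pid table size" into the map description that is
    // consulted when the eBPF object is loaded.
    pid->user_input = em->pid_map_size;
}
```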
@@ -783,6 +824,8 @@ static void read_collector_values(int *disable_apps) ebpf_update_interval(); + ebpf_update_table_size(); + // This is kept to keep compatibility uint32_t enabled = appconfig_get_boolean(&collector_config, EBPF_GLOBAL_SECTION, "disable apps", CONFIG_BOOLEAN_NO); @@ -844,6 +887,13 @@ static void read_collector_values(int *disable_apps) started++; } + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "dcstat", + CONFIG_BOOLEAN_NO); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_DCSTAT_IDX, *disable_apps); + started++; + } + if (!started){ ebpf_enable_all_charts(*disable_apps); // Read network viewer section @@ -927,6 +977,7 @@ static void parse_args(int argc, char **argv) {"global", no_argument, 0, 'g' }, {"all", no_argument, 0, 'a' }, {"cachestat", no_argument, 0, 'c' }, + {"dcstat", no_argument, 0, 'd' }, {"net", no_argument, 0, 'n' }, {"process", no_argument, 0, 'p' }, {"return", no_argument, 0, 'r' }, @@ -945,7 +996,7 @@ static void parse_args(int argc, char **argv) } while (1) { - int c = getopt_long(argc, argv, "hvgcanprs", long_options, &option_index); + int c = getopt_long(argc, argv, "hvgacdnprs", long_options, &option_index); if (c == -1) break; @@ -983,6 +1034,15 @@ static void parse_args(int argc, char **argv) #endif break; } + case 'd': { + enabled = 1; + ebpf_enable_chart(EBPF_MODULE_DCSTAT_IDX, disable_apps); +#ifdef NETDATA_INTERNAL_CHECKS + info( + "EBPF enabling \"DCSTAT\" charts, because it was started with the option \"--dcstat\" or \"-d\"."); +#endif + break; + } case 'n': { enabled = 1; ebpf_enable_chart(EBPF_MODULE_SOCKET_IDX, disable_apps); @@ -1027,7 +1087,7 @@ static void parse_args(int argc, char **argv) if (load_collector_config(ebpf_user_config_dir, &disable_apps)) { info( - "Does not have a configuration file inside `%s/ebpf.conf. It will try to load stock file.", + "Does not have a configuration file inside `%s/ebpf.d.conf. It will try to load stock file.", ebpf_user_config_dir); if (load_collector_config(ebpf_stock_config_dir, &disable_apps)) { info("Does not have a stock file. It is starting with default options."); @@ -1141,6 +1201,8 @@ int main(int argc, char **argv) NULL, NULL, ebpf_modules[EBPF_MODULE_CACHESTAT_IDX].start_routine}, {"EBPF SYNC" , NULL, NULL, 1, NULL, NULL, ebpf_modules[EBPF_MODULE_SYNC_IDX].start_routine}, + {"EBPF DCSTAT" , NULL, NULL, 1, + NULL, NULL, ebpf_modules[EBPF_MODULE_DCSTAT_IDX].start_routine}, {NULL , NULL, NULL, 0, NULL, NULL, NULL} }; diff --git a/collectors/ebpf.plugin/ebpf.d.conf b/collectors/ebpf.plugin/ebpf.d.conf index 7191d7416..ef6ff8145 100644 --- a/collectors/ebpf.plugin/ebpf.d.conf +++ b/collectors/ebpf.plugin/ebpf.d.conf @@ -11,10 +11,14 @@ # 'no'. # # The `update every` option defines the number of seconds used to read data from kernel and send to netdata +# +# The `pid table size` defines the maximum number of PIDs stored in the application hash tables. +# [global] ebpf load mode = entry apps = yes update every = 1 + pid table size = 32768 # # eBPF Programs @@ -29,6 +33,7 @@ # `sync` : Montitor calls for syscall sync(2). 
[ebpf programs] cachestat = no + dcstat = no process = yes socket = yes sync = yes diff --git a/collectors/ebpf.plugin/ebpf.d/cachestat.conf b/collectors/ebpf.plugin/ebpf.d/cachestat.conf index 78277cf56..0c4d991df 100644 --- a/collectors/ebpf.plugin/ebpf.d/cachestat.conf +++ b/collectors/ebpf.plugin/ebpf.d/cachestat.conf @@ -7,8 +7,10 @@ # If you want to disable the integration with `apps.plugin` along with the above charts, change the setting `apps` to # 'no'. # +# The `pid table size` defines the maximum number of PIDs stored inside the application hash table. # [global] ebpf load mode = entry apps = yes update every = 2 + pid table size = 32768 diff --git a/collectors/ebpf.plugin/ebpf.d/dcstat.conf b/collectors/ebpf.plugin/ebpf.d/dcstat.conf new file mode 100644 index 000000000..2607b98fd --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/dcstat.conf @@ -0,0 +1,13 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +# The eBPF collector also creates charts for each running application through an integration with the `apps plugin`. +# If you want to disable the integration with `apps.plugin` along with the above charts, change the setting `apps` to +# 'no'. +# +[global] + ebpf load mode = entry + apps = yes + update every = 2 diff --git a/collectors/ebpf.plugin/ebpf.d/network.conf b/collectors/ebpf.plugin/ebpf.d/network.conf index b033bc39c..6bbd49a49 100644 --- a/collectors/ebpf.plugin/ebpf.d/network.conf +++ b/collectors/ebpf.plugin/ebpf.d/network.conf @@ -7,11 +7,20 @@ # If you want to disable the integration with `apps.plugin` along with the above charts, change the setting `apps` to # 'no'. # -# +# The following options change the hash table size: +# `bandwidth table size`: Maximum number of connections monitored +# `ipv4 connection table size`: Maximum number of IPV4 connections monitored +# `ipv6 connection table size`: Maximum number of IPV6 connections monitored +# `udp connection table size`: Maximum number of UDP connections monitored +# [global] ebpf load mode = entry apps = yes update every = 1 + bandwidth table size = 16384 + ipv4 connection table size = 16384 + ipv6 connection table size = 16384 + udp connection table size = 4096 # # Network Connection diff --git a/collectors/ebpf.plugin/ebpf.d/process.conf b/collectors/ebpf.plugin/ebpf.d/process.conf index 7806dc844..511da95ad 100644 --- a/collectors/ebpf.plugin/ebpf.d/process.conf +++ b/collectors/ebpf.plugin/ebpf.d/process.conf @@ -7,8 +7,10 @@ # If you want to disable the integration with `apps.plugin` along with the above charts, change the setting `apps` to # 'no'. # +# The `pid table size` defines the maximum number of PIDs stored inside the hash table. 
# [global] ebpf load mode = entry apps = yes update every = 1 + pid table size = 32768 diff --git a/collectors/ebpf.plugin/ebpf.h b/collectors/ebpf.plugin/ebpf.h index 6796dcdad..841701e20 100644 --- a/collectors/ebpf.plugin/ebpf.h +++ b/collectors/ebpf.plugin/ebpf.h @@ -77,7 +77,8 @@ enum ebpf_module_indexes { EBPF_MODULE_PROCESS_IDX, EBPF_MODULE_SOCKET_IDX, EBPF_MODULE_CACHESTAT_IDX, - EBPF_MODULE_SYNC_IDX + EBPF_MODULE_SYNC_IDX, + EBPF_MODULE_DCSTAT_IDX }; // Copied from musl header @@ -89,8 +90,9 @@ enum ebpf_module_indexes { #endif #endif -// Chart defintions +// Chart definitions #define NETDATA_EBPF_FAMILY "ebpf" +#define NETDATA_FILESYSTEM_FAMILY "filesystem" #define NETDATA_EBPF_CHART_TYPE_LINE "line" #define NETDATA_EBPF_CHART_TYPE_STACKED "stacked" #define NETDATA_EBPF_MEMORY_GROUP "mem" @@ -196,6 +198,7 @@ extern void ebpf_cleanup_publish_syscall(netdata_publish_syscall_t *nps); #define EBPF_COMMON_DIMENSION_BYTES "bytes/s" #define EBPF_COMMON_DIMENSION_DIFFERENCE "difference" #define EBPF_COMMON_DIMENSION_PACKETS "packets" +#define EBPF_COMMON_DIMENSION_FILES "files" // Common variables extern int debug_enabled; @@ -215,6 +218,7 @@ extern void ebpf_socket_create_apps_charts(struct ebpf_module *em, void *ptr); extern void ebpf_cachestat_create_apps_charts(struct ebpf_module *em, void *root); extern void ebpf_one_dimension_write_charts(char *family, char *chart, char *dim, long long v1); extern collected_number get_value_from_structure(char *basis, size_t offset); +extern void ebpf_update_pid_table(ebpf_local_maps_t *pid, ebpf_module_t *em); #define EBPF_MAX_SYNCHRONIZATION_TIME 300 diff --git a/collectors/ebpf.plugin/ebpf_apps.c b/collectors/ebpf.plugin/ebpf_apps.c index 1be7b9260..6459bad0d 100644 --- a/collectors/ebpf.plugin/ebpf_apps.c +++ b/collectors/ebpf.plugin/ebpf_apps.c @@ -265,7 +265,7 @@ struct target *get_apps_groups_target(struct target **agrt, const char *id, stru * @param path the directory to search apps_%s.conf * @param file the word to complement the file name. * - * @return It returns 0 on succcess and -1 otherwise + * @return It returns 0 on success and -1 otherwise */ int ebpf_read_apps_groups_conf(struct target **agdt, struct target **agrt, const char *path, const char *file) { @@ -470,7 +470,7 @@ static inline int managed_log(struct pid_stat *p, uint32_t log, int status) /** * Get PID entry * - * Get or allocate the PID entry for the specifid pid. + * Get or allocate the PID entry for the specified pid. * * @param pid the pid to search the data. 
* @@ -664,7 +664,7 @@ static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) * @param pid the current pid that we are working * @param ptr a NULL value * - * @return It returns 1 on succcess and 0 otherwise + * @return It returns 1 on success and 0 otherwise */ static inline int collect_data_for_pid(pid_t pid, void *ptr) { @@ -927,6 +927,12 @@ void cleanup_variables_from_other_threads(uint32_t pid) freez(cachestat_pid[pid]); cachestat_pid[pid] = NULL; } + + // Clean directory cache structure + if (dcstat_pid) { + freez(dcstat_pid[pid]); + dcstat_pid[pid] = NULL; + } } /** @@ -943,7 +949,6 @@ void cleanup_exited_pids() pid_t r = p->pid; p = p->next; - del_pid_entry(r); // Clean process structure freez(global_process_stats[r]); @@ -953,6 +958,8 @@ void cleanup_exited_pids() current_apps_data[r] = NULL; cleanup_variables_from_other_threads(r); + + del_pid_entry(r); } else { if (unlikely(p->keep)) p->keeploops++; diff --git a/collectors/ebpf.plugin/ebpf_apps.h b/collectors/ebpf.plugin/ebpf_apps.h index eb54754c6..edcdef605 100644 --- a/collectors/ebpf.plugin/ebpf_apps.h +++ b/collectors/ebpf.plugin/ebpf_apps.h @@ -16,8 +16,10 @@ #define NETDATA_APPS_PROCESS_GROUP "process (eBPF)" #define NETDATA_APPS_NET_GROUP "net (eBPF)" #define NETDATA_APPS_CACHESTAT_GROUP "page cache (eBPF)" +#define NETDATA_APPS_DCSTAT_GROUP "directory cache (eBPF)" #include "ebpf_process.h" +#include "ebpf_dcstat.h" #include "ebpf_cachestat.h" #include "ebpf_sync.h" @@ -108,8 +110,9 @@ struct target { uid_t uid; gid_t gid; - // Page cache statistic per process + // Changes made to simplify integration between apps and eBPF. netdata_publish_cachestat_t cachestat; + netdata_publish_dcstat_t dcstat; /* These variables are not necessary for eBPF collector kernel_uint_t minflt; @@ -430,8 +433,11 @@ extern size_t read_bandwidth_statistic_using_pid_on_target(ebpf_bandwidth_t **ep extern void collect_data_for_all_processes(int tbl_pid_stats_fd); +extern void clean_global_memory(); + extern ebpf_process_stat_t **global_process_stats; extern ebpf_process_publish_apps_t **current_apps_data; extern netdata_publish_cachestat_t **cachestat_pid; +extern netdata_publish_dcstat_t **dcstat_pid; #endif /* NETDATA_EBPF_APPS_H */ diff --git a/collectors/ebpf.plugin/ebpf_cachestat.c b/collectors/ebpf.plugin/ebpf_cachestat.c index 6516d4da2..cdeac6951 100644 --- a/collectors/ebpf.plugin/ebpf_cachestat.c +++ b/collectors/ebpf.plugin/ebpf_cachestat.c @@ -24,6 +24,10 @@ struct netdata_static_thread cachestat_threads = {"CACHESTAT KERNEL", NULL, NULL, 1, NULL, NULL, NULL}; +static ebpf_local_maps_t cachestat_maps[] = {{.name = "cstat_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, + .user_input = 0}, + {.name = NULL, .internal_input = 0, .user_input = 0}}; + static int *map_fd = NULL; struct config cachestat_config = { .first_section = NULL, @@ -43,7 +47,7 @@ struct config cachestat_config = { .first_section = NULL, * * Clean the allocated structures. */ -static void clean_pid_structures() { +void clean_cachestat_pid_structures() { struct pid_stat *pids = root_of_pids; while (pids) { freez(cachestat_pid[pids->pid]); @@ -71,9 +75,6 @@ static void ebpf_cachestat_cleanup(void *ptr) UNUSED(dt); } - clean_pid_structures(); - freez(cachestat_pid); - ebpf_cleanup_publish_syscall(cachestat_counter_publish_aggregated); freez(cachestat_vector); @@ -125,7 +126,7 @@ void cachestat_update_publish(netdata_publish_cachestat_t *out, uint64_t mpa, ui hits = 0; } - calculated_number ratio = (total > 0) ? 
hits/total : 0; + calculated_number ratio = (total > 0) ? hits/total : 1; out->ratio = (long long )(ratio*100); out->hit = (long long)hits; @@ -282,7 +283,7 @@ void ebpf_cachestat_create_apps_charts(struct ebpf_module *em, void *ptr) "The ratio is calculated dividing the Hit pages per total cache accesses without counting dirties.", EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_APPS_CACHESTAT_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, + NETDATA_EBPF_CHART_TYPE_LINE, 20090, ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], root); @@ -360,15 +361,11 @@ void *ebpf_cachestat_read_hash(void *ptr) ebpf_module_t *em = (ebpf_module_t *)ptr; usec_t step = NETDATA_LATENCY_CACHESTAT_SLEEP_MS * em->update_time; - int apps = em->apps_charts; while (!close_ebpf_plugin) { usec_t dt = heartbeat_next(&hb, step); (void)dt; read_global_table(); - - if (apps) - read_apps_table(); } read_thread_closed = 1; @@ -385,12 +382,9 @@ static void cachestat_send_global(netdata_publish_cachestat_t *publish) calculate_stats(publish); netdata_publish_syscall_t *ptr = cachestat_counter_publish_aggregated; - // The algorithm sets this value to zero sometimes, we are not written them to have a smooth chart - if (publish->ratio) { - ebpf_one_dimension_write_charts( - NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_HIT_RATIO_CHART, ptr[NETDATA_CACHESTAT_IDX_RATIO].dimension, - publish->ratio); - } + ebpf_one_dimension_write_charts( + NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_HIT_RATIO_CHART, ptr[NETDATA_CACHESTAT_IDX_RATIO].dimension, + publish->ratio); ebpf_one_dimension_write_charts( NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_DIRTY_CHART, ptr[NETDATA_CACHESTAT_IDX_DIRTY].dimension, @@ -512,6 +506,9 @@ static void cachestat_collector(ebpf_module_t *em) pthread_mutex_lock(&collect_data_mutex); pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); + if (apps) + read_apps_table(); + pthread_mutex_lock(&lock); cachestat_send_global(&publish); @@ -539,7 +536,7 @@ static void ebpf_create_memory_charts() { ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_HIT_RATIO_CHART, "Hit is calculating using total cache added without dirties per total added because of red misses.", - EBPF_CACHESTAT_DIMENSION_HITS, NETDATA_CACHESTAT_SUBMENU, + EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_CACHESTAT_SUBMENU, NULL, NETDATA_EBPF_CHART_TYPE_LINE, 21100, @@ -615,9 +612,11 @@ void *ebpf_cachestat_thread(void *ptr) netdata_thread_cleanup_push(ebpf_cachestat_cleanup, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = cachestat_maps; fill_ebpf_data(&cachestat_data); ebpf_update_module(em, &cachestat_config, NETDATA_CACHESTAT_CONFIG_FILE); + ebpf_update_pid_table(&cachestat_maps[0], em); if (!em->enabled) goto endcachestat; diff --git a/collectors/ebpf.plugin/ebpf_cachestat.h b/collectors/ebpf.plugin/ebpf_cachestat.h index daf678975..694933e0c 100644 --- a/collectors/ebpf.plugin/ebpf_cachestat.h +++ b/collectors/ebpf.plugin/ebpf_cachestat.h @@ -60,5 +60,6 @@ typedef struct netdata_publish_cachestat { } netdata_publish_cachestat_t; extern void *ebpf_cachestat_thread(void *ptr); +extern void clean_cachestat_pid_structures(); #endif // NETDATA_EBPF_CACHESTAT_H diff --git a/collectors/ebpf.plugin/ebpf_dcstat.c b/collectors/ebpf.plugin/ebpf_dcstat.c new file mode 100644 index 000000000..01fd97972 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_dcstat.c @@ -0,0 +1,603 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_dcstat.h" + +static char *dcstat_counter_dimension_name[NETDATA_DCSTAT_IDX_END] = { 
"ratio", "reference", "slow", "miss" }; +static netdata_syscall_stat_t dcstat_counter_aggregated_data[NETDATA_DCSTAT_IDX_END]; +static netdata_publish_syscall_t dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_END]; + +static ebpf_data_t dcstat_data; + +netdata_dcstat_pid_t *dcstat_vector = NULL; +netdata_publish_dcstat_t **dcstat_pid = NULL; + +static struct bpf_link **probe_links = NULL; +static struct bpf_object *objects = NULL; + +static int *map_fd = NULL; +static netdata_idx_t dcstat_hash_values[NETDATA_DCSTAT_IDX_END]; + +static int read_thread_closed = 1; + +struct config dcstat_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +struct netdata_static_thread dcstat_threads = {"DCSTAT KERNEL", + NULL, NULL, 1, NULL, + NULL, NULL}; + +static ebpf_local_maps_t dcstat_maps[] = {{.name = "dcstat_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, + .user_input = 0}, + {.name = NULL, .internal_input = 0, .user_input = 0}}; + +static ebpf_specify_name_t dc_optional_name[] = { {.program_name = "netdata_lookup_fast", + .function_to_attach = "lookup_fast", + .optional = NULL, + .retprobe = CONFIG_BOOLEAN_NO}, + {.program_name = NULL}}; + +/***************************************************************** + * + * COMMON FUNCTIONS + * + *****************************************************************/ + +/** + * Update publish + * + * Update publish values before to write dimension. + * + * @param out strcuture that will receive data. + * @param cache_access number of access to directory cache. + * @param not_found number of files not found on the file system + */ +void dcstat_update_publish(netdata_publish_dcstat_t *out, uint64_t cache_access, uint64_t not_found) +{ + calculated_number successful_access = (calculated_number) (((long long)cache_access) - ((long long)not_found)); + calculated_number ratio = (cache_access) ? successful_access/(calculated_number)cache_access : 0; + + out->ratio = (long long )(ratio*100); +} + +/***************************************************************** + * + * FUNCTIONS TO CLOSE THE THREAD + * + *****************************************************************/ + +/** + * Clean PID structures + * + * Clean the allocated structures. + */ +void clean_dcstat_pid_structures() { + struct pid_stat *pids = root_of_pids; + while (pids) { + freez(dcstat_pid[pids->pid]); + + pids = pids->next; + } +} + +/** + * Clean names + * + * Clean the optional names allocated during startup. + */ +void ebpf_dcstat_clean_names() +{ + size_t i = 0; + while (dc_optional_name[i].program_name) { + freez(dc_optional_name[i].optional); + i++; + } +} + +/** + * Clean up the main thread. + * + * @param ptr thread data. 
+ */ +static void ebpf_dcstat_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) + return; + + heartbeat_t hb; + heartbeat_init(&hb); + uint32_t tick = 2 * USEC_PER_MS; + while (!read_thread_closed) { + usec_t dt = heartbeat_next(&hb, tick); + UNUSED(dt); + } + + freez(dcstat_vector); + + ebpf_cleanup_publish_syscall(dcstat_counter_publish_aggregated); + + ebpf_dcstat_clean_names(); + + struct bpf_program *prog; + size_t i = 0 ; + bpf_object__for_each_program(prog, objects) { + bpf_link__destroy(probe_links[i]); + i++; + } + bpf_object__close(objects); +} + +/***************************************************************** + * + * APPS + * + *****************************************************************/ + +/** + * Create apps charts + * + * Call ebpf_create_chart to create the charts on apps submenu. + * + * @param em a pointer to the structure with the default values. + */ +void ebpf_dcstat_create_apps_charts(struct ebpf_module *em, void *ptr) +{ + UNUSED(em); + struct target *root = ptr; + ebpf_create_charts_on_apps(NETDATA_DC_HIT_CHART, + "Percentage of files listed inside directory cache", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_APPS_DCSTAT_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + 20100, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root); + + ebpf_create_charts_on_apps(NETDATA_DC_REFERENCE_CHART, + "Count file access.", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_APPS_DCSTAT_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20101, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root); + + ebpf_create_charts_on_apps(NETDATA_DC_REQUEST_NOT_CACHE_CHART, + "Access to files that were not present inside directory cache.", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_APPS_DCSTAT_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20102, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root); + + ebpf_create_charts_on_apps(NETDATA_DC_REQUEST_NOT_FOUND_CHART, + "Number of requests for files that were not found on filesystem.", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_APPS_DCSTAT_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20103, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root); +} + +/***************************************************************** + * + * MAIN LOOP + * + *****************************************************************/ + +/** + * Apps Accumulator + * + * Sum all values read from kernel and store in the first address. + * + * @param out the vector with read values. + */ +static void dcstat_apps_accumulator(netdata_dcstat_pid_t *out) +{ + int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? ebpf_nprocs : 1; + netdata_dcstat_pid_t *total = &out[0]; + for (i = 1; i < end; i++) { + netdata_dcstat_pid_t *w = &out[i]; + total->cache_access += w->cache_access; + total->file_system += w->file_system; + total->not_found += w->not_found; + } +} + +/** + * Save PID values + * + * Save the current values inside the structure + * + * @param out vector used to plot charts + * @param publish vector with values read from hash tables. 
+ */ +static inline void dcstat_save_pid_values(netdata_publish_dcstat_t *out, netdata_dcstat_pid_t *publish) +{ + memcpy(&out->curr, &publish[0], sizeof(netdata_dcstat_pid_t)); +} + +/** + * Fill PID + * + * Fill PID structures + * + * @param current_pid pid that we are collecting data + * @param out values read from hash tables; + */ +static void dcstat_fill_pid(uint32_t current_pid, netdata_dcstat_pid_t *publish) +{ + netdata_publish_dcstat_t *curr = dcstat_pid[current_pid]; + if (!curr) { + curr = callocz(1, sizeof(netdata_publish_dcstat_t)); + dcstat_pid[current_pid] = curr; + } + + dcstat_save_pid_values(curr, publish); +} + +/** + * Read APPS table + * + * Read the apps table and store data inside the structure. + */ +static void read_apps_table() +{ + netdata_dcstat_pid_t *cv = dcstat_vector; + uint32_t key; + struct pid_stat *pids = root_of_pids; + int fd = map_fd[NETDATA_DCSTAT_PID_STATS]; + size_t length = sizeof(netdata_dcstat_pid_t)*ebpf_nprocs; + while (pids) { + key = pids->pid; + + if (bpf_map_lookup_elem(fd, &key, cv)) { + pids = pids->next; + continue; + } + + dcstat_apps_accumulator(cv); + + dcstat_fill_pid(key, cv); + + // We are cleaning to avoid passing data read from one process to other. + memset(cv, 0, length); + + pids = pids->next; + } +} + +/** + * Read global table + * + * Read the table with number of calls for all functions + */ +static void read_global_table() +{ + uint32_t idx; + netdata_idx_t *val = dcstat_hash_values; + netdata_idx_t stored; + int fd = map_fd[NETDATA_DCSTAT_GLOBAL_STATS]; + + for (idx = NETDATA_KEY_DC_REFERENCE; idx < NETDATA_DIRECTORY_CACHE_END; idx++) { + if (!bpf_map_lookup_elem(fd, &idx, &stored)) { + val[idx] = stored; + } + } +} + +/** + * DCstat read hash + * + * This is the thread callback. + * This thread is necessary, because we cannot freeze the whole plugin to read the data. + * + * @param ptr It is a NULL value for this thread. + * + * @return It always returns NULL. + */ +void *ebpf_dcstat_read_hash(void *ptr) +{ + read_thread_closed = 0; + + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + + usec_t step = NETDATA_LATENCY_DCSTAT_SLEEP_MS * em->update_time; + while (!close_ebpf_plugin) { + usec_t dt = heartbeat_next(&hb, step); + (void)dt; + + read_global_table(); + } + read_thread_closed = 1; + + return NULL; +} + +/** + * Cachestat sum PIDs + * + * Sum values for all PIDs associated to a group + * + * @param publish output structure. + * @param root structure with listed IPs + */ +void ebpf_dcstat_sum_pids(netdata_publish_dcstat_t *publish, struct pid_on_target *root) +{ + memset(&publish->curr, 0, sizeof(netdata_dcstat_pid_t)); + netdata_dcstat_pid_t *dst = &publish->curr; + while (root) { + int32_t pid = root->pid; + netdata_publish_dcstat_t *w = dcstat_pid[pid]; + if (w) { + netdata_dcstat_pid_t *src = &w->curr; + dst->cache_access += src->cache_access; + dst->file_system += src->file_system; + dst->not_found += src->not_found; + } + + root = root->next; + } +} + +/** + * Send data to Netdata calling auxiliar functions. + * + * @param root the target list. 
+*/ +void ebpf_dcache_send_apps_data(struct target *root) +{ + struct target *w; + collected_number value; + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_DC_HIT_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + ebpf_dcstat_sum_pids(&w->dcstat, w->root_pid); + + uint64_t cache = w->dcstat.curr.cache_access; + uint64_t not_found = w->dcstat.curr.not_found; + + dcstat_update_publish(&w->dcstat, cache, not_found); + value = (collected_number) w->dcstat.ratio; + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_DC_REFERENCE_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + if (w->dcstat.curr.cache_access < w->dcstat.prev.cache_access) { + w->dcstat.prev.cache_access = 0; + } + + w->dcstat.cache_access = (long long)w->dcstat.curr.cache_access - (long long)w->dcstat.prev.cache_access; + value = (collected_number) w->dcstat.cache_access; + write_chart_dimension(w->name, value); + w->dcstat.prev.cache_access = w->dcstat.curr.cache_access; + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_DC_REQUEST_NOT_CACHE_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + if (w->dcstat.curr.file_system < w->dcstat.prev.file_system) { + w->dcstat.prev.file_system = 0; + } + + value = (collected_number) (!w->dcstat.cache_access) ? 0 : + (long long )w->dcstat.curr.file_system - (long long)w->dcstat.prev.file_system; + write_chart_dimension(w->name, value); + w->dcstat.prev.file_system = w->dcstat.curr.file_system; + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_DC_REQUEST_NOT_FOUND_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + if (w->dcstat.curr.not_found < w->dcstat.prev.not_found) { + w->dcstat.prev.not_found = 0; + } + value = (collected_number) (!w->dcstat.cache_access) ? 
0 : + (long long)w->dcstat.curr.not_found - (long long)w->dcstat.prev.not_found; + write_chart_dimension(w->name, value); + w->dcstat.prev.not_found = w->dcstat.curr.not_found; + } + } + write_end_chart(); +} + +/** + * Send global + * + * Send global charts to Netdata + */ +static void dcstat_send_global(netdata_publish_dcstat_t *publish) +{ + dcstat_update_publish(publish, dcstat_hash_values[NETDATA_KEY_DC_REFERENCE], + dcstat_hash_values[NETDATA_KEY_DC_MISS]); + + netdata_publish_syscall_t *ptr = dcstat_counter_publish_aggregated; + netdata_idx_t value = dcstat_hash_values[NETDATA_KEY_DC_REFERENCE]; + if (value != ptr[NETDATA_DCSTAT_IDX_REFERENCE].pcall) { + ptr[NETDATA_DCSTAT_IDX_REFERENCE].ncall = value - ptr[NETDATA_DCSTAT_IDX_REFERENCE].pcall; + ptr[NETDATA_DCSTAT_IDX_REFERENCE].pcall = value; + + value = dcstat_hash_values[NETDATA_KEY_DC_SLOW]; + ptr[NETDATA_DCSTAT_IDX_SLOW].ncall = value - ptr[NETDATA_DCSTAT_IDX_SLOW].pcall; + ptr[NETDATA_DCSTAT_IDX_SLOW].pcall = value; + + value = dcstat_hash_values[NETDATA_KEY_DC_MISS]; + ptr[NETDATA_DCSTAT_IDX_MISS].ncall = value - ptr[NETDATA_DCSTAT_IDX_MISS].pcall; + ptr[NETDATA_DCSTAT_IDX_MISS].pcall = value; + } else { + ptr[NETDATA_DCSTAT_IDX_REFERENCE].ncall = 0; + ptr[NETDATA_DCSTAT_IDX_SLOW].ncall = 0; + ptr[NETDATA_DCSTAT_IDX_MISS].ncall = 0; + } + + ebpf_one_dimension_write_charts(NETDATA_FILESYSTEM_FAMILY, NETDATA_DC_HIT_CHART, + ptr[NETDATA_DCSTAT_IDX_RATIO].dimension, publish->ratio); + + write_count_chart( + NETDATA_DC_REFERENCE_CHART, NETDATA_FILESYSTEM_FAMILY, + &dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_REFERENCE], 3); +} + +/** +* Main loop for this collector. +*/ +static void dcstat_collector(ebpf_module_t *em) +{ + dcstat_threads.thread = mallocz(sizeof(netdata_thread_t)); + dcstat_threads.start_routine = ebpf_dcstat_read_hash; + + map_fd = dcstat_data.map_fd; + + netdata_thread_create(dcstat_threads.thread, dcstat_threads.name, NETDATA_THREAD_OPTION_JOINABLE, + ebpf_dcstat_read_hash, em); + + netdata_publish_dcstat_t publish; + memset(&publish, 0, sizeof(publish)); + int apps = em->apps_charts; + while (!close_ebpf_plugin) { + pthread_mutex_lock(&collect_data_mutex); + pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); + + if (apps) + read_apps_table(); + + pthread_mutex_lock(&lock); + + dcstat_send_global(&publish); + + if (apps) + ebpf_dcache_send_apps_data(apps_groups_root_target); + + pthread_mutex_unlock(&lock); + pthread_mutex_unlock(&collect_data_mutex); + } +} + +/***************************************************************** + * + * INITIALIZE THREAD + * + *****************************************************************/ + +/** + * Create filesystem charts + * + * Call ebpf_create_chart to create the charts for the collector. 
+ */ +static void ebpf_create_filesystem_charts() +{ + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, NETDATA_DC_HIT_CHART, + "Percentage of files listed inside directory cache", + EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_DIRECTORY_FILESYSTEM_SUBMENU, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + 21200, + ebpf_create_global_dimension, + dcstat_counter_publish_aggregated, 1); + + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, NETDATA_DC_REFERENCE_CHART, + "Variables used to calculate hit ratio.", + EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_FILESYSTEM_SUBMENU, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + 21201, + ebpf_create_global_dimension, + &dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_REFERENCE], 3); + + fflush(stdout); +} + +/** + * Allocate vectors used with this thread. + * + * We are not testing the return, because callocz does this and shutdown the software + * case it was not possible to allocate. + * + * @param length is the length for the vectors used inside the collector. + */ +static void ebpf_dcstat_allocate_global_vectors(size_t length) +{ + dcstat_pid = callocz((size_t)pid_max, sizeof(netdata_publish_dcstat_t *)); + dcstat_vector = callocz((size_t)ebpf_nprocs, sizeof(netdata_dcstat_pid_t)); + + memset(dcstat_counter_aggregated_data, 0, length*sizeof(netdata_syscall_stat_t)); + memset(dcstat_counter_publish_aggregated, 0, length*sizeof(netdata_publish_syscall_t)); +} + +/***************************************************************** + * + * MAIN THREAD + * + *****************************************************************/ + +/** + * Directory Cache thread + * + * Thread used to make dcstat thread + * + * @param ptr a pointer to `struct ebpf_module` + * + * @return It always returns NULL + */ +void *ebpf_dcstat_thread(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_dcstat_cleanup, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = dcstat_maps; + fill_ebpf_data(&dcstat_data); + + ebpf_update_module(em, &dcstat_config, NETDATA_DIRECTORY_DCSTAT_CONFIG_FILE); + ebpf_update_pid_table(&dcstat_maps[0], em); + + ebpf_update_names(dc_optional_name, em); + + if (!em->enabled) + goto enddcstat; + + ebpf_dcstat_allocate_global_vectors(NETDATA_DCSTAT_IDX_END); + + pthread_mutex_lock(&lock); + + probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects, dcstat_data.map_fd); + if (!probe_links) { + pthread_mutex_unlock(&lock); + goto enddcstat; + } + + int algorithms[NETDATA_DCSTAT_IDX_END] = { + NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX, + NETDATA_EBPF_ABSOLUTE_IDX + }; + + ebpf_global_labels(dcstat_counter_aggregated_data, dcstat_counter_publish_aggregated, + dcstat_counter_dimension_name, dcstat_counter_dimension_name, + algorithms, NETDATA_DCSTAT_IDX_END); + + ebpf_create_filesystem_charts(); + pthread_mutex_unlock(&lock); + + dcstat_collector(em); + +enddcstat: + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_dcstat.h b/collectors/ebpf.plugin/ebpf_dcstat.h new file mode 100644 index 000000000..ad4bd1992 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_dcstat.h @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_DCSTAT_H +#define NETDATA_EBPF_DCSTAT_H 1 + + +// charts +#define NETDATA_DC_HIT_CHART "dc_hit_ratio" +#define NETDATA_DC_REFERENCE_CHART "dc_reference" +#define NETDATA_DC_REQUEST_NOT_CACHE_CHART "dc_not_cache" +#define NETDATA_DC_REQUEST_NOT_FOUND_CHART "dc_not_found" + +#define NETDATA_DIRECTORY_CACHE_SUBMENU "directory 
cache (eBPF)" +#define NETDATA_DIRECTORY_FILESYSTEM_SUBMENU "Directory Cache (eBPF)" + +// configuration file +#define NETDATA_DIRECTORY_DCSTAT_CONFIG_FILE "dcstat.conf" + +#define NETDATA_LATENCY_DCSTAT_SLEEP_MS 700000ULL + +enum directory_cache_indexes { + NETDATA_DCSTAT_IDX_RATIO, + NETDATA_DCSTAT_IDX_REFERENCE, + NETDATA_DCSTAT_IDX_SLOW, + NETDATA_DCSTAT_IDX_MISS, + + // Keep this as last and don't skip numbers as it is used as element counter + NETDATA_DCSTAT_IDX_END +}; + +enum directory_cache_tables { + NETDATA_DCSTAT_GLOBAL_STATS, + NETDATA_DCSTAT_PID_STATS +}; + +// variables +enum directory_cache_counters { + NETDATA_KEY_DC_REFERENCE, + NETDATA_KEY_DC_SLOW, + NETDATA_KEY_DC_MISS, + + // Keep this as last and don't skip numbers as it is used as element counter + NETDATA_DIRECTORY_CACHE_END +}; + +typedef struct netdata_publish_dcstat_pid { + uint64_t cache_access; + uint64_t file_system; + uint64_t not_found; +} netdata_dcstat_pid_t; + +typedef struct netdata_publish_dcstat { + long long ratio; + long long cache_access; + + netdata_dcstat_pid_t curr; + netdata_dcstat_pid_t prev; +} netdata_publish_dcstat_t; + +extern void *ebpf_dcstat_thread(void *ptr); +extern void ebpf_dcstat_create_apps_charts(struct ebpf_module *em, void *ptr); +extern void clean_dcstat_pid_structures(); + +#endif // NETDATA_EBPF_DCSTAT_H diff --git a/collectors/ebpf.plugin/ebpf_process.c b/collectors/ebpf.plugin/ebpf_process.c index 5fa930b2d..9b15c8407 100644 --- a/collectors/ebpf.plugin/ebpf_process.c +++ b/collectors/ebpf.plugin/ebpf_process.c @@ -18,6 +18,10 @@ static char *process_id_names[NETDATA_KEY_PUBLISH_PROCESS_END] = { "do_sys_open" "release_task", "_do_fork", "sys_clone" }; static char *status[] = { "process", "zombie" }; +static ebpf_local_maps_t process_maps[] = {{.name = "tbl_pid_stats", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, + .user_input = 0}, + {.name = NULL, .internal_input = 0, .user_input = 0}}; + static netdata_idx_t *process_hash_values = NULL; static netdata_syscall_stat_t process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_END]; static netdata_publish_syscall_t process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_END]; @@ -464,7 +468,7 @@ static void ebpf_process_update_apps_data() * @param family the chart family * @param name the chart name * @param axis the axis label - * @param web the group name used to attach the chart on dashaboard + * @param web the group name used to attach the chart on dashboard * @param order the order number of the specified chart * @param algorithm the algorithm used to make the charts. */ @@ -494,7 +498,7 @@ static void ebpf_create_io_chart(char *family, char *name, char *axis, char *web * @param family the chart family * @param name the chart name * @param axis the axis label - * @param web the group name used to attach the chart on dashaboard + * @param web the group name used to attach the chart on dashboard * @param order the order number of the specified chart */ static void ebpf_process_status_chart(char *family, char *name, char *axis, @@ -905,26 +909,6 @@ void clean_global_memory() { } } -void clean_pid_on_target(struct pid_on_target *ptr) { - while (ptr) { - struct pid_on_target *next = ptr->next; - freez(ptr); - - ptr = next; - } -} - -void clean_apps_structures(struct target *ptr) { - struct target *agdt = ptr; - while (agdt) { - struct target *next = agdt->next; - clean_pid_on_target(agdt->root_pid); - freez(agdt); - - agdt = next; - } -} - /** * Clean up the main thread. 
* @@ -949,7 +933,6 @@ static void ebpf_process_cleanup(void *ptr) freez(global_process_stats); freez(current_apps_data); - clean_apps_structures(apps_groups_root_target); freez(process_data.map_fd); struct bpf_program *prog; @@ -1050,6 +1033,7 @@ void *ebpf_process_thread(void *ptr) netdata_thread_cleanup_push(ebpf_process_cleanup, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = process_maps; process_enabled = em->enabled; fill_ebpf_data(&process_data); @@ -1062,6 +1046,7 @@ void *ebpf_process_thread(void *ptr) } ebpf_update_module(em, &process_config, NETDATA_PROCESS_CONFIG_FILE); + ebpf_update_pid_table(&process_maps[0], em); set_local_pointers(); probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects, process_data.map_fd); diff --git a/collectors/ebpf.plugin/ebpf_socket.c b/collectors/ebpf.plugin/ebpf_socket.c index a142d43b3..cbb4dded0 100644 --- a/collectors/ebpf.plugin/ebpf_socket.c +++ b/collectors/ebpf.plugin/ebpf_socket.c @@ -16,6 +16,20 @@ static char *socket_dimension_names[NETDATA_MAX_SOCKET_VECTOR] = { "sent", "rece static char *socket_id_names[NETDATA_MAX_SOCKET_VECTOR] = { "tcp_sendmsg", "tcp_cleanup_rbuf", "tcp_close", "udp_sendmsg", "udp_recvmsg", "tcp_retransmit_skb" }; +static ebpf_local_maps_t socket_maps[] = {{.name = "tbl_bandwidth", + .internal_input = NETDATA_COMPILED_CONNECTIONS_ALLOWED, + .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED}, + {.name = "tbl_conn_ipv4", + .internal_input = NETDATA_COMPILED_CONNECTIONS_ALLOWED, + .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED}, + {.name = "tbl_conn_ipv6", + .internal_input = NETDATA_COMPILED_CONNECTIONS_ALLOWED, + .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED}, + {.name = "tbl_nv_udp_conn_stats", + .internal_input = NETDATA_COMPILED_UDP_CONNECTIONS_ALLOWED, + .user_input = NETDATA_MAXIMUM_UDP_CONNECTIONS_ALLOWED}, + {.name = NULL, .internal_input = 0, .user_input = 0}}; + static netdata_idx_t *socket_hash_values = NULL; static netdata_syscall_stat_t socket_aggregated_data[NETDATA_MAX_SOCKET_VECTOR]; static netdata_publish_syscall_t socket_publish_aggregated[NETDATA_MAX_SOCKET_VECTOR]; @@ -600,7 +614,7 @@ void ebpf_socket_create_apps_charts(struct ebpf_module *em, void *ptr) * @param id the chart id * @param title the chart title * @param units the units label - * @param family the group name used to attach the chart on dashaboard + * @param family the group name used to attach the chart on dashboard * @param order the chart order * @param ptr the plot structure with values. */ @@ -637,7 +651,7 @@ static void ebpf_socket_create_nv_chart(char *id, char *title, char *units, * @param id the chart id * @param title the chart title * @param units the units label - * @param family the group name used to attach the chart on dashaboard + * @param family the group name used to attach the chart on dashboard * @param order the chart order * @param ptr the plot structure with values. 
*/ @@ -1325,7 +1339,7 @@ static void read_socket_hash_table(int fd, int family, int network_connection) return; netdata_socket_idx_t key = {}; - netdata_socket_idx_t next_key; + netdata_socket_idx_t next_key = {}; netdata_socket_idx_t removeme; int removesock = 0; @@ -1421,7 +1435,7 @@ void update_listen_table(uint16_t value, uint8_t proto) static void read_listen_table() { uint16_t key = 0; - uint16_t next_key; + uint16_t next_key = 0; int fd = map_fd[NETDATA_SOCKET_LISTEN_TABLE]; uint8_t value; @@ -1713,7 +1727,7 @@ static void clean_allocated_socket_plot() } /** - * Clean netowrk ports allocated during initializaion. + * Clean network ports allocated during initialization. * * @param ptr a pointer to the link list. */ @@ -1769,7 +1783,7 @@ static void clean_hostnames(ebpf_network_viewer_hostname_list_t *hostnames) } } -void clean_thread_structures() { +void clean_socket_apps_structures() { struct pid_stat *pids = root_of_pids; while (pids) { freez(socket_bandwidth_curr[pids->pid]); @@ -1853,8 +1867,6 @@ static void ebpf_socket_cleanup(void *ptr) ebpf_cleanup_publish_syscall(socket_publish_aggregated); freez(socket_hash_values); - clean_thread_structures(); - freez(socket_bandwidth_curr); freez(bandwidth_vector); freez(socket_values); @@ -2755,7 +2767,7 @@ static void link_dimension_name(char *port, uint32_t hash, char *value) } else { for (; names->next; names = names->next) { if (names->port == w->port) { - info("Dupplicated definition for a service, the name %s will be ignored. ", names->name); + info("Duplicated definition for a service, the name %s will be ignored. ", names->name); freez(names->name); names->name = w->name; names->hash = w->hash; @@ -2809,6 +2821,25 @@ void parse_service_name_section(struct config *cfg) } } +void parse_table_size_options(struct config *cfg) +{ + socket_maps[NETDATA_SOCKET_TABLE_BANDWIDTH].user_input = (uint32_t) appconfig_get_number(cfg, + EBPF_GLOBAL_SECTION, + EBPF_CONFIG_BANDWIDTH_SIZE, NETDATA_MAXIMUM_CONNECTIONS_ALLOWED); + + socket_maps[NETDATA_SOCKET_TABLE_IPV4].user_input = (uint32_t) appconfig_get_number(cfg, + EBPF_GLOBAL_SECTION, + EBPF_CONFIG_IPV4_SIZE, NETDATA_MAXIMUM_CONNECTIONS_ALLOWED); + + socket_maps[NETDATA_SOCKET_TABLE_IPV6].user_input = (uint32_t) appconfig_get_number(cfg, + EBPF_GLOBAL_SECTION, + EBPF_CONFIG_IPV6_SIZE, NETDATA_MAXIMUM_CONNECTIONS_ALLOWED); + + socket_maps[NETDATA_SOCKET_TABLE_UDP].user_input = (uint32_t) appconfig_get_number(cfg, + EBPF_GLOBAL_SECTION, + EBPF_CONFIG_UDP_SIZE, NETDATA_MAXIMUM_UDP_CONNECTIONS_ALLOWED); +} + /** * Socket thread * @@ -2822,15 +2853,19 @@ void *ebpf_socket_thread(void *ptr) { netdata_thread_cleanup_push(ebpf_socket_cleanup, ptr); + memset(&inbound_vectors.tree, 0, sizeof(avl_tree_lock)); + memset(&outbound_vectors.tree, 0, sizeof(avl_tree_lock)); avl_init_lock(&inbound_vectors.tree, compare_sockets); avl_init_lock(&outbound_vectors.tree, compare_sockets); ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = socket_maps; fill_ebpf_data(&socket_data); ebpf_update_module(em, &socket_config, NETDATA_NETWORK_CONFIG_FILE); parse_network_viewer_section(&socket_config); parse_service_name_section(&socket_config); + parse_table_size_options(&socket_config); if (!em->enabled) goto endsocket; diff --git a/collectors/ebpf.plugin/ebpf_socket.h b/collectors/ebpf.plugin/ebpf_socket.h index 81001bab6..8dd422507 100644 --- a/collectors/ebpf.plugin/ebpf_socket.h +++ b/collectors/ebpf.plugin/ebpf_socket.h @@ -24,8 +24,19 @@ #define EBPF_CONFIG_RESOLVE_SERVICE "resolve service names" #define 
EBPF_CONFIG_PORTS "ports" #define EBPF_CONFIG_HOSTNAMES "hostnames" +#define EBPF_CONFIG_BANDWIDTH_SIZE "bandwidth table size" +#define EBPF_CONFIG_IPV4_SIZE "ipv4 connection table size" +#define EBPF_CONFIG_IPV6_SIZE "ipv6 connection table size" +#define EBPF_CONFIG_UDP_SIZE "udp connection table size" #define EBPF_MAXIMUM_DIMENSIONS "maximum dimensions" +enum ebpf_socket_table_list { + NETDATA_SOCKET_TABLE_BANDWIDTH, + NETDATA_SOCKET_TABLE_IPV4, + NETDATA_SOCKET_TABLE_IPV6, + NETDATA_SOCKET_TABLE_UDP +}; + enum ebpf_socket_publish_index { NETDATA_IDX_TCP_SENDMSG, NETDATA_IDX_TCP_CLEANUP_RBUF, @@ -94,6 +105,10 @@ typedef enum ebpf_socket_idx { // Port range #define NETDATA_MINIMUM_PORT_VALUE 1 #define NETDATA_MAXIMUM_PORT_VALUE 65535 +#define NETDATA_COMPILED_CONNECTIONS_ALLOWED 65535U +#define NETDATA_MAXIMUM_CONNECTIONS_ALLOWED 16384U +#define NETDATA_COMPILED_UDP_CONNECTIONS_ALLOWED 8192U +#define NETDATA_MAXIMUM_UDP_CONNECTIONS_ALLOWED 4096U #define NETDATA_MINIMUM_IPV4_CIDR 0 #define NETDATA_MAXIMUM_IPV4_CIDR 32 @@ -294,6 +309,7 @@ extern void update_listen_table(uint16_t value, uint8_t proto); extern void parse_network_viewer_section(struct config *cfg); extern void fill_ip_list(ebpf_network_viewer_ip_list_t **out, ebpf_network_viewer_ip_list_t *in, char *table); extern void parse_service_name_section(struct config *cfg); +extern void clean_socket_apps_structures(); extern ebpf_socket_publish_apps_t **socket_bandwidth_curr; |
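To close with a concrete number: the directory-cache hit ratio added by this release is computed in `dcstat_update_publish()` as `(cache_access - not_found) / cache_access` and stored as an integer percentage. The self-contained example below walks through that arithmetic with made-up counters; only the formula comes from the diff, the values are illustrative.

```c
// Worked example of the hit ratio published by dcstat_update_publish() in this diff.
// The counter values are made up; only the formula mirrors the collector.
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t cache_access = 1000;  // NETDATA_KEY_DC_REFERENCE: total directory-cache lookups
    uint64_t not_found    = 50;    // NETDATA_KEY_DC_MISS: lookups where the file was not found

    double successful = (double)cache_access - (double)not_found;
    double ratio = cache_access ? successful / (double)cache_access : 0;

    // The collector stores the ratio as a long long percentage, so 0.95 is published as 95.
    printf("dc_hit_ratio = %lld%%\n", (long long)(ratio * 100)); // prints "dc_hit_ratio = 95%"
    return 0;
}
```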