author     Daniel Baumann <daniel.baumann@progress-linux.org>   2024-08-26 08:15:24 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>   2024-08-26 08:15:35 +0000
commit     f09848204fa5283d21ea43e262ee41aa578e1808 (patch)
tree       c62385d7adf209fa6a798635954d887f718fb3fb /src/collectors/ebpf.plugin
parent     Releasing debian version 1.46.3-2. (diff)
Merging upstream version 1.47.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/collectors/ebpf.plugin')
37 files changed, 1665 insertions, 1266 deletions
diff --git a/src/collectors/ebpf.plugin/ebpf.c b/src/collectors/ebpf.plugin/ebpf.c index de2b6e14..5424ea8f 100644 --- a/src/collectors/ebpf.plugin/ebpf.c +++ b/src/collectors/ebpf.plugin/ebpf.c @@ -30,6 +30,7 @@ int ebpf_nprocs; int isrh = 0; int main_thread_id = 0; int process_pid_fd = -1; +uint64_t collect_pids = 0; static size_t global_iterations_counter = 1; bool publish_internal_metrics = true; @@ -996,7 +997,7 @@ static inline void ebpf_create_apps_for_module(ebpf_module_t *em, struct ebpf_ta */ static void ebpf_create_apps_charts(struct ebpf_target *root) { - if (unlikely(!ebpf_all_pids)) + if (unlikely(!ebpf_pids)) return; struct ebpf_target *w; @@ -1028,21 +1029,15 @@ static void ebpf_create_apps_charts(struct ebpf_target *root) } } - int i; - if (!newly_added) { + if (newly_added) { + int i; for (i = 0; i < EBPF_MODULE_FUNCTION_IDX ; i++) { - ebpf_module_t *current = &ebpf_modules[i]; - if (current->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) + if (!(collect_pids & (1<<i))) continue; + ebpf_module_t *current = &ebpf_modules[i]; ebpf_create_apps_for_module(current, root); } - return; - } - - for (i = 0; i < EBPF_MODULE_FUNCTION_IDX ; i++) { - ebpf_module_t *current = &ebpf_modules[i]; - ebpf_create_apps_for_module(current, root); } } @@ -2680,7 +2675,7 @@ static void ebpf_allocate_common_vectors() { ebpf_judy_pid.pid_table = ebpf_allocate_pid_aral(NETDATA_EBPF_PID_SOCKET_ARAL_TABLE_NAME, sizeof(netdata_ebpf_judy_pid_stats_t)); - ebpf_all_pids = callocz((size_t)pid_max, sizeof(struct ebpf_pid_stat *)); + ebpf_pids = callocz((size_t)pid_max, sizeof(ebpf_pid_data_t)); ebpf_aral_init(); } @@ -3014,7 +3009,7 @@ static int ebpf_load_collector_config(char *path, int *disable_cgroups, int upda /** * Set global variables reading environment variables */ -void set_global_variables() +static void ebpf_set_global_variables() { // Get environment variables ebpf_plugin_dir = getenv("NETDATA_PLUGINS_DIR"); @@ -3042,6 +3037,7 @@ void set_global_variables() isrh = get_redhat_release(); pid_max = os_get_system_pid_max(); running_on_kernel = ebpf_get_kernel_version(); + memset(pids_fd, -1, sizeof(pids_fd)); } /** @@ -3418,6 +3414,11 @@ void ebpf_send_statistic_data() } ebpf_write_end_chart(); + ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, "monitoring_pid", ""); + write_chart_dimension("user", ebpf_all_pids_count); + write_chart_dimension("kernel", ebpf_hash_table_pids_count); + ebpf_write_end_chart(); + ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_LIFE_TIME, ""); for (i = 0; i < EBPF_MODULE_FUNCTION_IDX ; i++) { ebpf_module_t *wem = &ebpf_modules[i]; @@ -3490,6 +3491,37 @@ static void update_internal_metric_variable() } /** + * Create PIDS Chart + * + * Write to standard output current values for PIDSs charts. + * + * @param order order to display chart + * @param update_every time used to update charts + */ +static void ebpf_create_pids_chart(int order, int update_every) +{ + ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, + "monitoring_pid", + "", + "Total number of monitored PIDs", + "pids", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + "netdata.ebpf_pids", + order, + update_every, + "main"); + + ebpf_write_global_dimension("user", + "user", + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + ebpf_write_global_dimension("kernel", + "kernel", + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); +} + +/** * Create Thread Chart * * Write to standard output current values for threads charts. 
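The central addition in ebpf.c is the `collect_pids` bitmask: each module that provides an apps routine and has apps or cgroup charts enabled sets its bit, and `ebpf_create_apps_charts()` now skips any module whose bit is clear. Below is a minimal sketch of that gating pattern; the module indexes and the chart callback are illustrative placeholders, not the plugin's real `enum ebpf_main_index` values or functions.

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative module indexes; the plugin uses enum ebpf_main_index. */
enum { MOD_PROCESS_IDX, MOD_SOCKET_IDX, MOD_CACHESTAT_IDX, MOD_END_IDX };

static uint64_t collect_pids;                 /* one bit per module, as in the patch */

/* A module that needs per-PID (apps/cgroup) data sets its bit at start-up. */
static void module_wants_pids(int idx)
{
    collect_pids |= 1ULL << idx;
}

/* Apps-chart creation only visits modules whose bit is set. */
static void create_apps_charts(void)
{
    for (int i = 0; i < MOD_END_IDX; i++) {
        if (!(collect_pids & (1ULL << i)))
            continue;
        printf("module %d: creating apps charts\n", i);   /* placeholder for the real callback */
    }
}

int main(void)
{
    module_wants_pids(MOD_PROCESS_IDX);
    create_apps_charts();
    return 0;
}
```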
@@ -3538,7 +3570,7 @@ static void ebpf_create_thread_chart(char *name, (char *)em->info.thread_name, ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); } -} + } /** * Create chart for Load Thread @@ -3741,6 +3773,8 @@ static void ebpf_create_statistic_charts(int update_every) update_every, NULL); + ebpf_create_pids_chart(NETDATA_EBPF_ORDER_PIDS, update_every); + ebpf_create_thread_chart(NETDATA_EBPF_LIFE_TIME, "Time remaining for thread.", "seconds", @@ -3974,18 +4008,18 @@ int main(int argc, char **argv) clocks_init(); nd_log_initialize_for_external_plugins(NETDATA_EBPF_PLUGIN_NAME); - main_thread_id = gettid_cached(); - - set_global_variables(); - ebpf_parse_args(argc, argv); - ebpf_manage_pid(getpid()); - + ebpf_set_global_variables(); if (ebpf_can_plugin_load_code(running_on_kernel, NETDATA_EBPF_PLUGIN_NAME)) return 2; if (ebpf_adjust_memory_limit()) return 3; + main_thread_id = gettid_cached(); + + ebpf_parse_args(argc, argv); + ebpf_manage_pid(getpid()); + signal(SIGINT, ebpf_stop_threads); signal(SIGQUIT, ebpf_stop_threads); signal(SIGTERM, ebpf_stop_threads); @@ -4018,7 +4052,7 @@ int main(int argc, char **argv) ebpf_cgroup_integration, NULL); - int i; + uint32_t i; for (i = 0; ebpf_threads[i].name != NULL; i++) { struct netdata_static_thread *st = &ebpf_threads[i]; @@ -4028,6 +4062,10 @@ int main(int argc, char **argv) if (em->enabled != NETDATA_THREAD_EBPF_NOT_RUNNING) { em->enabled = NETDATA_THREAD_EBPF_RUNNING; em->lifetime = EBPF_NON_FUNCTION_LIFE_TIME; + + if (em->functions.apps_routine && (em->apps_charts || em->cgroup_charts)) { + collect_pids |= 1<<i; + } st->thread = nd_thread_create(st->name, NETDATA_THREAD_OPTION_JOINABLE, st->start_routine, em); } else { em->lifetime = EBPF_DEFAULT_LIFETIME; @@ -4038,7 +4076,7 @@ int main(int argc, char **argv) heartbeat_t hb; heartbeat_init(&hb); int update_apps_every = (int) EBPF_CFG_UPDATE_APPS_EVERY_DEFAULT; - int max_period = update_apps_every * EBPF_CLEANUP_FACTOR; + uint32_t max_period = EBPF_CLEANUP_FACTOR; int update_apps_list = update_apps_every - 1; int process_maps_per_core = ebpf_modules[EBPF_MODULE_PROCESS_IDX].maps_per_core; //Plugin will be killed when it receives a signal @@ -4050,19 +4088,23 @@ int main(int argc, char **argv) ebpf_create_statistic_charts(EBPF_DEFAULT_UPDATE_EVERY); ebpf_send_statistic_data(); - pthread_mutex_unlock(&lock); fflush(stdout); + pthread_mutex_unlock(&lock); } if (++update_apps_list == update_apps_every) { update_apps_list = 0; pthread_mutex_lock(&lock); - pthread_mutex_lock(&collect_data_mutex); - ebpf_cleanup_exited_pids(max_period); - collect_data_for_all_processes(process_pid_fd, process_maps_per_core); - - ebpf_create_apps_charts(apps_groups_root_target); - pthread_mutex_unlock(&collect_data_mutex); + if (collect_pids) { + pthread_mutex_lock(&collect_data_mutex); + ebpf_parse_proc_files(); + if (collect_pids & (1<<EBPF_MODULE_PROCESS_IDX)) { + collect_data_for_all_processes(process_pid_fd, process_maps_per_core, max_period); + } + + ebpf_create_apps_charts(apps_groups_root_target); + pthread_mutex_unlock(&collect_data_mutex); + } pthread_mutex_unlock(&lock); } } diff --git a/src/collectors/ebpf.plugin/ebpf.d/cachestat.conf b/src/collectors/ebpf.plugin/ebpf.d/cachestat.conf index c378e82e..9c51b2c5 100644 --- a/src/collectors/ebpf.plugin/ebpf.d/cachestat.conf +++ b/src/collectors/ebpf.plugin/ebpf.d/cachestat.conf @@ -37,6 +37,6 @@ # pid table size = 32768 ebpf type format = auto ebpf co-re tracing = trampoline - collect pid = all + collect pid = real parent # maps per core = yes lifetime = 
300 diff --git a/src/collectors/ebpf.plugin/ebpf.d/dcstat.conf b/src/collectors/ebpf.plugin/ebpf.d/dcstat.conf index 2d54bce9..614d814e 100644 --- a/src/collectors/ebpf.plugin/ebpf.d/dcstat.conf +++ b/src/collectors/ebpf.plugin/ebpf.d/dcstat.conf @@ -35,6 +35,6 @@ # pid table size = 32768 ebpf type format = auto ebpf co-re tracing = trampoline - collect pid = all + collect pid = real parent # maps per core = yes lifetime = 300 diff --git a/src/collectors/ebpf.plugin/ebpf.d/fd.conf b/src/collectors/ebpf.plugin/ebpf.d/fd.conf index d4823032..4d0d2ac0 100644 --- a/src/collectors/ebpf.plugin/ebpf.d/fd.conf +++ b/src/collectors/ebpf.plugin/ebpf.d/fd.conf @@ -23,5 +23,6 @@ # pid table size = 32768 ebpf type format = auto ebpf co-re tracing = trampoline + collect pid = real parent # maps per core = yes lifetime = 300 diff --git a/src/collectors/ebpf.plugin/ebpf.d/oomkill.conf b/src/collectors/ebpf.plugin/ebpf.d/oomkill.conf index ea97ebe8..a137b945 100644 --- a/src/collectors/ebpf.plugin/ebpf.d/oomkill.conf +++ b/src/collectors/ebpf.plugin/ebpf.d/oomkill.conf @@ -3,9 +3,21 @@ # `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors. # +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. +# +# The `pid table size` defines the maximum number of PIDs stored inside the hash table. +# +# The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. +# # The `lifetime` defines the time length a thread will run when it is enabled by a function. 
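Back in ebpf.c's main loop, the /proc scan is gated the same way: the apps list is refreshed once every `update_apps_every` heartbeats, and only when at least one module set a bit in `collect_pids`. A reduced sketch of that counter-and-gate shape follows; the heartbeat and the real collection calls are replaced by placeholders and are not the plugin's API.

```c
#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>

static uint64_t collect_pids;          /* set per module at start-up, as in the patch */
static volatile bool plugin_stop;      /* flipped by the plugin's signal handlers */

static void parse_proc_files(void)   { /* placeholder for ebpf_parse_proc_files() */ }
static void create_apps_charts(void) { /* placeholder for ebpf_create_apps_charts() */ }

void apps_update_loop(int update_apps_every)
{
    int update_apps_list = update_apps_every - 1;   /* forces a refresh on the first tick */

    while (!plugin_stop) {
        sleep(1);                                   /* stand-in for heartbeat_next() */

        if (++update_apps_list != update_apps_every)
            continue;
        update_apps_list = 0;

        if (!collect_pids)                          /* no module asked for per-PID data */
            continue;

        parse_proc_files();
        create_apps_charts();
    }
}
```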
# [global] # ebpf load mode = entry # update every = 1 + ebpf type format = auto + ebpf co-re tracing = trampoline + collect pid = real parent lifetime = 300 diff --git a/src/collectors/ebpf.plugin/ebpf.d/process.conf b/src/collectors/ebpf.plugin/ebpf.d/process.conf index 6f647700..150c5792 100644 --- a/src/collectors/ebpf.plugin/ebpf.d/process.conf +++ b/src/collectors/ebpf.plugin/ebpf.d/process.conf @@ -26,6 +26,6 @@ # cgroups = no # update every = 10 # pid table size = 32768 - collect pid = all + collect pid = real parent # maps per core = yes lifetime = 300 diff --git a/src/collectors/ebpf.plugin/ebpf.d/shm.conf b/src/collectors/ebpf.plugin/ebpf.d/shm.conf index 0314bdc9..4769c52e 100644 --- a/src/collectors/ebpf.plugin/ebpf.d/shm.conf +++ b/src/collectors/ebpf.plugin/ebpf.d/shm.conf @@ -31,7 +31,7 @@ # pid table size = 32768 ebpf type format = auto ebpf co-re tracing = trampoline - collect pid = all + collect pid = real parent # maps per core = yes lifetime = 300 diff --git a/src/collectors/ebpf.plugin/ebpf.d/swap.conf b/src/collectors/ebpf.plugin/ebpf.d/swap.conf index 6d76b988..7d4c5f7d 100644 --- a/src/collectors/ebpf.plugin/ebpf.d/swap.conf +++ b/src/collectors/ebpf.plugin/ebpf.d/swap.conf @@ -30,6 +30,6 @@ # pid table size = 32768 ebpf type format = auto ebpf co-re tracing = trampoline - collect pid = all + collect pid = real parent # maps per core = yes lifetime = 300 diff --git a/src/collectors/ebpf.plugin/ebpf.d/vfs.conf b/src/collectors/ebpf.plugin/ebpf.d/vfs.conf index f511581b..941ac140 100644 --- a/src/collectors/ebpf.plugin/ebpf.d/vfs.conf +++ b/src/collectors/ebpf.plugin/ebpf.d/vfs.conf @@ -31,5 +31,6 @@ # pid table size = 32768 ebpf type format = auto ebpf co-re tracing = trampoline + collect pid = real parent # maps per core = yes lifetime = 300 diff --git a/src/collectors/ebpf.plugin/ebpf.h b/src/collectors/ebpf.plugin/ebpf.h index c54b5900..6fc42b3e 100644 --- a/src/collectors/ebpf.plugin/ebpf.h +++ b/src/collectors/ebpf.plugin/ebpf.h @@ -37,6 +37,7 @@ #define NETDATA_EBPF_OLD_CONFIG_FILE "ebpf.conf" #define NETDATA_EBPF_CONFIG_FILE "ebpf.d.conf" +extern size_t ebpf_hash_table_pids_count; #ifdef LIBBPF_MAJOR_VERSION // BTF code #include "cachestat.skel.h" #include "dc.skel.h" @@ -122,34 +123,6 @@ typedef struct netdata_ebpf_judy_pid_stats { } netdata_ebpf_judy_pid_stats_t; extern ebpf_module_t ebpf_modules[]; -enum ebpf_main_index { - EBPF_MODULE_PROCESS_IDX, - EBPF_MODULE_SOCKET_IDX, - EBPF_MODULE_CACHESTAT_IDX, - EBPF_MODULE_SYNC_IDX, - EBPF_MODULE_DCSTAT_IDX, - EBPF_MODULE_SWAP_IDX, - EBPF_MODULE_VFS_IDX, - EBPF_MODULE_FILESYSTEM_IDX, - EBPF_MODULE_DISK_IDX, - EBPF_MODULE_MOUNT_IDX, - EBPF_MODULE_FD_IDX, - EBPF_MODULE_HARDIRQ_IDX, - EBPF_MODULE_SOFTIRQ_IDX, - EBPF_MODULE_OOMKILL_IDX, - EBPF_MODULE_SHM_IDX, - EBPF_MODULE_MDFLUSH_IDX, - EBPF_MODULE_FUNCTION_IDX, - /* THREADS MUST BE INCLUDED BEFORE THIS COMMENT */ - EBPF_OPTION_ALL_CHARTS, - EBPF_OPTION_VERSION, - EBPF_OPTION_HELP, - EBPF_OPTION_GLOBAL_CHART, - EBPF_OPTION_RETURN_MODE, - EBPF_OPTION_LEGACY, - EBPF_OPTION_CORE, - EBPF_OPTION_UNITTEST -}; typedef struct ebpf_tracepoint { bool enabled; @@ -380,6 +353,7 @@ void ebpf_read_local_addresses_unsafe(); extern ebpf_filesystem_partitions_t localfs[]; extern ebpf_sync_syscalls_t local_syscalls[]; extern bool ebpf_plugin_exit; +extern uint64_t collect_pids; static inline bool ebpf_plugin_stop(void) { return ebpf_plugin_exit || nd_thread_signaled_to_cancel(); diff --git a/src/collectors/ebpf.plugin/ebpf_apps.c b/src/collectors/ebpf.plugin/ebpf_apps.c index 
a17cdb33..d90c5f12 100644 --- a/src/collectors/ebpf.plugin/ebpf_apps.c +++ b/src/collectors/ebpf.plugin/ebpf_apps.c @@ -21,37 +21,11 @@ void ebpf_aral_init(void) max_elements = NETDATA_EBPF_ALLOC_MIN_ELEMENTS; } - ebpf_aral_apps_pid_stat = ebpf_allocate_pid_aral("ebpf_pid_stat", sizeof(struct ebpf_pid_stat)); - #ifdef NETDATA_DEV_MODE netdata_log_info("Plugin is using ARAL with values %d", NETDATA_EBPF_ALLOC_MAX_PID); #endif } -/** - * eBPF pid stat get - * - * Get a ebpf_pid_stat entry to be used with a specific PID. - * - * @return it returns the address on success. - */ -struct ebpf_pid_stat *ebpf_pid_stat_get(void) -{ - struct ebpf_pid_stat *target = aral_mallocz(ebpf_aral_apps_pid_stat); - memset(target, 0, sizeof(struct ebpf_pid_stat)); - return target; -} - -/** - * eBPF target release - * - * @param stat Release a target after usage. - */ -void ebpf_pid_stat_release(struct ebpf_pid_stat *stat) -{ - aral_freez(ebpf_aral_apps_pid_stat, stat); -} - // ---------------------------------------------------------------------------- // internal flags // handled in code (automatically set) @@ -332,11 +306,11 @@ int ebpf_read_apps_groups_conf(struct ebpf_target **agdt, struct ebpf_target **a #define MAX_CMDLINE 16384 -struct ebpf_pid_stat **ebpf_all_pids = NULL; // to avoid allocations, we pre-allocate the - // the entire pid space. -struct ebpf_pid_stat *ebpf_root_of_pids = NULL; // global list of all processes running +ebpf_pid_data_t *ebpf_pids = NULL; // to avoid allocations, we pre-allocate the entire pid space. +ebpf_pid_data_t *ebpf_pids_link_list = NULL; // global list of all processes running -size_t ebpf_all_pids_count = 0; // the number of processes running +size_t ebpf_all_pids_count = 0; // the number of processes running read from /proc +size_t ebpf_hash_table_pids_count = 0; // the number of tasks in our hash tables struct ebpf_target *apps_groups_default_target = NULL, // the default target @@ -346,6 +320,8 @@ struct ebpf_target size_t apps_groups_targets_count = 0; // # of apps_groups.conf targets +int pids_fd[EBPF_PIDS_END_IDX]; + // ---------------------------------------------------------------------------- // internal counters @@ -389,109 +365,11 @@ static inline void debug_log_dummy(void) #endif /** - * Managed log - * - * Store log information if it is necessary. - * - * @param p the pid stat structure - * @param log the log id - * @param status the return from a function. - * - * @return It returns the status value. 
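With the ARAL allocator for `struct ebpf_pid_stat` gone, ebpf_apps.c now keeps the whole PID space in one `callocz(pid_max, sizeof(ebpf_pid_data_t))` block, so a PID's slot is reached by direct indexing instead of a pointer table plus per-PID allocations. A small self-contained sketch of that layout, with a reduced field set and `pid_max` read from /proc/sys/kernel/pid_max (the plugin obtains it through its own helper):

```c
#include <stdio.h>
#include <stdlib.h>

/* Reduced stand-in for ebpf_pid_data_t: one slot per possible PID. */
typedef struct pid_data {
    unsigned pid;
    unsigned ppid;
    char comm[32];
} pid_data_t;

static pid_data_t *pids;        /* the whole PID space, zero-initialized */
static long pid_max;

static int pids_init(void)
{
    FILE *f = fopen("/proc/sys/kernel/pid_max", "r");
    if (!f || fscanf(f, "%ld", &pid_max) != 1)
        pid_max = 32768;        /* traditional kernel default */
    if (f)
        fclose(f);

    pids = calloc((size_t)pid_max, sizeof(*pids));
    return pids ? 0 : -1;
}

int main(void)
{
    if (pids_init())
        return 1;

    pid_data_t *p = &pids[1];   /* direct index: no lookup, no per-PID allocation */
    p->pid = 1;
    snprintf(p->comm, sizeof(p->comm), "init");
    printf("pid_max=%ld, slot for pid 1 holds comm='%s'\n", pid_max, p->comm);

    free(pids);
    return 0;
}
```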
- */ -static inline int managed_log(struct ebpf_pid_stat *p, uint32_t log, int status) -{ - if (unlikely(!status)) { - // netdata_log_error("command failed log %u, errno %d", log, errno); - - if (unlikely(debug_enabled || errno != ENOENT)) { - if (unlikely(debug_enabled || !(p->log_thrown & log))) { - p->log_thrown |= log; - switch (log) { - case PID_LOG_IO: - netdata_log_error( - "Cannot process %s/proc/%d/io (command '%s')", netdata_configured_host_prefix, p->pid, - p->comm); - break; - - case PID_LOG_STATUS: - netdata_log_error( - "Cannot process %s/proc/%d/status (command '%s')", netdata_configured_host_prefix, p->pid, - p->comm); - break; - - case PID_LOG_CMDLINE: - netdata_log_error( - "Cannot process %s/proc/%d/cmdline (command '%s')", netdata_configured_host_prefix, p->pid, - p->comm); - break; - - case PID_LOG_FDS: - netdata_log_error( - "Cannot process entries in %s/proc/%d/fd (command '%s')", netdata_configured_host_prefix, - p->pid, p->comm); - break; - - case PID_LOG_STAT: - break; - - default: - netdata_log_error("unhandled error for pid %d, command '%s'", p->pid, p->comm); - break; - } - } - } - errno = 0; - } else if (unlikely(p->log_thrown & log)) { - // netdata_log_error("unsetting log %u on pid %d", log, p->pid); - p->log_thrown &= ~log; - } - - return status; -} - -/** - * Get PID entry - * - * Get or allocate the PID entry for the specified pid. - * - * @param pid the pid to search the data. - * @param tgid the task group id - * - * @return It returns the pid entry structure - */ -ebpf_pid_stat_t *ebpf_get_pid_entry(pid_t pid, pid_t tgid) -{ - ebpf_pid_stat_t *ptr = ebpf_all_pids[pid]; - if (unlikely(ptr)) { - if (!ptr->ppid && tgid) - ptr->ppid = tgid; - return ebpf_all_pids[pid]; - } - - struct ebpf_pid_stat *p = ebpf_pid_stat_get(); - - if (likely(ebpf_root_of_pids)) - ebpf_root_of_pids->prev = p; - - p->next = ebpf_root_of_pids; - ebpf_root_of_pids = p; - - p->pid = pid; - p->ppid = tgid; - - ebpf_all_pids[pid] = p; - ebpf_all_pids_count++; - - return p; -} - -/** * Assign the PID to a target. * * @param p the pid_stat structure to assign for a target. */ -static inline void assign_target_to_pid(struct ebpf_pid_stat *p) +static inline void assign_target_to_pid(ebpf_pid_data_t *p) { targets_assignment_counter++; @@ -499,6 +377,7 @@ static inline void assign_target_to_pid(struct ebpf_pid_stat *p) size_t pclen = strlen(p->comm); struct ebpf_target *w; + bool assigned = false; for (w = apps_groups_root_target; w; w = w->next) { // if(debug_enabled || (p->target && p->target->debug_enabled)) debug_log_int("\t\tcomparing '%s' with '%s'", w->compare, p->comm); @@ -521,9 +400,17 @@ static inline void assign_target_to_pid(struct ebpf_pid_stat *p) if (debug_enabled || (p->target && p->target->debug_enabled)) debug_log_int("%s linked to target %s", p->comm, p->target->name); + w->processes++; + assigned = true; + break; } } + + if (!assigned) { + apps_groups_default_target->processes++; + p->target = apps_groups_default_target; + } } // ---------------------------------------------------------------------------- @@ -532,22 +419,18 @@ static inline void assign_target_to_pid(struct ebpf_pid_stat *p) /** * Read cmd line from /proc/PID/cmdline * - * @param p the ebpf_pid_stat_structure. + * @param p the ebpf_pid_data structure. * * @return It returns 1 on success and 0 otherwise. 
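`assign_target_to_pid()` now bumps `w->processes` on a match, stops at the first hit, and falls back to the default apps group when nothing matched (the new `assigned` flag). A simplified sketch of that matching shape; the exact string compare stands in for the prefix/suffix/hash comparisons the real apps_groups.conf matching performs:

```c
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct target {
    const char *compare;        /* pattern from apps_groups.conf (exact match here) */
    unsigned processes;         /* how many PIDs landed in this group */
    struct target *next;
};

/* Assign a process (by comm) to the first matching target, or to the default
 * target when nothing matches — the shape of the new 'assigned' flag. */
static struct target *assign_target(const char *comm, struct target *root, struct target *def)
{
    bool assigned = false;
    struct target *hit = def;

    for (struct target *w = root; w; w = w->next) {
        if (!strcmp(w->compare, comm)) {
            w->processes++;
            hit = w;
            assigned = true;
            break;
        }
    }

    if (!assigned)
        def->processes++;

    return hit;
}

int main(void)
{
    struct target other = { "other", 0, NULL };
    struct target web   = { "nginx", 0, &other };

    printf("%s\n", assign_target("nginx", &web, &other)->compare);  /* nginx */
    printf("%s\n", assign_target("bash",  &web, &other)->compare);  /* other */
    return 0;
}
```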
*/ -static inline int read_proc_pid_cmdline(struct ebpf_pid_stat *p) +static inline int read_proc_pid_cmdline(ebpf_pid_data_t *p, char *cmdline) { - static char cmdline[MAX_CMDLINE + 1]; + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/cmdline", netdata_configured_host_prefix, p->pid); int ret = 0; - if (unlikely(!p->cmdline_filename)) { - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/%d/cmdline", netdata_configured_host_prefix, p->pid); - p->cmdline_filename = strdupz(filename); - } - int fd = open(p->cmdline_filename, procfile_open_flags, 0666); + int fd = open(filename, procfile_open_flags, 0666); if (unlikely(fd == -1)) goto cleanup; @@ -563,21 +446,12 @@ static inline int read_proc_pid_cmdline(struct ebpf_pid_stat *p) cmdline[i] = ' '; } - debug_log("Read file '%s' contents: %s", p->cmdline_filename, p->cmdline); + debug_log("Read file '%s' contents: %s", filename, p->cmdline); ret = 1; cleanup: - // copy the command to the command line - if (p->cmdline) - freez(p->cmdline); - p->cmdline = strdupz(p->comm); - - rw_spinlock_write_lock(&ebpf_judy_pid.index.rw_spinlock); - netdata_ebpf_judy_pid_stats_t *pid_ptr = ebpf_get_pid_from_judy_unsafe(&ebpf_judy_pid.index.JudyLArray, p->pid); - if (pid_ptr) - pid_ptr->cmdline = p->cmdline; - rw_spinlock_write_unlock(&ebpf_judy_pid.index.rw_spinlock); + p->cmdline[0] = '\0'; return ret; } @@ -587,44 +461,43 @@ cleanup: * Assign target to pid * * @param p the pid stat structure to store the data. - * @param ptr an useless argument. */ -static inline int read_proc_pid_stat(struct ebpf_pid_stat *p, void *ptr) +static inline int read_proc_pid_stat(ebpf_pid_data_t *p) { - UNUSED(ptr); + procfile *ff; - static procfile *ff = NULL; - - if (unlikely(!p->stat_filename)) { - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/%d/stat", netdata_configured_host_prefix, p->pid); - p->stat_filename = strdupz(filename); - } - - int set_quotes = (!ff) ? 
1 : 0; + char filename[FILENAME_MAX + 1]; + int ret = 0; + snprintfz(filename, FILENAME_MAX, "%s/proc/%u/stat", netdata_configured_host_prefix, p->pid); struct stat statbuf; - if (stat(p->stat_filename, &statbuf)) + if (stat(filename, &statbuf)) { + // PID ended before we stat the file + p->has_proc_file = 0; return 0; + } - ff = procfile_reopen(ff, p->stat_filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); + ff = procfile_open(filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); if (unlikely(!ff)) - return 0; + goto cleanup_pid_stat; - if (unlikely(set_quotes)) - procfile_set_open_close(ff, "(", ")"); + procfile_set_open_close(ff, "(", ")"); ff = procfile_readall(ff); if (unlikely(!ff)) - return 0; - - p->last_stat_collected_usec = p->stat_collected_usec; - p->stat_collected_usec = now_monotonic_usec(); - calls_counter++; + goto cleanup_pid_stat; char *comm = procfile_lineword(ff, 0, 1); - p->ppid = (int32_t)str2pid_t(procfile_lineword(ff, 0, 3)); + int32_t ppid = (int32_t)str2pid_t(procfile_lineword(ff, 0, 3)); + if (p->ppid == ppid && p->target) + goto without_cmdline_target; + + p->ppid = ppid; + + char cmdline[MAX_CMDLINE + 1]; + p->cmdline = cmdline; + read_proc_pid_cmdline(p, cmdline); if (strcmp(p->comm, comm) != 0) { if (unlikely(debug_enabled)) { if (p->comm[0]) @@ -634,58 +507,50 @@ static inline int read_proc_pid_stat(struct ebpf_pid_stat *p, void *ptr) } strncpyz(p->comm, comm, EBPF_MAX_COMPARE_NAME); - - // /proc/<pid>/cmdline - if (likely(proc_pid_cmdline_is_needed)) - managed_log(p, PID_LOG_CMDLINE, read_proc_pid_cmdline(p)); - - assign_target_to_pid(p); } + if (!p->target) + assign_target_to_pid(p); + + p->cmdline = NULL; if (unlikely(debug_enabled || (p->target && p->target->debug_enabled))) debug_log_int( - "READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu)", - netdata_configured_host_prefix, p->pid, p->comm, (p->target) ? p->target->name : "UNSET", - p->stat_collected_usec - p->last_stat_collected_usec); + "READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s'", + netdata_configured_host_prefix, p->pid, p->comm, (p->target) ? p->target->name : "UNSET"); - return 1; +without_cmdline_target: + p->has_proc_file = 1; + p->not_updated = 0; + ret = 1; +cleanup_pid_stat: + procfile_close(ff); + + return ret; } /** * Collect data for PID * * @param pid the current pid that we are working - * @param ptr a NULL value * * @return It returns 1 on success and 0 otherwise */ -static inline int ebpf_collect_data_for_pid(pid_t pid, void *ptr) +static inline int ebpf_collect_data_for_pid(pid_t pid) { if (unlikely(pid < 0 || pid > pid_max)) { netdata_log_error("Invalid pid %d read (expected %d to %d). Ignoring process.", pid, 0, pid_max); return 0; } - ebpf_pid_stat_t *p = ebpf_get_pid_entry(pid, 0); - if (unlikely(!p || p->read)) - return 0; - p->read = 1; - - if (unlikely(!managed_log(p, PID_LOG_STAT, read_proc_pid_stat(p, ptr)))) - // there is no reason to proceed if we cannot get its status - return 0; + ebpf_pid_data_t *p = ebpf_get_pid_data((uint32_t)pid, 0, NULL, EBPF_PIDS_PROC_FILE); + read_proc_pid_stat(p); // check its parent pid - if (unlikely(p->ppid < 0 || p->ppid > pid_max)) { - netdata_log_error("Pid %d (command '%s') states invalid parent pid %d. Using 0.", pid, p->comm, p->ppid); + if (unlikely( p->ppid > pid_max)) { + netdata_log_error("Pid %d (command '%s') states invalid parent pid %u. 
Using 0.", pid, p->comm, p->ppid); p->ppid = 0; } - // mark it as updated - p->updated = 1; - p->keep = 0; - p->keeploops = 0; - return 1; } @@ -694,14 +559,13 @@ static inline int ebpf_collect_data_for_pid(pid_t pid, void *ptr) */ static inline void link_all_processes_to_their_parents(void) { - struct ebpf_pid_stat *p, *pp; + ebpf_pid_data_t *p, *pp; // link all children to their parents // and update children count on parents - for (p = ebpf_root_of_pids; p; p = p->next) { + for (p = ebpf_pids_link_list; p; p = p->next) { // for each process found - p->sortlist = 0; p->parent = NULL; if (unlikely(!p->ppid)) { @@ -709,16 +573,15 @@ static inline void link_all_processes_to_their_parents(void) continue; } - pp = ebpf_all_pids[p->ppid]; - if (likely(pp)) { + pp = &ebpf_pids[p->ppid]; + if (likely(pp->pid)) { p->parent = pp; pp->children_count++; if (unlikely(debug_enabled || (p->target && p->target->debug_enabled))) debug_log_int( - "child %d (%s, %s) on target '%s' has parent %d (%s, %s).", p->pid, p->comm, - p->updated ? "running" : "exited", (p->target) ? p->target->name : "UNSET", pp->pid, pp->comm, - pp->updated ? "running" : "exited"); + "child %d (%s) on target '%s' has parent %d (%s).", p->pid, p->comm, + (p->target) ? p->target->name : "UNSET", pp->pid, pp->comm); } else { p->parent = NULL; debug_log("pid %d %s states parent %d, but the later does not exist.", p->pid, p->comm, p->ppid); @@ -731,7 +594,7 @@ static inline void link_all_processes_to_their_parents(void) */ static void apply_apps_groups_targets_inheritance(void) { - struct ebpf_pid_stat *p = NULL; + struct ebpf_pid_data *p = NULL; // children that do not have a target // inherit their target from their parent @@ -740,7 +603,7 @@ static void apply_apps_groups_targets_inheritance(void) if (unlikely(debug_enabled)) loops++; found = 0; - for (p = ebpf_root_of_pids; p; p = p->next) { + for (p = ebpf_pids_link_list; p; p = p->next) { // if this process does not have a target // and it has a parent // and its parent has a target @@ -751,7 +614,7 @@ static void apply_apps_groups_targets_inheritance(void) if (debug_enabled || (p->target && p->target->debug_enabled)) debug_log_int( - "TARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s).", p->target->name, + "TARGET INHERITANCE: %s is inherited by %u (%s) from its parent %d (%s).", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm); } } @@ -766,7 +629,7 @@ static void apply_apps_groups_targets_inheritance(void) loops++; found = 0; - for (p = ebpf_root_of_pids; p; p = p->next) { + for (p = ebpf_pids_link_list; p; p = p->next) { if (unlikely(!p->sortlist && !p->children_count)) p->sortlist = sortlist++; @@ -802,17 +665,15 @@ static void apply_apps_groups_targets_inheritance(void) } // init goes always to default target - if (ebpf_all_pids[INIT_PID]) - ebpf_all_pids[INIT_PID]->target = apps_groups_default_target; + ebpf_pids[INIT_PID].target = apps_groups_default_target; // pid 0 goes always to default target - if (ebpf_all_pids[0]) - ebpf_all_pids[0]->target = apps_groups_default_target; + ebpf_pids[0].target = apps_groups_default_target; // give a default target on all top level processes if (unlikely(debug_enabled)) loops++; - for (p = ebpf_root_of_pids; p; p = p->next) { + for (p = ebpf_pids_link_list; p; p = p->next) { // if the process is not merged itself // then is is a top level process if (unlikely(!p->merged && !p->target)) @@ -823,8 +684,7 @@ static void apply_apps_groups_targets_inheritance(void) p->sortlist = sortlist++; } - if 
(ebpf_all_pids[1]) - ebpf_all_pids[1]->sortlist = sortlist++; + ebpf_pids[1].sortlist = sortlist++; // give a target to all merged child processes found = 1; @@ -832,7 +692,7 @@ static void apply_apps_groups_targets_inheritance(void) if (unlikely(debug_enabled)) loops++; found = 0; - for (p = ebpf_root_of_pids; p; p = p->next) { + for (p = ebpf_pids_link_list; p; p = p->next) { if (unlikely(!p->target && p->merged && p->parent && p->parent->target)) { p->target = p->parent->target; found++; @@ -872,29 +732,23 @@ static inline void post_aggregate_targets(struct ebpf_target *root) * * @param pid the PID that will be removed. */ -static inline void ebpf_del_pid_entry(pid_t pid) +void ebpf_del_pid_entry(pid_t pid) { - struct ebpf_pid_stat *p = ebpf_all_pids[pid]; - - if (unlikely(!p)) { - netdata_log_error("attempted to free pid %d that is not allocated.", pid); - return; - } + ebpf_pid_data_t *p = &ebpf_pids[pid]; debug_log("process %d %s exited, deleting it.", pid, p->comm); - if (ebpf_root_of_pids == p) - ebpf_root_of_pids = p->next; + if (ebpf_pids_link_list == p) + ebpf_pids_link_list = p->next; if (p->next) p->next->prev = p->prev; if (p->prev) p->prev->next = p->next; - freez(p->stat_filename); - freez(p->status_filename); - freez(p->io_filename); - freez(p->cmdline_filename); + + if ((p->thread_collecting & EBPF_PIDS_PROC_FILE) || p->has_proc_file) + ebpf_all_pids_count--; rw_spinlock_write_lock(&ebpf_judy_pid.index.rw_spinlock); netdata_ebpf_judy_pid_stats_t *pid_ptr = ebpf_get_pid_from_judy_unsafe(&ebpf_judy_pid.index.JudyLArray, p->pid); @@ -914,58 +768,19 @@ static inline void ebpf_del_pid_entry(pid_t pid) } rw_spinlock_write_unlock(&ebpf_judy_pid.index.rw_spinlock); - freez(p->cmdline); - ebpf_pid_stat_release(p); - - ebpf_all_pids[pid] = NULL; - ebpf_all_pids_count--; -} - -/** - * Get command string associated with a PID. - * This can only safely be used when holding the `collect_data_mutex` lock. - * - * @param pid the pid to search the data. - * @param n the maximum amount of bytes to copy into dest. - * if this is greater than the size of the command, it is clipped. - * @param dest the target memory buffer to write the command into. - * @return -1 if the PID hasn't been scraped yet, 0 otherwise. - */ -int get_pid_comm(pid_t pid, size_t n, char *dest) -{ - struct ebpf_pid_stat *stat; - - stat = ebpf_all_pids[pid]; - if (unlikely(stat == NULL)) { - return -1; - } - - if (unlikely(n > sizeof(stat->comm))) { - n = sizeof(stat->comm); - } - - strncpyz(dest, stat->comm, n); - return 0; + memset(p, 0, sizeof(ebpf_pid_data_t)); } /** * Remove PIDs when they are not running more. */ -void ebpf_cleanup_exited_pids(int max) +static void ebpf_cleanup_exited_pids() { - struct ebpf_pid_stat *p = NULL; - - for (p = ebpf_root_of_pids; p;) { - if (p->not_updated > max) { - if (unlikely(debug_enabled && (p->keep || p->keeploops))) - debug_log(" > CLEANUP cannot keep exited process %d (%s) anymore - removing it.", p->pid, p->comm); - - pid_t r = p->pid; - p = p->next; - - ebpf_del_pid_entry(r); + ebpf_pid_data_t *p = NULL; + for (p = ebpf_pids_link_list; p; p = p->next) { + if (!p->has_proc_file) { + ebpf_reset_specific_pid_data(p); } - p = p->next; } } @@ -974,14 +789,14 @@ void ebpf_cleanup_exited_pids(int max) * * @return It returns 0 on success and -1 otherwise. 
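The rewritten `read_proc_pid_stat()` opens /proc/<pid>/stat on each pass instead of keeping a cached procfile handle, takes the command name from the parenthesised field and the parent PID from the fields after it, and skips the cmdline/target work when the ppid is unchanged. A standalone sketch of that parse, using plain stdio rather than the plugin's procfile helpers:

```c
#include <stdio.h>
#include <string.h>

/* Read comm and ppid for one PID from /proc/<pid>/stat.
 * Format: "<pid> (<comm>) <state> <ppid> ..." — comm may contain spaces and
 * parentheses, so find the *last* ')' before reading the remaining fields. */
static int read_stat(int pid, char *comm, size_t comm_len, int *ppid)
{
    char path[64], buf[512];
    snprintf(path, sizeof(path), "/proc/%d/stat", pid);

    FILE *f = fopen(path, "r");
    if (!f)
        return -1;                          /* the process exited before we got here */
    size_t n = fread(buf, 1, sizeof(buf) - 1, f);
    fclose(f);
    buf[n] = '\0';

    char *lpar = strchr(buf, '(');
    char *rpar = strrchr(buf, ')');
    if (!lpar || !rpar || rpar < lpar)
        return -1;

    size_t len = (size_t)(rpar - lpar) - 1;
    if (len >= comm_len)
        len = comm_len - 1;
    memcpy(comm, lpar + 1, len);
    comm[len] = '\0';

    char state;
    if (sscanf(rpar + 1, " %c %d", &state, ppid) != 2)
        return -1;

    return 0;
}

int main(void)
{
    char comm[64];
    int ppid;
    if (!read_stat(1, comm, sizeof(comm), &ppid))
        printf("pid 1: comm='%s' ppid=%d\n", comm, ppid);
    return 0;
}
```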
*/ -static inline void read_proc_filesystem() +static int ebpf_read_proc_filesystem() { char dirname[FILENAME_MAX + 1]; snprintfz(dirname, FILENAME_MAX, "%s/proc", netdata_configured_host_prefix); DIR *dir = opendir(dirname); if (!dir) - return; + return -1; struct dirent *de = NULL; @@ -997,9 +812,11 @@ static inline void read_proc_filesystem() if (unlikely(endptr == de->d_name || *endptr != '\0')) continue; - ebpf_collect_data_for_pid(pid, NULL); + ebpf_collect_data_for_pid(pid); } closedir(dir); + + return 0; } /** @@ -1009,17 +826,17 @@ static inline void read_proc_filesystem() * @param p the pid with information to update * @param o never used */ -static inline void aggregate_pid_on_target(struct ebpf_target *w, struct ebpf_pid_stat *p, struct ebpf_target *o) +static inline void aggregate_pid_on_target(struct ebpf_target *w, ebpf_pid_data_t *p, struct ebpf_target *o) { UNUSED(o); - if (unlikely(!p->updated)) { + if (unlikely(!p->has_proc_file)) { // the process is not running return; } if (unlikely(!w)) { - netdata_log_error("pid %d %s was left without a target!", p->pid, p->comm); + netdata_log_error("pid %u %s was left without a target!", p->pid, p->comm); return; } @@ -1042,6 +859,7 @@ void ebpf_process_apps_accumulator(ebpf_process_stat_t *out, int maps_per_core) { int i, end = (maps_per_core) ? ebpf_nprocs : 1; ebpf_process_stat_t *total = &out[0]; + uint64_t ct = total->ct; for (i = 1; i < end; i++) { ebpf_process_stat_t *w = &out[i]; total->exit_call += w->exit_call; @@ -1049,7 +867,11 @@ void ebpf_process_apps_accumulator(ebpf_process_stat_t *out, int maps_per_core) total->create_thread += w->create_thread; total->create_process += w->create_process; total->release_call += w->release_call; + + if (w->ct > ct) + ct = w->ct; } + total->ct = ct; } /** @@ -1061,19 +883,18 @@ void ebpf_process_apps_accumulator(ebpf_process_stat_t *out, int maps_per_core) void ebpf_process_sum_values_for_pids(ebpf_process_stat_t *process, struct ebpf_pid_on_target *root) { memset(process, 0, sizeof(ebpf_process_stat_t)); - while (root) { + for (; root; root = root->next) { int32_t pid = root->pid; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - ebpf_process_stat_t *in = &local_pid->process; - process->task_err += in->task_err; - process->release_call += in->release_call; - process->exit_call += in->exit_call; - process->create_thread += in->create_thread; - process->create_process += in->create_process; - } + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_PROCESS_IDX); + ebpf_publish_process_t *in = local_pid->process; + if (!in) + continue; - root = root->next; + process->task_err += in->task_err; + process->release_call += in->release_call; + process->exit_call += in->exit_call; + process->create_thread += in->create_thread; + process->create_process += in->create_process; } } @@ -1085,51 +906,50 @@ void ebpf_process_sum_values_for_pids(ebpf_process_stat_t *process, struct ebpf_ * * @param tbl_pid_stats_fd The mapped file descriptor for the hash table. * @param maps_per_core do I have hash maps per core? + * @param max_period max period to wait before remove from hash table. 
*/ -void collect_data_for_all_processes(int tbl_pid_stats_fd, int maps_per_core) +void collect_data_for_all_processes(int tbl_pid_stats_fd, int maps_per_core, uint32_t max_period) { - if (unlikely(!ebpf_all_pids)) + if (tbl_pid_stats_fd == -1) return; - struct ebpf_pid_stat *pids = ebpf_root_of_pids; // global list of all processes running - while (pids) { - if (pids->updated_twice) { - pids->read = 0; // mark it as not read, so that collect_data_for_pid() will read it - pids->updated = 0; - pids->merged = 0; - pids->children_count = 0; - pids->parent = NULL; - } else { - if (pids->updated) - pids->updated_twice = 1; - } - - pids = pids->next; - } - - read_proc_filesystem(); - - pids = ebpf_root_of_pids; // global list of all processes running + pids_fd[EBPF_PIDS_PROCESS_IDX] = tbl_pid_stats_fd; + size_t length = sizeof(ebpf_process_stat_t); + if (maps_per_core) + length *= ebpf_nprocs; if (tbl_pid_stats_fd != -1) { - size_t length = sizeof(ebpf_process_stat_t); - if (maps_per_core) - length *= ebpf_nprocs; uint32_t key = 0, next_key = 0; while (bpf_map_get_next_key(tbl_pid_stats_fd, &key, &next_key) == 0) { - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(key, 0); - if (!local_pid) - goto end_process_loop; - - ebpf_process_stat_t *w = &local_pid->process; if (bpf_map_lookup_elem(tbl_pid_stats_fd, &key, process_stat_vector)) { goto end_process_loop; } ebpf_process_apps_accumulator(process_stat_vector, maps_per_core); - memcpy(w, process_stat_vector, sizeof(ebpf_process_stat_t)); + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(key, 0, NULL, EBPF_PIDS_PROCESS_IDX); + ebpf_publish_process_t *w = local_pid->process; + if (!w) + local_pid->process = w = ebpf_process_allocate_publish(); + + if (!w->ct || w->ct != process_stat_vector[0].ct) { + w->ct = process_stat_vector[0].ct; + w->create_thread = process_stat_vector[0].create_thread; + w->exit_call = process_stat_vector[0].exit_call; + w->create_thread = process_stat_vector[0].create_thread; + w->create_process = process_stat_vector[0].create_process; + w->release_call = process_stat_vector[0].release_call; + w->task_err = process_stat_vector[0].task_err; + } else { + if (kill(key, 0)) { // No PID found + ebpf_reset_specific_pid_data(local_pid); + } else { // There is PID, but there is not data anymore + ebpf_release_pid_data(local_pid, tbl_pid_stats_fd, key, EBPF_PIDS_PROCESS_IDX); + ebpf_process_release_publish(w); + local_pid->process = NULL; + } + } end_process_loop: memset(process_stat_vector, 0, length); @@ -1137,24 +957,47 @@ end_process_loop: } } + struct ebpf_target *w; + for (w = apps_groups_root_target; w; w = w->next) { + if (unlikely(!(w->processes))) + continue; + + ebpf_process_sum_values_for_pids(&w->process, w->root_pid); + } + +} + +/** + * + */ +void ebpf_parse_proc_files() +{ + ebpf_pid_data_t *pids; + for (pids = ebpf_pids_link_list; pids;) { + if (kill(pids->pid, 0)) { // No PID found + ebpf_pid_data_t *next = pids->next; + ebpf_reset_specific_pid_data(pids); + pids = next; + continue; + } + + pids->not_updated = EBPF_CLEANUP_FACTOR; + pids->merged = 0; + pids->children_count = 0; + pids = pids->next; + } + + if (ebpf_read_proc_filesystem()) + return; + link_all_processes_to_their_parents(); apply_apps_groups_targets_inheritance(); apps_groups_targets_count = zero_all_targets(apps_groups_root_target); - // this has to be done, before the cleanup - // // concentrate everything on the targets - for (pids = ebpf_root_of_pids; pids; pids = pids->next) + for (pids = ebpf_pids_link_list; pids; pids = pids->next) 
aggregate_pid_on_target(pids->target, pids, NULL); - post_aggregate_targets(apps_groups_root_target); - - struct ebpf_target *w; - for (w = apps_groups_root_target; w; w = w->next) { - if (unlikely(!(w->processes))) - continue; - - ebpf_process_sum_values_for_pids(&w->process, w->root_pid); - } + ebpf_cleanup_exited_pids(); } diff --git a/src/collectors/ebpf.plugin/ebpf_apps.h b/src/collectors/ebpf.plugin/ebpf_apps.h index a2cbaf3b..98c9995d 100644 --- a/src/collectors/ebpf.plugin/ebpf_apps.h +++ b/src/collectors/ebpf.plugin/ebpf_apps.h @@ -39,10 +39,55 @@ #include "ebpf_swap.h" #include "ebpf_vfs.h" -#define EBPF_MAX_COMPARE_NAME 100 +#define EBPF_MAX_COMPARE_NAME 95 #define EBPF_MAX_NAME 100 -#define EBPF_CLEANUP_FACTOR 10 +#define EBPF_CLEANUP_FACTOR 2 + +enum ebpf_pids_index { + EBPF_PIDS_PROCESS_IDX, + EBPF_PIDS_SOCKET_IDX, + EBPF_PIDS_CACHESTAT_IDX, + EBPF_PIDS_DCSTAT_IDX, + EBPF_PIDS_SWAP_IDX, + EBPF_PIDS_VFS_IDX, + EBPF_PIDS_FD_IDX, + EBPF_PIDS_SHM_IDX, + + EBPF_PIDS_PROC_FILE, + EBPF_PIDS_END_IDX +}; + +extern int pids_fd[EBPF_PIDS_END_IDX]; + +enum ebpf_main_index { + EBPF_MODULE_PROCESS_IDX, + EBPF_MODULE_SOCKET_IDX, + EBPF_MODULE_CACHESTAT_IDX, + EBPF_MODULE_SYNC_IDX, + EBPF_MODULE_DCSTAT_IDX, + EBPF_MODULE_SWAP_IDX, + EBPF_MODULE_VFS_IDX, + EBPF_MODULE_FILESYSTEM_IDX, + EBPF_MODULE_DISK_IDX, + EBPF_MODULE_MOUNT_IDX, + EBPF_MODULE_FD_IDX, + EBPF_MODULE_HARDIRQ_IDX, + EBPF_MODULE_SOFTIRQ_IDX, + EBPF_MODULE_OOMKILL_IDX, + EBPF_MODULE_SHM_IDX, + EBPF_MODULE_MDFLUSH_IDX, + EBPF_MODULE_FUNCTION_IDX, + /* THREADS MUST BE INCLUDED BEFORE THIS COMMENT */ + EBPF_OPTION_ALL_CHARTS, + EBPF_OPTION_VERSION, + EBPF_OPTION_HELP, + EBPF_OPTION_GLOBAL_CHART, + EBPF_OPTION_RETURN_MODE, + EBPF_OPTION_LEGACY, + EBPF_OPTION_CORE, + EBPF_OPTION_UNITTEST +}; // ---------------------------------------------------------------------------- // Structures used to read information from kernel ring @@ -63,10 +108,21 @@ typedef struct ebpf_process_stat { //Counter uint32_t task_err; - - uint8_t removeme; } ebpf_process_stat_t; +typedef struct __attribute__((packed)) ebpf_publish_process { + uint64_t ct; + + //Counter + uint32_t exit_call; + uint32_t release_call; + uint32_t create_process; + uint32_t create_thread; + + //Counter + uint32_t task_err; +} ebpf_publish_process_t; + // ---------------------------------------------------------------------------- // pid_stat // @@ -108,21 +164,246 @@ struct ebpf_target { struct ebpf_target *target; // the one that will be reported to netdata struct ebpf_target *next; }; - extern struct ebpf_target *apps_groups_default_target; extern struct ebpf_target *apps_groups_root_target; extern struct ebpf_target *users_root_target; extern struct ebpf_target *groups_root_target; +extern uint64_t collect_pids; + +// ebpf_pid_data +typedef struct __attribute__((packed)) ebpf_pid_data { + uint32_t pid; + uint32_t ppid; + uint64_t thread_collecting; + + char comm[EBPF_MAX_COMPARE_NAME + 1]; + char *cmdline; + + uint32_t has_proc_file; + uint32_t not_updated; + int children_count; // number of processes directly referencing this + int merged; + int sortlist; // higher numbers = top on the process tree + + struct ebpf_target *target; // the one that will be reported to netdata + struct ebpf_pid_data *parent; + struct ebpf_pid_data *prev; + struct ebpf_pid_data *next; + + netdata_publish_fd_stat_t *fd; + netdata_publish_swap_t *swap; + netdata_publish_shm_t *shm; // this has a leak issue + netdata_publish_dcstat_t *dc; + netdata_publish_vfs_t *vfs; + netdata_publish_cachestat_t 
*cachestat; + ebpf_publish_process_t *process; + ebpf_socket_publish_apps_t *socket; + +} ebpf_pid_data_t; + +extern ebpf_pid_data_t *ebpf_pids; +extern ebpf_pid_data_t *ebpf_pids_link_list; +extern size_t ebpf_all_pids_count; +extern size_t ebpf_hash_table_pids_count; +void ebpf_del_pid_entry(pid_t pid); + +static inline void *ebpf_cachestat_allocate_publish() +{ + ebpf_hash_table_pids_count++; + return callocz(1, sizeof(netdata_publish_cachestat_t)); +} + +static inline void ebpf_cachestat_release_publish(netdata_publish_cachestat_t *ptr) +{ + ebpf_hash_table_pids_count--; + freez(ptr); +} + +static inline void *ebpf_dcallocate_publish() +{ + ebpf_hash_table_pids_count++; + return callocz(1, sizeof(netdata_publish_dcstat_t)); +} + +static inline void ebpf_dc_release_publish(netdata_publish_dcstat_t *ptr) +{ + ebpf_hash_table_pids_count--; + freez(ptr); +} + +static inline void *ebpf_fd_allocate_publish() +{ + ebpf_hash_table_pids_count++; + return callocz(1, sizeof(netdata_publish_fd_stat_t)); +} + +static inline void ebpf_fd_release_publish(netdata_publish_fd_stat_t *ptr) +{ + ebpf_hash_table_pids_count--; + freez(ptr); +} + +static inline void *ebpf_shm_allocate_publish() +{ + ebpf_hash_table_pids_count++; + return callocz(1, sizeof(netdata_publish_shm_t)); +} + +static inline void ebpf_shm_release_publish(netdata_publish_shm_t *ptr) +{ + ebpf_hash_table_pids_count--; + freez(ptr); +} + +static inline void *ebpf_socket_allocate_publish() +{ + ebpf_hash_table_pids_count++; + return callocz(1, sizeof(ebpf_socket_publish_apps_t)); +} + +static inline void ebpf_socket_release_publish(ebpf_socket_publish_apps_t *ptr) +{ + ebpf_hash_table_pids_count--; + freez(ptr); +} + +static inline void *ebpf_swap_allocate_publish_swap() +{ + ebpf_hash_table_pids_count++; + return callocz(1, sizeof(netdata_publish_swap_t)); +} + +static inline void ebpf_swap_release_publish(netdata_publish_swap_t *ptr) +{ + ebpf_hash_table_pids_count--; + freez(ptr); +} + +static inline void *ebpf_vfs_allocate_publish() +{ + ebpf_hash_table_pids_count++; + return callocz(1, sizeof(netdata_publish_vfs_t)); +} + +static inline void ebpf_vfs_release_publish(netdata_publish_vfs_t *ptr) +{ + ebpf_hash_table_pids_count--; + freez(ptr); +} + +static inline void *ebpf_process_allocate_publish() +{ + ebpf_hash_table_pids_count++; + return callocz(1, sizeof(ebpf_publish_process_t)); +} + +static inline void ebpf_process_release_publish(ebpf_publish_process_t *ptr) +{ + ebpf_hash_table_pids_count--; + freez(ptr); +} + +static inline ebpf_pid_data_t *ebpf_get_pid_data(uint32_t pid, uint32_t tgid, char *name, uint32_t idx) { + ebpf_pid_data_t *ptr = &ebpf_pids[pid]; + ptr->thread_collecting |= 1<<idx; + // The caller is getting data to work. 
+ if (!name && idx != EBPF_PIDS_PROC_FILE) + return ptr; + + if (ptr->pid == pid) { + return ptr; + } + + ptr->pid = pid; + ptr->ppid = tgid; + + if (name) + strncpyz(ptr->comm, name, EBPF_MAX_COMPARE_NAME); + + if (likely(ebpf_pids_link_list)) + ebpf_pids_link_list->prev = ptr; + + ptr->next = ebpf_pids_link_list; + ebpf_pids_link_list = ptr; + if (idx == EBPF_PIDS_PROC_FILE) { + ebpf_all_pids_count++; + } + + return ptr; +} + +static inline void ebpf_release_pid_data(ebpf_pid_data_t *eps, int fd, uint32_t key, uint32_t idx) +{ + if (fd) { + bpf_map_delete_elem(fd, &key); + } + eps->thread_collecting &= ~(1<<idx); + if (!eps->thread_collecting && !eps->has_proc_file) { + ebpf_del_pid_entry((pid_t)key); + } +} + +static inline void ebpf_reset_specific_pid_data(ebpf_pid_data_t *ptr) +{ + int idx; + uint32_t pid = ptr->pid; + for (idx = EBPF_PIDS_PROCESS_IDX; idx < EBPF_PIDS_PROC_FILE; idx++) { + if (!(ptr->thread_collecting & (1<<idx))) { + continue; + } + // Check if we still have the map loaded + int fd = pids_fd[idx]; + if (fd <= STDERR_FILENO) + continue; + + bpf_map_delete_elem(fd, &pid); + ebpf_hash_table_pids_count--; + void *clean; + switch (idx) { + case EBPF_PIDS_PROCESS_IDX: + clean = ptr->process; + break; + case EBPF_PIDS_SOCKET_IDX: + clean = ptr->socket; + break; + case EBPF_PIDS_CACHESTAT_IDX: + clean = ptr->cachestat; + break; + case EBPF_PIDS_DCSTAT_IDX: + clean = ptr->dc; + break; + case EBPF_PIDS_SWAP_IDX: + clean = ptr->swap; + break; + case EBPF_PIDS_VFS_IDX: + clean = ptr->vfs; + break; + case EBPF_PIDS_FD_IDX: + clean = ptr->fd; + break; + case EBPF_PIDS_SHM_IDX: + clean = ptr->shm; + break; + default: + clean = NULL; + } + freez(clean); + } + + ebpf_del_pid_entry(pid); +} + typedef struct ebpf_pid_stat { - int32_t pid; + uint32_t pid; + uint64_t thread_collecting; char comm[EBPF_MAX_COMPARE_NAME + 1]; char *cmdline; uint32_t log_thrown; // char state; - int32_t ppid; + uint32_t ppid; int children_count; // number of processes directly referencing this unsigned char keep : 1; // 1 when we need to keep this process in memory even after it exited @@ -199,8 +480,6 @@ static inline void debug_log_int(const char *fmt, ...) 
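In ebpf_apps.h the old monolithic per-PID block becomes a packed `ebpf_pid_data_t` holding one pointer per module; a module allocates its publish structure only when its kernel table actually has data for that PID, and every allocate/release helper adjusts the global `ebpf_hash_table_pids_count` that feeds the new monitoring_pid chart. A reduced single-module sketch of that lazy-allocation-plus-accounting pattern (all names here are illustrative):

```c
#include <stdio.h>
#include <stdlib.h>

/* Per-PID values a single module publishes (reduced). */
typedef struct publish_example {
    unsigned long long ct;
    unsigned calls;
} publish_example_t;

/* Reduced stand-in for ebpf_pid_data_t: one lazily-filled pointer per module. */
typedef struct pid_data {
    unsigned pid;
    publish_example_t *example;          /* NULL until the module sees this PID */
} pid_data_t;

static size_t hash_table_pids_count;     /* mirrors ebpf_hash_table_pids_count */

static publish_example_t *example_allocate(void)
{
    hash_table_pids_count++;
    return calloc(1, sizeof(publish_example_t));
}

static void example_release(publish_example_t *ptr)
{
    hash_table_pids_count--;
    free(ptr);
}

int main(void)
{
    pid_data_t slot = { .pid = 1234 };

    /* The module found PID 1234 in its hash table: allocate on first sight. */
    if (!slot.example)
        slot.example = example_allocate();
    if (!slot.example)
        return 1;
    slot.example->calls++;

    /* The PID disappeared from the kernel map: drop the per-module data. */
    example_release(slot.example);
    slot.example = NULL;

    printf("tracked pids now: %zu\n", hash_table_pids_count);
    return 0;
}
```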
// ---------------------------------------------------------------------------- // Exported variabled and functions // -extern struct ebpf_pid_stat **ebpf_all_pids; - int ebpf_read_apps_groups_conf(struct ebpf_target **apps_groups_default_target, struct ebpf_target **apps_groups_root_target, const char *path, @@ -216,7 +495,7 @@ int ebpf_read_hash_table(void *ep, int fd, uint32_t pid); int get_pid_comm(pid_t pid, size_t n, char *dest); -void collect_data_for_all_processes(int tbl_pid_stats_fd, int maps_per_core); +void collect_data_for_all_processes(int tbl_pid_stats_fd, int maps_per_core, uint32_t max_period); void ebpf_process_apps_accumulator(ebpf_process_stat_t *out, int maps_per_core); // The default value is at least 32 times smaller than maximum number of PIDs allowed on system, @@ -227,8 +506,7 @@ void ebpf_process_apps_accumulator(ebpf_process_stat_t *out, int maps_per_core); #define NETDATA_EBPF_ALLOC_MIN_ELEMENTS 256 // ARAL Sectiion -extern void ebpf_aral_init(void); -extern ebpf_pid_stat_t *ebpf_get_pid_entry(pid_t pid, pid_t tgid); +void ebpf_aral_init(void); extern ebpf_process_stat_t *process_stat_vector; extern ARAL *ebpf_aral_vfs_pid; @@ -240,7 +518,7 @@ extern ARAL *ebpf_aral_shm_pid; void ebpf_shm_aral_init(); netdata_publish_shm_t *ebpf_shm_stat_get(void); void ebpf_shm_release(netdata_publish_shm_t *stat); -void ebpf_cleanup_exited_pids(int max); +void ebpf_parse_proc_files(); // ARAL Section end diff --git a/src/collectors/ebpf.plugin/ebpf_cachestat.c b/src/collectors/ebpf.plugin/ebpf_cachestat.c index 379ff05b..8c0260d5 100644 --- a/src/collectors/ebpf.plugin/ebpf_cachestat.c +++ b/src/collectors/ebpf.plugin/ebpf_cachestat.c @@ -330,9 +330,9 @@ static void ebpf_obsolete_specific_cachestat_charts(char *type, int update_every */ static void ebpf_obsolete_cachestat_services(ebpf_module_t *em, char *id) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_CACHESTAT_HIT_RATIO_CHART, + "", "Hit ratio", EBPF_COMMON_UNITS_PERCENTAGE, NETDATA_CACHESTAT_SUBMENU, @@ -341,9 +341,9 @@ static void ebpf_obsolete_cachestat_services(ebpf_module_t *em, char *id) 21100, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_CACHESTAT_DIRTY_CHART, + "", "Number of dirty pages", EBPF_CACHESTAT_UNITS_PAGE, NETDATA_CACHESTAT_SUBMENU, @@ -352,9 +352,9 @@ static void ebpf_obsolete_cachestat_services(ebpf_module_t *em, char *id) 21101, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_CACHESTAT_HIT_CHART, + "", "Number of accessed files", EBPF_CACHESTAT_UNITS_HITS, NETDATA_CACHESTAT_SUBMENU, @@ -363,9 +363,9 @@ static void ebpf_obsolete_cachestat_services(ebpf_module_t *em, char *id) 21102, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_CACHESTAT_MISSES_CHART, + "", "Files out of page cache", EBPF_CACHESTAT_UNITS_MISSES, NETDATA_CACHESTAT_SUBMENU, @@ -525,9 +525,14 @@ void ebpf_obsolete_cachestat_apps_charts(struct ebpf_module *em) */ static void ebpf_cachestat_exit(void *pptr) { + pids_fd[EBPF_PIDS_CACHESTAT_IDX] = -1; ebpf_module_t *em = CLEANUP_FUNCTION_GET_PTR(pptr); if(!em) return; + pthread_mutex_lock(&lock); + collect_pids &= ~(1<<EBPF_MODULE_CACHESTAT_IDX); + pthread_mutex_unlock(&lock); + if (ebpf_read_cachestat.thread) nd_thread_signal_cancel(ebpf_read_cachestat.thread); @@ -677,6 +682,9 @@ static void 
cachestat_apps_accumulator(netdata_cachestat_pid_t *out, int maps_pe total->mark_page_accessed += w->mark_page_accessed; if (w->ct > ct) ct = w->ct; + + if (!total->name[0] && w->name[0]) + strncpyz(total->name, w->name, sizeof(total->name) - 1); } total->ct = ct; } @@ -692,13 +700,14 @@ static void cachestat_apps_accumulator(netdata_cachestat_pid_t *out, int maps_pe static inline void cachestat_save_pid_values(netdata_publish_cachestat_t *out, netdata_cachestat_pid_t *in) { out->ct = in->ct; - if (!out->current.mark_page_accessed) { - memcpy(&out->current, &in[0], sizeof(netdata_cachestat_pid_t)); - return; + if (out->current.mark_page_accessed) { + memcpy(&out->prev, &out->current, sizeof(netdata_cachestat_t)); } - memcpy(&out->prev, &out->current, sizeof(netdata_cachestat_pid_t)); - memcpy(&out->current, &in[0], sizeof(netdata_cachestat_pid_t)); + out->current.account_page_dirtied = in[0].account_page_dirtied; + out->current.add_to_page_cache_lru = in[0].add_to_page_cache_lru; + out->current.mark_buffer_dirty = in[0].mark_buffer_dirty; + out->current.mark_page_accessed = in[0].mark_page_accessed; } /** @@ -707,8 +716,9 @@ static inline void cachestat_save_pid_values(netdata_publish_cachestat_t *out, n * Read the apps table and store data inside the structure. * * @param maps_per_core do I need to read all cores? + * @param max_period limit of iterations without updates before remove data from hash table */ -static void ebpf_read_cachestat_apps_table(int maps_per_core, int max_period) +static void ebpf_read_cachestat_apps_table(int maps_per_core, uint32_t max_period) { netdata_cachestat_pid_t *cv = cachestat_vector; int fd = cachestat_maps[NETDATA_CACHESTAT_PID_STATS].map_fd; @@ -724,17 +734,22 @@ static void ebpf_read_cachestat_apps_table(int maps_per_core, int max_period) cachestat_apps_accumulator(cv, maps_per_core); - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(key, cv->tgid); - if (!local_pid) - goto end_cachestat_loop; + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(key, cv->tgid, cv->name, EBPF_PIDS_CACHESTAT_IDX); + netdata_publish_cachestat_t *publish = local_pid->cachestat; + if (!publish) + local_pid->cachestat = publish = ebpf_cachestat_allocate_publish(); - netdata_publish_cachestat_t *publish = &local_pid->cachestat; if (!publish->ct || publish->ct != cv->ct){ cachestat_save_pid_values(publish, cv); local_pid->not_updated = 0; - } else if (++local_pid->not_updated >= max_period) { - bpf_map_delete_elem(fd, &key); - local_pid->not_updated = 0; + } else { + if (kill(key, 0)) { // No PID found + ebpf_reset_specific_pid_data(local_pid); + } else { // There is PID, but there is not data anymore + ebpf_release_pid_data(local_pid, fd, key, EBPF_PIDS_CACHESTAT_IDX); + ebpf_cachestat_release_publish(publish); + local_pid->cachestat = NULL; + } } end_cachestat_loop: @@ -759,13 +774,14 @@ static void ebpf_update_cachestat_cgroup() struct pid_on_target2 *pids; for (pids = ect->pids; pids; pids = pids->next) { int pid = pids->pid; - netdata_cachestat_pid_t *out = &pids->cachestat; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - netdata_publish_cachestat_t *in = &local_pid->cachestat; + netdata_publish_cachestat_t *out = &pids->cachestat; - memcpy(out, &in->current, sizeof(netdata_cachestat_pid_t)); - } + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_CACHESTAT_IDX); + netdata_publish_cachestat_t *in = local_pid->cachestat; + if (!in) + continue; + + memcpy(&out->current, &in->current, sizeof(netdata_cachestat_t)); } } 
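The process and cachestat readers walk their kernel hash tables the same way: `bpf_map_get_next_key()` enumerates PID keys, `bpf_map_lookup_elem()` fills one slot per CPU when `maps per core` is enabled, the slots are summed before publishing, and `kill(pid, 0)` tells an exited process apart from an idle one so stale map entries can be dropped. A condensed sketch of that loop against libbpf's map API; the value struct, the fixed 256-slot buffer, and the simplified staleness handling are assumptions of the sketch, not the plugin's exact logic:

```c
#include <bpf/bpf.h>      /* bpf_map_get_next_key(), bpf_map_lookup_elem(), bpf_map_delete_elem() */
#include <signal.h>
#include <stdint.h>
#include <string.h>
#include <sys/types.h>

typedef struct stat_entry {   /* reduced per-PID value stored in the kernel map */
    uint64_t ct;
    uint64_t calls;
} stat_entry_t;

/* Sum the per-CPU copies into slot 0 (only needed when 'maps per core' is on). */
static void accumulate(stat_entry_t *v, int slots)
{
    for (int i = 1; i < slots; i++) {
        v[0].calls += v[i].calls;
        if (v[i].ct > v[0].ct)
            v[0].ct = v[i].ct;
    }
}

void read_pid_map(int map_fd, int nprocs, int maps_per_core)
{
    int slots = maps_per_core ? nprocs : 1;
    stat_entry_t values[256];              /* assumes nprocs <= 256 for this sketch */
    uint32_t key = 0, next_key = 0;

    while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) {
        if (bpf_map_lookup_elem(map_fd, &key, values) == 0) {
            accumulate(values, slots);

            /* ...publish values[0] for PID 'key' here... */

            if (kill((pid_t)key, 0))       /* the process is gone: drop its map entry */
                bpf_map_delete_elem(map_fd, &key);
        }
        memset(values, 0, sizeof(values));
        key = next_key;
    }
}
```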
pthread_mutex_unlock(&mutex_cgroup_shm); @@ -784,20 +800,19 @@ void ebpf_cachestat_sum_pids(netdata_publish_cachestat_t *publish, struct ebpf_p memcpy(&publish->prev, &publish->current,sizeof(publish->current)); memset(&publish->current, 0, sizeof(publish->current)); - netdata_cachestat_pid_t *dst = &publish->current; - while (root) { + netdata_cachestat_t *dst = &publish->current; + for (; root; root = root->next) { int32_t pid = root->pid; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - netdata_publish_cachestat_t *w = &local_pid->cachestat; - netdata_cachestat_pid_t *src = &w->current; - dst->account_page_dirtied += src->account_page_dirtied; - dst->add_to_page_cache_lru += src->add_to_page_cache_lru; - dst->mark_buffer_dirty += src->mark_buffer_dirty; - dst->mark_page_accessed += src->mark_page_accessed; - } + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_CACHESTAT_IDX); + netdata_publish_cachestat_t *w = local_pid->cachestat; + if (!w) + continue; - root = root->next; + netdata_cachestat_t *src = &w->current; + dst->account_page_dirtied += src->account_page_dirtied; + dst->add_to_page_cache_lru += src->add_to_page_cache_lru; + dst->mark_buffer_dirty += src->mark_buffer_dirty; + dst->mark_page_accessed += src->mark_page_accessed; } } @@ -834,13 +849,14 @@ void *ebpf_read_cachestat_thread(void *ptr) int maps_per_core = em->maps_per_core; int update_every = em->update_every; - int max_period = update_every * EBPF_CLEANUP_FACTOR; + uint32_t max_period = EBPF_CLEANUP_FACTOR; int counter = update_every - 1; uint32_t lifetime = em->lifetime; uint32_t running_time = 0; usec_t period = update_every * USEC_PER_SEC; + pids_fd[EBPF_PIDS_CACHESTAT_IDX] = cachestat_maps[NETDATA_CACHESTAT_PID_STATS].map_fd; while (!ebpf_plugin_stop() && running_time < lifetime) { (void)heartbeat_next(&hb, period); if (ebpf_plugin_stop() || ++counter != update_every) @@ -1020,8 +1036,8 @@ void ebpf_cache_send_apps_data(struct ebpf_target *root) if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_CACHESTAT_IDX)))) continue; - netdata_cachestat_pid_t *current = &w->cachestat.current; - netdata_cachestat_pid_t *prev = &w->cachestat.prev; + netdata_cachestat_t *current = &w->cachestat.current; + netdata_cachestat_t *prev = &w->cachestat.prev; uint64_t mpa = current->mark_page_accessed - prev->mark_page_accessed; uint64_t mbd = current->mark_buffer_dirty - prev->mark_buffer_dirty; @@ -1067,16 +1083,14 @@ void ebpf_cachestat_sum_cgroup_pids(netdata_publish_cachestat_t *publish, struct memcpy(&publish->prev, &publish->current,sizeof(publish->current)); memset(&publish->current, 0, sizeof(publish->current)); - netdata_cachestat_pid_t *dst = &publish->current; - while (root) { - netdata_cachestat_pid_t *src = &root->cachestat; + netdata_cachestat_t *dst = &publish->current; + for (; root; root = root->next) { + netdata_cachestat_t *src = &root->cachestat.current; dst->account_page_dirtied += src->account_page_dirtied; dst->add_to_page_cache_lru += src->add_to_page_cache_lru; dst->mark_buffer_dirty += src->mark_buffer_dirty; dst->mark_page_accessed += src->mark_page_accessed; - - root = root->next; } } @@ -1091,8 +1105,8 @@ void ebpf_cachestat_calc_chart_values() for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { ebpf_cachestat_sum_cgroup_pids(&ect->publish_cachestat, ect->pids); - netdata_cachestat_pid_t *current = &ect->publish_cachestat.current; - netdata_cachestat_pid_t *prev = &ect->publish_cachestat.prev; + netdata_cachestat_t *current = 
&ect->publish_cachestat.current; + netdata_cachestat_t *prev = &ect->publish_cachestat.prev; uint64_t mpa = current->mark_page_accessed - prev->mark_page_accessed; uint64_t mbd = current->mark_buffer_dirty - prev->mark_buffer_dirty; @@ -1205,19 +1219,19 @@ static void ebpf_send_systemd_cachestat_charts() continue; } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_CACHESTAT_HIT_RATIO_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_CACHESTAT_HIT_RATIO_CHART, ""); write_chart_dimension("percentage", (long long)ect->publish_cachestat.ratio); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_CACHESTAT_DIRTY_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_CACHESTAT_DIRTY_CHART, ""); write_chart_dimension("pages", (long long)ect->publish_cachestat.dirty); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_CACHESTAT_HIT_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_CACHESTAT_HIT_CHART, ""); write_chart_dimension("hits", (long long)ect->publish_cachestat.hit); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_CACHESTAT_MISSES_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_CACHESTAT_MISSES_CHART, ""); write_chart_dimension("misses", (long long)ect->publish_cachestat.miss); ebpf_write_end_chart(); } diff --git a/src/collectors/ebpf.plugin/ebpf_cachestat.h b/src/collectors/ebpf.plugin/ebpf_cachestat.h index 79d22b43..6bb91b64 100644 --- a/src/collectors/ebpf.plugin/ebpf_cachestat.h +++ b/src/collectors/ebpf.plugin/ebpf_cachestat.h @@ -33,10 +33,10 @@ #define NETDATA_CGROUP_CACHESTAT_HIT_FILES_CONTEXT "cgroup.cachestat_hits" #define NETDATA_CGROUP_CACHESTAT_MISS_FILES_CONTEXT "cgroup.cachestat_misses" -#define NETDATA_SYSTEMD_CACHESTAT_HIT_RATIO_CONTEXT "systemd.services.cachestat_ratio" -#define NETDATA_SYSTEMD_CACHESTAT_MODIFIED_CACHE_CONTEXT "systemd.services.cachestat_dirties" -#define NETDATA_SYSTEMD_CACHESTAT_HIT_FILE_CONTEXT "systemd.services.cachestat_hits" -#define NETDATA_SYSTEMD_CACHESTAT_MISS_FILES_CONTEXT "systemd.services.cachestat_misses" +#define NETDATA_SYSTEMD_CACHESTAT_HIT_RATIO_CONTEXT "systemd.service.cachestat_ratio" +#define NETDATA_SYSTEMD_CACHESTAT_MODIFIED_CACHE_CONTEXT "systemd.service.cachestat_dirties" +#define NETDATA_SYSTEMD_CACHESTAT_HIT_FILE_CONTEXT "systemd.service.cachestat_hits" +#define NETDATA_SYSTEMD_CACHESTAT_MISS_FILES_CONTEXT "systemd.service.cachestat_misses" // variables enum cachestat_counters { @@ -69,20 +69,27 @@ enum cachestat_tables { NETDATA_CACHESTAT_CTRL }; -typedef struct netdata_publish_cachestat_pid { +typedef struct netdata_cachestat_pid { uint64_t ct; uint32_t tgid; uint32_t uid; uint32_t gid; char name[TASK_COMM_LEN]; - uint64_t add_to_page_cache_lru; - uint64_t mark_page_accessed; - uint64_t account_page_dirtied; - uint64_t mark_buffer_dirty; + uint32_t add_to_page_cache_lru; + uint32_t mark_page_accessed; + uint32_t account_page_dirtied; + uint32_t mark_buffer_dirty; } netdata_cachestat_pid_t; -typedef struct netdata_publish_cachestat { +typedef struct __attribute__((packed)) netdata_cachestat { + uint32_t add_to_page_cache_lru; + uint32_t mark_page_accessed; + uint32_t account_page_dirtied; + uint32_t mark_buffer_dirty; +} netdata_cachestat_t; + +typedef struct __attribute__((packed)) netdata_publish_cachestat { uint64_t ct; long long ratio; @@ -90,8 +97,8 @@ typedef struct netdata_publish_cachestat { long long hit; long long miss; - netdata_cachestat_pid_t current; - 
netdata_cachestat_pid_t prev; + netdata_cachestat_t current; + netdata_cachestat_t prev; } netdata_publish_cachestat_t; void *ebpf_cachestat_thread(void *ptr); diff --git a/src/collectors/ebpf.plugin/ebpf_cgroup.c b/src/collectors/ebpf.plugin/ebpf_cgroup.c index ae3bf3f8..9e1fa823 100644 --- a/src/collectors/ebpf.plugin/ebpf_cgroup.c +++ b/src/collectors/ebpf.plugin/ebpf_cgroup.c @@ -329,9 +329,9 @@ void ebpf_parse_cgroup_shm_data() */ void ebpf_create_charts_on_systemd(ebpf_systemd_args_t *chart) { - ebpf_write_chart_cmd(NETDATA_SERVICE_FAMILY, - chart->id, + ebpf_write_chart_cmd(chart->id, chart->suffix, + "", chart->title, chart->units, chart->family, @@ -340,9 +340,23 @@ void ebpf_create_charts_on_systemd(ebpf_systemd_args_t *chart) chart->order, chart->update_every, chart->module); - ebpf_create_chart_labels("service_name", chart->id, RRDLABEL_SRC_AUTO); + char service_name[512]; + snprintfz(service_name, 511, "%s", (!strstr(chart->id, "systemd_")) ? chart->id : (chart->id + 8)); + ebpf_create_chart_labels("service_name", service_name, RRDLABEL_SRC_AUTO); ebpf_commit_label(); - fprintf(stdout, "DIMENSION %s '' %s 1 1\n", chart->dimension, chart->algorithm); + // Let us keep original string that can be used in another place. Chart creation does not happen frequently. + char *move = strdupz(chart->dimension); + while (move) { + char *next_dim = strchr(move, ','); + if (next_dim) { + *next_dim = '\0'; + next_dim++; + } + + fprintf(stdout, "DIMENSION %s '' %s 1 1\n", move, chart->algorithm); + move = next_dim; + } + freez(move); } // -------------------------------------------------------------------------------------------------------------------- diff --git a/src/collectors/ebpf.plugin/ebpf_cgroup.h b/src/collectors/ebpf.plugin/ebpf_cgroup.h index 87df7bed..65c8212b 100644 --- a/src/collectors/ebpf.plugin/ebpf_cgroup.h +++ b/src/collectors/ebpf.plugin/ebpf_cgroup.h @@ -9,20 +9,18 @@ #include "ebpf.h" #include "ebpf_apps.h" -#define NETDATA_SERVICE_FAMILY "systemd" - struct pid_on_target2 { int32_t pid; int updated; netdata_publish_swap_t swap; - netdata_fd_stat_t fd; + netdata_publish_fd_stat_t fd; netdata_publish_vfs_t vfs; - ebpf_process_stat_t ps; + ebpf_publish_process_t ps; netdata_dcstat_pid_t dc; netdata_publish_shm_t shm; netdata_socket_t socket; - netdata_cachestat_pid_t cachestat; + netdata_publish_cachestat_t cachestat; struct pid_on_target2 *next; }; @@ -57,9 +55,9 @@ typedef struct ebpf_cgroup_target { uint32_t updated; netdata_publish_swap_t publish_systemd_swap; - netdata_fd_stat_t publish_systemd_fd; + netdata_publish_fd_stat_t publish_systemd_fd; netdata_publish_vfs_t publish_systemd_vfs; - ebpf_process_stat_t publish_systemd_ps; + ebpf_publish_process_t publish_systemd_ps; netdata_publish_dcstat_t publish_dc; int oomkill; netdata_publish_shm_t publish_shm; diff --git a/src/collectors/ebpf.plugin/ebpf_dcstat.c b/src/collectors/ebpf.plugin/ebpf_dcstat.c index d9455ed9..e6053cb4 100644 --- a/src/collectors/ebpf.plugin/ebpf_dcstat.c +++ b/src/collectors/ebpf.plugin/ebpf_dcstat.c @@ -279,9 +279,9 @@ static void ebpf_obsolete_specific_dc_charts(char *type, int update_every); */ static void ebpf_obsolete_dc_services(ebpf_module_t *em, char *id) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_DC_HIT_CHART, + "", "Percentage of files inside directory cache", EBPF_COMMON_UNITS_PERCENTAGE, NETDATA_DIRECTORY_CACHE_SUBMENU, @@ -290,9 +290,9 @@ static void ebpf_obsolete_dc_services(ebpf_module_t *em, char *id) 21200, 
em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_DC_REFERENCE_CHART, + "", "Count file access", EBPF_COMMON_UNITS_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, @@ -301,9 +301,9 @@ static void ebpf_obsolete_dc_services(ebpf_module_t *em, char *id) 21201, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_DC_REQUEST_NOT_CACHE_CHART, + "", "Files not present inside directory cache", EBPF_COMMON_UNITS_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, @@ -312,9 +312,9 @@ static void ebpf_obsolete_dc_services(ebpf_module_t *em, char *id) 21202, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_DC_REQUEST_NOT_FOUND_CHART, + "", "Files not found", EBPF_COMMON_UNITS_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, @@ -453,9 +453,14 @@ static void ebpf_obsolete_dc_global(ebpf_module_t *em) */ static void ebpf_dcstat_exit(void *pptr) { + pids_fd[EBPF_PIDS_DCSTAT_IDX] = -1; ebpf_module_t *em = CLEANUP_FUNCTION_GET_PTR(pptr); if(!em) return; + pthread_mutex_lock(&lock); + collect_pids &= ~(1<<EBPF_MODULE_DCSTAT_IDX); + pthread_mutex_unlock(&lock); + if (ebpf_read_dcstat.thread) nd_thread_signal_cancel(ebpf_read_dcstat.thread); @@ -524,6 +529,9 @@ static void ebpf_dcstat_apps_accumulator(netdata_dcstat_pid_t *out, int maps_per if (w->ct > ct) ct = w->ct; + + if (!total->name[0] && w->name[0]) + strncpyz(total->name, w->name, sizeof(total->name) - 1); } total->ct = ct; } @@ -534,8 +542,9 @@ static void ebpf_dcstat_apps_accumulator(netdata_dcstat_pid_t *out, int maps_per * Read the apps table and store data inside the structure. * * @param maps_per_core do I need to read all cores? + * @param max_period limit of iterations without updates before remove data from hash table */ -static void ebpf_read_dc_apps_table(int maps_per_core, int max_period) +static void ebpf_read_dc_apps_table(int maps_per_core, uint32_t max_period) { netdata_dcstat_pid_t *cv = dcstat_vector; int fd = dcstat_maps[NETDATA_DCSTAT_PID_STATS].map_fd; @@ -551,15 +560,25 @@ static void ebpf_read_dc_apps_table(int maps_per_core, int max_period) ebpf_dcstat_apps_accumulator(cv, maps_per_core); - ebpf_pid_stat_t *pid_stat = ebpf_get_pid_entry(key, cv->tgid); - if (pid_stat) { - netdata_publish_dcstat_t *publish = &pid_stat->dc; - if (!publish->ct || publish->ct != cv->ct) { - memcpy(&publish->curr, &cv[0], sizeof(netdata_dcstat_pid_t)); - pid_stat->not_updated = 0; - } else if (++pid_stat->not_updated >= max_period) { - bpf_map_delete_elem(fd, &key); - pid_stat->not_updated = 0; + ebpf_pid_data_t *pid_stat = ebpf_get_pid_data(key, cv->tgid, cv->name, EBPF_PIDS_DCSTAT_IDX); + netdata_publish_dcstat_t *publish = pid_stat->dc; + if (!publish) + pid_stat->dc = publish = ebpf_dcallocate_publish(); + + if (!publish->ct || publish->ct != cv->ct) { + publish->ct = cv->ct; + publish->curr.not_found = cv[0].not_found; + publish->curr.file_system = cv[0].file_system; + publish->curr.cache_access = cv[0].cache_access; + + pid_stat->not_updated = 0; + } else { + if (kill(key, 0)) { // No PID found + ebpf_reset_specific_pid_data(pid_stat); + } else { // There is PID, but there is not data anymore + ebpf_release_pid_data(pid_stat, fd, key, EBPF_PIDS_DCSTAT_IDX); + ebpf_dc_release_publish(publish); + pid_stat->dc = NULL; } } @@ -580,20 +599,17 @@ end_dc_loop: */ void ebpf_dcstat_sum_pids(netdata_publish_dcstat_t *publish, struct ebpf_pid_on_target *root) { - memset(&publish->curr, 0, 
sizeof(netdata_dcstat_pid_t)); - netdata_dcstat_pid_t *dst = &publish->curr; - while (root) { + memset(&publish->curr, 0, sizeof(netdata_publish_dcstat_pid_t)); + for (; root; root = root->next) { int32_t pid = root->pid; - ebpf_pid_stat_t *pid_stat = ebpf_get_pid_entry(pid, 0); - if (pid_stat) { - netdata_publish_dcstat_t *w = &pid_stat->dc; - netdata_dcstat_pid_t *src = &w->curr; - dst->cache_access += src->cache_access; - dst->file_system += src->file_system; - dst->not_found += src->not_found; - } + ebpf_pid_data_t *pid_stat = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_DCSTAT_IDX); + netdata_publish_dcstat_t *w = pid_stat->dc; + if (!w) + continue; - root = root->next; + publish->curr.cache_access += w->curr.cache_access; + publish->curr.file_system += w->curr.file_system; + publish->curr.not_found += w->curr.not_found; } } @@ -635,13 +651,17 @@ void *ebpf_read_dcstat_thread(void *ptr) int maps_per_core = em->maps_per_core; int update_every = em->update_every; + int collect_pid = (em->apps_charts || em->cgroup_charts); + if (!collect_pid) + return NULL; int counter = update_every - 1; uint32_t lifetime = em->lifetime; uint32_t running_time = 0; usec_t period = update_every * USEC_PER_SEC; - int max_period = update_every * EBPF_CLEANUP_FACTOR; + uint32_t max_period = EBPF_CLEANUP_FACTOR; + pids_fd[EBPF_PIDS_DCSTAT_IDX] = dcstat_maps[NETDATA_DCSTAT_PID_STATS].map_fd; while (!ebpf_plugin_stop() && running_time < lifetime) { (void)heartbeat_next(&hb, period); if (ebpf_plugin_stop() || ++counter != update_every) @@ -771,12 +791,12 @@ static void ebpf_update_dc_cgroup() for (pids = ect->pids; pids; pids = pids->next) { int pid = pids->pid; netdata_dcstat_pid_t *out = &pids->dc; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - netdata_publish_dcstat_t *in = &local_pid->dc; + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_DCSTAT_IDX); + netdata_publish_dcstat_t *in = local_pid->dc; + if (!in) + continue; - memcpy(out, &in->curr, sizeof(netdata_dcstat_pid_t)); - } + memcpy(out, &in->curr, sizeof(netdata_publish_dcstat_pid_t)); } } pthread_mutex_unlock(&mutex_cgroup_shm); @@ -1001,13 +1021,12 @@ static void ebpf_obsolete_specific_dc_charts(char *type, int update_every) void ebpf_dc_sum_cgroup_pids(netdata_publish_dcstat_t *publish, struct pid_on_target2 *root) { memset(&publish->curr, 0, sizeof(netdata_dcstat_pid_t)); - netdata_dcstat_pid_t *dst = &publish->curr; while (root) { netdata_dcstat_pid_t *src = &root->dc; - dst->cache_access += src->cache_access; - dst->file_system += src->file_system; - dst->not_found += src->not_found; + publish->curr.cache_access += src->cache_access; + publish->curr.file_system += src->file_system; + publish->curr.not_found += src->not_found; root = root->next; } @@ -1139,22 +1158,22 @@ static void ebpf_send_systemd_dc_charts() continue; } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_DC_HIT_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_DC_HIT_CHART, ""); write_chart_dimension("percentage", (long long) ect->publish_dc.ratio); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_DC_REFERENCE_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_DC_REFERENCE_CHART, ""); write_chart_dimension("files", (long long) ect->publish_dc.cache_access); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_DC_REQUEST_NOT_CACHE_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_DC_REQUEST_NOT_CACHE_CHART, ""); value = 
(collected_number) (!ect->publish_dc.cache_access) ? 0 : (long long )ect->publish_dc.curr.file_system - (long long)ect->publish_dc.prev.file_system; ect->publish_dc.prev.file_system = ect->publish_dc.curr.file_system; write_chart_dimension("files", (long long) value); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_DC_REQUEST_NOT_FOUND_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_DC_REQUEST_NOT_FOUND_CHART, ""); value = (collected_number) (!ect->publish_dc.cache_access) ? 0 : (long long)ect->publish_dc.curr.not_found - (long long)ect->publish_dc.prev.not_found; diff --git a/src/collectors/ebpf.plugin/ebpf_dcstat.h b/src/collectors/ebpf.plugin/ebpf_dcstat.h index 82f21f48..a7e9f82b 100644 --- a/src/collectors/ebpf.plugin/ebpf_dcstat.h +++ b/src/collectors/ebpf.plugin/ebpf_dcstat.h @@ -3,6 +3,8 @@ #ifndef NETDATA_EBPF_DCSTAT_H #define NETDATA_EBPF_DCSTAT_H 1 +#include "ebpf.h" + // Module name & description #define NETDATA_EBPF_MODULE_NAME_DCSTAT "dcstat" #define NETDATA_EBPF_DC_MODULE_DESC "Monitor file access using directory cache. This thread is integrated with apps and cgroup." @@ -27,10 +29,10 @@ #define NETDATA_CGROUP_DC_NOT_CACHE_CONTEXT "cgroup.dc_not_cache" #define NETDATA_CGROUP_DC_NOT_FOUND_CONTEXT "cgroup.dc_not_found" -#define NETDATA_SYSTEMD_DC_HIT_RATIO_CONTEXT "systemd.services.dc_ratio" -#define NETDATA_SYSTEMD_DC_REFERENCE_CONTEXT "systemd.services.dc_reference" -#define NETDATA_SYSTEMD_DC_NOT_CACHE_CONTEXT "systemd.services.dc_not_cache" -#define NETDATA_SYSTEMD_DC_NOT_FOUND_CONTEXT "systemd.services.dc_not_found" +#define NETDATA_SYSTEMD_DC_HIT_RATIO_CONTEXT "systemd.service.dc_ratio" +#define NETDATA_SYSTEMD_DC_REFERENCE_CONTEXT "systemd.service.dc_reference" +#define NETDATA_SYSTEMD_DC_NOT_CACHE_CONTEXT "systemd.service.dc_not_cache" +#define NETDATA_SYSTEMD_DC_NOT_FOUND_CONTEXT "systemd.service.dc_not_found" // ARAL name #define NETDATA_EBPF_DCSTAT_ARAL_NAME "ebpf_dcstat" @@ -69,26 +71,32 @@ enum directory_cache_targets { NETDATA_DC_TARGET_D_LOOKUP }; -typedef struct netdata_publish_dcstat_pid { +typedef struct __attribute__((packed)) netdata_publish_dcstat_pid { + uint64_t cache_access; + uint32_t file_system; + uint32_t not_found; +} netdata_publish_dcstat_pid_t; + +typedef struct netdata_dcstat_pid { uint64_t ct; uint32_t tgid; uint32_t uid; uint32_t gid; char name[TASK_COMM_LEN]; - uint64_t cache_access; - uint64_t file_system; - uint64_t not_found; + uint32_t cache_access; + uint32_t file_system; + uint32_t not_found; } netdata_dcstat_pid_t; -typedef struct netdata_publish_dcstat { +typedef struct __attribute__((packed)) netdata_publish_dcstat { uint64_t ct; long long ratio; long long cache_access; - netdata_dcstat_pid_t curr; - netdata_dcstat_pid_t prev; + netdata_publish_dcstat_pid_t curr; + netdata_publish_dcstat_pid_t prev; } netdata_publish_dcstat_t; void *ebpf_dcstat_thread(void *ptr); diff --git a/src/collectors/ebpf.plugin/ebpf_fd.c b/src/collectors/ebpf.plugin/ebpf_fd.c index 4025931f..61a9595c 100644 --- a/src/collectors/ebpf.plugin/ebpf_fd.c +++ b/src/collectors/ebpf.plugin/ebpf_fd.c @@ -365,9 +365,9 @@ static void ebpf_obsolete_specific_fd_charts(char *type, ebpf_module_t *em); */ static void ebpf_obsolete_fd_services(ebpf_module_t *em, char *id) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_FILE_OPEN, + "", "Number of open files", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_FILE_GROUP, @@ -377,9 +377,9 @@ static void 
ebpf_obsolete_fd_services(ebpf_module_t *em, char *id) em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, + "", "Fails to open files", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_FILE_GROUP, @@ -389,9 +389,9 @@ static void ebpf_obsolete_fd_services(ebpf_module_t *em, char *id) em->update_every); } - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_FILE_CLOSED, + "", "Files closed", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_FILE_GROUP, @@ -401,9 +401,9 @@ static void ebpf_obsolete_fd_services(ebpf_module_t *em, char *id) em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, + "", "Fails to close files", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_FILE_GROUP, @@ -548,9 +548,14 @@ static void ebpf_obsolete_fd_global(ebpf_module_t *em) */ static void ebpf_fd_exit(void *pptr) { + pids_fd[EBPF_PIDS_FD_IDX] = -1; ebpf_module_t *em = CLEANUP_FUNCTION_GET_PTR(pptr); if(!em) return; + pthread_mutex_lock(&lock); + collect_pids &= ~(1<<EBPF_MODULE_FD_IDX); + pthread_mutex_unlock(&lock); + if (ebpf_read_fd.thread) nd_thread_signal_cancel(ebpf_read_fd.thread); @@ -656,12 +661,19 @@ static void fd_apps_accumulator(netdata_fd_stat_t *out, int maps_per_core) { int i, end = (maps_per_core) ? ebpf_nprocs : 1; netdata_fd_stat_t *total = &out[0]; + uint64_t ct = total->ct; for (i = 1; i < end; i++) { netdata_fd_stat_t *w = &out[i]; total->open_call += w->open_call; total->close_call += w->close_call; total->open_err += w->open_err; total->close_err += w->close_err; + + if (w->ct > ct) + ct = w->ct; + + if (!total->name[0] && w->name[0]) + strncpyz(total->name, w->name, sizeof(total->name) - 1); } } @@ -671,8 +683,9 @@ static void fd_apps_accumulator(netdata_fd_stat_t *out, int maps_per_core) * Read the apps table and store data inside the structure. * * @param maps_per_core do I need to read all cores? 
+ * @param max_period limit of iterations without updates before remove data from hash table */ -static void ebpf_read_fd_apps_table(int maps_per_core, int max_period) +static void ebpf_read_fd_apps_table(int maps_per_core, uint32_t max_period) { netdata_fd_stat_t *fv = fd_vector; int fd = fd_maps[NETDATA_FD_PID_STATS].map_fd; @@ -688,15 +701,26 @@ static void ebpf_read_fd_apps_table(int maps_per_core, int max_period) fd_apps_accumulator(fv, maps_per_core); - ebpf_pid_stat_t *pid_stat = ebpf_get_pid_entry(key, fv->tgid); - if (pid_stat) { - netdata_fd_stat_t *publish_fd = &pid_stat->fd; - if (!publish_fd->ct || publish_fd->ct != fv->ct) { - memcpy(publish_fd, &fv[0], sizeof(netdata_fd_stat_t)); - pid_stat->not_updated = 0; - } else if (++pid_stat->not_updated >= max_period) { - bpf_map_delete_elem(fd, &key); - pid_stat->not_updated = 0; + ebpf_pid_data_t *pid_stat = ebpf_get_pid_data(key, fv->tgid, fv->name, EBPF_PIDS_FD_IDX); + netdata_publish_fd_stat_t *publish_fd = pid_stat->fd; + if (!publish_fd) + pid_stat->fd = publish_fd = ebpf_fd_allocate_publish(); + + if (!publish_fd->ct || publish_fd->ct != fv->ct) { + publish_fd->ct = fv->ct; + publish_fd->open_call = fv->open_call; + publish_fd->close_call = fv->close_call; + publish_fd->open_err = fv->open_err; + publish_fd->close_err = fv->close_err; + + pid_stat->not_updated = 0; + } else { + if (kill(key, 0)) { // No PID found + ebpf_reset_specific_pid_data(pid_stat); + } else { // There is PID, but there is not data anymore + ebpf_release_pid_data(pid_stat, fd, key, EBPF_PIDS_FD_IDX); + ebpf_fd_release_publish(publish_fd); + pid_stat->fd = NULL; } } @@ -719,18 +743,17 @@ static void ebpf_fd_sum_pids(netdata_fd_stat_t *fd, struct ebpf_pid_on_target *r { memset(fd, 0, sizeof(netdata_fd_stat_t)); - while (root) { + for (; root; root = root->next) { int32_t pid = root->pid; - ebpf_pid_stat_t *pid_stat = ebpf_get_pid_entry(pid, 0); - if (pid_stat) { - netdata_fd_stat_t *w = &pid_stat->fd; - fd->open_call += w->open_call; - fd->close_call += w->close_call; - fd->open_err += w->open_err; - fd->close_err += w->close_err; - } + ebpf_pid_data_t *pid_stat = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_FD_IDX); + netdata_publish_fd_stat_t *w = pid_stat->fd; + if (!w) + continue; - root = root->next; + fd->open_call += w->open_call; + fd->close_call += w->close_call; + fd->open_err += w->open_err; + fd->close_err += w->close_err; } } @@ -767,13 +790,17 @@ void *ebpf_read_fd_thread(void *ptr) int maps_per_core = em->maps_per_core; int update_every = em->update_every; + int collect_pid = (em->apps_charts || em->cgroup_charts); + if (!collect_pid) + return NULL; int counter = update_every - 1; uint32_t lifetime = em->lifetime; uint32_t running_time = 0; - usec_t period = update_every * USEC_PER_SEC; - int max_period = update_every * EBPF_CLEANUP_FACTOR; + int period = USEC_PER_SEC; + uint32_t max_period = EBPF_CLEANUP_FACTOR; + pids_fd[EBPF_PIDS_FD_IDX] = fd_maps[NETDATA_FD_PID_STATS].map_fd; while (!ebpf_plugin_stop() && running_time < lifetime) { (void)heartbeat_next(&hb, period); if (ebpf_plugin_stop() || ++counter != update_every) @@ -815,13 +842,12 @@ static void ebpf_update_fd_cgroup() struct pid_on_target2 *pids; for (pids = ect->pids; pids; pids = pids->next) { int pid = pids->pid; - netdata_fd_stat_t *out = &pids->fd; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - netdata_fd_stat_t *in = &local_pid->fd; - - memcpy(out, in, sizeof(netdata_fd_stat_t)); - } + netdata_publish_fd_stat_t *out = &pids->fd; + 
ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_FD_IDX); + netdata_publish_fd_stat_t *in = local_pid->fd; + if (!in) + continue; + memcpy(out, in, sizeof(netdata_publish_fd_stat_t)); } } pthread_mutex_unlock(&mutex_cgroup_shm); @@ -872,13 +898,13 @@ void ebpf_fd_send_apps_data(ebpf_module_t *em, struct ebpf_target *root) * @param fd structure used to store data * @param pids input data */ -static void ebpf_fd_sum_cgroup_pids(netdata_fd_stat_t *fd, struct pid_on_target2 *pids) +static void ebpf_fd_sum_cgroup_pids(netdata_publish_fd_stat_t *fd, struct pid_on_target2 *pids) { netdata_fd_stat_t accumulator; memset(&accumulator, 0, sizeof(accumulator)); while (pids) { - netdata_fd_stat_t *w = &pids->fd; + netdata_publish_fd_stat_t *w = &pids->fd; accumulator.open_err += w->open_err; accumulator.open_call += w->open_call; @@ -995,7 +1021,7 @@ static void ebpf_obsolete_specific_fd_charts(char *type, ebpf_module_t *em) * @param type chart type * @param values structure with values that will be sent to netdata */ -static void ebpf_send_specific_fd_data(char *type, netdata_fd_stat_t *values, ebpf_module_t *em) +static void ebpf_send_specific_fd_data(char *type, netdata_publish_fd_stat_t *values, ebpf_module_t *em) { ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN, ""); write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN].name, (long long)values->open_call); @@ -1120,22 +1146,22 @@ static void ebpf_send_systemd_fd_charts(ebpf_module_t *em) continue; } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_FILE_OPEN); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_FILE_OPEN, ""); write_chart_dimension("calls", ect->publish_systemd_fd.open_call); ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, ""); write_chart_dimension("calls", ect->publish_systemd_fd.open_err); ebpf_write_end_chart(); } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_FILE_CLOSED); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_FILE_CLOSED, ""); write_chart_dimension("calls", ect->publish_systemd_fd.close_call); ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, ""); write_chart_dimension("calls", ect->publish_systemd_fd.close_err); ebpf_write_end_chart(); } @@ -1463,7 +1489,8 @@ void *ebpf_fd_thread(void *ptr) pthread_mutex_unlock(&lock); - ebpf_read_fd.thread = nd_thread_create(ebpf_read_fd.name, NETDATA_THREAD_OPTION_DEFAULT, ebpf_read_fd_thread, em); + ebpf_read_fd.thread = nd_thread_create(ebpf_read_fd.name, NETDATA_THREAD_OPTION_DEFAULT, + ebpf_read_fd_thread, em); fd_collector(em); diff --git a/src/collectors/ebpf.plugin/ebpf_fd.h b/src/collectors/ebpf.plugin/ebpf_fd.h index d4975940..90ecdb13 100644 --- a/src/collectors/ebpf.plugin/ebpf_fd.h +++ b/src/collectors/ebpf.plugin/ebpf_fd.h @@ -32,14 +32,25 @@ #define NETDATA_CGROUP_FD_CLOSE_CONTEXT "cgroup.fd_close" #define NETDATA_CGROUP_FD_CLOSE_ERR_CONTEXT "cgroup.fd_close_error" -#define NETDATA_SYSTEMD_FD_OPEN_CONTEXT "systemd.services.fd_open" -#define NETDATA_SYSTEMD_FD_OPEN_ERR_CONTEXT "systemd.services.fd_open_error" -#define NETDATA_SYSTEMD_FD_CLOSE_CONTEXT "systemd.services.fd_close" -#define 
NETDATA_SYSTEMD_FD_CLOSE_ERR_CONTEXT "systemd.services.fd_close_error" +#define NETDATA_SYSTEMD_FD_OPEN_CONTEXT "systemd.service.fd_open" +#define NETDATA_SYSTEMD_FD_OPEN_ERR_CONTEXT "systemd.service.fd_open_error" +#define NETDATA_SYSTEMD_FD_CLOSE_CONTEXT "systemd.service.fd_close" +#define NETDATA_SYSTEMD_FD_CLOSE_ERR_CONTEXT "systemd.service.fd_close_error" // ARAL name #define NETDATA_EBPF_FD_ARAL_NAME "ebpf_fd" +typedef struct __attribute__((packed)) netdata_publish_fd_stat { + uint64_t ct; + + uint32_t open_call; // Open syscalls (open and openat) + uint32_t close_call; // Close syscall (close) + + // Errors + uint32_t open_err; + uint32_t close_err; +} netdata_publish_fd_stat_t; + typedef struct netdata_fd_stat { uint64_t ct; uint32_t tgid; diff --git a/src/collectors/ebpf.plugin/ebpf_filesystem.c b/src/collectors/ebpf.plugin/ebpf_filesystem.c index c56dea4b..1187b03e 100644 --- a/src/collectors/ebpf.plugin/ebpf_filesystem.c +++ b/src/collectors/ebpf.plugin/ebpf_filesystem.c @@ -334,6 +334,46 @@ static inline int ebpf_fs_load_and_attach(ebpf_local_maps_t *map, struct filesys *****************************************************************/ /** + * Obsolete Cleanup Struct + * + * Clean allocatged data durinc obsolete steps + * + * @param efp + */ +static void ebpf_obsolete_cleanup_struct(ebpf_filesystem_partitions_t *efp) { + freez(efp->hread.name); + efp->hread.name = NULL; + freez(efp->hread.title); + efp->hread.title = NULL; + freez(efp->hread.ctx); + efp->hread.ctx = NULL; + + freez(efp->hwrite.name); + efp->hwrite.name = NULL; + freez(efp->hwrite.title); + efp->hwrite.title = NULL; + freez(efp->hwrite.ctx); + efp->hwrite.ctx = NULL; + + freez(efp->hopen.name); + efp->hopen.name = NULL; + freez(efp->hopen.title); + efp->hopen.title = NULL; + freez(efp->hopen.ctx); + efp->hopen.ctx = NULL; + + freez(efp->hadditional.name); + efp->hadditional.name = NULL; + freez(efp->hadditional.title); + efp->hadditional.title = NULL; + freez(efp->hadditional.ctx); + efp->hadditional.ctx = NULL; + + freez(efp->family_name); + efp->family_name = NULL; +} + +/** * Create Filesystem chart * * Create latency charts @@ -348,7 +388,7 @@ static void ebpf_obsolete_fs_charts(int update_every) ebpf_filesystem_partitions_t *efp = &localfs[i]; uint32_t flags = efp->flags; if ((flags & test) == test) { - flags &= ~NETDATA_FILESYSTEM_FLAG_CHART_CREATED; + flags &= ~test; ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hread.name, "", @@ -370,6 +410,8 @@ static void ebpf_obsolete_fs_charts(int update_every) EBPF_COMMON_UNITS_CALLS_PER_SEC, efp->family_name, NULL, NETDATA_EBPF_CHART_TYPE_STACKED, efp->hadditional.order, update_every); + + ebpf_obsolete_cleanup_struct(efp); } efp->flags = flags; } @@ -395,9 +437,10 @@ static void ebpf_create_fs_charts(int update_every) snprintfz(title, sizeof(title) - 1, "%s latency for each read request.", efp->filesystem); snprintfz(family, sizeof(family) - 1, "%s_latency", efp->family); snprintfz(chart_name, sizeof(chart_name) - 1, "%s_read_latency", efp->filesystem); + snprintfz(ctx, sizeof(ctx) - 1, "filesystem.read_latency"); efp->hread.name = strdupz(chart_name); efp->hread.title = strdupz(title); - efp->hread.ctx = NULL; + efp->hread.ctx = strdupz(ctx); efp->hread.order = order; efp->family_name = strdupz(family); @@ -412,9 +455,10 @@ static void ebpf_create_fs_charts(int update_every) snprintfz(title, sizeof(title) - 1, "%s latency for each write request.", efp->filesystem); snprintfz(chart_name, sizeof(chart_name) - 1, "%s_write_latency", efp->filesystem); + 
snprintfz(ctx, sizeof(ctx) - 1, "filesystem.write_latency"); efp->hwrite.name = strdupz(chart_name); efp->hwrite.title = strdupz(title); - efp->hwrite.ctx = NULL; + efp->hwrite.ctx = strdupz(ctx); efp->hwrite.order = order; ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hwrite.name, efp->hwrite.title, @@ -427,9 +471,10 @@ static void ebpf_create_fs_charts(int update_every) snprintfz(title, sizeof(title) - 1, "%s latency for each open request.", efp->filesystem); snprintfz(chart_name, sizeof(chart_name) - 1, "%s_open_latency", efp->filesystem); + snprintfz(ctx, sizeof(ctx) - 1, "filesystem.open_latency"); efp->hopen.name = strdupz(chart_name); efp->hopen.title = strdupz(title); - efp->hopen.ctx = NULL; + efp->hopen.ctx = strdupz(ctx); efp->hopen.order = order; ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hopen.name, efp->hopen.title, @@ -443,7 +488,7 @@ static void ebpf_create_fs_charts(int update_every) char *type = (efp->flags & NETDATA_FILESYSTEM_ATTR_CHARTS) ? "attribute" : "sync"; snprintfz(title, sizeof(title) - 1, "%s latency for each %s request.", efp->filesystem, type); snprintfz(chart_name, sizeof(chart_name) - 1, "%s_%s_latency", efp->filesystem, type); - snprintfz(ctx, sizeof(ctx) - 1, "filesystem.%s_latency", type); + snprintfz(ctx, sizeof(ctx) - 1, "filesystem.%s_latency", efp->filesystem); efp->hadditional.name = strdupz(chart_name); efp->hadditional.title = strdupz(title); efp->hadditional.ctx = strdupz(ctx); @@ -499,11 +544,14 @@ int ebpf_filesystem_initialize_ebpf_data(ebpf_module_t *em) if (!efp->fs_obj) { em->info.thread_name = saved_name; em->kernels = kernels; + pthread_mutex_unlock(&lock); + return -1; + } else if (ebpf_fs_load_and_attach(em->maps, efp->fs_obj, + efp->functions, NULL)) { + em->info.thread_name = saved_name; + em->kernels = kernels; + pthread_mutex_unlock(&lock); return -1; - } else { - if (ebpf_fs_load_and_attach(em->maps, efp->fs_obj, - efp->functions, NULL)) - return -1; } } #endif @@ -572,7 +620,9 @@ static int ebpf_read_local_partitions() ebpf_filesystem_partitions_t *w = &localfs[i]; if (w->enabled && (!strcmp(fs, w->filesystem) || (w->optional_filesystem && !strcmp(fs, w->optional_filesystem)))) { - localfs[i].flags |= NETDATA_FILESYSTEM_LOAD_EBPF_PROGRAM; + if (!(localfs[i].flags & NETDATA_FILESYSTEM_FLAG_CHART_CREATED)) + localfs[i].flags |= NETDATA_FILESYSTEM_LOAD_EBPF_PROGRAM; + localfs[i].flags &= ~NETDATA_FILESYSTEM_REMOVE_CHARTS; count++; break; @@ -756,8 +806,8 @@ static void ebpf_filesystem_exit(void *pptr) pthread_mutex_lock(&lock); ebpf_obsolete_filesystem_global(em); - pthread_mutex_unlock(&lock); fflush(stdout); + pthread_mutex_unlock(&lock); } ebpf_filesystem_cleanup_ebpf_data(); @@ -889,10 +939,10 @@ static void read_filesystem_tables(int maps_per_core) */ void ebpf_filesystem_read_hash(ebpf_module_t *em) { - ebpf_obsolete_fs_charts(em->update_every); - (void) ebpf_update_partitions(em); + ebpf_obsolete_fs_charts(em->update_every); + if (em->optional) return; diff --git a/src/collectors/ebpf.plugin/ebpf_functions.c b/src/collectors/ebpf.plugin/ebpf_functions.c index 4a43bf43..8e9fb01e 100644 --- a/src/collectors/ebpf.plugin/ebpf_functions.c +++ b/src/collectors/ebpf.plugin/ebpf_functions.c @@ -331,7 +331,7 @@ static void ebpf_function_socket_manipulation(const char *transaction, "Filters can be combined. Each filter can be given only one time. 
Default all ports\n" }; -for (int i = 1; i < PLUGINSD_MAX_WORDS; i++) { + for (int i = 1; i < PLUGINSD_MAX_WORDS; i++) { const char *keyword = get_word(words, num_words, i); if (!keyword) break; @@ -428,6 +428,7 @@ for (int i = 1; i < PLUGINSD_MAX_WORDS; i++) { ebpf_socket_clean_judy_array_unsafe(); rw_spinlock_write_unlock(&ebpf_judy_pid.index.rw_spinlock); + collect_pids |= 1<<EBPF_MODULE_SOCKET_IDX; pthread_mutex_lock(&ebpf_exit_cleanup); if (ebpf_function_start_thread(em, period)) { ebpf_function_error(transaction, diff --git a/src/collectors/ebpf.plugin/ebpf_oomkill.c b/src/collectors/ebpf.plugin/ebpf_oomkill.c index 8ecd0883..34361550 100644 --- a/src/collectors/ebpf.plugin/ebpf_oomkill.c +++ b/src/collectors/ebpf.plugin/ebpf_oomkill.c @@ -55,9 +55,9 @@ static void ebpf_obsolete_specific_oomkill_charts(char *type, int update_every); */ static void ebpf_obsolete_oomkill_services(ebpf_module_t *em, char *id) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_OOMKILL_CHART, + "", "Systemd service OOM kills.", EBPF_OOMKILL_UNIT_KILLS, NETDATA_EBPF_MEMORY_GROUP, @@ -133,6 +133,10 @@ static void oomkill_cleanup(void *pptr) ebpf_module_t *em = CLEANUP_FUNCTION_GET_PTR(pptr); if(!em) return; + pthread_mutex_lock(&lock); + collect_pids &= ~(1<<EBPF_MODULE_OOMKILL_IDX); + pthread_mutex_unlock(&lock); + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { pthread_mutex_lock(&lock); @@ -242,7 +246,7 @@ static void ebpf_create_systemd_oomkill_charts(int update_every) .charttype = NETDATA_EBPF_CHART_TYPE_STACKED, .order = 20191, .algorithm = EBPF_CHART_ALGORITHM_INCREMENTAL, - .context = NETDATA_CGROUP_OOMKILLS_CONTEXT, + .context = NETDATA_SYSTEMD_OOMKILLS_CONTEXT, .module = NETDATA_EBPF_MODULE_NAME_OOMKILL, .update_every = 0, .suffix = NETDATA_OOMKILL_CHART, @@ -276,7 +280,7 @@ static void ebpf_send_systemd_oomkill_charts() if (unlikely(!(ect->flags & NETDATA_EBPF_SERVICES_HAS_OOMKILL_CHART)) ) { continue; } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_OOMKILL_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_OOMKILL_CHART, ""); write_chart_dimension(oomkill_publish_aggregated.dimension, (long long) ect->oomkill); ect->oomkill = 0; ebpf_write_end_chart(); @@ -549,7 +553,7 @@ void *ebpf_oomkill_thread(void *ptr) em->maps = oomkill_maps; #define NETDATA_DEFAULT_OOM_DISABLED_MSG "Disabling OOMKILL thread, because" - if (unlikely(!ebpf_all_pids || !em->apps_charts)) { + if (unlikely(!em->apps_charts)) { // When we are not running integration with apps, we won't fill necessary variables for this thread to run, so // we need to disable it. 
pthread_mutex_lock(&ebpf_exit_cleanup); diff --git a/src/collectors/ebpf.plugin/ebpf_oomkill.h b/src/collectors/ebpf.plugin/ebpf_oomkill.h index 0d02da9d..0504181c 100644 --- a/src/collectors/ebpf.plugin/ebpf_oomkill.h +++ b/src/collectors/ebpf.plugin/ebpf_oomkill.h @@ -28,6 +28,7 @@ typedef uint8_t oomkill_ebpf_val_t; // Contexts #define NETDATA_CGROUP_OOMKILLS_CONTEXT "cgroup.oomkills" +#define NETDATA_SYSTEMD_OOMKILLS_CONTEXT "systemd.oomkills" extern struct config oomkill_config; void *ebpf_oomkill_thread(void *ptr); diff --git a/src/collectors/ebpf.plugin/ebpf_process.c b/src/collectors/ebpf.plugin/ebpf_process.c index e5756fa3..d2810f89 100644 --- a/src/collectors/ebpf.plugin/ebpf_process.c +++ b/src/collectors/ebpf.plugin/ebpf_process.c @@ -229,13 +229,13 @@ static void ebpf_update_process_cgroup() struct pid_on_target2 *pids; for (pids = ect->pids; pids; pids = pids->next) { int pid = pids->pid; - ebpf_process_stat_t *out = &pids->ps; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - ebpf_process_stat_t *in = &local_pid->process; + ebpf_publish_process_t *out = &pids->ps; + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_PROCESS_IDX); + ebpf_publish_process_t *in = local_pid->process; + if (!in) + continue; - memcpy(out, in, sizeof(ebpf_process_stat_t)); - } + memcpy(out, in, sizeof(ebpf_publish_process_t)); } } pthread_mutex_unlock(&mutex_cgroup_shm); @@ -445,9 +445,9 @@ static void ebpf_obsolete_specific_process_charts(char *type, ebpf_module_t *em) */ static void ebpf_obsolete_process_services(ebpf_module_t *em, char *id) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_TASK_PROCESS, + "", "Process started", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_PROCESS_GROUP, @@ -456,9 +456,9 @@ static void ebpf_obsolete_process_services(ebpf_module_t *em, char *id) 20065, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_TASK_THREAD, + "", "Threads started", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_PROCESS_GROUP, @@ -467,9 +467,9 @@ static void ebpf_obsolete_process_services(ebpf_module_t *em, char *id) 20066, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_TASK_CLOSE, + "", "Tasks starts exit process.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_PROCESS_GROUP, @@ -478,9 +478,9 @@ static void ebpf_obsolete_process_services(ebpf_module_t *em, char *id) 20067, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_TASK_EXIT, + "", "Tasks closed", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_PROCESS_GROUP, @@ -490,9 +490,9 @@ static void ebpf_obsolete_process_services(ebpf_module_t *em, char *id) em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_TASK_ERROR, + "", "Errors to create process or threads.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_PROCESS_GROUP, @@ -691,9 +691,14 @@ static void ebpf_process_disable_tracepoints() */ static void ebpf_process_exit(void *pptr) { + pids_fd[EBPF_PIDS_PROCESS_IDX] = -1; ebpf_module_t *em = CLEANUP_FUNCTION_GET_PTR(pptr); if(!em) return; + pthread_mutex_lock(&lock); + collect_pids &= ~(1<<EBPF_MODULE_PROCESS_IDX); + pthread_mutex_unlock(&lock); + if (em->enabled == 
NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { pthread_mutex_lock(&lock); if (em->cgroup_charts) { @@ -746,13 +751,13 @@ static void ebpf_process_exit(void *pptr) * @param ps structure used to store data * @param pids input data */ -static void ebpf_process_sum_cgroup_pids(ebpf_process_stat_t *ps, struct pid_on_target2 *pids) +static void ebpf_process_sum_cgroup_pids(ebpf_publish_process_t *ps, struct pid_on_target2 *pids) { - ebpf_process_stat_t accumulator; + ebpf_publish_process_t accumulator; memset(&accumulator, 0, sizeof(accumulator)); while (pids) { - ebpf_process_stat_t *pps = &pids->ps; + ebpf_publish_process_t *pps = &pids->ps; accumulator.exit_call += pps->exit_call; accumulator.release_call += pps->release_call; @@ -781,7 +786,7 @@ static void ebpf_process_sum_cgroup_pids(ebpf_process_stat_t *ps, struct pid_on_ * @param values structure with values that will be sent to netdata * @param em the structure with thread information */ -static void ebpf_send_specific_process_data(char *type, ebpf_process_stat_t *values, ebpf_module_t *em) +static void ebpf_send_specific_process_data(char *type, ebpf_publish_process_t *values, ebpf_module_t *em) { ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_PROCESS, ""); write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK].name, @@ -1031,24 +1036,24 @@ static void ebpf_send_systemd_process_charts(ebpf_module_t *em) continue; } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_TASK_PROCESS); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_TASK_PROCESS, ""); write_chart_dimension("calls", ect->publish_systemd_ps.create_process); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_TASK_THREAD); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_TASK_THREAD, ""); write_chart_dimension("calls", ect->publish_systemd_ps.create_thread); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_TASK_EXIT); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_TASK_EXIT, ""); write_chart_dimension("calls", ect->publish_systemd_ps.exit_call); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_TASK_CLOSE); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_TASK_CLOSE, ""); write_chart_dimension("calls", ect->publish_systemd_ps.release_call); ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_TASK_ERROR); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_TASK_ERROR, ""); write_chart_dimension("calls", ect->publish_systemd_ps.task_err); ebpf_write_end_chart(); } diff --git a/src/collectors/ebpf.plugin/ebpf_process.h b/src/collectors/ebpf.plugin/ebpf_process.h index 18ffec1f..d2990cea 100644 --- a/src/collectors/ebpf.plugin/ebpf_process.h +++ b/src/collectors/ebpf.plugin/ebpf_process.h @@ -33,16 +33,17 @@ #define NETDATA_CGROUP_PROCESS_EXIT_CONTEXT "cgroup.task_exit" #define NETDATA_CGROUP_PROCESS_ERROR_CONTEXT "cgroup.task_error" -#define NETDATA_SYSTEMD_PROCESS_CREATE_CONTEXT "systemd.services.process_create" -#define NETDATA_SYSTEMD_THREAD_CREATE_CONTEXT "systemd.services.thread_create" -#define NETDATA_SYSTEMD_PROCESS_CLOSE_CONTEXT "systemd.services.task_close" -#define NETDATA_SYSTEMD_PROCESS_EXIT_CONTEXT "systemd.services.task_exit" -#define NETDATA_SYSTEMD_PROCESS_ERROR_CONTEXT "systemd.services.task_error" +#define 
NETDATA_SYSTEMD_PROCESS_CREATE_CONTEXT "systemd.service.process_create" +#define NETDATA_SYSTEMD_THREAD_CREATE_CONTEXT "systemd.service.thread_create" +#define NETDATA_SYSTEMD_PROCESS_CLOSE_CONTEXT "systemd.service.task_close" +#define NETDATA_SYSTEMD_PROCESS_EXIT_CONTEXT "systemd.service.task_exit" +#define NETDATA_SYSTEMD_PROCESS_ERROR_CONTEXT "systemd.service.task_error" #define NETDATA_EBPF_CGROUP_UPDATE 30 enum netdata_ebpf_stats_order { NETDATA_EBPF_ORDER_STAT_THREADS = 140000, + NETDATA_EBPF_ORDER_PIDS, NETDATA_EBPF_ORDER_STAT_LIFE_TIME, NETDATA_EBPF_ORDER_STAT_LOAD_METHOD, NETDATA_EBPF_ORDER_STAT_KERNEL_MEMORY, diff --git a/src/collectors/ebpf.plugin/ebpf_shm.c b/src/collectors/ebpf.plugin/ebpf_shm.c index 8e199952..ac44549b 100644 --- a/src/collectors/ebpf.plugin/ebpf_shm.c +++ b/src/collectors/ebpf.plugin/ebpf_shm.c @@ -7,7 +7,7 @@ static char *shm_dimension_name[NETDATA_SHM_END] = { "get", "at", "dt", "ctl" }; static netdata_syscall_stat_t shm_aggregated_data[NETDATA_SHM_END]; static netdata_publish_syscall_t shm_publish_aggregated[NETDATA_SHM_END]; -netdata_publish_shm_t *shm_vector = NULL; +netdata_ebpf_shm_t *shm_vector = NULL; static netdata_idx_t shm_hash_values[NETDATA_SHM_END]; static netdata_idx_t *shm_values = NULL; @@ -287,9 +287,9 @@ static void ebpf_obsolete_specific_shm_charts(char *type, int update_every); */ static void ebpf_obsolete_shm_services(ebpf_module_t *em, char *id) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SHMGET_CHART, + "", "Calls to syscall shmget(2).", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_IPC_SHM_GROUP, @@ -298,9 +298,9 @@ static void ebpf_obsolete_shm_services(ebpf_module_t *em, char *id) 20191, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SHMAT_CHART, + "", "Calls to syscall shmat(2).", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_IPC_SHM_GROUP, @@ -309,9 +309,9 @@ static void ebpf_obsolete_shm_services(ebpf_module_t *em, char *id) 20192, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SHMDT_CHART, + "", "Calls to syscall shmdt(2).", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_IPC_SHM_GROUP, @@ -320,9 +320,9 @@ static void ebpf_obsolete_shm_services(ebpf_module_t *em, char *id) 20193, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SHMCTL_CHART, + "", "Calls to syscall shmctl(2).", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_IPC_SHM_GROUP, @@ -453,6 +453,10 @@ static void ebpf_shm_exit(void *pptr) ebpf_module_t *em = CLEANUP_FUNCTION_GET_PTR(pptr); if(!em) return; + pthread_mutex_lock(&lock); + collect_pids &= ~(1<<EBPF_MODULE_SHM_IDX); + pthread_mutex_unlock(&lock); + if (ebpf_read_shm.thread) nd_thread_signal_cancel(ebpf_read_shm.thread); @@ -506,16 +510,23 @@ static void ebpf_shm_exit(void *pptr) * @param out the vector with read values. * @param maps_per_core do I need to read all cores? */ -static void shm_apps_accumulator(netdata_publish_shm_t *out, int maps_per_core) +static void shm_apps_accumulator(netdata_ebpf_shm_t *out, int maps_per_core) { int i, end = (maps_per_core) ? 
ebpf_nprocs : 1; - netdata_publish_shm_t *total = &out[0]; + netdata_ebpf_shm_t *total = &out[0]; + uint64_t ct = total->ct; for (i = 1; i < end; i++) { - netdata_publish_shm_t *w = &out[i]; + netdata_ebpf_shm_t *w = &out[i]; total->get += w->get; total->at += w->at; total->dt += w->dt; total->ctl += w->ctl; + + if (w->ct > ct) + ct = w->ct; + + if (!total->name[0] && w->name[0]) + strncpyz(total->name, w->name, sizeof(total->name) - 1); } } @@ -528,7 +539,7 @@ static void shm_apps_accumulator(netdata_publish_shm_t *out, int maps_per_core) */ static void ebpf_update_shm_cgroup() { - netdata_publish_shm_t *cv = shm_vector; + netdata_ebpf_shm_t *cv = shm_vector; size_t length = sizeof(netdata_publish_shm_t); ebpf_cgroup_target_t *ect; @@ -541,12 +552,12 @@ static void ebpf_update_shm_cgroup() for (pids = ect->pids; pids; pids = pids->next) { int pid = pids->pid; netdata_publish_shm_t *out = &pids->shm; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - netdata_publish_shm_t *in = &local_pid->shm; + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_SHM_IDX); + netdata_publish_shm_t *in = local_pid->shm; + if (!in) + continue; - memcpy(out, in, sizeof(netdata_publish_shm_t)); - } + memcpy(out, in, sizeof(netdata_publish_shm_t)); } } pthread_mutex_unlock(&mutex_cgroup_shm); @@ -558,12 +569,13 @@ static void ebpf_update_shm_cgroup() * Read the apps table and store data inside the structure. * * @param maps_per_core do I need to read all cores? + * @param max_period limit of iterations without updates before remove data from hash table */ -static void ebpf_read_shm_apps_table(int maps_per_core, int max_period) +static void ebpf_read_shm_apps_table(int maps_per_core, uint32_t max_period) { - netdata_publish_shm_t *cv = shm_vector; + netdata_ebpf_shm_t *cv = shm_vector; int fd = shm_maps[NETDATA_PID_SHM_TABLE].map_fd; - size_t length = sizeof(netdata_publish_shm_t); + size_t length = sizeof(netdata_ebpf_shm_t); if (maps_per_core) length *= ebpf_nprocs; @@ -575,18 +587,22 @@ static void ebpf_read_shm_apps_table(int maps_per_core, int max_period) shm_apps_accumulator(cv, maps_per_core); - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(key, 0); - if (!local_pid) - goto end_shm_loop; - + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(key, cv->tgid, cv->name, EBPF_PIDS_SHM_IDX); + netdata_publish_shm_t *publish = local_pid->shm; + if (!publish) + local_pid->shm = publish = ebpf_shm_allocate_publish(); - netdata_publish_shm_t *publish = &local_pid->shm; if (!publish->ct || publish->ct != cv->ct) { memcpy(publish, &cv[0], sizeof(netdata_publish_shm_t)); local_pid->not_updated = 0; - } else if (++local_pid->not_updated >= max_period){ - bpf_map_delete_elem(fd, &key); - local_pid->not_updated = 0; + } else { + if (kill(key, 0)) { // No PID found + ebpf_reset_specific_pid_data(local_pid); + } else { // There is PID, but there is not data anymore + ebpf_release_pid_data(local_pid, fd, key, EBPF_PIDS_SHM_IDX); + ebpf_shm_release_publish(publish); + local_pid->shm = NULL; + } } end_shm_loop: @@ -654,23 +670,17 @@ static void ebpf_shm_read_global_table(netdata_idx_t *stats, int maps_per_core) static void ebpf_shm_sum_pids(netdata_publish_shm_t *shm, struct ebpf_pid_on_target *root) { memset(shm, 0, sizeof(netdata_publish_shm_t)); - while (root) { + for (; root; root = root->next) { int32_t pid = root->pid; - ebpf_pid_stat_t *pid_stat = ebpf_get_pid_entry(pid, 0); - if (pid_stat) { - netdata_publish_shm_t *w = &pid_stat->shm; - shm->get += w->get; - shm->at += 
w->at; - shm->dt += w->dt; - shm->ctl += w->ctl; - - // reset for next collection. - w->get = 0; - w->at = 0; - w->dt = 0; - w->ctl = 0; - } - root = root->next; + ebpf_pid_data_t *pid_stat = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_SHM_IDX); + netdata_publish_shm_t *w = pid_stat->shm; + if (!w) + continue; + + shm->get += w->get; + shm->at += w->at; + shm->dt += w->dt; + shm->ctl += w->ctl; } } @@ -941,19 +951,19 @@ static void ebpf_send_systemd_shm_charts() continue; } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SHMGET_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_SHMGET_CHART, ""); write_chart_dimension("calls", (long long)ect->publish_shm.get); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SHMAT_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_SHMAT_CHART, ""); write_chart_dimension("calls", (long long)ect->publish_shm.at); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SHMDT_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_SHMDT_CHART, ""); write_chart_dimension("calls", (long long)ect->publish_shm.dt); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SHMCTL_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_SHMCTL_CHART, ""); write_chart_dimension("calls", (long long)ect->publish_shm.ctl); ebpf_write_end_chart(); } @@ -1060,13 +1070,17 @@ void *ebpf_read_shm_thread(void *ptr) int maps_per_core = em->maps_per_core; int update_every = em->update_every; + int collect_pid = (em->apps_charts || em->cgroup_charts); + if (!collect_pid) + return NULL; int counter = update_every - 1; uint32_t lifetime = em->lifetime; uint32_t running_time = 0; usec_t period = update_every * USEC_PER_SEC; - int max_period = update_every * EBPF_CLEANUP_FACTOR; + uint32_t max_period = EBPF_CLEANUP_FACTOR; + pids_fd[EBPF_PIDS_SHM_IDX] = shm_maps[NETDATA_PID_SHM_TABLE].map_fd; while (!ebpf_plugin_stop() && running_time < lifetime) { (void)heartbeat_next(&hb, period); if (ebpf_plugin_stop() || ++counter != update_every) @@ -1325,6 +1339,7 @@ static int ebpf_shm_load_bpf(ebpf_module_t *em) */ void *ebpf_shm_thread(void *ptr) { + pids_fd[EBPF_PIDS_SHM_IDX] = -1; ebpf_module_t *em = (ebpf_module_t *)ptr; CLEANUP_FUNCTION_REGISTER(ebpf_shm_exit) cleanup_ptr = em; @@ -1363,7 +1378,8 @@ void *ebpf_shm_thread(void *ptr) ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); pthread_mutex_unlock(&lock); - ebpf_read_shm.thread = nd_thread_create(ebpf_read_shm.name, NETDATA_THREAD_OPTION_DEFAULT, ebpf_read_shm_thread, em); + ebpf_read_shm.thread = nd_thread_create(ebpf_read_shm.name, NETDATA_THREAD_OPTION_DEFAULT, + ebpf_read_shm_thread, em); shm_collector(em); diff --git a/src/collectors/ebpf.plugin/ebpf_shm.h b/src/collectors/ebpf.plugin/ebpf_shm.h index 5a670b1b..6f89faa9 100644 --- a/src/collectors/ebpf.plugin/ebpf_shm.h +++ b/src/collectors/ebpf.plugin/ebpf_shm.h @@ -23,21 +23,33 @@ #define NETDATA_CGROUP_SHM_DT_CONTEXT "cgroup.shmdt" #define NETDATA_CGROUP_SHM_CTL_CONTEXT "cgroup.shmctl" -#define NETDATA_SYSTEMD_SHM_GET_CONTEXT "systemd.services.shmget" -#define NETDATA_SYSTEMD_SHM_AT_CONTEXT "systemd.services.shmat" -#define NETDATA_SYSTEMD_SHM_DT_CONTEXT "systemd.services.shmdt" -#define NETDATA_SYSTEMD_SHM_CTL_CONTEXT "systemd.services.shmctl" +#define NETDATA_SYSTEMD_SHM_GET_CONTEXT "systemd.service.shmget" +#define NETDATA_SYSTEMD_SHM_AT_CONTEXT "systemd.service.shmat" +#define NETDATA_SYSTEMD_SHM_DT_CONTEXT 
"systemd.service.shmdt" +#define NETDATA_SYSTEMD_SHM_CTL_CONTEXT "systemd.service.shmctl" -typedef struct netdata_publish_shm { +typedef struct __attribute__((packed)) netdata_publish_shm { uint64_t ct; - char name[TASK_COMM_LEN]; - uint64_t get; - uint64_t at; - uint64_t dt; - uint64_t ctl; + uint32_t get; + uint32_t at; + uint32_t dt; + uint32_t ctl; } netdata_publish_shm_t; +typedef struct netdata_ebpf_shm { + uint64_t ct; + uint32_t tgid; + uint32_t uid; + uint32_t gid; + char name[TASK_COMM_LEN]; + + uint32_t get; + uint32_t at; + uint32_t dt; + uint32_t ctl; +} netdata_ebpf_shm_t; + enum shm_tables { NETDATA_PID_SHM_TABLE, NETDATA_SHM_CONTROLLER, diff --git a/src/collectors/ebpf.plugin/ebpf_socket.c b/src/collectors/ebpf.plugin/ebpf_socket.c index 9a55f7be..5b87a325 100644 --- a/src/collectors/ebpf.plugin/ebpf_socket.c +++ b/src/collectors/ebpf.plugin/ebpf_socket.c @@ -497,6 +497,10 @@ static void ebpf_socket_free(ebpf_module_t *em ) ebpf_update_stats(&plugin_statistics, em); ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); pthread_mutex_unlock(&ebpf_exit_cleanup); + + pthread_mutex_lock(&lock); + collect_pids &= ~(1<<EBPF_MODULE_SOCKET_IDX); + pthread_mutex_unlock(&lock); } /** @@ -509,9 +513,9 @@ static void ebpf_socket_free(ebpf_module_t *em ) static void ebpf_obsolete_systemd_socket_charts(int update_every, char *id) { int order = 20080; - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, - NETDATA_NET_APPS_CONNECTION_TCP_V4, + ebpf_write_chart_obsolete(id, + NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V4, + "", "Calls to tcp_v4_connection", EBPF_COMMON_UNITS_CONNECTIONS, NETDATA_APPS_NET_GROUP, @@ -521,9 +525,9 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every, char *id) update_every); if (tcp_v6_connect_address.type == 'T') { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, - NETDATA_NET_APPS_CONNECTION_TCP_V6, + ebpf_write_chart_obsolete(id, + NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V6, + "", "Calls to tcp_v6_connection", EBPF_COMMON_UNITS_CONNECTIONS, NETDATA_APPS_NET_GROUP, @@ -533,31 +537,20 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every, char *id) update_every); } - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, - NETDATA_NET_APPS_BANDWIDTH_RECV, - "Bits received", - EBPF_COMMON_UNITS_KILOBITS, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NETDATA_SERVICES_SOCKET_BYTES_RECV_CONTEXT, - order++, - update_every); - - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, - NETDATA_NET_APPS_BANDWIDTH_SENT, - "Bits sent", + ebpf_write_chart_obsolete(id, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH, + "", + "Bandwidth.", EBPF_COMMON_UNITS_KILOBITS, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, - NETDATA_SERVICES_SOCKET_BYTES_SEND_CONTEXT, + NETDATA_SERVICES_SOCKET_TCP_BANDWIDTH_CONTEXT, order++, update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, - NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, + ebpf_write_chart_obsolete(id, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RECV_CALLS, + "", "Calls to tcp_cleanup_rbuf.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_NET_GROUP, @@ -566,9 +559,9 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every, char *id) order++, update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, - NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, + ebpf_write_chart_obsolete(id, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_SEND_CALLS, + "", "Calls to tcp_sendmsg.", EBPF_COMMON_UNITS_CALLS_PER_SEC, 
NETDATA_APPS_NET_GROUP, @@ -577,9 +570,9 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every, char *id) order++, update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, - NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, + ebpf_write_chart_obsolete(id, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RETRANSMIT, + "", "Calls to tcp_retransmit", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_NET_GROUP, @@ -588,9 +581,9 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every, char *id) order++, update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, - NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, + ebpf_write_chart_obsolete(id, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_SEND_CALLS, + "", "Calls to udp_sendmsg", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_NET_GROUP, @@ -599,9 +592,9 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every, char *id) order++, update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, - NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, + ebpf_write_chart_obsolete(id, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_RECV_CALLS, + "", "Calls to udp_recvmsg", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_NET_GROUP, @@ -678,23 +671,12 @@ void ebpf_socket_obsolete_apps_charts(struct ebpf_module *em) ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, w->clean_name, - "_ebpf_sock_bytes_sent", - "Bits sent.", + "_ebpf_sock_bandwidth", + "Bandwidth.", EBPF_COMMON_UNITS_KILOBITS, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, - "app.ebpf_sock_bytes_sent", - order++, - update_every); - - ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, - w->clean_name, - "_ebpf_sock_bytes_received", - "Bits received.", - EBPF_COMMON_UNITS_KILOBITS, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - "app.ebpf_sock_bytes_received", + "app.ebpf_sock_total_bandwidth", order++, update_every); @@ -1056,18 +1038,14 @@ void ebpf_socket_send_apps_data() if (tcp_v6_connect_address.type == 'T') { ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_call_tcp_v6_connection"); - write_chart_dimension("calls", (collected_number) values->call_tcp_v6_connection); + write_chart_dimension("connections", (collected_number) values->call_tcp_v6_connection); ebpf_write_end_chart(); } - ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_sock_bytes_sent"); - // We multiply by 0.008, because we read bytes, but we display bits - write_chart_dimension("bandwidth", ebpf_socket_bytes2bits(values->bytes_sent)); - ebpf_write_end_chart(); - - ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_sock_bytes_received"); + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_sock_bandwidth"); // We multiply by 0.008, because we read bytes, but we display bits - write_chart_dimension("bandwidth", ebpf_socket_bytes2bits(values->bytes_received)); + write_chart_dimension("received", ebpf_socket_bytes2bits(values->bytes_received)); + write_chart_dimension("sent", ebpf_socket_bytes2bits(values->bytes_sent)); ebpf_write_end_chart(); ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_tcp_sendmsg"); @@ -1273,33 +1251,19 @@ void ebpf_socket_create_apps_charts(struct ebpf_module *em, void *ptr) ebpf_write_chart_cmd(NETDATA_APP_FAMILY, w->clean_name, - "_ebpf_sock_bytes_sent", - "Bits sent.", - EBPF_COMMON_UNITS_KILOBITS, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - "app.ebpf_sock_bytes_sent", - order++, - update_every, - NETDATA_EBPF_MODULE_NAME_SOCKET); - 
ebpf_create_chart_labels("app_group", w->name, RRDLABEL_SRC_AUTO); - ebpf_commit_label(); - fprintf(stdout, "DIMENSION bandwidth '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); - - ebpf_write_chart_cmd(NETDATA_APP_FAMILY, - w->clean_name, - "_ebpf_sock_bytes_received", - "Bits received.", + "_ebpf_sock_bandwidth", + "Bandwidth.", EBPF_COMMON_UNITS_KILOBITS, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, - "app.ebpf_sock_bytes_received", + "app.ebpf_sock_total_bandwidth", order++, update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_chart_labels("app_group", w->name, RRDLABEL_SRC_AUTO); ebpf_commit_label(); - fprintf(stdout, "DIMENSION bandwidth '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + fprintf(stdout, "DIMENSION received '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + fprintf(stdout, "DIMENSION sent '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); ebpf_write_chart_cmd(NETDATA_APP_FAMILY, w->clean_name, @@ -1714,6 +1678,7 @@ static void ebpf_update_array_vectors(ebpf_module_t *em) time_t update_time = time(NULL); while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { test = bpf_map_lookup_elem(fd, &key, values); + bool deleted = true; if (test < 0) { goto end_socket_loop; } @@ -1723,7 +1688,6 @@ static void ebpf_update_array_vectors(ebpf_module_t *em) } ebpf_hash_socket_accumulator(values, end); - ebpf_socket_fill_publish_apps(key.pid, values); // We update UDP to show info with charts, but we do not show them with functions /* @@ -1767,14 +1731,17 @@ static void ebpf_update_array_vectors(ebpf_module_t *em) } uint64_t prev_period = socket_ptr->data.current_timestamp; memcpy(&socket_ptr->data, &values[0], sizeof(netdata_socket_t)); - if (translate) + if (translate) { ebpf_socket_translate(socket_ptr, &key); - else { // Check socket was updated + deleted = false; + } else { // Check socket was updated + deleted = false; if (prev_period) { if (values[0].current_timestamp > prev_period) // Socket updated socket_ptr->last_update = update_time; else if ((update_time - socket_ptr->last_update) > em->update_every) { // Socket was not updated since last read + deleted = true; JudyLDel(&pid_ptr->socket_stats.JudyLArray, values[0].first_timestamp, PJE0); aral_freez(aral_socket_table, socket_ptr); } @@ -1785,7 +1752,19 @@ static void ebpf_update_array_vectors(ebpf_module_t *em) rw_spinlock_write_unlock(&pid_ptr->socket_stats.rw_spinlock); rw_spinlock_write_unlock(&ebpf_judy_pid.index.rw_spinlock); -end_socket_loop: +end_socket_loop: ; // the empty statement is here to allow code to be compiled by old compilers + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(key.pid, 0, values[0].name, EBPF_MODULE_SOCKET_IDX); + ebpf_socket_publish_apps_t *curr = local_pid->socket; + if (!curr) + local_pid->socket = curr = ebpf_socket_allocate_publish(); + + if (!deleted) + ebpf_socket_fill_publish_apps(curr, values); + else { + ebpf_release_pid_data(local_pid, fd, key.pid, EBPF_MODULE_SOCKET_IDX); + ebpf_socket_release_publish(curr); + local_pid->socket = NULL; + } memset(values, 0, length); memcpy(&key, &next_key, sizeof(key)); } @@ -1805,23 +1784,22 @@ void ebpf_socket_resume_apps_data() ebpf_socket_publish_apps_t *values = &w->socket; memset(&w->socket, 0, sizeof(ebpf_socket_publish_apps_t)); - while (move) { + for (; move; move = move->next) { int32_t pid = move->pid; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - ebpf_socket_publish_apps_t *ws = &local_pid->socket; - values->call_tcp_v4_connection 
= ws->call_tcp_v4_connection; - values->call_tcp_v6_connection = ws->call_tcp_v6_connection; - values->bytes_sent = ws->bytes_sent; - values->bytes_received = ws->bytes_received; - values->call_tcp_sent = ws->call_tcp_sent; - values->call_tcp_received = ws->call_tcp_received; - values->retransmit = ws->retransmit; - values->call_udp_sent = ws->call_udp_sent; - values->call_udp_received = ws->call_udp_received; - } - - move = move->next; + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_MODULE_SOCKET_IDX); + ebpf_socket_publish_apps_t *ws = local_pid->socket; + if (!ws) + continue; + + values->call_tcp_v4_connection = ws->call_tcp_v4_connection; + values->call_tcp_v6_connection = ws->call_tcp_v6_connection; + values->bytes_sent = ws->bytes_sent; + values->bytes_received = ws->bytes_received; + values->call_tcp_sent = ws->call_tcp_sent; + values->call_tcp_received = ws->call_tcp_received; + values->retransmit = ws->retransmit; + values->call_udp_sent = ws->call_udp_sent; + values->call_udp_received = ws->call_udp_received; } } } @@ -1846,6 +1824,9 @@ void *ebpf_read_socket_thread(void *ptr) int update_every = em->update_every; int counter = update_every - 1; + int collect_pid = (em->apps_charts || em->cgroup_charts); + if (!collect_pid) + return NULL; uint32_t running_time = 0; uint32_t lifetime = em->lifetime; @@ -2009,14 +1990,8 @@ static void ebpf_socket_read_hash_global_tables(netdata_idx_t *stats, int maps_p * @param current_pid the PID that I am updating * @param ns the structure with data read from memory. */ -void ebpf_socket_fill_publish_apps(uint32_t current_pid, netdata_socket_t *ns) +void ebpf_socket_fill_publish_apps(ebpf_socket_publish_apps_t *curr, netdata_socket_t *ns) { - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(current_pid, 0); - if (!local_pid) - return; - - ebpf_socket_publish_apps_t *curr = &local_pid->socket; - curr->bytes_sent = ns->tcp.tcp_bytes_sent; curr->bytes_received = ns->tcp.tcp_bytes_received; curr->call_tcp_sent = ns->tcp.call_tcp_sent; @@ -2045,21 +2020,21 @@ static void ebpf_update_socket_cgroup() for (pids = ect->pids; pids; pids = pids->next) { int pid = pids->pid; ebpf_socket_publish_apps_t *publish = &ect->publish_socket; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - ebpf_socket_publish_apps_t *in = &local_pid->socket; - - publish->bytes_sent = in->bytes_sent; - publish->bytes_received = in->bytes_received; - publish->call_tcp_sent = in->call_tcp_sent; - publish->call_tcp_received = in->call_tcp_received; - publish->retransmit = in->retransmit; - publish->call_udp_sent = in->call_udp_sent; - publish->call_udp_received = in->call_udp_received; - publish->call_close = in->call_close; - publish->call_tcp_v4_connection = in->call_tcp_v4_connection; - publish->call_tcp_v6_connection = in->call_tcp_v6_connection; - } + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_MODULE_SOCKET_IDX); + ebpf_socket_publish_apps_t *in = local_pid->socket; + if (!in) + continue; + + publish->bytes_sent = in->bytes_sent; + publish->bytes_received = in->bytes_received; + publish->call_tcp_sent = in->call_tcp_sent; + publish->call_tcp_received = in->call_tcp_received; + publish->retransmit = in->retransmit; + publish->call_udp_sent = in->call_udp_sent; + publish->call_udp_received = in->call_udp_received; + publish->call_close = in->call_close; + publish->call_tcp_v4_connection = in->call_tcp_v4_connection; + publish->call_tcp_v6_connection = in->call_tcp_v6_connection; } } 
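The rewritten ebpf_socket_resume_apps_data and ebpf_update_socket_cgroup loops above follow a pattern this commit applies across modules: the per-PID entry no longer embeds every module's publish structure, it carries a pointer that is allocated only when that module first sees the PID, and aggregation loops skip entries whose pointer is still NULL. A minimal sketch of that pattern, assuming hypothetical type and helper names (only ebpf_socket_publish_apps_t appears in the patch):

    #include <stddef.h>
    #include <string.h>

    typedef struct ebpf_socket_publish_apps {
        unsigned long long bytes_sent;
        unsigned long long bytes_received;
    } ebpf_socket_publish_apps_t;

    typedef struct pid_entry {
        ebpf_socket_publish_apps_t *socket;   /* NULL until the socket module collects data for this PID */
    } pid_entry_t;

    /* Fold the per-PID socket counters of one target into a single publish structure. */
    static void sum_target(pid_entry_t *pids, size_t count, ebpf_socket_publish_apps_t *out)
    {
        memset(out, 0, sizeof(*out));
        for (size_t i = 0; i < count; i++) {
            ebpf_socket_publish_apps_t *ws = pids[i].socket;
            if (!ws)               /* module never touched this PID: nothing to add */
                continue;
            out->bytes_sent     += ws->bytes_sent;
            out->bytes_received += ws->bytes_received;
        }
    }

Compared with the previous embedded structures, this keeps the PID table small when only a few modules run.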
pthread_mutex_unlock(&mutex_cgroup_shm); @@ -2121,119 +2096,128 @@ static void ebpf_create_specific_socket_charts(char *type, int update_every) { int order_basis = 5300; char *label = (!strncmp(type, "cgroup_", 7)) ? &type[7] : type; - ebpf_create_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V4, - "Calls to tcp_v4_connection", - EBPF_COMMON_UNITS_CONNECTIONS, NETDATA_CGROUP_NET_GROUP, - NETDATA_CGROUP_TCP_V4_CONN_CONTEXT, - NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, - ebpf_create_global_dimension, - &socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V4], 1, - update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + ebpf_write_chart_cmd(type, + NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V4, + "", + "Calls to tcp_v4_connection", + EBPF_COMMON_UNITS_CONNECTIONS, + NETDATA_CGROUP_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_TCP_V4_CONN_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_chart_labels("cgroup_name", label, RRDLABEL_SRC_AUTO); ebpf_commit_label(); + fprintf(stdout, "DIMENSION connections '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); if (tcp_v6_connect_address.type == 'T') { - ebpf_create_chart(type, - NETDATA_NET_APPS_CONNECTION_TCP_V6, - "Calls to tcp_v6_connection", - EBPF_COMMON_UNITS_CONNECTIONS, - NETDATA_CGROUP_NET_GROUP, - NETDATA_CGROUP_TCP_V6_CONN_CONTEXT, - NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, - ebpf_create_global_dimension, - &socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V6], - 1, - update_every, - NETDATA_EBPF_MODULE_NAME_SOCKET); + ebpf_write_chart_cmd(type, + NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V6, + "", + "Calls to tcp_v6_connection", + EBPF_COMMON_UNITS_CONNECTIONS, + NETDATA_CGROUP_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_TCP_V6_CONN_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_chart_labels("cgroup_name", label, RRDLABEL_SRC_AUTO); ebpf_commit_label(); + fprintf(stdout, "DIMENSION connections '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); } - ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_RECV, - "Bits received", - EBPF_COMMON_UNITS_KILOBITS, NETDATA_CGROUP_NET_GROUP, - NETDATA_CGROUP_SOCKET_BYTES_RECV_CONTEXT, - NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, - ebpf_create_global_dimension, - &socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF], 1, - update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + ebpf_write_chart_cmd(type, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH, + "", + "Bandwidth.", + EBPF_COMMON_UNITS_KILOBITS, + NETDATA_CGROUP_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_SOCKET_TCP_BANDWIDTH_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_chart_labels("cgroup_name", label, RRDLABEL_SRC_AUTO); ebpf_commit_label(); - - ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_SENT, - "Bits sent", - EBPF_COMMON_UNITS_KILOBITS, NETDATA_CGROUP_NET_GROUP, - NETDATA_CGROUP_SOCKET_BYTES_SEND_CONTEXT, - NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, - ebpf_create_global_dimension, - socket_publish_aggregated, 1, - update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + fprintf(stdout, "DIMENSION received '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + 
fprintf(stdout, "DIMENSION sent '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(type, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RECV_CALLS, + "", + "Calls to tcp_cleanup_rbuf.", + EBPF_COMMON_UNITS_CALLS_PER_SEC, + NETDATA_CGROUP_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_SOCKET_TCP_RECV_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_chart_labels("cgroup_name", label, RRDLABEL_SRC_AUTO); ebpf_commit_label(); - - ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, - "Calls to tcp_cleanup_rbuf.", - EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_CGROUP_NET_GROUP, - NETDATA_CGROUP_SOCKET_TCP_RECV_CONTEXT, - NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, - ebpf_create_global_dimension, - &socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF], 1, - update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(type, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_SEND_CALLS, + "", + "Calls to tcp_sendmsg.", + EBPF_COMMON_UNITS_CALLS_PER_SEC, + NETDATA_CGROUP_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_SOCKET_TCP_SEND_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_chart_labels("cgroup_name", label, RRDLABEL_SRC_AUTO); ebpf_commit_label(); - - ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, - "Calls to tcp_sendmsg.", - EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_CGROUP_NET_GROUP, - NETDATA_CGROUP_SOCKET_TCP_SEND_CONTEXT, - NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, - ebpf_create_global_dimension, - socket_publish_aggregated, 1, - update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(type, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RETRANSMIT, + "", + "Calls to tcp_retransmit.", + EBPF_COMMON_UNITS_CALLS_PER_SEC, + NETDATA_CGROUP_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_SOCKET_TCP_RETRANSMIT_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_chart_labels("cgroup_name", label, RRDLABEL_SRC_AUTO); ebpf_commit_label(); - - ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, - "Calls to tcp_retransmit.", - EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_CGROUP_NET_GROUP, - NETDATA_CGROUP_SOCKET_TCP_RETRANSMIT_CONTEXT, - NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, - ebpf_create_global_dimension, - &socket_publish_aggregated[NETDATA_IDX_TCP_RETRANSMIT], 1, - update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(type, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_SEND_CALLS, + "", + "Calls to udp_sendmsg.", + EBPF_COMMON_UNITS_CALLS_PER_SEC, + NETDATA_CGROUP_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_SOCKET_UDP_SEND_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_chart_labels("cgroup_name", label, RRDLABEL_SRC_AUTO); ebpf_commit_label(); - - ebpf_create_chart(type, 
NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, - "Calls to udp_sendmsg", - EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_CGROUP_NET_GROUP, - NETDATA_CGROUP_SOCKET_UDP_SEND_CONTEXT, - NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, - ebpf_create_global_dimension, - &socket_publish_aggregated[NETDATA_IDX_UDP_SENDMSG], 1, - update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); - ebpf_create_chart_labels("cgroup_name", label, RRDLABEL_SRC_AUTO); - ebpf_commit_label(); - - ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, - "Calls to udp_recvmsg", - EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_CGROUP_NET_GROUP, - NETDATA_CGROUP_SOCKET_UDP_RECV_CONTEXT, - NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, - ebpf_create_global_dimension, - &socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF], 1, - update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(type, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_RECV_CALLS, + "", + "Calls to udp_recvmsg.", + EBPF_COMMON_UNITS_CALLS_PER_SEC, + NETDATA_CGROUP_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_SOCKET_UDP_RECV_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_chart_labels("cgroup_name", label, RRDLABEL_SRC_AUTO); ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); } /** @@ -2247,57 +2231,65 @@ static void ebpf_create_specific_socket_charts(char *type, int update_every) static void ebpf_obsolete_specific_socket_charts(char *type, int update_every) { int order_basis = 5300; - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_CONNECTION_TCP_V4, "", "Calls to tcp_v4_connection", - EBPF_COMMON_UNITS_CONNECTIONS, NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_V4_CONN_CONTEXT, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); + ebpf_write_chart_obsolete(type, + NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V4, + "", + "Calls to tcp_v4_connection", + EBPF_COMMON_UNITS_CONNECTIONS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_TCP_V4_CONN_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + update_every); if (tcp_v6_connect_address.type == 'T') { ebpf_write_chart_obsolete(type, - NETDATA_NET_APPS_CONNECTION_TCP_V6, + NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V6, "", "Calls to tcp_v6_connection", EBPF_COMMON_UNITS_CONNECTIONS, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT, + NETDATA_CGROUP_TCP_V6_CONN_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); } - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_RECV, "", "Bits received", - EBPF_COMMON_UNITS_KILOBITS, NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_BYTES_RECV_CONTEXT, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); - - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_SENT, "","Bits sent", + ebpf_write_chart_obsolete(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH, + "", + "Bandwidth.", EBPF_COMMON_UNITS_KILOBITS, NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_BYTES_SEND_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SOCKET_TCP_BANDWIDTH_CONTEXT, 
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, "", "Calls to tcp_cleanup_rbuf.", + ebpf_write_chart_obsolete(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RECV_CALLS, "", + "Calls to tcp_cleanup_rbuf.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_RECV_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SOCKET_TCP_RECV_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, "", "Calls to tcp_sendmsg.", + ebpf_write_chart_obsolete(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_SEND_CALLS, "", + "Calls to tcp_sendmsg.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_SEND_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SOCKET_TCP_SEND_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, "", "Calls to tcp_retransmit.", + ebpf_write_chart_obsolete(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RETRANSMIT, "", + "Calls to tcp_retransmit.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_RETRANSMIT_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SOCKET_TCP_RETRANSMIT_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, "", "Calls to udp_sendmsg", + ebpf_write_chart_obsolete(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_SEND_CALLS, "", + "Calls to udp_sendmsg.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_UDP_SEND_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SOCKET_UDP_SEND_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, "", "Calls to udp_recvmsg", + ebpf_write_chart_obsolete(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_RECV_CALLS, "", + "Calls to udp_recvmsg.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_SERVICES_SOCKET_UDP_RECV_CONTEXT, + NETDATA_CGROUP_SOCKET_UDP_RECV_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); } @@ -2311,51 +2303,39 @@ static void ebpf_obsolete_specific_socket_charts(char *type, int update_every) */ static void ebpf_send_specific_socket_data(char *type, ebpf_socket_publish_apps_t *values) { - ebpf_write_begin_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V4, ""); - write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V4].name, - (long long) values->call_tcp_v4_connection); + ebpf_write_begin_chart(type, NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V4, ""); + write_chart_dimension("connections", (long long) values->call_tcp_v4_connection); ebpf_write_end_chart(); if (tcp_v6_connect_address.type == 'T') { - ebpf_write_begin_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V6, ""); - write_chart_dimension( - socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V6].name, (long long)values->call_tcp_v6_connection); + ebpf_write_begin_chart(type, NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V6, ""); + write_chart_dimension("connections", (long long)values->call_tcp_v6_connection); 
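The ebpf_write_begin_chart / write_chart_dimension / ebpf_write_end_chart calls used throughout ebpf_send_specific_socket_data drive Netdata's external-plugin text protocol on stdout: a BEGIN line, one SET per dimension, then END. Roughly, one bandwidth update expands to the sketch below, with the dimension ids taken from this patch and everything else illustrative:

    #include <stdio.h>

    /* Emit one update for the already-declared total_bandwidth chart of a cgroup type. */
    static void emit_bandwidth(const char *type, long long received_kbits, long long sent_kbits)
    {
        printf("BEGIN %s.total_bandwidth\n", type);
        printf("SET received = %lld\n", received_kbits);
        printf("SET sent = %lld\n", sent_kbits);
        printf("END\n");
        fflush(stdout);            /* the agent reads the pipe line by line */
    }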
ebpf_write_end_chart(); } - ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_SENT, ""); - write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_SENDMSG].name, - (long long) ebpf_socket_bytes2bits(values->bytes_sent)); + ebpf_write_begin_chart(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH, ""); + write_chart_dimension("received", (long long) ebpf_socket_bytes2bits(values->bytes_received)); + write_chart_dimension("sent", (long long) ebpf_socket_bytes2bits(values->bytes_sent)); ebpf_write_end_chart(); - ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_RECV, ""); - write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF].name, - (long long) ebpf_socket_bytes2bits(values->bytes_received)); + ebpf_write_begin_chart(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RECV_CALLS, ""); + write_chart_dimension("calls", (long long) values->call_tcp_received); ebpf_write_end_chart(); - ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, ""); - write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_SENDMSG].name, - (long long) values->call_tcp_sent); + ebpf_write_begin_chart(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_SEND_CALLS, ""); + write_chart_dimension("calls", (long long) values->call_tcp_sent); ebpf_write_end_chart(); - ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, ""); - write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF].name, - (long long) values->call_tcp_received); + ebpf_write_begin_chart(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RETRANSMIT, ""); + write_chart_dimension("calls", (long long) values->retransmit); ebpf_write_end_chart(); - ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, ""); - write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_RETRANSMIT].name, - (long long) values->retransmit); + ebpf_write_begin_chart(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_SEND_CALLS, ""); + write_chart_dimension("calls", (long long) values->call_udp_sent); ebpf_write_end_chart(); - ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, ""); - write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_UDP_SENDMSG].name, - (long long) values->call_udp_sent); - ebpf_write_end_chart(); - - ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, ""); - write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF].name, - (long long) values->call_udp_received); + ebpf_write_begin_chart(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_RECV_CALLS, ""); + write_chart_dimension("calls", (long long) values->call_udp_received); ebpf_write_end_chart(); } @@ -2378,8 +2358,8 @@ static void ebpf_create_systemd_socket_charts(int update_every) .context = NETDATA_SERVICES_SOCKET_TCP_V4_CONN_CONTEXT, .module = NETDATA_EBPF_MODULE_NAME_SOCKET, .update_every = 0, - .suffix = NETDATA_NET_APPS_CONNECTION_TCP_V4, - .dimension = EBPF_COMMON_UNITS_CONNECTIONS + .suffix = NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V4, + .dimension = "connections" }; static ebpf_systemd_args_t data_tcp_v6 = { @@ -2392,36 +2372,22 @@ static void ebpf_create_systemd_socket_charts(int update_every) .context = NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT, .module = NETDATA_EBPF_MODULE_NAME_SOCKET, .update_every = 0, - .suffix = NETDATA_NET_APPS_CONNECTION_TCP_V6, - .dimension = "connection" + .suffix = NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V6, + .dimension = "connections" }; - static ebpf_systemd_args_t data_bandwith_recv = { - .title = "Bits 
received", + static ebpf_systemd_args_t data_bandwidth = { + .title = "Bandwidth.", .units = EBPF_COMMON_UNITS_KILOBITS, .family = NETDATA_APPS_NET_GROUP, .charttype = NETDATA_EBPF_CHART_TYPE_STACKED, .order = 20082, .algorithm = EBPF_CHART_ALGORITHM_INCREMENTAL, - .context = NETDATA_SERVICES_SOCKET_BYTES_RECV_CONTEXT, + .context = NETDATA_SERVICES_SOCKET_TCP_BANDWIDTH_CONTEXT, .module = NETDATA_EBPF_MODULE_NAME_SOCKET, .update_every = 0, - .suffix = NETDATA_NET_APPS_BANDWIDTH_RECV, - .dimension = "connection" - }; - - static ebpf_systemd_args_t data_bandwith_sent = { - .title = "Bits sent", - .units = EBPF_COMMON_UNITS_KILOBITS, - .family = NETDATA_APPS_NET_GROUP, - .charttype = NETDATA_EBPF_CHART_TYPE_STACKED, - .order = 20083, - .algorithm = EBPF_CHART_ALGORITHM_INCREMENTAL, - .context = NETDATA_SERVICES_SOCKET_BYTES_SEND_CONTEXT, - .module = NETDATA_EBPF_MODULE_NAME_SOCKET, - .update_every = 0, - .suffix = NETDATA_NET_APPS_BANDWIDTH_SENT, - .dimension = EBPF_COMMON_UNITS_KILOBITS + .suffix = NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH, + .dimension = "received,sent" }; static ebpf_systemd_args_t data_tcp_cleanup = { @@ -2434,7 +2400,7 @@ static void ebpf_create_systemd_socket_charts(int update_every) .context = NETDATA_SERVICES_SOCKET_TCP_RECV_CONTEXT, .module = NETDATA_EBPF_MODULE_NAME_SOCKET, .update_every = 0, - .suffix = NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, + .suffix = NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RECV_CALLS, .dimension = "calls" }; @@ -2448,7 +2414,7 @@ static void ebpf_create_systemd_socket_charts(int update_every) .context = NETDATA_SERVICES_SOCKET_TCP_SEND_CONTEXT, .module = NETDATA_EBPF_MODULE_NAME_SOCKET, .update_every = 0, - .suffix = NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, + .suffix = NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_SEND_CALLS, .dimension = "calls" }; @@ -2462,7 +2428,7 @@ static void ebpf_create_systemd_socket_charts(int update_every) .context = NETDATA_SERVICES_SOCKET_TCP_RETRANSMIT_CONTEXT, .module = NETDATA_EBPF_MODULE_NAME_SOCKET, .update_every = 0, - .suffix = NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, + .suffix = NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RETRANSMIT, .dimension = "calls" }; @@ -2476,7 +2442,7 @@ static void ebpf_create_systemd_socket_charts(int update_every) .context = NETDATA_SERVICES_SOCKET_UDP_SEND_CONTEXT, .module = NETDATA_EBPF_MODULE_NAME_SOCKET, .update_every = 0, - .suffix = NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, + .suffix = NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_SEND_CALLS, .dimension = "calls" }; @@ -2490,13 +2456,13 @@ static void ebpf_create_systemd_socket_charts(int update_every) .context = NETDATA_SERVICES_SOCKET_UDP_RECV_CONTEXT, .module = NETDATA_EBPF_MODULE_NAME_SOCKET, .update_every = 0, - .suffix = NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, + .suffix = NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_RECV_CALLS, .dimension = "calls" }; if (!data_tcp_v4.update_every) - data_tcp_v4.update_every = data_tcp_v6.update_every = data_bandwith_recv.update_every = - data_bandwith_sent.update_every = data_tcp_cleanup.update_every = data_tcp_sendmsg.update_every = + data_tcp_v4.update_every = data_tcp_v6.update_every = data_bandwidth.update_every = + data_tcp_cleanup.update_every = data_tcp_sendmsg.update_every = data_tcp_retransmit.update_every = data_udp_send.update_every = data_udp_recv.update_every = update_every; ebpf_cgroup_target_t *w; @@ -2504,8 +2470,8 @@ static void ebpf_create_systemd_socket_charts(int update_every) if (unlikely(!w->systemd || w->flags & NETDATA_EBPF_SERVICES_HAS_SOCKET_CHART)) continue; - 
data_tcp_v4.id = data_tcp_v6.id = data_bandwith_recv.id = - data_bandwith_sent.id = data_tcp_cleanup.id = data_tcp_sendmsg.id = + data_tcp_v4.id = data_tcp_v6.id = data_bandwidth.id = + data_tcp_cleanup.id = data_tcp_sendmsg.id = data_tcp_retransmit.id = data_udp_send.id = data_udp_recv.id = w->name; ebpf_create_charts_on_systemd(&data_tcp_v4); @@ -2513,8 +2479,7 @@ static void ebpf_create_systemd_socket_charts(int update_every) ebpf_create_charts_on_systemd(&data_tcp_v6); } - ebpf_create_charts_on_systemd(&data_bandwith_recv); - ebpf_create_charts_on_systemd(&data_bandwith_sent); + ebpf_create_charts_on_systemd(&data_bandwidth); ebpf_create_charts_on_systemd(&data_tcp_cleanup); @@ -2543,41 +2508,38 @@ static void ebpf_send_systemd_socket_charts() continue; } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_NET_APPS_CONNECTION_TCP_V4); + ebpf_write_begin_chart(ect->name, NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V4, ""); write_chart_dimension("connections", (long long)ect->publish_socket.call_tcp_v4_connection); ebpf_write_end_chart(); if (tcp_v6_connect_address.type == 'T') { - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_NET_APPS_CONNECTION_TCP_V6); + ebpf_write_begin_chart(ect->name, NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V6, ""); write_chart_dimension("connections", (long long)ect->publish_socket.call_tcp_v6_connection); ebpf_write_end_chart(); } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_NET_APPS_BANDWIDTH_SENT); - write_chart_dimension("bits", (long long)ect->publish_socket.bytes_sent); + ebpf_write_begin_chart(ect->name, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH, ""); + write_chart_dimension("received", (long long)ect->publish_socket.bytes_received); + write_chart_dimension("sent", (long long)ect->publish_socket.bytes_sent); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_NET_APPS_BANDWIDTH_RECV); - write_chart_dimension("bits", (long long)ect->publish_socket.bytes_received); - ebpf_write_end_chart(); - - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS); + ebpf_write_begin_chart(ect->name, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_SEND_CALLS, ""); write_chart_dimension("calls", (long long)ect->publish_socket.call_tcp_sent); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS); + ebpf_write_begin_chart(ect->name, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RECV_CALLS, ""); write_chart_dimension("calls", (long long)ect->publish_socket.call_tcp_received); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT); + ebpf_write_begin_chart(ect->name, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RETRANSMIT, ""); write_chart_dimension("calls", (long long)ect->publish_socket.retransmit); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS); + ebpf_write_begin_chart(ect->name, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_SEND_CALLS, ""); write_chart_dimension("calls", (long long)ect->publish_socket.call_udp_sent); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS); + ebpf_write_begin_chart(ect->name, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_RECV_CALLS, ""); write_chart_dimension("calls", (long long)ect->publish_socket.call_udp_received); ebpf_write_end_chart(); } @@ 
-2888,6 +2850,7 @@ static int ebpf_socket_load_bpf(ebpf_module_t *em) */ void *ebpf_socket_thread(void *ptr) { + pids_fd[EBPF_PIDS_SOCKET_IDX] = -1; ebpf_module_t *em = (ebpf_module_t *)ptr; CLEANUP_FUNCTION_REGISTER(ebpf_socket_exit) cleanup_ptr = em; @@ -2917,7 +2880,6 @@ void *ebpf_socket_thread(void *ptr) ebpf_adjust_thread_load(em, default_btf); #endif if (ebpf_socket_load_bpf(em)) { - pthread_mutex_unlock(&lock); goto endsocket; } diff --git a/src/collectors/ebpf.plugin/ebpf_socket.h b/src/collectors/ebpf.plugin/ebpf_socket.h index b36ed064..e0112603 100644 --- a/src/collectors/ebpf.plugin/ebpf_socket.h +++ b/src/collectors/ebpf.plugin/ebpf_socket.h @@ -112,16 +112,15 @@ typedef enum ebpf_socket_idx { #define NETDATA_UDP_FUNCTION_BITS "total_udp_bandwidth" #define NETDATA_UDP_FUNCTION_ERROR "udp_error" -// Charts created on Apps submenu -#define NETDATA_NET_APPS_CONNECTION_TCP_V4 "outbound_conn_v4" -#define NETDATA_NET_APPS_CONNECTION_TCP_V6 "outbound_conn_v6" -#define NETDATA_NET_APPS_BANDWIDTH_SENT "total_bandwidth_sent" -#define NETDATA_NET_APPS_BANDWIDTH_RECV "total_bandwidth_recv" -#define NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS "bandwidth_tcp_send" -#define NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS "bandwidth_tcp_recv" -#define NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT "bandwidth_tcp_retransmit" -#define NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS "bandwidth_udp_send" -#define NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS "bandwidth_udp_recv" +// Charts created (id or suffix) +#define NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V4 "outbound_conn_v4" +#define NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V6 "outbound_conn_v6" +#define NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH "total_bandwidth" +#define NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_SEND_CALLS "bandwidth_tcp_send" +#define NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RECV_CALLS "bandwidth_tcp_recv" +#define NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RETRANSMIT "bandwidth_tcp_retransmit" +#define NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_SEND_CALLS "bandwidth_udp_send" +#define NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_RECV_CALLS "bandwidth_udp_recv" // Port range #define NETDATA_MINIMUM_PORT_VALUE 1 @@ -137,30 +136,28 @@ typedef enum ebpf_socket_idx { // Contexts #define NETDATA_CGROUP_TCP_V4_CONN_CONTEXT "cgroup.net_conn_ipv4" #define NETDATA_CGROUP_TCP_V6_CONN_CONTEXT "cgroup.net_conn_ipv6" -#define NETDATA_CGROUP_SOCKET_BYTES_RECV_CONTEXT "cgroup.net_bytes_recv" -#define NETDATA_CGROUP_SOCKET_BYTES_SEND_CONTEXT "cgroup.net_bytes_send" +#define NETDATA_CGROUP_SOCKET_TCP_BANDWIDTH_CONTEXT "cgroup.net_total_bandwidth" #define NETDATA_CGROUP_SOCKET_TCP_RECV_CONTEXT "cgroup.net_tcp_recv" #define NETDATA_CGROUP_SOCKET_TCP_SEND_CONTEXT "cgroup.net_tcp_send" #define NETDATA_CGROUP_SOCKET_TCP_RETRANSMIT_CONTEXT "cgroup.net_retransmit" #define NETDATA_CGROUP_SOCKET_UDP_RECV_CONTEXT "cgroup.net_udp_recv" #define NETDATA_CGROUP_SOCKET_UDP_SEND_CONTEXT "cgroup.net_udp_send" -#define NETDATA_SERVICES_SOCKET_TCP_V4_CONN_CONTEXT "systemd.services.net_conn_ipv4" -#define NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT "systemd.services.net_conn_ipv6" -#define NETDATA_SERVICES_SOCKET_BYTES_RECV_CONTEXT "systemd.services.net_bytes_recv" -#define NETDATA_SERVICES_SOCKET_BYTES_SEND_CONTEXT "systemd.services.net_bytes_send" -#define NETDATA_SERVICES_SOCKET_TCP_RECV_CONTEXT "systemd.services.net_tcp_recv" -#define NETDATA_SERVICES_SOCKET_TCP_SEND_CONTEXT "systemd.services.net_tcp_send" -#define NETDATA_SERVICES_SOCKET_TCP_RETRANSMIT_CONTEXT 
"systemd.services.net_retransmit" -#define NETDATA_SERVICES_SOCKET_UDP_RECV_CONTEXT "systemd.services.net_udp_recv" -#define NETDATA_SERVICES_SOCKET_UDP_SEND_CONTEXT "systemd.services.net_udp_send" +#define NETDATA_SERVICES_SOCKET_TCP_V4_CONN_CONTEXT "systemd.service.net_conn_ipv4" +#define NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT "systemd.service.net_conn_ipv6" +#define NETDATA_SERVICES_SOCKET_TCP_BANDWIDTH_CONTEXT "systemd.service.net_total_bandwidth" +#define NETDATA_SERVICES_SOCKET_TCP_RECV_CONTEXT "systemd.service.net_tcp_recv" +#define NETDATA_SERVICES_SOCKET_TCP_SEND_CONTEXT "systemd.service.net_tcp_send" +#define NETDATA_SERVICES_SOCKET_TCP_RETRANSMIT_CONTEXT "systemd.service.net_retransmit" +#define NETDATA_SERVICES_SOCKET_UDP_RECV_CONTEXT "systemd.service.net_udp_recv" +#define NETDATA_SERVICES_SOCKET_UDP_SEND_CONTEXT "systemd.service.net_udp_send" // ARAL name #define NETDATA_EBPF_SOCKET_ARAL_NAME "ebpf_socket" #define NETDATA_EBPF_PID_SOCKET_ARAL_TABLE_NAME "ebpf_pid_socket" #define NETDATA_EBPF_SOCKET_ARAL_TABLE_NAME "ebpf_socket_tbl" -typedef struct ebpf_socket_publish_apps { +typedef struct __attribute__((packed)) ebpf_socket_publish_apps { // Data read uint64_t bytes_sent; // Bytes sent uint64_t bytes_received; // Bytes received @@ -345,8 +342,7 @@ void ebpf_parse_service_name_section(struct config *cfg); void ebpf_parse_ips_unsafe(char *ptr); void ebpf_parse_ports(char *ptr); void ebpf_socket_read_open_connections(BUFFER *buf, struct ebpf_module *em); -void ebpf_socket_fill_publish_apps(uint32_t current_pid, netdata_socket_t *ns); - +void ebpf_socket_fill_publish_apps(ebpf_socket_publish_apps_t *curr, netdata_socket_t *ns); extern struct config socket_config; extern netdata_ebpf_targets_t socket_targets[]; diff --git a/src/collectors/ebpf.plugin/ebpf_swap.c b/src/collectors/ebpf.plugin/ebpf_swap.c index 1e2a7cc6..93335317 100644 --- a/src/collectors/ebpf.plugin/ebpf_swap.c +++ b/src/collectors/ebpf.plugin/ebpf_swap.c @@ -10,7 +10,7 @@ static netdata_publish_syscall_t swap_publish_aggregated[NETDATA_SWAP_END]; static netdata_idx_t swap_hash_values[NETDATA_SWAP_END]; static netdata_idx_t *swap_values = NULL; -netdata_publish_swap_t *swap_vector = NULL; +netdata_ebpf_swap_t *swap_vector = NULL; struct config swap_config = { .first_section = NULL, .last_section = NULL, @@ -274,9 +274,9 @@ static void ebpf_obsolete_specific_swap_charts(char *type, int update_every); */ static void ebpf_obsolete_swap_services(ebpf_module_t *em, char *id) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_MEM_SWAP_READ_CHART, + "", "Calls to function swap_readpage.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_SYSTEM_SWAP_SUBMENU, @@ -285,9 +285,9 @@ static void ebpf_obsolete_swap_services(ebpf_module_t *em, char *id) 20191, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_MEM_SWAP_WRITE_CHART, + "", "Calls to function swap_writepage.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_SYSTEM_SWAP_SUBMENU, @@ -391,8 +391,13 @@ static void ebpf_obsolete_swap_global(ebpf_module_t *em) */ static void ebpf_swap_exit(void *ptr) { + pids_fd[EBPF_PIDS_SWAP_IDX] = -1; ebpf_module_t *em = (ebpf_module_t *)ptr; + pthread_mutex_lock(&lock); + collect_pids &= ~(1<<EBPF_MODULE_SWAP_IDX); + pthread_mutex_unlock(&lock); + if (ebpf_read_swap.thread) nd_thread_signal_cancel(ebpf_read_swap.thread); @@ -447,14 +452,21 @@ static void ebpf_swap_exit(void *ptr) * @param out the vector with read values. 
* @param maps_per_core do I need to read all cores? */ -static void swap_apps_accumulator(netdata_publish_swap_t *out, int maps_per_core) +static void swap_apps_accumulator(netdata_ebpf_swap_t *out, int maps_per_core) { int i, end = (maps_per_core) ? ebpf_nprocs : 1; - netdata_publish_swap_t *total = &out[0]; + netdata_ebpf_swap_t *total = &out[0]; + uint64_t ct = total->ct; for (i = 1; i < end; i++) { - netdata_publish_swap_t *w = &out[i]; + netdata_ebpf_swap_t *w = &out[i]; total->write += w->write; total->read += w->read; + + if (w->ct > ct) + ct = w->ct; + + if (!total->name[0] && w->name[0]) + strncpyz(total->name, w->name, sizeof(total->name) - 1); } } @@ -472,12 +484,11 @@ static void ebpf_update_swap_cgroup() for (pids = ect->pids; pids; pids = pids->next) { int pid = pids->pid; netdata_publish_swap_t *out = &pids->swap; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - netdata_publish_swap_t *in = &local_pid->swap; - - memcpy(out, in, sizeof(netdata_publish_swap_t)); - } + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_SWAP_IDX); + netdata_publish_swap_t *in = local_pid->swap; + if (!in) + continue; + memcpy(out, in, sizeof(netdata_publish_swap_t)); } } pthread_mutex_unlock(&mutex_cgroup_shm); @@ -496,15 +507,15 @@ static void ebpf_swap_sum_pids(netdata_publish_swap_t *swap, struct ebpf_pid_on_ uint64_t local_read = 0; uint64_t local_write = 0; - while (root) { + for (; root; root = root->next) { int32_t pid = root->pid; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - netdata_publish_swap_t *w = &local_pid->swap; - local_write += w->write; - local_read += w->read; - } - root = root->next; + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_SWAP_IDX); + netdata_publish_swap_t *w = local_pid->swap; + if (!w) + continue; + + local_write += w->write; + local_read += w->read; } // These conditions were added, because we are using incremental algorithm @@ -532,12 +543,13 @@ void ebpf_swap_resume_apps_data() { * Read the apps table and store data inside the structure. * * @param maps_per_core do I need to read all cores? 
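swap_apps_accumulator, shown just above, folds the per-CPU copies returned by one lookup on a per-CPU BPF hash map into slot zero, keeping the newest collection timestamp and the first non-empty command name. A sketch of that accumulation under a hypothetical element type; the real code sizes the array with ebpf_nprocs and only loops when maps_per_core is set:

    #include <stdint.h>
    #include <string.h>

    #define TASK_COMM_LEN 16

    typedef struct sample {
        uint64_t ct;                  /* timestamp written by the kernel program */
        char name[TASK_COMM_LEN];     /* process comm captured in kernel space */
        uint32_t read;
        uint32_t write;
    } sample_t;

    /* out[0..ncpus-1] was filled by a single bpf_map_lookup_elem() on a per-CPU map. */
    static void accumulate(sample_t *out, int ncpus)
    {
        sample_t *total = &out[0];
        for (int i = 1; i < ncpus; i++) {
            sample_t *w = &out[i];
            total->read  += w->read;
            total->write += w->write;
            if (w->ct > total->ct)
                total->ct = w->ct;
            if (!total->name[0] && w->name[0]) {
                strncpy(total->name, w->name, sizeof(total->name) - 1);
                total->name[sizeof(total->name) - 1] = '\0';
            }
        }
    }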
+ * @param max_period limit of iterations without updates before remove data from hash table */ -static void ebpf_read_swap_apps_table(int maps_per_core, int max_period) +static void ebpf_read_swap_apps_table(int maps_per_core, uint32_t max_period) { - netdata_publish_swap_t *cv = swap_vector; + netdata_ebpf_swap_t *cv = swap_vector; int fd = swap_maps[NETDATA_PID_SWAP_TABLE].map_fd; - size_t length = sizeof(netdata_publish_swap_t); + size_t length = sizeof(netdata_ebpf_swap_t); if (maps_per_core) length *= ebpf_nprocs; @@ -549,17 +561,22 @@ static void ebpf_read_swap_apps_table(int maps_per_core, int max_period) swap_apps_accumulator(cv, maps_per_core); - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(key, cv->tgid); - if (!local_pid) - goto end_swap_loop; + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(key, cv->tgid, cv->name, EBPF_PIDS_SWAP_IDX); + netdata_publish_swap_t *publish = local_pid->swap; + if (!publish) + local_pid->swap = publish = ebpf_swap_allocate_publish_swap(); - netdata_publish_swap_t *publish = &local_pid->swap; if (!publish->ct || publish->ct != cv->ct) { memcpy(publish, cv, sizeof(netdata_publish_swap_t)); local_pid->not_updated = 0; - } else if (++local_pid->not_updated >= max_period) { - bpf_map_delete_elem(fd, &key); - local_pid->not_updated = 0; + } else { + if (kill(key, 0)) { // No PID found + ebpf_reset_specific_pid_data(local_pid); + } else { // There is PID, but there is not data anymore + ebpf_release_pid_data(local_pid, fd, key, EBPF_PIDS_SWAP_IDX); + ebpf_swap_release_publish(publish); + local_pid->swap = NULL; + } } // We are cleaning to avoid passing data read from one process to other. @@ -587,13 +604,17 @@ void *ebpf_read_swap_thread(void *ptr) int maps_per_core = em->maps_per_core; int update_every = em->update_every; + int collect_pid = (em->apps_charts || em->cgroup_charts); + if (!collect_pid) + return NULL; int counter = update_every - 1; uint32_t lifetime = em->lifetime; uint32_t running_time = 0; usec_t period = update_every * USEC_PER_SEC; - int max_period = update_every * EBPF_CLEANUP_FACTOR; + uint32_t max_period = EBPF_CLEANUP_FACTOR; + pids_fd[EBPF_PIDS_SWAP_IDX] = swap_maps[NETDATA_PID_SWAP_TABLE].map_fd; while (!ebpf_plugin_stop() && running_time < lifetime) { (void)heartbeat_next(&hb, period); @@ -722,11 +743,11 @@ static void ebpf_send_systemd_swap_charts() continue; } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_MEM_SWAP_READ_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_MEM_SWAP_READ_CHART, ""); write_chart_dimension("calls", (long long) ect->publish_systemd_swap.read); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_MEM_SWAP_WRITE_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_MEM_SWAP_WRITE_CHART, ""); write_chart_dimension("calls", (long long) ect->publish_systemd_swap.write); ebpf_write_end_chart(); } @@ -1017,7 +1038,7 @@ void ebpf_swap_create_apps_charts(struct ebpf_module *em, void *ptr) */ static void ebpf_swap_allocate_global_vectors() { - swap_vector = callocz((size_t)ebpf_nprocs, sizeof(netdata_publish_swap_t)); + swap_vector = callocz((size_t)ebpf_nprocs, sizeof(netdata_ebpf_swap_t)); swap_values = callocz((size_t)ebpf_nprocs, sizeof(netdata_idx_t)); diff --git a/src/collectors/ebpf.plugin/ebpf_swap.h b/src/collectors/ebpf.plugin/ebpf_swap.h index 92aecd29..478b47ad 100644 --- a/src/collectors/ebpf.plugin/ebpf_swap.h +++ b/src/collectors/ebpf.plugin/ebpf_swap.h @@ -21,19 +21,26 @@ // Contexts #define NETDATA_CGROUP_SWAP_READ_CONTEXT 
"cgroup.swap_read" #define NETDATA_CGROUP_SWAP_WRITE_CONTEXT "cgroup.swap_write" -#define NETDATA_SYSTEMD_SWAP_READ_CONTEXT "systemd.services.swap_read" -#define NETDATA_SYSTEMD_SWAP_WRITE_CONTEXT "systemd.services.swap_write" +#define NETDATA_SYSTEMD_SWAP_READ_CONTEXT "systemd.service.swap_read" +#define NETDATA_SYSTEMD_SWAP_WRITE_CONTEXT "systemd.service.swap_write" -typedef struct netdata_publish_swap { +typedef struct __attribute__((packed)) netdata_publish_swap { + uint64_t ct; + + uint32_t read; + uint32_t write; +} netdata_publish_swap_t; + +typedef struct netdata_ebpf_swap { uint64_t ct; uint32_t tgid; uint32_t uid; uint32_t gid; char name[TASK_COMM_LEN]; - uint64_t read; - uint64_t write; -} netdata_publish_swap_t; + uint32_t read; + uint32_t write; +} netdata_ebpf_swap_t; enum swap_tables { NETDATA_PID_SWAP_TABLE, diff --git a/src/collectors/ebpf.plugin/ebpf_vfs.c b/src/collectors/ebpf.plugin/ebpf_vfs.c index eea27192..cf1f50e9 100644 --- a/src/collectors/ebpf.plugin/ebpf_vfs.c +++ b/src/collectors/ebpf.plugin/ebpf_vfs.c @@ -11,7 +11,7 @@ static char *vfs_id_names[NETDATA_KEY_PUBLISH_VFS_END] = { "vfs_unlink", "vfs_re static netdata_idx_t *vfs_hash_values = NULL; static netdata_syscall_stat_t vfs_aggregated_data[NETDATA_KEY_PUBLISH_VFS_END]; static netdata_publish_syscall_t vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_END]; -netdata_publish_vfs_t *vfs_vector = NULL; +netdata_ebpf_vfs_t *vfs_vector = NULL; static ebpf_local_maps_t vfs_maps[] = {{.name = "tbl_vfs_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, .user_input = 0, .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, @@ -396,9 +396,9 @@ static void ebpf_obsolete_specific_vfs_charts(char *type, ebpf_module_t *em); */ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_FILE_DELETED, + "", "Files deleted", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -407,9 +407,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) 20065, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, + "", "Write to disk", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -419,9 +419,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, + "", "Fails to write", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -431,9 +431,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) em->update_every); } - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_READ_CALLS, + "", "Read from disk", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -443,9 +443,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, + "", "Fails to read", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -455,9 +455,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) em->update_every); } - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, + 
"", "Bytes written on disk", EBPF_COMMON_UNITS_BYTES, NETDATA_VFS_GROUP, @@ -466,9 +466,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) 20070, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_READ_BYTES, + "", "Bytes read from disk", EBPF_COMMON_UNITS_BYTES, NETDATA_VFS_GROUP, @@ -477,9 +477,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) 20071, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_FSYNC, + "", "Calls to vfs_fsync.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -489,9 +489,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, + "", "Sync error", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -501,9 +501,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) em->update_every); } - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_OPEN, + "", "Calls to vfs_open.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -513,9 +513,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, + "", "Open error", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -525,9 +525,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) em->update_every); } - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_CREATE, + "", "Calls to vfs_create.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -537,9 +537,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, + "", "Create error", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -881,6 +881,10 @@ static void ebpf_vfs_exit(void *pptr) ebpf_module_t *em = CLEANUP_FUNCTION_GET_PTR(pptr); if(!em) return; + pthread_mutex_lock(&lock); + collect_pids &= ~(1<<EBPF_MODULE_VFS_IDX); + pthread_mutex_unlock(&lock); + if (ebpf_read_vfs.thread) nd_thread_signal_cancel(ebpf_read_vfs.thread); @@ -1029,6 +1033,74 @@ static void ebpf_vfs_read_global_table(netdata_idx_t *stats, int maps_per_core) } /** + * Set VFS + * + * Set vfs structure with values from ebpf structure. + * + * @param vfs the output structure. + * @param w the input data. 
+ */ +static inline void vfs_aggregate_set_vfs(netdata_publish_vfs_t *vfs, netdata_ebpf_vfs_t *w) +{ + vfs->write_call = w->write_call; + vfs->writev_call = w->writev_call; + vfs->read_call = w->read_call; + vfs->readv_call = w->readv_call; + vfs->unlink_call = w->unlink_call; + vfs->fsync_call = w->fsync_call; + vfs->open_call = w->open_call; + vfs->create_call = w->create_call; + + vfs->write_bytes = w->write_bytes; + vfs->writev_bytes = w->writev_bytes; + vfs->read_bytes = w->read_bytes; + vfs->readv_bytes = w->readv_bytes; + + vfs->write_err = w->write_err; + vfs->writev_err = w->writev_err; + vfs->read_err = w->read_err; + vfs->readv_err = w->readv_err; + vfs->unlink_err = w->unlink_err; + vfs->fsync_err = w->fsync_err; + vfs->open_err = w->open_err; + vfs->create_err = w->create_err; +} + +/** + * Aggregate Publish VFS + * + * Aggregate data from w source. + * + * @param vfs the output structure. + * @param w the input data. + */ +static inline void vfs_aggregate_publish_vfs(netdata_publish_vfs_t *vfs, netdata_publish_vfs_t *w) +{ + vfs->write_call += w->write_call; + vfs->writev_call += w->writev_call; + vfs->read_call += w->read_call; + vfs->readv_call += w->readv_call; + vfs->unlink_call += w->unlink_call; + vfs->fsync_call += w->fsync_call; + vfs->open_call += w->open_call; + vfs->create_call += w->create_call; + + vfs->write_bytes += w->write_bytes; + vfs->writev_bytes += w->writev_bytes; + vfs->read_bytes += w->read_bytes; + vfs->readv_bytes += w->readv_bytes; + + vfs->write_err += w->write_err; + vfs->writev_err += w->writev_err; + vfs->read_err += w->read_err; + vfs->readv_err += w->readv_err; + vfs->unlink_err += w->unlink_err; + vfs->fsync_err += w->fsync_err; + vfs->open_err += w->open_err; + vfs->create_err += w->create_err; +} + +/** * Sum PIDs * * Sum values for all targets. @@ -1038,63 +1110,17 @@ static void ebpf_vfs_read_global_table(netdata_idx_t *stats, int maps_per_core) */ static void ebpf_vfs_sum_pids(netdata_publish_vfs_t *vfs, struct ebpf_pid_on_target *root) { - netdata_publish_vfs_t accumulator; - memset(&accumulator, 0, sizeof(accumulator)); + memset(vfs, 0, sizeof(netdata_publish_vfs_t)); - while (root) { + for (; root; root = root->next) { int32_t pid = root->pid; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - netdata_publish_vfs_t *w = &local_pid->vfs; - accumulator.write_call += w->write_call; - accumulator.writev_call += w->writev_call; - accumulator.read_call += w->read_call; - accumulator.readv_call += w->readv_call; - accumulator.unlink_call += w->unlink_call; - accumulator.fsync_call += w->fsync_call; - accumulator.open_call += w->open_call; - accumulator.create_call += w->create_call; - - accumulator.write_bytes += w->write_bytes; - accumulator.writev_bytes += w->writev_bytes; - accumulator.read_bytes += w->read_bytes; - accumulator.readv_bytes += w->readv_bytes; - - accumulator.write_err += w->write_err; - accumulator.writev_err += w->writev_err; - accumulator.read_err += w->read_err; - accumulator.readv_err += w->readv_err; - accumulator.unlink_err += w->unlink_err; - accumulator.fsync_err += w->fsync_err; - accumulator.open_err += w->open_err; - accumulator.create_err += w->create_err; - } - root = root->next; - } - - // These conditions were added, because we are using incremental algorithm - vfs->write_call = (accumulator.write_call >= vfs->write_call) ? accumulator.write_call : vfs->write_call; - vfs->writev_call = (accumulator.writev_call >= vfs->writev_call) ? 
accumulator.writev_call : vfs->writev_call; - vfs->read_call = (accumulator.read_call >= vfs->read_call) ? accumulator.read_call : vfs->read_call; - vfs->readv_call = (accumulator.readv_call >= vfs->readv_call) ? accumulator.readv_call : vfs->readv_call; - vfs->unlink_call = (accumulator.unlink_call >= vfs->unlink_call) ? accumulator.unlink_call : vfs->unlink_call; - vfs->fsync_call = (accumulator.fsync_call >= vfs->fsync_call) ? accumulator.fsync_call : vfs->fsync_call; - vfs->open_call = (accumulator.open_call >= vfs->open_call) ? accumulator.open_call : vfs->open_call; - vfs->create_call = (accumulator.create_call >= vfs->create_call) ? accumulator.create_call : vfs->create_call; - - vfs->write_bytes = (accumulator.write_bytes >= vfs->write_bytes) ? accumulator.write_bytes : vfs->write_bytes; - vfs->writev_bytes = (accumulator.writev_bytes >= vfs->writev_bytes) ? accumulator.writev_bytes : vfs->writev_bytes; - vfs->read_bytes = (accumulator.read_bytes >= vfs->read_bytes) ? accumulator.read_bytes : vfs->read_bytes; - vfs->readv_bytes = (accumulator.readv_bytes >= vfs->readv_bytes) ? accumulator.readv_bytes : vfs->readv_bytes; + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_VFS_IDX); + netdata_publish_vfs_t *w = local_pid->vfs; + if (!w) + continue; - vfs->write_err = (accumulator.write_err >= vfs->write_err) ? accumulator.write_err : vfs->write_err; - vfs->writev_err = (accumulator.writev_err >= vfs->writev_err) ? accumulator.writev_err : vfs->writev_err; - vfs->read_err = (accumulator.read_err >= vfs->read_err) ? accumulator.read_err : vfs->read_err; - vfs->readv_err = (accumulator.readv_err >= vfs->readv_err) ? accumulator.readv_err : vfs->readv_err; - vfs->unlink_err = (accumulator.unlink_err >= vfs->unlink_err) ? accumulator.unlink_err : vfs->unlink_err; - vfs->fsync_err = (accumulator.fsync_err >= vfs->fsync_err) ? accumulator.fsync_err : vfs->fsync_err; - vfs->open_err = (accumulator.open_err >= vfs->open_err) ? accumulator.open_err : vfs->open_err; - vfs->create_err = (accumulator.create_err >= vfs->create_err) ? accumulator.create_err : vfs->create_err; + vfs_aggregate_publish_vfs(vfs, w); + } } /** @@ -1183,12 +1209,13 @@ void ebpf_vfs_send_apps_data(ebpf_module_t *em, struct ebpf_target *root) * * @param out the vector with read values. */ -static void vfs_apps_accumulator(netdata_publish_vfs_t *out, int maps_per_core) +static void vfs_apps_accumulator(netdata_ebpf_vfs_t *out, int maps_per_core) { int i, end = (maps_per_core) ? ebpf_nprocs : 1; - netdata_publish_vfs_t *total = &out[0]; + netdata_ebpf_vfs_t *total = &out[0]; + uint64_t ct = total->ct; for (i = 1; i < end; i++) { - netdata_publish_vfs_t *w = &out[i]; + netdata_ebpf_vfs_t *w = &out[i]; total->write_call += w->write_call; total->writev_call += w->writev_call; @@ -1206,17 +1233,23 @@ static void vfs_apps_accumulator(netdata_publish_vfs_t *out, int maps_per_core) total->read_err += w->read_err; total->readv_err += w->readv_err; total->unlink_err += w->unlink_err; + + if (w->ct > ct) + ct = w->ct; + + if (!total->name[0] && w->name[0]) + strncpyz(total->name, w->name, sizeof(total->name) - 1); } } /** * Read the hash table and store data to allocated vectors. 
*/ -static void ebpf_vfs_read_apps(int maps_per_core, int max_period) +static void ebpf_vfs_read_apps(int maps_per_core, uint32_t max_period) { - netdata_publish_vfs_t *vv = vfs_vector; + netdata_ebpf_vfs_t *vv = vfs_vector; int fd = vfs_maps[NETDATA_VFS_PID].map_fd; - size_t length = sizeof(netdata_publish_vfs_t); + size_t length = sizeof(netdata_ebpf_vfs_t); if (maps_per_core) length *= ebpf_nprocs; @@ -1228,17 +1261,22 @@ static void ebpf_vfs_read_apps(int maps_per_core, int max_period) vfs_apps_accumulator(vv, maps_per_core); - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(key, vv->tgid); - if (!local_pid) - goto end_vfs_loop; + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(key, vv->tgid, vv->name, EBPF_PIDS_VFS_IDX); + netdata_publish_vfs_t *publish = local_pid->vfs; + if (!publish) + local_pid->vfs = publish = ebpf_vfs_allocate_publish(); - netdata_publish_vfs_t *publish = &local_pid->vfs; if (!publish->ct || publish->ct != vv->ct) { - memcpy(publish, vv, sizeof(netdata_publish_vfs_t)); + vfs_aggregate_set_vfs(publish, vv); local_pid->not_updated = 0; } else if (++local_pid->not_updated >= max_period){ - bpf_map_delete_elem(fd, &key); - local_pid->not_updated = 0; + if (kill(key, 0)) { // No PID found + ebpf_reset_specific_pid_data(local_pid); + } else { // There is PID, but there is not data anymore + ebpf_release_pid_data(local_pid, fd, key, EBPF_PIDS_VFS_IDX); + ebpf_vfs_release_publish(publish); + local_pid->vfs = NULL; + } } end_vfs_loop: @@ -1264,12 +1302,14 @@ static void read_update_vfs_cgroup() for (pids = ect->pids; pids; pids = pids->next) { int pid = pids->pid; netdata_publish_vfs_t *out = &pids->vfs; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - netdata_publish_vfs_t *in = &local_pid->vfs; + memset(out, 0, sizeof(netdata_publish_vfs_t)); - memcpy(out, in, sizeof(netdata_publish_vfs_t)); - } + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_VFS_IDX); + netdata_publish_vfs_t *in = local_pid->vfs; + if (!in) + continue; + + vfs_aggregate_publish_vfs(out, in); } } pthread_mutex_unlock(&mutex_cgroup_shm); @@ -1284,7 +1324,7 @@ static void read_update_vfs_cgroup() * @param pids input data */ static void ebpf_vfs_sum_cgroup_pids(netdata_publish_vfs_t *vfs, struct pid_on_target2 *pids) - { +{ netdata_publish_vfs_t accumulator; memset(&accumulator, 0, sizeof(accumulator)); @@ -1888,70 +1928,70 @@ static void ebpf_send_systemd_vfs_charts(ebpf_module_t *em) continue; } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_FILE_DELETED); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_FILE_DELETED, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.unlink_call); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.write_call + ect->publish_systemd_vfs.writev_call); ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.write_err + ect->publish_systemd_vfs.writev_err); ebpf_write_end_chart(); } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_READ_CALLS); + ebpf_write_begin_chart(ect->name, 
NETDATA_SYSCALL_APPS_VFS_READ_CALLS, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.read_call + ect->publish_systemd_vfs.readv_call); ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.read_err + ect->publish_systemd_vfs.readv_err); ebpf_write_end_chart(); } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, ""); write_chart_dimension("bytes", ect->publish_systemd_vfs.write_bytes + ect->publish_systemd_vfs.writev_bytes); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_READ_BYTES); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_READ_BYTES, ""); write_chart_dimension("bytes", ect->publish_systemd_vfs.read_bytes + ect->publish_systemd_vfs.readv_bytes); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_FSYNC); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_FSYNC, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.fsync_call); ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.fsync_err); ebpf_write_end_chart(); } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_OPEN); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_OPEN, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.open_call); ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.open_err); ebpf_write_end_chart(); } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_CREATE); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_CREATE, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.create_call); ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.create_err); ebpf_write_end_chart(); } @@ -2031,13 +2071,17 @@ void *ebpf_read_vfs_thread(void *ptr) int maps_per_core = em->maps_per_core; int update_every = em->update_every; + int collect_pid = (em->apps_charts || em->cgroup_charts); + if (!collect_pid) + return NULL; int counter = update_every - 1; uint32_t lifetime = em->lifetime; uint32_t running_time = 0; usec_t period = update_every * USEC_PER_SEC; - int max_period = update_every * EBPF_CLEANUP_FACTOR; + uint32_t max_period = EBPF_CLEANUP_FACTOR; + pids_fd[EBPF_PIDS_VFS_IDX] = vfs_maps[NETDATA_VFS_PID].map_fd; while (!ebpf_plugin_stop() && running_time < lifetime) { (void)heartbeat_next(&hb, period); if (ebpf_plugin_stop() || ++counter != update_every) @@ -2527,7 +2571,7 
@@ void ebpf_vfs_create_apps_charts(struct ebpf_module *em, void *ptr) */ static void ebpf_vfs_allocate_global_vectors() { - vfs_vector = callocz(ebpf_nprocs, sizeof(netdata_publish_vfs_t)); + vfs_vector = callocz(ebpf_nprocs, sizeof(netdata_ebpf_vfs_t)); memset(vfs_aggregated_data, 0, sizeof(vfs_aggregated_data)); memset(vfs_publish_aggregated, 0, sizeof(vfs_publish_aggregated)); @@ -2586,6 +2630,7 @@ static int ebpf_vfs_load_bpf(ebpf_module_t *em) */ void *ebpf_vfs_thread(void *ptr) { + pids_fd[EBPF_PIDS_VFS_IDX] = -1; ebpf_module_t *em = (ebpf_module_t *)ptr; CLEANUP_FUNCTION_REGISTER(ebpf_vfs_exit) cleanup_ptr = em; @@ -2618,7 +2663,8 @@ void *ebpf_vfs_thread(void *ptr) pthread_mutex_unlock(&lock); - ebpf_read_vfs.thread = nd_thread_create(ebpf_read_vfs.name, NETDATA_THREAD_OPTION_DEFAULT, ebpf_read_vfs_thread, em); + ebpf_read_vfs.thread = nd_thread_create(ebpf_read_vfs.name, NETDATA_THREAD_OPTION_DEFAULT, + ebpf_read_vfs_thread, em); vfs_collector(em); diff --git a/src/collectors/ebpf.plugin/ebpf_vfs.h b/src/collectors/ebpf.plugin/ebpf_vfs.h index 398e2831..7458cd85 100644 --- a/src/collectors/ebpf.plugin/ebpf_vfs.h +++ b/src/collectors/ebpf.plugin/ebpf_vfs.h @@ -55,19 +55,19 @@ #define NETDATA_CGROUP_VFS_FSYNC_CONTEXT "cgroup.vfs_fsync" #define NETDATA_CGROUP_VFS_FSYNC_ERROR_CONTEXT "cgroup.vfs_fsync_error" -#define NETDATA_SYSTEMD_VFS_UNLINK_CONTEXT "systemd.services.vfs_unlink" -#define NETDATA_SYSTEMD_VFS_WRITE_CONTEXT "systemd.services.vfs_write" -#define NETDATA_SYSTEMD_VFS_WRITE_ERROR_CONTEXT "systemd.services.vfs_write_error" -#define NETDATA_SYSTEMD_VFS_READ_CONTEXT "systemd.services.vfs_read" -#define NETDATA_SYSTEMD_VFS_READ_ERROR_CONTEXT "systemd.services.vfs_read_error" -#define NETDATA_SYSTEMD_VFS_WRITE_BYTES_CONTEXT "systemd.services.vfs_write_bytes" -#define NETDATA_SYSTEMD_VFS_READ_BYTES_CONTEXT "systemd.services.vfs_read_bytes" -#define NETDATA_SYSTEMD_VFS_CREATE_CONTEXT "systemd.services.vfs_create" -#define NETDATA_SYSTEMD_VFS_CREATE_ERROR_CONTEXT "systemd.services.vfs_create_error" -#define NETDATA_SYSTEMD_VFS_OPEN_CONTEXT "systemd.services.vfs_open" -#define NETDATA_SYSTEMD_VFS_OPEN_ERROR_CONTEXT "systemd.services.vfs_open_error" -#define NETDATA_SYSTEMD_VFS_FSYNC_CONTEXT "systemd.services.vfs_fsync" -#define NETDATA_SYSTEMD_VFS_FSYNC_ERROR_CONTEXT "systemd.services.vfs_fsync_error" +#define NETDATA_SYSTEMD_VFS_UNLINK_CONTEXT "systemd.service.vfs_unlink" +#define NETDATA_SYSTEMD_VFS_WRITE_CONTEXT "systemd.service.vfs_write" +#define NETDATA_SYSTEMD_VFS_WRITE_ERROR_CONTEXT "systemd.service.vfs_write_error" +#define NETDATA_SYSTEMD_VFS_READ_CONTEXT "systemd.service.vfs_read" +#define NETDATA_SYSTEMD_VFS_READ_ERROR_CONTEXT "systemd.service.vfs_read_error" +#define NETDATA_SYSTEMD_VFS_WRITE_BYTES_CONTEXT "systemd.service.vfs_write_bytes" +#define NETDATA_SYSTEMD_VFS_READ_BYTES_CONTEXT "systemd.service.vfs_read_bytes" +#define NETDATA_SYSTEMD_VFS_CREATE_CONTEXT "systemd.service.vfs_create" +#define NETDATA_SYSTEMD_VFS_CREATE_ERROR_CONTEXT "systemd.service.vfs_create_error" +#define NETDATA_SYSTEMD_VFS_OPEN_CONTEXT "systemd.service.vfs_open" +#define NETDATA_SYSTEMD_VFS_OPEN_ERROR_CONTEXT "systemd.service.vfs_open_error" +#define NETDATA_SYSTEMD_VFS_FSYNC_CONTEXT "systemd.service.vfs_fsync" +#define NETDATA_SYSTEMD_VFS_FSYNC_ERROR_CONTEXT "systemd.service.vfs_fsync_error" // ARAL name #define NETDATA_EBPF_VFS_ARAL_NAME "ebpf_vfs" @@ -75,7 +75,38 @@ // dimension #define EBPF_COMMON_UNITS_BYTES "bytes/s" -typedef struct netdata_publish_vfs { +typedef struct 
__attribute__((packed)) netdata_publish_vfs { + uint64_t ct; + + //Counter + uint32_t write_call; + uint32_t writev_call; + uint32_t read_call; + uint32_t readv_call; + uint32_t unlink_call; + uint32_t fsync_call; + uint32_t open_call; + uint32_t create_call; + + //Accumulator + uint64_t write_bytes; + uint64_t writev_bytes; + uint64_t readv_bytes; + uint64_t read_bytes; + + //Counter + uint32_t write_err; + uint32_t writev_err; + uint32_t read_err; + uint32_t readv_err; + uint32_t unlink_err; + uint32_t fsync_err; + uint32_t open_err; + uint32_t create_err; + +} netdata_publish_vfs_t; + +typedef struct netdata_ebpf_vfs { uint64_t ct; uint32_t tgid; uint32_t uid; @@ -107,7 +138,7 @@ typedef struct netdata_publish_vfs { uint32_t fsync_err; uint32_t open_err; uint32_t create_err; -} netdata_publish_vfs_t; +} netdata_ebpf_vfs_t; enum netdata_publish_vfs_list { NETDATA_KEY_PUBLISH_VFS_UNLINK, diff --git a/src/collectors/ebpf.plugin/integrations/ebpf_process.md b/src/collectors/ebpf.plugin/integrations/ebpf_process.md index d6da0903..817d9169 100644 --- a/src/collectors/ebpf.plugin/integrations/ebpf_process.md +++ b/src/collectors/ebpf.plugin/integrations/ebpf_process.md @@ -68,6 +68,7 @@ Metrics: | netdata.ebpf_aral_stat_size | memory | bytes | | netdata.ebpf_aral_stat_alloc | aral | calls | | netdata.ebpf_threads | total, running | threads | +| netdata.ebpf_pids | user, kernel | pids | | netdata.ebpf_load_methods | legacy, co-re | methods | | netdata.ebpf_kernel_memory | memory_locked | bytes | | netdata.ebpf_hash_tables_count | hash_table | hash tables | diff --git a/src/collectors/ebpf.plugin/integrations/ebpf_socket.md b/src/collectors/ebpf.plugin/integrations/ebpf_socket.md index c5b61331..917dcaba 100644 --- a/src/collectors/ebpf.plugin/integrations/ebpf_socket.md +++ b/src/collectors/ebpf.plugin/integrations/ebpf_socket.md @@ -92,8 +92,7 @@ Metrics: |:------|:----------|:----| | app.ebpf_call_tcp_v4_connection | connections | connections/s | | app.ebpf_call_tcp_v6_connection | connections | connections/s | -| app.ebpf_sock_bytes_sent | bandwidth | kilobits/s | -| app.ebpf_sock_bytes_received | bandwidth | kilobits/s | +| app.ebpf_sock_total_bandwidth | received, sent | kilobits/s | | app.ebpf_call_tcp_sendmsg | calls | calls/s | | app.ebpf_call_tcp_cleanup_rbuf | calls | calls/s | | app.ebpf_call_tcp_retransmit | calls | calls/s | @@ -110,23 +109,22 @@ Metrics: | Metric | Dimensions | Unit | |:------|:----------|:----| -| cgroup.net_conn_ipv4 | connected_v4 | connections/s | -| cgroup.net_conn_ipv6 | connected_v6 | connections/s | -| cgroup.net_bytes_recv | received | calls/s | -| cgroup.net_bytes_sent | sent | calls/s | -| cgroup.net_tcp_recv | received | calls/s | -| cgroup.net_tcp_send | sent | calls/s | -| cgroup.net_retransmit | retransmitted | calls/s | -| cgroup.net_udp_send | sent | calls/s | -| cgroup.net_udp_recv | received | calls/s | -| services.net_conn_ipv6 | a dimension per systemd service | connections/s | -| services.net_bytes_recv | a dimension per systemd service | kilobits/s | -| services.net_bytes_sent | a dimension per systemd service | kilobits/s | -| services.net_tcp_recv | a dimension per systemd service | calls/s | -| services.net_tcp_send | a dimension per systemd service | calls/s | -| services.net_tcp_retransmit | a dimension per systemd service | calls/s | -| services.net_udp_send | a dimension per systemd service | calls/s | -| services.net_udp_recv | a dimension per systemd service | calls/s | +| cgroup.net_conn_ipv4 | connections | connections/s | +| 
cgroup.net_conn_ipv6 | connections | connections/s | +| cgroup.net_total_bandwidth | received, sent | kilobits/s | +| cgroup.net_tcp_recv | calls | calls/s | +| cgroup.net_tcp_send | calls | calls/s | +| cgroup.net_retransmit | calls | calls/s | +| cgroup.net_udp_send | calls | calls/s | +| cgroup.net_udp_recv | calls | calls/s | +| services.net_conn_ipv4 | connections | connections/s | +| services.net_conn_ipv6 | connections | connections/s | +| services.net_total_bandwidth | received, sent | kilobits/s | +| services.net_tcp_recv | calls | calls/s | +| services.net_tcp_send | calls | calls/s | +| services.net_tcp_retransmit | calls | calls/s | +| services.net_udp_send | calls | calls/s | +| services.net_udp_recv | calls | calls/s | diff --git a/src/collectors/ebpf.plugin/metadata.yaml b/src/collectors/ebpf.plugin/metadata.yaml index 4921e44f..861b0ba8 100644 --- a/src/collectors/ebpf.plugin/metadata.yaml +++ b/src/collectors/ebpf.plugin/metadata.yaml @@ -1739,18 +1739,13 @@ modules: chart_type: stacked dimensions: - name: connections - - name: app.ebpf_sock_bytes_sent + - name: app.ebpf_sock_total_bandwidth description: Bytes sent unit: "kilobits/s" chart_type: stacked dimensions: - - name: bandwidth - - name: app.ebpf_sock_bytes_received - description: bytes received - unit: "kilobits/s" - chart_type: stacked - dimensions: - - name: bandwidth + - name: received + - name: sent - name: app.ebpf_call_tcp_sendmsg description: Calls for tcp_sendmsg unit: "calls/s" @@ -1790,103 +1785,99 @@ modules: unit: "connections/s" chart_type: line dimensions: - - name: connected_v4 + - name: connections - name: cgroup.net_conn_ipv6 description: Calls to tcp_v6_connection unit: "connections/s" chart_type: line dimensions: - - name: connected_v6 - - name: cgroup.net_bytes_recv + - name: connections + - name: cgroup.net_total_bandwidth description: Bytes received - unit: "calls/s" + unit: "kilobits/s" chart_type: line dimensions: - name: received - - name: cgroup.net_bytes_sent - description: Bytes sent - unit: "calls/s" - chart_type: line - dimensions: - name: sent - name: cgroup.net_tcp_recv description: Calls to tcp_cleanup_rbuf. unit: "calls/s" chart_type: line dimensions: - - name: received + - name: calls - name: cgroup.net_tcp_send description: Calls to tcp_sendmsg. unit: "calls/s" chart_type: line dimensions: - - name: sent + - name: calls - name: cgroup.net_retransmit description: Calls to tcp_retransmit. 
unit: "calls/s" chart_type: line dimensions: - - name: retransmitted + - name: calls - name: cgroup.net_udp_send description: Calls to udp_sendmsg unit: "calls/s" chart_type: line dimensions: - - name: sent + - name: calls - name: cgroup.net_udp_recv description: Calls to udp_recvmsg unit: "calls/s" chart_type: line dimensions: - - name: received + - name: calls + - name: services.net_conn_ipv4 + description: Calls to tcp_v4_connection + unit: "connections/s" + chart_type: stacked + dimensions: + - name: connections - name: services.net_conn_ipv6 description: Calls to tcp_v6_connection unit: "connections/s" chart_type: stacked dimensions: - - name: a dimension per systemd service - - name: services.net_bytes_recv + - name: connections + - name: services.net_total_bandwidth description: Bytes received unit: "kilobits/s" chart_type: stacked dimensions: - - name: a dimension per systemd service - - name: services.net_bytes_sent - description: Bytes sent - unit: "kilobits/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service + - name: received + - name: sent - name: services.net_tcp_recv description: Calls to tcp_cleanup_rbuf. unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per systemd service + - name: calls - name: services.net_tcp_send description: Calls to tcp_sendmsg. unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per systemd service + - name: calls - name: services.net_tcp_retransmit description: Calls to tcp_retransmit unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per systemd service + - name: calls - name: services.net_udp_send description: Calls to udp_sendmsg unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per systemd service + - name: calls - name: services.net_udp_recv description: Calls to udp_recvmsg unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per systemd service + - name: calls - meta: plugin_name: ebpf.plugin module_name: dcstat @@ -3263,6 +3254,13 @@ modules: dimensions: - name: total - name: running + - name: netdata.ebpf_pids + description: Total number of monitored PIDs + unit: "pids" + chart_type: line + dimensions: + - name: user + - name: kernel - name: netdata.ebpf_load_methods description: Load info unit: "methods" |