From ab1bb5b7f1c3c3a7b240ab7fc8661459ecd7decb Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 20 Jul 2023 06:49:55 +0200 Subject: Adding upstream version 1.41.0. Signed-off-by: Daniel Baumann --- collectors/ebpf.plugin/README.md | 61 +- collectors/ebpf.plugin/ebpf.c | 823 ++++++--- collectors/ebpf.plugin/ebpf.d.conf | 3 + collectors/ebpf.plugin/ebpf.d/cachestat.conf | 3 + collectors/ebpf.plugin/ebpf.d/dcstat.conf | 3 + collectors/ebpf.plugin/ebpf.d/disk.conf | 5 +- collectors/ebpf.plugin/ebpf.d/fd.conf | 3 + collectors/ebpf.plugin/ebpf.d/filesystem.conf | 5 +- collectors/ebpf.plugin/ebpf.d/functions.conf | 3 + collectors/ebpf.plugin/ebpf.d/hardirq.conf | 5 +- collectors/ebpf.plugin/ebpf.d/mdflush.conf | 6 +- collectors/ebpf.plugin/ebpf.d/mount.conf | 4 + collectors/ebpf.plugin/ebpf.d/network.conf | 3 + collectors/ebpf.plugin/ebpf.d/oomkill.conf | 6 +- collectors/ebpf.plugin/ebpf.d/process.conf | 3 + collectors/ebpf.plugin/ebpf.d/shm.conf | 3 + collectors/ebpf.plugin/ebpf.d/softirq.conf | 5 +- collectors/ebpf.plugin/ebpf.d/swap.conf | 3 + collectors/ebpf.plugin/ebpf.d/sync.conf | 3 + collectors/ebpf.plugin/ebpf.d/vfs.conf | 3 + collectors/ebpf.plugin/ebpf.h | 8 + collectors/ebpf.plugin/ebpf_apps.c | 82 +- collectors/ebpf.plugin/ebpf_apps.h | 1 + collectors/ebpf.plugin/ebpf_cachestat.c | 230 ++- collectors/ebpf.plugin/ebpf_cachestat.h | 3 +- collectors/ebpf.plugin/ebpf_cgroup.c | 8 +- collectors/ebpf.plugin/ebpf_dcstat.c | 217 ++- collectors/ebpf.plugin/ebpf_dcstat.h | 3 +- collectors/ebpf.plugin/ebpf_disk.c | 153 +- collectors/ebpf.plugin/ebpf_disk.h | 8 +- collectors/ebpf.plugin/ebpf_fd.c | 226 ++- collectors/ebpf.plugin/ebpf_fd.h | 3 +- collectors/ebpf.plugin/ebpf_filesystem.c | 387 +++- collectors/ebpf.plugin/ebpf_filesystem.h | 6 +- collectors/ebpf.plugin/ebpf_functions.c | 419 +++++ collectors/ebpf.plugin/ebpf_functions.h | 29 + collectors/ebpf.plugin/ebpf_hardirq.c | 125 +- collectors/ebpf.plugin/ebpf_hardirq.h | 8 + collectors/ebpf.plugin/ebpf_mdflush.c | 195 +- collectors/ebpf.plugin/ebpf_mdflush.h | 10 +- collectors/ebpf.plugin/ebpf_mount.c | 68 +- collectors/ebpf.plugin/ebpf_mount.h | 3 +- collectors/ebpf.plugin/ebpf_oomkill.c | 128 +- collectors/ebpf.plugin/ebpf_oomkill.h | 3 + collectors/ebpf.plugin/ebpf_process.c | 523 +++--- collectors/ebpf.plugin/ebpf_process.h | 16 +- collectors/ebpf.plugin/ebpf_shm.c | 201 ++- collectors/ebpf.plugin/ebpf_shm.h | 3 +- collectors/ebpf.plugin/ebpf_socket.c | 99 +- collectors/ebpf.plugin/ebpf_socket.h | 3 +- collectors/ebpf.plugin/ebpf_softirq.c | 53 +- collectors/ebpf.plugin/ebpf_softirq.h | 3 + collectors/ebpf.plugin/ebpf_swap.c | 151 +- collectors/ebpf.plugin/ebpf_swap.h | 3 +- collectors/ebpf.plugin/ebpf_sync.c | 114 +- collectors/ebpf.plugin/ebpf_sync.h | 3 +- collectors/ebpf.plugin/ebpf_vfs.c | 498 +++++- collectors/ebpf.plugin/ebpf_vfs.h | 3 +- collectors/ebpf.plugin/multi_metadata.yaml | 2360 +++++++++++++++++++++++++ 59 files changed, 6573 insertions(+), 736 deletions(-) create mode 100644 collectors/ebpf.plugin/ebpf.d/functions.conf create mode 100644 collectors/ebpf.plugin/ebpf_functions.c create mode 100644 collectors/ebpf.plugin/ebpf_functions.h create mode 100644 collectors/ebpf.plugin/multi_metadata.yaml (limited to 'collectors/ebpf.plugin') diff --git a/collectors/ebpf.plugin/README.md b/collectors/ebpf.plugin/README.md index 94bbc184d..fb036a5aa 100644 --- a/collectors/ebpf.plugin/README.md +++ b/collectors/ebpf.plugin/README.md @@ -235,13 +235,12 @@ Linux metrics: The eBPF collector enables and runs the following eBPF 
programs by default: +- `cachestat`: Netdata's eBPF data collector creates charts about the memory page cache. When the integration with + [`apps.plugin`](https://github.com/netdata/netdata/blob/master/collectors/apps.plugin/README.md) is enabled, this collector creates charts for the whole host _and_ + for each application. - `fd` : This eBPF program creates charts that show information about calls to open files. - `mount`: This eBPF program creates charts that show calls to syscalls mount(2) and umount(2). - `shm`: This eBPF program creates charts that show calls to syscalls shmget(2), shmat(2), shmdt(2) and shmctl(2). -- `sync`: Monitor calls to syscalls sync(2), fsync(2), fdatasync(2), syncfs(2), msync(2), and sync_file_range(2). -- `network viewer`: This eBPF program creates charts with information about `TCP` and `UDP` functions, including the - bandwidth consumed by each. -- `vfs`: This eBPF program creates charts that show information about VFS (Virtual File System) functions. - `process`: This eBPF program creates charts that show information about process life. When in `return` mode, it also creates charts showing errors when these operations are executed. - `hardirq`: This eBPF program creates charts that show information about time spent servicing individual hardware @@ -254,9 +253,6 @@ The eBPF collector enables and runs the following eBPF programs by default: You can also enable the following eBPF programs: -- `cachestat`: Netdata's eBPF data collector creates charts about the memory page cache. When the integration with - [`apps.plugin`](https://github.com/netdata/netdata/blob/master/collectors/apps.plugin/README.md) is enabled, this collector creates charts for the whole host _and_ - for each application. - `dcstat` : This eBPF program creates charts that show information about file access using directory cache. It appends `kprobes` for `lookup_fast()` and `d_lookup()` to identify if files are inside directory cache, outside and files are not found. @@ -264,7 +260,11 @@ You can also enable the following eBPF programs: - `filesystem` : This eBPF program creates charts that show information about some filesystem latency. - `swap` : This eBPF program creates charts that show information about swap access. - `mdflush`: This eBPF program creates charts that show information about +- `sync`: Monitor calls to syscalls sync(2), fsync(2), fdatasync(2), syncfs(2), msync(2), and sync_file_range(2). +- `network viewer`: This eBPF program creates charts with information about `TCP` and `UDP` functions, including the + bandwidth consumed by each. multi-device software flushes. +- `vfs`: This eBPF program creates charts that show information about VFS (Virtual File System) functions. ### Configuring eBPF threads @@ -989,3 +989,50 @@ shows how the lockdown module impacts `ebpf.plugin` based on the selected option If you or your distribution compiled the kernel with the last combination, your system cannot load shared libraries required to run `ebpf.plugin`. + +## Function + +The eBPF plugin has a [function](https://github.com/netdata/netdata/blob/master/docs/cloud/netdata-functions.md) named +`ebpf_thread` that controls its internal threads and helps to reduce the overhead on host. Using the function you +can run the plugin with all threads disabled and enable them only when you want to take a look in specific areas. 
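For instance, one way to use this is to keep threads switched off at startup in `ebpf.d.conf` and start them on demand through `ebpf_thread`, as shown in the requests below. The following fragment is only a sketch: the `[ebpf programs]` section and option names follow the stock `ebpf.d.conf`, and the `no` values are illustrative assumptions rather than recommended settings.

```conf
# Sketch: keep optional threads disabled at startup so they only run when
# enabled on demand (for the configured lifetime) via the ebpf_thread function.
[ebpf programs]
    cachestat = no
    mount = no
```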
+ +### List threads + +To list the status of all threads, you can query the function endpoint directly: + +`http://localhost:19999/api/v1/function?function=ebpf_thread` + +It is also possible to query a specific thread by adding the keyword `thread` and the thread name: + +`http://localhost:19999/api/v1/function?function=ebpf_thread%20thread:mount` + +### Enable thread + +It is possible to enable a specific thread using the keyword `enable`: + +`http://localhost:19999/api/v1/function?function=ebpf_thread%20enable:mount` + +This will run the `mount` thread for 300 seconds (5 minutes). You can specify a different period by appending it +after the thread name: + +`http://localhost:19999/api/v1/function?function=ebpf_thread%20enable:mount:600` + +In this example, the `mount` thread will run for 600 seconds (10 minutes). + +### Disable thread + +It is also possible to stop any running thread using the keyword `disable`. For example, to disable `cachestat` you can +request: + +`http://localhost:19999/api/v1/function?function=ebpf_thread%20disable:cachestat` + +### Debugging threads + +You can verify the impact of threads on the host by running the +[ebpf_thread_function.sh](https://github.com/netdata/netdata/blob/master/tests/ebpf/ebpf_thread_function.sh) +script on your environment. + +You can check the results of having threads running on your environment in the Netdata monitoring section of your +dashboard. + +Threads running. diff --git a/collectors/ebpf.plugin/ebpf.c b/collectors/ebpf.plugin/ebpf.c index ffab37de3..72aedba6a 100644 --- a/collectors/ebpf.plugin/ebpf.c +++ b/collectors/ebpf.plugin/ebpf.c @@ -30,6 +30,8 @@ int ebpf_nprocs; int isrh = 0; int main_thread_id = 0; int process_pid_fd = -1; +static size_t global_iterations_counter = 1; +bool publish_internal_metrics = true; pthread_mutex_t lock; pthread_mutex_t ebpf_exit_cleanup; @@ -47,7 +49,8 @@ struct netdata_static_thread cgroup_integration_thread = { }; ebpf_module_t ebpf_modules[] = { - { .thread_name = "process", .config_name = "process", .enabled = 0, .start_routine = ebpf_process_thread, + { .thread_name = "process", .config_name = "process", .thread_description = NETDATA_EBPF_MODULE_PROCESS_DESC, + .enabled = 0, .start_routine = ebpf_process_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_process_create_apps_charts, .maps = NULL, @@ -56,8 +59,9 @@ ebpf_module_t ebpf_modules[] = { .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_10 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "socket", .config_name = "socket", .enabled = 0, .start_routine = ebpf_socket_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0 }, + { .thread_name = "socket", .config_name = "socket", .thread_description = NETDATA_EBPF_SOCKET_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_socket_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_socket_create_apps_charts, .maps = NULL, @@ -65,8 +69,9 @@ ebpf_module_t ebpf_modules[] = {
.config_file = NETDATA_NETWORK_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = socket_targets, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "cachestat", .config_name = "cachestat", .enabled = 0, .start_routine = ebpf_cachestat_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "cachestat", .config_name = "cachestat", .thread_description = NETDATA_EBPF_CACHESTAT_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_cachestat_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_cachestat_create_apps_charts, .maps = cachestat_maps, @@ -75,8 +80,9 @@ ebpf_module_t ebpf_modules[] = { .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18| NETDATA_V5_4 | NETDATA_V5_14 | NETDATA_V5_15 | NETDATA_V5_16, .load = EBPF_LOAD_LEGACY, .targets = cachestat_targets, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "sync", .config_name = "sync", .enabled = 0, .start_routine = ebpf_sync_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "sync", .config_name = "sync", .thread_description = NETDATA_EBPF_SYNC_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_sync_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &sync_config, @@ -84,8 +90,9 @@ ebpf_module_t ebpf_modules[] = { // All syscalls have the same kernels .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = sync_targets, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "dc", .config_name = "dc", .enabled = 0, .start_routine = ebpf_dcstat_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "dc", .config_name = "dc", .thread_description = NETDATA_EBPF_DC_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_dcstat_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_dcstat_create_apps_charts, .maps = dcstat_maps, @@ -93,8 +100,9 @@ ebpf_module_t ebpf_modules[] = { .config_file = NETDATA_DIRECTORY_DCSTAT_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = dc_targets, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "swap", .config_name = "swap", .enabled = 0, .start_routine = ebpf_swap_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, 
.running_time = 0}, + { .thread_name = "swap", .config_name = "swap", .thread_description = NETDATA_EBPF_SWAP_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_swap_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_swap_create_apps_charts, .maps = NULL, @@ -102,8 +110,9 @@ ebpf_module_t ebpf_modules[] = { .config_file = NETDATA_DIRECTORY_SWAP_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = swap_targets, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "vfs", .config_name = "vfs", .enabled = 0, .start_routine = ebpf_vfs_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "vfs", .config_name = "vfs", .thread_description = NETDATA_EBPF_VFS_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_vfs_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_vfs_create_apps_charts, .maps = NULL, @@ -111,32 +120,36 @@ ebpf_module_t ebpf_modules[] = { .config_file = NETDATA_DIRECTORY_VFS_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = vfs_targets, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "filesystem", .config_name = "filesystem", .enabled = 0, .start_routine = ebpf_filesystem_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "filesystem", .config_name = "filesystem", .thread_description = NETDATA_EBPF_FS_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_filesystem_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &fs_config, .config_file = NETDATA_FILESYSTEM_CONFIG_FILE, //We are setting kernels as zero, because we load eBPF programs according the kernel running. 
.kernels = 0, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES }, - { .thread_name = "disk", .config_name = "disk", .enabled = 0, .start_routine = ebpf_disk_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "disk", .config_name = "disk", .thread_description = NETDATA_EBPF_DISK_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_disk_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &disk_config, .config_file = NETDATA_DISK_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "mount", .config_name = "mount", .enabled = 0, .start_routine = ebpf_mount_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "mount", .config_name = "mount", .thread_description = NETDATA_EBPF_MOUNT_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_mount_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &mount_config, .config_file = NETDATA_MOUNT_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = mount_targets, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "fd", .config_name = "fd", .enabled = 0, .start_routine = ebpf_fd_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "fd", .config_name = "fd", .thread_description = NETDATA_EBPF_FD_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_fd_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_fd_create_apps_charts, .maps = NULL, @@ -145,24 +158,27 @@ ebpf_module_t ebpf_modules[] = { .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_11 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = fd_targets, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "hardirq", .config_name = "hardirq", .enabled = 0, .start_routine = ebpf_hardirq_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "hardirq", .config_name = "hardirq", .thread_description = NETDATA_EBPF_HARDIRQ_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_hardirq_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, 
.apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &hardirq_config, .config_file = NETDATA_HARDIRQ_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "softirq", .config_name = "softirq", .enabled = 0, .start_routine = ebpf_softirq_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "softirq", .config_name = "softirq", .thread_description = NETDATA_EBPF_SOFTIRQ_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_softirq_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &softirq_config, .config_file = NETDATA_SOFTIRQ_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "oomkill", .config_name = "oomkill", .enabled = 0, .start_routine = ebpf_oomkill_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "oomkill", .config_name = "oomkill", .thread_description = NETDATA_EBPF_OOMKILL_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_oomkill_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_oomkill_create_apps_charts, .maps = NULL, @@ -170,8 +186,9 @@ ebpf_module_t ebpf_modules[] = { .config_file = NETDATA_OOMKILL_CONFIG_FILE, .kernels = NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "shm", .config_name = "shm", .enabled = 0, .start_routine = ebpf_shm_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "shm", .config_name = "shm", .thread_description = NETDATA_EBPF_SHM_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_shm_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_shm_create_apps_charts, .maps = NULL, @@ -179,15 +196,25 @@ ebpf_module_t ebpf_modules[] = { .config_file = NETDATA_DIRECTORY_SHM_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = shm_targets, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "mdflush", .config_name = "mdflush", 
.enabled = 0, .start_routine = ebpf_mdflush_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "mdflush", .config_name = "mdflush", .thread_description = NETDATA_EBPF_MD_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_mdflush_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &mdflush_config, .config_file = NETDATA_DIRECTORY_MDFLUSH_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, + .load = EBPF_LOAD_LEGACY, .targets = mdflush_targets, .probe_links = NULL, .objects = NULL, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "functions", .config_name = "functions", .thread_description = NETDATA_EBPF_FUNCTIONS_MODULE_DESC, + .enabled = 1, .start_routine = ebpf_function_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, + .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = NULL, + .config_file = NETDATA_DIRECTORY_FUNCTIONS_CONFIG_FILE, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, { .thread_name = NULL, .enabled = 0, .start_routine = NULL, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 0, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, @@ -356,6 +383,20 @@ struct netdata_static_thread ebpf_threads[] = { .init_routine = NULL, .start_routine = NULL }, + { + .name = "EBPF FUNCTIONS", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, +#ifdef NETDATA_DEV_MODE + .enabled = 1, +#else + .enabled = 0, +#endif + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + }, { .name = NULL, .config_section = NULL, @@ -378,7 +419,13 @@ ebpf_filesystem_partitions_t localfs[] = .enabled = CONFIG_BOOLEAN_YES, .addresses = {.function = NULL, .addr = 0}, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .fs_maps = NULL}, + .fs_maps = NULL, + .fs_obj = NULL, + .functions = { "ext4_file_read_iter", + "ext4_file_write_iter", + "ext4_file_open", + "ext4_sync_file", + NULL }}, {.filesystem = "xfs", .optional_filesystem = NULL, .family = "xfs", @@ -388,7 +435,13 @@ ebpf_filesystem_partitions_t localfs[] = .enabled = CONFIG_BOOLEAN_YES, .addresses = {.function = NULL, .addr = 0}, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .fs_maps = NULL}, + .fs_maps = NULL, + .fs_obj = NULL, + .functions = { "xfs_file_read_iter", + "xfs_file_write_iter", + "xfs_file_open", + "xfs_file_fsync", + NULL }}, {.filesystem = "nfs", .optional_filesystem = "nfs4", 
.family = "nfs", @@ -398,7 +451,13 @@ ebpf_filesystem_partitions_t localfs[] = .enabled = CONFIG_BOOLEAN_YES, .addresses = {.function = NULL, .addr = 0}, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .fs_maps = NULL}, + .fs_maps = NULL, + .fs_obj = NULL, + .functions = { "nfs_file_read", + "nfs_file_write", + "nfs_open", + "nfs_getattr", + NULL }}, // // "nfs4_file_open" - not present on all kernels {.filesystem = "zfs", .optional_filesystem = NULL, .family = "zfs", @@ -408,7 +467,13 @@ ebpf_filesystem_partitions_t localfs[] = .enabled = CONFIG_BOOLEAN_YES, .addresses = {.function = NULL, .addr = 0}, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .fs_maps = NULL}, + .fs_maps = NULL, + .fs_obj = NULL, + .functions = { "zpl_iter_read", + "zpl_iter_write", + "zpl_open", + "zpl_fsync", + NULL }}, {.filesystem = "btrfs", .optional_filesystem = NULL, .family = "btrfs", @@ -418,7 +483,13 @@ ebpf_filesystem_partitions_t localfs[] = .enabled = CONFIG_BOOLEAN_YES, .addresses = {.function = "btrfs_file_operations", .addr = 0}, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_10, - .fs_maps = NULL}, + .fs_maps = NULL, + .fs_obj = NULL, + .functions = { "btrfs_file_read_iter", + "btrfs_file_write_iter", + "btrfs_file_open", + "btrfs_sync_file", + NULL }}, {.filesystem = NULL, .optional_filesystem = NULL, .family = NULL, @@ -427,7 +498,7 @@ ebpf_filesystem_partitions_t localfs[] = .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, .enabled = CONFIG_BOOLEAN_YES, .addresses = {.function = NULL, .addr = 0}, - .kernels = 0, .fs_maps = NULL}}; + .kernels = 0, .fs_maps = NULL, .fs_obj = NULL}}; ebpf_sync_syscalls_t local_syscalls[] = { {.syscall = NETDATA_SYSCALLS_SYNC, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, @@ -493,7 +564,10 @@ ebpf_plugin_stats_t plugin_statistics = {.core = 0, .legacy = 0, .running = 0, . 
struct btf *default_btf = NULL; struct cachestat_bpf *cachestat_bpf_obj = NULL; struct dc_bpf *dc_bpf_obj = NULL; +struct disk_bpf *disk_bpf_obj = NULL; struct fd_bpf *fd_bpf_obj = NULL; +struct hardirq_bpf *hardirq_bpf_obj = NULL; +struct mdflush_bpf *mdflush_bpf_obj = NULL; struct mount_bpf *mount_bpf_obj = NULL; struct shm_bpf *shm_bpf_obj = NULL; struct socket_bpf *socket_bpf_obj = NULL; @@ -524,7 +598,7 @@ ARAL *ebpf_allocate_pid_aral(char *name, size_t size) { static size_t max_elements = NETDATA_EBPF_ALLOC_MAX_PID; if (max_elements < NETDATA_EBPF_ALLOC_MIN_ELEMENTS) { - error("Number of elements given is too small, adjusting it for %d", NETDATA_EBPF_ALLOC_MIN_ELEMENTS); + netdata_log_error("Number of elements given is too small, adjusting it for %d", NETDATA_EBPF_ALLOC_MIN_ELEMENTS); max_elements = NETDATA_EBPF_ALLOC_MIN_ELEMENTS; } @@ -553,14 +627,14 @@ static inline void ebpf_check_before2go() int j; pthread_mutex_lock(&ebpf_exit_cleanup); for (j = 0; ebpf_modules[j].thread_name != NULL; j++) { - if (ebpf_modules[j].enabled == NETDATA_THREAD_EBPF_RUNNING) + if (ebpf_modules[j].enabled < NETDATA_THREAD_EBPF_STOPPING) i++; } pthread_mutex_unlock(&ebpf_exit_cleanup); } if (i) { - error("eBPF cannot unload all threads on time, but it will go away"); + netdata_log_error("eBPF cannot unload all threads on time, but it will go away"); } } @@ -581,10 +655,10 @@ static void ebpf_exit() char filename[FILENAME_MAX + 1]; ebpf_pid_file(filename, FILENAME_MAX); if (unlink(filename)) - error("Cannot remove PID file %s", filename); + netdata_log_error("Cannot remove PID file %s", filename); #ifdef NETDATA_INTERNAL_CHECKS - error("Good bye world! I was PID %d", main_thread_id); + netdata_log_error("Good bye world! I was PID %d", main_thread_id); #endif fprintf(stdout, "EXIT\n"); fflush(stdout); @@ -632,12 +706,12 @@ static void ebpf_unload_unique_maps() int i; for (i = 0; ebpf_modules[i].thread_name; i++) { // These threads are cleaned with other functions - if (i > EBPF_MODULE_SOCKET_IDX) + if (i != EBPF_MODULE_SOCKET_IDX) continue; if (ebpf_modules[i].enabled != NETDATA_THREAD_EBPF_STOPPED) { if (ebpf_modules[i].enabled != NETDATA_THREAD_EBPF_NOT_RUNNING) - error("Cannot unload maps for thread %s, because it is not stopped.", ebpf_modules[i].thread_name); + netdata_log_error("Cannot unload maps for thread %s, because it is not stopped.", ebpf_modules[i].thread_name); continue; } @@ -647,13 +721,10 @@ static void ebpf_unload_unique_maps() continue; } - if (i == EBPF_MODULE_SOCKET_IDX) { #ifdef LIBBPF_MAJOR_VERSION - if (socket_bpf_obj) - socket_bpf__destroy(socket_bpf_obj); + if (socket_bpf_obj) + socket_bpf__destroy(socket_bpf_obj); #endif - } - } } @@ -665,7 +736,7 @@ static void ebpf_unload_unique_maps() static void ebpf_unload_filesystems() { if (ebpf_modules[EBPF_MODULE_FILESYSTEM_IDX].enabled == NETDATA_THREAD_EBPF_NOT_RUNNING || - ebpf_modules[EBPF_MODULE_FILESYSTEM_IDX].enabled == NETDATA_THREAD_EBPF_RUNNING || + ebpf_modules[EBPF_MODULE_FILESYSTEM_IDX].enabled < NETDATA_THREAD_EBPF_STOPPING || ebpf_modules[EBPF_MODULE_FILESYSTEM_IDX].load != EBPF_LOAD_LEGACY) return; @@ -686,7 +757,7 @@ static void ebpf_unload_filesystems() static void ebpf_unload_sync() { if (ebpf_modules[EBPF_MODULE_SYNC_IDX].enabled == NETDATA_THREAD_EBPF_NOT_RUNNING || - ebpf_modules[EBPF_MODULE_SYNC_IDX].enabled == NETDATA_THREAD_EBPF_RUNNING) + ebpf_modules[EBPF_MODULE_SYNC_IDX].enabled < NETDATA_THREAD_EBPF_STOPPING) return; int i; @@ -724,10 +795,10 @@ static void ebpf_stop_threads(int sig) only_one = 1; int i; 
for (i = 0; ebpf_modules[i].thread_name != NULL; i++) { - if (ebpf_modules[i].enabled == NETDATA_THREAD_EBPF_RUNNING) { + if (ebpf_modules[i].enabled < NETDATA_THREAD_EBPF_STOPPING) { netdata_thread_cancel(*ebpf_modules[i].thread->thread); #ifdef NETDATA_DEV_MODE - info("Sending cancel for thread %s", ebpf_modules[i].thread_name); + netdata_log_info("Sending cancel for thread %s", ebpf_modules[i].thread_name); #endif } } @@ -736,7 +807,7 @@ static void ebpf_stop_threads(int sig) pthread_mutex_lock(&mutex_cgroup_shm); netdata_thread_cancel(*cgroup_integration_thread.thread); #ifdef NETDATA_DEV_MODE - info("Sending cancel for thread %s", cgroup_integration_thread.name); + netdata_log_info("Sending cancel for thread %s", cgroup_integration_thread.name); #endif pthread_mutex_unlock(&mutex_cgroup_shm); @@ -759,6 +830,19 @@ static void ebpf_stop_threads(int sig) * *****************************************************************/ +/** + * Create apps for module + * + * Create apps chart that will be used with specific module + * + * @param em the module main structure. + * @param root a pointer for the targets. + */ +static inline void ebpf_create_apps_for_module(ebpf_module_t *em, struct ebpf_target *root) { + if (em->enabled < NETDATA_THREAD_EBPF_STOPPING && em->apps_charts && em->apps_routine) + em->apps_routine(em, root); +} + /** * Create apps charts * @@ -800,14 +884,21 @@ static void ebpf_create_apps_charts(struct ebpf_target *root) } } - if (!newly_added) + int i; + if (!newly_added) { + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX ; i++) { + ebpf_module_t *current = &ebpf_modules[i]; + if (current->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) + continue; + + ebpf_create_apps_for_module(current, root); + } return; + } - int counter; - for (counter = 0; ebpf_modules[counter].thread_name; counter++) { - ebpf_module_t *current = &ebpf_modules[counter]; - if (current->enabled == NETDATA_THREAD_EBPF_RUNNING && current->apps_charts && current->apps_routine) - current->apps_routine(current, root); + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX ; i++) { + ebpf_module_t *current = &ebpf_modules[i]; + ebpf_create_apps_for_module(current, root); } } @@ -1136,7 +1227,7 @@ void write_histogram_chart(char *family, char *name, const netdata_idx_t *hist, * @param name the name used to create aral * @param em a pointer to the structure with the default values. */ -void ebpf_statistic_create_aral_chart(char *name, ebpf_module_t *em) +int ebpf_statistic_create_aral_chart(char *name, ebpf_module_t *em) { static int priority = 140100; char *mem = { NETDATA_EBPF_STAT_DIMENSION_MEMORY }; @@ -1174,6 +1265,40 @@ void ebpf_statistic_create_aral_chart(char *name, ebpf_module_t *em) ebpf_write_global_dimension(aral, aral, ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + return priority - 2; +} + +/** + * ARAL Charts + * + * Add chart to monitor ARAL usage + * Caller must call this function with mutex locked. + * + * @param em a pointer to the structure with the default values. + * @param prio the initial priority used to disable charts. 
+ */ +void ebpf_statistic_obsolete_aral_chart(ebpf_module_t *em, int prio) +{ + ebpf_write_chart_obsolete(NETDATA_MONITORING_FAMILY, + em->memory_allocations, + "Calls to allocate memory.", + "calls", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_STACKED, + "netdata.ebpf_aral_stat_alloc", + prio++, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_MONITORING_FAMILY, + em->memory_allocations, + "Calls to allocate memory.", + "calls", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_STACKED, + "netdata.ebpf_aral_stat_alloc", + prio++, + em->update_every); } /** @@ -1247,7 +1372,7 @@ void ebpf_global_labels(netdata_syscall_stat_t *is, netdata_publish_syscall_t *p static inline void ebpf_set_thread_mode(netdata_run_mode_t lmode) { int i; - for (i = 0; ebpf_modules[i].thread_name; i++) { + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { ebpf_modules[i].mode = lmode; } } @@ -1256,16 +1381,15 @@ static inline void ebpf_set_thread_mode(netdata_run_mode_t lmode) * Enable specific charts selected by user. * * @param em the structure that will be changed - * @param disable_apps the status about the apps charts. * @param disable_cgroup the status about the cgroups charts. */ -static inline void ebpf_enable_specific_chart(struct ebpf_module *em, int disable_apps, int disable_cgroup) +static inline void ebpf_enable_specific_chart(struct ebpf_module *em, int disable_cgroup) { em->enabled = CONFIG_BOOLEAN_YES; // oomkill stores data inside apps submenu, so it always need to have apps_enabled for plugin to create // its chart, without this comparison eBPF.plugin will try to store invalid data when apps is disabled. - if (!disable_apps || !strcmp(em->thread_name, "oomkill")) { + if (!strcmp(em->thread_name, "oomkill")) { em->apps_charts = NETDATA_EBPF_APPS_FLAG_YES; } @@ -1276,20 +1400,6 @@ static inline void ebpf_enable_specific_chart(struct ebpf_module *em, int disabl em->global_charts = CONFIG_BOOLEAN_YES; } -/** - * Enable all charts - * - * @param apps what is the current status of apps - * @param cgroups what is the current status of cgroups - */ -static inline void ebpf_enable_all_charts(int apps, int cgroups) -{ - int i; - for (i = 0; ebpf_modules[i].thread_name; i++) { - ebpf_enable_specific_chart(&ebpf_modules[i], apps, cgroups); - } -} - /** * Disable all Global charts * @@ -1304,37 +1414,22 @@ static inline void disable_all_global_charts() } } - /** * Enable the specified chart group * * @param idx the index of ebpf_modules that I am enabling - * @param disable_apps should I keep apps charts? */ -static inline void ebpf_enable_chart(int idx, int disable_apps, int disable_cgroup) +static inline void ebpf_enable_chart(int idx, int disable_cgroup) { int i; for (i = 0; ebpf_modules[i].thread_name; i++) { if (i == idx) { - ebpf_enable_specific_chart(&ebpf_modules[i], disable_apps, disable_cgroup); + ebpf_enable_specific_chart(&ebpf_modules[i], disable_cgroup); break; } } } -/** - * Disable APPs - * - * Disable charts for apps loading only global charts. 
- */ -static inline void ebpf_disable_apps() -{ - int i; - for (i = 0; ebpf_modules[i].thread_name; i++) { - ebpf_modules[i].apps_charts = NETDATA_EBPF_APPS_FLAG_NO; - } -} - /** * Disable Cgroups * @@ -1508,8 +1603,7 @@ uint32_t ebpf_enable_tracepoints(ebpf_tracepoint_t *tps) uint32_t cnt = 0; for (int i = 0; tps[i].class != NULL; i++) { if (ebpf_enable_tracepoint(&tps[i]) == -1) { - infoerr("failed to enable tracepoint %s:%s", - tps[i].class, tps[i].event); + netdata_log_error("Failed to enable tracepoint %s:%s", tps[i].class, tps[i].event); } else { cnt += 1; @@ -1572,7 +1666,7 @@ static void read_local_addresses() { struct ifaddrs *ifaddr, *ifa; if (getifaddrs(&ifaddr) == -1) { - error("Cannot get the local IP addresses, it is no possible to do separation between inbound and outbound connections"); + netdata_log_error("Cannot get the local IP addresses, it is no possible to do separation between inbound and outbound connections"); return; } @@ -1638,33 +1732,11 @@ void ebpf_start_pthread_variables() pthread_mutex_init(&mutex_cgroup_shm, NULL); } -/** - * Am I collecting PIDs? - * - * Test if eBPF plugin needs to collect PID information. - * - * @return It returns 1 if at least one thread needs to collect the data, or zero otherwise. - */ -static inline uint32_t ebpf_am_i_collect_pids() -{ - uint32_t ret = 0; - int i; - for (i = 0; ebpf_modules[i].thread_name; i++) { - ret |= ebpf_modules[i].cgroup_charts | (ebpf_modules[i].apps_charts & NETDATA_EBPF_APPS_FLAG_YES); - } - - return ret; -} - /** * Allocate the vectors used for all threads. */ static void ebpf_allocate_common_vectors() { - if (unlikely(!ebpf_am_i_collect_pids())) { - return; - } - ebpf_all_pids = callocz((size_t)pid_max, sizeof(struct ebpf_pid_stat *)); ebpf_aral_init(); } @@ -1674,16 +1746,30 @@ static void ebpf_allocate_common_vectors() * * @param ptr the option given by users */ -static inline void how_to_load(char *ptr) +static inline void ebpf_how_to_load(char *ptr) { if (!strcasecmp(ptr, EBPF_CFG_LOAD_MODE_RETURN)) ebpf_set_thread_mode(MODE_RETURN); else if (!strcasecmp(ptr, EBPF_CFG_LOAD_MODE_DEFAULT)) ebpf_set_thread_mode(MODE_ENTRY); else - error("the option %s for \"ebpf load mode\" is not a valid option.", ptr); + netdata_log_error("the option %s for \"ebpf load mode\" is not a valid option.", ptr); } +/** + * Define whether we should have charts for apps + * + * @param lmode the mode that will be used for them. + */ +static inline void ebpf_set_apps_mode(netdata_apps_integration_flags_t value) +{ + int i; + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { + ebpf_modules[i].apps_charts = value; + } +} + + /** * Update interval * @@ -1716,6 +1802,21 @@ static void ebpf_update_table_size() } } +/** + * Update lifetime + * + * Update the period of time that specific thread will run + */ +static void ebpf_update_lifetime() +{ + int i; + uint32_t value = (uint32_t) appconfig_get_number(&collector_config, EBPF_GLOBAL_SECTION, + EBPF_CFG_LIFETIME, EBPF_DEFAULT_LIFETIME); + for (i = 0; ebpf_modules[i].thread_name; i++) { + ebpf_modules[i].lifetime = value; + } +} + /** * Set Load mode * @@ -1761,12 +1862,11 @@ static void ebpf_update_map_per_core() /** * Read collector values * - * @param disable_apps variable to store information related to apps. * @param disable_cgroups variable to store information related to cgroups. * @param update_every value to overwrite the update frequency set by the server. 
* @param origin specify the configuration file loaded */ -static void read_collector_values(int *disable_apps, int *disable_cgroups, +static void read_collector_values(int *disable_cgroups, int update_every, netdata_ebpf_load_mode_t origin) { // Read global section @@ -1778,7 +1878,7 @@ static void read_collector_values(int *disable_apps, int *disable_cgroups, value = appconfig_get(&collector_config, EBPF_GLOBAL_SECTION, EBPF_CFG_LOAD_MODE, EBPF_CFG_LOAD_MODE_DEFAULT); - how_to_load(value); + ebpf_how_to_load(value); btf_path = appconfig_get(&collector_config, EBPF_GLOBAL_SECTION, EBPF_CFG_PROGRAM_PATH, EBPF_DEFAULT_BTF_PATH); @@ -1795,6 +1895,8 @@ static void read_collector_values(int *disable_apps, int *disable_cgroups, ebpf_update_table_size(); + ebpf_update_lifetime(); + // This is kept to keep compatibility uint32_t enabled = appconfig_get_boolean(&collector_config, EBPF_GLOBAL_SECTION, "disable apps", CONFIG_BOOLEAN_NO); @@ -1804,7 +1906,8 @@ static void read_collector_values(int *disable_apps, int *disable_cgroups, CONFIG_BOOLEAN_YES); enabled = (enabled == CONFIG_BOOLEAN_NO)?CONFIG_BOOLEAN_YES:CONFIG_BOOLEAN_NO; } - *disable_apps = (int)enabled; + + ebpf_set_apps_mode(!enabled); // Cgroup is a positive sentence, so we need to invert the values to disable apps. // We are using the same pattern for cgroup and apps @@ -1816,10 +1919,8 @@ static void read_collector_values(int *disable_apps, int *disable_cgroups, // Read ebpf programs section enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, ebpf_modules[EBPF_MODULE_PROCESS_IDX].config_name, CONFIG_BOOLEAN_YES); - int started = 0; if (enabled) { - ebpf_enable_chart(EBPF_MODULE_PROCESS_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_PROCESS_IDX, *disable_cgroups); } // This is kept to keep compatibility @@ -1830,8 +1931,7 @@ static void read_collector_values(int *disable_apps, int *disable_cgroups, ebpf_modules[EBPF_MODULE_SOCKET_IDX].config_name, CONFIG_BOOLEAN_NO); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_SOCKET_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_SOCKET_IDX, *disable_cgroups); } // This is kept to keep compatibility @@ -1843,123 +1943,98 @@ static void read_collector_values(int *disable_apps, int *disable_cgroups, network_viewer_opt.enabled = enabled; if (enabled) { if (!ebpf_modules[EBPF_MODULE_SOCKET_IDX].enabled) - ebpf_enable_chart(EBPF_MODULE_SOCKET_IDX, *disable_apps, *disable_cgroups); + ebpf_enable_chart(EBPF_MODULE_SOCKET_IDX, *disable_cgroups); // Read network viewer section if network viewer is enabled // This is kept here to keep backward compatibility parse_network_viewer_section(&collector_config); parse_service_name_section(&collector_config); - started++; } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "cachestat", CONFIG_BOOLEAN_NO); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_CACHESTAT_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_CACHESTAT_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "sync", CONFIG_BOOLEAN_YES); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_SYNC_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_SYNC_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "dcstat", CONFIG_BOOLEAN_NO); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_DCSTAT_IDX, *disable_apps, *disable_cgroups); - started++; 
+ ebpf_enable_chart(EBPF_MODULE_DCSTAT_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "swap", CONFIG_BOOLEAN_NO); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_SWAP_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_SWAP_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "vfs", CONFIG_BOOLEAN_NO); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_VFS_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_VFS_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "filesystem", CONFIG_BOOLEAN_NO); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_FILESYSTEM_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_FILESYSTEM_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "disk", CONFIG_BOOLEAN_NO); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_DISK_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_DISK_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "mount", CONFIG_BOOLEAN_YES); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_MOUNT_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_MOUNT_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "fd", CONFIG_BOOLEAN_YES); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_FD_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_FD_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "hardirq", CONFIG_BOOLEAN_YES); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_HARDIRQ_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_HARDIRQ_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "softirq", CONFIG_BOOLEAN_YES); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_SOFTIRQ_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_SOFTIRQ_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "oomkill", CONFIG_BOOLEAN_YES); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_OOMKILL_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_OOMKILL_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "shm", CONFIG_BOOLEAN_YES); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_SHM_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_SHM_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "mdflush", CONFIG_BOOLEAN_NO); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_MDFLUSH_IDX, *disable_apps, *disable_cgroups); - started++; - } - - if (!started){ - ebpf_enable_all_charts(*disable_apps, *disable_cgroups); - // Read network viewer section - // This is kept here to keep backward compatibility - if (network_viewer_opt.enabled) { - parse_network_viewer_section(&collector_config); - parse_service_name_section(&collector_config); - } + ebpf_enable_chart(EBPF_MODULE_MDFLUSH_IDX, *disable_cgroups); } } @@ -1967,13 +2042,12 @@ static void read_collector_values(int *disable_apps, int *disable_cgroups, * Load collector config * * @param path the path where the file 
ebpf.conf is stored. - * @param disable_apps variable to store the information about apps plugin status. * @param disable_cgroups variable to store the information about cgroups plugin status. * @param update_every value to overwrite the update frequency set by the server. * * @return 0 on success and -1 otherwise. */ -static int load_collector_config(char *path, int *disable_apps, int *disable_cgroups, int update_every) +static int ebpf_load_collector_config(char *path, int *disable_cgroups, int update_every) { char lpath[4096]; netdata_ebpf_load_mode_t origin; @@ -1988,7 +2062,7 @@ static int load_collector_config(char *path, int *disable_apps, int *disable_cgr } else origin = EBPF_LOADED_FROM_USER; - read_collector_values(disable_apps, disable_cgroups, update_every, origin); + read_collector_values(disable_cgroups, update_every, origin); return 0; } @@ -2018,7 +2092,7 @@ void set_global_variables() ebpf_nprocs = (int)sysconf(_SC_NPROCESSORS_ONLN); if (ebpf_nprocs < 0) { ebpf_nprocs = NETDATA_MAX_PROCESSOR; - error("Cannot identify number of process, using default value %d", ebpf_nprocs); + netdata_log_error("Cannot identify number of process, using default value %d", ebpf_nprocs); } isrh = get_redhat_release(); @@ -2032,7 +2106,7 @@ void set_global_variables() static inline void ebpf_load_thread_config() { int i; - for (i = 0; ebpf_modules[i].thread_name; i++) { + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { ebpf_update_module(&ebpf_modules[i], default_btf, running_on_kernel, isrh); } } @@ -2047,12 +2121,12 @@ static inline void ebpf_load_thread_config() int ebpf_check_conditions() { if (!has_condition_to_run(running_on_kernel)) { - error("The current collector cannot run on this kernel."); + netdata_log_error("The current collector cannot run on this kernel."); return -1; } if (!am_i_running_as_root()) { - error( + netdata_log_error( "ebpf.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities..", (unsigned int)getuid(), (unsigned int)geteuid()); return -1; @@ -2072,7 +2146,7 @@ int ebpf_adjust_memory_limit() { struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY }; if (setrlimit(RLIMIT_MEMLOCK, &r)) { - error("Setrlimit(RLIMIT_MEMLOCK)"); + netdata_log_error("Setrlimit(RLIMIT_MEMLOCK)"); return -1; } @@ -2087,7 +2161,6 @@ int ebpf_adjust_memory_limit() */ static void ebpf_parse_args(int argc, char **argv) { - int disable_apps = 0; int disable_cgroups = 1; int freq = 0; int option_index = 0; @@ -2134,12 +2207,12 @@ static void ebpf_parse_args(int argc, char **argv) if (!freq) freq = EBPF_DEFAULT_UPDATE_EVERY; - if (load_collector_config(ebpf_user_config_dir, &disable_apps, &disable_cgroups, freq)) { - info( + if (ebpf_load_collector_config(ebpf_user_config_dir, &disable_cgroups, freq)) { + netdata_log_info( "Does not have a configuration file inside `%s/ebpf.d.conf. It will try to load stock file.", ebpf_user_config_dir); - if (load_collector_config(ebpf_stock_config_dir, &disable_apps, &disable_cgroups, freq)) { - info("Does not have a stock file. It is starting with default options."); + if (ebpf_load_collector_config(ebpf_stock_config_dir, &disable_cgroups, freq)) { + netdata_log_info("Does not have a stock file. It is starting with default options."); } } @@ -2154,120 +2227,120 @@ static void ebpf_parse_args(int argc, char **argv) case EBPF_MODULE_PROCESS_IDX: { select_threads |= 1<thread_name, (wem->enabled < NETDATA_THREAD_EBPF_STOPPING) ? 
1 : 0); + } + write_end_chart(); + + write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_LIFE_TIME); + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX ; i++) { + ebpf_module_t *wem = &ebpf_modules[i]; + // Threads like VFS is slow to load and this can create an invalid number, this is the motive + // we are also testing wem->lifetime value. + write_chart_dimension((char *)wem->thread_name, + (wem->lifetime && wem->enabled < NETDATA_THREAD_EBPF_STOPPING) ? + (long long) (wem->lifetime - wem->running_time): + 0) ; + } + write_end_chart(); + + write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_LOAD_METHOD); + write_chart_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_LEGACY], (long long)plugin_statistics.legacy); + write_chart_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_CORE], (long long)plugin_statistics.core); + write_end_chart(); + + write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_KERNEL_MEMORY); + write_chart_dimension(memlock_stat, (long long)plugin_statistics.memlock_kern); + write_end_chart(); + + write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_LOADED); + write_chart_dimension(hash_table_stat, (long long)plugin_statistics.hash_tables); + write_end_chart(); + + write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_PER_CORE); + write_chart_dimension(hash_table_core[NETDATA_EBPF_THREAD_PER_CORE], (long long)plugin_statistics.hash_percpu); + write_chart_dimension(hash_table_core[NETDATA_EBPF_THREAD_UNIQUE], (long long)plugin_statistics.hash_unique); + write_end_chart(); +} + +/** + * Update Internal Metric variable + * + * By default eBPF.plugin sends internal metrics for netdata, but user can + * disable this. + * + * The function updates the variable used to send charts. + */ +static void update_internal_metric_variable() +{ + const char *s = getenv("NETDATA_INTERNALS_MONITORING"); + if (s && *s && strcmp(s, "NO") == 0) + publish_internal_metrics = false; +} + +/** + * Create chart for Statistic Thread + * + * Write to standard output current values for threads. + * + * @param update_every time used to update charts + */ +static inline void ebpf_create_statistic_thread_chart(int update_every) +{ + ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, + NETDATA_EBPF_THREADS, + "Threads running.", + "boolean", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_EBPF_ORDER_STAT_THREADS, + update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + + int i; + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { + ebpf_write_global_dimension((char *)ebpf_modules[i].thread_name, + (char *)ebpf_modules[i].thread_name, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + } +} + +/** + * Create lifetime Thread Chart + * + * Write to standard output current values for threads lifetime. + * + * @param update_every time used to update charts + */ +static inline void ebpf_create_lifetime_thread_chart(int update_every) +{ + ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, + NETDATA_EBPF_LIFE_TIME, + "Threads running.", + "seconds", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_EBPF_ORDER_STAT_LIFE_TIME, + update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + + int i; + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { + ebpf_write_global_dimension((char *)ebpf_modules[i].thread_name, + (char *)ebpf_modules[i].thread_name, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + } +} + +/** + * Create chart for Load Thread + * + * Write to standard output current values for load mode. 
+ * + * @param update_every time used to update charts + */ +static inline void ebpf_create_statistic_load_chart(int update_every) +{ + ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, + NETDATA_EBPF_LOAD_METHOD, + "Load info.", + "methods", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_EBPF_ORDER_STAT_LOAD_METHOD, + update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + + ebpf_write_global_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_LEGACY], + load_event_stat[NETDATA_EBPF_LOAD_STAT_LEGACY], + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + ebpf_write_global_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_CORE], + load_event_stat[NETDATA_EBPF_LOAD_STAT_CORE], + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); +} + +/** + * Create chart for Kernel Memory + * + * Write to standard output current values for allocated memory. + * + * @param update_every time used to update charts + */ +static inline void ebpf_create_statistic_kernel_memory(int update_every) +{ + ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, + NETDATA_EBPF_KERNEL_MEMORY, + "Memory allocated for hash tables.", + "bytes", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_EBPF_ORDER_STAT_KERNEL_MEMORY, + update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + + ebpf_write_global_dimension(memlock_stat, + memlock_stat, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); +} + +/** + * Create chart Hash Table + * + * Write to standard output number of hash tables used with this software. + * + * @param update_every time used to update charts + */ +static inline void ebpf_create_statistic_hash_tables(int update_every) +{ + ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, + NETDATA_EBPF_HASH_TABLES_LOADED, + "Number of hash tables loaded.", + "hash tables", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_EBPF_ORDER_STAT_HASH_TABLES, + update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + + ebpf_write_global_dimension(hash_table_stat, + hash_table_stat, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); +} + +/** + * Create chart for percpu stats + * + * Write to standard output current values for threads. + * + * @param update_every time used to update charts + */ +static inline void ebpf_create_statistic_hash_per_core(int update_every) +{ + ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, + NETDATA_EBPF_HASH_TABLES_PER_CORE, + "How threads are loading hash/array tables.", + "threads", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_EBPF_ORDER_STAT_HASH_CORE, + update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + + ebpf_write_global_dimension(hash_table_core[NETDATA_EBPF_THREAD_PER_CORE], + hash_table_core[NETDATA_EBPF_THREAD_PER_CORE], + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + ebpf_write_global_dimension(hash_table_core[NETDATA_EBPF_THREAD_UNIQUE], + hash_table_core[NETDATA_EBPF_THREAD_UNIQUE], + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); +} + + +/** + * Create Statistics Charts + * + * Create charts that will show statistics related to eBPF plugin. 
+ * + * @param update_every time used to update charts + */ +static void ebpf_create_statistic_charts(int update_every) +{ + static char create_charts = 1; + update_internal_metric_variable(); + if (!publish_internal_metrics) + return; + + if (!create_charts) + return; + + create_charts = 0; + + ebpf_create_statistic_thread_chart(update_every); +#ifdef NETDATA_DEV_MODE + EBPF_PLUGIN_FUNCTIONS(EBPF_FUNCTION_THREAD, EBPF_PLUGIN_THREAD_FUNCTION_DESCRIPTION); +#endif + + ebpf_create_lifetime_thread_chart(update_every); +#ifdef NETDATA_DEV_MODE + EBPF_PLUGIN_FUNCTIONS(EBPF_FUNCTION_THREAD, EBPF_PLUGIN_THREAD_FUNCTION_DESCRIPTION); +#endif + + ebpf_create_statistic_load_chart(update_every); + + ebpf_create_statistic_kernel_memory(update_every); + + ebpf_create_statistic_hash_tables(update_every); + + ebpf_create_statistic_hash_per_core(update_every); } /***************************************************************** @@ -2412,7 +2759,7 @@ static char *ebpf_get_process_name(pid_t pid) procfile *ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT); if(unlikely(!ff)) { - error("Cannot open %s", filename); + netdata_log_error("Cannot open %s", filename); return name; } @@ -2605,11 +2952,11 @@ int main(int argc, char **argv) ebpf_module_t *em = &ebpf_modules[i]; em->thread = st; - // We always initialize process, because it is responsible to take care of apps integration - if (em->enabled || !i) { + em->thread_id = i; + if (em->enabled) { st->thread = mallocz(sizeof(netdata_thread_t)); - em->thread_id = i; em->enabled = NETDATA_THREAD_EBPF_RUNNING; + em->lifetime = EBPF_NON_FUNCTION_LIFE_TIME; netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, em); } else { em->enabled = NETDATA_THREAD_EBPF_NOT_RUNNING; @@ -2623,23 +2970,30 @@ int main(int argc, char **argv) int update_apps_list = update_apps_every - 1; int process_maps_per_core = ebpf_modules[EBPF_MODULE_PROCESS_IDX].maps_per_core; //Plugin will be killed when it receives a signal - while (!ebpf_exit_plugin) { + for ( ; !ebpf_exit_plugin ; global_iterations_counter++) { (void)heartbeat_next(&hb, step); + if (global_iterations_counter % EBPF_DEFAULT_UPDATE_EVERY == 0) { + pthread_mutex_lock(&lock); + ebpf_create_statistic_charts(EBPF_DEFAULT_UPDATE_EVERY); + + ebpf_send_statistic_data(); + pthread_mutex_unlock(&lock); + fflush(stdout); + } + pthread_mutex_lock(&ebpf_exit_cleanup); - if (process_pid_fd != -1) { - pthread_mutex_lock(&collect_data_mutex); - if (++update_apps_list == update_apps_every) { - update_apps_list = 0; - cleanup_exited_pids(); - collect_data_for_all_processes(process_pid_fd, process_maps_per_core); - - pthread_mutex_lock(&lock); - ebpf_create_apps_charts(apps_groups_root_target); - pthread_mutex_unlock(&lock); - } - pthread_mutex_unlock(&collect_data_mutex); + pthread_mutex_lock(&collect_data_mutex); + if (++update_apps_list == update_apps_every) { + update_apps_list = 0; + cleanup_exited_pids(); + collect_data_for_all_processes(process_pid_fd, process_maps_per_core); + + pthread_mutex_lock(&lock); + ebpf_create_apps_charts(apps_groups_root_target); + pthread_mutex_unlock(&lock); } + pthread_mutex_unlock(&collect_data_mutex); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -2647,4 +3001,3 @@ int main(int argc, char **argv) return 0; } - diff --git a/collectors/ebpf.plugin/ebpf.d.conf b/collectors/ebpf.plugin/ebpf.d.conf index 8807f9a3a..5cb844b20 100644 --- a/collectors/ebpf.plugin/ebpf.d.conf +++ b/collectors/ebpf.plugin/ebpf.d.conf @@ -19,6 +19,8 @@ # # The `maps per core` 
defines if hash tables will be per core or not. This option is ignored on kernels older than 4.15. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# [global] ebpf load mode = entry apps = no @@ -27,6 +29,7 @@ pid table size = 32768 btf path = /sys/kernel/btf/ maps per core = yes + lifetime = 300 # # eBPF Programs diff --git a/collectors/ebpf.plugin/ebpf.d/cachestat.conf b/collectors/ebpf.plugin/ebpf.d/cachestat.conf index 82f870c98..9c51b2c52 100644 --- a/collectors/ebpf.plugin/ebpf.d/cachestat.conf +++ b/collectors/ebpf.plugin/ebpf.d/cachestat.conf @@ -26,6 +26,8 @@ # # The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# # Uncomment lines to define specific options for thread. [global] # ebpf load mode = entry @@ -37,3 +39,4 @@ ebpf co-re tracing = trampoline collect pid = real parent # maps per core = yes + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/dcstat.conf b/collectors/ebpf.plugin/ebpf.d/dcstat.conf index f741b62a8..614d814e6 100644 --- a/collectors/ebpf.plugin/ebpf.d/dcstat.conf +++ b/collectors/ebpf.plugin/ebpf.d/dcstat.conf @@ -24,6 +24,8 @@ # # The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# # Uncomment lines to define specific options for thread. [global] # ebpf load mode = entry @@ -35,3 +37,4 @@ ebpf co-re tracing = trampoline collect pid = real parent # maps per core = yes + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/disk.conf b/collectors/ebpf.plugin/ebpf.d/disk.conf index 4adf88e74..c5a0a2708 100644 --- a/collectors/ebpf.plugin/ebpf.d/disk.conf +++ b/collectors/ebpf.plugin/ebpf.d/disk.conf @@ -3,7 +3,10 @@ # `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors. # -#[global] +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# +[global] # ebpf load mode = entry # update every = 10 + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/fd.conf b/collectors/ebpf.plugin/ebpf.d/fd.conf index 30a5fcfd9..d48230323 100644 --- a/collectors/ebpf.plugin/ebpf.d/fd.conf +++ b/collectors/ebpf.plugin/ebpf.d/fd.conf @@ -12,6 +12,8 @@ # # The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# # Uncomment lines to define specific options for thread. [global] # ebpf load mode = entry @@ -22,3 +24,4 @@ ebpf type format = auto ebpf co-re tracing = trampoline # maps per core = yes + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/filesystem.conf b/collectors/ebpf.plugin/ebpf.d/filesystem.conf index c5eb01e54..209abba77 100644 --- a/collectors/ebpf.plugin/ebpf.d/filesystem.conf +++ b/collectors/ebpf.plugin/ebpf.d/filesystem.conf @@ -3,13 +3,16 @@ # `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. 
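With the defaults shipped here (`lifetime = 300`) and an `update every` of 10 seconds, a thread enabled on demand performs roughly 30 collection cycles before stopping itself, unless it is re-enabled in the meantime. The collector loops later in this patch all follow the same shape; a condensed sketch with simplified names:

#include <stdint.h>
#include <stdbool.h>

/* Sketch of how the configured lifetime bounds a collector loop. Each iteration
 * waits about update_every seconds, so the loop ends once lifetime is reached. */
static void collector_loop(uint32_t update_every, uint32_t lifetime,
                           volatile bool *exit_plugin)
{
    uint32_t running_time = 0;
    while (!*exit_plugin && running_time < lifetime) {
        /* ... wait one heartbeat, read the maps, send the charts ... */
        running_time += update_every;
    }
}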
+# # The eBPF collector also creates charts for each running application through an integration with the `apps plugin`. # If you want to disable the integration with `apps.plugin` along with the above charts, change the setting `apps` to # 'no'. # -#[global] +[global] # ebpf load mode = entry # update every = 10 + lifetime = 300 # All filesystems are named as 'NAMEdist' where NAME is the filesystem name while 'dist' is a reference for distribution. [filesystem] diff --git a/collectors/ebpf.plugin/ebpf.d/functions.conf b/collectors/ebpf.plugin/ebpf.d/functions.conf new file mode 100644 index 000000000..a4f57f641 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/functions.conf @@ -0,0 +1,3 @@ +#[global] +# update every = 5 + diff --git a/collectors/ebpf.plugin/ebpf.d/hardirq.conf b/collectors/ebpf.plugin/ebpf.d/hardirq.conf index f2bae1d57..6a47a94bf 100644 --- a/collectors/ebpf.plugin/ebpf.d/hardirq.conf +++ b/collectors/ebpf.plugin/ebpf.d/hardirq.conf @@ -3,6 +3,9 @@ # `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors. # -#[global] +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# +[global] # ebpf load mode = entry # update every = 10 + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/mdflush.conf b/collectors/ebpf.plugin/ebpf.d/mdflush.conf index e65e8672c..ea97ebe85 100644 --- a/collectors/ebpf.plugin/ebpf.d/mdflush.conf +++ b/collectors/ebpf.plugin/ebpf.d/mdflush.conf @@ -2,6 +2,10 @@ # `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. # `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors. -#[global] +# +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# +[global] # ebpf load mode = entry # update every = 1 + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/mount.conf b/collectors/ebpf.plugin/ebpf.d/mount.conf index fdd82f220..ff9a2948c 100644 --- a/collectors/ebpf.plugin/ebpf.d/mount.conf +++ b/collectors/ebpf.plugin/ebpf.d/mount.conf @@ -12,8 +12,12 @@ # `trampoline`: This is the default mode used by the eBPF collector, due the small overhead added to host. # `tracepoint`: When available, the eBPF collector will use kernel tracepoint to monitor syscall. # `probe` : This is the same as legacy code. +# +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# [global] # ebpf load mode = entry # update every = 1 ebpf type format = auto ebpf co-re tracing = trampoline + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/network.conf b/collectors/ebpf.plugin/ebpf.d/network.conf index 75644a772..00cbf2e8b 100644 --- a/collectors/ebpf.plugin/ebpf.d/network.conf +++ b/collectors/ebpf.plugin/ebpf.d/network.conf @@ -26,6 +26,8 @@ # # The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# # Uncomment lines to define specific options for thread. 
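When `maps per core = yes`, the kernel keeps one slot per possible CPU in the hash tables, so a single userspace lookup returns an array of values that the plugin must sum; with `maps per core = no` only one slot comes back. A small sketch of that aggregation step, assuming libbpf's usual behaviour for per-CPU maps (names illustrative):

#include <stdint.h>

/* values points at the buffer filled by bpf_map_lookup_elem(): nprocs entries
 * for a per-CPU map, a single entry otherwise. */
static uint64_t sum_map_value(const uint64_t *values, int nprocs, int maps_per_core)
{
    if (!maps_per_core)
        return values[0];

    uint64_t total = 0;
    for (int i = 0; i < nprocs; i++)
        total += values[i];
    return total;
}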
[global] # ebpf load mode = entry @@ -39,6 +41,7 @@ ebpf type format = auto ebpf co-re tracing = trampoline maps per core = no + lifetime = 300 # # Network Connection diff --git a/collectors/ebpf.plugin/ebpf.d/oomkill.conf b/collectors/ebpf.plugin/ebpf.d/oomkill.conf index e65e8672c..ea97ebe85 100644 --- a/collectors/ebpf.plugin/ebpf.d/oomkill.conf +++ b/collectors/ebpf.plugin/ebpf.d/oomkill.conf @@ -2,6 +2,10 @@ # `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. # `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors. -#[global] +# +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# +[global] # ebpf load mode = entry # update every = 1 + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/process.conf b/collectors/ebpf.plugin/ebpf.d/process.conf index f5e8804cd..150c57920 100644 --- a/collectors/ebpf.plugin/ebpf.d/process.conf +++ b/collectors/ebpf.plugin/ebpf.d/process.conf @@ -17,6 +17,8 @@ # # The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# # Uncomment lines to define specific options for thread. [global] # ebpf load mode = entry @@ -26,3 +28,4 @@ # pid table size = 32768 collect pid = real parent # maps per core = yes + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/shm.conf b/collectors/ebpf.plugin/ebpf.d/shm.conf index f8ec1a18f..95fb54e0f 100644 --- a/collectors/ebpf.plugin/ebpf.d/shm.conf +++ b/collectors/ebpf.plugin/ebpf.d/shm.conf @@ -20,6 +20,8 @@ # # The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# # Uncomment lines to define specific options for thread. [global] # ebpf load mode = entry @@ -30,6 +32,7 @@ ebpf type format = auto ebpf co-re tracing = trampoline # maps per core = yes + lifetime = 300 # List of monitored syscalls [syscalls] diff --git a/collectors/ebpf.plugin/ebpf.d/softirq.conf b/collectors/ebpf.plugin/ebpf.d/softirq.conf index f2bae1d57..6a47a94bf 100644 --- a/collectors/ebpf.plugin/ebpf.d/softirq.conf +++ b/collectors/ebpf.plugin/ebpf.d/softirq.conf @@ -3,6 +3,9 @@ # `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors. # -#[global] +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# +[global] # ebpf load mode = entry # update every = 10 + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/swap.conf b/collectors/ebpf.plugin/ebpf.d/swap.conf index 5bad04424..29d9b4204 100644 --- a/collectors/ebpf.plugin/ebpf.d/swap.conf +++ b/collectors/ebpf.plugin/ebpf.d/swap.conf @@ -19,6 +19,8 @@ # # The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# # Uncomment lines to define specific options for thread. 
[global] # ebpf load mode = entry @@ -29,3 +31,4 @@ ebpf type format = auto ebpf co-re tracing = trampoline # maps per core = yes + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/sync.conf b/collectors/ebpf.plugin/ebpf.d/sync.conf index fefbd4ee6..a086ed4db 100644 --- a/collectors/ebpf.plugin/ebpf.d/sync.conf +++ b/collectors/ebpf.plugin/ebpf.d/sync.conf @@ -19,6 +19,8 @@ # `probe` : This is the same as legacy code. # # The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. +# +# The `lifetime` defines the time length a thread will run when it is enabled by a function. # # Uncomment lines to define specific options for thread. [global] @@ -29,6 +31,7 @@ ebpf type format = auto ebpf co-re tracing = trampoline # maps per core = yes + lifetime = 300 # List of monitored syscalls [syscalls] diff --git a/collectors/ebpf.plugin/ebpf.d/vfs.conf b/collectors/ebpf.plugin/ebpf.d/vfs.conf index b4e5daac0..f511581b8 100644 --- a/collectors/ebpf.plugin/ebpf.d/vfs.conf +++ b/collectors/ebpf.plugin/ebpf.d/vfs.conf @@ -20,6 +20,8 @@ # # The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# # Uncomment lines to define specific options for thread. [global] # ebpf load mode = entry @@ -30,3 +32,4 @@ ebpf type format = auto ebpf co-re tracing = trampoline # maps per core = yes + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.h b/collectors/ebpf.plugin/ebpf.h index ae24c302c..f008d21af 100644 --- a/collectors/ebpf.plugin/ebpf.h +++ b/collectors/ebpf.plugin/ebpf.h @@ -39,7 +39,10 @@ #ifdef LIBBPF_MAJOR_VERSION // BTF code #include "includes/cachestat.skel.h" #include "includes/dc.skel.h" +#include "includes/disk.skel.h" #include "includes/fd.skel.h" +#include "includes/hardirq.skel.h" +#include "includes/mdflush.skel.h" #include "includes/mount.skel.h" #include "includes/shm.skel.h" #include "includes/socket.skel.h" @@ -48,8 +51,11 @@ extern struct cachestat_bpf *cachestat_bpf_obj; extern struct dc_bpf *dc_bpf_obj; +extern struct disk_bpf *disk_bpf_obj; extern struct fd_bpf *fd_bpf_obj; +extern struct hardirq_bpf *hardirq_bpf_obj; extern struct mount_bpf *mount_bpf_obj; +extern struct mdflush_bpf *mdflush_bpf_obj; extern struct shm_bpf *shm_bpf_obj; extern struct socket_bpf *socket_bpf_obj; extern struct swap_bpf *bpf_obj; @@ -112,6 +118,7 @@ enum ebpf_main_index { EBPF_MODULE_OOMKILL_IDX, EBPF_MODULE_SHM_IDX, EBPF_MODULE_MDFLUSH_IDX, + EBPF_MODULE_FUNCTION_IDX, /* THREADS MUST BE INCLUDED BEFORE THIS COMMENT */ EBPF_OPTION_ALL_CHARTS, EBPF_OPTION_VERSION, @@ -157,6 +164,7 @@ typedef struct ebpf_tracepoint { // Statistics charts #define NETDATA_EBPF_THREADS "ebpf_threads" +#define NETDATA_EBPF_LIFE_TIME "ebpf_life_time" #define NETDATA_EBPF_LOAD_METHOD "ebpf_load_methods" #define NETDATA_EBPF_KERNEL_MEMORY "ebpf_kernel_memory" #define NETDATA_EBPF_HASH_TABLES_LOADED "ebpf_hash_tables_count" diff --git a/collectors/ebpf.plugin/ebpf_apps.c b/collectors/ebpf.plugin/ebpf_apps.c index 3826f8efc..c7c0cbbbb 100644 --- a/collectors/ebpf.plugin/ebpf_apps.c +++ b/collectors/ebpf.plugin/ebpf_apps.c @@ -35,7 +35,7 @@ void ebpf_aral_init(void) { size_t max_elements = NETDATA_EBPF_ALLOC_MAX_PID; if (max_elements < NETDATA_EBPF_ALLOC_MIN_ELEMENTS) { - error("Number of elements given is too small, adjusting it for %d", NETDATA_EBPF_ALLOC_MIN_ELEMENTS); + netdata_log_error("Number of 
elements given is too small, adjusting it for %d", NETDATA_EBPF_ALLOC_MIN_ELEMENTS); max_elements = NETDATA_EBPF_ALLOC_MIN_ELEMENTS; } @@ -44,7 +44,7 @@ void ebpf_aral_init(void) ebpf_aral_process_stat = ebpf_allocate_pid_aral(NETDATA_EBPF_PROC_ARAL_NAME, sizeof(ebpf_process_stat_t)); #ifdef NETDATA_DEV_MODE - info("Plugin is using ARAL with values %d", NETDATA_EBPF_ALLOC_MAX_PID); + netdata_log_info("Plugin is using ARAL with values %d", NETDATA_EBPF_ALLOC_MAX_PID); #endif } @@ -652,7 +652,7 @@ int ebpf_read_apps_groups_conf(struct ebpf_target **agdt, struct ebpf_target **a // add this target struct ebpf_target *n = get_apps_groups_target(agrt, s, w, name); if (!n) { - error("Cannot create target '%s' (line %zu, word %zu)", s, line, word); + netdata_log_error("Cannot create target '%s' (line %zu, word %zu)", s, line, word); continue; } @@ -755,32 +755,32 @@ static inline void debug_log_dummy(void) static inline int managed_log(struct ebpf_pid_stat *p, uint32_t log, int status) { if (unlikely(!status)) { - // error("command failed log %u, errno %d", log, errno); + // netdata_log_error("command failed log %u, errno %d", log, errno); if (unlikely(debug_enabled || errno != ENOENT)) { if (unlikely(debug_enabled || !(p->log_thrown & log))) { p->log_thrown |= log; switch (log) { case PID_LOG_IO: - error( + netdata_log_error( "Cannot process %s/proc/%d/io (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); break; case PID_LOG_STATUS: - error( + netdata_log_error( "Cannot process %s/proc/%d/status (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); break; case PID_LOG_CMDLINE: - error( + netdata_log_error( "Cannot process %s/proc/%d/cmdline (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); break; case PID_LOG_FDS: - error( + netdata_log_error( "Cannot process entries in %s/proc/%d/fd (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); break; @@ -789,14 +789,14 @@ static inline int managed_log(struct ebpf_pid_stat *p, uint32_t log, int status) break; default: - error("unhandled error for pid %d, command '%s'", p->pid, p->comm); + netdata_log_error("unhandled error for pid %d, command '%s'", p->pid, p->comm); break; } } } errno = 0; } else if (unlikely(p->log_thrown & log)) { - // error("unsetting log %u on pid %d", log, p->pid); + // netdata_log_error("unsetting log %u on pid %d", log, p->pid); p->log_thrown &= ~log; } @@ -1005,7 +1005,7 @@ static inline int read_proc_pid_stat(struct ebpf_pid_stat *p, void *ptr) static inline int collect_data_for_pid(pid_t pid, void *ptr) { if (unlikely(pid < 0 || pid > pid_max)) { - error("Invalid pid %d read (expected %d to %d). Ignoring process.", pid, 0, pid_max); + netdata_log_error("Invalid pid %d read (expected %d to %d). Ignoring process.", pid, 0, pid_max); return 0; } @@ -1020,7 +1020,7 @@ static inline int collect_data_for_pid(pid_t pid, void *ptr) // check its parent pid if (unlikely(p->ppid < 0 || p->ppid > pid_max)) { - error("Pid %d (command '%s') states invalid parent pid %d. Using 0.", pid, p->comm, p->ppid); + netdata_log_error("Pid %d (command '%s') states invalid parent pid %d. 
Using 0.", pid, p->comm, p->ppid); p->ppid = 0; } @@ -1220,7 +1220,7 @@ static inline void del_pid_entry(pid_t pid) struct ebpf_pid_stat *p = ebpf_all_pids[pid]; if (unlikely(!p)) { - error("attempted to free pid %d that is not allocated.", pid); + netdata_log_error("attempted to free pid %d that is not allocated.", pid); return; } @@ -1338,8 +1338,10 @@ void cleanup_exited_pids() p = p->next; // Clean process structure - ebpf_process_stat_release(global_process_stats[r]); - global_process_stats[r] = NULL; + if (global_process_stats) { + ebpf_process_stat_release(global_process_stats[r]); + global_process_stats[r] = NULL; + } cleanup_variables_from_other_threads(r); @@ -1403,7 +1405,7 @@ static inline void aggregate_pid_on_target(struct ebpf_target *w, struct ebpf_pi } if (unlikely(!w)) { - error("pid %d %s was left without a target!", p->pid, p->comm); + netdata_log_error("pid %d %s was left without a target!", p->pid, p->comm); return; } @@ -1471,36 +1473,40 @@ void collect_data_for_all_processes(int tbl_pid_stats_fd, int maps_per_core) uint32_t key; pids = ebpf_root_of_pids; // global list of all processes running // while (bpf_map_get_next_key(tbl_pid_stats_fd, &key, &next_key) == 0) { - size_t length = sizeof(ebpf_process_stat_t); - if (maps_per_core) - length *= ebpf_nprocs; - while (pids) { - key = pids->pid; - ebpf_process_stat_t *w = global_process_stats[key]; - if (!w) { - w = ebpf_process_stat_get(); - global_process_stats[key] = w; - } + if (tbl_pid_stats_fd != -1) { + size_t length = sizeof(ebpf_process_stat_t); + if (maps_per_core) + length *= ebpf_nprocs; - if (bpf_map_lookup_elem(tbl_pid_stats_fd, &key, process_stat_vector)) { - // Clean Process structures - ebpf_process_stat_release(w); - global_process_stats[key] = NULL; + while (pids) { + key = pids->pid; - cleanup_variables_from_other_threads(key); + ebpf_process_stat_t *w = global_process_stats[key]; + if (!w) { + w = ebpf_process_stat_get(); + global_process_stats[key] = w; + } - pids = pids->next; - continue; - } + if (bpf_map_lookup_elem(tbl_pid_stats_fd, &key, process_stat_vector)) { + // Clean Process structures + ebpf_process_stat_release(w); + global_process_stats[key] = NULL; - ebpf_process_apps_accumulator(process_stat_vector, maps_per_core); + cleanup_variables_from_other_threads(key); - memcpy(w, process_stat_vector, sizeof(ebpf_process_stat_t)); + pids = pids->next; + continue; + } - memset(process_stat_vector, 0, length); + ebpf_process_apps_accumulator(process_stat_vector, maps_per_core); - pids = pids->next; + memcpy(w, process_stat_vector, sizeof(ebpf_process_stat_t)); + + memset(process_stat_vector, 0, length); + + pids = pids->next; + } } link_all_processes_to_their_parents(); diff --git a/collectors/ebpf.plugin/ebpf_apps.h b/collectors/ebpf.plugin/ebpf_apps.h index ad2e338d4..fc894a55f 100644 --- a/collectors/ebpf.plugin/ebpf_apps.h +++ b/collectors/ebpf.plugin/ebpf_apps.h @@ -21,6 +21,7 @@ #include "ebpf_disk.h" #include "ebpf_fd.h" #include "ebpf_filesystem.h" +#include "ebpf_functions.h" #include "ebpf_hardirq.h" #include "ebpf_cachestat.h" #include "ebpf_mdflush.h" diff --git a/collectors/ebpf.plugin/ebpf_cachestat.c b/collectors/ebpf.plugin/ebpf_cachestat.c index c287136cf..72c337941 100644 --- a/collectors/ebpf.plugin/ebpf_cachestat.c +++ b/collectors/ebpf.plugin/ebpf_cachestat.c @@ -58,6 +58,10 @@ netdata_ebpf_targets_t cachestat_targets[] = { {.name = "add_to_page_cache_lru", static char *account_page[NETDATA_CACHESTAT_ACCOUNT_DIRTY_END] ={ "account_page_dirtied", "__set_page_dirty", 
"__folio_mark_dirty" }; +#ifdef NETDATA_DEV_MODE +int cachestat_disable_priority; +#endif + #ifdef LIBBPF_MAJOR_VERSION /** * Disable probe @@ -336,6 +340,179 @@ static inline int ebpf_cachestat_load_and_attach(struct cachestat_bpf *obj, ebpf * *****************************************************************/ +static void ebpf_obsolete_specific_cachestat_charts(char *type, int update_every); + +/** + * Obsolete services + * + * Obsolete all service charts created + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_services(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_CACHESTAT_HIT_RATIO_CHART, + "Hit ratio", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_SYSTEMD_CACHESTAT_HIT_RATIO_CONTEXT, + 21100, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_CACHESTAT_DIRTY_CHART, + "Number of dirty pages", + EBPF_CACHESTAT_DIMENSION_PAGE, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_SYSTEMD_CACHESTAT_MODIFIED_CACHE_CONTEXT, + 21101, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_CACHESTAT_HIT_CHART, + "Number of accessed files", + EBPF_CACHESTAT_DIMENSION_HITS, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_SYSTEMD_CACHESTAT_HIT_FILE_CONTEXT, + 21102, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_CACHESTAT_MISSES_CHART, + "Files out of page cache", + EBPF_CACHESTAT_DIMENSION_MISSES, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_SYSTEMD_CACHESTAT_MISS_FILES_CONTEXT, + 21103, + em->update_every); +} + +/** + * Obsolete cgroup chart + * + * Send obsolete for all charts created before to close. + * + * @param em a pointer to `struct ebpf_module` + */ +static inline void ebpf_obsolete_cachestat_cgroup_charts(ebpf_module_t *em) { + pthread_mutex_lock(&mutex_cgroup_shm); + + ebpf_obsolete_services(em); + + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + ebpf_obsolete_specific_cachestat_charts(ect->name, em->update_every); + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Obsolete global + * + * Obsolete global charts created by thread. + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_cachestat_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, + NETDATA_CACHESTAT_HIT_RATIO_CHART, + "Hit ratio", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21100, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, + NETDATA_CACHESTAT_DIRTY_CHART, + "Number of dirty pages", + EBPF_CACHESTAT_DIMENSION_PAGE, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21101, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, + NETDATA_CACHESTAT_HIT_CHART, + "Number of accessed files", + EBPF_CACHESTAT_DIMENSION_HITS, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21102, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, + NETDATA_CACHESTAT_MISSES_CHART, + "Files out of page cache", + EBPF_CACHESTAT_DIMENSION_MISSES, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21103, + em->update_every); +} + +/** + * Obsolette apps charts + * + * Obsolete apps charts. 
+ * + * @param em a pointer to the structure with the default values. + */ +void ebpf_obsolete_cachestat_apps_charts(struct ebpf_module *em) +{ + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_CACHESTAT_HIT_RATIO_CHART, + "Hit ratio", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 20090, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_CACHESTAT_DIRTY_CHART, + "Number of dirty pages", + EBPF_CACHESTAT_DIMENSION_PAGE, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20091, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_HIT_CHART, + "Number of accessed files", + EBPF_CACHESTAT_DIMENSION_HITS, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20092, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_CACHESTAT_MISSES_CHART, + "Files out of page cache", + EBPF_CACHESTAT_DIMENSION_MISSES, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20093, + em->update_every); +} + /** * Cachestat exit. * @@ -347,17 +524,47 @@ static void ebpf_cachestat_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + if (em->cgroup_charts) { + ebpf_obsolete_cachestat_cgroup_charts(em); + fflush(stdout); + } + + if (em->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + ebpf_obsolete_cachestat_apps_charts(em); + } + + ebpf_obsolete_cachestat_global(em); + +#ifdef NETDATA_DEV_MODE + if (ebpf_aral_cachestat_pid) + ebpf_statistic_obsolete_aral_chart(em, cachestat_disable_priority); +#endif + + + fflush(stdout); + pthread_mutex_unlock(&lock); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + #ifdef LIBBPF_MAJOR_VERSION - if (cachestat_bpf_obj) + if (cachestat_bpf_obj) { cachestat_bpf__destroy(cachestat_bpf_obj); + cachestat_bpf_obj = NULL; + } #endif if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; } pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -1079,7 +1286,9 @@ static void cachestat_collector(ebpf_module_t *em) heartbeat_init(&hb); int counter = update_every - 1; //This will be cancelled by its parent - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -1112,6 +1321,15 @@ static void cachestat_collector(ebpf_module_t *em) pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -1220,7 +1438,7 @@ static int ebpf_cachestat_set_internal_value() } if (!address.addr) { - error("%s cachestat.", NETDATA_EBPF_DEFAULT_FNT_NOT_FOUND); + netdata_log_error("%s cachestat.", NETDATA_EBPF_DEFAULT_FNT_NOT_FOUND); return -1; } @@ -1261,7 +1479,7 @@ static int ebpf_cachestat_load_bpf(ebpf_module_t *em) #endif if (ret) - error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + 
netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); return ret; } @@ -1307,11 +1525,11 @@ void *ebpf_cachestat_thread(void *ptr) pthread_mutex_lock(&lock); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); ebpf_create_memory_charts(em); #ifdef NETDATA_DEV_MODE if (ebpf_aral_cachestat_pid) - ebpf_statistic_create_aral_chart(NETDATA_EBPF_CACHESTAT_ARAL_NAME, em); + cachestat_disable_priority = ebpf_statistic_create_aral_chart(NETDATA_EBPF_CACHESTAT_ARAL_NAME, em); #endif pthread_mutex_unlock(&lock); diff --git a/collectors/ebpf.plugin/ebpf_cachestat.h b/collectors/ebpf.plugin/ebpf_cachestat.h index 2c1f171c7..ba2b12833 100644 --- a/collectors/ebpf.plugin/ebpf_cachestat.h +++ b/collectors/ebpf.plugin/ebpf_cachestat.h @@ -3,8 +3,9 @@ #ifndef NETDATA_EBPF_CACHESTAT_H #define NETDATA_EBPF_CACHESTAT_H 1 -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_CACHESTAT "cachestat" +#define NETDATA_EBPF_CACHESTAT_MODULE_DESC "Monitor Linux page cache internal functions. This thread is integrated with apps and cgroup." // charts #define NETDATA_CACHESTAT_HIT_RATIO_CHART "cachestat_ratio" diff --git a/collectors/ebpf.plugin/ebpf_cgroup.c b/collectors/ebpf.plugin/ebpf_cgroup.c index 6d7c555bd..fd4e783db 100644 --- a/collectors/ebpf.plugin/ebpf_cgroup.c +++ b/collectors/ebpf.plugin/ebpf_cgroup.c @@ -28,7 +28,7 @@ static inline void *ebpf_cgroup_map_shm_locally(int fd, size_t length) value = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (!value) { - error("Cannot map shared memory used between eBPF and cgroup, integration between processes won't happen"); + netdata_log_error("Cannot map shared memory used between eBPF and cgroup, integration between processes won't happen"); close(shm_fd_ebpf_cgroup); shm_fd_ebpf_cgroup = -1; shm_unlink(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME); @@ -71,7 +71,7 @@ void ebpf_map_cgroup_shared_memory() shm_fd_ebpf_cgroup = shm_open(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME, O_RDWR, 0660); if (shm_fd_ebpf_cgroup < 0) { if (limit_try == NETDATA_EBPF_CGROUP_MAX_TRIES) - error("Shared memory was not initialized, integration between processes won't happen."); + netdata_log_error("Shared memory was not initialized, integration between processes won't happen."); return; } @@ -103,7 +103,7 @@ void ebpf_map_cgroup_shared_memory() shm_sem_ebpf_cgroup = sem_open(NETDATA_NAMED_SEMAPHORE_EBPF_CGROUP_NAME, O_CREAT, 0660, 1); if (shm_sem_ebpf_cgroup == SEM_FAILED) { - error("Cannot create semaphore, integration between eBPF and cgroup won't happen"); + netdata_log_error("Cannot create semaphore, integration between eBPF and cgroup won't happen"); limit_try = NETDATA_EBPF_CGROUP_MAX_TRIES + 1; munmap(ebpf_mapped_memory, length); shm_ebpf_cgroup.header = NULL; @@ -303,7 +303,7 @@ void ebpf_parse_cgroup_shm_data() sem_post(shm_sem_ebpf_cgroup); pthread_mutex_unlock(&mutex_cgroup_shm); #ifdef NETDATA_DEV_MODE - info("Updating cgroup %d (Previous: %d, Current: %d)", + netdata_log_info("Updating cgroup %d (Previous: %d, Current: %d)", send_cgroup_chart, previous, shm_ebpf_cgroup.header->cgroup_root_count); #endif diff --git a/collectors/ebpf.plugin/ebpf_dcstat.c b/collectors/ebpf.plugin/ebpf_dcstat.c index 4157f0c87..dba3f44d9 100644 --- a/collectors/ebpf.plugin/ebpf_dcstat.c +++ b/collectors/ebpf.plugin/ebpf_dcstat.c @@ -59,6 +59,10 @@ netdata_ebpf_targets_t dc_targets[] = { {.name = 
"lookup_fast", .mode = EBPF_LOA {.name = "d_lookup", .mode = EBPF_LOAD_TRAMPOLINE}, {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; +#ifdef NETDATA_DEV_MODE +int dcstat_disable_priority; +#endif + #ifdef LIBBPF_MAJOR_VERSION /** * Disable probe @@ -195,7 +199,7 @@ netdata_ebpf_program_loaded_t ebpf_dc_update_load(ebpf_module_t *em) return EBPF_LOAD_TRAMPOLINE; if (em->targets[NETDATA_DC_TARGET_LOOKUP_FAST].mode != EBPF_LOAD_RETPROBE) - info("When your kernel was compiled the symbol %s was modified, instead to use `trampoline`, the plugin will use `probes`.", + netdata_log_info("When your kernel was compiled the symbol %s was modified, instead to use `trampoline`, the plugin will use `probes`.", dc_optional_name[NETDATA_DC_TARGET_LOOKUP_FAST].function_to_attach); return EBPF_LOAD_RETPROBE; @@ -285,6 +289,160 @@ void dcstat_update_publish(netdata_publish_dcstat_t *out, uint64_t cache_access, * *****************************************************************/ +static void ebpf_obsolete_specific_dc_charts(char *type, int update_every); + +/** + * Obsolete services + * + * Obsolete all service charts created + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_dc_services(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_DC_HIT_CHART, + "Percentage of files inside directory cache", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_SYSTEMD_DC_HIT_RATIO_CONTEXT, + 21200, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_DC_REFERENCE_CHART, + "Count file access", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_SYSTEMD_DC_REFERENCE_CONTEXT, + 21201, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_DC_REQUEST_NOT_CACHE_CHART, + "Files not present inside directory cache", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_SYSTEMD_DC_NOT_CACHE_CONTEXT, + 21202, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_DC_REQUEST_NOT_FOUND_CHART, + "Files not found", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_SYSTEMD_DC_NOT_FOUND_CONTEXT, + 21202, + em->update_every); +} + +/** + * Obsolete cgroup chart + * + * Send obsolete for all charts created before to close. + * + * @param em a pointer to `struct ebpf_module` + */ +static inline void ebpf_obsolete_dc_cgroup_charts(ebpf_module_t *em) { + pthread_mutex_lock(&mutex_cgroup_shm); + + ebpf_obsolete_dc_services(em); + + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + ebpf_obsolete_specific_dc_charts(ect->name, em->update_every); + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Obsolette apps charts + * + * Obsolete apps charts. + * + * @param em a pointer to the structure with the default values. 
+ */ +void ebpf_obsolete_dc_apps_charts(struct ebpf_module *em) +{ + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_DC_HIT_CHART, + "Percentage of files inside directory cache", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 20100, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_DC_REFERENCE_CHART, + "Count file access", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20101, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_DC_REQUEST_NOT_CACHE_CHART, + "Files not present inside directory cache", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20102, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_DC_REQUEST_NOT_FOUND_CHART, + "Files not found", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20103, + em->update_every); +} + +/** + * Obsolete global + * + * Obsolete global charts created by thread. + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_dc_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_DC_HIT_CHART, + "Percentage of files inside directory cache", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21200, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_DC_REFERENCE_CHART, + "Variables used to calculate hit ratio.", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21201, + em->update_every); +} + /** * DCstat exit * @@ -296,16 +454,46 @@ static void ebpf_dcstat_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + if (em->cgroup_charts) { + ebpf_obsolete_dc_cgroup_charts(em); + fflush(stdout); + } + + if (em->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + ebpf_obsolete_dc_apps_charts(em); + } + + ebpf_obsolete_dc_global(em); + +#ifdef NETDATA_DEV_MODE + if (ebpf_aral_dcstat_pid) + ebpf_statistic_obsolete_aral_chart(em, dcstat_disable_priority); +#endif + + fflush(stdout); + pthread_mutex_unlock(&lock); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + #ifdef LIBBPF_MAJOR_VERSION - if (dc_bpf_obj) + if (dc_bpf_obj) { dc_bpf__destroy(dc_bpf_obj); + dc_bpf_obj = NULL; + } #endif - if (em->objects) + if (em->objects){ ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -979,7 +1167,9 @@ static void dcstat_collector(ebpf_module_t *em) heartbeat_init(&hb); int counter = update_every - 1; int maps_per_core = em->maps_per_core; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -1012,6 +1202,15 @@ static void dcstat_collector(ebpf_module_t *em) pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); + + 
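The block that opens the next hunk closes every collection cycle by reconciling the local `running_time` counter with the shared `em->running_time` field under `ebpf_exit_cleanup`. The apparent intent: when the new functions thread re-enables a collector it can zero `em->running_time`, and the branch then restarts the local counter instead of accumulating, which renews the thread's lifetime. A condensed sketch of that accounting:

#include <stdint.h>

/* shared points at the module's running_time field; another thread may reset it
 * to zero to renew the lifetime. Returns the new local counter value. */
static uint32_t account_running_time(uint32_t local, uint32_t *shared,
                                     uint32_t update_every)
{
    if (local && *shared == 0)
        local = update_every;      /* lifetime renewed externally: restart */
    else
        local += update_every;     /* normal accumulation */

    *shared = local;
    return local;
}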
pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -1028,7 +1227,7 @@ static void dcstat_collector(ebpf_module_t *em) * * @param update_every value to overwrite the update frequency set by the server. */ -static void ebpf_create_filesystem_charts(int update_every) +static void ebpf_create_dc_global_charts(int update_every) { ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, NETDATA_DC_HIT_CHART, "Percentage of files inside directory cache", @@ -1112,7 +1311,7 @@ static int ebpf_dcstat_load_bpf(ebpf_module_t *em) #endif if (ret) - error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); return ret; } @@ -1156,12 +1355,12 @@ void *ebpf_dcstat_thread(void *ptr) algorithms, NETDATA_DCSTAT_IDX_END); pthread_mutex_lock(&lock); - ebpf_create_filesystem_charts(em->update_every); + ebpf_create_dc_global_charts(em->update_every); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); #ifdef NETDATA_DEV_MODE if (ebpf_aral_dcstat_pid) - ebpf_statistic_create_aral_chart(NETDATA_EBPF_DCSTAT_ARAL_NAME, em); + dcstat_disable_priority = ebpf_statistic_create_aral_chart(NETDATA_EBPF_DCSTAT_ARAL_NAME, em); #endif pthread_mutex_unlock(&lock); diff --git a/collectors/ebpf.plugin/ebpf_dcstat.h b/collectors/ebpf.plugin/ebpf_dcstat.h index 5c9eed4d6..845b65908 100644 --- a/collectors/ebpf.plugin/ebpf_dcstat.h +++ b/collectors/ebpf.plugin/ebpf_dcstat.h @@ -3,8 +3,9 @@ #ifndef NETDATA_EBPF_DCSTAT_H #define NETDATA_EBPF_DCSTAT_H 1 -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_DCSTAT "dcstat" +#define NETDATA_EBPF_DC_MODULE_DESC "Monitor file access using directory cache. This thread is integrated with apps and cgroup." // charts #define NETDATA_DC_HIT_CHART "dc_hit_ratio" diff --git a/collectors/ebpf.plugin/ebpf_disk.c b/collectors/ebpf.plugin/ebpf_disk.c index 231186b84..879456270 100644 --- a/collectors/ebpf.plugin/ebpf_disk.c +++ b/collectors/ebpf.plugin/ebpf_disk.c @@ -52,6 +52,39 @@ static netdata_idx_t *disk_hash_values = NULL; ebpf_publish_disk_t *plot_disks = NULL; pthread_mutex_t plot_mutex; +#ifdef LIBBPF_MAJOR_VERSION +/** + * Set hash table + * + * Set the values for maps according the value given by kernel. + * + * @param obj is the main structure for bpf objects. + */ +static inline void ebpf_disk_set_hash_table(struct disk_bpf *obj) + { + disk_maps[NETDATA_DISK_IO].map_fd = bpf_map__fd(obj->maps.tbl_disk_iocall); + } + +/** + * Load and attach + * + * Load and attach the eBPF code in kernel. + * + * @param obj is the main structure for bpf objects. 
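The new CO-RE path for the disk thread follows the usual libbpf skeleton lifecycle: open the generated skeleton, load it into the kernel, attach its programs, and keep the map file descriptor for the collector. A hedged sketch of that sequence, assuming the generated `includes/disk.skel.h` used elsewhere in this patch; error handling is reduced to the minimum:

#include "includes/disk.skel.h"   /* generated skeleton, as included by ebpf.h */

static int disk_core_start(struct disk_bpf **out, int *map_fd)
{
    struct disk_bpf *obj = disk_bpf__open();
    if (!obj)
        return -1;

    if (disk_bpf__load(obj) || disk_bpf__attach(obj)) {
        disk_bpf__destroy(obj);
        return -1;
    }

    /* the collector reads latency histograms through this map */
    *map_fd = bpf_map__fd(obj->maps.tbl_disk_iocall);
    *out = obj;
    return 0;
}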
+ * + * @return it returns 0 on success and -1 otherwise + */ +static inline int ebpf_disk_load_and_attach(struct disk_bpf *obj) +{ + int ret = disk_bpf__load(obj); + if (ret) { + return ret; + } + + return disk_bpf__attach(obj); +} +#endif + /***************************************************************** * * FUNCTIONS TO MANIPULATE HARD DISKS @@ -308,10 +341,10 @@ static void update_disk_table(char *name, int major, int minor, time_t current_t netdata_ebpf_disks_t *check; check = (netdata_ebpf_disks_t *) avl_insert_lock(&disk_tree, (avl_t *)w); if (check != w) - error("Internal error, cannot insert the AVL tree."); + netdata_log_error("Internal error, cannot insert the AVL tree."); #ifdef NETDATA_INTERNAL_CHECKS - info("The Latency is monitoring the hard disk %s (Major = %d, Minor = %d, Device = %u)", name, major, minor,w->dev); + netdata_log_info("The Latency is monitoring the hard disk %s (Major = %d, Minor = %d, Device = %u)", name, major, minor,w->dev); #endif w->flags |= NETDATA_DISK_IS_HERE; @@ -391,12 +424,12 @@ static void ebpf_disk_disable_tracepoints() char *default_message = { "Cannot disable the tracepoint" }; if (!was_block_issue_enabled) { if (ebpf_disable_tracing_values(tracepoint_block_type, tracepoint_block_issue)) - error("%s %s/%s.", default_message, tracepoint_block_type, tracepoint_block_issue); + netdata_log_error("%s %s/%s.", default_message, tracepoint_block_type, tracepoint_block_issue); } if (!was_block_rq_complete_enabled) { if (ebpf_disable_tracing_values(tracepoint_block_type, tracepoint_block_rq_complete)) - error("%s %s/%s.", default_message, tracepoint_block_type, tracepoint_block_rq_complete); + netdata_log_error("%s %s/%s.", default_message, tracepoint_block_type, tracepoint_block_rq_complete); } } @@ -415,6 +448,7 @@ static void ebpf_cleanup_plot_disks() move = next; } + plot_disks = NULL; } /** @@ -432,6 +466,36 @@ static void ebpf_cleanup_disk_list() move = next; } + disk_list = NULL; +} + +/** + * Obsolete global + * + * Obsolete global charts created by thread. 
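Note that the cleanup helpers in this hunk now leave their list heads at NULL after freeing, so a second call, or a later exit path that runs after them, cannot walk freed memory. A generic sketch of the pattern:

#include <stdlib.h>

struct node { struct node *next; };   /* stand-in for the plugin's list nodes */

static void free_list(struct node **head)
{
    struct node *move = *head;
    while (move) {
        struct node *next = move->next;
        free(move);
        move = next;
    }
    *head = NULL;   /* keep the head in a safe, reusable state */
}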
+ * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_disk_global(ebpf_module_t *em) +{ + ebpf_publish_disk_t *move = plot_disks; + while (move) { + netdata_ebpf_disks_t *ned = move->plot; + uint32_t flags = ned->flags; + if (flags & NETDATA_DISK_CHART_CREATED) { + ebpf_write_chart_obsolete(ned->histogram.name, + ned->family, + "Disk latency", + EBPF_COMMON_DIMENSION_CALL, + ned->family, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + ned->histogram.order, + em->update_every); + } + + move = move->next; + } } /** @@ -445,15 +509,29 @@ static void ebpf_disk_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - if (em->objects) - ebpf_unload_legacy_code(em->objects, em->probe_links); + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + ebpf_obsolete_disk_global(em); + + pthread_mutex_unlock(&lock); + fflush(stdout); + } ebpf_disk_disable_tracepoints(); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, disk_maps, EBPF_ACTION_STAT_REMOVE); + + if (em->objects) { + ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } + if (dimensions) ebpf_histogram_dimension_cleanup(dimensions, NETDATA_EBPF_HIST_MAX_BINS); freez(disk_hash_values); + disk_hash_values = NULL; pthread_mutex_destroy(&plot_mutex); ebpf_cleanup_plot_disks(); @@ -461,6 +539,7 @@ static void ebpf_disk_exit(void *ptr) pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -607,6 +686,8 @@ static void ebpf_create_hd_charts(netdata_ebpf_disks_t *w, int update_every) order++; w->flags |= NETDATA_DISK_CHART_CREATED; + + fflush(stdout); } /** @@ -695,14 +776,16 @@ static void disk_collector(ebpf_module_t *em) heartbeat_init(&hb); int counter = update_every - 1; int maps_per_core = em->maps_per_core; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) continue; counter = 0; - read_hard_disk_tables(disk_maps[NETDATA_DISK_READ].map_fd, maps_per_core); + read_hard_disk_tables(disk_maps[NETDATA_DISK_IO].map_fd, maps_per_core); pthread_mutex_lock(&lock); ebpf_remove_pointer_from_plot_disk(em); ebpf_latency_send_hd_data(update_every); @@ -710,6 +793,15 @@ static void disk_collector(ebpf_module_t *em) pthread_mutex_unlock(&lock); ebpf_update_disks(em); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -749,6 +841,43 @@ static int ebpf_disk_enable_tracepoints() return 0; } +/* + * Load BPF + * + * Load BPF files. + * + * @param em the structure with configuration + * + * @return It returns 0 on success and -1 otherwise. 
+ */ +static int ebpf_disk_load_bpf(ebpf_module_t *em) +{ + int ret = 0; + if (em->load & EBPF_LOAD_LEGACY) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + ret = -1; + } + } +#ifdef LIBBPF_MAJOR_VERSION + else { + disk_bpf_obj = disk_bpf__open(); + if (!disk_bpf_obj) + ret = -1; + else { + ret = ebpf_disk_load_and_attach(disk_bpf_obj); + if (!ret) + ebpf_disk_set_hash_table(disk_bpf_obj); + } + } +#endif + + if (ret) + netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + + return ret; +} + /** * Disk thread * @@ -775,15 +904,15 @@ void *ebpf_disk_thread(void *ptr) } if (pthread_mutex_init(&plot_mutex, NULL)) { - error("Cannot initialize local mutex"); + netdata_log_error("Cannot initialize local mutex"); goto enddisk; } #ifdef LIBBPF_MAJOR_VERSION ebpf_define_map_type(disk_maps, em->maps_per_core, running_on_kernel); + ebpf_adjust_thread_load(em, default_btf); #endif - em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); - if (!em->probe_links) { + if (ebpf_disk_load_bpf(em)) { goto enddisk; } @@ -796,7 +925,7 @@ void *ebpf_disk_thread(void *ptr) pthread_mutex_lock(&lock); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, disk_maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, disk_maps, EBPF_ACTION_STAT_ADD); pthread_mutex_unlock(&lock); disk_collector(em); diff --git a/collectors/ebpf.plugin/ebpf_disk.h b/collectors/ebpf.plugin/ebpf_disk.h index 69c705875..487ed376d 100644 --- a/collectors/ebpf.plugin/ebpf_disk.h +++ b/collectors/ebpf.plugin/ebpf_disk.h @@ -3,8 +3,9 @@ #ifndef NETDATA_EBPF_DISK_H #define NETDATA_EBPF_DISK_H 1 -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_DISK "disk" +#define NETDATA_EBPF_DISK_MODULE_DESC "Monitor disk latency independent of filesystem." 
#include "libnetdata/avl/avl.h" #include "libnetdata/ebpf/ebpf.h" @@ -54,10 +55,7 @@ typedef struct netdata_ebpf_disks { struct netdata_ebpf_disks *next; } netdata_ebpf_disks_t; -enum ebpf_disk_tables { - NETDATA_DISK_READ, - NETDATA_DISK_TMP -}; +enum ebpf_disk_tables { NETDATA_DISK_IO }; typedef struct block_key { uint32_t bin; diff --git a/collectors/ebpf.plugin/ebpf_fd.c b/collectors/ebpf.plugin/ebpf_fd.c index d39e6ae20..214d2955d 100644 --- a/collectors/ebpf.plugin/ebpf_fd.c +++ b/collectors/ebpf.plugin/ebpf_fd.c @@ -57,6 +57,10 @@ netdata_ebpf_targets_t fd_targets[] = { {.name = "open", .mode = EBPF_LOAD_TRAMP {.name = "close", .mode = EBPF_LOAD_TRAMPOLINE}, {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; +#ifdef NETDATA_DEV_MODE +int fd_disable_priority; +#endif + #ifdef LIBBPF_MAJOR_VERSION /** * Disable probe @@ -326,7 +330,7 @@ static inline int ebpf_fd_load_and_attach(struct fd_bpf *obj, ebpf_module_t *em) netdata_ebpf_program_loaded_t test = mt[NETDATA_FD_SYSCALL_OPEN].mode; if (ebpf_fd_set_target_values()) { - error("%s file descriptor.", NETDATA_EBPF_DEFAULT_FNT_NOT_FOUND); + netdata_log_error("%s file descriptor.", NETDATA_EBPF_DEFAULT_FNT_NOT_FOUND); return -1; } @@ -369,6 +373,170 @@ static inline int ebpf_fd_load_and_attach(struct fd_bpf *obj, ebpf_module_t *em) * *****************************************************************/ +static void ebpf_obsolete_specific_fd_charts(char *type, ebpf_module_t *em); + +/** + * Obsolete services + * + * Obsolete all service charts created + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_fd_services(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_FILE_OPEN, + "Number of open files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NETDATA_CGROUP_FD_OPEN_CONTEXT, + 20061, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, + "Fails to open files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NETDATA_CGROUP_FD_OPEN_ERR_CONTEXT, + 20062, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_FILE_CLOSED, + "Files closed", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NETDATA_CGROUP_FD_CLOSE_CONTEXT, + 20063, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, + "Fails to close files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NETDATA_CGROUP_FD_CLOSE_ERR_CONTEXT, + 20064, + em->update_every); + } +} + +/** + * Obsolete cgroup chart + * + * Send obsolete for all charts created before to close. + * + * @param em a pointer to `struct ebpf_module` + */ +static inline void ebpf_obsolete_fd_cgroup_charts(ebpf_module_t *em) { + pthread_mutex_lock(&mutex_cgroup_shm); + + ebpf_obsolete_fd_services(em); + + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + ebpf_obsolete_specific_fd_charts(ect->name, em); + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Obsolette apps charts + * + * Obsolete apps charts. + * + * @param em a pointer to the structure with the default values. 
+ */ +void ebpf_obsolete_fd_apps_charts(struct ebpf_module *em) +{ + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_FILE_OPEN, + "Number of open files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20061, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, + "Fails to open files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20062, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_FILE_CLOSED, + "Files closed", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20063, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, + "Fails to close files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20064, + em->update_every); + } +} + +/** + * Obsolete global + * + * Obsolete global charts created by thread. + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_fd_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_FILE_OPEN_CLOSE_COUNT, + "Open and close calls", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_EBPF_FD_CHARTS, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_FILE_OPEN_ERR_COUNT, + "Open fails", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_EBPF_FD_CHARTS + 1, + em->update_every); + } +} + /** * FD Exit * @@ -380,15 +548,46 @@ static void ebpf_fd_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + if (em->cgroup_charts) { + ebpf_obsolete_fd_cgroup_charts(em); + fflush(stdout); + } + + if (em->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + ebpf_obsolete_fd_apps_charts(em); + } + + ebpf_obsolete_fd_global(em); + +#ifdef NETDATA_DEV_MODE + if (ebpf_aral_fd_pid) + ebpf_statistic_obsolete_aral_chart(em, fd_disable_priority); +#endif + + + fflush(stdout); + pthread_mutex_unlock(&lock); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + #ifdef LIBBPF_MAJOR_VERSION - if (fd_bpf_obj) + if (fd_bpf_obj) { fd_bpf__destroy(fd_bpf_obj); + fd_bpf_obj = NULL; + } #endif - if (em->objects) + if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -935,7 +1134,9 @@ static void fd_collector(ebpf_module_t *em) int update_every = em->update_every; int counter = update_every - 1; int maps_per_core = em->maps_per_core; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -968,6 +1169,15 @@ static void fd_collector(ebpf_module_t *em) pthread_mutex_unlock(&lock); 
pthread_mutex_unlock(&collect_data_mutex); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -1066,6 +1276,8 @@ static void ebpf_create_fd_global_charts(ebpf_module_t *em) NETDATA_FD_SYSCALL_END, em->update_every, NETDATA_EBPF_MODULE_NAME_FD); } + + fflush(stdout); } /***************************************************************** @@ -1125,7 +1337,7 @@ static int ebpf_fd_load_bpf(ebpf_module_t *em) #endif if (ret) - error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); return ret; } @@ -1165,10 +1377,10 @@ void *ebpf_fd_thread(void *ptr) pthread_mutex_lock(&lock); ebpf_create_fd_global_charts(em); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); #ifdef NETDATA_DEV_MODE if (ebpf_aral_fd_pid) - ebpf_statistic_create_aral_chart(NETDATA_EBPF_FD_ARAL_NAME, em); + fd_disable_priority = ebpf_statistic_create_aral_chart(NETDATA_EBPF_FD_ARAL_NAME, em); #endif pthread_mutex_unlock(&lock); diff --git a/collectors/ebpf.plugin/ebpf_fd.h b/collectors/ebpf.plugin/ebpf_fd.h index 85dfd36ea..00986673e 100644 --- a/collectors/ebpf.plugin/ebpf_fd.h +++ b/collectors/ebpf.plugin/ebpf_fd.h @@ -3,8 +3,9 @@ #ifndef NETDATA_EBPF_FD_H #define NETDATA_EBPF_FD_H 1 -// Module name +// Module name & File description #define NETDATA_EBPF_MODULE_NAME_FD "filedescriptor" +#define NETDATA_EBPF_FD_MODULE_DESC "Monitor when files are open and closed. This thread is integrated with apps and cgroup." // Menu group #define NETDATA_FILE_GROUP "file_access" diff --git a/collectors/ebpf.plugin/ebpf_filesystem.c b/collectors/ebpf.plugin/ebpf_filesystem.c index 63f592eb9..2bff738ca 100644 --- a/collectors/ebpf.plugin/ebpf_filesystem.c +++ b/collectors/ebpf.plugin/ebpf_filesystem.c @@ -131,6 +131,202 @@ static netdata_publish_syscall_t filesystem_publish_aggregated[NETDATA_EBPF_HIST char **dimensions = NULL; static netdata_idx_t *filesystem_hash_values = NULL; +#ifdef LIBBPF_MAJOR_VERSION +/** + * FS disable kprobe + * + * Disable kprobes, because system will use trampolines. + * We are not calling this function for while, because we are prioritizing kprobes. We opted by this road, because + * distribution are still not deliverying necessary btf files per FS. + * + * @param obj FS object loaded. 
+ */ +static void ebpf_fs_disable_kprobe(struct filesystem_bpf *obj) + { + // kprobe + bpf_program__set_autoload(obj->progs.netdata_fs_file_read_probe, false); + bpf_program__set_autoload(obj->progs.netdata_fs_file_write_probe, false); + bpf_program__set_autoload(obj->progs.netdata_fs_file_open_probe, false); + bpf_program__set_autoload(obj->progs.netdata_fs_2nd_file_open_probe, false); + bpf_program__set_autoload(obj->progs.netdata_fs_getattr_probe, false); + // kretprobe + bpf_program__set_autoload(obj->progs.netdata_fs_file_read_retprobe, false); + bpf_program__set_autoload(obj->progs.netdata_fs_file_write_retprobe, false); + bpf_program__set_autoload(obj->progs.netdata_fs_file_open_retprobe, false); + bpf_program__set_autoload(obj->progs.netdata_fs_2nd_file_open_retprobe, false); + bpf_program__set_autoload(obj->progs.netdata_fs_getattr_retprobe, false); + } + + /** + * Disable trampoline + * + * Disable trampolines to use kprobes. + * + * @param obj FS object loaded. + */ + static void ebpf_fs_disable_trampoline(struct filesystem_bpf *obj) + { + // entry + bpf_program__set_autoload(obj->progs.netdata_fs_file_read_entry, false); + bpf_program__set_autoload(obj->progs.netdata_fs_file_write_entry, false); + bpf_program__set_autoload(obj->progs.netdata_fs_file_open_entry, false); + bpf_program__set_autoload(obj->progs.netdata_fs_getattr_entry, false); + bpf_program__set_autoload(obj->progs.netdata_fs_2nd_file_open_entry, false); + + // exit + bpf_program__set_autoload(obj->progs.netdata_fs_file_read_exit, false); + bpf_program__set_autoload(obj->progs.netdata_fs_file_write_exit, false); + bpf_program__set_autoload(obj->progs.netdata_fs_file_open_exit, false); + bpf_program__set_autoload(obj->progs.netdata_fs_getattr_exit, false); + bpf_program__set_autoload(obj->progs.netdata_fs_2nd_file_open_exit, false); + } + + /** + * Set targets + * + * Set targets for each objects. + * + * @param obj FS object loaded. + * @param functions array with function names. + */ + static void ebpf_fs_set_target(struct filesystem_bpf *obj, const char **functions) +{ + // entry + bpf_program__set_attach_target(obj->progs.netdata_fs_file_read_entry, 0, + functions[NETDATA_KEY_BTF_READ]); + bpf_program__set_attach_target(obj->progs.netdata_fs_file_write_entry, 0, + functions[NETDATA_KEY_BTF_WRITE]); + bpf_program__set_attach_target(obj->progs.netdata_fs_file_open_entry, 0, + functions[NETDATA_KEY_BTF_OPEN]); + bpf_program__set_attach_target(obj->progs.netdata_fs_getattr_entry, 0, + functions[NETDATA_KEY_BTF_SYNC_ATTR]); + + // exit + bpf_program__set_attach_target(obj->progs.netdata_fs_file_read_exit, 0, + functions[NETDATA_KEY_BTF_READ]); + bpf_program__set_attach_target(obj->progs.netdata_fs_file_write_exit, 0, + functions[NETDATA_KEY_BTF_WRITE]); + bpf_program__set_attach_target(obj->progs.netdata_fs_file_open_exit, 0, + functions[NETDATA_KEY_BTF_OPEN]); + bpf_program__set_attach_target(obj->progs.netdata_fs_getattr_exit, 0, + functions[NETDATA_KEY_BTF_SYNC_ATTR]); + + if (functions[NETDATA_KEY_BTF_OPEN2]) { + bpf_program__set_attach_target(obj->progs.netdata_fs_2nd_file_open_entry, 0, + functions[NETDATA_KEY_BTF_OPEN2]); + bpf_program__set_attach_target(obj->progs.netdata_fs_2nd_file_open_exit, 0, + functions[NETDATA_KEY_BTF_OPEN2]); + } else { + bpf_program__set_autoload(obj->progs.netdata_fs_2nd_file_open_entry, false); + bpf_program__set_autoload(obj->progs.netdata_fs_2nd_file_open_exit, false); + } +} + +/** + * Attach Kprobe + * + * Attach kprobe on targets + * + * @param obj FS object loaded. 
+ * @param functions array with function names. + */ +static int ebpf_fs_attach_kprobe(struct filesystem_bpf *obj, const char **functions) +{ + // kprobe + obj->links.netdata_fs_file_read_probe = bpf_program__attach_kprobe(obj->progs.netdata_fs_file_read_probe, + false, functions[NETDATA_KEY_BTF_READ]); + if (libbpf_get_error(obj->links.netdata_fs_file_read_probe)) + return -1; + + obj->links.netdata_fs_file_write_probe = bpf_program__attach_kprobe(obj->progs.netdata_fs_file_write_probe, + false, functions[NETDATA_KEY_BTF_WRITE]); + if (libbpf_get_error(obj->links.netdata_fs_file_write_probe)) + return -1; + + obj->links.netdata_fs_file_open_probe = bpf_program__attach_kprobe(obj->progs.netdata_fs_file_open_probe, + false, functions[NETDATA_KEY_BTF_OPEN]); + if (libbpf_get_error(obj->links.netdata_fs_file_open_probe)) + return -1; + + obj->links.netdata_fs_getattr_probe = bpf_program__attach_kprobe(obj->progs.netdata_fs_getattr_probe, + false, functions[NETDATA_KEY_BTF_SYNC_ATTR]); + if (libbpf_get_error(obj->links.netdata_fs_getattr_probe)) + return -1; + + // kretprobe + obj->links.netdata_fs_file_read_retprobe = bpf_program__attach_kprobe(obj->progs.netdata_fs_file_read_retprobe, + false, functions[NETDATA_KEY_BTF_READ]); + if (libbpf_get_error(obj->links.netdata_fs_file_read_retprobe)) + return -1; + + obj->links.netdata_fs_file_write_retprobe = bpf_program__attach_kprobe(obj->progs.netdata_fs_file_write_retprobe, + false, functions[NETDATA_KEY_BTF_WRITE]); + if (libbpf_get_error(obj->links.netdata_fs_file_write_retprobe)) + return -1; + + obj->links.netdata_fs_file_open_retprobe = bpf_program__attach_kprobe(obj->progs.netdata_fs_file_open_retprobe, + false, functions[NETDATA_KEY_BTF_OPEN]); + if (libbpf_get_error(obj->links.netdata_fs_file_open_retprobe)) + return -1; + + obj->links.netdata_fs_getattr_retprobe = bpf_program__attach_kprobe(obj->progs.netdata_fs_getattr_retprobe, + false, functions[NETDATA_KEY_BTF_SYNC_ATTR]); + if (libbpf_get_error(obj->links.netdata_fs_getattr_retprobe)) + return -1; + + if (functions[NETDATA_KEY_BTF_OPEN2]) { + obj->links.netdata_fs_2nd_file_open_probe = bpf_program__attach_kprobe(obj->progs.netdata_fs_2nd_file_open_probe, + false, functions[NETDATA_KEY_BTF_OPEN2]); + if (libbpf_get_error(obj->links.netdata_fs_2nd_file_open_probe)) + return -1; + + obj->links.netdata_fs_2nd_file_open_retprobe = bpf_program__attach_kprobe(obj->progs.netdata_fs_2nd_file_open_retprobe, + false, functions[NETDATA_KEY_BTF_OPEN2]); + if (libbpf_get_error(obj->links.netdata_fs_2nd_file_open_retprobe)) + return -1; + } + + return 0; +} + +/** + * Load and Attach + * + * Load binary and attach to targets. + * + * @param map Structure with information about maps. + * @param obj FS object loaded. + * @param functions array with function names. + * @param bf sttruct with btf file loaded. 
+ */ +static inline int ebpf_fs_load_and_attach(ebpf_local_maps_t *map, struct filesystem_bpf *obj, + const char **functions, struct btf *bf) +{ + if (bf) { + ebpf_fs_disable_kprobe(obj); + ebpf_fs_set_target(obj, functions); + } else { + ebpf_fs_disable_trampoline(obj); + } + + int ret = filesystem_bpf__load(obj); + if (ret) { + fprintf(stderr, "failed to load BPF object: %d\n", ret); + return -1; + } + + if (bf) + ret = filesystem_bpf__attach(obj); + else + ret = ebpf_fs_attach_kprobe(obj, functions); + + if (!ret) + map->map_fd = bpf_map__fd(obj->maps.tbl_fs);; + + return ret; +} +#endif + /***************************************************************** * * COMMON FUNCTIONS @@ -199,13 +395,15 @@ static void ebpf_create_fs_charts(int update_every) snprintfz(chart_name, 63, "%s_read_latency", efp->filesystem); efp->hread.name = strdupz(chart_name); efp->hread.title = strdupz(title); + efp->hread.ctx = NULL; efp->hread.order = order; efp->family_name = strdupz(family); ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hread.name, - title, - EBPF_COMMON_DIMENSION_CALL, family, - "filesystem.read_latency", NETDATA_EBPF_CHART_TYPE_STACKED, order, ebpf_create_global_dimension, + efp->hread.title, + EBPF_COMMON_DIMENSION_CALL, efp->family_name, + "filesystem.read_latency", NETDATA_EBPF_CHART_TYPE_STACKED, order, + ebpf_create_global_dimension, filesystem_publish_aggregated, NETDATA_EBPF_HIST_MAX_BINS, update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM); order++; @@ -214,11 +412,13 @@ static void ebpf_create_fs_charts(int update_every) snprintfz(chart_name, 63, "%s_write_latency", efp->filesystem); efp->hwrite.name = strdupz(chart_name); efp->hwrite.title = strdupz(title); + efp->hwrite.ctx = NULL; efp->hwrite.order = order; ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hwrite.name, - title, - EBPF_COMMON_DIMENSION_CALL, family, - "filesystem.write_latency", NETDATA_EBPF_CHART_TYPE_STACKED, order, ebpf_create_global_dimension, + efp->hwrite.title, + EBPF_COMMON_DIMENSION_CALL, efp->family_name, + "filesystem.write_latency", NETDATA_EBPF_CHART_TYPE_STACKED, order, + ebpf_create_global_dimension, filesystem_publish_aggregated, NETDATA_EBPF_HIST_MAX_BINS, update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM); order++; @@ -227,11 +427,13 @@ static void ebpf_create_fs_charts(int update_every) snprintfz(chart_name, 63, "%s_open_latency", efp->filesystem); efp->hopen.name = strdupz(chart_name); efp->hopen.title = strdupz(title); + efp->hopen.ctx = NULL; efp->hopen.order = order; ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hopen.name, - title, - EBPF_COMMON_DIMENSION_CALL, family, - "filesystem.open_latency", NETDATA_EBPF_CHART_TYPE_STACKED, order, ebpf_create_global_dimension, + efp->hopen.title, + EBPF_COMMON_DIMENSION_CALL, efp->family_name, + "filesystem.open_latency", NETDATA_EBPF_CHART_TYPE_STACKED, order, + ebpf_create_global_dimension, filesystem_publish_aggregated, NETDATA_EBPF_HIST_MAX_BINS, update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM); order++; @@ -242,9 +444,10 @@ static void ebpf_create_fs_charts(int update_every) snprintfz(ctx, 63, "filesystem.%s_latency", type); efp->hadditional.name = strdupz(chart_name); efp->hadditional.title = strdupz(title); + efp->hadditional.ctx = strdupz(ctx); efp->hadditional.order = order; - ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hadditional.name, title, - EBPF_COMMON_DIMENSION_CALL, family, + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hadditional.name, efp->hadditional.title, + EBPF_COMMON_DIMENSION_CALL, efp->family_name, ctx, 
NETDATA_EBPF_CHART_TYPE_STACKED, order, ebpf_create_global_dimension, filesystem_publish_aggregated, NETDATA_EBPF_HIST_MAX_BINS, update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM); @@ -252,6 +455,8 @@ static void ebpf_create_fs_charts(int update_every) efp->flags |= NETDATA_FILESYSTEM_FLAG_CHART_CREATED; } } + + fflush(stdout); } /** @@ -263,6 +468,7 @@ static void ebpf_create_fs_charts(int update_every) */ int ebpf_filesystem_initialize_ebpf_data(ebpf_module_t *em) { + pthread_mutex_lock(&lock); int i; const char *saved_name = em->thread_name; uint64_t kernels = em->kernels; @@ -275,17 +481,32 @@ int ebpf_filesystem_initialize_ebpf_data(ebpf_module_t *em) #ifdef LIBBPF_MAJOR_VERSION ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel); #endif - efp->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &efp->objects); - if (!efp->probe_links) { - em->thread_name = saved_name; - em->kernels = kernels; - em->maps = NULL; - return -1; + if (em->load & EBPF_LOAD_LEGACY) { + efp->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &efp->objects); + if (!efp->probe_links) { + em->thread_name = saved_name; + em->kernels = kernels; + em->maps = NULL; + pthread_mutex_unlock(&lock); + return -1; + } } +#ifdef LIBBPF_MAJOR_VERSION + else { + efp->fs_obj = filesystem_bpf__open(); + if (!efp->fs_obj) { + em->thread_name = saved_name; + em->kernels = kernels; + return -1; + } else { + if (ebpf_fs_load_and_attach(em->maps, efp->fs_obj, + efp->functions, NULL)) + return -1; + } + } +#endif efp->flags |= NETDATA_FILESYSTEM_FLAG_HAS_PARTITION; - pthread_mutex_lock(&lock); ebpf_update_kernel_memory(&plugin_statistics, efp->fs_maps, EBPF_ACTION_STAT_ADD); - pthread_mutex_unlock(&lock); // Nedeed for filesystems like btrfs if ((efp->flags & NETDATA_FILESYSTEM_FILL_ADDRESS_TABLE) && (efp->addresses.function)) { @@ -295,6 +516,7 @@ int ebpf_filesystem_initialize_ebpf_data(ebpf_module_t *em) efp->flags &= ~NETDATA_FILESYSTEM_LOAD_EBPF_PROGRAM; } em->thread_name = saved_name; + pthread_mutex_unlock(&lock); em->kernels = kernels; em->maps = NULL; @@ -405,43 +627,88 @@ void ebpf_filesystem_cleanup_ebpf_data() ebpf_filesystem_partitions_t *efp = &localfs[i]; if (efp->probe_links) { freez(efp->family_name); + efp->family_name = NULL; freez(efp->hread.name); + efp->hread.name = NULL; freez(efp->hread.title); + efp->hread.title = NULL; freez(efp->hwrite.name); + efp->hwrite.name = NULL; freez(efp->hwrite.title); + efp->hwrite.title = NULL; freez(efp->hopen.name); + efp->hopen.name = NULL; freez(efp->hopen.title); + efp->hopen.title = NULL; freez(efp->hadditional.name); + efp->hadditional.name = NULL; freez(efp->hadditional.title); + efp->hadditional.title = NULL; + freez(efp->hadditional.ctx); + efp->hadditional.ctx = NULL; } } } /** - * Filesystem Free + * Obsolete global * - * Cleanup variables after child threads to stop + * Obsolete global charts created by thread. * - * @param ptr thread data. 
+ * @param em a pointer to `struct ebpf_module` */ -static void ebpf_filesystem_free(ebpf_module_t *em) +static void ebpf_obsolete_filesystem_global(ebpf_module_t *em) { - pthread_mutex_lock(&ebpf_exit_cleanup); - em->enabled = NETDATA_THREAD_EBPF_STOPPING; - pthread_mutex_unlock(&ebpf_exit_cleanup); - - ebpf_filesystem_cleanup_ebpf_data(); - if (dimensions) - ebpf_histogram_dimension_cleanup(dimensions, NETDATA_EBPF_HIST_MAX_BINS); - freez(filesystem_hash_values); + int i; + for (i = 0; localfs[i].filesystem; i++) { + ebpf_filesystem_partitions_t *efp = &localfs[i]; + if (!efp->objects) + continue; - pthread_mutex_lock(&ebpf_exit_cleanup); - em->enabled = NETDATA_THREAD_EBPF_STOPPED; - pthread_mutex_unlock(&ebpf_exit_cleanup); + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + efp->hread.name, + efp->hread.title, + EBPF_COMMON_DIMENSION_CALL, + efp->family_name, + NETDATA_EBPF_CHART_TYPE_STACKED, + "filesystem.read_latency", + efp->hread.order, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + efp->hwrite.name, + efp->hwrite.title, + EBPF_COMMON_DIMENSION_CALL, + efp->family_name, + NETDATA_EBPF_CHART_TYPE_STACKED, + "filesystem.write_latency", + efp->hwrite.order, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + efp->hopen.name, + efp->hopen.title, + EBPF_COMMON_DIMENSION_CALL, + efp->family_name, + NETDATA_EBPF_CHART_TYPE_STACKED, + "filesystem.open_latency", + efp->hopen.order, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + efp->hadditional.name, + efp->hadditional.title, + EBPF_COMMON_DIMENSION_CALL, + efp->family_name, + NETDATA_EBPF_CHART_TYPE_STACKED, + efp->hadditional.ctx, + efp->hadditional.order, + em->update_every); + } } /** @@ -454,7 +721,39 @@ static void ebpf_filesystem_free(ebpf_module_t *em) static void ebpf_filesystem_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - ebpf_filesystem_free(em); + + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + ebpf_obsolete_filesystem_global(em); + + pthread_mutex_unlock(&lock); + fflush(stdout); + } + + ebpf_filesystem_cleanup_ebpf_data(); + if (dimensions) { + ebpf_histogram_dimension_cleanup(dimensions, NETDATA_EBPF_HIST_MAX_BINS); + dimensions = NULL; + } + + freez(filesystem_hash_values); + + int i; + for (i = 0; localfs[i].filesystem; i++) { + ebpf_filesystem_partitions_t *efp = &localfs[i]; + if (!efp->probe_links) + continue; + + ebpf_unload_legacy_code(efp->objects, efp->probe_links); + efp->objects = NULL; + efp->probe_links = NULL; + efp->flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION; + } + + pthread_mutex_lock(&ebpf_exit_cleanup); + em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); + pthread_mutex_unlock(&ebpf_exit_cleanup); } /***************************************************************** @@ -608,7 +907,9 @@ static void filesystem_collector(ebpf_module_t *em) heartbeat_t hb; heartbeat_init(&hb); int counter = update_every - 1; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -622,6 +923,15 @@ static void filesystem_collector(ebpf_module_t *em) ebpf_histogram_send_data(); pthread_mutex_unlock(&lock); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += 
update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -683,9 +993,12 @@ void *ebpf_filesystem_thread(void *ptr) // Initialize optional as zero, to identify when there are not partitions to monitor em->optional = 0; +#ifdef LIBBPF_MAJOR_VERSION + ebpf_adjust_thread_load(em, default_btf); +#endif if (ebpf_update_partitions(em)) { if (em->optional) - info("Netdata cannot monitor the filesystems used on this host."); + netdata_log_info("Netdata cannot monitor the filesystems used on this host."); goto endfilesystem; } diff --git a/collectors/ebpf.plugin/ebpf_filesystem.h b/collectors/ebpf.plugin/ebpf_filesystem.h index b1126badb..f58d7fbe4 100644 --- a/collectors/ebpf.plugin/ebpf_filesystem.h +++ b/collectors/ebpf.plugin/ebpf_filesystem.h @@ -3,10 +3,14 @@ #ifndef NETDATA_EBPF_FILESYSTEM_H #define NETDATA_EBPF_FILESYSTEM_H 1 -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_FILESYSTEM "filesystem" +#define NETDATA_EBPF_FS_MODULE_DESC "Monitor filesystem latency for: btrfs, ext4, nfs, xfs and zfs." #include "ebpf.h" +#ifdef LIBBPF_MAJOR_VERSION +#include "includes/filesystem.skel.h" +#endif #define NETDATA_FS_MAX_DIST_NAME 64UL diff --git a/collectors/ebpf.plugin/ebpf_functions.c b/collectors/ebpf.plugin/ebpf_functions.c new file mode 100644 index 000000000..cc26044c4 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_functions.c @@ -0,0 +1,419 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_functions.h" + +/***************************************************************** + * EBPF SELECT MODULE + *****************************************************************/ + +/** + * Select Module + * + * @param thread_name name of the thread we are looking for. + * + * @return it returns a pointer for the module that has thread_name on success or NULL otherwise. + */ +ebpf_module_t *ebpf_functions_select_module(const char *thread_name) { + int i; + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { + if (strcmp(ebpf_modules[i].thread_name, thread_name) == 0) { + return &ebpf_modules[i]; + } + } + + return NULL; +} + +/***************************************************************** + * EBPF HELP FUNCTIONS + *****************************************************************/ + +/** + * Thread Help + * + * Shows help with all options accepted by thread function. + * + * @param transaction the transaction id that Netdata sent for this function execution +*/ +static void ebpf_function_thread_manipulation_help(const char *transaction) { + pthread_mutex_lock(&lock); + pluginsd_function_result_begin_to_stdout(transaction, HTTP_RESP_OK, "text/plain", now_realtime_sec() + 3600); + fprintf(stdout, "%s", + "ebpf.plugin / thread\n" + "\n" + "Function `thread` allows user to control eBPF threads.\n" + "\n" + "The following filters are supported:\n" + "\n" + " thread:NAME\n" + " Shows information for the thread NAME. Names are listed inside `ebpf.d.conf`.\n" + "\n" + " enable:NAME:PERIOD\n" + " Enable a specific thread named `NAME` to run a specific PERIOD in seconds. When PERIOD is not\n" + " specified plugin will use the default 300 seconds\n" + "\n" + " disable:NAME\n" + " Disable a sp.\n" + "\n" + "Filters can be combined. 
Each filter can be given only one time.\n" + "Process thread is not controlled by functions until we finish the creation of functions per thread..\n" + ); + pluginsd_function_result_end_to_stdout(); + fflush(stdout); + pthread_mutex_unlock(&lock); +} + + +/***************************************************************** + * EBPF ERROR FUNCTIONS + *****************************************************************/ + +/** + * Function error + * + * Show error when a wrong function is given + * + * @param transaction the transaction id that Netdata sent for this function execution + * @param code the error code to show with the message. + * @param msg the error message + */ +static void ebpf_function_error(const char *transaction, int code, const char *msg) { + char buffer[PLUGINSD_LINE_MAX + 1]; + json_escape_string(buffer, msg, PLUGINSD_LINE_MAX); + + pluginsd_function_result_begin_to_stdout(transaction, code, "application/json", now_realtime_sec()); + fprintf(stdout, "{\"status\":%d,\"error_message\":\"%s\"}", code, buffer); + pluginsd_function_result_end_to_stdout(); +} + +/***************************************************************** + * EBPF THREAD FUNCTION + *****************************************************************/ + +/** + * Function enable + * + * Enable a specific thread. + * + * @param transaction the transaction id that Netdata sent for this function execution + * @param function function name and arguments given to thread. + * @param line_buffer buffer used to parse args + * @param line_max Number of arguments given + * @param timeout The function timeout + * @param em The structure with thread information + */ +static void ebpf_function_thread_manipulation(const char *transaction, + char *function __maybe_unused, + char *line_buffer __maybe_unused, + int line_max __maybe_unused, + int timeout __maybe_unused, + ebpf_module_t *em) +{ + char *words[PLUGINSD_MAX_WORDS] = { NULL }; + char message[512]; + uint32_t show_specific_thread = 0; + size_t num_words = quoted_strings_splitter_pluginsd(function, words, PLUGINSD_MAX_WORDS); + for(int i = 1; i < PLUGINSD_MAX_WORDS ;i++) { + const char *keyword = get_word(words, num_words, i); + if (!keyword) + break; + + ebpf_module_t *lem; + if(strncmp(keyword, EBPF_THREADS_ENABLE_CATEGORY, sizeof(EBPF_THREADS_ENABLE_CATEGORY) -1) == 0) { + char thread_name[128]; + int period = -1; + const char *name = &keyword[sizeof(EBPF_THREADS_ENABLE_CATEGORY) - 1]; + char *separator = strchr(name, ':'); + if (separator) { + strncpyz(thread_name, name, separator - name); + period = str2i(++separator); + } else { + strncpyz(thread_name, name, strlen(name)); + } + + lem = ebpf_functions_select_module(thread_name); + if (!lem) { + snprintfz(message, 511, "%s%s", EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND, name); + ebpf_function_error(transaction, HTTP_RESP_NOT_FOUND, message); + return; + } + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (lem->enabled > NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + struct netdata_static_thread *st = lem->thread; + // Load configuration again + ebpf_update_module(lem, default_btf, running_on_kernel, isrh); + + // another request for thread that already ran, cleanup and restart + if (st->thread) + freez(st->thread); + + if (period <= 0) + period = EBPF_DEFAULT_LIFETIME; + + st->thread = mallocz(sizeof(netdata_thread_t)); + lem->enabled = NETDATA_THREAD_EBPF_FUNCTION_RUNNING; + lem->lifetime = period; + +#ifdef NETDATA_INTERNAL_CHECKS + netdata_log_info("Starting thread %s with lifetime = %d", thread_name, 
period); +#endif + + netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, + st->start_routine, lem); + } else { + lem->running_time = 0; + if (period > 0) // user is modifying period to run + lem->lifetime = period; +#ifdef NETDATA_INTERNAL_CHECKS + netdata_log_info("Thread %s had lifetime updated for %d", thread_name, period); +#endif + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + } else if(strncmp(keyword, EBPF_THREADS_DISABLE_CATEGORY, sizeof(EBPF_THREADS_DISABLE_CATEGORY) -1) == 0) { + const char *name = &keyword[sizeof(EBPF_THREADS_DISABLE_CATEGORY) - 1]; + lem = ebpf_functions_select_module(name); + if (!lem) { + snprintfz(message, 511, "%s%s", EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND, name); + ebpf_function_error(transaction, HTTP_RESP_NOT_FOUND, message); + return; + } + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (lem->enabled < NETDATA_THREAD_EBPF_STOPPING && lem->thread->thread) { + lem->lifetime = 0; + lem->running_time = lem->update_every; + netdata_thread_cancel(*lem->thread->thread); + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + } else if(strncmp(keyword, EBPF_THREADS_SELECT_THREAD, sizeof(EBPF_THREADS_SELECT_THREAD) -1) == 0) { + const char *name = &keyword[sizeof(EBPF_THREADS_SELECT_THREAD) - 1]; + lem = ebpf_functions_select_module(name); + if (!lem) { + snprintfz(message, 511, "%s%s", EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND, name); + ebpf_function_error(transaction, HTTP_RESP_NOT_FOUND, message); + return; + } + + show_specific_thread |= 1<thread_id; + } else if(strncmp(keyword, "help", 4) == 0) { + ebpf_function_thread_manipulation_help(transaction); + return; + } + } + + time_t expires = now_realtime_sec() + em->update_every; + + BUFFER *wb = buffer_create(PLUGINSD_LINE_MAX, NULL); + buffer_json_initialize(wb, "\"", "\"", 0, true, false); + buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); + buffer_json_member_add_string(wb, "type", "table"); + buffer_json_member_add_time_t(wb, "update_every", em->update_every); + buffer_json_member_add_string(wb, "help", EBPF_PLUGIN_THREAD_FUNCTION_DESCRIPTION); + + // Collect data + buffer_json_member_add_array(wb, "data"); + int i; + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { + if (show_specific_thread && !(show_specific_thread & 1<thread_name); + + // description + buffer_json_add_array_item_string(wb, wem->thread_description); + // Either it is not running or received a disabled signal and it is stopping. + if (wem->enabled > NETDATA_THREAD_EBPF_FUNCTION_RUNNING || + (!wem->lifetime && (int)wem->running_time == wem->update_every)) { + // status + buffer_json_add_array_item_string(wb, EBPF_THREAD_STATUS_STOPPED); + + // Time remaining + buffer_json_add_array_item_uint64(wb, 0); + + // action + buffer_json_add_array_item_string(wb, "NULL"); + } else { + // status + buffer_json_add_array_item_string(wb, EBPF_THREAD_STATUS_RUNNING); + + // Time remaining + buffer_json_add_array_item_uint64(wb, (wem->lifetime) ? (wem->lifetime - wem->running_time) : 0); + + // action + buffer_json_add_array_item_string(wb, "Enabled/Disabled"); + } + + buffer_json_array_close(wb); + } + + buffer_json_array_close(wb); // data + + buffer_json_member_add_object(wb, "columns"); + { + int fields_id = 0; + + // IMPORTANT! + // THE ORDER SHOULD BE THE SAME WITH THE VALUES! 
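A minimal sketch of the row/column contract flagged by the warning above, assuming an illustrative thread name and time value: each row appended to the "data" array earlier in this function must follow the exact order of the columns declared below (Thread, Description, Status, Time, Action).

    // Illustrative only -- mirrors the buffer_json_* calls used for the data rows above.
    buffer_json_add_array_item_string(wb, "softirq");                  // Thread (example name)
    buffer_json_add_array_item_string(wb, "Show time spent ...");      // Description
    buffer_json_add_array_item_string(wb, EBPF_THREAD_STATUS_RUNNING); // Status
    buffer_json_add_array_item_uint64(wb, 250);                        // Time remaining, in seconds
    buffer_json_add_array_item_string(wb, "Enabled/Disabled");         // Action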
+ buffer_rrdf_table_add_field(wb, fields_id++, "Thread", "Thread Name", RRDF_FIELD_TYPE_STRING, + RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN, + RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL); + + buffer_rrdf_table_add_field(wb, fields_id++, "Description", "Thread Desc", RRDF_FIELD_TYPE_STRING, + RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN, + RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL); + + buffer_rrdf_table_add_field(wb, fields_id++, "Status", "Thread Status", RRDF_FIELD_TYPE_STRING, + RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN, + RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL); + + buffer_rrdf_table_add_field(wb, fields_id++, "Time", "Time Remaining", RRDF_FIELD_TYPE_INTEGER, + RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, 0, NULL, + NAN, RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, fields_id++, "Action", "Thread Action", RRDF_FIELD_TYPE_STRING, + RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN, + RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL); + } + buffer_json_object_close(wb); // columns + + buffer_json_member_add_string(wb, "default_sort_column", "Thread"); + + buffer_json_member_add_object(wb, "charts"); + { + // Threads + buffer_json_member_add_object(wb, "eBPFThreads"); + { + buffer_json_member_add_string(wb, "name", "Threads"); + buffer_json_member_add_string(wb, "type", "line"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "Threads"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + + // Life Time + buffer_json_member_add_object(wb, "eBPFLifeTime"); + { + buffer_json_member_add_string(wb, "name", "LifeTime"); + buffer_json_member_add_string(wb, "type", "line"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "Threads"); + buffer_json_add_array_item_string(wb, "Time"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); // charts + + // Do we use only on fields that can be groupped? 
+ buffer_json_member_add_object(wb, "group_by"); + { + // group by Status + buffer_json_member_add_object(wb, "Status"); + { + buffer_json_member_add_string(wb, "name", "Thread status"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "Status"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); // group_by + + buffer_json_member_add_time_t(wb, "expires", expires); + buffer_json_finalize(wb); + + // Lock necessary to avoid race condition + pthread_mutex_lock(&lock); + pluginsd_function_result_begin_to_stdout(transaction, HTTP_RESP_OK, "application/json", expires); + + fwrite(buffer_tostring(wb), buffer_strlen(wb), 1, stdout); + + pluginsd_function_result_end_to_stdout(); + fflush(stdout); + pthread_mutex_unlock(&lock); + + buffer_free(wb); +} + + +/***************************************************************** + * EBPF FUNCTION THREAD + *****************************************************************/ + +/** + * FUNCTION thread. + * + * @param ptr a `ebpf_module_t *`. + * + * @return always NULL. + */ +void *ebpf_function_thread(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + char buffer[PLUGINSD_LINE_MAX + 1]; + + char *s = NULL; + while(!ebpf_exit_plugin && (s = fgets(buffer, PLUGINSD_LINE_MAX, stdin))) { + char *words[PLUGINSD_MAX_WORDS] = { NULL }; + size_t num_words = quoted_strings_splitter_pluginsd(buffer, words, PLUGINSD_MAX_WORDS); + + const char *keyword = get_word(words, num_words, 0); + + if(keyword && strcmp(keyword, PLUGINSD_KEYWORD_FUNCTION) == 0) { + char *transaction = get_word(words, num_words, 1); + char *timeout_s = get_word(words, num_words, 2); + char *function = get_word(words, num_words, 3); + + if(!transaction || !*transaction || !timeout_s || !*timeout_s || !function || !*function) { + netdata_log_error("Received incomplete %s (transaction = '%s', timeout = '%s', function = '%s'). Ignoring it.", + keyword, + transaction?transaction:"(unset)", + timeout_s?timeout_s:"(unset)", + function?function:"(unset)"); + } + else { + int timeout = str2i(timeout_s); + if (!strncmp(function, EBPF_FUNCTION_THREAD, sizeof(EBPF_FUNCTION_THREAD) - 1)) + ebpf_function_thread_manipulation(transaction, + function, + buffer, + PLUGINSD_LINE_MAX + 1, + timeout, + em); + else + ebpf_function_error(transaction, + HTTP_RESP_NOT_FOUND, + "No function with this name found in ebpf.plugin."); + } + } + else + netdata_log_error("Received unknown command: %s", keyword ? keyword : "(unset)"); + } + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_functions.h b/collectors/ebpf.plugin/ebpf_functions.h new file mode 100644 index 000000000..b20dab634 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_functions.h @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_FUNCTIONS_H +#define NETDATA_EBPF_FUNCTIONS_H 1 + +// configuration file & description +#define NETDATA_DIRECTORY_FUNCTIONS_CONFIG_FILE "functions.conf" +#define NETDATA_EBPF_FUNCTIONS_MODULE_DESC "Show information about current function status." + +// function list +#define EBPF_FUNCTION_THREAD "ebpf_thread" + +#define EBPF_PLUGIN_THREAD_FUNCTION_DESCRIPTION "Detailed information about eBPF threads." 
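An illustrative exchange, assuming made-up transaction ids, the agent-side quoting, and the `mount` thread as the example; it maps the filters documented in the help text onto the EBPF_FUNCTION_THREAD command that ebpf_function_thread() parses from stdin.

    // FUNCTION tr-0001 10 "ebpf_thread enable:mount:300"  -> start (or extend) the mount thread for 300 seconds
    // FUNCTION tr-0002 10 "ebpf_thread thread:mount"      -> restrict the reported table to the mount thread
    // FUNCTION tr-0003 10 "ebpf_thread disable:mount"     -> ask the mount thread to stop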
+#define EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND "ebpf.plugin does not have thread named " + +#define EBPF_PLUGIN_FUNCTIONS(NAME, DESC) do { \ + fprintf(stdout, PLUGINSD_KEYWORD_FUNCTION " \"" NAME "\" 10 \"%s\"\n", DESC); \ +} while(0) + +#define EBPF_THREADS_SELECT_THREAD "thread:" +#define EBPF_THREADS_ENABLE_CATEGORY "enable:" +#define EBPF_THREADS_DISABLE_CATEGORY "disable:" + +#define EBPF_THREAD_STATUS_RUNNING "running" +#define EBPF_THREAD_STATUS_STOPPED "stopped" + +void *ebpf_function_thread(void *ptr); + +#endif diff --git a/collectors/ebpf.plugin/ebpf_hardirq.c b/collectors/ebpf.plugin/ebpf_hardirq.c index f714c261c..9092c7ac3 100644 --- a/collectors/ebpf.plugin/ebpf_hardirq.c +++ b/collectors/ebpf.plugin/ebpf_hardirq.c @@ -9,8 +9,6 @@ struct config hardirq_config = { .first_section = NULL, .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, .rwlock = AVL_LOCK_INITIALIZER } }; -#define HARDIRQ_MAP_LATENCY 0 -#define HARDIRQ_MAP_LATENCY_STATIC 1 static ebpf_local_maps_t hardirq_maps[] = { { .name = "tbl_hardirq", @@ -138,6 +136,36 @@ static hardirq_static_val_t hardirq_static_vals[] = { // thread will write to netdata agent. static avl_tree_lock hardirq_pub; +#ifdef LIBBPF_MAJOR_VERSION +/** + * Set hash table + * + * Set the values for maps according the value given by kernel. + * + * @param obj is the main structure for bpf objects. + */ +static inline void ebpf_hardirq_set_hash_table(struct hardirq_bpf *obj) +{ + hardirq_maps[HARDIRQ_MAP_LATENCY].map_fd = bpf_map__fd(obj->maps.tbl_hardirq); + hardirq_maps[HARDIRQ_MAP_LATENCY_STATIC].map_fd = bpf_map__fd(obj->maps.tbl_hardirq_static); +} + +/** + * Load and Attach + * + * Load and attach bpf software. + */ +static inline int ebpf_hardirq_load_and_attach(struct hardirq_bpf *obj) +{ + int ret = hardirq_bpf__load(obj); + if (ret) { + return -1; + } + + return hardirq_bpf__attach(obj); +} +#endif + /***************************************************************** * * ARAL SECTION @@ -187,6 +215,27 @@ void ebpf_hardirq_release(hardirq_val_t *stat) * *****************************************************************/ +/** + * Obsolete global + * + * Obsolete global charts created by thread. 
+ * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_hardirq_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, + "hardirq_latency", + "Hardware IRQ latency", + EBPF_COMMON_DIMENSION_MILLISECONDS, + "interrupts", + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + NETDATA_CHART_PRIO_HARDIRQ_LATENCY, + em->update_every + ); +} + /** * Hardirq Exit * @@ -198,8 +247,22 @@ static void hardirq_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - if (em->objects) + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + + ebpf_obsolete_hardirq_global(em); + + pthread_mutex_unlock(&lock); + fflush(stdout); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + + if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } for (int i = 0; hardirq_tracepoints[i].class != NULL; i++) { ebpf_disable_tracepoint(&hardirq_tracepoints[i]); @@ -207,6 +270,7 @@ static void hardirq_exit(void *ptr) pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -378,7 +442,7 @@ static int hardirq_read_latency_map(int mapfd) avl_t *check = avl_insert_lock(&hardirq_pub, (avl_t *)v); if (check != (avl_t *)v) { - error("Internal error, cannot insert the AVL tree."); + netdata_log_error("Internal error, cannot insert the AVL tree."); } } @@ -505,7 +569,7 @@ static void hardirq_collector(ebpf_module_t *em) hardirq_create_charts(em->update_every); hardirq_create_static_dims(); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); pthread_mutex_unlock(&lock); // loop and read from published data until ebpf plugin is closed. @@ -514,7 +578,9 @@ static void hardirq_collector(ebpf_module_t *em) int update_every = em->update_every; int counter = update_every - 1; //This will be cancelled by its parent - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -533,6 +599,15 @@ static void hardirq_collector(ebpf_module_t *em) write_end_chart(); pthread_mutex_unlock(&lock); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -540,6 +615,40 @@ static void hardirq_collector(ebpf_module_t *em) * EBPF HARDIRQ THREAD *****************************************************************/ +/* + * Load BPF + * + * Load BPF files. + * + * @param em the structure with configuration + * + * @return It returns 0 on success and -1 otherwise. 
+ */ +static int ebpf_hardirq_load_bpf(ebpf_module_t *em) +{ + int ret = 0; + if (em->load & EBPF_LOAD_LEGACY) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + ret = -1; + } + } +#ifdef LIBBPF_MAJOR_VERSION + else { + hardirq_bpf_obj = hardirq_bpf__open(); + if (!hardirq_bpf_obj) + ret = -1; + else { + ret = ebpf_hardirq_load_and_attach(hardirq_bpf_obj); + if (!ret) + ebpf_hardirq_set_hash_table(hardirq_bpf_obj); + } + } +#endif + + return ret; +} + /** * Hard IRQ latency thread. * @@ -559,9 +668,9 @@ void *ebpf_hardirq_thread(void *ptr) #ifdef LIBBPF_MAJOR_VERSION ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel); + ebpf_adjust_thread_load(em, default_btf); #endif - em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); - if (!em->probe_links) { + if (ebpf_hardirq_load_bpf(em)) { goto endhardirq; } diff --git a/collectors/ebpf.plugin/ebpf_hardirq.h b/collectors/ebpf.plugin/ebpf_hardirq.h index 52dea1e56..35b03b761 100644 --- a/collectors/ebpf.plugin/ebpf_hardirq.h +++ b/collectors/ebpf.plugin/ebpf_hardirq.h @@ -3,6 +3,9 @@ #ifndef NETDATA_EBPF_HARDIRQ_H #define NETDATA_EBPF_HARDIRQ_H 1 +// Module description +#define NETDATA_EBPF_HARDIRQ_MODULE_DESC "Show time spent servicing individual hardware interrupt requests (hard IRQs)." + #include #include "libnetdata/avl/avl.h" @@ -34,6 +37,11 @@ enum hardirq_ebpf_static { HARDIRQ_EBPF_STATIC_END }; +enum hardirq_maps { + HARDIRQ_MAP_LATENCY, + HARDIRQ_MAP_LATENCY_STATIC +}; + typedef struct hardirq_ebpf_static_val { uint64_t latency; uint64_t ts; diff --git a/collectors/ebpf.plugin/ebpf_mdflush.c b/collectors/ebpf.plugin/ebpf_mdflush.c index 65ed860a4..3548d673b 100644 --- a/collectors/ebpf.plugin/ebpf_mdflush.c +++ b/collectors/ebpf.plugin/ebpf_mdflush.c @@ -31,6 +31,10 @@ static ebpf_local_maps_t mdflush_maps[] = { } }; +netdata_ebpf_targets_t mdflush_targets[] = { {.name = "md_flush_request", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; + + // store for "published" data from the reader thread, which the collector // thread will write to netdata agent. static avl_tree_lock mdflush_pub; @@ -38,6 +42,113 @@ static avl_tree_lock mdflush_pub; // tmp store for mdflush values we get from a per-CPU eBPF map. static mdflush_ebpf_val_t *mdflush_ebpf_vals = NULL; +#ifdef LIBBPF_MAJOR_VERSION +/** + * Disable probes + * + * Disable probes to use trampolines. + * + * @param obj the loaded object structure. + */ +static inline void ebpf_disable_probes(struct mdflush_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_md_flush_request_kprobe, false); +} + +/** + * Disable trampolines + * + * Disable trampoliness to use probes. + * + * @param obj the loaded object structure. + */ +static inline void ebpf_disable_trampoline(struct mdflush_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_md_flush_request_fentry, false); +} + +/** + * Set Trampoline + * + * Define target to attach trampoline + * + * @param obj the loaded object structure. + */ +static void ebpf_set_trampoline_target(struct mdflush_bpf *obj) +{ + bpf_program__set_attach_target(obj->progs.netdata_md_flush_request_fentry, 0, + mdflush_targets[NETDATA_MD_FLUSH_REQUEST].name); +} + +/** + * Load probe + * + * Load probe to monitor internal function. + * + * @param obj the loaded object structure. 
+ */ +static inline int ebpf_load_probes(struct mdflush_bpf *obj) +{ + obj->links.netdata_md_flush_request_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_md_flush_request_kprobe, + false, + mdflush_targets[NETDATA_MD_FLUSH_REQUEST].name); + return libbpf_get_error(obj->links.netdata_md_flush_request_kprobe); +} + +/** + * Load and Attach + * + * Load and attach bpf codes according user selection. + * + * @param obj the loaded object structure. + * @param em the structure with configuration + */ +static inline int ebpf_mdflush_load_and_attach(struct mdflush_bpf *obj, ebpf_module_t *em) +{ + int mode = em->targets[NETDATA_MD_FLUSH_REQUEST].mode; + if (mode == EBPF_LOAD_TRAMPOLINE) { // trampoline + ebpf_disable_probes(obj); + + ebpf_set_trampoline_target(obj); + } else // kprobe + ebpf_disable_trampoline(obj); + + int ret = mdflush_bpf__load(obj); + if (ret) { + fprintf(stderr, "failed to load BPF object: %d\n", ret); + return -1; + } + + if (mode == EBPF_LOAD_TRAMPOLINE) + ret = mdflush_bpf__attach(obj); + else + ret = ebpf_load_probes(obj); + + return ret; +} + +#endif + +/** + * Obsolete global + * + * Obsolete global charts created by thread. + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_mdflush_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete("mdstat", + "mdstat_flush", + "MD flushes", + "flushes", + "flush (eBPF)", + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + NETDATA_CHART_PRIO_MDSTAT_FLUSH, + em->update_every); +} + /** * MDflush exit * @@ -49,11 +160,26 @@ static void mdflush_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - if (em->objects) + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + + ebpf_obsolete_mdflush_global(em); + + pthread_mutex_unlock(&lock); + fflush(stdout); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + + if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -150,7 +276,7 @@ static void mdflush_read_count_map(int maps_per_core) if (v_is_new) { avl_t *check = avl_insert_lock(&mdflush_pub, (avl_t *)v); if (check != (avl_t *)v) { - error("Internal error, cannot insert the AVL tree."); + netdata_log_error("Internal error, cannot insert the AVL tree."); } } } @@ -209,7 +335,7 @@ static void mdflush_collector(ebpf_module_t *em) pthread_mutex_lock(&lock); mdflush_create_charts(update_every); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); pthread_mutex_unlock(&lock); // loop and read from published data until ebpf plugin is closed. 
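A condensed sketch of the lifetime accounting this patch adds to every collector loop (the mdflush hunk below follows the same shape); the names come from the patch, but the body is simplified and not a drop-in replacement.

    static void example_collector_loop(ebpf_module_t *em)
    {
        heartbeat_t hb;
        heartbeat_init(&hb);
        int update_every = em->update_every;
        int counter = update_every - 1;
        uint32_t running_time = 0;
        uint32_t lifetime = em->lifetime;           // seconds the thread is allowed to run
        while (!ebpf_exit_plugin && running_time < lifetime) {
            (void)heartbeat_next(&hb, USEC_PER_SEC);
            if (ebpf_exit_plugin || ++counter != update_every)
                continue;
            counter = 0;

            /* ... read the hash tables and send the charts here ... */

            pthread_mutex_lock(&ebpf_exit_cleanup);
            if (running_time && !em->running_time)  // the ebpf_thread function zeroed it: restart counting
                running_time = update_every;
            else
                running_time += update_every;
            em->running_time = running_time;        // read back by the function to compute time remaining
            pthread_mutex_unlock(&ebpf_exit_cleanup);
        }
    }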
@@ -217,7 +343,9 @@ static void mdflush_collector(ebpf_module_t *em) heartbeat_init(&hb); int counter = update_every - 1; int maps_per_core = em->maps_per_core; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -232,9 +360,61 @@ static void mdflush_collector(ebpf_module_t *em) write_end_chart(); pthread_mutex_unlock(&lock); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); + } +} + +/* + * Load BPF + * + * Load BPF files. + * + * @param em the structure with configuration + * + * @return It returns 0 on success and -1 otherwise. + */ +static int ebpf_mdflush_load_bpf(ebpf_module_t *em) +{ + int ret = 0; + if (em->load & EBPF_LOAD_LEGACY) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + ret = -1; + } + } +#ifdef LIBBPF_MAJOR_VERSION + else { + mdflush_bpf_obj = mdflush_bpf__open(); + if (!mdflush_bpf_obj) + ret = -1; + else { + ret = ebpf_mdflush_load_and_attach(mdflush_bpf_obj, em); + if (ret && em->targets[NETDATA_MD_FLUSH_REQUEST].mode == EBPF_LOAD_TRAMPOLINE) { + mdflush_bpf__destroy(mdflush_bpf_obj); + mdflush_bpf_obj = mdflush_bpf__open(); + if (!mdflush_bpf_obj) + ret = -1; + else { + em->targets[NETDATA_MD_FLUSH_REQUEST].mode = EBPF_LOAD_PROBE; + ret = ebpf_mdflush_load_and_attach(mdflush_bpf_obj, em); + } + } + } } +#endif + + return ret; } + /** * mdflush thread. * @@ -250,15 +430,16 @@ void *ebpf_mdflush_thread(void *ptr) char *md_flush_request = ebpf_find_symbol("md_flush_request"); if (!md_flush_request) { - error("Cannot monitor MD devices, because md is not loaded."); + netdata_log_error("Cannot monitor MD devices, because md is not loaded."); goto endmdflush; } #ifdef LIBBPF_MAJOR_VERSION ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel); + ebpf_adjust_thread_load(em, default_btf); #endif - em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); - if (!em->probe_links) { + if (ebpf_mdflush_load_bpf(em)) { + netdata_log_error("Cannot load eBPF software."); goto endmdflush; } diff --git a/collectors/ebpf.plugin/ebpf_mdflush.h b/collectors/ebpf.plugin/ebpf_mdflush.h index 4913ad019..629550746 100644 --- a/collectors/ebpf.plugin/ebpf_mdflush.h +++ b/collectors/ebpf.plugin/ebpf_mdflush.h @@ -3,8 +3,9 @@ #ifndef NETDATA_EBPF_MDFLUSH_H #define NETDATA_EBPF_MDFLUSH_H 1 -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_MDFLUSH "mdflush" +#define NETDATA_EBPF_MD_MODULE_DESC "Show information about multi-device software flushes." 
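For reference, a condensed sketch of the CO-RE fallback added in ebpf_mdflush_load_bpf() above: when attaching the fentry trampoline fails, the skeleton is reopened and the target is retried as a kprobe. Simplified, using only symbols introduced by this change.

    static int example_mdflush_core_load(ebpf_module_t *em)
    {
        mdflush_bpf_obj = mdflush_bpf__open();
        if (!mdflush_bpf_obj)
            return -1;

        int ret = ebpf_mdflush_load_and_attach(mdflush_bpf_obj, em);
        if (ret && em->targets[NETDATA_MD_FLUSH_REQUEST].mode == EBPF_LOAD_TRAMPOLINE) {
            /* the trampoline could not be attached: reopen and fall back to the kprobe */
            mdflush_bpf__destroy(mdflush_bpf_obj);
            mdflush_bpf_obj = mdflush_bpf__open();
            if (!mdflush_bpf_obj)
                return -1;
            em->targets[NETDATA_MD_FLUSH_REQUEST].mode = EBPF_LOAD_PROBE;
            ret = ebpf_mdflush_load_and_attach(mdflush_bpf_obj, em);
        }
        return ret;
    }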
// charts #define NETDATA_MDFLUSH_GLOBAL_CHART "mdflush" @@ -33,8 +34,15 @@ typedef struct netdata_mdflush { uint64_t cnt; } netdata_mdflush_t; +enum netdata_mdflush_targets { + NETDATA_MD_FLUSH_REQUEST, + + NETDATA_MD_FLUSH_END +}; + void *ebpf_mdflush_thread(void *ptr); extern struct config mdflush_config; +extern netdata_ebpf_targets_t mdflush_targets[]; #endif diff --git a/collectors/ebpf.plugin/ebpf_mount.c b/collectors/ebpf.plugin/ebpf_mount.c index e48c89227..57ea5b2f4 100644 --- a/collectors/ebpf.plugin/ebpf_mount.c +++ b/collectors/ebpf.plugin/ebpf_mount.c @@ -222,6 +222,36 @@ static inline int ebpf_mount_load_and_attach(struct mount_bpf *obj, ebpf_module_ * *****************************************************************/ +/** + * Obsolete global + * + * Obsolete global charts created by thread. + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_mount_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_EBPF_MOUNT_GLOBAL_FAMILY, + NETDATA_EBPF_MOUNT_CALLS, + "Calls to mount and umount syscalls", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_EBPF_MOUNT_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_EBPF_MOUNT_CHARTS, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_EBPF_MOUNT_GLOBAL_FAMILY, + NETDATA_EBPF_MOUNT_ERRORS, + "Errors to mount and umount file systems", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_EBPF_MOUNT_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_EBPF_MOUNT_CHARTS + 1, + em->update_every); +} + /** * Mount Exit * @@ -233,15 +263,32 @@ static void ebpf_mount_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + + ebpf_obsolete_mount_global(em); + + fflush(stdout); + pthread_mutex_unlock(&lock); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + #ifdef LIBBPF_MAJOR_VERSION - if (mount_bpf_obj) + if (mount_bpf_obj) { mount_bpf__destroy(mount_bpf_obj); + mount_bpf_obj = NULL; + } #endif - if (em->objects) + if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -318,7 +365,9 @@ static void mount_collector(ebpf_module_t *em) int update_every = em->update_every; int counter = update_every - 1; int maps_per_core = em->maps_per_core; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) continue; @@ -330,6 +379,15 @@ static void mount_collector(ebpf_module_t *em) ebpf_mount_send_data(); pthread_mutex_unlock(&lock); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -408,7 +466,7 @@ static int ebpf_mount_load_bpf(ebpf_module_t *em) #endif if (ret) - error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); return ret; } @@ -444,7 +502,7 @@ void *ebpf_mount_thread(void *ptr) pthread_mutex_lock(&lock); ebpf_create_mount_charts(em->update_every); 
ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); pthread_mutex_unlock(&lock); mount_collector(em); diff --git a/collectors/ebpf.plugin/ebpf_mount.h b/collectors/ebpf.plugin/ebpf_mount.h index 11b21f832..768914b02 100644 --- a/collectors/ebpf.plugin/ebpf_mount.h +++ b/collectors/ebpf.plugin/ebpf_mount.h @@ -3,8 +3,9 @@ #ifndef NETDATA_EBPF_MOUNT_H #define NETDATA_EBPF_MOUNT_H 1 -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_MOUNT "mount" +#define NETDATA_EBPF_MOUNT_MODULE_DESC "Show calls to syscalls mount(2) and umount(2)." #define NETDATA_EBPF_MOUNT_SYSCALL 2 diff --git a/collectors/ebpf.plugin/ebpf_oomkill.c b/collectors/ebpf.plugin/ebpf_oomkill.c index c80f44873..66421d277 100644 --- a/collectors/ebpf.plugin/ebpf_oomkill.c +++ b/collectors/ebpf.plugin/ebpf_oomkill.c @@ -44,6 +44,71 @@ static netdata_publish_syscall_t oomkill_publish_aggregated = {.name = "oomkill" .algorithm = "absolute", .next = NULL}; +static void ebpf_create_specific_oomkill_charts(char *type, int update_every); + +/** + * Obsolete services + * + * Obsolete all service charts created + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_oomkill_services(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_OOMKILL_CHART, + "OOM kills. This chart is provided by eBPF plugin.", + EBPF_COMMON_DIMENSION_KILLS, + NETDATA_EBPF_MEMORY_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 20191, + em->update_every); +} + +/** + * Obsolete cgroup chart + * + * Send obsolete for all charts created before to close. + * + * @param em a pointer to `struct ebpf_module` + */ +static inline void ebpf_obsolete_oomkill_cgroup_charts(ebpf_module_t *em) +{ + pthread_mutex_lock(&mutex_cgroup_shm); + + ebpf_obsolete_oomkill_services(em); + + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + ebpf_create_specific_oomkill_charts(ect->name, em->update_every); + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Obsolete global + * + * Obsolete global charts created by thread. + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_oomkill_apps(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_OOMKILL_CHART, + "OOM kills", + EBPF_COMMON_DIMENSION_KILLS, + "mem", + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20020, + em->update_every); +} + /** * Clean up the main thread. 
* @@ -53,11 +118,30 @@ static void oomkill_cleanup(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - if (em->objects) + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + + if (em->cgroup_charts) { + ebpf_obsolete_oomkill_cgroup_charts(em); + } + + ebpf_obsolete_oomkill_apps(em); + + fflush(stdout); + pthread_mutex_unlock(&lock); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + + if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -259,7 +343,7 @@ static uint32_t oomkill_read_data(int32_t *keys) if (unlikely(test < 0)) { // since there's only 1 thread doing these deletions, it should be // impossible to get this condition. - error("key unexpectedly not available for deletion."); + netdata_log_error("key unexpectedly not available for deletion."); } } @@ -293,6 +377,30 @@ static void ebpf_update_oomkill_cgroup(int32_t *keys, uint32_t total) } } +/** + * Update OOMkill period + * + * Update oomkill period according function arguments. + * + * @param running_time current value of running_value. + * @param em the thread main structure. + * + * @return It returns new running_time value. + */ +static int ebpf_update_oomkill_period(int running_time, ebpf_module_t *em) +{ + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = em->update_every; + else + running_time += em->update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); + + return running_time; +} + /** * Main loop for this collector. * @@ -309,7 +417,9 @@ static void oomkill_collector(ebpf_module_t *em) heartbeat_t hb; heartbeat_init(&hb); int counter = update_every - 1; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) continue; @@ -317,8 +427,10 @@ static void oomkill_collector(ebpf_module_t *em) counter = 0; uint32_t count = oomkill_read_data(keys); - if (!count) + if (!count) { + running_time = ebpf_update_oomkill_period(running_time, em); continue; + } pthread_mutex_lock(&collect_data_mutex); pthread_mutex_lock(&lock); @@ -335,6 +447,8 @@ static void oomkill_collector(ebpf_module_t *em) } pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); + + running_time = ebpf_update_oomkill_period(running_time, em); } } @@ -379,14 +493,14 @@ void *ebpf_oomkill_thread(void *ptr) // we need to disable it. 
pthread_mutex_lock(&ebpf_exit_cleanup); if (em->enabled) - info("%s apps integration is completely disabled.", NETDATA_DEFAULT_OOM_DISABLED_MSG); + netdata_log_info("%s apps integration is completely disabled.", NETDATA_DEFAULT_OOM_DISABLED_MSG); pthread_mutex_unlock(&ebpf_exit_cleanup); goto endoomkill; } else if (running_on_kernel < NETDATA_EBPF_KERNEL_4_14) { pthread_mutex_lock(&ebpf_exit_cleanup); if (em->enabled) - info("%s kernel does not have necessary tracepoints.", NETDATA_DEFAULT_OOM_DISABLED_MSG); + netdata_log_info("%s kernel does not have necessary tracepoints.", NETDATA_DEFAULT_OOM_DISABLED_MSG); pthread_mutex_unlock(&ebpf_exit_cleanup); goto endoomkill; @@ -406,7 +520,7 @@ void *ebpf_oomkill_thread(void *ptr) pthread_mutex_lock(&lock); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); pthread_mutex_unlock(&lock); oomkill_collector(em); diff --git a/collectors/ebpf.plugin/ebpf_oomkill.h b/collectors/ebpf.plugin/ebpf_oomkill.h index f921f9d87..4a5fa62aa 100644 --- a/collectors/ebpf.plugin/ebpf_oomkill.h +++ b/collectors/ebpf.plugin/ebpf_oomkill.h @@ -3,6 +3,9 @@ #ifndef NETDATA_EBPF_OOMKILL_H #define NETDATA_EBPF_OOMKILL_H 1 +// Module description +#define NETDATA_EBPF_OOMKILL_MODULE_DESC "Show OOM kills for all applications recognized via the apps.plugin." + /***************************************************************** * copied from kernel-collectors repo, with modifications needed * for inclusion here. diff --git a/collectors/ebpf.plugin/ebpf_process.c b/collectors/ebpf.plugin/ebpf_process.c index 17a9809d3..4d915e132 100644 --- a/collectors/ebpf.plugin/ebpf_process.c +++ b/collectors/ebpf.plugin/ebpf_process.c @@ -59,20 +59,15 @@ ebpf_process_stat_t *process_stat_vector = NULL; static netdata_syscall_stat_t process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_END]; static netdata_publish_syscall_t process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_END]; -int process_enabled = 0; -bool publish_internal_metrics = true; - struct config process_config = { .first_section = NULL, .last_section = NULL, .mutex = NETDATA_MUTEX_INITIALIZER, .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, .rwlock = AVL_LOCK_INITIALIZER } }; -static char *threads_stat[NETDATA_EBPF_THREAD_STAT_END] = {"total", "running"}; -static char *load_event_stat[NETDATA_EBPF_LOAD_STAT_END] = {"legacy", "co-re"}; -static char *memlock_stat = {"memory_locked"}; -static char *hash_table_stat = {"hash_table"}; -static char *hash_table_core[NETDATA_EBPF_LOAD_STAT_END] = {"per_core", "unique"}; +#ifdef NETDATA_DEV_MODE +int process_disable_priority; +#endif /***************************************************************** * @@ -427,182 +422,8 @@ static void ebpf_create_global_charts(ebpf_module_t *em) &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK], 2, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); } -} - -/** - * Create chart for Statistic Thread - * - * Write to standard output current values for threads. - * - * @param em a pointer to the structure with the default values. 
- */ -static inline void ebpf_create_statistic_thread_chart(ebpf_module_t *em) -{ - ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, - NETDATA_EBPF_THREADS, - "Threads info.", - "threads", - NETDATA_EBPF_FAMILY, - NETDATA_EBPF_CHART_TYPE_LINE, - NULL, - 140000, - em->update_every, - NETDATA_EBPF_MODULE_NAME_PROCESS); - - ebpf_write_global_dimension(threads_stat[NETDATA_EBPF_THREAD_STAT_TOTAL], - threads_stat[NETDATA_EBPF_THREAD_STAT_TOTAL], - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); - - ebpf_write_global_dimension(threads_stat[NETDATA_EBPF_THREAD_STAT_RUNNING], - threads_stat[NETDATA_EBPF_THREAD_STAT_RUNNING], - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); -} - -/** - * Create chart for Load Thread - * - * Write to standard output current values for load mode. - * - * @param em a pointer to the structure with the default values. - */ -static inline void ebpf_create_statistic_load_chart(ebpf_module_t *em) -{ - ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, - NETDATA_EBPF_LOAD_METHOD, - "Load info.", - "methods", - NETDATA_EBPF_FAMILY, - NETDATA_EBPF_CHART_TYPE_LINE, - NULL, - 140001, - em->update_every, - NETDATA_EBPF_MODULE_NAME_PROCESS); - - ebpf_write_global_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_LEGACY], - load_event_stat[NETDATA_EBPF_LOAD_STAT_LEGACY], - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); - - ebpf_write_global_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_CORE], - load_event_stat[NETDATA_EBPF_LOAD_STAT_CORE], - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); -} - -/** - * Create chart for Kernel Memory - * - * Write to standard output current values for allocated memory. - * - * @param em a pointer to the structure with the default values. - */ -static inline void ebpf_create_statistic_kernel_memory(ebpf_module_t *em) -{ - ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, - NETDATA_EBPF_KERNEL_MEMORY, - "Memory allocated for hash tables.", - "bytes", - NETDATA_EBPF_FAMILY, - NETDATA_EBPF_CHART_TYPE_LINE, - NULL, - 140002, - em->update_every, - NETDATA_EBPF_MODULE_NAME_PROCESS); - - ebpf_write_global_dimension(memlock_stat, - memlock_stat, - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); -} - -/** - * Create chart Hash Table - * - * Write to standard output number of hash tables used with this software. - * - * @param em a pointer to the structure with the default values. - */ -static inline void ebpf_create_statistic_hash_tables(ebpf_module_t *em) -{ - ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, - NETDATA_EBPF_HASH_TABLES_LOADED, - "Number of hash tables loaded.", - "hash tables", - NETDATA_EBPF_FAMILY, - NETDATA_EBPF_CHART_TYPE_LINE, - NULL, - 140003, - em->update_every, - NETDATA_EBPF_MODULE_NAME_PROCESS); - - ebpf_write_global_dimension(hash_table_stat, - hash_table_stat, - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); -} - -/** - * Create chart for percpu stats - * - * Write to standard output current values for threads. - * - * @param em a pointer to the structure with the default values. 
- */ -static inline void ebpf_create_statistic_hash_per_core(ebpf_module_t *em) -{ - ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, - NETDATA_EBPF_HASH_TABLES_PER_CORE, - "How threads are loading hash/array tables.", - "threads", - NETDATA_EBPF_FAMILY, - NETDATA_EBPF_CHART_TYPE_LINE, - NULL, - 140004, - em->update_every, - NETDATA_EBPF_MODULE_NAME_PROCESS); - - ebpf_write_global_dimension(hash_table_core[NETDATA_EBPF_THREAD_PER_CORE], - hash_table_core[NETDATA_EBPF_THREAD_PER_CORE], - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); - - ebpf_write_global_dimension(hash_table_core[NETDATA_EBPF_THREAD_UNIQUE], - hash_table_core[NETDATA_EBPF_THREAD_UNIQUE], - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); -} - -/** - * Update Internal Metric variable - * - * By default eBPF.plugin sends internal metrics for netdata, but user can - * disable this. - * - * The function updates the variable used to send charts. - */ -static void update_internal_metric_variable() -{ - const char *s = getenv("NETDATA_INTERNALS_MONITORING"); - if (s && *s && strcmp(s, "NO") == 0) - publish_internal_metrics = false; -} - -/** - * Create Statistics Charts - * - * Create charts that will show statistics related to eBPF plugin. - * - * @param em a pointer to the structure with the default values. - */ -static void ebpf_create_statistic_charts(ebpf_module_t *em) -{ - update_internal_metric_variable(); - if (!publish_internal_metrics) - return; - ebpf_create_statistic_thread_chart(em); - - ebpf_create_statistic_load_chart(em); - - ebpf_create_statistic_kernel_memory(em); - - ebpf_create_statistic_hash_tables(em); - - ebpf_create_statistic_hash_per_core(em); + fflush(stdout); } /** @@ -673,6 +494,206 @@ void ebpf_process_create_apps_charts(struct ebpf_module *em, void *ptr) * *****************************************************************/ +static void ebpf_obsolete_specific_process_charts(char *type, ebpf_module_t *em); + +/** + * Obsolete services + * + * Obsolete all service charts created + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_process_services(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_TASK_PROCESS, + "Process started", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20065, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_TASK_THREAD, + "Threads started", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20066, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_TASK_CLOSE, + "Tasks starts exit process.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20067, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_TASK_EXIT, + "Tasks closed", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20068, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_TASK_ERROR, + "Errors to create process or threads.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20069, + em->update_every); + } +} + +/** + * Obsolete cgroup chart + * + * Send obsolete for all charts created before to close. 
+ * + * @param em a pointer to `struct ebpf_module` + */ +static inline void ebpf_obsolete_process_cgroup_charts(ebpf_module_t *em) { + pthread_mutex_lock(&mutex_cgroup_shm); + + ebpf_obsolete_process_services(em); + + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + ebpf_obsolete_specific_process_charts(ect->name, em); + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Obsolette apps charts + * + * Obsolete apps charts. + * + * @param em a pointer to the structure with the default values. + */ +void ebpf_obsolete_process_apps_charts(struct ebpf_module *em) +{ + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_TASK_PROCESS, + "Process started", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20065, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_TASK_THREAD, + "Threads started", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20066, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_TASK_EXIT, + "Tasks starts exit process.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20067, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_TASK_CLOSE, + "Tasks closed", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20068, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_TASK_ERROR, + "Errors to create process or threads.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20069, + em->update_every); + } +} + +/** + * Obsolete global + * + * Obsolete global charts created by thread. 
+ * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_process_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, + NETDATA_PROCESS_SYSCALL, + "Start process", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21002, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, + NETDATA_EXIT_SYSCALL, + "Exit process", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21003, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, + NETDATA_PROCESS_STATUS_NAME, + "Process not closed", + EBPF_COMMON_DIMENSION_DIFFERENCE, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21004, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, + NETDATA_PROCESS_ERROR_NAME, + "Fails to create process", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21005, + em->update_every); + } +} + /** * Process disable tracepoints * @@ -683,17 +704,17 @@ static void ebpf_process_disable_tracepoints() char *default_message = { "Cannot disable the tracepoint" }; if (!was_sched_process_exit_enabled) { if (ebpf_disable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_exit)) - error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_exit); + netdata_log_error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_exit); } if (!was_sched_process_exec_enabled) { if (ebpf_disable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_exec)) - error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_exec); + netdata_log_error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_exec); } if (!was_sched_process_fork_enabled) { if (ebpf_disable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_fork)) - error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_fork); + netdata_log_error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_fork); } } @@ -708,6 +729,37 @@ static void ebpf_process_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + if (em->cgroup_charts) { + ebpf_obsolete_process_cgroup_charts(em); + fflush(stdout); + } + + if (em->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + ebpf_obsolete_process_apps_charts(em); + } + + ebpf_obsolete_process_global(em); + +#ifdef NETDATA_DEV_MODE + if (ebpf_aral_process_stat) + ebpf_statistic_obsolete_aral_chart(em, process_disable_priority); +#endif + + + fflush(stdout); + pthread_mutex_unlock(&lock); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + + if (em->objects) { + ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } + freez(process_hash_values); freez(process_stat_vector); @@ -716,6 +768,7 @@ static void ebpf_process_exit(void *ptr) pthread_mutex_lock(&ebpf_exit_cleanup); process_pid_fd = -1; em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -740,14 +793,14 @@ static void ebpf_process_sum_cgroup_pids(ebpf_process_stat_t *ps, struct pid_on_ memset(&accumulator, 0, 
sizeof(accumulator)); while (pids) { - ebpf_process_stat_t *ps = &pids->ps; + ebpf_process_stat_t *pps = &pids->ps; - accumulator.exit_call += ps->exit_call; - accumulator.release_call += ps->release_call; - accumulator.create_process += ps->create_process; - accumulator.create_thread += ps->create_thread; + accumulator.exit_call += pps->exit_call; + accumulator.release_call += pps->release_call; + accumulator.create_process += pps->create_process; + accumulator.create_thread += pps->create_thread; - accumulator.task_err += ps->task_err; + accumulator.task_err += pps->task_err; pids = pids->next; } @@ -1046,40 +1099,6 @@ void ebpf_process_update_cgroup_algorithm() } } -/** - * Send Statistic Data - * - * Send statistic information to netdata. - */ -void ebpf_send_statistic_data() -{ - if (!publish_internal_metrics) - return; - - write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_THREADS); - write_chart_dimension(threads_stat[NETDATA_EBPF_THREAD_STAT_TOTAL], (long long)plugin_statistics.threads); - write_chart_dimension(threads_stat[NETDATA_EBPF_THREAD_STAT_RUNNING], (long long)plugin_statistics.running); - write_end_chart(); - - write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_LOAD_METHOD); - write_chart_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_LEGACY], (long long)plugin_statistics.legacy); - write_chart_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_CORE], (long long)plugin_statistics.core); - write_end_chart(); - - write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_KERNEL_MEMORY); - write_chart_dimension(memlock_stat, (long long)plugin_statistics.memlock_kern); - write_end_chart(); - - write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_LOADED); - write_chart_dimension(hash_table_stat, (long long)plugin_statistics.hash_tables); - write_end_chart(); - - write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_PER_CORE); - write_chart_dimension(hash_table_core[NETDATA_EBPF_THREAD_PER_CORE], (long long)plugin_statistics.hash_percpu); - write_chart_dimension(hash_table_core[NETDATA_EBPF_THREAD_UNIQUE], (long long)plugin_statistics.hash_unique); - write_end_chart(); -} - /** * Main loop for this collector. 
* @@ -1092,7 +1111,6 @@ static void process_collector(ebpf_module_t *em) int publish_global = em->global_charts; int cgroups = em->cgroup_charts; pthread_mutex_lock(&ebpf_exit_cleanup); - int thread_enabled = em->enabled; process_pid_fd = process_maps[NETDATA_PROCESS_PID_TABLE].map_fd; pthread_mutex_unlock(&ebpf_exit_cleanup); if (cgroups) @@ -1101,7 +1119,9 @@ static void process_collector(ebpf_module_t *em) int update_every = em->update_every; int counter = update_every - 1; int maps_per_core = em->maps_per_core; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { usec_t dt = heartbeat_next(&hb, USEC_PER_SEC); (void)dt; if (ebpf_exit_plugin) @@ -1122,28 +1142,35 @@ static void process_collector(ebpf_module_t *em) } pthread_mutex_lock(&lock); - ebpf_send_statistic_data(); - if (thread_enabled == NETDATA_THREAD_EBPF_RUNNING) { - if (publish_global) { - ebpf_process_send_data(em); - } + if (publish_global) { + ebpf_process_send_data(em); + } - if (apps_enabled & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { - ebpf_process_send_apps_data(apps_groups_root_target, em); - } + if (apps_enabled & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + ebpf_process_send_apps_data(apps_groups_root_target, em); + } #ifdef NETDATA_DEV_MODE - if (ebpf_aral_process_stat) - ebpf_send_data_aral_chart(ebpf_aral_process_stat, em); + if (ebpf_aral_process_stat) + ebpf_send_data_aral_chart(ebpf_aral_process_stat, em); #endif - if (cgroups && shm_ebpf_cgroup.header) { - ebpf_process_send_cgroup_data(em); - } + if (cgroups && shm_ebpf_cgroup.header) { + ebpf_process_send_cgroup_data(em); } + pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } fflush(stdout); @@ -1254,7 +1281,6 @@ void *ebpf_process_thread(void *ptr) if (ebpf_process_enable_tracepoints()) { em->enabled = em->global_charts = em->apps_charts = em->cgroup_charts = NETDATA_THREAD_EBPF_STOPPING; } - process_enabled = em->enabled; pthread_mutex_unlock(&ebpf_exit_cleanup); pthread_mutex_lock(&lock); @@ -1276,27 +1302,22 @@ void *ebpf_process_thread(void *ptr) process_aggregated_data, process_publish_aggregated, process_dimension_names, process_id_names, algorithms, NETDATA_KEY_PUBLISH_PROCESS_END); - if (process_enabled == NETDATA_THREAD_EBPF_RUNNING) { - ebpf_create_global_charts(em); - } + ebpf_create_global_charts(em); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); #ifdef NETDATA_DEV_MODE if (ebpf_aral_process_stat) - ebpf_statistic_create_aral_chart(NETDATA_EBPF_PROC_ARAL_NAME, em); + process_disable_priority = ebpf_statistic_create_aral_chart(NETDATA_EBPF_PROC_ARAL_NAME, em); #endif - ebpf_create_statistic_charts(em); - pthread_mutex_unlock(&lock); process_collector(em); pthread_mutex_lock(&ebpf_exit_cleanup); - if (em->enabled == NETDATA_THREAD_EBPF_RUNNING) - ebpf_update_disabled_plugin_stats(em); + ebpf_update_disabled_plugin_stats(em); pthread_mutex_unlock(&ebpf_exit_cleanup); netdata_thread_cleanup_pop(1); diff --git a/collectors/ebpf.plugin/ebpf_process.h b/collectors/ebpf.plugin/ebpf_process.h index bccdc0eb5..48267d87b 100644 --- 
a/collectors/ebpf.plugin/ebpf_process.h +++ b/collectors/ebpf.plugin/ebpf_process.h @@ -3,8 +3,9 @@ #ifndef NETDATA_EBPF_PROCESS_H #define NETDATA_EBPF_PROCESS_H 1 -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_PROCESS "process" +#define NETDATA_EBPF_MODULE_PROCESS_DESC "Monitor information about process life. This thread is integrated with apps and cgroup." // Groups used on Dashboard #define NETDATA_PROCESS_GROUP "processes" @@ -41,12 +42,13 @@ #define NETDATA_EBPF_CGROUP_UPDATE 30 -// Statistical information -enum netdata_ebpf_thread_stats{ - NETDATA_EBPF_THREAD_STAT_TOTAL, - NETDATA_EBPF_THREAD_STAT_RUNNING, - - NETDATA_EBPF_THREAD_STAT_END +enum netdata_ebpf_stats_order { + NETDATA_EBPF_ORDER_STAT_THREADS = 140000, + NETDATA_EBPF_ORDER_STAT_LIFE_TIME, + NETDATA_EBPF_ORDER_STAT_LOAD_METHOD, + NETDATA_EBPF_ORDER_STAT_KERNEL_MEMORY, + NETDATA_EBPF_ORDER_STAT_HASH_TABLES, + NETDATA_EBPF_ORDER_STAT_HASH_CORE }; enum netdata_ebpf_load_mode_stats{ diff --git a/collectors/ebpf.plugin/ebpf_shm.c b/collectors/ebpf.plugin/ebpf_shm.c index 94ac624b3..78ada81f7 100644 --- a/collectors/ebpf.plugin/ebpf_shm.c +++ b/collectors/ebpf.plugin/ebpf_shm.c @@ -50,6 +50,10 @@ netdata_ebpf_targets_t shm_targets[] = { {.name = "shmget", .mode = EBPF_LOAD_TR {.name = "shmctl", .mode = EBPF_LOAD_TRAMPOLINE}, {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; +#ifdef NETDATA_DEV_MODE +int shm_disable_priority; +#endif + #ifdef LIBBPF_MAJOR_VERSION /***************************************************************** * @@ -288,6 +292,150 @@ static inline int ebpf_shm_load_and_attach(struct shm_bpf *obj, ebpf_module_t *e * FUNCTIONS TO CLOSE THE THREAD *****************************************************************/ +static void ebpf_obsolete_specific_shm_charts(char *type, int update_every); + +/** + * Obsolete services + * + * Obsolete all service charts created + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_shm_services(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SHMGET_CHART, + "Calls to syscall shmget(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20191, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SHMAT_CHART, + "Calls to syscall shmat(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20192, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SHMDT_CHART, + "Calls to syscall shmdt(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20193, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SHMCTL_CHART, + "Calls to syscall shmctl(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20193, + em->update_every); +} + +/** + * Obsolete cgroup chart + * + * Send obsolete for all charts created before to close. 
+ * + * @param em a pointer to `struct ebpf_module` + */ +static inline void ebpf_obsolete_shm_cgroup_charts(ebpf_module_t *em) { + pthread_mutex_lock(&mutex_cgroup_shm); + + ebpf_obsolete_shm_services(em); + + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + ebpf_obsolete_specific_shm_charts(ect->name, em->update_every); + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Obsolette apps charts + * + * Obsolete apps charts. + * + * @param em a pointer to the structure with the default values. + */ +void ebpf_obsolete_shm_apps_charts(struct ebpf_module *em) +{ + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SHMGET_CHART, + "Calls to syscall shmget(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20191, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SHMAT_CHART, + "Calls to syscall shmat(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20192, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SHMDT_CHART, + "Calls to syscall shmdt(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20193, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SHMCTL_CHART, + "Calls to syscall shmctl(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20194, + em->update_every); +} + +/** + * Obsolete global + * + * Obsolete global charts created by thread. + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_shm_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, + NETDATA_SHM_GLOBAL_CHART, + "Calls to shared memory system calls", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SYSTEM_IPC_SHM_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_SYSTEM_IPC_SHARED_MEM_CALLS, + em->update_every); +} + /** * SHM Exit * @@ -299,16 +447,46 @@ static void ebpf_shm_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + if (em->cgroup_charts) { + ebpf_obsolete_shm_cgroup_charts(em); + fflush(stdout); + } + + if (em->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + ebpf_obsolete_shm_apps_charts(em); + } + + ebpf_obsolete_shm_global(em); + +#ifdef NETDATA_DEV_MODE + if (ebpf_aral_shm_pid) + ebpf_statistic_obsolete_aral_chart(em, shm_disable_priority); +#endif + + fflush(stdout); + pthread_mutex_unlock(&lock); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + #ifdef LIBBPF_MAJOR_VERSION - if (shm_bpf_obj) + if (shm_bpf_obj) { shm_bpf__destroy(shm_bpf_obj); + shm_bpf_obj = NULL; + } #endif - if (em->objects) + if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -859,7 +1037,9 @@ static void shm_collector(ebpf_module_t *em) heartbeat_init(&hb); int counter = update_every - 1; int maps_per_core = em->maps_per_core; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin 
&& running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) continue; @@ -895,6 +1075,15 @@ static void shm_collector(ebpf_module_t *em) pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -1037,7 +1226,7 @@ static int ebpf_shm_load_bpf(ebpf_module_t *em) if (ret) - error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); return ret; } @@ -1084,10 +1273,10 @@ void *ebpf_shm_thread(void *ptr) pthread_mutex_lock(&lock); ebpf_create_shm_charts(em->update_every); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); #ifdef NETDATA_DEV_MODE if (ebpf_aral_shm_pid) - ebpf_statistic_create_aral_chart(NETDATA_EBPF_SHM_ARAL_NAME, em); + shm_disable_priority = ebpf_statistic_create_aral_chart(NETDATA_EBPF_SHM_ARAL_NAME, em); #endif pthread_mutex_unlock(&lock); diff --git a/collectors/ebpf.plugin/ebpf_shm.h b/collectors/ebpf.plugin/ebpf_shm.h index f58eaa6c1..a415006e6 100644 --- a/collectors/ebpf.plugin/ebpf_shm.h +++ b/collectors/ebpf.plugin/ebpf_shm.h @@ -3,8 +3,9 @@ #ifndef NETDATA_EBPF_SHM_H #define NETDATA_EBPF_SHM_H 1 -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_SHM "shm" +#define NETDATA_EBPF_SHM_MODULE_DESC "Show calls to syscalls shmget(2), shmat(2), shmdt(2) and shmctl(2). This thread is integrated with apps and cgroup." 
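Alongside the module name, each header touched by this patch now carries a `*_MODULE_DESC` string like the one above. The hunks shown here do not reveal where these strings are consumed, so the sketch below only illustrates the general idea of keeping name/description pairs in a small lookup table; the `demo_*` struct and helper are assumptions, not the plugin's real registry (only the two description strings are taken from the patch).

#include <stdio.h>
#include <string.h>

struct demo_module_info {
    const char *name;
    const char *description;
};

static const struct demo_module_info demo_modules[] = {
    { "mount", "Show calls to syscalls mount(2) and umount(2)." },
    { "shm",   "Show calls to syscalls shmget(2), shmat(2), shmdt(2) and shmctl(2). "
               "This thread is integrated with apps and cgroup." },
    { NULL, NULL }
};

static const char *demo_module_description(const char *name)
{
    for (const struct demo_module_info *m = demo_modules; m->name; m++)
        if (!strcmp(m->name, name))
            return m->description;
    return "(unknown module)";
}

int main(void)
{
    printf("shm: %s\n", demo_module_description("shm"));
    return 0;
}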
// charts #define NETDATA_SHM_GLOBAL_CHART "shared_memory_calls" diff --git a/collectors/ebpf.plugin/ebpf_socket.c b/collectors/ebpf.plugin/ebpf_socket.c index b45dec7d9..2cad8bdf1 100644 --- a/collectors/ebpf.plugin/ebpf_socket.c +++ b/collectors/ebpf.plugin/ebpf_socket.c @@ -130,6 +130,10 @@ struct netdata_static_thread socket_threads = { .start_routine = NULL }; +#ifdef NETDATA_DEV_MODE +int socket_disable_priority; +#endif + #ifdef LIBBPF_MAJOR_VERSION /** * Disable Probe @@ -646,6 +650,8 @@ static void ebpf_socket_free(ebpf_module_t *em ) pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -1217,6 +1223,8 @@ static void ebpf_create_global_charts(ebpf_module_t *em) &socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF], 2, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); } + + fflush(stdout); } /** @@ -1844,7 +1852,7 @@ static void fill_last_nv_dimension(netdata_socket_plot_t *ptr, int is_outbound) fill_resolved_name(ptr, hostname, 10 + NETDATA_DOTS_PROTOCOL_COMBINED_LENGTH, service_name, is_outbound); #ifdef NETDATA_INTERNAL_CHECKS - info("Last %s dimension added: ID = %u, IP = OTHER, NAME = %s, DIM1 = %s, DIM2 = %s, DIM3 = %s", + netdata_log_info("Last %s dimension added: ID = %u, IP = OTHER, NAME = %s, DIM1 = %s, DIM2 = %s, DIM3 = %s", (is_outbound)?"outbound":"inbound", network_viewer_opt.max_dim - 1, ptr->resolved_name, ptr->dimension_recv, ptr->dimension_sent, ptr->dimension_retransmit); #endif @@ -1927,12 +1935,12 @@ static void store_socket_inside_avl(netdata_vector_plot_t *out, netdata_socket_t netdata_socket_plot_t *check ; check = (netdata_socket_plot_t *) avl_insert_lock(&out->tree, (avl_t *)w); if (check != w) - error("Internal error, cannot insert the AVL tree."); + netdata_log_error("Internal error, cannot insert the AVL tree."); #ifdef NETDATA_INTERNAL_CHECKS char iptext[INET6_ADDRSTRLEN]; if (inet_ntop(family, &w->index.daddr.addr8, iptext, sizeof(iptext))) - info("New %s dimension added: ID = %u, IP = %s, NAME = %s, DIM1 = %s, DIM2 = %s, DIM3 = %s", + netdata_log_info("New %s dimension added: ID = %u, IP = %s, NAME = %s, DIM1 = %s, DIM2 = %s, DIM3 = %s", (out == &inbound_vectors)?"inbound":"outbound", curr, iptext, w->resolved_name, w->dimension_recv, w->dimension_sent, w->dimension_retransmit); #endif @@ -2120,7 +2128,7 @@ void update_listen_table(uint16_t value, uint16_t proto, netdata_passive_connect fill_nv_port_list(w, value, proto, in); #ifdef NETDATA_INTERNAL_CHECKS - info("The network viewer is monitoring inbound connections for port %u", ntohs(value)); + netdata_log_info("The network viewer is monitoring inbound connections for port %u", ntohs(value)); #endif } @@ -2177,7 +2185,9 @@ void *ebpf_socket_read_hash(void *ptr) int fd_ipv6 = socket_maps[NETDATA_SOCKET_TABLE_IPV6].map_fd; int maps_per_core = em->maps_per_core; // This thread is cancelled from another thread - for (;;) { + uint32_t running_time; + uint32_t lifetime = em->lifetime; + for (running_time = 0;!ebpf_exit_plugin && running_time < lifetime; running_time++) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin) break; @@ -2918,7 +2928,9 @@ static void socket_collector(ebpf_module_t *em) int update_every = em->update_every; int maps_per_core = em->maps_per_core; int counter = update_every - 1; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t 
lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) continue; @@ -2973,6 +2985,15 @@ static void socket_collector(ebpf_module_t *em) } pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -3044,14 +3065,14 @@ static inline void fill_port_list(ebpf_network_viewer_port_list_t **out, ebpf_ne uint16_t cmp_last = ntohs(move->last); if (cmp_first <= first && first <= cmp_last && cmp_first <= last && last <= cmp_last ) { - info("The range/value (%u, %u) is inside the range/value (%u, %u) already inserted, it will be ignored.", + netdata_log_info("The range/value (%u, %u) is inside the range/value (%u, %u) already inserted, it will be ignored.", first, last, cmp_first, cmp_last); freez(in->value); freez(in); return; } else if (first <= cmp_first && cmp_first <= last && first <= cmp_last && cmp_last <= last) { - info("The range (%u, %u) is bigger than previous range (%u, %u) already inserted, the previous will be ignored.", + netdata_log_info("The range (%u, %u) is bigger than previous range (%u, %u) already inserted, the previous will be ignored.", first, last, cmp_first, cmp_last); freez(move->value); move->value = in->value; @@ -3071,7 +3092,7 @@ static inline void fill_port_list(ebpf_network_viewer_port_list_t **out, ebpf_ne } #ifdef NETDATA_INTERNAL_CHECKS - info("Adding values %s( %u, %u) to %s port list used on network viewer", + netdata_log_info("Adding values %s( %u, %u) to %s port list used on network viewer", in->value, ntohs(in->first), ntohs(in->last), (*out == network_viewer_opt.included_port)?"included":"excluded"); #endif @@ -3091,7 +3112,7 @@ static void parse_service_list(void **out, char *service) serv = getservbyname((const char *)service, "udp"); if (!serv) { - info("Cannot resolv the service '%s' with protocols TCP and UDP, it will be ignored", service); + netdata_log_info("Cannot resolv the service '%s' with protocols TCP and UDP, it will be ignored", service); return; } @@ -3165,7 +3186,7 @@ static inline in_addr_t ipv4_network(in_addr_t addr, int prefix) static inline int ip2nl(uint8_t *dst, char *ip, int domain, char *source) { if (inet_pton(domain, ip, dst) <= 0) { - error("The address specified (%s) is invalid ", source); + netdata_log_error("The address specified (%s) is invalid ", source); return -1; } @@ -3301,7 +3322,7 @@ void ebpf_fill_ip_list(ebpf_network_viewer_ip_list_t **out, ebpf_network_viewer_ while (move) { if (in->ver == move->ver && ebpf_is_ip_inside_range(&move->first, &move->last, &in->first, &in->last, in->ver)) { - info("The range/value (%s) is inside the range/value (%s) already inserted, it will be ignored.", + netdata_log_info("The range/value (%s) is inside the range/value (%s) already inserted, it will be ignored.", in->value, move->value); freez(in->value); freez(in); @@ -3319,14 +3340,14 @@ void ebpf_fill_ip_list(ebpf_network_viewer_ip_list_t **out, ebpf_network_viewer_ #ifdef NETDATA_INTERNAL_CHECKS char first[256], last[512]; if (in->ver == AF_INET) { - info("Adding values %s: (%u - %u) to %s IP list \"%s\" used on network viewer", + netdata_log_info("Adding values %s: (%u - %u) to %s IP list \"%s\" used on network viewer", in->value, in->first.addr32[0], 
in->last.addr32[0], (*out == network_viewer_opt.included_ips)?"included":"excluded", table); } else { if (inet_ntop(AF_INET6, in->first.addr8, first, INET6_ADDRSTRLEN) && inet_ntop(AF_INET6, in->last.addr8, last, INET6_ADDRSTRLEN)) - info("Adding values %s - %s to %s IP list \"%s\" used on network viewer", + netdata_log_info("Adding values %s - %s to %s IP list \"%s\" used on network viewer", first, last, (*out == network_viewer_opt.included_ips)?"included":"excluded", table); @@ -3373,7 +3394,7 @@ static void ebpf_parse_ip_list(void **out, char *ip) select = (*end == '/') ? 0 : 1; *end++ = '\0'; if (*end == '!') { - info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup); + netdata_log_info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup); goto cleanipdup; } @@ -3384,7 +3405,7 @@ static void ebpf_parse_ip_list(void **out, char *ip) select = (int) str2i(end); if (select < NETDATA_MINIMUM_IPV4_CIDR || select > NETDATA_MAXIMUM_IPV4_CIDR) { - info("The specified CIDR %s is not valid, the IP %s will be ignored.", end, ip); + netdata_log_info("The specified CIDR %s is not valid, the IP %s will be ignored.", end, ip); goto cleanipdup; } @@ -3400,7 +3421,7 @@ static void ebpf_parse_ip_list(void **out, char *ip) ipv4_convert.s_addr = ipv4_test; char ipv4_msg[INET_ADDRSTRLEN]; if(inet_ntop(AF_INET, &ipv4_convert, ipv4_msg, INET_ADDRSTRLEN)) - info("The network value of CIDR %s was updated for %s .", ipdup, ipv4_msg); + netdata_log_info("The network value of CIDR %s was updated for %s .", ipdup, ipv4_msg); } } else { // Range select = ip2nl(first.addr8, ip, AF_INET, ipdup); @@ -3413,7 +3434,7 @@ static void ebpf_parse_ip_list(void **out, char *ip) } if (htonl(first.addr32[0]) > htonl(last.addr32[0])) { - info("The specified range %s is invalid, the second address is smallest than the first, it will be ignored.", + netdata_log_info("The specified range %s is invalid, the second address is smallest than the first, it will be ignored.", ipdup); goto cleanipdup; } @@ -3427,7 +3448,7 @@ static void ebpf_parse_ip_list(void **out, char *ip) } else if (*end == '-') { *end++ = 0x00; if (*end == '!') { - info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup); + netdata_log_info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup); goto cleanipdup; } @@ -3441,13 +3462,13 @@ static void ebpf_parse_ip_list(void **out, char *ip) } else { // CIDR *end++ = 0x00; if (*end == '!') { - info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup); + netdata_log_info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup); goto cleanipdup; } select = str2i(end); if (select < 0 || select > 128) { - info("The CIDR %s is not valid, the address %s will be ignored.", end, ip); + netdata_log_info("The CIDR %s is not valid, the address %s will be ignored.", end, ip); goto cleanipdup; } @@ -3469,14 +3490,14 @@ static void ebpf_parse_ip_list(void **out, char *ip) char ipv6_msg[INET6_ADDRSTRLEN]; if(inet_ntop(AF_INET6, &ipv6_convert, ipv6_msg, INET6_ADDRSTRLEN)) - info("The network value of CIDR %s was updated for %s .", ipdup, ipv6_msg); + netdata_log_info("The network value of CIDR %s was updated for %s .", ipdup, ipv6_msg); } } if ((be64toh(*(uint64_t *)&first.addr32[2]) > be64toh(*(uint64_t *)&last.addr32[2]) && !memcmp(first.addr32, last.addr32, 2*sizeof(uint32_t))) || (be64toh(*(uint64_t 
*)&first.addr32) > be64toh(*(uint64_t *)&last.addr32)) ) { - info("The specified range %s is invalid, the second address is smallest than the first, it will be ignored.", + netdata_log_info("The specified range %s is invalid, the second address is smallest than the first, it will be ignored.", ipdup); goto cleanipdup; } @@ -3580,7 +3601,7 @@ static void parse_port_list(void **out, char *range) if (likely(*end)) { *end++ = '\0'; if (*end == '!') { - info("The exclusion cannot be in the second part of the range, the range %s will be ignored.", copied); + netdata_log_info("The exclusion cannot be in the second part of the range, the range %s will be ignored.", copied); freez(copied); return; } @@ -3591,7 +3612,7 @@ static void parse_port_list(void **out, char *range) first = str2i((const char *)range); if (first < NETDATA_MINIMUM_PORT_VALUE || first > NETDATA_MAXIMUM_PORT_VALUE) { - info("The first port %d of the range \"%s\" is invalid and it will be ignored!", first, copied); + netdata_log_info("The first port %d of the range \"%s\" is invalid and it will be ignored!", first, copied); freez(copied); return; } @@ -3600,13 +3621,13 @@ static void parse_port_list(void **out, char *range) last = first; if (last < NETDATA_MINIMUM_PORT_VALUE || last > NETDATA_MAXIMUM_PORT_VALUE) { - info("The second port %d of the range \"%s\" is invalid and the whole range will be ignored!", last, copied); + netdata_log_info("The second port %d of the range \"%s\" is invalid and the whole range will be ignored!", last, copied); freez(copied); return; } if (first > last) { - info("The specified order %s is wrong, the smallest value is always the first, it will be ignored!", copied); + netdata_log_info("The specified order %s is wrong, the smallest value is always the first, it will be ignored!", copied); freez(copied); return; } @@ -3639,14 +3660,14 @@ static void read_max_dimension(struct config *cfg) EBPF_MAXIMUM_DIMENSIONS, NETDATA_NV_CAP_VALUE); if (maxdim < 0) { - error("'maximum dimensions = %d' must be a positive number, Netdata will change for default value %ld.", + netdata_log_error("'maximum dimensions = %d' must be a positive number, Netdata will change for default value %ld.", maxdim, NETDATA_NV_CAP_VALUE); maxdim = NETDATA_NV_CAP_VALUE; } maxdim /= 2; if (!maxdim) { - info("The number of dimensions is too small (%u), we are setting it to minimum 2", network_viewer_opt.max_dim); + netdata_log_info("The number of dimensions is too small (%u), we are setting it to minimum 2", network_viewer_opt.max_dim); network_viewer_opt.max_dim = 1; return; } @@ -3714,7 +3735,7 @@ static void link_hostname(ebpf_network_viewer_hostname_list_t **out, ebpf_networ ebpf_network_viewer_hostname_list_t *move = *out; for (; move->next ; move = move->next ) { if (move->hash == in->hash && !strcmp(move->value, in->value)) { - info("The hostname %s was already inserted, it will be ignored.", in->value); + netdata_log_info("The hostname %s was already inserted, it will be ignored.", in->value); freez(in->value); simple_pattern_free(in->value_pattern); freez(in); @@ -3727,7 +3748,7 @@ static void link_hostname(ebpf_network_viewer_hostname_list_t **out, ebpf_networ *out = in; } #ifdef NETDATA_INTERNAL_CHECKS - info("Adding value %s to %s hostname list used on network viewer", + netdata_log_info("Adding value %s to %s hostname list used on network viewer", in->value, (*out == network_viewer_opt.included_hostnames)?"included":"excluded"); #endif @@ -3806,7 +3827,7 @@ void parse_network_viewer_section(struct config *cfg) value = 
appconfig_get(cfg, EBPF_NETWORK_VIEWER_SECTION, EBPF_CONFIG_HOSTNAMES, NULL); link_hostnames(value); } else { - info("Name resolution is disabled, collector will not parser \"hostnames\" list."); + netdata_log_info("Name resolution is disabled, collector will not parser \"hostnames\" list."); } value = appconfig_get(cfg, EBPF_NETWORK_VIEWER_SECTION, @@ -3827,7 +3848,7 @@ static void link_dimension_name(char *port, uint32_t hash, char *value) { int test = str2i(port); if (test < NETDATA_MINIMUM_PORT_VALUE || test > NETDATA_MAXIMUM_PORT_VALUE){ - error("The dimension given (%s = %s) has an invalid value and it will be ignored.", port, value); + netdata_log_error("The dimension given (%s = %s) has an invalid value and it will be ignored.", port, value); return; } @@ -3845,7 +3866,7 @@ static void link_dimension_name(char *port, uint32_t hash, char *value) } else { for (; names->next; names = names->next) { if (names->port == w->port) { - info("Duplicated definition for a service, the name %s will be ignored. ", names->name); + netdata_log_info("Duplicated definition for a service, the name %s will be ignored. ", names->name); freez(names->name); names->name = w->name; names->hash = w->hash; @@ -3857,7 +3878,7 @@ static void link_dimension_name(char *port, uint32_t hash, char *value) } #ifdef NETDATA_INTERNAL_CHECKS - info("Adding values %s( %u) to dimension name list used on network viewer", w->name, htons(w->port)); + netdata_log_info("Adding values %s( %u) to dimension name list used on network viewer", w->name, htons(w->port)); #endif } @@ -3950,7 +3971,7 @@ static int ebpf_socket_load_bpf(ebpf_module_t *em) #endif if (ret) { - error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); } return ret; @@ -3975,7 +3996,7 @@ void *ebpf_socket_thread(void *ptr) parse_table_size_options(&socket_config); if (pthread_mutex_init(&nv_mutex, NULL)) { - error("Cannot initialize local mutex"); + netdata_log_error("Cannot initialize local mutex"); goto endsocket; } @@ -4015,11 +4036,11 @@ void *ebpf_socket_thread(void *ptr) ebpf_create_global_charts(em); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); #ifdef NETDATA_DEV_MODE if (ebpf_aral_socket_pid) - ebpf_statistic_create_aral_chart(NETDATA_EBPF_SOCKET_ARAL_NAME, em); + socket_disable_priority = ebpf_statistic_create_aral_chart(NETDATA_EBPF_SOCKET_ARAL_NAME, em); #endif pthread_mutex_unlock(&lock); diff --git a/collectors/ebpf.plugin/ebpf_socket.h b/collectors/ebpf.plugin/ebpf_socket.h index 1ba20e65e..ae2ee28ab 100644 --- a/collectors/ebpf.plugin/ebpf_socket.h +++ b/collectors/ebpf.plugin/ebpf_socket.h @@ -4,8 +4,9 @@ #include #include "libnetdata/avl/avl.h" -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_SOCKET "socket" +#define NETDATA_EBPF_SOCKET_MODULE_DESC "Monitors TCP and UDP bandwidth. This thread is integrated with apps and cgroup." // Vector indexes #define NETDATA_UDP_START 3 diff --git a/collectors/ebpf.plugin/ebpf_softirq.c b/collectors/ebpf.plugin/ebpf_softirq.c index b5c77bf06..8d8930a10 100644 --- a/collectors/ebpf.plugin/ebpf_softirq.c +++ b/collectors/ebpf.plugin/ebpf_softirq.c @@ -60,6 +60,26 @@ static softirq_val_t softirq_vals[] = { // tmp store for soft IRQ values we get from a per-CPU eBPF map. 
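The socket changes above are largely the rename of `info()`/`error()` to `netdata_log_info()`/`netdata_log_error()`; the softirq changes that follow add `ebpf_obsolete_softirq_global()`, which retires the latency chart when the thread stops. As a rough sketch of the mechanism, marking a chart obsolete amounts to printing another CHART line on stdout that carries the "obsolete" option; the field layout and the priority value below are approximations, not the plugin's exact output.

#include <stdio.h>

/* demo_write_chart_obsolete(): a rough stand-in for ebpf_write_chart_obsolete(). */
static void demo_write_chart_obsolete(const char *type, const char *id,
                                      const char *title, const char *units,
                                      const char *family, const char *charttype,
                                      int priority, int update_every)
{
    printf("CHART %s.%s '' '%s' '%s' '%s' '' %s %d %d obsolete\n",
           type, id, title, units, family, charttype, priority, update_every);
}

int main(void)
{
    /* e.g. retire the soft IRQ latency chart when its thread stops
       (chart id and priority are illustrative) */
    demo_write_chart_obsolete("system", "softirq_latency",
                              "Software IRQ latency", "milliseconds",
                              "softirqs", "stacked", 20001, 1);
    return 0;
}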
static softirq_ebpf_val_t *softirq_ebpf_vals = NULL; +/** + * Obsolete global + * + * Obsolete global charts created by thread. + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_softirq_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, + "softirq_latency", + "Software IRQ latency", + EBPF_COMMON_DIMENSION_MILLISECONDS, + "softirqs", + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + NETDATA_CHART_PRIO_SYSTEM_SOFTIRQS+1, + em->update_every); +} + /** * Cleanup * @@ -71,16 +91,32 @@ static void softirq_cleanup(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - if (em->objects) + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + + ebpf_obsolete_softirq_global(em); + + pthread_mutex_unlock(&lock); + fflush(stdout); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + + if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } for (int i = 0; softirq_tracepoints[i].class != NULL; i++) { ebpf_disable_tracepoint(&softirq_tracepoints[i]); } freez(softirq_ebpf_vals); + softirq_ebpf_vals = NULL; pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -170,7 +206,7 @@ static void softirq_collector(ebpf_module_t *em) softirq_create_charts(em->update_every); softirq_create_dims(); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); pthread_mutex_unlock(&lock); // loop and read from published data until ebpf plugin is closed. @@ -180,7 +216,9 @@ static void softirq_collector(ebpf_module_t *em) int counter = update_every - 1; int maps_per_core = em->maps_per_core; //This will be cancelled by its parent - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) continue; @@ -195,6 +233,15 @@ static void softirq_collector(ebpf_module_t *em) write_end_chart(); pthread_mutex_unlock(&lock); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } diff --git a/collectors/ebpf.plugin/ebpf_softirq.h b/collectors/ebpf.plugin/ebpf_softirq.h index eea2a1841..4ef36775a 100644 --- a/collectors/ebpf.plugin/ebpf_softirq.h +++ b/collectors/ebpf.plugin/ebpf_softirq.h @@ -3,6 +3,9 @@ #ifndef NETDATA_EBPF_SOFTIRQ_H #define NETDATA_EBPF_SOFTIRQ_H 1 +// Module observation +#define NETDATA_EBPF_SOFTIRQ_MODULE_DESC "Show time spent servicing individual software interrupt requests (soft IRQs)." + /***************************************************************** * copied from kernel-collectors repo, with modifications needed * for inclusion here. 
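The softirq cleanup above calls `ebpf_update_kernel_memory_with_vector()` with the new `EBPF_ACTION_STAT_REMOVE` argument, the counterpart of the `EBPF_ACTION_STAT_ADD` used when a thread starts. A plausible reading, sketched below with illustrative types rather than the real `ebpf_local_maps_t` and `plugin_statistics` layout, is that the same walk over a thread's maps can now either charge or credit the plugin-wide counters, so an exiting thread gives its map memory back.

#include <stdint.h>
#include <stdio.h>

enum demo_stat_action { DEMO_STAT_ADD, DEMO_STAT_REMOVE };

typedef struct { const char *name; uint64_t locked_bytes; } demo_map_t;
typedef struct { uint64_t memlock_kern; uint32_t hash_tables; } demo_stats_t;

static void demo_update_kernel_memory(demo_stats_t *st, const demo_map_t *maps,
                                      int n, enum demo_stat_action action)
{
    for (int i = 0; i < n; i++) {
        if (action == DEMO_STAT_ADD) {
            st->memlock_kern += maps[i].locked_bytes;
            st->hash_tables++;
        } else {
            st->memlock_kern -= maps[i].locked_bytes;
            st->hash_tables--;
        }
    }
}

int main(void)
{
    demo_stats_t stats = { 0, 0 };
    demo_map_t maps[] = { { "tbl_pid", 4096 }, { "tbl_global", 512 } };

    demo_update_kernel_memory(&stats, maps, 2, DEMO_STAT_ADD);     /* thread start */
    demo_update_kernel_memory(&stats, maps, 2, DEMO_STAT_REMOVE);  /* thread exit  */

    printf("memlock after exit: %llu bytes, tables: %u\n",
           (unsigned long long)stats.memlock_kern, (unsigned)stats.hash_tables);
    return 0;
}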
diff --git a/collectors/ebpf.plugin/ebpf_swap.c b/collectors/ebpf.plugin/ebpf_swap.c index 492b59678..9a1640a35 100644 --- a/collectors/ebpf.plugin/ebpf_swap.c +++ b/collectors/ebpf.plugin/ebpf_swap.c @@ -229,6 +229,109 @@ static inline int ebpf_swap_load_and_attach(struct swap_bpf *obj, ebpf_module_t * *****************************************************************/ +static void ebpf_obsolete_specific_swap_charts(char *type, int update_every); + +/** + * Obsolete services + * + * Obsolete all service charts created + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_swap_services(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_MEM_SWAP_READ_CHART, + "Calls to function swap_readpage.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_SWAP_READ_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5100, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_MEM_SWAP_WRITE_CHART, + "Calls to function swap_writepage.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_SWAP_WRITE_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5101, + em->update_every); +} + +/** + * Obsolete cgroup chart + * + * Send obsolete for all charts created before to close. + * + * @param em a pointer to `struct ebpf_module` + */ +static inline void ebpf_obsolete_swap_cgroup_charts(ebpf_module_t *em) { + pthread_mutex_lock(&mutex_cgroup_shm); + + ebpf_obsolete_swap_services(em); + + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + ebpf_obsolete_specific_swap_charts(ect->name, em->update_every); + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Obsolette apps charts + * + * Obsolete apps charts. + * + * @param em a pointer to the structure with the default values. + */ +void ebpf_obsolete_swap_apps_charts(struct ebpf_module *em) +{ + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_MEM_SWAP_READ_CHART, + "Calls to function swap_readpage.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20191, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_MEM_SWAP_WRITE_CHART, + "Calls to function swap_writepage.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20192, + em->update_every); +} + +/** + * Obsolete global + * + * Obsolete global charts created by thread. 
+ * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_swap_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, + NETDATA_MEM_SWAP_CHART, + "Calls to access swap memory", + EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_SYSTEM_SWAP_CALLS, + em->update_every); +} + /** * Swap exit * @@ -240,15 +343,40 @@ static void ebpf_swap_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + if (em->cgroup_charts) { + ebpf_obsolete_swap_cgroup_charts(em); + fflush(stdout); + } + + if (em->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + ebpf_obsolete_swap_apps_charts(em); + } + + ebpf_obsolete_swap_global(em); + + fflush(stdout); + pthread_mutex_unlock(&lock); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + #ifdef LIBBPF_MAJOR_VERSION - if (bpf_obj) + if (bpf_obj) { swap_bpf__destroy(bpf_obj); + bpf_obj = NULL; + } #endif - if (em->objects) + if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -674,7 +802,9 @@ static void swap_collector(ebpf_module_t *em) heartbeat_init(&hb); int counter = update_every - 1; int maps_per_core = em->maps_per_core; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) continue; @@ -701,6 +831,15 @@ static void swap_collector(ebpf_module_t *em) pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -784,6 +923,8 @@ static void ebpf_create_swap_charts(int update_every) ebpf_create_global_dimension, swap_publish_aggregated, NETDATA_SWAP_END, update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + fflush(stdout); } /* @@ -818,7 +959,7 @@ static int ebpf_swap_load_bpf(ebpf_module_t *em) #endif if (ret) - error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); return ret; } @@ -857,7 +998,7 @@ void *ebpf_swap_thread(void *ptr) pthread_mutex_lock(&lock); ebpf_create_swap_charts(em->update_every); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); pthread_mutex_unlock(&lock); swap_collector(em); diff --git a/collectors/ebpf.plugin/ebpf_swap.h b/collectors/ebpf.plugin/ebpf_swap.h index 8ca980bf0..79e9a01ac 100644 --- a/collectors/ebpf.plugin/ebpf_swap.h +++ b/collectors/ebpf.plugin/ebpf_swap.h @@ -3,8 +3,9 @@ #ifndef NETDATA_EBPF_SWAP_H #define NETDATA_EBPF_SWAP_H 1 -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_SWAP "swap" +#define NETDATA_EBPF_SWAP_MODULE_DESC "Monitor swap space usage. This thread is integrated with apps and cgroup." 
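The swap exit callback above follows the same teardown order the patch gives every thread: obsolete the charts while holding the stdout lock, credit kernel memory back, destroy the CO-RE skeleton or unload the legacy probes and NULL the handles, then mark the module stopped and refresh the plugin statistics. A minimal sketch of that order, using stand-in names rather than the plugin's helpers:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct {
    bool obsolete_on_exit;      /* thread was serving the functions interface */
    bool cgroup_charts;
    bool apps_charts_created;
    bool stopped;
} demo_module_t;

static pthread_mutex_t demo_stdout_lock = PTHREAD_MUTEX_INITIALIZER;

static void demo_thread_exit(demo_module_t *em)
{
    if (em->obsolete_on_exit) {
        pthread_mutex_lock(&demo_stdout_lock);
        if (em->cgroup_charts)
            puts("obsolete cgroup/services charts");
        if (em->apps_charts_created)
            puts("obsolete apps charts");
        puts("obsolete global charts");
        fflush(stdout);
        pthread_mutex_unlock(&demo_stdout_lock);
    }

    puts("subtract this thread's map memory from plugin statistics");
    puts("destroy CO-RE skeleton / unload legacy probes, then NULL the handles");

    em->stopped = true;
    puts("update plugin statistics so the running-thread count drops");
}

int main(void)
{
    demo_module_t em = { true, true, true, false };
    demo_thread_exit(&em);
    return 0;
}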
#define NETDATA_SWAP_SLEEP_MS 850000ULL diff --git a/collectors/ebpf.plugin/ebpf_sync.c b/collectors/ebpf.plugin/ebpf_sync.c index 9f1c0159d..521d39f31 100644 --- a/collectors/ebpf.plugin/ebpf_sync.c +++ b/collectors/ebpf.plugin/ebpf_sync.c @@ -248,7 +248,6 @@ static inline int ebpf_sync_load_and_attach(struct sync_bpf *obj, ebpf_module_t * *****************************************************************/ -#ifdef LIBBPF_MAJOR_VERSION /** * Cleanup Objects * @@ -259,28 +258,86 @@ void ebpf_sync_cleanup_objects() int i; for (i = 0; local_syscalls[i].syscall; i++) { ebpf_sync_syscalls_t *w = &local_syscalls[i]; - if (w->sync_obj) +#ifdef LIBBPF_MAJOR_VERSION + if (w->sync_obj) { sync_bpf__destroy(w->sync_obj); + w->sync_obj = NULL; + } +#endif + if (w->probe_links) { + ebpf_unload_legacy_code(w->objects, w->probe_links); + w->objects = NULL; + w->probe_links = NULL; + } } } -#endif + +/* + static void ebpf_create_sync_chart(char *id, + char *title, + int order, + int idx, + int end, + int update_every) + { + ebpf_write_chart_cmd(NETDATA_EBPF_MEMORY_GROUP, id, title, EBPF_COMMON_DIMENSION_CALL, + NETDATA_EBPF_SYNC_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NULL, order, + update_every, + NETDATA_EBPF_MODULE_NAME_SYNC); + */ /** - * Sync Free + * Obsolete global * - * Cleanup variables after child threads to stop + * Obsolete global charts created by thread. * - * @param ptr thread data. + * @param em a pointer to `struct ebpf_module` */ -static void ebpf_sync_free(ebpf_module_t *em) +static void ebpf_obsolete_sync_global(ebpf_module_t *em) { -#ifdef LIBBPF_MAJOR_VERSION - ebpf_sync_cleanup_objects(); -#endif + if (local_syscalls[NETDATA_SYNC_FSYNC_IDX].enabled && local_syscalls[NETDATA_SYNC_FDATASYNC_IDX].enabled) + ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, + NETDATA_EBPF_FILE_SYNC_CHART, + "Monitor calls for fsync(2) and fdatasync(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_EBPF_SYNC_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21300, + em->update_every); - pthread_mutex_lock(&ebpf_exit_cleanup); - em->enabled = NETDATA_THREAD_EBPF_STOPPED; - pthread_mutex_unlock(&ebpf_exit_cleanup); + if (local_syscalls[NETDATA_SYNC_MSYNC_IDX].enabled) + ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, + NETDATA_EBPF_MSYNC_CHART, + "Monitor calls for msync(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_EBPF_SYNC_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21301, + em->update_every); + + if (local_syscalls[NETDATA_SYNC_SYNC_IDX].enabled && local_syscalls[NETDATA_SYNC_SYNCFS_IDX].enabled) + ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, + NETDATA_EBPF_SYNC_CHART, + "Monitor calls for sync(2) and syncfs(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_EBPF_SYNC_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21302, + em->update_every); + + if (local_syscalls[NETDATA_SYNC_SYNC_FILE_RANGE_IDX].enabled) + ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, + NETDATA_EBPF_FILE_SEGMENT_CHART, + "Monitor calls for sync_file_range(2).", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_EBPF_SYNC_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21303, + em->update_every); } /** @@ -293,7 +350,19 @@ static void ebpf_sync_free(ebpf_module_t *em) static void ebpf_sync_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - ebpf_sync_free(em); + + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + ebpf_obsolete_sync_global(em); + pthread_mutex_unlock(&lock); + } + + ebpf_sync_cleanup_objects(); + + pthread_mutex_lock(&ebpf_exit_cleanup); + em->enabled = 
NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); + pthread_mutex_unlock(&ebpf_exit_cleanup); } /***************************************************************** @@ -373,7 +442,7 @@ static int ebpf_sync_initialize_syscall(ebpf_module_t *em) } } } else { - info("Cannot find syscall %s we are not going to monitor it.", syscall); + netdata_log_info("Cannot find syscall %s we are not going to monitor it.", syscall); w->enabled = false; } @@ -489,7 +558,9 @@ static void sync_collector(ebpf_module_t *em) int update_every = em->update_every; int counter = update_every - 1; int maps_per_core = em->maps_per_core; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) continue; @@ -501,6 +572,15 @@ static void sync_collector(ebpf_module_t *em) sync_send_data(); pthread_mutex_unlock(&lock); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -574,6 +654,8 @@ static void ebpf_create_sync_charts(int update_every) ebpf_create_sync_chart(NETDATA_EBPF_FILE_SEGMENT_CHART, "Monitor calls for sync_file_range(2).", 21303, NETDATA_SYNC_SYNC_FILE_RANGE_IDX, NETDATA_SYNC_SYNC_FILE_RANGE_IDX, update_every); + + fflush(stdout); } /** diff --git a/collectors/ebpf.plugin/ebpf_sync.h b/collectors/ebpf.plugin/ebpf_sync.h index cace2a1cf..bd1bb78b0 100644 --- a/collectors/ebpf.plugin/ebpf_sync.h +++ b/collectors/ebpf.plugin/ebpf_sync.h @@ -7,8 +7,9 @@ #include "includes/sync.skel.h" #endif -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_SYNC "sync" +#define NETDATA_EBPF_SYNC_MODULE_DESC "Monitor calls to syscalls sync(2), fsync(2), fdatasync(2), syncfs(2), msync(2), and sync_file_range(2)." 
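The sync_collector() hunk above (and the matching swap and vfs hunks) bounds the main loop by em->lifetime instead of looping until plugin exit, and publishes the accumulated em->running_time under ebpf_exit_cleanup on every tick. A minimal sketch of just that loop arithmetic follows; it is not part of the patch, and fake_module_t, exit_plugin and collector are hypothetical stand-ins for ebpf_module_t, ebpf_exit_plugin and a per-thread collector (the heartbeat sleep and map reads are elided).

/* Illustrative sketch of the lifetime-bounded loop above -- stand-ins only. */
#include <stdint.h>
#include <stdio.h>

typedef struct fake_module {          /* stand-in for ebpf_module_t */
    int update_every;                 /* seconds between chart updates          */
    uint32_t lifetime;                /* seconds the thread is allowed to run   */
    uint32_t running_time;            /* published so other threads can read it */
} fake_module_t;

static int exit_plugin = 0;           /* stand-in for ebpf_exit_plugin */

static void collector(fake_module_t *em)
{
    int update_every = em->update_every;
    int counter = update_every - 1;
    uint32_t running_time = 0;
    uint32_t lifetime = em->lifetime;

    while (!exit_plugin && running_time < lifetime) {
        /* heartbeat_next(&hb, USEC_PER_SEC) sleeps roughly one second here in the real code */
        if (exit_plugin || ++counter != update_every)
            continue;
        counter = 0;

        /* ... read kernel maps and send chart data here ... */

        /* if something external zeroed em->running_time, restart from one interval;
           otherwise keep accumulating (done under ebpf_exit_cleanup in the patch) */
        if (running_time && !em->running_time)
            running_time = update_every;
        else
            running_time += update_every;
        em->running_time = running_time;
    }

    printf("collector left the loop after %u of %u seconds\n", running_time, lifetime);
}

int main(void)
{
    fake_module_t em = { .update_every = 1, .lifetime = 3, .running_time = 0 };
    collector(&em);   /* prints: collector left the loop after 3 of 3 seconds */
    return 0;
}

With update_every = 1 and lifetime = 3 the loop runs three ticks and leaves on its own, which lets a thread wind down once its allotted lifetime elapses instead of waiting for the whole plugin to exit.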
// charts #define NETDATA_EBPF_SYNC_CHART "sync" diff --git a/collectors/ebpf.plugin/ebpf_vfs.c b/collectors/ebpf.plugin/ebpf_vfs.c index 6cafafc38..5747a2408 100644 --- a/collectors/ebpf.plugin/ebpf_vfs.c +++ b/collectors/ebpf.plugin/ebpf_vfs.c @@ -60,6 +60,10 @@ netdata_ebpf_targets_t vfs_targets[] = { {.name = "vfs_write", .mode = EBPF_LOAD {.name = "release_task", .mode = EBPF_LOAD_TRAMPOLINE}, {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; +#ifdef NETDATA_DEV_MODE +int vfs_disable_priority; +#endif + #ifdef LIBBPF_MAJOR_VERSION /** * Disable probe @@ -403,6 +407,447 @@ static inline int ebpf_vfs_load_and_attach(struct vfs_bpf *obj, ebpf_module_t *e * *****************************************************************/ +static void ebpf_obsolete_specific_vfs_charts(char *type, ebpf_module_t *em); + +/** + * Obsolete services + * + * Obsolete all service charts created + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_vfs_services(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_FILE_DELETED, + "Files deleted", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20065, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, + "Write to disk", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20066, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, + "Fails to write", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20067, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_READ_CALLS, + "Read from disk", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20068, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, + "Fails to read", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20069, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, + "Bytes written on disk", + EBPF_COMMON_DIMENSION_BYTES, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20070, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_READ_BYTES, + "Bytes read from disk", + EBPF_COMMON_DIMENSION_BYTES, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20071, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_FSYNC, + "Calls to vfs_fsync", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20072, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, + "Sync error", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20073, + em->update_every); + } + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_OPEN, + "Calls to vfs_open", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20074, + 
em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, + "Open error", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20075, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_CREATE, + "Calls to vfs_create", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20076, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, + "Create error", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20077, + em->update_every); + } +} + +/** + * Obsolete cgroup chart + * + * Send obsolete for all charts created before to close. + * + * @param em a pointer to `struct ebpf_module` + */ +static inline void ebpf_obsolete_vfs_cgroup_charts(ebpf_module_t *em) { + pthread_mutex_lock(&mutex_cgroup_shm); + + ebpf_obsolete_vfs_services(em); + + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + ebpf_obsolete_specific_vfs_charts(ect->name, em); + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Obsolette apps charts + * + * Obsolete apps charts. + * + * @param em a pointer to the structure with the default values. + */ +void ebpf_obsolete_vfs_apps_charts(struct ebpf_module *em) +{ + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_FILE_DELETED, + "Files deleted", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20065, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, + "Write to disk", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20066, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, + "Fails to write", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20067, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_READ_CALLS, + "Read from disk", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20068, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, + "Fails to read", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20069, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, + "Bytes written on disk", + EBPF_COMMON_DIMENSION_BYTES, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20070, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_READ_BYTES, + "Bytes read from disk", + EBPF_COMMON_DIMENSION_BYTES, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20071, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_FSYNC, + "Calls for vfs_fsync", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20072, + em->update_every); + + if (em->mode < MODE_ENTRY) 
{ + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, + "Sync error", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20073, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_OPEN, + "Calls for vfs_open", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20074, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, + "Open error", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20075, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_CREATE, + "Calls for vfs_create", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20076, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, + "Create error", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20077, + em->update_every); + } +} + +/** + * Obsolete global + * + * Obsolete global charts created by thread. + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_vfs_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_FILE_CLEAN_COUNT, + "Remove files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_CLEAN, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_FILE_IO_COUNT, + "Calls to IO", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_COUNT, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_IO_FILE_BYTES, + "Bytes written and read", + EBPF_COMMON_DIMENSION_BYTES, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_BYTES, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_FILE_ERR_COUNT, + "Fails to write or read", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_EBYTES, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_FSYNC, + "Calls for vfs_fsync", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_FSYNC, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_FSYNC_ERR, + "Fails to synchronize", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_EFSYNC, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_OPEN, + "Calls for vfs_open", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_OPEN, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_OPEN_ERR, + "Fails to open a file", + EBPF_COMMON_DIMENSION_CALL, + 
NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_EOPEN, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_CREATE, + "Calls for vfs_create", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_CREATE, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_CREATE_ERR, + "Fails to create a file.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_ECREATE, + em->update_every); + } +} + /** * Exit * @@ -414,15 +859,45 @@ static void ebpf_vfs_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + if (em->cgroup_charts) { + ebpf_obsolete_vfs_cgroup_charts(em); + fflush(stdout); + } + + if (em->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + ebpf_obsolete_vfs_apps_charts(em); + } + + ebpf_obsolete_vfs_global(em); + +#ifdef NETDATA_DEV_MODE + if (ebpf_aral_vfs_pid) + ebpf_statistic_obsolete_aral_chart(em, vfs_disable_priority); +#endif + + fflush(stdout); + pthread_mutex_unlock(&lock); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + #ifdef LIBBPF_MAJOR_VERSION - if (vfs_bpf_obj) + if (vfs_bpf_obj) { vfs_bpf__destroy(vfs_bpf_obj); + vfs_bpf_obj = NULL; + } #endif - if (em->objects) + if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -1486,7 +1961,9 @@ static void vfs_collector(ebpf_module_t *em) int update_every = em->update_every; int counter = update_every - 1; int maps_per_core = em->maps_per_core; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) continue; @@ -1519,6 +1996,15 @@ static void vfs_collector(ebpf_module_t *em) pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -1690,6 +2176,8 @@ static void ebpf_create_global_charts(ebpf_module_t *em) &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE], 1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); } + + fflush(stdout); } /** @@ -1934,10 +2422,10 @@ void *ebpf_vfs_thread(void *ptr) pthread_mutex_lock(&lock); ebpf_create_global_charts(em); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); #ifdef NETDATA_DEV_MODE if (ebpf_aral_vfs_pid) - ebpf_statistic_create_aral_chart(NETDATA_EBPF_VFS_ARAL_NAME, em); + vfs_disable_priority = ebpf_statistic_create_aral_chart(NETDATA_EBPF_VFS_ARAL_NAME, em); #endif pthread_mutex_unlock(&lock); diff --git a/collectors/ebpf.plugin/ebpf_vfs.h b/collectors/ebpf.plugin/ebpf_vfs.h index 
45a1df4b1..8fe12a7eb 100644 --- a/collectors/ebpf.plugin/ebpf_vfs.h +++ b/collectors/ebpf.plugin/ebpf_vfs.h @@ -3,8 +3,9 @@ #ifndef NETDATA_EBPF_VFS_H #define NETDATA_EBPF_VFS_H 1 -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_VFS "vfs" +#define NETDATA_EBPF_VFS_MODULE_DESC "Monitor VFS (Virtual File System) functions. This thread is integrated with apps and cgroup." #define NETDATA_DIRECTORY_VFS_CONFIG_FILE "vfs.conf" diff --git a/collectors/ebpf.plugin/multi_metadata.yaml b/collectors/ebpf.plugin/multi_metadata.yaml new file mode 100644 index 000000000..9a31a4037 --- /dev/null +++ b/collectors/ebpf.plugin/multi_metadata.yaml @@ -0,0 +1,2360 @@ +name: ebpf.plugin +modules: + - meta: + plugin_name: ebpf.plugin + module_name: filedescriptor + monitored_instance: + name: ebpf filedescriptor + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.fd_open + description: Number of open files + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: cgroup.fd_open_error + description: Fails to open files + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: cgroup.fd_closed + description: Files closed + unit: "calls/s" + chart_type: line + dimensions: + - name: close + - name: cgroup.fd_close_error + description: Fails to close files + unit: "calls/s" + chart_type: line + dimensions: + - name: close + - name: global + description: "" + labels: [] + metrics: + - name: services.file_open + description: Number of open files + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.file_open_error + description: Fails to open files + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.file_closed + description: Files closed + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.file_close_error + description: Fails to close files + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: apps.file_open + description: Number of open files + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.file_open_error + description: Fails to open files + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.file_closed + description: Files closed + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.file_close_error + description: Fails to close files + unit: 
"calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: filesystem.file_descriptor + description: Open and close calls + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: close + - name: filesystem.file_error + description: Open fails + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: close + - meta: + plugin_name: ebpf.plugin + module_name: processes + monitored_instance: + name: ebpf processes + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.process_thread + description: Start process + unit: "calls/s" + chart_type: line + dimensions: + - name: process + - name: system.process_status + description: Process not closed + unit: "difference" + chart_type: line + dimensions: + - name: process + - name: zombie + - name: system.exit + description: Exit process + unit: "calls/s" + chart_type: line + dimensions: + - name: process + - name: system.task_error + description: Fails to create process + unit: "calls/s" + chart_type: line + dimensions: + - name: task + - name: apps.process_create + description: Process started + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.thread_create + description: Threads started + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.task_exit + description: Tasks starts exit process + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.task_close + description: Tasks closed + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.task_error + description: Errors to create process or threads + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.process_create + description: Process started + unit: "calls/s" + chart_type: line + dimensions: + - name: process + - name: cgroup.thread_create + description: Threads started + unit: "calls/s" + chart_type: line + dimensions: + - name: thread + - name: cgroup.task_exit + description: Tasks starts exit process + unit: "calls/s" + chart_type: line + dimensions: + - name: exit + - name: cgroup.task_close + description: Tasks closed + unit: "calls/s" + chart_type: line + dimensions: + - name: process + - name: cgroup.task_error + description: Errors to create process or threads + unit: "calls/s" + chart_type: line + dimensions: + - name: process + - name: services.process_create + description: Process started + 
unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.thread_create + description: Threads started + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.task_close + description: Tasks starts exit process + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.task_exit + description: Tasks closed + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.task_error + description: Errors to create process or threads + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - meta: + plugin_name: ebpf.plugin + module_name: disk + monitored_instance: + name: ebpf disk + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: disk + description: "" + labels: [] + metrics: + - name: disk.latency_io + description: Disk latency + unit: "calls/s" + chart_type: stacked + dimensions: + - name: latency + - meta: + plugin_name: ebpf.plugin + module_name: hardirq + monitored_instance: + name: ebpf hardirq + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.hardirq_latency + description: Hardware IRQ latency + unit: "milisecondds" + chart_type: stacked + dimensions: + - name: hardirq names + - meta: + plugin_name: ebpf.plugin + module_name: cachestat + monitored_instance: + name: ebpf cachestat + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + 
supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: apps.cachestat_ratio + description: Hit ratio + unit: "%" + chart_type: line + dimensions: + - name: a dimension per app group + - name: apps.cachestat_dirties + description: Number of dirty pages + unit: "page/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.cachestat_hits + description: Number of accessed files + unit: "hits/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.cachestat_misses + description: Files out of page cache + unit: "misses/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: services.cachestat_ratio + description: Hit ratio + unit: "%" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: services.cachestat_dirties + description: Number of dirty pages + unit: "page/s" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: services.cachestat_hits + description: Number of accessed files + unit: "hits/s" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: services.cachestat_misses + description: Files out of page cache + unit: "misses/s" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: mem.cachestat_ratio + description: Hit ratio + unit: "%" + chart_type: line + dimensions: + - name: ratio + - name: mem.cachestat_dirties + description: Number of dirty pages + unit: "page/s" + chart_type: line + dimensions: + - name: dirty + - name: mem.cachestat_hits + description: Number of accessed files + unit: "hits/s" + chart_type: line + dimensions: + - name: hit + - name: mem.cachestat_misses + description: Files out of page cache + unit: "misses/s" + chart_type: line + dimensions: + - name: miss + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.cachestat_ratio + description: Hit ratio + unit: "%" + chart_type: line + dimensions: + - name: ratio + - name: cgroup.cachestat_dirties + description: Number of dirty pages + unit: "page/s" + chart_type: line + dimensions: + - name: dirty + - name: cgroup.cachestat_hits + description: Number of accessed files + unit: "hits/s" + chart_type: line + dimensions: + - name: hit + - name: cgroup.cachestat_misses + description: Files out of page cache + unit: "misses/s" + chart_type: line + dimensions: + - name: miss + - meta: + plugin_name: ebpf.plugin + module_name: sync + monitored_instance: + name: ebpf sync + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: 
'' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: sync_freq + link: https://github.com/netdata/netdata/blob/master/health/health.d/synchronization.conf + metric: mem.sync + info: number of sync() system calls. Every call causes all pending modifications to filesystem metadata and cached file data to be written to the underlying filesystems. + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: mem.file_sync + description: Monitor calls for fsync(2) and fdatasync(2). + unit: "calls/s" + chart_type: stacked + dimensions: + - name: fsync + - name: fdatasync + - name: mem.meory_map + description: Monitor calls for msync(2). + unit: "calls/s" + chart_type: line + dimensions: + - name: msync + - name: mem.sync + description: Monitor calls for sync(2) and syncfs(2). + unit: "calls/s" + chart_type: line + dimensions: + - name: sync + - name: syncfs + - name: mem.file_segment + description: Monitor calls for sync_file_range(2). + unit: "calls/s" + chart_type: line + dimensions: + - name: sync_file_range + - meta: + plugin_name: ebpf.plugin + module_name: mdflush + monitored_instance: + name: ebpf mdflush + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: mdstat.mdstat_flush + description: MD flushes + unit: "flushes" + chart_type: stacked + dimensions: + - name: disk + - meta: + plugin_name: ebpf.plugin + module_name: swap + monitored_instance: + name: ebpf swap + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + 
troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.swap_read + description: Calls to function swap_readpage. + unit: "calls/s" + chart_type: line + dimensions: + - name: read + - name: cgroup.swap_write + description: Calls to function swap_writepage. + unit: "calls/s" + chart_type: line + dimensions: + - name: write + - name: global + description: "" + labels: [] + metrics: + - name: services.swap_read + description: Calls to swap_readpage. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.swap_write + description: Calls to function swap_writepage. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: apps.swap_read_call + description: Calls to function swap_readpage. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.swap_write_call + description: Calls to function swap_writepage. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: system.swapcalls + description: Calls to access swap memory + unit: "calls/s" + chart_type: line + dimensions: + - name: write + - name: read + - meta: + plugin_name: ebpf.plugin + module_name: oomkill + monitored_instance: + name: ebpf oomkill + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.oomkills + description: OOM kills. This chart is provided by eBPF plugin. + unit: "kills" + chart_type: line + dimensions: + - name: cgroup name + - name: global + description: "" + labels: [] + metrics: + - name: services.oomkills + description: OOM kills. This chart is provided by eBPF plugin. 
+ unit: "kills" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: apps.oomkills + description: OOM kills + unit: "kills" + chart_type: stacked + dimensions: + - name: a dimension per app group + - meta: + plugin_name: ebpf.plugin + module_name: socket + monitored_instance: + name: ebpf socket + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: ip.inbound_conn + description: Inbound connections. + unit: "connections/s" + chart_type: line + dimensions: + - name: connection_tcp + - name: ip.tcp_outbound_conn + description: TCP outbound connections. + unit: "connections/s" + chart_type: line + dimensions: + - name: received + - name: ip.tcp_functions + description: Calls to internal functions + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - name: send + - name: closed + - name: ip.total_tcp_bandwidth + description: TCP bandwidth + unit: "kilobits/s" + chart_type: line + dimensions: + - name: received + - name: send + - name: ip.tcp_error + description: TCP errors + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - name: send + - name: ip.tcp_retransmit + description: Packages retransmitted + unit: "calls/s" + chart_type: line + dimensions: + - name: retransmited + - name: ip.udp_functions + description: UDP calls + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - name: send + - name: ip.total_udp_bandwidth + description: UDP bandwidth + unit: "kilobits/s" + chart_type: line + dimensions: + - name: received + - name: send + - name: ip.udp_error + description: UDP errors + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - name: send + - name: apps.outbound_conn_v4 + description: Calls to tcp_v4_connection + unit: "connections/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.outbound_conn_v6 + description: Calls to tcp_v6_connection + unit: "connections/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.total_bandwidth_sent + description: Bytes sent + unit: "kilobits/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.total_bandwidth_recv + description: bytes received + unit: "kilobits/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.bandwidth_tcp_send + description: Calls for tcp_sendmsg + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.bandwidth_tcp_recv + description: Calls for tcp_cleanup_rbuf + unit: "calls/s" + chart_type: stacked + dimensions: + - 
name: a dimension per app group + - name: apps.bandwidth_tcp_retransmit + description: Calls for tcp_retransmit + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.bandwidth_udp_send + description: Calls for udp_sendmsg + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.bandwidth_udp_recv + description: Calls for udp_recvmsg + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: services.net_conn_ipv4 + description: Calls to tcp_v4_connection + unit: "connections/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_conn_ipv6 + description: Calls to tcp_v6_connection + unit: "connections/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_bytes_recv + description: Bytes received + unit: "kilobits/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_bytes_sent + description: Bytes sent + unit: "kilobits/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_tcp_recv + description: Calls to tcp_cleanup_rbuf. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_tcp_send + description: Calls to tcp_sendmsg. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_tcp_retransmit + description: Calls to tcp_retransmit + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_udp_send + description: Calls to udp_sendmsg + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_udp_recv + description: Calls to udp_recvmsg + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.net_conn_ipv4 + description: Calls to tcp_v4_connection + unit: "connections/s" + chart_type: line + dimensions: + - name: connected_v4 + - name: cgroup.net_conn_ipv6 + description: Calls to tcp_v6_connection + unit: "connections/s" + chart_type: line + dimensions: + - name: connected_v6 + - name: cgroup.net_bytes_recv + description: Bytes received + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - name: cgroup.net_bytes_sent + description: Bytes sent + unit: "calls/s" + chart_type: line + dimensions: + - name: sent + - name: cgroup.net_tcp_recv + description: Calls to tcp_cleanup_rbuf. + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - name: cgroup.net_tcp_send + description: Calls to tcp_sendmsg. + unit: "calls/s" + chart_type: line + dimensions: + - name: sent + - name: cgroup.net_retransmit + description: Calls to tcp_retransmit. 
+ unit: "calls/s" + chart_type: line + dimensions: + - name: retransmitted + - name: cgroup.net_udp_send + description: Calls to udp_sendmsg + unit: "calls/s" + chart_type: line + dimensions: + - name: sent + - name: cgroup.net_udp_recv + description: Calls to udp_recvmsg + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - meta: + plugin_name: ebpf.plugin + module_name: dcstat + monitored_instance: + name: ebpf dcstat + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: apps.dc_ratio + description: Percentage of files inside directory cache + unit: "%" + chart_type: line + dimensions: + - name: a dimension per app group + - name: apps.dc_reference + description: Count file access + unit: "files" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.dc_not_cache + description: Files not present inside directory cache + unit: "files" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.dc_not_found + description: Files not found + unit: "files" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: services.dc_ratio + description: Percentage of files inside directory cache + unit: "%" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: services.dc_reference + description: Count file access + unit: "files" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: services.dc_not_cache + description: Files not present inside directory cache + unit: "files" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: services.dc_not_found + description: Files not found + unit: "files" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: filesystem.dc_hit_ratio + description: Percentage of files inside directory cache + unit: "%" + chart_type: line + dimensions: + - name: ratio + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.dc_ratio + description: Percentage of files inside directory cache + unit: "%" + chart_type: line + dimensions: + - name: ratio + - name: cgroup.dc_reference + description: Count file access + unit: "files" + chart_type: line + dimensions: + - name: reference + - name: cgroup.dc_not_cache + description: Files not present inside directory cache + unit: "files" + chart_type: line + dimensions: + - name: slow + - name: cgroup.dc_not_found + description: Files not found + unit: "files" + chart_type: line + dimensions: + - name: miss + - name: filesystem + description: "" + labels: [] + metrics: + - name: 
filesystem.dc_reference + description: Variables used to calculate hit ratio. + unit: "files" + chart_type: line + dimensions: + - name: reference + - name: slow + - name: miss + - meta: + plugin_name: ebpf.plugin + module_name: filesystem + monitored_instance: + name: ebpf filesystem + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: filesystem + description: "" + labels: [] + metrics: + - name: filesystem.read_latency + description: ext4 latency for each read request. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: latency period + - name: filesystem.open_latency + description: ext4 latency for each open request. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: latency period + - name: filesystem.sync_latency + description: ext4 latency for each sync request. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: latency period + - name: iilesystem + description: "" + labels: [] + metrics: + - name: filesystem.write_latency + description: ext4 latency for each write request. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: latency period + - name: global + description: "" + labels: [] + metrics: + - name: filesystem.attributte_latency + description: nfs latency for each attribute request. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: latency period + - meta: + plugin_name: ebpf.plugin + module_name: shm + monitored_instance: + name: ebpf shm + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.shmget + description: Calls to syscall shmget(2). + unit: "calls/s" + chart_type: line + dimensions: + - name: get + - name: cgroup.shmat + description: Calls to syscall shmat(2). 
+ unit: "calls/s" + chart_type: line + dimensions: + - name: at + - name: cgroup.shmdt + description: Calls to syscall shmdt(2). + unit: "calls/s" + chart_type: line + dimensions: + - name: dt + - name: cgroup.shmctl + description: Calls to syscall shmctl(2). + unit: "calls/s" + chart_type: line + dimensions: + - name: ctl + - name: global + description: "" + labels: [] + metrics: + - name: services.shmget + description: Calls to syscall shmget(2). + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.shmat + description: Calls to syscall shmat(2). + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.shmdt + description: Calls to syscall shmdt(2). + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.shmctl + description: Calls to syscall shmctl(2). + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: apps.shmget_call + description: Calls to syscall shmget(2). + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.shmat_call + description: Calls to syscall shmat(2). + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.shmdt_call + description: Calls to syscall shmdt(2). + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.shmctl_call + description: Calls to syscall shmctl(2). + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: system.shared_memory_calls + description: Calls to shared memory system calls + unit: "calls/s" + chart_type: line + dimensions: + - name: get + - name: at + - name: dt + - name: ctl + - meta: + plugin_name: ebpf.plugin + module_name: softirq + monitored_instance: + name: ebpf softirq + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.softirq_latency + description: Software IRQ latency + unit: "miliseconds" + chart_type: stacked + dimensions: + - name: soft IRQs + - meta: + plugin_name: ebpf.plugin + module_name: mount + monitored_instance: + name: ebpf mount + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + 
additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: mount_points.call + description: Calls to mount and umount syscalls + unit: "calls/s" + chart_type: line + dimensions: + - name: mount + - name: umount + - name: mount_points.error + description: Errors to mount and umount file systems + unit: "calls/s" + chart_type: line + dimensions: + - name: mount + - name: umount + - meta: + plugin_name: ebpf.plugin + module_name: vfs + monitored_instance: + name: ebpf vfs + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.vfs_unlink + description: Files deleted + unit: "calls/s" + chart_type: line + dimensions: + - name: delete + - name: cgroup.vfs_write + description: Write to disk + unit: "calls/s" + chart_type: line + dimensions: + - name: write + - name: cgroup.vfs_write_error + description: Fails to write + unit: "calls/s" + chart_type: line + dimensions: + - name: write + - name: cgroup.vfs_read + description: Read from disk + unit: "calls/s" + chart_type: line + dimensions: + - name: read + - name: cgroup.vfs_read_error + description: Fails to read + unit: "calls/s" + chart_type: line + dimensions: + - name: read + - name: cgroup.vfs_write_bytes + description: Bytes written on disk + unit: "bytes/s" + chart_type: line + dimensions: + - name: write + - name: cgroup.vfs_read_bytes + description: Bytes read from disk + unit: "bytes/s" + chart_type: line + dimensions: + - name: read + - name: cgroup.vfs_fsync + description: Calls for vfs_fsync + unit: "calls/s" + chart_type: line + dimensions: + - name: fsync + - name: cgroup.vfs_fsync_error + description: Sync error + unit: "calls/s" + chart_type: line + dimensions: + - name: fsync + - name: cgroup.vfs_open + description: Calls for vfs_open + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: cgroup.vfs_open_error + description: Open error + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: cgroup.vfs_create + description: Calls for vfs_create + unit: "calls/s" + chart_type: line + dimensions: + - name: create + - name: 
+              description: Create error
+              unit: "calls/s"
+              chart_type: line
+              dimensions:
+                - name: create
+        - name: global
+          description: ""
+          labels: []
+          metrics:
+            - name: services.vfs_unlink
+              description: Files deleted
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per systemd service
+            - name: services.vfs_write
+              description: Write to disk
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per systemd service
+            - name: services.vfs_write_error
+              description: Fails to write
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per systemd service
+            - name: services.vfs_read
+              description: Read from disk
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per systemd service
+            - name: services.vfs_read_error
+              description: Fails to read
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per systemd service
+            - name: services.vfs_write_bytes
+              description: Bytes written on disk
+              unit: "bytes/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per systemd service
+            - name: services.vfs_read_bytes
+              description: Bytes read from disk
+              unit: "bytes/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per systemd service
+            - name: services.vfs_fsync
+              description: Calls to vfs_fsync
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per systemd service
+            - name: services.vfs_fsync_error
+              description: Sync error
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per systemd service
+            - name: services.vfs_open
+              description: Calls to vfs_open
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per systemd service
+            - name: services.vfs_open_error
+              description: Open error
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per systemd service
+            - name: services.vfs_create
+              description: Calls to vfs_create
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per systemd service
+            - name: services.vfs_create_error
+              description: Create error
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per systemd service
+            - name: filesystem.vfs_deleted_objects
+              description: Remove files
+              unit: "calls/s"
+              chart_type: line
+              dimensions:
+                - name: delete
+            - name: filesystem.vfs_io
+              description: Calls to IO
+              unit: "calls/s"
+              chart_type: line
+              dimensions:
+                - name: read
+                - name: write
+            - name: filesystem.vfs_io_bytes
+              description: Bytes written and read
+              unit: "bytes/s"
+              chart_type: line
+              dimensions:
+                - name: read
+                - name: write
+            - name: filesystem.vfs_io_error
+              description: Fails to write or read
+              unit: "calls/s"
+              chart_type: line
+              dimensions:
+                - name: read
+                - name: write
+            - name: filesystem.vfs_fsync
+              description: Calls for vfs_fsync
+              unit: "calls/s"
+              chart_type: line
+              dimensions:
+                - name: fsync
+            - name: filesystem.vfs_fsync_error
+              description: Fails to synchronize
+              unit: "calls/s"
+              chart_type: line
+              dimensions:
+                - name: fsync
+            - name: filesystem.vfs_open
+              description: Calls for vfs_open
+              unit: "calls/s"
+              chart_type: line
+              dimensions:
+                - name: open
+            - name: filesystem.vfs_open_error
+              description: Fails to open a file
+              unit: "calls/s"
+              chart_type: line
+              dimensions:
+                - name: open
+            - name: filesystem.vfs_create
+              description: Calls for vfs_create
+              unit: "calls/s"
+              chart_type: line
+              dimensions:
+                - name: create
+            - name: filesystem.vfs_create_error
+              description: Fails to create a file.
+              unit: "calls/s"
+              chart_type: line
+              dimensions:
+                - name: create
+            - name: apps.file_deleted
+              description: Files deleted
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per app group
+            - name: apps.vfs_write_call
+              description: Write to disk
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per app group
+            - name: apps.vfs_write_error
+              description: Fails to write
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per app group
+            - name: apps.vfs_read_call
+              description: Read from disk
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per app group
+            - name: apps.vfs_read_error
+              description: Fails to read
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per app group
+            - name: apps.vfs_write_bytes
+              description: Bytes written on disk
+              unit: "bytes/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per app group
+            - name: apps.vfs_read_bytes
+              description: Bytes read on disk
+              unit: "bytes/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per app group
+            - name: apps.vfs_fsync
+              description: Calls for vfs_fsync
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per app group
+            - name: apps.vfs_fsync_error
+              description: Sync error
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per app group
+            - name: apps.vfs_open
+              description: Calls for vfs_open
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per app group
+            - name: apps.vfs_open_error
+              description: Open error
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per app group
+            - name: apps.vfs_create
+              description: Calls for vfs_create
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per app group
+            - name: apps.vfs_create_error
+              description: Create error
+              unit: "calls/s"
+              chart_type: stacked
+              dimensions:
+                - name: a dimension per app group
+  - meta:
+      plugin_name: ebpf.plugin
+      module_name: process
+      monitored_instance:
+        name: ebpf process
+        link: ''
+        categories: []
+        icon_filename: ''
+      related_resources:
+        integrations:
+          list: []
+      info_provided_to_referring_integrations:
+        description: ''
+      keywords: []
+      most_popular: false
+    overview:
+      data_collection:
+        metrics_description: ''
+        method_description: ''
+      supported_platforms:
+        include: []
+        exclude: []
+      multi-instance: true
+      additional_permissions:
+        description: ''
+      default_behavior:
+        auto_detection:
+          description: ''
+        limits:
+          description: ''
+        performance_impact:
+          description: ''
+    setup:
+      prerequisites:
+        list: []
+      configuration:
+        file:
+          name: ''
+          description: ''
+        options:
+          description: ''
+          folding:
+            title: ''
+            enabled: true
+          list: []
+        examples:
+          folding:
+            enabled: true
+            title: ''
+          list: []
+    troubleshooting:
+      problems:
+        list: []
+    alerts: []
+    metrics:
+      folding:
+        title: Metrics
+        enabled: false
+      description: ""
+      availability: []
+      scopes:
+        - name: global
+          description: ""
+          labels: []
+          metrics:
+            - name: netdata.ebpf_aral_stat_size
+              description: Bytes allocated for ARAL.
+              unit: "bytes"
+              chart_type: stacked
+              dimensions:
+                - name: memory
+            - name: netdata.ebpf_aral_stat_alloc
+              description: Calls to allocate memory.
+ unit: "calls" + chart_type: stacked + dimensions: + - name: aral + - name: netdata.ebpf_threads + description: Threads info + unit: "threads" + chart_type: line + dimensions: + - name: total + - name: running + - name: netdata.ebpf_load_methods + description: Load info + unit: "methods" + chart_type: line + dimensions: + - name: legacy + - name: co-re + - name: netdata.ebpf_kernel_memory + description: Memory allocated for hash tables. + unit: "bytes" + chart_type: line + dimensions: + - name: memory_locked + - name: netdata.ebpf_hash_tables_count + description: Number of hash tables loaded + unit: "hash tables" + chart_type: line + dimensions: + - name: hash_table + - name: netdata.ebpf_aral_stat_size + description: Bytes allocated for ARAL + unit: "bytes" + chart_type: stacked + dimensions: + - name: memory + - name: netdata.ebpf_aral_stat_alloc + description: Calls to allocate memory + unit: "calls" + chart_type: stacked + dimensions: + - name: aral + - name: netdata.ebpf_aral_stat_size + description: Bytes allocated for ARAL. + unit: "bytes" + chart_type: stacked + dimensions: + - name: memory + - name: netdata.ebpf_aral_stat_alloc + description: Calls to allocate memory + unit: "calls" + chart_type: stacked + dimensions: + - name: aral -- cgit v1.2.3