diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2021-12-01 06:15:04 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2021-12-01 06:15:04 +0000 |
commit | e970e0b37b8bd7f246feb3f70c4136418225e434 (patch) | |
tree | 0b67c0ca45f56f2f9d9c5c2e725279ecdf52d2eb /collectors | |
parent | Adding upstream version 1.31.0. (diff) | |
download | netdata-e970e0b37b8bd7f246feb3f70c4136418225e434.tar.xz netdata-e970e0b37b8bd7f246feb3f70c4136418225e434.zip |
Adding upstream version 1.32.0. (upstream/1.32.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'collectors')
135 files changed, 13626 insertions, 1953 deletions
diff --git a/collectors/COLLECTORS.md b/collectors/COLLECTORS.md index 3325049e..5f37dfc3 100644 --- a/collectors/COLLECTORS.md +++ b/collectors/COLLECTORS.md @@ -135,6 +135,7 @@ configure any of these collectors according to your setup and infrastructure. - [Riak KV](/collectors/python.d.plugin/riakkv/README.md): Collect database stats from the `/stats` endpoint. - [Zookeeper](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/zookeeper/): Monitor application health metrics reading the server's response to the `mntr` command. +- [Memcached](/collectors/python.d.plugin/memcached/README.md): Collect memory-caching system performance metrics. ### Distributed computing @@ -417,7 +418,6 @@ The Netdata Agent can collect these system- and hardware-level metrics using a v `proc.plugin` collector. - [KSM](/collectors/proc.plugin/README.md): Measure the amount of merging, savings, and effectiveness using the `proc.plugin` collector. -- [Memcached](/collectors/python.d.plugin/memcached/README.md): Collect memory-caching system performance metrics. - [Numa](/collectors/proc.plugin/README.md): Gather metrics on the number of non-uniform memory access (NUMA) events every second using the `proc.plugin` collector. - [Page faults](/collectors/proc.plugin/README.md): Collect the number of memory page faults per second using the @@ -496,7 +496,7 @@ collectors are described only in code and associated charts in Netdata dashboard - [ACLK (code only)](https://github.com/netdata/netdata/blob/master/aclk/legacy/aclk_stats.c): View whether a Netdata Agent is connected to Netdata Cloud via the [ACLK](/aclk/README.md), the volume of queries, process times, and more. - [Alarms](https://learn.netdata.cloud/docs/agent/collectors/python.d.plugin/alarms): This collector creates an - <strong>Alarms</strong> menu with one line plot showing the alarm states of a Netdata Agent over time. + **Alarms** menu with one line plot showing the alarm states of a Netdata Agent over time. 
- [Anomalies](https://learn.netdata.cloud/docs/agent/collectors/python.d.plugin/anomalies): This collector uses the Python PyOD library to perform unsupervised anomaly detection on your Netdata charts and/or dimensions. - [Exporting (code only)](https://github.com/netdata/netdata/blob/master/exporting/send_internal_metrics.c): Gather diff --git a/collectors/all.h b/collectors/all.h index bbb39569..647ee977 100644 --- a/collectors/all.h +++ b/collectors/all.h @@ -3,7 +3,7 @@ #ifndef NETDATA_ALL_H #define NETDATA_ALL_H 1 -#include "../daemon/common.h" +#include "daemon/common.h" // netdata internal data collection plugins @@ -30,40 +30,41 @@ // - for each FAMILY +100 // - for each CHART +10 -#define NETDATA_CHART_PRIO_SYSTEM_CPU 100 -#define NETDATA_CHART_PRIO_SYSTEM_LOAD 100 -#define NETDATA_CHART_PRIO_SYSTEM_IO 150 -#define NETDATA_CHART_PRIO_SYSTEM_PGPGIO 151 -#define NETDATA_CHART_PRIO_SYSTEM_RAM 200 -#define NETDATA_CHART_PRIO_SYSTEM_SWAP 201 -#define NETDATA_CHART_PRIO_SYSTEM_SWAPIO 250 -#define NETDATA_CHART_PRIO_SYSTEM_NET 500 -#define NETDATA_CHART_PRIO_SYSTEM_IPV4 500 // freebsd only -#define NETDATA_CHART_PRIO_SYSTEM_IP 501 -#define NETDATA_CHART_PRIO_SYSTEM_IPV6 502 -#define NETDATA_CHART_PRIO_SYSTEM_PROCESSES 600 -#define NETDATA_CHART_PRIO_SYSTEM_FORKS 700 -#define NETDATA_CHART_PRIO_SYSTEM_ACTIVE_PROCESSES 750 -#define NETDATA_CHART_PRIO_SYSTEM_CTXT 800 -#define NETDATA_CHART_PRIO_SYSTEM_IDLEJITTER 800 -#define NETDATA_CHART_PRIO_SYSTEM_INTR 900 -#define NETDATA_CHART_PRIO_SYSTEM_SOFTIRQS 950 -#define NETDATA_CHART_PRIO_SYSTEM_SOFTNET_STAT 955 -#define NETDATA_CHART_PRIO_SYSTEM_INTERRUPTS 1000 -#define NETDATA_CHART_PRIO_SYSTEM_DEV_INTR 1000 // freebsd only -#define NETDATA_CHART_PRIO_SYSTEM_SOFT_INTR 1100 // freebsd only -#define NETDATA_CHART_PRIO_SYSTEM_ENTROPY 1000 -#define NETDATA_CHART_PRIO_SYSTEM_UPTIME 1000 -#define NETDATA_CHART_PRIO_CLOCK_SYNC_STATE 1100 -#define NETDATA_CHART_PRIO_CLOCK_SYNC_OFFSET 1110 -#define 
NETDATA_CHART_PRIO_SYSTEM_IPC_MSQ_QUEUES 1200 // freebsd only -#define NETDATA_CHART_PRIO_SYSTEM_IPC_MSQ_MESSAGES 1201 -#define NETDATA_CHART_PRIO_SYSTEM_IPC_MSQ_SIZE 1202 -#define NETDATA_CHART_PRIO_SYSTEM_IPC_SEMAPHORES 1203 -#define NETDATA_CHART_PRIO_SYSTEM_IPC_SEM_ARRAYS 1204 -#define NETDATA_CHART_PRIO_SYSTEM_IPC_SHARED_MEM_SEGS 1205 -#define NETDATA_CHART_PRIO_SYSTEM_IPC_SHARED_MEM_SIZE 1206 -#define NETDATA_CHART_PRIO_SYSTEM_PACKETS 7001 // freebsd only +#define NETDATA_CHART_PRIO_SYSTEM_CPU 100 +#define NETDATA_CHART_PRIO_SYSTEM_LOAD 100 +#define NETDATA_CHART_PRIO_SYSTEM_IO 150 +#define NETDATA_CHART_PRIO_SYSTEM_PGPGIO 151 +#define NETDATA_CHART_PRIO_SYSTEM_RAM 200 +#define NETDATA_CHART_PRIO_SYSTEM_SWAP 201 +#define NETDATA_CHART_PRIO_SYSTEM_SWAPIO 250 +#define NETDATA_CHART_PRIO_SYSTEM_NET 500 +#define NETDATA_CHART_PRIO_SYSTEM_IPV4 500 // freebsd only +#define NETDATA_CHART_PRIO_SYSTEM_IP 501 +#define NETDATA_CHART_PRIO_SYSTEM_IPV6 502 +#define NETDATA_CHART_PRIO_SYSTEM_PROCESSES 600 +#define NETDATA_CHART_PRIO_SYSTEM_FORKS 700 +#define NETDATA_CHART_PRIO_SYSTEM_ACTIVE_PROCESSES 750 +#define NETDATA_CHART_PRIO_SYSTEM_CTXT 800 +#define NETDATA_CHART_PRIO_SYSTEM_IDLEJITTER 800 +#define NETDATA_CHART_PRIO_SYSTEM_INTR 900 +#define NETDATA_CHART_PRIO_SYSTEM_SOFTIRQS 950 +#define NETDATA_CHART_PRIO_SYSTEM_SOFTNET_STAT 955 +#define NETDATA_CHART_PRIO_SYSTEM_INTERRUPTS 1000 +#define NETDATA_CHART_PRIO_SYSTEM_DEV_INTR 1000 // freebsd only +#define NETDATA_CHART_PRIO_SYSTEM_SOFT_INTR 1100 // freebsd only +#define NETDATA_CHART_PRIO_SYSTEM_ENTROPY 1000 +#define NETDATA_CHART_PRIO_SYSTEM_UPTIME 1000 +#define NETDATA_CHART_PRIO_CLOCK_SYNC_STATE 1100 +#define NETDATA_CHART_PRIO_CLOCK_SYNC_OFFSET 1110 +#define NETDATA_CHART_PRIO_SYSTEM_IPC_MSQ_QUEUES 1200 // freebsd only +#define NETDATA_CHART_PRIO_SYSTEM_IPC_MSQ_MESSAGES 1201 +#define NETDATA_CHART_PRIO_SYSTEM_IPC_MSQ_SIZE 1202 +#define NETDATA_CHART_PRIO_SYSTEM_IPC_SEMAPHORES 1203 +#define 
NETDATA_CHART_PRIO_SYSTEM_IPC_SEM_ARRAYS 1204 +#define NETDATA_CHART_PRIO_SYSTEM_IPC_SHARED_MEM_SEGS 1205 +#define NETDATA_CHART_PRIO_SYSTEM_IPC_SHARED_MEM_SIZE 1206 +#define NETDATA_CHART_PRIO_SYSTEM_IPC_SHARED_MEM_CALLS 1207 +#define NETDATA_CHART_PRIO_SYSTEM_PACKETS 7001 // freebsd only // CPU per core @@ -116,6 +117,7 @@ #define NETDATA_CHART_PRIO_DISK_SVCTM 2070 #define NETDATA_CHART_PRIO_DISK_MOPS 2080 #define NETDATA_CHART_PRIO_DISK_IOTIME 2090 +#define NETDATA_CHART_PRIO_DISK_LATENCY 2095 #define NETDATA_CHART_PRIO_BCACHE_CACHE_ALLOC 2120 #define NETDATA_CHART_PRIO_BCACHE_HIT_RATIO 2120 #define NETDATA_CHART_PRIO_BCACHE_RATES 2121 @@ -128,36 +130,68 @@ #define NETDATA_CHART_PRIO_DISKSPACE_SPACE 2023 #define NETDATA_CHART_PRIO_DISKSPACE_INODES 2024 +// MDSTAT + +#define NETDATA_CHART_PRIO_MDSTAT_HEALTH 2100 +#define NETDATA_CHART_PRIO_MDSTAT_FLUSH 2101 +#define NETDATA_CHART_PRIO_MDSTAT_NONREDUNDANT 2105 +#define NETDATA_CHART_PRIO_MDSTAT_DISKS 2106 // 5 charts per raid +#define NETDATA_CHART_PRIO_MDSTAT_MISMATCH 2107 +#define NETDATA_CHART_PRIO_MDSTAT_OPERATION 2108 +#define NETDATA_CHART_PRIO_MDSTAT_FINISH 2109 +#define NETDATA_CHART_PRIO_MDSTAT_SPEED 2110 + +// Filesystem +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_CLEAN 2150 +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_COUNT 2151 +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_BYTES 2152 +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_EBYTES 2153 +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_FSYNC 2154 +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_EFSYNC 2155 +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_OPEN 2156 +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_EOPEN 2157 +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_CREATE 2158 +#define NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_ECREATE 2159 + +#define NETDATA_CHART_PRIO_EBPF_FILESYSTEM_CHARTS 2160 + +// Mount Points +#define NETDATA_CHART_PRIO_EBPF_MOUNT_CHARTS 2190 + +// File descriptor +#define NETDATA_CHART_PRIO_EBPF_FD_CHARTS 2195 + + // NFS (server) -#define 
NETDATA_CHART_PRIO_NFSD_READCACHE 2100 -#define NETDATA_CHART_PRIO_NFSD_FILEHANDLES 2101 -#define NETDATA_CHART_PRIO_NFSD_IO 2102 -#define NETDATA_CHART_PRIO_NFSD_THREADS 2103 -#define NETDATA_CHART_PRIO_NFSD_THREADS_FULLCNT 2104 -#define NETDATA_CHART_PRIO_NFSD_THREADS_HISTOGRAM 2105 -#define NETDATA_CHART_PRIO_NFSD_READAHEAD 2105 -#define NETDATA_CHART_PRIO_NFSD_NET 2107 -#define NETDATA_CHART_PRIO_NFSD_RPC 2108 -#define NETDATA_CHART_PRIO_NFSD_PROC2 2109 -#define NETDATA_CHART_PRIO_NFSD_PROC3 2110 -#define NETDATA_CHART_PRIO_NFSD_PROC4 2111 -#define NETDATA_CHART_PRIO_NFSD_PROC4OPS 2112 +#define NETDATA_CHART_PRIO_NFSD_READCACHE 2200 +#define NETDATA_CHART_PRIO_NFSD_FILEHANDLES 2201 +#define NETDATA_CHART_PRIO_NFSD_IO 2202 +#define NETDATA_CHART_PRIO_NFSD_THREADS 2203 +#define NETDATA_CHART_PRIO_NFSD_THREADS_FULLCNT 2204 +#define NETDATA_CHART_PRIO_NFSD_THREADS_HISTOGRAM 2205 +#define NETDATA_CHART_PRIO_NFSD_READAHEAD 2205 +#define NETDATA_CHART_PRIO_NFSD_NET 2207 +#define NETDATA_CHART_PRIO_NFSD_RPC 2208 +#define NETDATA_CHART_PRIO_NFSD_PROC2 2209 +#define NETDATA_CHART_PRIO_NFSD_PROC3 2210 +#define NETDATA_CHART_PRIO_NFSD_PROC4 2211 +#define NETDATA_CHART_PRIO_NFSD_PROC4OPS 2212 // NFS (client) -#define NETDATA_CHART_PRIO_NFS_NET 2207 -#define NETDATA_CHART_PRIO_NFS_RPC 2208 -#define NETDATA_CHART_PRIO_NFS_PROC2 2209 -#define NETDATA_CHART_PRIO_NFS_PROC3 2210 -#define NETDATA_CHART_PRIO_NFS_PROC4 2211 +#define NETDATA_CHART_PRIO_NFS_NET 2307 +#define NETDATA_CHART_PRIO_NFS_RPC 2308 +#define NETDATA_CHART_PRIO_NFS_PROC2 2309 +#define NETDATA_CHART_PRIO_NFS_PROC3 2310 +#define NETDATA_CHART_PRIO_NFS_PROC4 2311 // BTRFS -#define NETDATA_CHART_PRIO_BTRFS_DISK 2300 -#define NETDATA_CHART_PRIO_BTRFS_DATA 2301 -#define NETDATA_CHART_PRIO_BTRFS_METADATA 2302 -#define NETDATA_CHART_PRIO_BTRFS_SYSTEM 2303 +#define NETDATA_CHART_PRIO_BTRFS_DISK 2400 +#define NETDATA_CHART_PRIO_BTRFS_DATA 2401 +#define NETDATA_CHART_PRIO_BTRFS_METADATA 2402 +#define 
NETDATA_CHART_PRIO_BTRFS_SYSTEM 2403 // ZFS @@ -182,6 +216,9 @@ #define NETDATA_CHART_PRIO_ZFS_POOL_STATE 2820 +// HARDIRQS + +#define NETDATA_CHART_PRIO_HARDIRQ_LATENCY 2900 // SOFTIRQs @@ -308,16 +345,6 @@ #define NETDATA_CHART_PRIO_SYNPROXY_CONN_OPEN 8753 #define NETDATA_CHART_PRIO_SYNPROXY_ENTRIES 8754 -// MDSTAT - -#define NETDATA_CHART_PRIO_MDSTAT_HEALTH 9000 -#define NETDATA_CHART_PRIO_MDSTAT_NONREDUNDANT 9001 -#define NETDATA_CHART_PRIO_MDSTAT_DISKS 9002 // 5 charts per raid -#define NETDATA_CHART_PRIO_MDSTAT_MISMATCH 9003 -#define NETDATA_CHART_PRIO_MDSTAT_OPERATION 9004 -#define NETDATA_CHART_PRIO_MDSTAT_FINISH 9005 -#define NETDATA_CHART_PRIO_MDSTAT_SPEED 9006 - // Linux Power Supply #define NETDATA_CHART_PRIO_POWER_SUPPLY_CAPACITY 9500 // 4 charts per power supply diff --git a/collectors/apps.plugin/apps_groups.conf b/collectors/apps.plugin/apps_groups.conf index cffd26c9..a36cae50 100644 --- a/collectors/apps.plugin/apps_groups.conf +++ b/collectors/apps.plugin/apps_groups.conf @@ -25,6 +25,9 @@ # To add process names with spaces, enclose them in quotes (single or double) # example: 'Plex Media Serv' "my other process". # +# Note that spaces are not supported for process groups. Use a dash "-" instead. 
+# example-process-group: process1 process2 +# # Wildcard support: # You can add an asterisk (*) at the beginning and/or the end of a process: # @@ -177,6 +180,12 @@ metrics-server: metrics-server heapster: heapster # ----------------------------------------------------------------------------- +# AWS + +aws-s3: '*aws s3*' +aws: aws + +# ----------------------------------------------------------------------------- # containers & virtual machines containers: lxc* docker* balena* @@ -298,6 +307,8 @@ kernel: fsnotify_mark kthrotld deferwq scsi_* # ----------------------------------------------------------------------------- # other application servers +consul: consul + kafka: *kafka.Kafka* rabbitmq: *rabbitmq* @@ -314,3 +325,16 @@ p4: p4* git-services: gitea gitlab-runner freeswitch: freeswitch* + +# -------- web3 / blockchains ---------- + +go-ethereum: geth* +nethermind-ethereum: nethermind* +besu-ethereum: besu* +openEthereum: openethereum* +urbit: urbit* +bitcoin-node: *bitcoind* lnd* +filecoin: lotus* lotus-miner* lotus-worker* +solana: solana* +web3: *hardhat* *ganache* *truffle* *brownie* *waffle* +terra: terra* mantle* diff --git a/collectors/apps.plugin/apps_plugin.c b/collectors/apps.plugin/apps_plugin.c index 4d4626e6..3bed4bb6 100644 --- a/collectors/apps.plugin/apps_plugin.c +++ b/collectors/apps.plugin/apps_plugin.c @@ -6,7 +6,7 @@ * Released under GPL v3+ */ -#include "../../libnetdata/libnetdata.h" +#include "libnetdata/libnetdata.h" // ---------------------------------------------------------------------------- @@ -128,6 +128,7 @@ static int enable_file_charts = 1, max_fds_cache_seconds = 60, #endif + enable_detailed_uptime_charts = 0, enable_users_charts = 1, enable_groups_charts = 1, include_exited_childs = 1; @@ -3354,7 +3355,7 @@ static void normalize_utilization(struct target *root) { cgtime_fix_ratio = 1.0; //(double)(global_utime + global_stime) / (double)(utime + cutime + stime + cstime); } else if((global_utime + global_stime > utime + stime) 
&& (cutime || cstime)) { - // childrens resources are too high + // children resources are too high // lower only the children resources utime_fix_ratio = stime_fix_ratio = @@ -3519,26 +3520,28 @@ static void send_collected_data_to_netdata(struct target *root, const char *type } send_END(); - send_BEGIN(type, "uptime_min", dt); - for (w = root; w ; w = w->next) { - if(unlikely(w->exposed && w->processes)) - send_SET(w->name, w->uptime_min); - } - send_END(); + if (enable_detailed_uptime_charts) { + send_BEGIN(type, "uptime_min", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, w->uptime_min); + } + send_END(); - send_BEGIN(type, "uptime_avg", dt); - for (w = root; w ; w = w->next) { - if(unlikely(w->exposed && w->processes)) - send_SET(w->name, w->uptime_sum / w->processes); - } - send_END(); + send_BEGIN(type, "uptime_avg", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, w->uptime_sum / w->processes); + } + send_END(); - send_BEGIN(type, "uptime_max", dt); - for (w = root; w ; w = w->next) { - if(unlikely(w->exposed && w->processes)) - send_SET(w->name, w->uptime_max); + send_BEGIN(type, "uptime_max", dt); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed && w->processes)) + send_SET(w->name, w->uptime_max); + } + send_END(); } - send_END(); #endif send_BEGIN(type, "mem", dt); @@ -3710,22 +3713,24 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); } - fprintf(stdout, "CHART %s.uptime_min '' '%s Minimum Uptime' 'seconds' processes %s.uptime_min line 20009 %d\n", type, title, type, update_every); - for (w = root; w ; w = w->next) { - if(unlikely(w->exposed)) - fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); - } + if (enable_detailed_uptime_charts) { + fprintf(stdout, "CHART %s.uptime_min '' '%s Minimum Uptime' 'seconds' processes %s.uptime_min 
line 20009 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + } - fprintf(stdout, "CHART %s.uptime_avg '' '%s Average Uptime' 'seconds' processes %s.uptime_avg line 20010 %d\n", type, title, type, update_every); - for (w = root; w ; w = w->next) { - if(unlikely(w->exposed)) - fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); - } + fprintf(stdout, "CHART %s.uptime_avg '' '%s Average Uptime' 'seconds' processes %s.uptime_avg line 20010 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + } - fprintf(stdout, "CHART %s.uptime_max '' '%s Maximum Uptime' 'seconds' processes %s.uptime_max line 20011 %d\n", type, title, type, update_every); - for (w = root; w ; w = w->next) { - if(unlikely(w->exposed)) - fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + fprintf(stdout, "CHART %s.uptime_max '' '%s Maximum Uptime' 'seconds' processes %s.uptime_max line 20011 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + } } #endif @@ -3939,6 +3944,11 @@ static void parse_args(int argc, char **argv) continue; } + if(strcmp("with-detailed-uptime", argv[i]) == 0) { + enable_detailed_uptime_charts = 1; + continue; + } + if(strcmp("-h", argv[i]) == 0 || strcmp("--help", argv[i]) == 0) { fprintf(stderr, "\n" @@ -3951,34 +3961,36 @@ static void parse_args(int argc, char **argv) "\n" " Available command line options:\n" "\n" - " SECONDS set the data collection frequency\n" + " SECONDS set the data collection frequency\n" "\n" - " debug enable debugging (lot of output)\n" + " debug enable debugging (lot of output)\n" "\n" " with-childs\n" - " without-childs enable / disable aggregating exited\n" - " children resources into 
parents\n" - " (default is enabled)\n" + " without-childs enable / disable aggregating exited\n" + " children resources into parents\n" + " (default is enabled)\n" "\n" " with-guest\n" - " without-guest enable / disable reporting guest charts\n" - " (default is disabled)\n" + " without-guest enable / disable reporting guest charts\n" + " (default is disabled)\n" "\n" " with-files\n" - " without-files enable / disable reporting files, sockets, pipes\n" - " (default is enabled)\n" + " without-files enable / disable reporting files, sockets, pipes\n" + " (default is enabled)\n" + "\n" + " without-users disable reporting per user charts\n" "\n" - " without-users disable reporting per user charts\n" + " without-groups disable reporting per user group charts\n" "\n" - " without-groups disable reporting per user group charts\n" + " with-detailed-uptime enable reporting min/avg/max uptime charts\n" "\n" #ifndef __FreeBSD__ - " fds-cache-secs N cache the files of processed for N seconds\n" - " caching is adaptive per file (when a file\n" - " is found, it starts at 0 and while the file\n" - " remains open, it is incremented up to the\n" - " max given)\n" - " (default is %d seconds)\n" + " fds-cache-secs N cache the files of processed for N seconds\n" + " caching is adaptive per file (when a file\n" + " is found, it starts at 0 and while the file\n" + " remains open, it is incremented up to the\n" + " max given)\n" + " (default is %d seconds)\n" "\n" #endif " version or -v or -V print program version and exit\n" diff --git a/collectors/cgroups.plugin/cgroup-name.sh.in b/collectors/cgroups.plugin/cgroup-name.sh.in index 8ef8ab58..1f31c49a 100755 --- a/collectors/cgroups.plugin/cgroup-name.sh.in +++ b/collectors/cgroups.plugin/cgroup-name.sh.in @@ -118,24 +118,37 @@ function add_lbl_prefix() { # pod level cgroup name format: 'pod_<namespace>_<pod_name>' # container level cgroup name format: 'cntr_<namespace>_<pod_name>_<container_name>' function k8s_get_kubepod_name() { - # GKE 
/sys/fs/cgroup/*/ tree: + # GKE /sys/fs/cgroup/*/ (cri=docker, cgroups=v1): # |-- kubepods # | |-- burstable # | | |-- pod98cee708-023b-11eb-933d-42010a800193 # | | | |-- 922161c98e6ea450bf665226cdc64ca2aa3e889934c2cff0aec4325f8f78ac03 - # | | `-- a5d223eec35e00f5a1c6fa3e3a5faac6148cdc1f03a2e762e873b7efede012d7 # | `-- pode314bbac-d577-11ea-a171-42010a80013b # | |-- 7d505356b04507de7b710016d540b2759483ed5f9136bb01a80872b08f771930 - # | `-- 88ab4683b99cfa7cc8c5f503adf7987dd93a3faa7c4ce0d17d419962b3220d50 # - # Minikube (v1.8.2) /sys/fs/cgroup/*/ tree: + # GKE /sys/fs/cgroup/*/ (cri=containerd, cgroups=v1): + # |-- kubepods.slice + # | |-- kubepods-besteffort.slice + # | | |-- kubepods-besteffort-pode1465238_4518_4c21_832f_fd9f87033dad.slice + # | | | |-- cri-containerd-66be9b2efdf4d85288c319b8c1a2f50d2439b5617e36f45d9d0d0be1381113be.scope + # | `-- kubepods-pod91f5b561_369f_4103_8015_66391059996a.slice + # | |-- cri-containerd-24c53b774a586f06abc058619b47f71d9d869ac50c92898adbd199106fd0aaeb.scope + # + # GKE /sys/fs/cgroup/*/ (cri=crio, cgroups=v1): + # |-- kubepods.slice + # | |-- kubepods-besteffort.slice + # | | |-- kubepods-besteffort-podad412dfe_3589_4056_965a_592356172968.slice + # | | | |-- crio-77b019312fd9825828b70214b2c94da69c30621af2a7ee06f8beace4bc9439e5.scope + # + # Minikube (v1.8.2) /sys/fs/cgroup/*/ (cri=docker, cgroups=v1): # |-- kubepods.slice # | |-- kubepods-besteffort.slice # | | |-- kubepods-besteffort-pod10fb5647_c724_400c_b9cc_0e6eae3110e7.slice # | | | |-- docker-36e5eb5056dfdf6dbb75c0c44a1ecf23217fe2c50d606209d8130fcbb19fb5a7.scope - # | | | `-- docker-87e18c2323621cf0f635c53c798b926e33e9665c348c60d489eef31ee1bd38d7.scope # - # NOTE: cgroups plugin uses '_' to join dir names, so it is <parent>_<child>_<child>_... + # NOTE: cgroups plugin + # - uses '_' to join dir names (so it is <parent>_<child>_<child>_...) + # - replaces '.' 
with '-' local fn="${FUNCNAME[0]}" local id="${1}" @@ -157,9 +170,9 @@ function k8s_get_kubepod_name() { # kubepods_kubepods-<QOS_CLASS> name=${clean_id//-/_} name=${name/#kubepods_kubepods/kubepods} - elif [[ $clean_id =~ .+pod[a-f0-9_-]+_docker-([a-f0-9]+)$ ]]; then - # ...pod<POD_UID>_docker-<CONTAINER_ID> (POD_UID w/ "_") - cntr_id=${BASH_REMATCH[1]} + elif [[ $clean_id =~ .+pod[a-f0-9_-]+_(docker|crio|cri-containerd)-([a-f0-9]+)$ ]]; then + # ...pod<POD_UID>_(docker|crio|cri-containerd)-<CONTAINER_ID> (POD_UID w/ "_") + cntr_id=${BASH_REMATCH[2]} elif [[ $clean_id =~ .+pod[a-f0-9-]+_([a-f0-9]+)$ ]]; then # ...pod<POD_UID>_<CONTAINER_ID> cntr_id=${BASH_REMATCH[1]} @@ -252,7 +265,7 @@ function k8s_get_kubepod_name() { jq_filter+='container_name=\"\(.name)\",' jq_filter+='container_id=\"\(.containerID)\"' jq_filter+='") | ' - jq_filter+='sub("docker://";"")' # containerID: docker://a346da9bc0e3eaba6b295f64ac16e02f2190db2cef570835706a9e7a36e2c722 + jq_filter+='sub("(docker|cri-o|containerd)://";"")' # containerID: docker://a346da9bc0e3eaba6b295f64ac16e02f2190db2cef570835706a9e7a36e2c722 local containers if ! containers=$(jq -r "${jq_filter}" <<< "$pods" 2>&1); then diff --git a/collectors/cgroups.plugin/cgroup-network-helper.sh b/collectors/cgroups.plugin/cgroup-network-helper.sh index 1b60f452..f355480b 100755 --- a/collectors/cgroups.plugin/cgroup-network-helper.sh +++ b/collectors/cgroups.plugin/cgroup-network-helper.sh @@ -76,7 +76,7 @@ debug() { pid= cgroup= -while [ ! -z "${1}" ] +while [ -n "${1}" ] do case "${1}" in --cgroup) cgroup="${2}"; shift 1;; @@ -164,7 +164,7 @@ virsh_find_all_interfaces_for_cgroup() { # shellcheck disable=SC2230 virsh="$(which virsh 2>/dev/null || command -v virsh 2>/dev/null)" - if [ ! 
-z "${virsh}" ] + if [ -n "${virsh}" ] then local d d="$(virsh_cgroup_to_domain_name "${c}")" @@ -172,7 +172,7 @@ virsh_find_all_interfaces_for_cgroup() { # e.g.: vm01\x2dweb => vm01-web (https://github.com/netdata/netdata/issues/11088#issuecomment-832618149) d="$(printf '%b' "${d}")" - if [ ! -z "${d}" ] + if [ -n "${d}" ] then debug "running: virsh domiflist ${d}; to find the network interfaces" @@ -203,8 +203,11 @@ netnsid_find_all_interfaces_for_pid() { local pid="${1}" [ -z "${pid}" ] && return 1 - local nsid=$(lsns -t net -p ${pid} -o NETNSID -nr) - [ -z "${nsid}" -o "${nsid}" = "unassigned" ] && return 1 + local nsid + nsid=$(lsns -t net -p "${pid}" -o NETNSID -nr 2>/dev/null) + if [ -z "${nsid}" ] || [ "${nsid}" = "unassigned" ]; then + return 1 + fi set_source "netnsid" ip link show |\ @@ -234,14 +237,14 @@ netnsid_find_all_interfaces_for_cgroup() { find_all_interfaces_of_pid_or_cgroup() { local p="${1}" c="${2}" # the pid and the cgroup path - if [ ! -z "${pid}" ] + if [ -n "${pid}" ] then # we have been called with a pid proc_pid_fdinfo_iff "${p}" netnsid_find_all_interfaces_for_pid "${p}" - elif [ ! 
-z "${c}" ] + elif [ -n "${c}" ] then # we have been called with a cgroup diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c index eea4d9ae..92aa22c7 100644 --- a/collectors/cgroups.plugin/sys_fs_cgroup.c +++ b/collectors/cgroups.plugin/sys_fs_cgroup.c @@ -94,6 +94,11 @@ static struct cgroups_systemd_config_setting cgroups_systemd_options[] = { { .name = NULL, .setting = SYSTEMD_CGROUP_ERR }, }; +// Shared memory with information from detected cgroups +netdata_ebpf_cgroup_shm_t shm_cgroup_ebpf = {NULL, NULL}; +static int shm_fd_cgroup_ebpf = -1; +sem_t *shm_mutex_cgroup_ebpf = SEM_FAILED; + /* on Fed systemd is not in PATH for some reason */ #define SYSTEMD_CMD_RHEL "/usr/lib/systemd/systemd --version" #define SYSTEMD_HIERARCHY_STRING "default-hierarchy=" @@ -168,8 +173,6 @@ static enum cgroups_type cgroups_try_detect_version() if (!statfs(filename, &fsinfo)) { if (fsinfo.f_type == CGROUP2_SUPER_MAGIC) return CGROUPS_V2; - if (fsinfo.f_type == CGROUP_SUPER_MAGIC) - return CGROUPS_V1; } #endif @@ -463,6 +466,61 @@ void read_cgroup_plugin_configuration() { mountinfo_free_all(root); } +void netdata_cgroup_ebpf_set_values(size_t length) +{ + sem_wait(shm_mutex_cgroup_ebpf); + + shm_cgroup_ebpf.header->cgroup_max = cgroup_root_max; + shm_cgroup_ebpf.header->systemd_enabled = cgroup_enable_systemd_services | + cgroup_enable_systemd_services_detailed_memory | + cgroup_used_memory; + shm_cgroup_ebpf.header->body_length = length; + + sem_post(shm_mutex_cgroup_ebpf); +} + +void netdata_cgroup_ebpf_initialize_shm() +{ + shm_fd_cgroup_ebpf = shm_open(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME, O_CREAT | O_RDWR, 0660); + if (shm_fd_cgroup_ebpf < 0) { + error("Cannot initialize shared memory used by cgroup and eBPF, integration won't happen."); + return; + } + + size_t length = sizeof(netdata_ebpf_cgroup_shm_header_t) + cgroup_root_max * sizeof(netdata_ebpf_cgroup_shm_body_t); + if (ftruncate(shm_fd_cgroup_ebpf, length)) { + 
error("Cannot set size for shared memory."); + goto end_init_shm; + } + + shm_cgroup_ebpf.header = (netdata_ebpf_cgroup_shm_header_t *) mmap(NULL, length, + PROT_READ | PROT_WRITE, MAP_SHARED, + shm_fd_cgroup_ebpf, 0); + + if (!shm_cgroup_ebpf.header) { + error("Cannot map shared memory used between cgroup and eBPF, integration won't happen"); + goto end_init_shm; + } + shm_cgroup_ebpf.body = (netdata_ebpf_cgroup_shm_body_t *) ((char *)shm_cgroup_ebpf.header + + sizeof(netdata_ebpf_cgroup_shm_header_t)); + + shm_mutex_cgroup_ebpf = sem_open(NETDATA_NAMED_SEMAPHORE_EBPF_CGROUP_NAME, O_CREAT, + S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH, 1); + + if (shm_mutex_cgroup_ebpf != SEM_FAILED) { + netdata_cgroup_ebpf_set_values(length); + return; + } + + error("Cannot create semaphore, integration between eBPF and cgroup won't happen"); + munmap(shm_cgroup_ebpf.header, length); + +end_init_shm: + close(shm_fd_cgroup_ebpf); + shm_fd_cgroup_ebpf = -1; + shm_unlink(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME); +} + // ---------------------------------------------------------------------------- // cgroup objects @@ -597,10 +655,6 @@ struct cgroup_network_interface { struct cgroup_network_interface *next; }; -#define CGROUP_OPTIONS_DISABLED_DUPLICATE 0x00000001 -#define CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE 0x00000002 -#define CGROUP_OPTIONS_IS_UNIFIED 0x00000004 - // *** WARNING *** The fields are not thread safe. Take care of safe usage. 
struct cgroup { uint32_t options; @@ -609,6 +663,7 @@ struct cgroup { char enabled; // enabled in the config char pending_renames; + char *intermediate_id; // TODO: remove it when the renaming script is fixed char *id; uint32_t hash; @@ -1313,13 +1368,16 @@ static inline char *cgroup_chart_id_strdupz(const char *s) { char *r = strdupz(s); netdata_fix_chart_id(r); + return r; +} + +// TODO: move the code to cgroup_chart_id_strdupz() when the renaming script is fixed +static inline void substitute_dots_in_id(char *s) { // dots are used to distinguish chart type and id in streaming, so we should replace them - for (char *d = r; *d; d++) { + for (char *d = s; *d; d++) { if (*d == '.') *d = '-'; } - - return r; } char *parse_k8s_data(struct label **labels, char *data) @@ -1357,7 +1415,8 @@ static inline void cgroup_get_chart_name(struct cgroup *cg) { pid_t cgroup_pid; char command[CGROUP_CHARTID_LINE_MAX + 1]; - snprintfz(command, CGROUP_CHARTID_LINE_MAX, "exec %s '%s'", cgroups_rename_script, cg->chart_id); + // TODO: use cg->id when the renaming script is fixed + snprintfz(command, CGROUP_CHARTID_LINE_MAX, "exec %s '%s'", cgroups_rename_script, cg->intermediate_id); debug(D_CGROUP, "executing command \"%s\" for cgroup '%s'", command, cg->chart_id); FILE *fp = mypopen(command, &cgroup_pid); @@ -1394,6 +1453,7 @@ static inline void cgroup_get_chart_name(struct cgroup *cg) { freez(cg->chart_id); cg->chart_id = cgroup_chart_id_strdupz(name); + substitute_dots_in_id(cg->chart_id); cg->hash_chart = simple_hash(cg->chart_id); } } @@ -1420,7 +1480,10 @@ static inline struct cgroup *cgroup_add(const char *id) { cg->chart_title = cgroup_title_strdupz(id); + cg->intermediate_id = cgroup_chart_id_strdupz(id); + cg->chart_id = cgroup_chart_id_strdupz(id); + substitute_dots_in_id(cg->chart_id); cg->hash_chart = simple_hash(cg->chart_id); if(cgroup_use_unified_cgroups) cg->options |= CGROUP_OPTIONS_IS_UNIFIED; @@ -1461,10 +1524,6 @@ static inline struct cgroup *cgroup_add(const char 
*id) { strncpy(buffer, cg->id, CGROUP_CHARTID_LINE_MAX); char *s = buffer; - //freez(cg->chart_id); - //cg->chart_id = cgroup_chart_id_strdupz(s); - //cg->hash_chart = simple_hash(cg->chart_id); - // skip to the last slash size_t len = strlen(s); while(len--) if(unlikely(s[len] == '/')) break; @@ -1588,6 +1647,7 @@ static inline void cgroup_free(struct cgroup *cg) { free_pressure(&cg->memory_pressure); freez(cg->id); + freez(cg->intermediate_id); freez(cg->chart_id); freez(cg->chart_title); @@ -2056,6 +2116,69 @@ static inline void copy_discovered_cgroups() cgroup_root = discovered_cgroup_root; } +static void is_there_cgroup_procs(netdata_ebpf_cgroup_shm_body_t *out, char *id) +{ + struct stat buf; + + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_cpuset_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } + + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_blkio_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } + + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_memory_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } + + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_devices_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } + + out->path[0] = '\0'; + out->enabled = 0; +} + +static inline void share_cgroups() +{ + struct cgroup *cg; + int count; + struct stat buf; + + if (shm_mutex_cgroup_ebpf == SEM_FAILED) { + return; + } + sem_wait(shm_mutex_cgroup_ebpf); + + for (cg = cgroup_root, count = 0; cg ; cg = cg->next, count++) { + netdata_ebpf_cgroup_shm_body_t *ptr = &shm_cgroup_ebpf.body[count]; + char *prefix = (cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE) ? 
"" : "cgroup_"; + snprintfz(ptr->name, CGROUP_EBPF_NAME_SHARED_LENGTH - 1, "%s%s", prefix, cg->chart_title); + ptr->hash = simple_hash(ptr->name); + ptr->options = cg->options; + ptr->enabled = cg->enabled; + if (cgroup_use_unified_cgroups) { + snprintfz(ptr->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_unified_base, cg->id); + if (likely(stat(ptr->path, &buf) == -1)) { + ptr->path[0] = '\0'; + ptr->enabled = 0; + } + } else { + is_there_cgroup_procs(ptr, cg->id); + } + + debug(D_CGROUP, "cgroup shared: NAME=%s, ENABLED=%d", ptr->name, ptr->enabled); + } + + shm_cgroup_ebpf.header->cgroup_root_count = count; + sem_post(shm_mutex_cgroup_ebpf); +} + static inline void find_all_cgroups() { debug(D_CGROUP, "searching for cgroups"); @@ -2112,6 +2235,8 @@ static inline void find_all_cgroups() { copy_discovered_cgroups(); uv_mutex_unlock(&cgroup_root_mutex); + share_cgroups(); + debug(D_CGROUP, "done searching for cgroups"); } @@ -2743,7 +2868,7 @@ void update_systemd_services_charts( if(unlikely(!cg->rd_mem_detailed_rss)) cg->rd_mem_detailed_rss = rrddim_add(st_mem_detailed_rss, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_set_by_pointer(st_mem_detailed_rss, cg->rd_mem_detailed_rss, cg->memory.total_rss + cg->memory.total_rss_huge); + rrddim_set_by_pointer(st_mem_detailed_rss, cg->rd_mem_detailed_rss, cg->memory.total_rss); if(unlikely(!cg->rd_mem_detailed_mapped)) cg->rd_mem_detailed_mapped = rrddim_add(st_mem_detailed_mapped, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); @@ -2792,7 +2917,15 @@ void update_systemd_services_charts( if(unlikely(!cg->rd_swap_usage)) cg->rd_swap_usage = rrddim_add(st_swap_usage, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_set_by_pointer(st_swap_usage, cg->rd_swap_usage, cg->memory.msw_usage_in_bytes); + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + rrddim_set_by_pointer( + st_swap_usage, + cg->rd_swap_usage, + 
cg->memory.msw_usage_in_bytes > (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) ? + cg->memory.msw_usage_in_bytes - (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) : 0); + } else { + rrddim_set_by_pointer(st_swap_usage, cg->rd_swap_usage, cg->memory.msw_usage_in_bytes); + } } if(likely(do_io && cg->io_service_bytes.updated)) { @@ -3482,8 +3615,8 @@ void update_cgroup_charts(int update_every) { rrddim_set( cg->st_mem_usage, "swap", - (cg->memory.msw_usage_in_bytes > cg->memory.usage_in_bytes) ? - cg->memory.msw_usage_in_bytes - cg->memory.usage_in_bytes : 0); + cg->memory.msw_usage_in_bytes > (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) ? + cg->memory.msw_usage_in_bytes - (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) : 0); } else { rrddim_set(cg->st_mem_usage, "swap", cg->memory.msw_usage_in_bytes); } @@ -4022,6 +4155,18 @@ static void cgroup_main_cleanup(void *ptr) { sleep_usec(step); } + if (shm_mutex_cgroup_ebpf != SEM_FAILED) { + sem_close(shm_mutex_cgroup_ebpf); + } + + if (shm_cgroup_ebpf.header) { + munmap(shm_cgroup_ebpf.header, shm_cgroup_ebpf.header->body_length); + } + + if (shm_fd_cgroup_ebpf > 0) { + close(shm_fd_cgroup_ebpf); + } + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; } @@ -4034,6 +4179,7 @@ void *cgroups_main(void *ptr) { int vdo_cpu_netdata = config_get_boolean("plugin:cgroups", "cgroups plugin resource charts", 1); read_cgroup_plugin_configuration(); + netdata_cgroup_ebpf_initialize_shm(); RRDSET *stcpu_thread = NULL; @@ -4057,7 +4203,7 @@ void *cgroups_main(void *ptr) { int error = uv_thread_create(&discovery_thread.thread, cgroup_discovery_worker, NULL); if (error) { - error("CGROUP: cannot create tread worker. uv_thread_create(): %s", uv_strerror(error)); + error("CGROUP: cannot create thread worker. 
uv_thread_create(): %s", uv_strerror(error)); goto exit; } uv_thread_set_name_np(discovery_thread.thread, "PLUGIN[cgroups]"); diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.h b/collectors/cgroups.plugin/sys_fs_cgroup.h index 155330ff..017aa8fb 100644 --- a/collectors/cgroups.plugin/sys_fs_cgroup.h +++ b/collectors/cgroups.plugin/sys_fs_cgroup.h @@ -3,7 +3,7 @@ #ifndef NETDATA_SYS_FS_CGROUP_H #define NETDATA_SYS_FS_CGROUP_H 1 -#include "../../daemon/common.h" +#include "daemon/common.h" #if (TARGET_OS == OS_LINUX) @@ -20,6 +20,38 @@ extern void *cgroups_main(void *ptr); +#define CGROUP_OPTIONS_DISABLED_DUPLICATE 0x00000001 +#define CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE 0x00000002 +#define CGROUP_OPTIONS_IS_UNIFIED 0x00000004 + +typedef struct netdata_ebpf_cgroup_shm_header { + int cgroup_root_count; + int cgroup_max; + int systemd_enabled; + int __pad; + size_t body_length; +} netdata_ebpf_cgroup_shm_header_t; + +#define CGROUP_EBPF_NAME_SHARED_LENGTH 256 + +typedef struct netdata_ebpf_cgroup_shm_body { + // Considering what is exposed in this link https://en.wikipedia.org/wiki/Comparison_of_file_systems#Limits + // this length is enough to store what we want. 
+ char name[CGROUP_EBPF_NAME_SHARED_LENGTH]; + uint32_t hash; + uint32_t options; + int enabled; + char path[FILENAME_MAX + 1]; +} netdata_ebpf_cgroup_shm_body_t; + +typedef struct netdata_ebpf_cgroup_shm { + netdata_ebpf_cgroup_shm_header_t *header; + netdata_ebpf_cgroup_shm_body_t *body; +} netdata_ebpf_cgroup_shm_t; + +#define NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME "netdata_shm_cgroup_ebpf" +#define NETDATA_NAMED_SEMAPHORE_EBPF_CGROUP_NAME "/netdata_sem_cgroup_ebpf" + #include "../proc.plugin/plugin_proc.h" #else // (TARGET_OS == OS_LINUX) diff --git a/collectors/charts.d.plugin/ap/ap.chart.sh b/collectors/charts.d.plugin/ap/ap.chart.sh index 5dd78783..80c9dc60 100644 --- a/collectors/charts.d.plugin/ap/ap.chart.sh +++ b/collectors/charts.d.plugin/ap/ap.chart.sh @@ -61,25 +61,25 @@ ap_create() { # create the chart with 3 dimensions cat << EOF -CHART ap_clients.${dev} '' "Connected clients to ${ssid} on ${dev}" "clients" ${dev} ap.clients line $((ap_priority + 1)) $ap_update_every +CHART ap_clients.${dev} '' "Connected clients to ${ssid} on ${dev}" "clients" ${dev} ap.clients line $((ap_priority + 1)) $ap_update_every '' '' 'ap' DIMENSION clients '' absolute 1 1 -CHART ap_bandwidth.${dev} '' "Bandwidth for ${ssid} on ${dev}" "kilobits/s" ${dev} ap.net area $((ap_priority + 2)) $ap_update_every +CHART ap_bandwidth.${dev} '' "Bandwidth for ${ssid} on ${dev}" "kilobits/s" ${dev} ap.net area $((ap_priority + 2)) $ap_update_every '' '' 'ap' DIMENSION received '' incremental 8 1024 DIMENSION sent '' incremental -8 1024 -CHART ap_packets.${dev} '' "Packets for ${ssid} on ${dev}" "packets/s" ${dev} ap.packets line $((ap_priority + 3)) $ap_update_every +CHART ap_packets.${dev} '' "Packets for ${ssid} on ${dev}" "packets/s" ${dev} ap.packets line $((ap_priority + 3)) $ap_update_every '' '' 'ap' DIMENSION received '' incremental 1 1 DIMENSION sent '' incremental -1 1 -CHART ap_issues.${dev} '' "Transmit Issues for ${ssid} on ${dev}" "issues/s" ${dev} ap.issues line 
$((ap_priority + 4)) $ap_update_every +CHART ap_issues.${dev} '' "Transmit Issues for ${ssid} on ${dev}" "issues/s" ${dev} ap.issues line $((ap_priority + 4)) $ap_update_every '' '' 'ap' DIMENSION retries 'tx retries' incremental 1 1 DIMENSION failures 'tx failures' incremental -1 1 -CHART ap_signal.${dev} '' "Average Signal for ${ssid} on ${dev}" "dBm" ${dev} ap.signal line $((ap_priority + 5)) $ap_update_every +CHART ap_signal.${dev} '' "Average Signal for ${ssid} on ${dev}" "dBm" ${dev} ap.signal line $((ap_priority + 5)) $ap_update_every '' '' 'ap' DIMENSION signal 'average signal' absolute 1 1000 -CHART ap_bitrate.${dev} '' "Bitrate for ${ssid} on ${dev}" "Mbps" ${dev} ap.bitrate line $((ap_priority + 6)) $ap_update_every +CHART ap_bitrate.${dev} '' "Bitrate for ${ssid} on ${dev}" "Mbps" ${dev} ap.bitrate line $((ap_priority + 6)) $ap_update_every '' '' 'ap' DIMENSION receive '' absolute 1 1000 DIMENSION transmit '' absolute -1 1000 DIMENSION expected 'expected throughput' absolute 1 1000 @@ -92,7 +92,7 @@ EOF # _update is called continuously, to collect the values ap_update() { # the first argument to this function is the microseconds since last update - # pass this parameter to the BEGIN statement (see bellow). + # pass this parameter to the BEGIN statement (see below). 
# do all the work to collect / calculate the values # for each dimension diff --git a/collectors/charts.d.plugin/apcupsd/apcupsd.chart.sh b/collectors/charts.d.plugin/apcupsd/apcupsd.chart.sh index 014a9c1d..e78d99e7 100644 --- a/collectors/charts.d.plugin/apcupsd/apcupsd.chart.sh +++ b/collectors/charts.d.plugin/apcupsd/apcupsd.chart.sh @@ -80,35 +80,35 @@ apcupsd_create() { # create the charts cat << EOF -CHART apcupsd_${host}.charge '' "UPS Charge for ${host} on ${src}" "percentage" ups apcupsd.charge area $((apcupsd_priority + 1)) $apcupsd_update_every +CHART apcupsd_${host}.charge '' "UPS Charge for ${host} on ${src}" "percentage" ups apcupsd.charge area $((apcupsd_priority + 1)) $apcupsd_update_every '' '' 'apcupsd' DIMENSION battery_charge charge absolute 1 100 -CHART apcupsd_${host}.battery_voltage '' "UPS Battery Voltage for ${host} on ${src}" "Volts" ups apcupsd.battery.voltage line $((apcupsd_priority + 3)) $apcupsd_update_every +CHART apcupsd_${host}.battery_voltage '' "UPS Battery Voltage for ${host} on ${src}" "Volts" ups apcupsd.battery.voltage line $((apcupsd_priority + 3)) $apcupsd_update_every '' '' 'apcupsd' DIMENSION battery_voltage voltage absolute 1 100 DIMENSION battery_voltage_nominal nominal absolute 1 100 -CHART apcupsd_${host}.input_voltage '' "UPS Input Voltage for ${host} on ${src}" "Volts" input apcupsd.input.voltage line $((apcupsd_priority + 4)) $apcupsd_update_every +CHART apcupsd_${host}.input_voltage '' "UPS Input Voltage for ${host} on ${src}" "Volts" input apcupsd.input.voltage line $((apcupsd_priority + 4)) $apcupsd_update_every '' '' 'apcupsd' DIMENSION input_voltage voltage absolute 1 100 DIMENSION input_voltage_min min absolute 1 100 DIMENSION input_voltage_max max absolute 1 100 -CHART apcupsd_${host}.input_frequency '' "UPS Input Frequency for ${host} on ${src}" "Hz" input apcupsd.input.frequency line $((apcupsd_priority + 5)) $apcupsd_update_every +CHART apcupsd_${host}.input_frequency '' "UPS Input Frequency for ${host} 
on ${src}" "Hz" input apcupsd.input.frequency line $((apcupsd_priority + 5)) $apcupsd_update_every '' '' 'apcupsd' DIMENSION input_frequency frequency absolute 1 100 -CHART apcupsd_${host}.output_voltage '' "UPS Output Voltage for ${host} on ${src}" "Volts" output apcupsd.output.voltage line $((apcupsd_priority + 6)) $apcupsd_update_every +CHART apcupsd_${host}.output_voltage '' "UPS Output Voltage for ${host} on ${src}" "Volts" output apcupsd.output.voltage line $((apcupsd_priority + 6)) $apcupsd_update_every '' '' 'apcupsd' DIMENSION output_voltage voltage absolute 1 100 DIMENSION output_voltage_nominal nominal absolute 1 100 -CHART apcupsd_${host}.load '' "UPS Load for ${host} on ${src}" "percentage" ups apcupsd.load area $((apcupsd_priority)) $apcupsd_update_every +CHART apcupsd_${host}.load '' "UPS Load for ${host} on ${src}" "percentage" ups apcupsd.load area $((apcupsd_priority)) $apcupsd_update_every '' '' 'apcupsd' DIMENSION load load absolute 1 100 -CHART apcupsd_${host}.temp '' "UPS Temperature for ${host} on ${src}" "Celsius" ups apcupsd.temperature line $((apcupsd_priority + 7)) $apcupsd_update_every +CHART apcupsd_${host}.temp '' "UPS Temperature for ${host} on ${src}" "Celsius" ups apcupsd.temperature line $((apcupsd_priority + 7)) $apcupsd_update_every '' '' 'apcupsd' DIMENSION temp temp absolute 1 100 -CHART apcupsd_${host}.time '' "UPS Time Remaining for ${host} on ${src}" "Minutes" ups apcupsd.time area $((apcupsd_priority + 2)) $apcupsd_update_every +CHART apcupsd_${host}.time '' "UPS Time Remaining for ${host} on ${src}" "Minutes" ups apcupsd.time area $((apcupsd_priority + 2)) $apcupsd_update_every '' '' 'apcupsd' DIMENSION time time absolute 1 100 -CHART apcupsd_${host}.online '' "UPS ONLINE flag for ${host} on ${src}" "boolean" ups apcupsd.online line $((apcupsd_priority + 8)) $apcupsd_update_every +CHART apcupsd_${host}.online '' "UPS ONLINE flag for ${host} on ${src}" "boolean" ups apcupsd.online line $((apcupsd_priority + 8)) 
$apcupsd_update_every '' '' 'apcupsd' DIMENSION online online absolute 0 1 EOF @@ -118,7 +118,7 @@ EOF apcupsd_update() { # the first argument to this function is the microseconds since last update - # pass this parameter to the BEGIN statement (see bellow). + # pass this parameter to the BEGIN statement (see below). # do all the work to collect / calculate the values # for each dimension @@ -154,7 +154,7 @@ BEGIN { /^LOADPCT.*/ { load = \$3 * 100 }; /^ITEMP.*/ { temp = \$3 * 100 }; /^TIMELEFT.*/ { time = \$3 * 100 }; -/^STATUS.*/ { online=(\$3 == \"ONLINE\" || \$3 == \"ONBATT\")?1:0 }; +/^STATUS.*/ { online=(\$3 != \"COMMLOST\" && !(\$3 == \"SHUTTING\" && \$4 == \"DOWN\"))?1:0 }; END { print \"BEGIN apcupsd_${host}.online $1\"; print \"SET online = \" online; diff --git a/collectors/charts.d.plugin/charts.d.conf b/collectors/charts.d.plugin/charts.d.conf index 0872d39e..2d32f73e 100644 --- a/collectors/charts.d.plugin/charts.d.conf +++ b/collectors/charts.d.plugin/charts.d.conf @@ -45,3 +45,4 @@ # Nothing useful. # Just an example charts.d plugin you can use as a template. 
# example=force +# sensors=force diff --git a/collectors/charts.d.plugin/charts.d.plugin.in b/collectors/charts.d.plugin/charts.d.plugin.in index 1b5c3f33..9187fc25 100755 --- a/collectors/charts.d.plugin/charts.d.plugin.in +++ b/collectors/charts.d.plugin/charts.d.plugin.in @@ -634,7 +634,7 @@ global_update() { charts_run_counter[$chart]=0 charts_serial_failures[$chart]=0 - echo "CHART netdata.plugin_chartsd_$chart '' 'Execution time for $chart plugin' 'milliseconds / run' charts.d netdata.plugin_charts area 145000 ${charts_update_every[$chart]}" + echo "CHART netdata.plugin_chartsd_$chart '' 'Execution time for $chart plugin' 'milliseconds / run' charts.d netdata.plugin_charts area 145000 ${charts_update_every[$chart]} '' '' '$chart'" echo "DIMENSION run_time 'run time' absolute 1 1" done diff --git a/collectors/charts.d.plugin/example/example.chart.sh b/collectors/charts.d.plugin/example/example.chart.sh index 5ff51a57..6bbbcf1d 100644 --- a/collectors/charts.d.plugin/example/example.chart.sh +++ b/collectors/charts.d.plugin/example/example.chart.sh @@ -89,11 +89,11 @@ example_check() { example_create() { # create the chart with 3 dimensions cat << EOF -CHART example.random '' "Random Numbers Stacked Chart" "% of random numbers" random random stacked $((example_priority)) $example_update_every +CHART example.random '' "Random Numbers Stacked Chart" "% of random numbers" random random stacked $((example_priority)) $example_update_every '' '' 'example' DIMENSION random1 '' percentage-of-absolute-row 1 1 DIMENSION random2 '' percentage-of-absolute-row 1 1 DIMENSION random3 '' percentage-of-absolute-row 1 1 -CHART example.random2 '' "A random number" "random number" random random area $((example_priority + 1)) $example_update_every +CHART example.random2 '' "A random number" "random number" random random area $((example_priority + 1)) $example_update_every '' '' 'example' DIMENSION random '' absolute 1 1 EOF @@ -103,7 +103,7 @@ EOF # _update is called continuously, 
to collect the values example_update() { # the first argument to this function is the microseconds since last update - # pass this parameter to the BEGIN statement (see bellow). + # pass this parameter to the BEGIN statement (see below). example_get || return 1 diff --git a/collectors/charts.d.plugin/libreswan/libreswan.chart.sh b/collectors/charts.d.plugin/libreswan/libreswan.chart.sh index a3a56b26..d526f7a9 100644 --- a/collectors/charts.d.plugin/libreswan/libreswan.chart.sh +++ b/collectors/charts.d.plugin/libreswan/libreswan.chart.sh @@ -124,10 +124,10 @@ libreswan_create_one() { libreswan_tunnel_charts[${name}]="$(fixid "${name}")" cat << EOF -CHART libreswan.${libreswan_tunnel_charts[${name}]}_net '${name}_net' "LibreSWAN Tunnel ${name} Traffic" "kilobits/s" "${name}" libreswan.net area $((libreswan_priority)) $libreswan_update_every +CHART libreswan.${libreswan_tunnel_charts[${name}]}_net '${name}_net' "LibreSWAN Tunnel ${name} Traffic" "kilobits/s" "${name}" libreswan.net area $((libreswan_priority)) $libreswan_update_every '' '' 'libreswan' DIMENSION in '' incremental 8 1000 DIMENSION out '' incremental -8 1000 -CHART libreswan.${libreswan_tunnel_charts[${name}]}_uptime '${name}_uptime' "LibreSWAN Tunnel ${name} Uptime" "seconds" "${name}" libreswan.uptime line $((libreswan_priority + 1)) $libreswan_update_every +CHART libreswan.${libreswan_tunnel_charts[${name}]}_uptime '${name}_uptime' "LibreSWAN Tunnel ${name} Uptime" "seconds" "${name}" libreswan.uptime line $((libreswan_priority + 1)) $libreswan_update_every '' '' 'libreswan' DIMENSION uptime '' absolute 1 1 EOF @@ -173,7 +173,7 @@ VALUESEOF # _update is called continuously, to collect the values libreswan_update() { # the first argument to this function is the microseconds since last update - # pass this parameter to the BEGIN statement (see bellow). + # pass this parameter to the BEGIN statement (see below). 
libreswan_get || return 1 libreswan_now=$(date +%s) diff --git a/collectors/charts.d.plugin/nut/nut.chart.sh b/collectors/charts.d.plugin/nut/nut.chart.sh index 60233361..2f7e3f33 100644 --- a/collectors/charts.d.plugin/nut/nut.chart.sh +++ b/collectors/charts.d.plugin/nut/nut.chart.sh @@ -129,7 +129,7 @@ EOF2 nut_update() { # the first argument to this function is the microseconds since last update - # pass this parameter to the BEGIN statement (see bellow). + # pass this parameter to the BEGIN statement (see below). # do all the work to collect / calculate the values # for each dimension diff --git a/collectors/charts.d.plugin/opensips/opensips.chart.sh b/collectors/charts.d.plugin/opensips/opensips.chart.sh index d3a2118c..02401fd5 100644 --- a/collectors/charts.d.plugin/opensips/opensips.chart.sh +++ b/collectors/charts.d.plugin/opensips/opensips.chart.sh @@ -49,61 +49,61 @@ opensips_check() { opensips_create() { # create the charts cat << EOF -CHART opensips.dialogs_active '' "OpenSIPS Active Dialogs" "dialogs" dialogs '' area $((opensips_priority + 1)) $opensips_update_every +CHART opensips.dialogs_active '' "OpenSIPS Active Dialogs" "dialogs" dialogs '' area $((opensips_priority + 1)) $opensips_update_every '' '' 'opensips' DIMENSION dialog_active_dialogs active absolute 1 1 DIMENSION dialog_early_dialogs early absolute -1 1 -CHART opensips.users '' "OpenSIPS Users" "users" users '' line $((opensips_priority + 2)) $opensips_update_every +CHART opensips.users '' "OpenSIPS Users" "users" users '' line $((opensips_priority + 2)) $opensips_update_every '' '' 'opensips' DIMENSION usrloc_registered_users registered absolute 1 1 DIMENSION usrloc_location_users location absolute 1 1 DIMENSION usrloc_location_contacts contacts absolute 1 1 DIMENSION usrloc_location_expires expires incremental -1 1 -CHART opensips.registrar '' "OpenSIPS Registrar" "registrations/s" registrar '' line $((opensips_priority + 3)) $opensips_update_every +CHART opensips.registrar '' 
"OpenSIPS Registrar" "registrations/s" registrar '' line $((opensips_priority + 3)) $opensips_update_every '' '' 'opensips' DIMENSION registrar_accepted_regs accepted incremental 1 1 DIMENSION registrar_rejected_regs rejected incremental -1 1 -CHART opensips.transactions '' "OpenSIPS Transactions" "transactions/s" transactions '' line $((opensips_priority + 4)) $opensips_update_every +CHART opensips.transactions '' "OpenSIPS Transactions" "transactions/s" transactions '' line $((opensips_priority + 4)) $opensips_update_every '' '' 'opensips' DIMENSION tm_UAS_transactions UAS incremental 1 1 DIMENSION tm_UAC_transactions UAC incremental -1 1 -CHART opensips.core_rcv '' "OpenSIPS Core Receives" "queries/s" core '' line $((opensips_priority + 5)) $opensips_update_every +CHART opensips.core_rcv '' "OpenSIPS Core Receives" "queries/s" core '' line $((opensips_priority + 5)) $opensips_update_every '' '' 'opensips' DIMENSION core_rcv_requests requests incremental 1 1 DIMENSION core_rcv_replies replies incremental -1 1 -CHART opensips.core_fwd '' "OpenSIPS Core Forwards" "queries/s" core '' line $((opensips_priority + 6)) $opensips_update_every +CHART opensips.core_fwd '' "OpenSIPS Core Forwards" "queries/s" core '' line $((opensips_priority + 6)) $opensips_update_every '' '' 'opensips' DIMENSION core_fwd_requests requests incremental 1 1 DIMENSION core_fwd_replies replies incremental -1 1 -CHART opensips.core_drop '' "OpenSIPS Core Drops" "queries/s" core '' line $((opensips_priority + 7)) $opensips_update_every +CHART opensips.core_drop '' "OpenSIPS Core Drops" "queries/s" core '' line $((opensips_priority + 7)) $opensips_update_every '' '' 'opensips' DIMENSION core_drop_requests requests incremental 1 1 DIMENSION core_drop_replies replies incremental -1 1 -CHART opensips.core_err '' "OpenSIPS Core Errors" "queries/s" core '' line $((opensips_priority + 8)) $opensips_update_every +CHART opensips.core_err '' "OpenSIPS Core Errors" "queries/s" core '' line 
$((opensips_priority + 8)) $opensips_update_every '' '' 'opensips' DIMENSION core_err_requests requests incremental 1 1 DIMENSION core_err_replies replies incremental -1 1 -CHART opensips.core_bad '' "OpenSIPS Core Bad" "queries/s" core '' line $((opensips_priority + 9)) $opensips_update_every +CHART opensips.core_bad '' "OpenSIPS Core Bad" "queries/s" core '' line $((opensips_priority + 9)) $opensips_update_every '' '' 'opensips' DIMENSION core_bad_URIs_rcvd bad_URIs_rcvd incremental 1 1 DIMENSION core_unsupported_methods unsupported_methods incremental 1 1 DIMENSION core_bad_msg_hdr bad_msg_hdr incremental 1 1 -CHART opensips.tm_replies '' "OpenSIPS TM Replies" "replies/s" transactions '' line $((opensips_priority + 10)) $opensips_update_every +CHART opensips.tm_replies '' "OpenSIPS TM Replies" "replies/s" transactions '' line $((opensips_priority + 10)) $opensips_update_every '' '' 'opensips' DIMENSION tm_received_replies received incremental 1 1 DIMENSION tm_relayed_replies relayed incremental 1 1 DIMENSION tm_local_replies local incremental 1 1 -CHART opensips.transactions_status '' "OpenSIPS Transactions Status" "transactions/s" transactions '' line $((opensips_priority + 11)) $opensips_update_every +CHART opensips.transactions_status '' "OpenSIPS Transactions Status" "transactions/s" transactions '' line $((opensips_priority + 11)) $opensips_update_every '' '' 'opensips' DIMENSION tm_2xx_transactions 2xx incremental 1 1 DIMENSION tm_3xx_transactions 3xx incremental 1 1 DIMENSION tm_4xx_transactions 4xx incremental 1 1 DIMENSION tm_5xx_transactions 5xx incremental 1 1 DIMENSION tm_6xx_transactions 6xx incremental 1 1 -CHART opensips.transactions_inuse '' "OpenSIPS InUse Transactions" "transactions" transactions '' line $((opensips_priority + 12)) $opensips_update_every +CHART opensips.transactions_inuse '' "OpenSIPS InUse Transactions" "transactions" transactions '' line $((opensips_priority + 12)) $opensips_update_every '' '' 'opensips' DIMENSION 
tm_inuse_transactions inuse absolute 1 1 -CHART opensips.sl_replies '' "OpenSIPS SL Replies" "replies/s" core '' line $((opensips_priority + 13)) $opensips_update_every +CHART opensips.sl_replies '' "OpenSIPS SL Replies" "replies/s" core '' line $((opensips_priority + 13)) $opensips_update_every '' '' 'opensips' DIMENSION sl_1xx_replies 1xx incremental 1 1 DIMENSION sl_2xx_replies 2xx incremental 1 1 DIMENSION sl_3xx_replies 3xx incremental 1 1 @@ -114,31 +114,31 @@ DIMENSION sl_sent_replies sent incremental 1 1 DIMENSION sl_sent_err_replies error incremental 1 1 DIMENSION sl_received_ACKs ACKed incremental 1 1 -CHART opensips.dialogs '' "OpenSIPS Dialogs" "dialogs/s" dialogs '' line $((opensips_priority + 14)) $opensips_update_every +CHART opensips.dialogs '' "OpenSIPS Dialogs" "dialogs/s" dialogs '' line $((opensips_priority + 14)) $opensips_update_every '' '' 'opensips' DIMENSION dialog_processed_dialogs processed incremental 1 1 DIMENSION dialog_expired_dialogs expired incremental 1 1 DIMENSION dialog_failed_dialogs failed incremental -1 1 -CHART opensips.net_waiting '' "OpenSIPS Network Waiting" "kilobytes" net '' line $((opensips_priority + 15)) $opensips_update_every +CHART opensips.net_waiting '' "OpenSIPS Network Waiting" "kilobytes" net '' line $((opensips_priority + 15)) $opensips_update_every '' '' 'opensips' DIMENSION net_waiting_udp UDP absolute 1 1024 DIMENSION net_waiting_tcp TCP absolute 1 1024 -CHART opensips.uri_checks '' "OpenSIPS URI Checks" "checks / sec" uri '' line $((opensips_priority + 16)) $opensips_update_every +CHART opensips.uri_checks '' "OpenSIPS URI Checks" "checks / sec" uri '' line $((opensips_priority + 16)) $opensips_update_every '' '' 'opensips' DIMENSION uri_positive_checks positive incremental 1 1 DIMENSION uri_negative_checks negative incremental -1 1 -CHART opensips.traces '' "OpenSIPS Traces" "traces / sec" traces '' line $((opensips_priority + 17)) $opensips_update_every +CHART opensips.traces '' "OpenSIPS Traces" "traces 
/ sec" traces '' line $((opensips_priority + 17)) $opensips_update_every '' '' 'opensips' DIMENSION siptrace_traced_requests requests incremental 1 1 DIMENSION siptrace_traced_replies replies incremental -1 1 -CHART opensips.shmem '' "OpenSIPS Shared Memory" "kilobytes" mem '' line $((opensips_priority + 18)) $opensips_update_every +CHART opensips.shmem '' "OpenSIPS Shared Memory" "kilobytes" mem '' line $((opensips_priority + 18)) $opensips_update_every '' '' 'opensips' DIMENSION shmem_total_size total absolute 1 1024 DIMENSION shmem_used_size used absolute 1 1024 DIMENSION shmem_real_used_size real_used absolute 1 1024 DIMENSION shmem_max_used_size max_used absolute 1 1024 DIMENSION shmem_free_size free absolute 1 1024 -CHART opensips.shmem_fragments '' "OpenSIPS Shared Memory Fragmentation" "fragments" mem '' line $((opensips_priority + 19)) $opensips_update_every +CHART opensips.shmem_fragments '' "OpenSIPS Shared Memory Fragmentation" "fragments" mem '' line $((opensips_priority + 19)) $opensips_update_every '' '' 'opensips' DIMENSION shmem_fragments fragments absolute 1 1 EOF @@ -147,7 +147,7 @@ EOF opensips_update() { # the first argument to this function is the microseconds since last update - # pass this parameter to the BEGIN statement (see bellow). + # pass this parameter to the BEGIN statement (see below). # do all the work to collect / calculate the values # for each dimension @@ -158,7 +158,7 @@ opensips_update() { # local opensips_client_http_ then one or more of these a-z 0-9 _ then = and one of more of 0-9 # local opensips_server_all_ then one or more of these a-z 0-9 _ then = and one of more of 0-9 # 4. 
then execute this as a script with the eval - # be very carefull with eval: + # be very careful with eval: # prepare the script and always grep at the end the lines that are useful, so that # even if something goes wrong, no other code can be executed diff --git a/collectors/charts.d.plugin/sensors/README.md b/collectors/charts.d.plugin/sensors/README.md index cee3f601..2a1cdb67 100644 --- a/collectors/charts.d.plugin/sensors/README.md +++ b/collectors/charts.d.plugin/sensors/README.md @@ -19,20 +19,37 @@ The plugin will provide charts for all configured system sensors The plugin will create Netdata charts for: -1. **Temperature** -2. **Voltage** -3. **Current** -4. **Power** -5. **Fans Speed** -6. **Energy** -7. **Humidity** +1. **Temperature** +2. **Voltage** +3. **Current** +4. **Power** +5. **Fans Speed** +6. **Energy** +7. **Humidity** One chart for every sensor chip found and each of the above will be created. +## Enable the collector + +The `sensors` collector is disabled by default. To enable it, edit the `charts.d.conf` file using `edit-config` from the +Netdata [config directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config charts.d.conf +``` + +It also needs to be set to "force" to be enabled: + +```shell +# example=force +sensors=force +``` + ## Configuration -Edit the `charts.d/sensors.conf` configuration file using `edit-config` from the Netdata [config -directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. +Edit the `charts.d/sensors.conf` configuration file using `edit-config` from the +Netdata [config directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. 
```bash cd /etc/netdata # Replace this path with your Netdata config directory, if different diff --git a/collectors/charts.d.plugin/sensors/sensors.chart.sh b/collectors/charts.d.plugin/sensors/sensors.chart.sh index b9218777..bff381f1 100644 --- a/collectors/charts.d.plugin/sensors/sensors.chart.sh +++ b/collectors/charts.d.plugin/sensors/sensors.chart.sh @@ -143,7 +143,7 @@ sensors_create() { files="$(sensors_check_files "$files")" files="$(sensors_check_temp_type "$files")" [ -z "$files" ] && continue - echo "CHART sensors.temp_$id '' '$name Temperature' 'Celsius' 'temperature' 'sensors.temp' line $((sensors_priority + 1)) $sensors_update_every" + echo "CHART sensors.temp_$id '' '$name Temperature' 'Celsius' 'temperature' 'sensors.temp' line $((sensors_priority + 1)) $sensors_update_every '' '' 'sensors'" echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN sensors.temp_$id \$1\"" divisor=1000 ;; @@ -152,7 +152,7 @@ sensors_create() { files="$(ls "$path"/in*_input 2>/dev/null)" files="$(sensors_check_files "$files")" [ -z "$files" ] && continue - echo "CHART sensors.volt_$id '' '$name Voltage' 'Volts' 'voltage' 'sensors.volt' line $((sensors_priority + 2)) $sensors_update_every" + echo "CHART sensors.volt_$id '' '$name Voltage' 'Volts' 'voltage' 'sensors.volt' line $((sensors_priority + 2)) $sensors_update_every '' '' 'sensors'" echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN sensors.volt_$id \$1\"" divisor=1000 ;; @@ -161,7 +161,7 @@ sensors_create() { files="$(ls "$path"/curr*_input 2>/dev/null)" files="$(sensors_check_files "$files")" [ -z "$files" ] && continue - echo "CHART sensors.curr_$id '' '$name Current' 'Ampere' 'current' 'sensors.curr' line $((sensors_priority + 3)) $sensors_update_every" + echo "CHART sensors.curr_$id '' '$name Current' 'Ampere' 'current' 'sensors.curr' line $((sensors_priority + 3)) $sensors_update_every '' '' 'sensors'" echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN sensors.curr_$id \$1\"" divisor=1000 ;; @@ -170,7 +170,7 @@ sensors_create() { 
files="$(ls "$path"/power*_input 2>/dev/null)" files="$(sensors_check_files "$files")" [ -z "$files" ] && continue - echo "CHART sensors.power_$id '' '$name Power' 'Watt' 'power' 'sensors.power' line $((sensors_priority + 4)) $sensors_update_every" + echo "CHART sensors.power_$id '' '$name Power' 'Watt' 'power' 'sensors.power' line $((sensors_priority + 4)) $sensors_update_every '' '' 'sensors'" echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN sensors.power_$id \$1\"" divisor=1000000 ;; @@ -179,7 +179,7 @@ sensors_create() { files="$(ls "$path"/fan*_input 2>/dev/null)" files="$(sensors_check_files "$files")" [ -z "$files" ] && continue - echo "CHART sensors.fan_$id '' '$name Fans Speed' 'Rotations / Minute' 'fans' 'sensors.fans' line $((sensors_priority + 5)) $sensors_update_every" + echo "CHART sensors.fan_$id '' '$name Fans Speed' 'Rotations / Minute' 'fans' 'sensors.fans' line $((sensors_priority + 5)) $sensors_update_every '' '' 'sensors'" echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN sensors.fan_$id \$1\"" ;; @@ -187,7 +187,7 @@ sensors_create() { files="$(ls "$path"/energy*_input 2>/dev/null)" files="$(sensors_check_files "$files")" [ -z "$files" ] && continue - echo "CHART sensors.energy_$id '' '$name Energy' 'Joule' 'energy' 'sensors.energy' areastack $((sensors_priority + 6)) $sensors_update_every" + echo "CHART sensors.energy_$id '' '$name Energy' 'Joule' 'energy' 'sensors.energy' areastack $((sensors_priority + 6)) $sensors_update_every '' '' 'sensors'" echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN sensors.energy_$id \$1\"" algorithm="incremental" divisor=1000000 @@ -197,7 +197,7 @@ sensors_create() { files="$(ls "$path"/humidity*_input 2>/dev/null)" files="$(sensors_check_files "$files")" [ -z "$files" ] && continue - echo "CHART sensors.humidity_$id '' '$name Humidity' 'Percent' 'humidity' 'sensors.humidity' line $((sensors_priority + 7)) $sensors_update_every" + echo "CHART sensors.humidity_$id '' '$name Humidity' 'Percent' 'humidity' 'sensors.humidity' line 
$((sensors_priority + 7)) $sensors_update_every '' '' 'sensors'" echo >>"$TMP_DIR/sensors.sh" "echo \"BEGIN sensors.humidity_$id \$1\"" divisor=1000 ;; @@ -237,7 +237,7 @@ sensors_create() { # _update is called continuously, to collect the values sensors_update() { # the first argument to this function is the microseconds since last update - # pass this parameter to the BEGIN statement (see bellow). + # pass this parameter to the BEGIN statement (see below). # do all the work to collect / calculate the values # for each dimension diff --git a/collectors/checks.plugin/plugin_checks.h b/collectors/checks.plugin/plugin_checks.h index 93494765..c8057253 100644 --- a/collectors/checks.plugin/plugin_checks.h +++ b/collectors/checks.plugin/plugin_checks.h @@ -3,7 +3,7 @@ #ifndef NETDATA_PLUGIN_CHECKS_H #define NETDATA_PLUGIN_CHECKS_H 1 -#include "../../daemon/common.h" +#include "daemon/common.h" #ifdef NETDATA_INTERNAL_CHECKS diff --git a/collectors/cups.plugin/cups_plugin.c b/collectors/cups.plugin/cups_plugin.c index 25d6f8cb..1d493619 100644 --- a/collectors/cups.plugin/cups_plugin.c +++ b/collectors/cups.plugin/cups_plugin.c @@ -6,7 +6,7 @@ * Released under GPL v3+ */ -#include "../../libnetdata/libnetdata.h" +#include "libnetdata/libnetdata.h" #include <cups/cups.h> #include <limits.h> diff --git a/collectors/diskspace.plugin/plugin_diskspace.c b/collectors/diskspace.plugin/plugin_diskspace.c index 311b55ad..2e7d1890 100644 --- a/collectors/diskspace.plugin/plugin_diskspace.c +++ b/collectors/diskspace.plugin/plugin_diskspace.c @@ -83,6 +83,28 @@ int mount_point_cleanup(void *entry, void *data) { return 0; } +// for the full list of protected mount points look at +// https://github.com/systemd/systemd/blob/1eb3ef78b4df28a9e9f464714208f2682f957e36/src/core/namespace.c#L142-L149 +// https://github.com/systemd/systemd/blob/1eb3ef78b4df28a9e9f464714208f2682f957e36/src/core/namespace.c#L180-L194 +static const char *systemd_protected_mount_points[] = { + "/home", + 
"/root", + "/usr", + "/boot", + "/efi", + "/etc", + NULL +}; + +int mount_point_is_protected(char *mount_point) +{ + for (size_t i = 0; systemd_protected_mount_points[i] != NULL; i++) + if (!strcmp(mount_point, systemd_protected_mount_points[i])) + return 1; + + return 0; +} + static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) { const char *family = mi->mount_point; const char *disk = mi->persistent_id; @@ -190,7 +212,12 @@ static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) { if(unlikely(m->do_space == CONFIG_BOOLEAN_NO && m->do_inodes == CONFIG_BOOLEAN_NO)) return; - if(unlikely(mi->flags & MOUNTINFO_READONLY && !m->collected && m->do_space != CONFIG_BOOLEAN_YES && m->do_inodes != CONFIG_BOOLEAN_YES)) + if (unlikely( + mi->flags & MOUNTINFO_READONLY && + !mount_point_is_protected(mi->mount_point) && + !m->collected && + m->do_space != CONFIG_BOOLEAN_YES && + m->do_inodes != CONFIG_BOOLEAN_YES)) return; struct statvfs buff_statvfs; @@ -389,6 +416,10 @@ void *diskspace_main(void *ptr) { if(unlikely(mi->flags & (MOUNTINFO_IS_DUMMY | MOUNTINFO_IS_BIND))) continue; + // exclude mounts made by ProtectHome and ProtectSystem systemd hardening options + if(mi->flags & MOUNTINFO_READONLY && !strcmp(mi->root, mi->mount_point)) + continue; + do_disk_space_stats(mi, update_every); if(unlikely(netdata_exit)) break; } diff --git a/collectors/diskspace.plugin/plugin_diskspace.h b/collectors/diskspace.plugin/plugin_diskspace.h index 7c9df9d1..1f967087 100644 --- a/collectors/diskspace.plugin/plugin_diskspace.h +++ b/collectors/diskspace.plugin/plugin_diskspace.h @@ -3,7 +3,7 @@ #ifndef NETDATA_PLUGIN_PROC_DISKSPACE_H #define NETDATA_PLUGIN_PROC_DISKSPACE_H -#include "../../daemon/common.h" +#include "daemon/common.h" #if (TARGET_OS == OS_LINUX) diff --git a/collectors/ebpf.plugin/Makefile.am b/collectors/ebpf.plugin/Makefile.am index 18b1fc6c..2d5f92a6 100644 --- a/collectors/ebpf.plugin/Makefile.am +++ 
b/collectors/ebpf.plugin/Makefile.am @@ -3,10 +3,6 @@ AUTOMAKE_OPTIONS = subdir-objects MAINTAINERCLEANFILES = $(srcdir)/Makefile.in -CLEANFILES = \ - reset_netdata_trace.sh \ - $(NULL) - include $(top_srcdir)/build/subst.inc SUFFIXES = .in @@ -16,12 +12,7 @@ userebpfconfigdir=$(configdir)/ebpf.d install-exec-local: $(INSTALL) -d $(DESTDIR)$(userebpfconfigdir) -dist_plugins_SCRIPTS = \ - reset_netdata_trace.sh \ - $(NULL) - dist_noinst_DATA = \ - reset_netdata_trace.sh.in \ README.md \ $(NULL) @@ -34,7 +25,18 @@ dist_ebpfconfig_DATA = \ ebpf.d/ebpf_kernel_reject_list.txt \ ebpf.d/cachestat.conf \ ebpf.d/dcstat.conf \ + ebpf.d/disk.conf \ + ebpf.d/fd.conf \ + ebpf.d/filesystem.conf \ + ebpf.d/hardirq.conf \ + ebpf.d/mdflush.conf \ + ebpf.d/mount.conf \ ebpf.d/network.conf \ + ebpf.d/oomkill.conf \ ebpf.d/process.conf \ + ebpf.d/shm.conf \ + ebpf.d/softirq.conf \ ebpf.d/sync.conf \ + ebpf.d/swap.conf \ + ebpf.d/vfs.conf \ $(NULL) diff --git a/collectors/ebpf.plugin/README.md b/collectors/ebpf.plugin/README.md index 1e593786..60f1fd74 100644 --- a/collectors/ebpf.plugin/README.md +++ b/collectors/ebpf.plugin/README.md @@ -1,35 +1,52 @@ <!-- title: "eBPF monitoring with Netdata" -description: "Use Netdata's extended Berkeley Packet Filter (eBPF) collector to monitor kernel-level metrics about your complex applications with per-second granularity." +description: "Use Netdata's extended Berkeley Packet Filter (eBPF) collector to monitor kernel-level metrics about your +complex applications with per-second granularity." custom_edit_url: https://github.com/netdata/netdata/edit/master/collectors/ebpf.plugin/README.md sidebar_label: "eBPF" --> # eBPF monitoring with Netdata -Netdata's extended Berkeley Packet Filter (eBPF) collector monitors kernel-level metrics for file descriptors, virtual -filesystem IO, and process management on Linux systems. 
You can use our eBPF collector to analyze how and when a process -accesses files, when it makes system calls, whether it leaks memory or creating zombie processes, and more. +eBPF consists of a wide toolchain that ultimately outputs a set of bytecode that will run inside the eBPF virtual +machine (VM) which lives inside the Linux kernel. The program in particular is executed in response to a [tracepoint +or kprobe](#probes-and-tracepoints) activation. -Netdata's eBPF monitoring toolkit uses two custom eBPF programs. The default, called `entry`, monitors calls to a -variety of kernel functions, such as `do_sys_open`, `__close_fd`, `vfs_read`, `vfs_write`, `_do_fork`, and more. The -`return` program also monitors the return of each kernel functions to deliver more granular metrics about how your -system and its applications interact with the Linux kernel. +Netdata has written many eBPF programs, which, when compiled and integrated into the Netdata Agent, are able to collect +a wide array of data about the host that would otherwise be impossible. The data eBPF programs can collect is truly unique, +which gives the Netdata Agent access to data that is high value but normally hard to capture. -eBPF monitoring can help you troubleshoot and debug how applications interact with the Linux kernel. See our [guide on -troubleshooting apps with eBPF metrics](/docs/guides/troubleshoot/monitor-debug-applications-ebpf.md) for configuration -and troubleshooting tips. +eBPF monitoring can help you troubleshoot and debug how applications interact with the Linux kernel. See +our [guide on troubleshooting apps with eBPF metrics](/docs/guides/troubleshoot/monitor-debug-applications-ebpf.md) for +configuration and troubleshooting tips. 
<figure> <img src="https://user-images.githubusercontent.com/1153921/74746434-ad6a1e00-5222-11ea-858a-a7882617ae02.png" alt="An example of VFS charts, made possible by the eBPF collector plugin" /> <figcaption>An example of VFS charts made possible by the eBPF collector plugin.</figcaption> </figure> -## Enable the collector on Linux +## Probes and Tracepoints + +The following two features from the Linux kernel are used by Netdata to run eBPF programs: + +- Kprobes and return probes (kretprobe): Probes can insert virtually into any kernel instruction. When eBPF runs in + `entry` mode, it attaches only `kprobes` for internal functions monitoring calls and some arguments every time a + function is called. The user can also change configuration to use [`return`](#global) mode, and this will allow users + to monitor return from these functions and detect possible failures. +- Tracepoints are hooks to call specific functions. Tracepoints are more stable than `kprobes` and are preferred when + both options are available. + +In each case, wherever a normal kprobe, kretprobe, or tracepoint would have run its hook function, an eBPF program is +run instead, performing various collection logic before letting the kernel continue its normal control flow. + +There are more methods by which eBPF programs can be triggered but which are not currently supported, such as via uprobes +which allow hooking into arbitrary user-space functions in a similar manner to kprobes. + +## Manually enable the collector on Linux **The eBPF collector is installed and enabled by default on most new installations of the Agent**. The eBPF collector -does not currently work with [static build installations](/packaging/installer/methods/kickstart-64.md), but improved -support is in active development. +does not currently work with [static build installations](/packaging/installer/methods/kickstart-64.md) for kernels older +than `4.11`, but improved support is in active development. 
eBPF monitoring only works on Linux systems and with specific Linux kernels, including all kernels newer than `4.11.0`, and all kernels on CentOS 7.6 or later. @@ -39,72 +56,403 @@ section for details. ## Charts -The eBPF collector creates an **eBPF** menu in the Agent's dashboard along with three sub-menus: **File**, **VFS**, and -**Process**. All the charts in this section update every second. The collector stores the actual value inside of its -process, but charts only show the difference between the values collected in the previous and current seconds. +The eBPF collector creates charts on different menus, like System Overview, Memory, MD arrays, Disks, Filesystem, +Mount Points, Networking Stack, systemd Services, and Applications. + +The collector stores the actual value inside of its process, but charts only show the difference between the values +collected in the previous and current seconds. + +### System overview + +Not all charts within the System Overview menu are enabled by default, because they add around 100ns overhead for each +function call, this number is small from a human perspective, but the functions are called many times creating an impact +on host. See the [configuration](#configuration) section for details about how to enable them. + +#### Processes + +Internally, the Linux kernel treats both processes and threads as `tasks`. To create a thread, the kernel offers a few +system calls: `fork(2)`, `vfork(2)`, and `clone(2)`. To generate this chart, the eBPF +collector uses the following `tracepoints` and `kprobe`: + +- `sched/sched_process_fork`: Tracepoint called after a call for `fork (2)`, `vfork (2)` and `clone (2)`. +- `sched/sched_process_exec`: Tracepoint called after an exec-family syscall. +- `kprobe/kernel_clone`: This is the main [`fork()`](https://elixir.bootlin.com/linux/v5.10/source/kernel/fork.c#L2415) + routine since kernel `5.10.0` was released. 
+- `kprobe/_do_fork`: Like `kernel_clone`, but this was the main function between kernels `4.2.0` and `5.9.16` +- `kprobe/do_fork`: This was the main function before kernel `4.2.0`. + +#### Process Exit + +Ending a task requires two steps. The first is a call to the internal function `do_exit`, which notifies the operating +system that the task is finishing its work. The second step is to release the kernel information with the internal +function `release_task`. The difference between the two dimensions can help you discover +[zombie processes](https://en.wikipedia.org/wiki/Zombie_process). To get the metrics, the collector uses: + +- `sched/sched_process_exit`: Tracepoint called after a task exits. +- `kprobe/release_task`: This function is called when a process exits, as the kernel still needs to remove the process + descriptor. + +#### Task error + +The functions responsible for ending tasks do not return values, so this chart contains information about failures on +process and thread creation only. + +#### Swap + +Inside the swap submenu the eBPF plugin creates the chart `swapcalls`; this chart is displaying when processes are +calling functions [`swap_readpage` and `swap_writepage`](https://hzliu123.github.io/linux-kernel/Page%20Cache%20in%20Linux%202.6.pdf ), +which are functions responsible for doing IO in swap memory. To collect the exact moment that an access to swap happens, +the collector attaches `kprobes` for cited functions. + +#### Soft IRQ + +The following `tracepoints` are used to measure time usage for soft IRQs: + +- [`irq/softirq_entry`](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html#c.trace_softirq_entry): Called + before softirq handler +- [`irq/softirq_exit`](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html#c.trace_softirq_exit): Called when + softirq handler returns. + +#### Hard IRQ + +The following tracepoints are used to measure the latency of servicing a +hardware interrupt request (hard IRQ). 
+ +- [`irq/irq_handler_entry`](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html#c.trace_irq_handler_entry): + Called immediately before the IRQ action handler. +- [`irq/irq_handler_exit`](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html#c.trace_irq_handler_exit): + Called immediately after the IRQ action handler returns. +- `irq_vectors`: These are traces from `irq_handler_entry` and + `irq_handler_exit` when an IRQ is handled. The following elements from vector + are triggered: + - `irq_vectors/local_timer_entry` + - `irq_vectors/local_timer_exit` + - `irq_vectors/reschedule_entry` + - `irq_vectors/reschedule_exit` + - `irq_vectors/call_function_entry` + - `irq_vectors/call_function_exit` + - `irq_vectors/call_function_single_entry` + - `irq_vectors/call_function_single_exit` + - `irq_vectors/irq_work_entry` + - `irq_vectors/irq_work_exit` + - `irq_vectors/error_apic_entry` + - `irq_vectors/error_apic_exit` + - `irq_vectors/thermal_apic_entry` + - `irq_vectors/thermal_apic_exit` + - `irq_vectors/threshold_apic_entry` + - `irq_vectors/threshold_apic_exit` + - `irq_vectors/deferred_error_entry` + - `irq_vectors/deferred_error_exit` + - `irq_vectors/spurious_apic_entry` + - `irq_vectors/spurious_apic_exit` + - `irq_vectors/x86_platform_ipi_entry` + - `irq_vectors/x86_platform_ipi_exit` + +#### IPC shared memory + +To monitor shared memory system call counts, the following `kprobes` are used: + +- `shmget`: Runs when [`shmget`](https://man7.org/linux/man-pages/man2/shmget.2.html) is called. +- `shmat`: Runs when [`shmat`](https://man7.org/linux/man-pages/man2/shmat.2.html) is called. +- `shmdt`: Runs when [`shmdt`](https://man7.org/linux/man-pages/man2/shmat.2.html) is called. +- `shmctl`: Runs when [`shmctl`](https://man7.org/linux/man-pages/man2/shmctl.2.html) is called. 
+ +### Memory + +In the memory submenu the eBPF plugin creates two submenus **page cache** and **synchronization** with the following +organization: + +* Page Cache + * Page cache ratio + * Dirty pages + * Page cache hits + * Page cache misses +* Synchronization + * File sync + * Memory map sync + * File system sync + * File range sync + +#### Page cache ratio + +The chart `cachestat_ratio` shows how processes are accessing page cache. In a normal scenario, we expect values around +100%, which means that the majority of the work on the machine is processed in memory. To calculate the ratio, Netdata +attaches `kprobes` for kernel functions: + +- `add_to_page_cache_lru`: Page addition. +- `mark_page_accessed`: Access to cache. +- `account_page_dirtied`: Dirty (modified) pages. +- `mark_buffer_dirty`: Writes to page cache. + +#### Dirty pages + +On `cachestat_dirties` Netdata demonstrates the number of pages that were modified. This chart shows the number of calls +to the function `mark_buffer_dirty`. + +#### Page cache hits + +A page cache hit is when the page cache is successfully accessed with a read operation. We do not count pages that were +added relatively recently. + +#### Page cache misses + +A page cache miss means that a page was not inside memory when the process tried to access it. This chart shows the +result of the difference for calls between functions `add_to_page_cache_lru` and `account_page_dirtied`. + +#### File sync + +This chart shows calls to synchronization methods, [`fsync(2)`](https://man7.org/linux/man-pages/man2/fdatasync.2.html) +and [`fdatasync(2)`](https://man7.org/linux/man-pages/man2/fdatasync.2.html), to transfer all modified page caches +for the files on disk devices. These calls block until the disk reports that the transfer has been completed. They flush +data for specific file descriptors. + +#### Memory map sync + +The chart shows calls to [`msync(2)`](https://man7.org/linux/man-pages/man2/msync.2.html) syscalls. 
This syscall flushes +changes to a file that was mapped into memory using [`mmap(2)`](https://man7.org/linux/man-pages/man2/mmap.2.html). + +#### File system sync + +This chart monitors calls demonstrating commits from filesystem caches to disk. Netdata attaches `kprobes` for +[`sync(2)`](https://man7.org/linux/man-pages/man2/sync.2.html), and [`syncfs(2)`](https://man7.org/linux/man-pages/man2/sync.2.html). + +#### File range sync + +This chart shows calls to [`sync_file_range(2)`](https://man7.org/linux/man-pages/man2/sync_file_range.2.html) which +synchronizes file segments with disk. + +> Note: This is the most dangerous syscall to synchronize data, according to its manual. + +### Multiple Device (MD) arrays + +The eBPF plugin shows multi-device flushes happening in real time. This can be used to explain some spikes happening +in [disk latency](#disk) charts. + +By default, MD flush is disabled. To enable it, configure your +`/etc/netdata/ebpf.d.conf` file as: + +```conf +[global] + mdflush = yes +``` + +#### MD flush + +To collect data related to Linux multi-device (MD) flushing, the following kprobe is used: + +- `kprobe/md_flush_request`: called whenever a request for flushing multi-device data is made. + +### Disk + +The eBPF plugin also shows a chart in the Disk section when the `disk` thread is enabled. This will create the +chart `disk_latency_io` for each disk on the host. The following tracepoints are used: + +- [`block/block_rq_issue`](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html#c.trace_block_rq_issue): + IO request operation to a device driver. +- [`block/block_rq_complete`](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html#c.trace_block_rq_complete): + IO operation completed by device. + +### Filesystem + +This group has charts demonstrating how applications interact with the Linux +kernel to open and close file descriptors. It also brings latency charts for +several different filesystems. 
-### File +#### ext4 -This group has two charts demonstrating how software interacts with the Linux kernel to open and close file descriptors. +To measure the latency of executing some actions in an +[ext4](https://elixir.bootlin.com/linux/latest/source/fs/ext4) filesystem, the +collector needs to attach `kprobes` and `kretprobes` for each of the following +functions: + +- `ext4_file_read_iter`: Function used to measure read latency. +- `ext4_file_write_iter`: Function used to measure write latency. +- `ext4_file_open`: Function used to measure open latency. +- `ext4_sync_file`: Function used to measure sync latency. + +#### ZFS + +To measure the latency of executing some actions in a zfs filesystem, the +collector needs to attach `kprobes` and `kretprobes` for each of the following +functions: + +- `zpl_iter_read`: Function used to measure read latency. +- `zpl_iter_write`: Function used to measure write latency. +- `zpl_open`: Function used to measure open latency. +- `zpl_fsync`: Function used to measure sync latency. + +#### XFS + +To measure the latency of executing some actions in an +[xfs](https://elixir.bootlin.com/linux/latest/source/fs/xfs) filesystem, the +collector needs to attach `kprobes` and `kretprobes` for each of the following +functions: + +- `xfs_file_read_iter`: Function used to measure read latency. +- `xfs_file_write_iter`: Function used to measure write latency. +- `xfs_file_open`: Function used to measure open latency. +- `xfs_file_fsync`: Function used to measure sync latency. + +#### NFS + +To measure the latency of executing some actions in an +[nfs](https://elixir.bootlin.com/linux/latest/source/fs/nfs) filesystem, the +collector needs to attach `kprobes` and `kretprobes` for each of the following +functions: + +- `nfs_file_read`: Function used to measure read latency. +- `nfs_file_write`: Function used to measure write latency. +- `nfs_file_open`: Functions used to measure open latency. 
+- `nfs4_file_open`: Functions used to measure open latency for NFS v4. +- `nfs_getattr`: Function used to measure sync latency. + +#### btrfs + +To measure the latency of executing some actions in a [btrfs](https://elixir.bootlin.com/linux/latest/source/fs/btrfs/file.c) +filesystem, the collector needs to attach `kprobes` and `kretprobes` for each of the following functions: + +> Note: We are listing two functions used to measure `read` latency, but we use either `btrfs_file_read_iter` or +`generic_file_read_iter`, depending on kernel version. + +- `btrfs_file_read_iter`: Function used to measure read latency since kernel `5.10.0`. +- `generic_file_read_iter`: Like `btrfs_file_read_iter`, but this function was used before kernel `5.10.0`. +- `btrfs_file_write_iter`: Function used to write data. +- `btrfs_file_open`: Function used to open files. +- `btrfs_sync_file`: Function used to synchronize data to filesystem. #### File descriptor -This chart contains two dimensions that show the number of calls to the functions `do_sys_open` and `__close_fd`. Most -software do not commonly call these functions directly, but they are behind the system calls `open(2)`, `openat(2)`, -and `close(2)`. +To give metrics related to `open` and `close` events, instead of attaching kprobes for each syscall used to do these +events, the collector attaches `kprobes` for the common function used for syscalls: + +- [`do_sys_open`](https://0xax.gitbooks.io/linux-insides/content/SysCall/linux-syscall-5.html ): Internal function used to + open files. +- [`do_sys_openat2`](https://elixir.bootlin.com/linux/v5.6/source/fs/open.c#L1162): + Function called from `do_sys_open` since version `5.6.0`. +- [`close_fd`](https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg2271761.html): Function used to close file + descriptor since kernel `5.11.0`. +- `__close_fd`: Function used to close files before version `5.11.0`. 
#### File error This chart shows the number of times some software tried and failed to open or close a file descriptor. -### VFS +#### VFS + +The Linux Virtual File System (VFS) is an abstraction layer on top of a +concrete filesystem like the ones listed in the parent section, e.g. `ext4`. -A [virtual file system](https://en.wikipedia.org/wiki/Virtual_file_system) (VFS) is a layer on top of regular -filesystems. The functions present inside this API are used for all filesystems, so it's possible the charts in this -group won't show _all_ the actions that occurred on your system. +In this section we list the mechanism by which we gather VFS data, and what +charts are consequently created. -#### Deleted objects +##### VFS eBPF Hooks -This chart monitors calls for `vfs_unlink`. This function is responsible for removing objects from the file system. +To measure the latency and total quantity of executing some VFS-level +functions, ebpf.plugin needs to attach kprobes and kretprobes for each of the +following functions: -#### IO +- `vfs_write`: Function used for monitoring the number of successful & failed + filesystem write calls, as well as the total number of written bytes. +- `vfs_writev`: Same function as `vfs_write` but for vector writes (i.e. a + single write operation using a group of buffers rather than 1). +- `vfs_read`: Function used for monitoring the number of successful & failed + filesystem read calls, as well as the total number of read bytes. +- `vfs_readv`: Same function as `vfs_read` but for vector reads (i.e. a single + read operation using a group of buffers rather than 1). +- `vfs_unlink`: Function used for monitoring the number of successful & failed + filesystem unlink calls. +- `vfs_fsync`: Function used for monitoring the number of successful & failed + filesystem fsync calls. +- `vfs_open`: Function used for monitoring the number of successful & failed + filesystem open calls. 
+- `vfs_create`: Function used for monitoring the number of successful & failed + filesystem create calls. + +##### VFS Deleted objects + +This chart monitors calls to `vfs_unlink`. This function is responsible for removing objects from the file system. + +##### VFS IO This chart shows the number of calls to the functions `vfs_read` and `vfs_write`. -#### IO bytes +##### VFS IO bytes -This chart also monitors `vfs_read` and `vfs_write`, but instead shows the total of bytes read and written with these -functions. +This chart also monitors `vfs_read` and `vfs_write` but, instead of the number of calls, it shows the total amount of +bytes read and written with these functions. The Agent displays the number of bytes written as negative because they are moving down to disk. -#### IO errors +##### VFS IO errors The Agent counts and shows the number of instances where a running program experiences a read or write error. -### Process +##### VFS Create -For this group, the eBPF collector monitors process/thread creation and process end, and then displays any errors in the -following charts. +This chart shows the number of calls to `vfs_create`. This function is responsible for creating files. -#### Process thread +##### VFS Synchronization -Internally, the Linux kernel treats both processes and threads as `tasks`. To create a thread, the kernel offers a few -system calls: `fork(2)`, `vfork(2)` and `clone(2)`. In turn, each of these system calls use the function `_do_fork`. To -generate this chart, the eBPF collector monitors `_do_fork` to populate the `process` dimension, and monitors -`sys_clone` to identify threads. +This chart shows the number of calls to `vfs_fsync`. This function is responsible for calling `fsync(2)` or +`fdatasync(2)` on a file. You can see more details in the Synchronization section. -#### Exit +##### VFS Open -Ending a task requires two steps. 
The first is a call to the internal function `do_exit`, which notifies the operating -system that the task is finishing its work. The second step is to release the kernel information with the internal -function `release_task`. The difference between the two dimensions can help you discover [zombie -processes](https://en.wikipedia.org/wiki/Zombie_process). +This chart shows the number of calls to `vfs_open`. This function is responsible for opening files. -#### Task error +#### Directory Cache -The functions responsible for ending tasks do not return values, so this chart contains information about failures on -process and thread creation. +Metrics for directory cache are collected using kprobe for `lookup_fast`, because we are interested in the number of +times this function is accessed. On the other hand, for `d_lookup` we are not only interested in the number of times it +is accessed, but also in possible errors, so we need to attach a `kretprobe`. For this reason, the following is used: + +- [`lookup_fast`](https://lwn.net/Articles/649115/): Called to look at data inside the directory cache. +- [`d_lookup`](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/dcache.c?id=052b398a43a7de8c68c13e7fa05d6b3d16ce6801#n2223): + Called when the desired file is not inside the directory cache. + +### Mount Points + +The following `kprobes` are used to collect `mount` & `unmount` call counts: + +- [`mount`](https://man7.org/linux/man-pages/man2/mount.2.html): mount filesystem on host. +- [`umount`](https://man7.org/linux/man-pages/man2/umount.2.html): umount filesystem on host. + +### Networking Stack + +Netdata monitors socket bandwidth attaching `kprobes` for internal functions. + +#### TCP functions + +This chart demonstrates calls to functions `tcp_sendmsg`, `tcp_cleanup_rbuf`, and `tcp_close`; these functions are used +to send & receive data and to close connections when `TCP` protocol is used. 
+ +#### TCP bandwidth + +Like the previous chart, this one also monitors `tcp_sendmsg` and `tcp_cleanup_rbuf`, but instead of showing the number +of calls, it demonstrates the number of bytes sent and received. + +#### TCP retransmit + +This chart demonstrates calls to function `tcp_retransmit_skb` that is responsible for executing TCP retransmission when the +receiver did not acknowledge the packet within the expected time. + +#### UDP functions + +This chart demonstrates calls to functions `udp_sendmsg` and `udp_recvmsg`, which are responsible for sending & +receiving data for connections when the `UDP` protocol is used. + +#### UDP bandwidth + +Like the previous chart, this one also monitors `udp_sendmsg` and `udp_recvmsg`, but instead of showing the number of +calls, it monitors the number of bytes sent and received. + +### Apps + +#### OOM Killing + +These are tracepoints related to [OOM](https://en.wikipedia.org/wiki/Out_of_memory) killing processes. + +- `oom/mark_victim`: Monitors when an oomkill event happens. ## Configuration @@ -134,7 +482,7 @@ cd /etc/netdata/ # Replace with your Netdata configuration directory, if not / The `[global]` section defines settings for the whole eBPF collector. -#### ebpf load mode +#### eBPF load mode The collector has two different eBPF programs. These programs monitor the same functions inside the kernel, but they monitor, process, and display different kinds of information. @@ -143,43 +491,20 @@ By default, this plugin uses the `entry` mode. Changing this mode can create sig system, but also offer valuable information if you are developing or debugging software. The `ebpf load mode` option accepts the following values: -- `entry`: This is the default mode. In this mode, the eBPF collector only monitors calls for the functions described - in the sections above, and does not show charts related to errors. 
-- `return`: In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates - new charts for the return of these functions, such as errors. Monitoring function returns can help in debugging - software, such as failing to close file descriptors or creating zombie processes. -- `update every`: Number of seconds used for eBPF to send data for Netdata. -- `pid table size`: Defines the maximum number of PIDs stored inside the application hash table. - +- `entry`: This is the default mode. In this mode, the eBPF collector only monitors calls for the functions described in + the sections above, and does not show charts related to errors. +- `return`: In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates new + charts for the return of these functions, such as errors. Monitoring function returns can help in debugging software, + such as failing to close file descriptors or creating zombie processes. +- `update every`: Number of seconds used for eBPF to send data for Netdata. +- `pid table size`: Defines the maximum number of PIDs stored inside the application hash table. + #### Integration with `apps.plugin` The eBPF collector also creates charts for each running application through an integration with the [`apps.plugin`](/collectors/apps.plugin/README.md). This integration helps you understand how specific applications interact with the Linux kernel. -When the integration is enabled, your dashboard will also show the following charts using low-level Linux metrics: - -- eBPF file - - Number of calls to open files. (`apps.file_open`) - - Number of files closed. (`apps.file_closed`) - - Number of calls to open files that returned errors. - - Number of calls to close files that returned errors. -- eBPF syscall - - Number of calls to delete files. (`apps.file_deleted`) - - Number of calls to `vfs_write`. (`apps.vfs_write_call`) - - Number of calls to `vfs_read`. 
(`apps.vfs_read_call`) - - Number of bytes written with `vfs_write`. (`apps.vfs_write_bytes`) - - Number of bytes read with `vfs_read`. (`apps.vfs_read_bytes`) - - Number of calls to write a file that returned errors. - - Number of calls to read a file that returned errors. -- eBPF process - - Number of process created with `do_fork`. (`apps.process_create`) - - Number of threads created with `do_fork` or `__x86_64_sys_clone`, depending on your system's kernel version. (`apps.thread_create`) - - Number of times that a process called `do_exit`. (`apps.task_close`) -- eBPF net - - Number of bytes sent. (`apps.bandwidth_sent`) - - Number of bytes received. (`apps.bandwidth_recv`) - If you want to _disable_ the integration with `apps.plugin` along with the above charts, change the setting `apps` to `no`. @@ -188,30 +513,129 @@ If you want to _disable_ the integration with `apps.plugin` along with the above apps = yes ``` -When the integration is enabled, eBPF collector allocates memory for each process running. The total - allocated memory has direct relationship with the kernel version. When the eBPF plugin is running on kernels newer than `4.15`, - it uses per-cpu maps to speed up the update of hash tables. This also implies storing data for the same PID - for each processor it runs. +When the integration is enabled, eBPF collector allocates memory for each process running. The total allocated memory +has direct relationship with the kernel version. When the eBPF plugin is running on kernels newer than `4.15`, it uses +per-cpu maps to speed up the update of hash tables. This also implies storing data for the same PID for each processor +it runs. + +#### Integration with `cgroups.plugin` -#### `[ebpf programs]` +The eBPF collector also creates charts for each cgroup through an integration with the +[`cgroups.plugin`](/collectors/cgroups.plugin/README.md). This integration helps you understand how a specific cgroup +interacts with the Linux kernel. 
+ +The integration with `cgroups.plugin` is disabled by default to avoid creating overhead on your system. If you want to +_enable_ the integration with `cgroups.plugin`, change the `cgroups` setting to `yes`. + +```conf +[global] + cgroups = yes +``` + +If you do not need to monitor specific metrics for your `cgroups`, you can enable `cgroups` inside +`ebpf.d.conf`, and then disable the plugin for a specific `thread` by following the steps in the +[Configuration](#configuration) section. + +#### Integration Dashboard Elements + +When an integration is enabled, your dashboard will also show the following cgroups and apps charts using low-level +Linux metrics: + +> Note: The parenthetical accompanying each bulleted item provides the chart name. + +- mem + - Number of processes killed due to out of memory. (`oomkills`) +- process + - Number of processes created with `do_fork`. (`process_create`) + - Number of threads created with `do_fork` or `clone(2)`, depending on your system's kernel + version. (`thread_create`) + - Number of times that a process called `do_exit`. (`task_exit`) + - Number of times that a process called `release_task`. (`task_close`) + - Number of times that an error happened when creating a thread or process. (`task_error`) +- swap + - Number of calls to `swap_readpage`. (`swap_read_call`) + - Number of calls to `swap_writepage`. (`swap_write_call`) +- network + - Number of bytes sent. (`total_bandwidth_sent`) + - Number of bytes received. (`total_bandwidth_recv`) + - Number of calls to `tcp_sendmsg`. (`bandwidth_tcp_send`) + - Number of calls to `tcp_cleanup_rbuf`. (`bandwidth_tcp_recv`) + - Number of calls to `tcp_retransmit_skb`. (`bandwidth_tcp_retransmit`) + - Number of calls to `udp_sendmsg`. (`bandwidth_udp_send`) + - Number of calls to `udp_recvmsg`. (`bandwidth_udp_recv`) +- file access + - Number of calls to open files. (`file_open`) + - Number of calls to open files that returned errors. (`open_error`) + - Number of files closed. 
(`file_closed`) + - Number of calls to close files that returned errors. (`file_error_closed`) +- vfs + - Number of calls to `vfs_unlink`. (`file_deleted`) + - Number of calls to `vfs_write`. (`vfs_write_call`) + - Number of calls to write a file that returned errors. (`vfs_write_error`) + - Number of calls to `vfs_read`. (`vfs_read_call`) + - Number of bytes written with `vfs_write`. (`vfs_write_bytes`) + - Number of bytes read with `vfs_read`. (`vfs_read_bytes`) + - Number of calls to read a file that returned errors. (`vfs_read_error`) + - Number of calls to `vfs_fsync`. (`vfs_fsync`) + - Number of calls to sync a file that returned errors. (`vfs_fsync_error`) + - Number of calls to `vfs_open`. (`vfs_open`) + - Number of calls to open a file that returned errors. (`vfs_open_error`) + - Number of calls to `vfs_create`. (`vfs_create`) + - Number of calls to create a file that returned errors. (`vfs_create_error`) +- page cache + - Ratio of pages accessed. (`cachestat_ratio`) + - Number of modified pages ("dirty"). (`cachestat_dirties`) + - Number of accessed pages. (`cachestat_hits`) + - Number of pages brought from disk. (`cachestat_misses`) +- directory cache + - Ratio of files available in directory cache. (`dc_hit_ratio`) + - Number of files accessed. (`dc_reference`) + - Number of files accessed that were not in cache. (`dc_not_cache`) + - Number of files not found. (`dc_not_found`) +- ipc shm + - Number of calls to `shmget`. (`shmget_call`) + - Number of calls to `shmat`. (`shmat_call`) + - Number of calls to `shmdt`. (`shmdt_call`) + - Number of calls to `shmctl`. (`shmctl_call`) + +### `[ebpf programs]` The eBPF collector enables and runs the following eBPF programs by default: -- `cachestat`: Netdata's eBPF data collector creates charts about the memory page cache. When the integration with - [`apps.plugin`](/collectors/apps.plugin/README.md) is enabled, this collector creates charts for the whole host _and_ - for each application. 
-- `dcstat` : This eBPF program creates charts that show information about file access using directory cache. It appends - `kprobes` for `lookup_fast()` and `d_lookup()` to identify if files are inside directory cache, outside and - files are not found. -- `process`: This eBPF program creates charts that show information about process creation, VFS IO, and files removed. - When in `return` mode, it also creates charts showing errors when these operations are executed. -- `network viewer`: This eBPF program creates charts with information about `TCP` and `UDP` functions, including the - bandwidth consumed by each. -- `sync`: Montitor calls for syscalls sync(2), fsync(2), fdatasync(2), syncfs(2), msync(2), and sync_file_range(2). +- `fd` : This eBPF program creates charts that show information about calls to open files. +- `mount`: This eBPF program creates charts that show calls to syscalls mount(2) and umount(2). +- `shm`: This eBPF program creates charts that show calls to syscalls shmget(2), shmat(2), shmdt(2) and shmctl(2). +- `sync`: Monitor calls to syscalls sync(2), fsync(2), fdatasync(2), syncfs(2), msync(2), and sync_file_range(2). +- `network viewer`: This eBPF program creates charts with information about `TCP` and `UDP` functions, including the + bandwidth consumed by each. +- `vfs`: This eBPF program creates charts that show information about VFS (Virtual File System) functions. +- `process`: This eBPF program creates charts that show information about process life. When in `return` mode, it also + creates charts showing errors when these operations are executed. +- `hardirq`: This eBPF program creates charts that show information about time spent servicing individual hardware + interrupt requests (hard IRQs). +- `softirq`: This eBPF program creates charts that show information about time spent servicing individual software + interrupt requests (soft IRQs). 
+- `oomkill`: This eBPF program creates a chart that shows OOM kills for all applications recognized via + the `apps.plugin` integration. Note that this program will show application charts regardless of whether apps + integration is turned on or off. + +You can also enable the following eBPF programs: + +- `cachestat`: Netdata's eBPF data collector creates charts about the memory page cache. When the integration with + [`apps.plugin`](/collectors/apps.plugin/README.md) is enabled, this collector creates charts for the whole host _and_ + for each application. +- `dcstat` : This eBPF program creates charts that show information about file access using directory cache. It appends + `kprobes` for `lookup_fast()` and `d_lookup()` to identify if files are inside directory cache, outside and files are + not found. +- `disk` : This eBPF program creates charts that show information about disk latency independent of filesystem. +- `filesystem` : This eBPF program creates charts that show information about some filesystem latency. +- `swap` : This eBPF program creates charts that show information about swap access. +- `mdflush`: This eBPF program creates charts that show information about + multi-device software flushes. ## Thread configuration -You can configure each thread of the eBPF data collector by editing either the `cachestat.conf`, `process.conf`, +You can configure each thread of the eBPF data collector by editing either the `cachestat.conf`, `process.conf`, or `network.conf` files. Use [`edit-config`](/docs/configure/nodes.md) from your Netdata config directory: ```bash @@ -225,10 +649,16 @@ The following configuration files are available: - `cachestat.conf`: Configuration for the `cachestat` thread. - `dcstat.conf`: Configuration for the `dcstat` thread. +- `disk.conf`: Configuration for the `disk` thread. +- `fd.conf`: Configuration for the `file descriptor` thread. +- `filesystem.conf`: Configuration for the `filesystem` thread. 
+- `hardirq.conf`: Configuration for the `hardirq` thread. - `process.conf`: Configuration for the `process` thread. -- `network.conf`: Configuration for the `network viewer` thread. This config file overwrites the global options and - also lets you specify which network the eBPF collector monitors. +- `network.conf`: Configuration for the `network viewer` thread. This config file overwrites the global options and also + lets you specify which network the eBPF collector monitors. +- `softirq.conf`: Configuration for the `softirq` thread. - `sync.conf`: Configuration for the `sync` thread. +- `vfs.conf`: Configuration for the `vfs` thread. ### Network configuration @@ -237,7 +667,7 @@ are divided in the following sections: #### `[network connections]` -You can configure the information shown on `outbound` and `inbound` charts with the settings in this section. +You can configure the information shown on `outbound` and `inbound` charts with the settings in this section. ```conf [network connections] @@ -249,24 +679,24 @@ You can configure the information shown on `outbound` and `inbound` charts with ``` When you define a `ports` setting, Netdata will collect network metrics for that specific port. For example, if you -write `ports = 19999`, Netdata will collect only connections for itself. The `hostnames` setting accepts -[simple patterns](/libnetdata/simple_pattern/README.md). The `ports`, and `ips` settings accept negation (`!`) to - deny specific values or asterisk alone to define all values. +write `ports = 19999`, Netdata will collect only connections for itself. The `hostnames` setting accepts +[simple patterns](/libnetdata/simple_pattern/README.md). The `ports`, and `ips` settings accept negation (`!`) to deny +specific values or asterisk alone to define all values. In the above example, Netdata will collect metrics for all ports between 1 and 443, with the exception of 53 (domain) and 145. 
The following options are available: -- `ports`: Define the destination ports for Netdata to monitor. -- `hostnames`: The list of hostnames that can be resolved to an IP address. -- `ips`: The IP or range of IPs that you want to monitor. You can use IPv4 or IPv6 addresses, use dashes to define a - range of IPs, or use CIDR values. The default behavior is to only collect data for private IP addresses, but this - can be changed with the `ips` setting. - -By default, Netdata displays up to 500 dimensions on network connection charts. If there are more possible dimensions, -they will be bundled into the `other` dimension. You can increase the number of shown dimensions by changing the `maximum -dimensions` setting. +- `ports`: Define the destination ports for Netdata to monitor. +- `hostnames`: The list of hostnames that can be resolved to an IP address. +- `ips`: The IP or range of IPs that you want to monitor. You can use IPv4 or IPv6 addresses, use dashes to define a + range of IPs, or use CIDR values. The default behavior is to only collect data for private IP addresses, but this can + be changed with the `ips` setting. + +By default, Netdata displays up to 500 dimensions on network connection charts. If there are more possible dimensions, +they will be bundled into the `other` dimension. You can increase the number of shown dimensions by changing +the `maximum dimensions` setting. The dimensions for the traffic charts are created using the destination IPs of the sockets by default. This can be changed setting `resolve hostname ips = yes` and restarting Netdata, after this Netdata will create dimensions using @@ -274,8 +704,9 @@ the `hostnames` every time that is possible to resolve IPs to their hostnames. #### `[service name]` -Netdata uses the list of services in `/etc/services` to plot network connection charts. 
If this file does not contain the -name for a particular service you use in your infrastructure, you will need to add it to the `[service name]` section. +Netdata uses the list of services in `/etc/services` to plot network connection charts. If this file does not contain +the name for a particular service you use in your infrastructure, you will need to add it to the `[service name]` +section. For example, Netdata's default port (`19999`) is not listed in `/etc/services`. To associate that port with the Netdata service in network connection charts, and thus see the name of the service instead of its port, define it: @@ -287,7 +718,7 @@ service in network connection charts, and thus see the name of the service inste ### Sync configuration -The sync configuration has specific options to disable monitoring for syscalls, as default option all syscalls are +The sync configuration has specific options to disable monitoring for syscalls, as default option all syscalls are monitored. ```conf @@ -300,6 +731,22 @@ monitored. sync_file_range = yes ``` +### Filesystem configuration + +The filesystem configuration has specific options to disable monitoring for filesystems, by default all filesystems are +monitored. + +```conf +[filesystem] + btrfsdist = yes + ext4dist = yes + nfsdist = yes + xfsdist = yes + zfsdist = yes +``` + +The ebpf program `nfsdist` monitors only `nfs` mount points. + ## Troubleshooting If the eBPF collector does not work, you can troubleshoot it by running the `ebpf.plugin` command and investigating its @@ -330,17 +777,18 @@ curl -sSL https://raw.githubusercontent.com/netdata/kernel-collector/master/tool If this script returns no output, your system is ready to compile and run the eBPF collector. -If you see a warning about a missing kernel configuration (`KPROBES KPROBES_ON_FTRACE HAVE_KPROBES BPF BPF_SYSCALL -BPF_JIT`), you will need to recompile your kernel to support this configuration. 
The process of recompiling Linux -kernels varies based on your distribution and version. Read the documentation for your system's distribution to learn -more about the specific workflow for recompiling the kernel, ensuring that you set all the necessary +If you see a warning about a missing kernel +configuration (`KPROBES KPROBES_ON_FTRACE HAVE_KPROBES BPF BPF_SYSCALL BPF_JIT`), you will need to recompile your kernel +to support this configuration. The process of recompiling Linux kernels varies based on your distribution and version. +Read the documentation for your system's distribution to learn more about the specific workflow for recompiling the +kernel, ensuring that you set all the necessary -- [Ubuntu](https://wiki.ubuntu.com/Kernel/BuildYourOwnKernel) -- [Debian](https://kernel-team.pages.debian.net/kernel-handbook/ch-common-tasks.html#s-common-official) -- [Fedora](https://fedoraproject.org/wiki/Building_a_custom_kernel) -- [CentOS](https://wiki.centos.org/HowTos/Custom_Kernel) -- [Arch Linux](https://wiki.archlinux.org/index.php/Kernel/Traditional_compilation) -- [Slackware](https://docs.slackware.com/howtos:slackware_admin:kernelbuilding) +- [Ubuntu](https://wiki.ubuntu.com/Kernel/BuildYourOwnKernel) +- [Debian](https://kernel-team.pages.debian.net/kernel-handbook/ch-common-tasks.html#s-common-official) +- [Fedora](https://fedoraproject.org/wiki/Building_a_custom_kernel) +- [CentOS](https://wiki.centos.org/HowTos/Custom_Kernel) +- [Arch Linux](https://wiki.archlinux.org/index.php/Kernel/Traditional_compilation) +- [Slackware](https://docs.slackware.com/howtos:slackware_admin:kernelbuilding) ### Mount `debugfs` and `tracefs` @@ -353,19 +801,20 @@ sudo mount -t tracefs nodev /sys/kernel/tracing ``` If they are already mounted, you will see an error. You can also configure your system's `/etc/fstab` configuration to -mount these filesystems on startup. 
More information can be found in the [ftrace documentation](https://www.kernel.org/doc/Documentation/trace/ftrace.txt). +mount these filesystems on startup. More information can be found in +the [ftrace documentation](https://www.kernel.org/doc/Documentation/trace/ftrace.txt). ## Performance -eBPF monitoring is complex and produces a large volume of metrics. We've discovered scenarios where the eBPF plugin +eBPF monitoring is complex and produces a large volume of metrics. We've discovered scenarios where the eBPF plugin significantly increases kernel memory usage by several hundred MB. -If your node is experiencing high memory usage and there is no obvious culprit to be found in the `apps.mem` chart, -consider testing for high kernel memory usage by [disabling eBPF monitoring](#configuration). Next, -[restart Netdata](/docs/configure/start-stop-restart.md) with `sudo systemctl restart netdata` to see if system -memory usage (see the `system.ram` chart) has dropped significantly. +If your node is experiencing high memory usage and there is no obvious culprit to be found in the `apps.mem` chart, +consider testing for high kernel memory usage by [disabling eBPF monitoring](#configuration). Next, +[restart Netdata](/docs/configure/start-stop-restart.md) with `sudo systemctl restart netdata` to see if system memory +usage (see the `system.ram` chart) has dropped significantly. -Beginning with `v1.31`, kernel memory usage is configurable via the [`pid table size` setting](#ebpf-load-mode) +Beginning with `v1.31`, kernel memory usage is configurable via the [`pid table size` setting](#ebpf-load-mode) in `ebpf.conf`. 
## SELinux @@ -423,7 +872,7 @@ allow unconfined_service_t self:bpf { map_create map_read map_write prog_load pr Then compile your `netdata_ebpf.te` file with the following commands to create a binary that loads the new policies: ```bash -# checkmodule -M -m -o netdata_ebpf.mod netdata_ebpf.te +# checkmodule -M -m -o netdata_ebpf.mod netdata_ebpf.te # semodule_package -o netdata_ebpf.pp -m netdata_ebpf.mod ``` @@ -450,9 +899,4 @@ shows how the lockdown module impacts `ebpf.plugin` based on the selected option If you or your distribution compiled the kernel with the last combination, your system cannot load shared libraries required to run `ebpf.plugin`. -## Cleaning `kprobe_events` -The eBPF collector adds entries to the file `/sys/kernel/debug/tracing/kprobe_events`, and cleans them on exit, unless -another process prevents it. If you need to clean the eBPF entries safely, you can manually run the script -`/usr/libexec/netdata/plugins.d/reset_netdata_trace.sh`. - [![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fcollectors%2Febpf.plugin%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)](<>) diff --git a/collectors/ebpf.plugin/ebpf.c b/collectors/ebpf.plugin/ebpf.c index 5cc005f3..71a13e84 100644 --- a/collectors/ebpf.plugin/ebpf.c +++ b/collectors/ebpf.plugin/ebpf.c @@ -55,7 +55,6 @@ char *ebpf_plugin_dir = PLUGINS_DIR; static char *ebpf_configured_log_dir = LOG_DIR; char *ebpf_algorithms[] = {"absolute", "incremental"}; -int update_every = 1; static int thread_finished = 0; int close_ebpf_plugin = 0; struct config collector_config = { .first_section = NULL, @@ -67,7 +66,7 @@ struct config collector_config = { .first_section = NULL, int running_on_kernel = 0; char kernel_string[64]; int ebpf_nprocs; -static int isrh; +int isrh = 0; uint32_t finalized_threads = 1; pthread_mutex_t lock; @@ -76,32 +75,109 @@ 
pthread_cond_t collect_data_cond_var; ebpf_module_t ebpf_modules[] = { { .thread_name = "process", .config_name = "process", .enabled = 0, .start_routine = ebpf_process_thread, - .update_time = 1, .global_charts = 1, .apps_charts = 1, .mode = MODE_ENTRY, - .optional = 0, .apps_routine = ebpf_process_create_apps_charts, .maps = NULL, - .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL}, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = ebpf_process_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &process_config, + .config_file = NETDATA_PROCESS_CONFIG_FILE}, { .thread_name = "socket", .config_name = "socket", .enabled = 0, .start_routine = ebpf_socket_thread, - .update_time = 1, .global_charts = 1, .apps_charts = 1, .mode = MODE_ENTRY, - .optional = 0, .apps_routine = ebpf_socket_create_apps_charts, .maps = NULL, - .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL}, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = ebpf_socket_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &socket_config, + .config_file = NETDATA_NETWORK_CONFIG_FILE}, { .thread_name = "cachestat", .config_name = "cachestat", .enabled = 0, .start_routine = ebpf_cachestat_thread, - .update_time = 1, .global_charts = 1, .apps_charts = 1, .mode = MODE_ENTRY, - .optional = 0, .apps_routine = ebpf_cachestat_create_apps_charts, .maps = NULL, - .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL}, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = 
ebpf_cachestat_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &cachestat_config, + .config_file = NETDATA_CACHESTAT_CONFIG_FILE}, { .thread_name = "sync", .config_name = "sync", .enabled = 0, .start_routine = ebpf_sync_thread, - .update_time = 1, .global_charts = 1, .apps_charts = 1, .mode = MODE_ENTRY, - .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL }, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &sync_config, + .config_file = NETDATA_SYNC_CONFIG_FILE}, { .thread_name = "dc", .config_name = "dc", .enabled = 0, .start_routine = ebpf_dcstat_thread, - .update_time = 1, .global_charts = 1, .apps_charts = 1, .mode = MODE_ENTRY, - .optional = 0, .apps_routine = ebpf_dcstat_create_apps_charts, .maps = NULL, - .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE }, - { .thread_name = NULL, .enabled = 0, .start_routine = NULL, .update_time = 1, - .global_charts = 0, .apps_charts = 1, .mode = MODE_ENTRY, - .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = 0, .names = NULL }, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = ebpf_dcstat_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &dcstat_config, + .config_file = NETDATA_DIRECTORY_DCSTAT_CONFIG_FILE}, + { .thread_name = "swap", .config_name = "swap", .enabled = 0, .start_routine = ebpf_swap_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = 
ebpf_swap_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &swap_config, + .config_file = NETDATA_DIRECTORY_SWAP_CONFIG_FILE}, + { .thread_name = "vfs", .config_name = "vfs", .enabled = 0, .start_routine = ebpf_vfs_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = ebpf_vfs_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &vfs_config, + .config_file = NETDATA_DIRECTORY_VFS_CONFIG_FILE }, + { .thread_name = "filesystem", .config_name = "filesystem", .enabled = 0, .start_routine = ebpf_filesystem_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &fs_config, + .config_file = NETDATA_FILESYSTEM_CONFIG_FILE}, + { .thread_name = "disk", .config_name = "disk", .enabled = 0, .start_routine = ebpf_disk_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &disk_config, + .config_file = NETDATA_DISK_CONFIG_FILE}, + { .thread_name = "mount", .config_name = "mount", .enabled = 0, .start_routine = ebpf_mount_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &mount_config, + .config_file = NETDATA_MOUNT_CONFIG_FILE}, + { .thread_name = "fd", .config_name = 
"fd", .enabled = 0, .start_routine = ebpf_fd_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = ebpf_fd_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &fd_config, + .config_file = NETDATA_FD_CONFIG_FILE}, + { .thread_name = "hardirq", .config_name = "hardirq", .enabled = 0, .start_routine = ebpf_hardirq_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &hardirq_config, + .config_file = NETDATA_HARDIRQ_CONFIG_FILE}, + { .thread_name = "softirq", .config_name = "softirq", .enabled = 0, .start_routine = ebpf_softirq_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &softirq_config, + .config_file = NETDATA_SOFTIRQ_CONFIG_FILE}, + { .thread_name = "oomkill", .config_name = "oomkill", .enabled = 0, .start_routine = ebpf_oomkill_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = ebpf_oomkill_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &oomkill_config, + .config_file = NETDATA_OOMKILL_CONFIG_FILE}, + { .thread_name = "shm", .config_name = "shm", .enabled = 0, .start_routine = ebpf_shm_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .cgroup_charts = 
CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = ebpf_shm_create_apps_charts, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &shm_config, + .config_file = NETDATA_DIRECTORY_SHM_CONFIG_FILE}, + { .thread_name = "mdflush", .config_name = "mdflush", .enabled = 0, .start_routine = ebpf_mdflush_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &mdflush_config, + .config_file = NETDATA_DIRECTORY_MDFLUSH_CONFIG_FILE}, + { .thread_name = NULL, .enabled = 0, .start_routine = NULL, .update_every = EBPF_DEFAULT_UPDATE_EVERY, + .global_charts = 0, .apps_charts = CONFIG_BOOLEAN_NO, .cgroup_charts = CONFIG_BOOLEAN_NO, + .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = 0, .names = NULL, + .cfg = NULL, .config_name = NULL}, }; // Link with apps.plugin ebpf_process_stat_t *global_process_stat = NULL; +// Link with cgroup.plugin +netdata_ebpf_cgroup_shm_t shm_ebpf_cgroup = {NULL, NULL}; +int shm_fd_ebpf_cgroup = -1; +sem_t *shm_sem_ebpf_cgroup = SEM_FAILED; +pthread_mutex_t mutex_cgroup_shm; + //Network viewer ebpf_network_viewer_options_t network_viewer_opt; @@ -155,6 +231,33 @@ static void ebpf_exit(int sig) freez(dcstat_pid); } + if (ebpf_modules[EBPF_MODULE_SWAP_IDX].enabled) { + ebpf_modules[EBPF_MODULE_SWAP_IDX].enabled = 0; + clean_swap_pid_structures(); + freez(swap_pid); + } + + if (ebpf_modules[EBPF_MODULE_VFS_IDX].enabled) { + ebpf_modules[EBPF_MODULE_VFS_IDX].enabled = 0; + clean_vfs_pid_structures(); + freez(vfs_pid); + } + + if (ebpf_modules[EBPF_MODULE_FD_IDX].enabled) { + ebpf_modules[EBPF_MODULE_FD_IDX].enabled = 0; + clean_fd_pid_structures(); + freez(fd_pid); + } + + if (ebpf_modules[EBPF_MODULE_SHM_IDX].enabled) { + 
ebpf_modules[EBPF_MODULE_SHM_IDX].enabled = 0; + clean_shm_pid_structures(); + freez(shm_pid); + } + + ebpf_close_cgroup_shm(); + + ebpf_clean_cgroup_pids(); /* int ret = fork(); if (ret < 0) // error @@ -241,8 +344,7 @@ inline void write_end_chart() */ void write_chart_dimension(char *dim, long long value) { - int ret = printf("SET %s = %lld\n", dim, value); - UNUSED(ret); + printf("SET %s = %lld\n", dim, value); } /** @@ -253,7 +355,7 @@ void write_chart_dimension(char *dim, long long value) * @param move the pointer with the values that will be published * @param end the number of values that will be written on standard output * - * @return It returns a variable tha maps the charts that did not have zero values. + * @return It returns a variable that maps the charts that did not have zero values. */ void write_count_chart(char *name, char *family, netdata_publish_syscall_t *move, uint32_t end) { @@ -322,7 +424,7 @@ void ebpf_one_dimension_write_charts(char *family, char *chart, char *dim, long * @param dread the dimension name * @param vread the value for previous dimension * - * @return It returns a variable tha maps the charts that did not have zero values. + * @return It returns a variable that maps the charts that did not have zero values. */ void write_io_chart(char *chart, char *family, char *dwrite, long long vwrite, char *dread, long long vread) { @@ -337,6 +439,36 @@ void write_io_chart(char *chart, char *family, char *dwrite, long long vwrite, c /** * Write chart cmd on standard output * + * @param type chart type + * @param id chart id + * @param title chart title + * @param units units label + * @param family group name used to attach the chart on dashboard + * @param charttype chart type + * @param context chart context + * @param order chart order + * @param update_every update interval used by plugin + * @param module chart module name, this is the eBPF thread. 
+ */ +void ebpf_write_chart_cmd(char *type, char *id, char *title, char *units, char *family, + char *charttype, char *context, int order, int update_every, char *module) +{ + printf("CHART %s.%s '' '%s' '%s' '%s' '%s' '%s' %d %d '' 'ebpf.plugin' '%s'\n", + type, + id, + title, + units, + (family)?family:"", + (context)?context:"", + (charttype)?charttype:"", + order, + update_every, + module); +} + +/** + * Write chart cmd on standard output + * * @param type chart type * @param id chart id * @param title chart title @@ -345,11 +477,12 @@ void write_io_chart(char *chart, char *family, char *dwrite, long long vwrite, c * @param charttype chart type * @param context chart context * @param order chart order + * @param update_every value to overwrite the update frequency set by the server. */ -void ebpf_write_chart_cmd(char *type, char *id, char *title, char *units, char *family, - char *charttype, char *context, int order) +void ebpf_write_chart_obsolete(char *type, char *id, char *title, char *units, char *family, + char *charttype, char *context, int order, int update_every) { - printf("CHART %s.%s '' '%s' '%s' '%s' '%s' '%s' %d %d\n", + printf("CHART %s.%s '' '%s' '%s' '%s' '%s' '%s' %d %d 'obsolete'\n", type, id, title, @@ -395,17 +528,19 @@ void ebpf_create_global_dimension(void *ptr, int end) /** * Call write_chart_cmd to create the charts * - * @param type chart type - * @param id chart id - * @param title chart title - * @param units axis label - * @param family group name used to attach the chart on dashboard - * @param context chart context - * @param charttype chart type - * @param order order number of the specified chart - * @param ncd a pointer to a function called to create dimensions - * @param move a pointer for a structure that has the dimensions - * @param end number of dimensions for the chart created + * @param type chart type + * @param id chart id + * @param title chart title + * @param units axis label + * @param family group name used to 
attach the chart on dashboard + * @param context chart context + * @param charttype chart type + * @param order order number of the specified chart + * @param ncd a pointer to a function called to create dimensions + * @param move a pointer for a structure that has the dimensions + * @param end number of dimensions for the chart created + * @param update_every update interval used with chart. + * @param module chart module name, this is the eBPF thread. */ void ebpf_create_chart(char *type, char *id, @@ -417,11 +552,15 @@ void ebpf_create_chart(char *type, int order, void (*ncd)(void *, int), void *move, - int end) + int end, + int update_every, + char *module) { - ebpf_write_chart_cmd(type, id, title, units, family, charttype, context, order); + ebpf_write_chart_cmd(type, id, title, units, family, charttype, context, order, update_every, module); - ncd(move, end); + if (ncd) { + ncd(move, end); + } } /** @@ -435,12 +574,15 @@ void ebpf_create_chart(char *type, * @param order the chart order * @param algorithm the algorithm used by dimension * @param root structure used to create the dimensions. + * @param update_every update interval used by plugin + * @param module chart module name, this is the eBPF thread. */ void ebpf_create_charts_on_apps(char *id, char *title, char *units, char *family, char *charttype, int order, - char *algorithm, struct target *root) + char *algorithm, struct target *root, int update_every, char *module) { struct target *w; - ebpf_write_chart_cmd(NETDATA_APPS_FAMILY, id, title, units, family, charttype, NULL, order); + ebpf_write_chart_cmd(NETDATA_APPS_FAMILY, id, title, units, family, charttype, NULL, order, + update_every, module); for (w = root; w; w = w->next) { if (unlikely(w->exposed)) @@ -448,6 +590,31 @@ void ebpf_create_charts_on_apps(char *id, char *title, char *units, char *family } } +/** + * Call the necessary functions to create a name. 
+ * + * @param family family name + * @param name chart name + * @param hist0 histogram values + * @param dimensions dimension values. + * @param end number of bins that will be sent to Netdata. + * + * @return It returns a variable that maps the charts that did not have zero values. + */ +void write_histogram_chart(char *family, char *name, const netdata_idx_t *hist, char **dimensions, uint32_t end) +{ + write_begin_chart(family, name); + + uint32_t i; + for (i = 0; i < end; i++) { + write_chart_dimension(dimensions[i], (long long) hist[i]); + } + + write_end_chart(); + + fflush(stdout); +} + /***************************************************************** * * FUNCTIONS TO DEFINE OPTIONS @@ -503,43 +670,68 @@ static inline void ebpf_set_thread_mode(netdata_run_mode_t lmode) /** * Enable specific charts selected by user. * - * @param em the structure that will be changed - * @param enable the status about the apps charts. + * @param em the structure that will be changed + * @param disable_apps the status about the apps charts. + * @param disable_cgroup the status about the cgroups charts. */ -static inline void ebpf_enable_specific_chart(struct ebpf_module *em, int enable) +static inline void ebpf_enable_specific_chart(struct ebpf_module *em, int disable_apps, int disable_cgroup) { - em->enabled = 1; - if (!enable) { - em->apps_charts = 1; + em->enabled = CONFIG_BOOLEAN_YES; + + // oomkill stores data inside apps submenu, so it always need to have apps_enabled for plugin to create + // its chart, without this comparison eBPF.plugin will try to store invalid data when apps is disabled. 
+ if (!disable_apps || !strcmp(em->thread_name, "oomkill")) { + em->apps_charts = CONFIG_BOOLEAN_YES; } - em->global_charts = 1; + + if (!disable_cgroup) { + em->cgroup_charts = CONFIG_BOOLEAN_YES; + } + + em->global_charts = CONFIG_BOOLEAN_YES; } /** * Enable all charts * - * @param apps what is the current status of apps + * @param apps what is the current status of apps + * @param cgroups what is the current status of cgroups */ -static inline void ebpf_enable_all_charts(int apps) +static inline void ebpf_enable_all_charts(int apps, int cgroups) { int i; for (i = 0; ebpf_modules[i].thread_name; i++) { - ebpf_enable_specific_chart(&ebpf_modules[i], apps); + ebpf_enable_specific_chart(&ebpf_modules[i], apps, cgroups); } } /** + * Disable all Global charts + * + * Disable charts + */ +static inline void disable_all_global_charts() +{ + int i; + for (i = 0; ebpf_modules[i].thread_name; i++) { + ebpf_modules[i].enabled = 0; + ebpf_modules[i].global_charts = 0; + } +} + + +/** * Enable the specified chart group * * @param idx the index of ebpf_modules that I am enabling * @param disable_apps should I keep apps charts? */ -static inline void ebpf_enable_chart(int idx, int disable_apps) +static inline void ebpf_enable_chart(int idx, int disable_apps, int disable_cgroup) { int i; for (i = 0; ebpf_modules[i].thread_name; i++) { if (i == idx) { - ebpf_enable_specific_chart(&ebpf_modules[i], disable_apps); + ebpf_enable_specific_chart(&ebpf_modules[i], disable_apps, disable_cgroup); break; } } @@ -559,6 +751,19 @@ static inline void ebpf_disable_apps() } /** + * Disable Cgroups + * + * Disable charts for apps loading only global charts. + */ +static inline void ebpf_disable_cgroups() +{ + int i; + for (i = 0; ebpf_modules[i].thread_name; i++) { + ebpf_modules[i].cgroup_charts = 0; + } +} + +/** * Print help on standard error for user knows how to use the collector. 
*/ void ebpf_print_help() @@ -579,39 +784,140 @@ void ebpf_print_help() " Released under GNU General Public License v3 or later.\n" " All rights reserved.\n" "\n" - " This program is a data collector plugin for netdata.\n" + " This eBPF.plugin is a data collector plugin for netdata.\n" "\n" - " Available command line options:\n" + " This plugin only accepts long options with one or two dashes. The available command line options are:\n" "\n" - " SECONDS Set the data collection frequency.\n" + " SECONDS Set the data collection frequency.\n" "\n" - " --help or -h Show this help.\n" + " [-]-help Show this help.\n" "\n" - " --version or -v Show software version.\n" + " [-]-version Show software version.\n" "\n" - " --global or -g Disable charts per application.\n" + " [-]-global Disable charts per application and cgroup.\n" "\n" - " --all or -a Enable all chart groups (global and apps), unless -g is also given.\n" + " [-]-all Enable all chart groups (global, apps, and cgroup), unless -g is also given.\n" "\n" - " --cachestat or -c Enable charts related to process run time.\n" + " [-]-cachestat Enable charts related to process run time.\n" "\n" - " --dcstat or -d Enable charts related to directory cache.\n" + " [-]-dcstat Enable charts related to directory cache.\n" "\n" - " --net or -n Enable network viewer charts.\n" + " [-]-disk Enable charts related to disk monitoring.\n" "\n" - " --process or -p Enable charts related to process run time.\n" + " [-]-filesystem Enable chart related to filesystem run time.\n" "\n" - " --return or -r Run the collector in return mode.\n" - "\n", - " --sync or -s Enable chart related to sync run time.\n" + " [-]-hardirq Enable chart related to hard IRQ latency.\n" + "\n" + " [-]-mdflush Enable charts related to multi-device flush.\n" + "\n" + " [-]-mount Enable charts related to mount monitoring.\n" "\n" + " [-]-net Enable network viewer charts.\n" + "\n" + " [-]-oomkill Enable chart related to OOM kill tracking.\n" + "\n" + " [-]-process 
Enable charts related to process run time.\n" + "\n" + " [-]-return Run the collector in return mode.\n" + "\n" + " [-]-shm Enable chart related to shared memory tracking.\n" + "\n" + " [-]-softirq Enable chart related to soft IRQ latency.\n" + "\n" + " [-]-sync Enable chart related to sync run time.\n" + "\n" + " [-]-swap Enable chart related to swap run time.\n" + "\n" + " [-]-vfs Enable chart related to vfs run time.\n" + "\n", VERSION, (year >= 116) ? year + 1900 : 2020); } /***************************************************************** * - * AUXILIAR FUNCTIONS USED DURING INITIALIZATION + * TRACEPOINT MANAGEMENT FUNCTIONS + * + *****************************************************************/ + +/** + * Enable a tracepoint. + * + * @return 0 on success, -1 on error. + */ +int ebpf_enable_tracepoint(ebpf_tracepoint_t *tp) +{ + int test = ebpf_is_tracepoint_enabled(tp->class, tp->event); + + // err? + if (test == -1) { + return -1; + } + // disabled? + else if (test == 0) { + // enable it then. + if (ebpf_enable_tracing_values(tp->class, tp->event)) { + return -1; + } + } + + // enabled now or already was. + tp->enabled = true; + + return 0; +} + +/** + * Disable a tracepoint if it's enabled. + * + * @return 0 on success, -1 on error. + */ +int ebpf_disable_tracepoint(ebpf_tracepoint_t *tp) +{ + int test = ebpf_is_tracepoint_enabled(tp->class, tp->event); + + // err? + if (test == -1) { + return -1; + } + // enabled? + else if (test == 1) { + // disable it then. + if (ebpf_disable_tracing_values(tp->class, tp->event)) { + return -1; + } + } + + // disable now or already was. + tp->enabled = false; + + return 0; +} + +/** + * Enable multiple tracepoints on a list of tracepoints which end when the + * class is NULL. + * + * @return the number of successful enables. 
+ */ +uint32_t ebpf_enable_tracepoints(ebpf_tracepoint_t *tps) +{ + uint32_t cnt = 0; + for (int i = 0; tps[i].class != NULL; i++) { + if (ebpf_enable_tracepoint(&tps[i]) == -1) { + infoerr("failed to enable tracepoint %s:%s", + tps[i].class, tps[i].event); + } + else { + cnt += 1; + } + } + return cnt; +} + +/***************************************************************** + * + * AUXILIARY FUNCTIONS USED DURING INITIALIZATION * *****************************************************************/ @@ -746,20 +1052,6 @@ static void ebpf_allocate_common_vectors() } /** - * Fill the ebpf_data structure with default values - * - * @param ef the pointer to set default values - */ -void fill_ebpf_data(ebpf_data_t *ef) -{ - memset(ef, 0, sizeof(ebpf_data_t)); - ef->kernel_string = kernel_string; - ef->running_on_kernel = running_on_kernel; - ef->map_fd = callocz(EBPF_MAX_MAPS, sizeof(int)); - ef->isrh = isrh; -} - -/** * Define how to load the ebpf programs * * @param ptr the option given by users @@ -778,13 +1070,16 @@ static inline void how_to_load(char *ptr) * Update interval * * Update default interval with value from user + * + * @param update_every value to overwrite the update frequency set by the server. */ -static void ebpf_update_interval() +static void ebpf_update_interval(int update_every) { int i; - int value = (int) appconfig_get_number(&collector_config, EBPF_GLOBAL_SECTION, EBPF_CFG_UPDATE_EVERY, 1); + int value = (int) appconfig_get_number(&collector_config, EBPF_GLOBAL_SECTION, EBPF_CFG_UPDATE_EVERY, + update_every); for (i = 0; ebpf_modules[i].thread_name; i++) { - ebpf_modules[i].update_time = value; + ebpf_modules[i].update_every = value; } } @@ -807,9 +1102,11 @@ static void ebpf_update_table_size() /** * Read collector values * - * @param disable_apps variable to store information related to apps. + * @param disable_apps variable to store information related to apps. + * @param disable_cgroups variable to store information related to cgroups. 
+ * @param update_every value to overwrite the update frequency set by the server. */ -static void read_collector_values(int *disable_apps) +static void read_collector_values(int *disable_apps, int *disable_cgroups, int update_every) { // Read global section char *value; @@ -822,7 +1119,7 @@ static void read_collector_values(int *disable_apps) how_to_load(value); - ebpf_update_interval(); + ebpf_update_interval(update_every); ebpf_update_table_size(); @@ -837,12 +1134,17 @@ static void read_collector_values(int *disable_apps) } *disable_apps = (int)enabled; + // Cgroup is a positive sentence, so we need to invert the values to disable apps. + // We are using the same pattern for cgroup and apps + enabled = appconfig_get_boolean(&collector_config, EBPF_GLOBAL_SECTION, EBPF_CFG_CGROUP, CONFIG_BOOLEAN_NO); + *disable_cgroups = (enabled == CONFIG_BOOLEAN_NO)?CONFIG_BOOLEAN_YES:CONFIG_BOOLEAN_NO; + // Read ebpf programs section enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, ebpf_modules[EBPF_MODULE_PROCESS_IDX].config_name, CONFIG_BOOLEAN_YES); int started = 0; if (enabled) { - ebpf_enable_chart(EBPF_MODULE_PROCESS_IDX, *disable_apps); + ebpf_enable_chart(EBPF_MODULE_PROCESS_IDX, *disable_apps, *disable_cgroups); started++; } @@ -855,7 +1157,7 @@ static void read_collector_values(int *disable_apps) CONFIG_BOOLEAN_NO); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_SOCKET_IDX, *disable_apps); + ebpf_enable_chart(EBPF_MODULE_SOCKET_IDX, *disable_apps, *disable_cgroups); // Read network viewer section if network viewer is enabled // This is kept here to keep backward compatibility parse_network_viewer_section(&collector_config); @@ -869,13 +1171,13 @@ static void read_collector_values(int *disable_apps) if (!enabled) enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "network connections", CONFIG_BOOLEAN_NO); - ebpf_modules[EBPF_MODULE_SOCKET_IDX].optional = enabled; + ebpf_modules[EBPF_MODULE_SOCKET_IDX].optional = 
(int)enabled; enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "cachestat", CONFIG_BOOLEAN_NO); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_CACHESTAT_IDX, *disable_apps); + ebpf_enable_chart(EBPF_MODULE_CACHESTAT_IDX, *disable_apps, *disable_cgroups); started++; } @@ -883,19 +1185,96 @@ static void read_collector_values(int *disable_apps) CONFIG_BOOLEAN_YES); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_SYNC_IDX, *disable_apps); + ebpf_enable_chart(EBPF_MODULE_SYNC_IDX, *disable_apps, *disable_cgroups); started++; } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "dcstat", CONFIG_BOOLEAN_NO); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_DCSTAT_IDX, *disable_apps); + ebpf_enable_chart(EBPF_MODULE_DCSTAT_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "swap", + CONFIG_BOOLEAN_NO); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_SWAP_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "vfs", + CONFIG_BOOLEAN_NO); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_VFS_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "filesystem", + CONFIG_BOOLEAN_NO); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_FILESYSTEM_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "disk", + CONFIG_BOOLEAN_NO); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_DISK_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "mount", + CONFIG_BOOLEAN_YES); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_MOUNT_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, 
EBPF_PROGRAMS_SECTION, "fd", + CONFIG_BOOLEAN_YES); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_FD_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "hardirq", + CONFIG_BOOLEAN_YES); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_HARDIRQ_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "softirq", + CONFIG_BOOLEAN_YES); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_SOFTIRQ_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "oomkill", + CONFIG_BOOLEAN_YES); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_OOMKILL_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "shm", + CONFIG_BOOLEAN_YES); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_SHM_IDX, *disable_apps, *disable_cgroups); + started++; + } + + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "mdflush", + CONFIG_BOOLEAN_NO); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_MDFLUSH_IDX, *disable_apps, *disable_cgroups); started++; } if (!started){ - ebpf_enable_all_charts(*disable_apps); + ebpf_enable_all_charts(*disable_apps, *disable_cgroups); // Read network viewer section // This is kept here to keep backward compatibility parse_network_viewer_section(&collector_config); @@ -906,12 +1285,14 @@ static void read_collector_values(int *disable_apps) /** * Load collector config * - * @param path the path where the file ebpf.conf is stored. - * @param disable_apps variable to store the information about apps plugin status. + * @param path the path where the file ebpf.conf is stored. + * @param disable_apps variable to store the information about apps plugin status. 
+ * @param disable_cgroups variable to store the information about cgroups plugin status. + * @param update_every value to overwrite the update frequency set by the server. * * @return 0 on success and -1 otherwise. */ -static int load_collector_config(char *path, int *disable_apps) +static int load_collector_config(char *path, int *disable_apps, int *disable_cgroups, int update_every) { char lpath[4096]; @@ -923,7 +1304,7 @@ static int load_collector_config(char *path, int *disable_apps) } } - read_collector_values(disable_apps); + read_collector_values(disable_apps, disable_cgroups, update_every); return 0; } @@ -957,6 +1338,21 @@ void set_global_variables() isrh = get_redhat_release(); pid_max = get_system_pid_max(); + running_on_kernel = ebpf_get_kernel_version(); + ebpf_update_kernel(kernel_string, 63, isrh, running_on_kernel); +} + +/** + * Load collector config + * + * @param lmode the mode that will be used for them. + */ +static inline void ebpf_load_thread_config() +{ + int i; + for (i = 0; ebpf_modules[i].thread_name; i++) { + ebpf_update_module(&ebpf_modules[i]); + } } /** @@ -965,23 +1361,36 @@ void set_global_variables() * @param argc the number of arguments * @param argv the pointer to the arguments */ -static void parse_args(int argc, char **argv) +static void ebpf_parse_args(int argc, char **argv) { - int enabled = 0; int disable_apps = 0; + int disable_cgroups = 1; int freq = 0; int option_index = 0; + uint64_t select_threads = 0; static struct option long_options[] = { - {"help", no_argument, 0, 'h' }, - {"version", no_argument, 0, 'v' }, - {"global", no_argument, 0, 'g' }, - {"all", no_argument, 0, 'a' }, - {"cachestat", no_argument, 0, 'c' }, - {"dcstat", no_argument, 0, 'd' }, - {"net", no_argument, 0, 'n' }, - {"process", no_argument, 0, 'p' }, - {"return", no_argument, 0, 'r' }, - {"sync", no_argument, 0, 's' }, + {"process", no_argument, 0, 0 }, + {"net", no_argument, 0, 0 }, + {"cachestat", no_argument, 0, 0 }, + {"sync", no_argument, 0, 0 
}, + {"dcstat", no_argument, 0, 0 }, + {"swap", no_argument, 0, 0 }, + {"vfs", no_argument, 0, 0 }, + {"filesystem", no_argument, 0, 0 }, + {"disk", no_argument, 0, 0 }, + {"mount", no_argument, 0, 0 }, + {"filedescriptor", no_argument, 0, 0 }, + {"hardirq", no_argument, 0, 0 }, + {"softirq", no_argument, 0, 0 }, + {"oomkill", no_argument, 0, 0 }, + {"shm", no_argument, 0, 0 }, + {"mdflush", no_argument, 0, 0 }, + /* INSERT NEW THREADS BEFORE THIS COMMENT TO KEEP COMPATIBILITY WITH enum ebpf_module_indexes */ + {"all", no_argument, 0, 0 }, + {"version", no_argument, 0, 0 }, + {"help", no_argument, 0, 0 }, + {"global", no_argument, 0, 0 }, + {"return", no_argument, 0, 0 }, {0, 0, 0, 0} }; @@ -995,83 +1404,166 @@ static void parse_args(int argc, char **argv) } } + if (!freq) + freq = EBPF_DEFAULT_UPDATE_EVERY; + + if (load_collector_config(ebpf_user_config_dir, &disable_apps, &disable_cgroups, freq)) { + info( + "Does not have a configuration file inside `%s/ebpf.d.conf. It will try to load stock file.", + ebpf_user_config_dir); + if (load_collector_config(ebpf_stock_config_dir, &disable_apps, &disable_cgroups, freq)) { + info("Does not have a stock file. 
It is starting with default options."); + } + } + + ebpf_load_thread_config(); + while (1) { - int c = getopt_long(argc, argv, "hvgacdnprs", long_options, &option_index); + int c = getopt_long_only(argc, argv, "", long_options, &option_index); if (c == -1) break; - switch (c) { - case 'h': { - ebpf_print_help(); - exit(0); + switch (option_index) { + case EBPF_MODULE_PROCESS_IDX: { + select_threads |= 1<<EBPF_MODULE_PROCESS_IDX; +#ifdef NETDATA_INTERNAL_CHECKS + info("EBPF enabling \"PROCESS\" charts, because it was started with the option \"[-]-process\"."); +#endif + break; } - case 'v': { - printf("ebpf.plugin %s\n", VERSION); - exit(0); + case EBPF_MODULE_SOCKET_IDX: { + select_threads |= 1<<EBPF_MODULE_SOCKET_IDX; +#ifdef NETDATA_INTERNAL_CHECKS + info("EBPF enabling \"NET\" charts, because it was started with the option \"[-]-net\"."); +#endif + break; } - case 'g': { - disable_apps = 1; - ebpf_disable_apps(); + case EBPF_MODULE_CACHESTAT_IDX: { + select_threads |= 1<<EBPF_MODULE_CACHESTAT_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info( - "EBPF running with global chart group, because it was started with the option \"--global\" or \"-g\"."); + info("EBPF enabling \"CACHESTAT\" charts, because it was started with the option \"[-]-cachestat\"."); #endif break; } - case 'a': { - ebpf_enable_all_charts(disable_apps); + case EBPF_MODULE_SYNC_IDX: { + select_threads |= 1<<EBPF_MODULE_SYNC_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF running with all chart groups, because it was started with the option \"--all\" or \"-a\"."); + info("EBPF enabling \"SYNC\" chart, because it was started with the option \"[-]-sync\"."); #endif break; } - case 'c': { - enabled = 1; - ebpf_enable_chart(EBPF_MODULE_CACHESTAT_IDX, disable_apps); + case EBPF_MODULE_DCSTAT_IDX: { + select_threads |= 1<<EBPF_MODULE_DCSTAT_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info( - "EBPF enabling \"CACHESTAT\" charts, because it was started with the option \"--cachestat\" or \"-c\"."); + info("EBPF 
enabling \"DCSTAT\" charts, because it was started with the option \"[-]-dcstat\"."); #endif break; } - case 'd': { - enabled = 1; - ebpf_enable_chart(EBPF_MODULE_DCSTAT_IDX, disable_apps); + case EBPF_MODULE_SWAP_IDX: { + select_threads |= 1<<EBPF_MODULE_SWAP_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info( - "EBPF enabling \"DCSTAT\" charts, because it was started with the option \"--dcstat\" or \"-d\"."); + info("EBPF enabling \"SWAP\" chart, because it was started with the option \"[-]-swap\"."); #endif break; } - case 'n': { - enabled = 1; - ebpf_enable_chart(EBPF_MODULE_SOCKET_IDX, disable_apps); + case EBPF_MODULE_VFS_IDX: { + select_threads |= 1<<EBPF_MODULE_VFS_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF enabling \"NET\" charts, because it was started with the option \"--net\" or \"-n\"."); + info("EBPF enabling \"VFS\" chart, because it was started with the option \"[-]-vfs\"."); #endif break; } - case 'p': { - enabled = 1; - ebpf_enable_chart(EBPF_MODULE_PROCESS_IDX, disable_apps); + case EBPF_MODULE_FILESYSTEM_IDX: { + select_threads |= 1<<EBPF_MODULE_FILESYSTEM_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info( - "EBPF enabling \"PROCESS\" charts, because it was started with the option \"--process\" or \"-p\"."); + info("EBPF enabling \"FILESYSTEM\" chart, because it was started with the option \"[-]-filesystem\"."); #endif break; } - case 'r': { - ebpf_set_thread_mode(MODE_RETURN); + case EBPF_MODULE_DISK_IDX: { + select_threads |= 1<<EBPF_MODULE_DISK_IDX; +#ifdef NETDATA_INTERNAL_CHECKS + info("EBPF enabling \"DISK\" chart, because it was started with the option \"[-]-disk\"."); +#endif + break; + } + case EBPF_MODULE_MOUNT_IDX: { + select_threads |= 1<<EBPF_MODULE_MOUNT_IDX; +#ifdef NETDATA_INTERNAL_CHECKS + info("EBPF enabling \"MOUNT\" chart, because it was started with the option \"[-]-mount\"."); +#endif + break; + } + case EBPF_MODULE_FD_IDX: { + select_threads |= 1<<EBPF_MODULE_FD_IDX; +#ifdef NETDATA_INTERNAL_CHECKS + info("EBPF enabling 
\"FILEDESCRIPTOR\" chart, because it was started with the option \"[-]-filedescriptor\"."); +#endif + break; + } + case EBPF_MODULE_HARDIRQ_IDX: { + select_threads |= 1<<EBPF_MODULE_HARDIRQ_IDX; +#ifdef NETDATA_INTERNAL_CHECKS + info("EBPF enabling \"HARDIRQ\" chart, because it was started with the option \"[-]-hardirq\"."); +#endif + break; + } + case EBPF_MODULE_SOFTIRQ_IDX: { + select_threads |= 1<<EBPF_MODULE_SOFTIRQ_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF running in \"return\" mode, because it was started with the option \"--return\" or \"-r\"."); + info("EBPF enabling \"SOFTIRQ\" chart, because it was started with the option \"[-]-softirq\"."); #endif break; } - case 's': { - enabled = 1; - ebpf_enable_chart(EBPF_MODULE_SYNC_IDX, disable_apps); + case EBPF_MODULE_OOMKILL_IDX: { + select_threads |= 1<<EBPF_MODULE_OOMKILL_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF enabling \"sync\" chart, because it was started with the option \"--sync\" or \"-s\"."); + info("EBPF enabling \"OOMKILL\" chart, because it was started with the option \"[-]-oomkill\"."); +#endif + break; + } + case EBPF_MODULE_SHM_IDX: { + select_threads |= 1<<EBPF_MODULE_SHM_IDX; +#ifdef NETDATA_INTERNAL_CHECKS + info("EBPF enabling \"SHM\" chart, because it was started with the option \"[-]-shm\"."); +#endif + break; + } + case EBPF_MODULE_MDFLUSH_IDX: { + select_threads |= 1<<EBPF_MODULE_MDFLUSH_IDX; +#ifdef NETDATA_INTERNAL_CHECKS + info("EBPF enabling \"MDFLUSH\" chart, because it was started with the option \"[-]-mdflush\"."); +#endif + break; + } + case EBPF_OPTION_ALL_CHARTS: { + disable_apps = 0; + disable_cgroups = 0; +#ifdef NETDATA_INTERNAL_CHECKS + info("EBPF running with all chart groups, because it was started with the option \"[-]-all\"."); +#endif + break; + } + case EBPF_OPTION_VERSION: { + printf("ebpf.plugin %s\n", VERSION); + exit(0); + } + case EBPF_OPTION_HELP: { + ebpf_print_help(); + exit(0); + } + case EBPF_OPTION_GLOBAL_CHART: { + disable_apps = 1; + 
disable_cgroups = 1; +#ifdef NETDATA_INTERNAL_CHECKS + info("EBPF running with global chart group, because it was started with the option \"[-]-global\"."); +#endif + break; + } + case EBPF_OPTION_RETURN_MODE: { + ebpf_set_thread_mode(MODE_RETURN); +#ifdef NETDATA_INTERNAL_CHECKS + info("EBPF running in \"RETURN\" mode, because it was started with the option \"[-]-return\"."); #endif break; } @@ -1081,44 +1573,34 @@ static void parse_args(int argc, char **argv) } } - if (freq > 0) { - update_every = freq; - } + if (disable_apps || disable_cgroups) { + if (disable_apps) + ebpf_disable_apps(); - if (load_collector_config(ebpf_user_config_dir, &disable_apps)) { - info( - "Does not have a configuration file inside `%s/ebpf.d.conf. It will try to load stock file.", - ebpf_user_config_dir); - if (load_collector_config(ebpf_stock_config_dir, &disable_apps)) { - info("Does not have a stock file. It is starting with default options."); - } else { - enabled = 1; - } - } else { - enabled = 1; - } + if (disable_cgroups) + ebpf_disable_cgroups(); - if (!enabled) { - ebpf_enable_all_charts(disable_apps); -#ifdef NETDATA_INTERNAL_CHECKS - info("EBPF running with all charts, because neither \"-n\" or \"-p\" was given."); -#endif + ebpf_enable_all_charts(disable_apps, disable_cgroups); } - if (disable_apps) - return; + if (select_threads) { + disable_all_global_charts(); + uint64_t idx; + for (idx = 0; idx < EBPF_OPTION_ALL_CHARTS; idx++) { + if (select_threads & 1<<idx) + ebpf_enable_specific_chart(&ebpf_modules[idx], disable_apps, disable_cgroups); + } + } // Load apps_groups.conf if (ebpf_read_apps_groups_conf( &apps_groups_default_target, &apps_groups_root_target, ebpf_user_config_dir, "groups")) { - info( - "Cannot read process groups configuration file '%s/apps_groups.conf'. Will try '%s/apps_groups.conf'", - ebpf_user_config_dir, ebpf_stock_config_dir); + info("Cannot read process groups configuration file '%s/apps_groups.conf'. 
Will try '%s/apps_groups.conf'", + ebpf_user_config_dir, ebpf_stock_config_dir); if (ebpf_read_apps_groups_conf( &apps_groups_default_target, &apps_groups_root_target, ebpf_stock_config_dir, "groups")) { - error( - "Cannot read process groups '%s/apps_groups.conf'. There are no internal defaults. Failing.", - ebpf_stock_config_dir); + error("Cannot read process groups '%s/apps_groups.conf'. There are no internal defaults. Failing.", + ebpf_stock_config_dir); thread_finished++; ebpf_exit(1); } @@ -1133,6 +1615,136 @@ static void parse_args(int argc, char **argv) *****************************************************************/ /** + * Update PID file + * + * Update the content of PID file + * + * @param filename is the full name of the file. + * @param pid that identifies the process + */ +static void ebpf_update_pid_file(char *filename, pid_t pid) +{ + FILE *fp = fopen(filename, "w"); + if (!fp) + return; + + fprintf(fp, "%d", pid); + fclose(fp); +} + +/** + * Get Process Name + * + * Get process name from /proc/PID/status + * + * @param pid that identifies the process + */ +static char *ebpf_get_process_name(pid_t pid) +{ + char *name = NULL; + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "/proc/%d/status", pid); + + procfile *ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) { + error("Cannot open %s", filename); + return name; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) + return name; + + unsigned long i, lines = procfile_lines(ff); + for(i = 0; i < lines ; i++) { + char *cmp = procfile_lineword(ff, i, 0); + if (!strcmp(cmp, "Name:")) { + name = strdupz(procfile_lineword(ff, i, 1)); + break; + } + } + + procfile_close(ff); + + return name; +} + +/** + * Read Previous PID + * + * @param filename is the full name of the file. 
+ *
+ * @return It returns the PID used during previous execution on success or 0 otherwise
+ */
+static pid_t ebpf_read_previous_pid(char *filename)
+{
+    FILE *fp = fopen(filename, "r");
+    if (!fp)
+        return 0;
+
+    char buffer[64];
+    // fread() never returns more than the requested count, so buffer[length] is always in bounds
+    size_t length = fread(buffer, sizeof(*buffer), sizeof(buffer) - 1, fp);
+    pid_t old_pid = 0;
+    if (length) {
+        buffer[length] = '\0';
+        old_pid = (pid_t)str2uint32_t(buffer);
+    }
+    fclose(fp);
+
+    return old_pid;
+}
+
+/**
+ * Kill previous process
+ *
+ * Kill the process recorded in the PID file when it still runs under the same name as the current process.
+ *
+ * @param filename is the full name of the file.
+ * @param pid that identifies the current process
+ */
+static void ebpf_kill_previous_process(char *filename, pid_t pid)
+{
+    pid_t old_pid = ebpf_read_previous_pid(filename);
+    // A stale PID file can hold our own PID (PID reuse); never signal ourselves
+    if (!old_pid || old_pid == pid)
+        return;
+
+    // Process is not running
+    char *prev_name = ebpf_get_process_name(old_pid);
+    if (!prev_name)
+        return;
+
+    // current_name is NULL when our own name cannot be read; strcmp() must not receive NULL
+    char *current_name = ebpf_get_process_name(pid);
+    if (current_name) {
+        if (!strcmp(prev_name, current_name))
+            kill(old_pid, SIGKILL);
+        freez(current_name);
+    }
+    freez(prev_name);
+
+    // wait 300 milliseconds (USEC_PER_MS * 300) before starting the new plugin
+    sleep_usec(USEC_PER_MS * 300);
+}
+
+/**
+ * Manage PID
+ *
+ * This function kills another instance of eBPF if necessary and updates the file content.
+ * + * @param pid that identifies the process + */ +static void ebpf_manage_pid(pid_t pid) +{ + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s/ebpf.d/ebpf.pid", netdata_configured_host_prefix, ebpf_plugin_dir); + + ebpf_kill_previous_process(filename, pid); + ebpf_update_pid_file(filename, pid); +} + +/** * Entry point * * @param argc the number of arguments @@ -1143,9 +1755,9 @@ static void parse_args(int argc, char **argv) int main(int argc, char **argv) { set_global_variables(); - parse_args(argc, argv); + ebpf_parse_args(argc, argv); + ebpf_manage_pid(getpid()); - running_on_kernel = get_kernel_version(kernel_string, 63); if (!has_condition_to_run(running_on_kernel)) { error("The current collector cannot run on this kernel."); return 2; @@ -1203,6 +1815,28 @@ int main(int argc, char **argv) NULL, NULL, ebpf_modules[EBPF_MODULE_SYNC_IDX].start_routine}, {"EBPF DCSTAT" , NULL, NULL, 1, NULL, NULL, ebpf_modules[EBPF_MODULE_DCSTAT_IDX].start_routine}, + {"EBPF SWAP" , NULL, NULL, 1, + NULL, NULL, ebpf_modules[EBPF_MODULE_SWAP_IDX].start_routine}, + {"EBPF VFS" , NULL, NULL, 1, + NULL, NULL, ebpf_modules[EBPF_MODULE_VFS_IDX].start_routine}, + {"EBPF FILESYSTEM" , NULL, NULL, 1, + NULL, NULL, ebpf_modules[EBPF_MODULE_FILESYSTEM_IDX].start_routine}, + {"EBPF DISK" , NULL, NULL, 1, + NULL, NULL, ebpf_modules[EBPF_MODULE_DISK_IDX].start_routine}, + {"EBPF MOUNT" , NULL, NULL, 1, + NULL, NULL, ebpf_modules[EBPF_MODULE_MOUNT_IDX].start_routine}, + {"EBPF FD" , NULL, NULL, 1, + NULL, NULL, ebpf_modules[EBPF_MODULE_FD_IDX].start_routine}, + {"EBPF HARDIRQ" , NULL, NULL, 1, + NULL, NULL, ebpf_modules[EBPF_MODULE_HARDIRQ_IDX].start_routine}, + {"EBPF SOFTIRQ" , NULL, NULL, 1, + NULL, NULL, ebpf_modules[EBPF_MODULE_SOFTIRQ_IDX].start_routine}, + {"EBPF OOMKILL" , NULL, NULL, 1, + NULL, NULL, ebpf_modules[EBPF_MODULE_OOMKILL_IDX].start_routine}, + {"EBPF SHM" , NULL, NULL, 1, + NULL, NULL, ebpf_modules[EBPF_MODULE_SHM_IDX].start_routine}, + {"EBPF 
MDFLUSH" , NULL, NULL, 1, + NULL, NULL, ebpf_modules[EBPF_MODULE_MDFLUSH_IDX].start_routine}, {NULL , NULL, NULL, 0, NULL, NULL, NULL} }; diff --git a/collectors/ebpf.plugin/ebpf.d.conf b/collectors/ebpf.plugin/ebpf.d.conf index ef6ff814..845b711c 100644 --- a/collectors/ebpf.plugin/ebpf.d.conf +++ b/collectors/ebpf.plugin/ebpf.d.conf @@ -6,9 +6,10 @@ # `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors. # -# The eBPF collector also creates charts for each running application through an integration with the `apps plugin`. -# If you want to disable the integration with `apps.plugin` along with the above charts, change the setting `apps` to -# 'no'. +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change the setting +# `apps` and `cgroups` to 'no'. # # The `update every` option defines the number of seconds used to read data from kernel and send to netdata # @@ -17,7 +18,8 @@ [global] ebpf load mode = entry apps = yes - update every = 1 + cgroups = no + update every = 5 pid table size = 32768 # @@ -25,17 +27,39 @@ # # The eBPF collector enables and runs the following eBPF programs by default: # -# `cachestat`: Make charts for kernel functions related to page cache. -# `process` : This eBPF program creates charts that show information about process creation, VFS IO, and +# `cachestat` : Make charts for kernel functions related to page cache. +# `dcstat` : Make charts for kernel functions related to directory cache. +# `disk` : Monitor I/O latencies for disks +# `fd` : This eBPF program creates charts that show information about file manipulation. +# `mdflush` : Monitors flush counts for multi-devices. 
+# `mount` : Monitor calls for syscalls mount and umount +# `filesystem`: Monitor calls for functions used to manipulate specific filesystems +# `hardirq` : Monitor latency of serving hardware interrupt requests (hard IRQs). +# `oomkill` : This eBPF program creates a chart that shows which process got OOM killed and when. +# `process` : This eBPF program creates charts that show information about process life. +# `shm` : Monitor calls for syscalls shmget, shmat, shmdt and shmctl. +# `socket` : This eBPF program creates charts with information about `TCP` and `UDP` functions, including the +# bandwidth consumed by each. +# `softirq` : Monitor latency of serving software interrupt requests (soft IRQs). +# `sync` : Monitor calls for syscall sync(2). +# `swap` : Monitor calls for internal swap functions. +# `vfs` : This eBPF program creates charts that show information about process VFS IO, VFS file manipulation and # files removed. -# `socket` : This eBPF program creates charts with information about `TCP` and `UDP` functions, including the -# bandwidth consumed by each. -# `sync` : Montitor calls for syscall sync(2). [ebpf programs] cachestat = no dcstat = no + disk = no + fd = yes + filesystem = no + hardirq = yes + mdflush = no + mount = yes + oomkill = yes process = yes + shm = yes socket = yes + softirq = yes sync = yes + swap = no + vfs = yes network connections = no - diff --git a/collectors/ebpf.plugin/ebpf.d/cachestat.conf b/collectors/ebpf.plugin/ebpf.d/cachestat.conf index 0c4d991d..41205930 100644 --- a/collectors/ebpf.plugin/ebpf.d/cachestat.conf +++ b/collectors/ebpf.plugin/ebpf.d/cachestat.conf @@ -3,14 +3,17 @@ # `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors. # -# The eBPF collector also creates charts for each running application through an integration with the `apps plugin`.
-# If you want to disable the integration with `apps.plugin` along with the above charts, change the setting `apps` to -# 'no'. +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. # # The `pid table size` defines the maximum number of PIDs stored inside the application hash table. -# -[global] - ebpf load mode = entry - apps = yes - update every = 2 - pid table size = 32768 +# +# Uncomment lines to define specific options for thread. +#[global] +# ebpf load mode = entry +# apps = yes +# cgroups = no +# update every = 10 +# pid table size = 32768 diff --git a/collectors/ebpf.plugin/ebpf.d/dcstat.conf b/collectors/ebpf.plugin/ebpf.d/dcstat.conf index 2607b98f..a65e0acb 100644 --- a/collectors/ebpf.plugin/ebpf.d/dcstat.conf +++ b/collectors/ebpf.plugin/ebpf.d/dcstat.conf @@ -3,11 +3,15 @@ # `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors. # -# The eBPF collector also creates charts for each running application through an integration with the `apps plugin`. -# If you want to disable the integration with `apps.plugin` along with the above charts, change the setting `apps` to -# 'no'. +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. # -[global] - ebpf load mode = entry - apps = yes - update every = 2 +# Uncomment lines to define specific options for thread. 
+#[global] +# ebpf load mode = entry +# apps = yes +# cgroups = no +# update every = 10 +# pid table size = 32768 diff --git a/collectors/ebpf.plugin/ebpf.d/disk.conf b/collectors/ebpf.plugin/ebpf.d/disk.conf new file mode 100644 index 00000000..4adf88e7 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/disk.conf @@ -0,0 +1,9 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +#[global] +# ebpf load mode = entry +# update every = 10 + diff --git a/collectors/ebpf.plugin/ebpf.d/fd.conf b/collectors/ebpf.plugin/ebpf.d/fd.conf new file mode 100644 index 00000000..f6edd3d9 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/fd.conf @@ -0,0 +1,19 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. +# +# The `pid table size` defines the maximum number of PIDs stored inside the hash table. +# +# Uncomment lines to define specific options for thread. 
+#[global] +# ebpf load mode = entry +# apps = yes +# cgroups = no +# update every = 10 +# pid table size = 32768 diff --git a/collectors/ebpf.plugin/ebpf.d/filesystem.conf b/collectors/ebpf.plugin/ebpf.d/filesystem.conf new file mode 100644 index 00000000..c5eb01e5 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/filesystem.conf @@ -0,0 +1,20 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +# The eBPF collector also creates charts for each running application through an integration with the `apps plugin`. +# If you want to disable the integration with `apps.plugin` along with the above charts, change the setting `apps` to +# 'no'. +# +#[global] +# ebpf load mode = entry +# update every = 10 + +# All filesystems are named as 'NAMEdist' where NAME is the filesystem name while 'dist' is a reference for distribution. +[filesystem] + btrfsdist = yes + ext4dist = yes + nfsdist = yes + xfsdist = yes + zfsdist = yes diff --git a/collectors/ebpf.plugin/ebpf.d/hardirq.conf b/collectors/ebpf.plugin/ebpf.d/hardirq.conf new file mode 100644 index 00000000..f2bae1d5 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/hardirq.conf @@ -0,0 +1,8 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. 
+# +#[global] +# ebpf load mode = entry +# update every = 10 diff --git a/collectors/ebpf.plugin/ebpf.d/mdflush.conf b/collectors/ebpf.plugin/ebpf.d/mdflush.conf new file mode 100644 index 00000000..e65e8672 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/mdflush.conf @@ -0,0 +1,7 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +#[global] +# ebpf load mode = entry +# update every = 1 diff --git a/collectors/ebpf.plugin/ebpf.d/mount.conf b/collectors/ebpf.plugin/ebpf.d/mount.conf new file mode 100644 index 00000000..9d317475 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/mount.conf @@ -0,0 +1,8 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +#[global] +# ebpf load mode = entry +# update every = 1 diff --git a/collectors/ebpf.plugin/ebpf.d/network.conf b/collectors/ebpf.plugin/ebpf.d/network.conf index 6bbd49a4..e692622a 100644 --- a/collectors/ebpf.plugin/ebpf.d/network.conf +++ b/collectors/ebpf.plugin/ebpf.d/network.conf @@ -3,9 +3,10 @@ # `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors. # -# The eBPF collector also creates charts for each running application through an integration with the `apps plugin`. 
-# If you want to disable the integration with `apps.plugin` along with the above charts, change the setting `apps` to -# 'no'. +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. # # The following options change the hash table size: # `bandwidth table size`: Maximum number of connections monitored @@ -14,9 +15,10 @@ # `udp connection table size`: Maximum number of UDP connections monitored # [global] - ebpf load mode = entry - apps = yes - update every = 1 +# ebpf load mode = entry +# apps = yes +# cgroups = no +# update every = 10 bandwidth table size = 16384 ipv4 connection table size = 16384 ipv6 connection table size = 16384 diff --git a/collectors/ebpf.plugin/ebpf.d/oomkill.conf b/collectors/ebpf.plugin/ebpf.d/oomkill.conf new file mode 100644 index 00000000..e65e8672 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/oomkill.conf @@ -0,0 +1,7 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +#[global] +# ebpf load mode = entry +# update every = 1 diff --git a/collectors/ebpf.plugin/ebpf.d/process.conf b/collectors/ebpf.plugin/ebpf.d/process.conf index 511da95a..f6edd3d9 100644 --- a/collectors/ebpf.plugin/ebpf.d/process.conf +++ b/collectors/ebpf.plugin/ebpf.d/process.conf @@ -3,14 +3,17 @@ # `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors. 
# -# The eBPF collector also creates charts for each running application through an integration with the `apps plugin`. -# If you want to disable the integration with `apps.plugin` along with the above charts, change the setting `apps` to -# 'no'. +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. # # The `pid table size` defines the maximum number of PIDs stored inside the hash table. -# -[global] - ebpf load mode = entry - apps = yes - update every = 1 - pid table size = 32768 +# +# Uncomment lines to define specific options for thread. +#[global] +# ebpf load mode = entry +# apps = yes +# cgroups = no +# update every = 10 +# pid table size = 32768 diff --git a/collectors/ebpf.plugin/ebpf.d/shm.conf b/collectors/ebpf.plugin/ebpf.d/shm.conf new file mode 100644 index 00000000..c0a10c98 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/shm.conf @@ -0,0 +1,24 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. +# +# Uncomment lines to define specific options for thread. 
+#[global] +# ebpf load mode = entry +# apps = yes +# cgroups = no +# update every = 10 +# pid table size = 32768 + +# List of monitored syscalls +[syscalls] + shmget = yes + shmat = yes + shmdt = yes + shmctl = yes diff --git a/collectors/ebpf.plugin/ebpf.d/softirq.conf b/collectors/ebpf.plugin/ebpf.d/softirq.conf new file mode 100644 index 00000000..f2bae1d5 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/softirq.conf @@ -0,0 +1,8 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +#[global] +# ebpf load mode = entry +# update every = 10 diff --git a/collectors/ebpf.plugin/ebpf.d/swap.conf b/collectors/ebpf.plugin/ebpf.d/swap.conf new file mode 100644 index 00000000..a65e0acb --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/swap.conf @@ -0,0 +1,17 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. +# +# Uncomment lines to define specific options for thread. 
+#[global] +# ebpf load mode = entry +# apps = yes +# cgroups = no +# update every = 10 +# pid table size = 32768 diff --git a/collectors/ebpf.plugin/ebpf.d/sync.conf b/collectors/ebpf.plugin/ebpf.d/sync.conf index de28f339..03c469f6 100644 --- a/collectors/ebpf.plugin/ebpf.d/sync.conf +++ b/collectors/ebpf.plugin/ebpf.d/sync.conf @@ -3,15 +3,17 @@ # `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors. # -# The eBPF collector also creates charts for each running application through an integration with the `apps plugin`. -# If you want to disable the integration with `apps.plugin` along with the above charts, change the setting `apps` to -# 'no'. +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. # # -[global] - ebpf load mode = entry - apps = yes - update every = 2 +#[global] +# ebpf load mode = entry +# apps = yes +# cgroups = no +# update every = 10 # List of monitored syscalls [syscalls] diff --git a/collectors/ebpf.plugin/ebpf.d/vfs.conf b/collectors/ebpf.plugin/ebpf.d/vfs.conf new file mode 100644 index 00000000..a65e0acb --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/vfs.conf @@ -0,0 +1,17 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. 
+# +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. +# +# Uncomment lines to define specific options for thread. +#[global] +# ebpf load mode = entry +# apps = yes +# cgroups = no +# update every = 10 +# pid table size = 32768 diff --git a/collectors/ebpf.plugin/ebpf.h b/collectors/ebpf.plugin/ebpf.h index 841701e2..a59bad03 100644 --- a/collectors/ebpf.plugin/ebpf.h +++ b/collectors/ebpf.plugin/ebpf.h @@ -30,6 +30,7 @@ #include "daemon/main.h" #include "ebpf_apps.h" +#include "ebpf_cgroup.h" #define NETDATA_EBPF_OLD_CONFIG_FILE "ebpf.conf" #define NETDATA_EBPF_CONFIG_FILE "ebpf.d.conf" @@ -73,14 +74,37 @@ typedef struct netdata_error_report { } netdata_error_report_t; extern ebpf_module_t ebpf_modules[]; -enum ebpf_module_indexes { +enum ebpf_main_index { EBPF_MODULE_PROCESS_IDX, EBPF_MODULE_SOCKET_IDX, EBPF_MODULE_CACHESTAT_IDX, EBPF_MODULE_SYNC_IDX, - EBPF_MODULE_DCSTAT_IDX + EBPF_MODULE_DCSTAT_IDX, + EBPF_MODULE_SWAP_IDX, + EBPF_MODULE_VFS_IDX, + EBPF_MODULE_FILESYSTEM_IDX, + EBPF_MODULE_DISK_IDX, + EBPF_MODULE_MOUNT_IDX, + EBPF_MODULE_FD_IDX, + EBPF_MODULE_HARDIRQ_IDX, + EBPF_MODULE_SOFTIRQ_IDX, + EBPF_MODULE_OOMKILL_IDX, + EBPF_MODULE_SHM_IDX, + EBPF_MODULE_MDFLUSH_IDX, + /* THREADS MUST BE INCLUDED BEFORE THIS COMMENT */ + EBPF_OPTION_ALL_CHARTS, + EBPF_OPTION_VERSION, + EBPF_OPTION_HELP, + EBPF_OPTION_GLOBAL_CHART, + EBPF_OPTION_RETURN_MODE }; +typedef struct ebpf_tracepoint { + bool enabled; + char *class; + char *event; +} ebpf_tracepoint_t; + // Copied from musl header #ifndef offsetof #if __GNUC__ > 3 @@ -92,10 +116,16 @@ enum ebpf_module_indexes { // Chart definitions #define NETDATA_EBPF_FAMILY "ebpf" +#define NETDATA_EBPF_IP_FAMILY "ip" #define NETDATA_FILESYSTEM_FAMILY "filesystem" +#define 
NETDATA_EBPF_MOUNT_GLOBAL_FAMILY "mount_points" #define NETDATA_EBPF_CHART_TYPE_LINE "line" #define NETDATA_EBPF_CHART_TYPE_STACKED "stacked" #define NETDATA_EBPF_MEMORY_GROUP "mem" +#define NETDATA_EBPF_SYSTEM_GROUP "system" +#define NETDATA_SYSTEM_SWAP_SUBMENU "swap" +#define NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU "swap (eBPF)" +#define NETDATA_SYSTEM_IPC_SHM_SUBMENU "ipc shared memory" // Log file #define NETDATA_DEVELOPER_LOG_FILE "developer.log" @@ -111,6 +141,8 @@ enum ebpf_module_indexes { #define EBPF_SYS_CLONE_IDX 11 #define EBPF_MAX_MAPS 32 +#define EBPF_DEFAULT_UPDATE_EVERY 10 + enum ebpf_algorithms_list { NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_INCREMENTAL_IDX @@ -125,6 +157,7 @@ extern pthread_mutex_t lock; extern int close_ebpf_plugin; extern int ebpf_nprocs; extern int running_on_kernel; +extern int isrh; extern char *ebpf_plugin_dir; extern char kernel_string[64]; @@ -146,7 +179,9 @@ extern void ebpf_write_chart_cmd(char *type, char *family, char *charttype, char *context, - int order); + int order, + int update_every, + char *module); extern void ebpf_write_global_dimension(char *name, char *id, char *algorithm); @@ -162,7 +197,9 @@ extern void ebpf_create_chart(char *type, int order, void (*ncd)(void *, int), void *move, - int end); + int end, + int update_every, + char *module); extern void write_begin_chart(char *family, char *name); @@ -175,8 +212,6 @@ extern void write_err_chart(char *name, char *family, netdata_publish_syscall_t extern void write_io_chart(char *chart, char *family, char *dwrite, long long vwrite, char *dread, long long vread); -extern void fill_ebpf_data(ebpf_data_t *ef); - extern void ebpf_create_charts_on_apps(char *name, char *title, char *units, @@ -184,12 +219,18 @@ extern void ebpf_create_charts_on_apps(char *name, char *charttype, int order, char *algorithm, - struct target *root); + struct target *root, + int update_every, + char *module); extern void write_end_chart(); extern void 
ebpf_cleanup_publish_syscall(netdata_publish_syscall_t *nps); +extern int ebpf_enable_tracepoint(ebpf_tracepoint_t *tp); +extern int ebpf_disable_tracepoint(ebpf_tracepoint_t *tp); +extern uint32_t ebpf_enable_tracepoints(ebpf_tracepoint_t *tps); + #define EBPF_PROGRAMS_SECTION "ebpf programs" #define EBPF_COMMON_DIMENSION_PERCENTAGE "%" @@ -199,16 +240,21 @@ extern void ebpf_cleanup_publish_syscall(netdata_publish_syscall_t *nps); #define EBPF_COMMON_DIMENSION_DIFFERENCE "difference" #define EBPF_COMMON_DIMENSION_PACKETS "packets" #define EBPF_COMMON_DIMENSION_FILES "files" +#define EBPF_COMMON_DIMENSION_MILLISECONDS "milliseconds" +#define EBPF_COMMON_DIMENSION_KILLS "kills" // Common variables extern int debug_enabled; extern struct pid_stat *root_of_pids; +extern ebpf_cgroup_target_t *ebpf_cgroup_pids; extern char *ebpf_algorithms[]; extern struct config collector_config; -extern struct pid_stat *root_of_pids; extern ebpf_process_stat_t *global_process_stat; +extern netdata_ebpf_cgroup_shm_t shm_ebpf_cgroup; +extern int shm_fd_ebpf_cgroup; +extern sem_t *shm_sem_ebpf_cgroup; +extern pthread_mutex_t mutex_cgroup_shm; extern size_t all_pids_count; -extern int update_every; extern uint32_t finalized_threads; // Socket functions and variables @@ -219,6 +265,9 @@ extern void ebpf_cachestat_create_apps_charts(struct ebpf_module *em, void *root extern void ebpf_one_dimension_write_charts(char *family, char *chart, char *dim, long long v1); extern collected_number get_value_from_structure(char *basis, size_t offset); extern void ebpf_update_pid_table(ebpf_local_maps_t *pid, ebpf_module_t *em); +extern void ebpf_write_chart_obsolete(char *type, char *id, char *title, char *units, char *family, + char *charttype, char *context, int order, int update_every); +extern void write_histogram_chart(char *family, char *name, const netdata_idx_t *hist, char **dimensions, uint32_t end); #define EBPF_MAX_SYNCHRONIZATION_TIME 300 diff --git a/collectors/ebpf.plugin/ebpf_apps.c 
b/collectors/ebpf.plugin/ebpf_apps.c index 6459bad0..015d1bf2 100644 --- a/collectors/ebpf.plugin/ebpf_apps.c +++ b/collectors/ebpf.plugin/ebpf_apps.c @@ -116,9 +116,9 @@ int am_i_running_as_root() /** * Reset the target values * - * @param root the pointer to the chain that will be reseted. + * @param root the pointer to the chain that will be reset. * - * @return it returns the number of structures that was reseted. + * @return it returns the number of structures that was reset. */ size_t zero_all_targets(struct target *root) { @@ -910,6 +910,33 @@ static inline void del_pid_entry(pid_t pid) } /** + * Get command string associated with a PID. + * This can only safely be used when holding the `collect_data_mutex` lock. + * + * @param pid the pid to search the data. + * @param n the maximum amount of bytes to copy into dest. + * if this is greater than the size of the command, it is clipped. + * @param dest the target memory buffer to write the command into. + * @return -1 if the PID hasn't been scraped yet, 0 otherwise. + */ +int get_pid_comm(pid_t pid, size_t n, char *dest) +{ + struct pid_stat *stat; + + stat = all_pids[pid]; + if (unlikely(stat == NULL)) { + return -1; + } + + if (unlikely(n > sizeof(stat->comm))) { + n = sizeof(stat->comm); + } + + strncpyz(dest, stat->comm, n); + return 0; +} + +/** * Cleanup variable from other threads * * @param pid current pid. 
@@ -922,7 +949,7 @@ void cleanup_variables_from_other_threads(uint32_t pid) socket_bandwidth_curr[pid] = NULL; } - // Clean cachestat strcture + // Clean cachestat structure if (cachestat_pid) { freez(cachestat_pid[pid]); cachestat_pid[pid] = NULL; @@ -933,6 +960,30 @@ void cleanup_variables_from_other_threads(uint32_t pid) freez(dcstat_pid[pid]); dcstat_pid[pid] = NULL; } + + // Clean swap structure + if (swap_pid) { + freez(swap_pid[pid]); + swap_pid[pid] = NULL; + } + + // Clean vfs structure + if (vfs_pid) { + freez(vfs_pid[pid]); + vfs_pid[pid] = NULL; + } + + // Clean fd structure + if (fd_pid) { + freez(fd_pid[pid]); + fd_pid[pid] = NULL; + } + + // Clean shm structure + if (shm_pid) { + freez(shm_pid[pid]); + shm_pid[pid] = NULL; + } } /** diff --git a/collectors/ebpf.plugin/ebpf_apps.h b/collectors/ebpf.plugin/ebpf_apps.h index edcdef60..0c72b878 100644 --- a/collectors/ebpf.plugin/ebpf_apps.h +++ b/collectors/ebpf.plugin/ebpf_apps.h @@ -11,17 +11,28 @@ #include "libnetdata/ebpf/ebpf.h" #define NETDATA_APPS_FAMILY "apps" -#define NETDATA_APPS_FILE_GROUP "file (eBPF)" -#define NETDATA_APPS_VFS_GROUP "vfs (eBPF)" +#define NETDATA_APPS_FILE_GROUP "file_access" +#define NETDATA_APPS_FILE_CGROUP_GROUP "file_access (eBPF)" #define NETDATA_APPS_PROCESS_GROUP "process (eBPF)" -#define NETDATA_APPS_NET_GROUP "net (eBPF)" -#define NETDATA_APPS_CACHESTAT_GROUP "page cache (eBPF)" -#define NETDATA_APPS_DCSTAT_GROUP "directory cache (eBPF)" +#define NETDATA_APPS_NET_GROUP "net" +#define NETDATA_APPS_IPC_SHM_GROUP "ipc shm (eBPF)" #include "ebpf_process.h" #include "ebpf_dcstat.h" +#include "ebpf_disk.h" +#include "ebpf_fd.h" +#include "ebpf_filesystem.h" +#include "ebpf_hardirq.h" #include "ebpf_cachestat.h" +#include "ebpf_mdflush.h" +#include "ebpf_mount.h" +#include "ebpf_oomkill.h" +#include "ebpf_shm.h" +#include "ebpf_socket.h" +#include "ebpf_softirq.h" #include "ebpf_sync.h" +#include "ebpf_swap.h" +#include "ebpf_vfs.h" #define MAX_COMPARE_NAME 100 #define 
MAX_NAME 100 @@ -113,6 +124,10 @@ struct target { // Changes made to simplify integration between apps and eBPF. netdata_publish_cachestat_t cachestat; netdata_publish_dcstat_t dcstat; + netdata_publish_swap_t swap; + netdata_publish_vfs_t vfs; + netdata_fd_stat_t fd; + netdata_publish_shm_t shm; /* These variables are not necessary for eBPF collector kernel_uint_t minflt; @@ -341,34 +356,13 @@ typedef struct ebpf_process_stat { uint32_t pid; //Counter - uint32_t open_call; - uint32_t write_call; - uint32_t writev_call; - uint32_t read_call; - uint32_t readv_call; - uint32_t unlink_call; uint32_t exit_call; uint32_t release_call; - uint32_t fork_call; - uint32_t clone_call; - uint32_t close_call; - - //Accumulator - uint64_t write_bytes; - uint64_t writev_bytes; - uint64_t readv_bytes; - uint64_t read_bytes; + uint32_t create_process; + uint32_t create_thread; //Counter - uint32_t open_err; - uint32_t write_err; - uint32_t writev_err; - uint32_t read_err; - uint32_t readv_err; - uint32_t unlink_err; - uint32_t fork_err; - uint32_t clone_err; - uint32_t close_err; + uint32_t task_err; uint8_t removeme; } ebpf_process_stat_t; @@ -425,6 +419,8 @@ extern void cleanup_exited_pids(); extern int ebpf_read_hash_table(void *ep, int fd, uint32_t pid); +extern int get_pid_comm(pid_t pid, size_t n, char *dest); + extern size_t read_processes_statistic_using_pid_on_target(ebpf_process_stat_t **ep, int fd, struct pid_on_target *pids); diff --git a/collectors/ebpf.plugin/ebpf_cachestat.c b/collectors/ebpf.plugin/ebpf_cachestat.c index cdeac695..7ba8c01a 100644 --- a/collectors/ebpf.plugin/ebpf_cachestat.c +++ b/collectors/ebpf.plugin/ebpf_cachestat.c @@ -3,7 +3,6 @@ #include "ebpf.h" #include "ebpf_cachestat.h" -static ebpf_data_t cachestat_data; netdata_publish_cachestat_t **cachestat_pid; static struct bpf_link **probe_links = NULL; @@ -16,7 +15,8 @@ static netdata_publish_syscall_t cachestat_counter_publish_aggregated[NETDATA_CA netdata_cachestat_pid_t *cachestat_vector = 
NULL; -static netdata_idx_t *cachestat_hash_values = NULL; +static netdata_idx_t cachestat_hash_values[NETDATA_CACHESTAT_END]; +static netdata_idx_t *cachestat_values = NULL; static int read_thread_closed = 1; @@ -24,11 +24,20 @@ struct netdata_static_thread cachestat_threads = {"CACHESTAT KERNEL", NULL, NULL, 1, NULL, NULL, NULL}; -static ebpf_local_maps_t cachestat_maps[] = {{.name = "cstat_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, - .user_input = 0}, - {.name = NULL, .internal_input = 0, .user_input = 0}}; - -static int *map_fd = NULL; +static ebpf_local_maps_t cachestat_maps[] = {{.name = "cstat_global", .internal_input = NETDATA_CACHESTAT_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "cstat_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, + .user_input = 0, + .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "cstat_ctrl", .internal_input = NETDATA_CONTROLLER_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; struct config cachestat_config = { .first_section = NULL, .last_section = NULL, @@ -78,15 +87,17 @@ static void ebpf_cachestat_cleanup(void *ptr) ebpf_cleanup_publish_syscall(cachestat_counter_publish_aggregated); freez(cachestat_vector); - freez(cachestat_hash_values); - - struct bpf_program *prog; - size_t i = 0 ; - bpf_object__for_each_program(prog, objects) { - bpf_link__destroy(probe_links[i]); - i++; + freez(cachestat_values); + + if (probe_links) { + struct bpf_program *prog; + size_t i = 0 ; + bpf_object__for_each_program(prog, objects) { + bpf_link__destroy(probe_links[i]); + i++; + } + bpf_object__close(objects); } - bpf_object__close(objects); } /***************************************************************** @@ 
-100,7 +111,7 @@ static void ebpf_cachestat_cleanup(void *ptr) * * Update publish values before to write dimension. * - * @param out strcuture that will receive data. + * @param out structure that will receive data. * @param mpa calls for mark_page_accessed during the last second. * @param mbd calls for mark_buffer_dirty during the last second. * @param apcl calls for add_to_page_cache_lru during the last second. @@ -247,7 +258,7 @@ static void read_apps_table() netdata_cachestat_pid_t *cv = cachestat_vector; uint32_t key; struct pid_stat *pids = root_of_pids; - int fd = map_fd[NETDATA_CACHESTAT_PID_STATS]; + int fd = cachestat_maps[NETDATA_CACHESTAT_PID_STATS].map_fd; size_t length = sizeof(netdata_cachestat_pid_t)*ebpf_nprocs; while (pids) { key = pids->pid; @@ -269,6 +280,43 @@ static void read_apps_table() } /** + * Update cgroup + * + * Update cgroup data based in + */ +static void ebpf_update_cachestat_cgroup() +{ + netdata_cachestat_pid_t *cv = cachestat_vector; + int fd = cachestat_maps[NETDATA_CACHESTAT_PID_STATS].map_fd; + size_t length = sizeof(netdata_cachestat_pid_t) * ebpf_nprocs; + + ebpf_cgroup_target_t *ect; + pthread_mutex_lock(&mutex_cgroup_shm); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + struct pid_on_target2 *pids; + for (pids = ect->pids; pids; pids = pids->next) { + int pid = pids->pid; + netdata_cachestat_pid_t *out = &pids->cachestat; + if (likely(cachestat_pid) && cachestat_pid[pid]) { + netdata_publish_cachestat_t *in = cachestat_pid[pid]; + + memcpy(out, &in->current, sizeof(netdata_cachestat_pid_t)); + } else { + memset(cv, 0, length); + if (bpf_map_lookup_elem(fd, &pid, cv)) { + continue; + } + + cachestat_apps_accumulator(cv); + + memcpy(out, cv, sizeof(netdata_cachestat_pid_t)); + } + } + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** * Create apps charts * * Call ebpf_create_chart to create the charts on apps submenu. 
@@ -277,43 +325,42 @@ static void read_apps_table() */ void ebpf_cachestat_create_apps_charts(struct ebpf_module *em, void *ptr) { - UNUSED(em); struct target *root = ptr; ebpf_create_charts_on_apps(NETDATA_CACHESTAT_HIT_RATIO_CHART, "The ratio is calculated dividing the Hit pages per total cache accesses without counting dirties.", EBPF_COMMON_DIMENSION_PERCENTAGE, - NETDATA_APPS_CACHESTAT_GROUP, + NETDATA_CACHESTAT_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, 20090, ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root); + root, em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); ebpf_create_charts_on_apps(NETDATA_CACHESTAT_DIRTY_CHART, "Number of pages marked as dirty. When a page is called dirty, this means that the data stored inside the page needs to be written to devices.", EBPF_CACHESTAT_DIMENSION_PAGE, - NETDATA_APPS_CACHESTAT_GROUP, + NETDATA_CACHESTAT_SUBMENU, NETDATA_EBPF_CHART_TYPE_STACKED, 20091, ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root); + root, em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); ebpf_create_charts_on_apps(NETDATA_CACHESTAT_HIT_CHART, "Number of cache access without counting dirty pages and page additions.", EBPF_CACHESTAT_DIMENSION_HITS, - NETDATA_APPS_CACHESTAT_GROUP, + NETDATA_CACHESTAT_SUBMENU, NETDATA_EBPF_CHART_TYPE_STACKED, 20092, ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root); + root, em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); ebpf_create_charts_on_apps(NETDATA_CACHESTAT_MISSES_CHART, "Page caches added without counting dirty pages", EBPF_CACHESTAT_DIMENSION_MISSES, - NETDATA_APPS_CACHESTAT_GROUP, + NETDATA_CACHESTAT_SUBMENU, NETDATA_EBPF_CHART_TYPE_STACKED, 20093, ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root); + root, em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); } /***************************************************************** @@ -331,12 +378,18 @@ static void read_global_table() { uint32_t idx; netdata_idx_t *val = cachestat_hash_values; - netdata_idx_t stored; - int fd = 
map_fd[NETDATA_CACHESTAT_GLOBAL_STATS]; + netdata_idx_t *stored = cachestat_values; + int fd = cachestat_maps[NETDATA_CACHESTAT_GLOBAL_STATS].map_fd; for (idx = NETDATA_KEY_CALLS_ADD_TO_PAGE_CACHE_LRU; idx < NETDATA_CACHESTAT_END; idx++) { - if (!bpf_map_lookup_elem(fd, &idx, &stored)) { - val[idx] = stored; + if (!bpf_map_lookup_elem(fd, &idx, stored)) { + int i; + int end = ebpf_nprocs; + netdata_idx_t total = 0; + for (i = 0; i < end; i++) + total += stored[i]; + + val[idx] = total; } } } @@ -360,7 +413,7 @@ void *ebpf_cachestat_read_hash(void *ptr) ebpf_module_t *em = (ebpf_module_t *)ptr; - usec_t step = NETDATA_LATENCY_CACHESTAT_SLEEP_MS * em->update_time; + usec_t step = NETDATA_LATENCY_CACHESTAT_SLEEP_MS * em->update_every; while (!close_ebpf_plugin) { usec_t dt = heartbeat_next(&hb, step); (void)dt; @@ -428,7 +481,7 @@ void ebpf_cachestat_sum_pids(netdata_publish_cachestat_t *publish, struct pid_on } /** - * Send data to Netdata calling auxiliar functions. + * Send data to Netdata calling auxiliary functions. * * @param root the target list. */ @@ -446,7 +499,7 @@ void ebpf_cache_send_apps_data(struct target *root) uint64_t mpa = current->mark_page_accessed - prev->mark_page_accessed; uint64_t mbd = current->mark_buffer_dirty - prev->mark_buffer_dirty; - w->cachestat.dirty = current->mark_buffer_dirty; + w->cachestat.dirty = mbd; uint64_t apcl = current->add_to_page_cache_lru - prev->add_to_page_cache_lru; uint64_t apd = current->account_page_dirtied - prev->account_page_dirtied; @@ -487,6 +540,297 @@ void ebpf_cache_send_apps_data(struct target *root) } /** + * Cachestat sum PIDs + * + * Sum values for all PIDs associated to a group + * + * @param publish output structure. 
+ * @param root structure with listed IPs + */ +void ebpf_cachestat_sum_cgroup_pids(netdata_publish_cachestat_t *publish, struct pid_on_target2 *root) +{ + memcpy(&publish->prev, &publish->current,sizeof(publish->current)); + memset(&publish->current, 0, sizeof(publish->current)); + + netdata_cachestat_pid_t *dst = &publish->current; + while (root) { + netdata_cachestat_pid_t *src = &root->cachestat; + + dst->account_page_dirtied += src->account_page_dirtied; + dst->add_to_page_cache_lru += src->add_to_page_cache_lru; + dst->mark_buffer_dirty += src->mark_buffer_dirty; + dst->mark_page_accessed += src->mark_page_accessed; + + root = root->next; + } +} + +/** + * Calc chart values + * + * Do necessary math to plot charts. + */ +void ebpf_cachestat_calc_chart_values() +{ + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + ebpf_cachestat_sum_cgroup_pids(&ect->publish_cachestat, ect->pids); + + netdata_cachestat_pid_t *current = &ect->publish_cachestat.current; + netdata_cachestat_pid_t *prev = &ect->publish_cachestat.prev; + + uint64_t mpa = current->mark_page_accessed - prev->mark_page_accessed; + uint64_t mbd = current->mark_buffer_dirty - prev->mark_buffer_dirty; + ect->publish_cachestat.dirty = mbd; + uint64_t apcl = current->add_to_page_cache_lru - prev->add_to_page_cache_lru; + uint64_t apd = current->account_page_dirtied - prev->account_page_dirtied; + + cachestat_update_publish(&ect->publish_cachestat, mpa, mbd, apcl, apd); + } +} + +/** + * Create Systemd cachestat Charts + * + * Create charts when systemd is enabled + * + * @param update_every value to overwrite the update frequency set by the server. 
+ **/ +static void ebpf_create_systemd_cachestat_charts(int update_every) +{ + ebpf_create_charts_on_systemd(NETDATA_CACHESTAT_HIT_RATIO_CHART, + "Hit is calculating using total cache added without dirties per total added because of red misses.", + EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, 21100, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + NETDATA_SYSTEMD_CACHESTAT_HIT_RATIO_CONTEXT, NETDATA_EBPF_MODULE_NAME_CACHESTAT, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_CACHESTAT_DIRTY_CHART, + "Number of dirty pages added to the page cache.", + EBPF_CACHESTAT_DIMENSION_PAGE, NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, 21101, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + NETDATA_SYSTEMD_CACHESTAT_MODIFIED_CACHE_CONTEXT, NETDATA_EBPF_MODULE_NAME_CACHESTAT, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_CACHESTAT_HIT_CHART, "Hits are function calls that Netdata counts.", + EBPF_CACHESTAT_DIMENSION_HITS, NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, 21102, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + NETDATA_SYSTEMD_CACHESTAT_HIT_FILE_CONTEXT, NETDATA_EBPF_MODULE_NAME_CACHESTAT, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_CACHESTAT_MISSES_CHART, "Misses are function calls that Netdata counts.", + EBPF_CACHESTAT_DIMENSION_MISSES, NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, 21103, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + NETDATA_SYSTEMD_CACHESTAT_MISS_FILES_CONTEXT, NETDATA_EBPF_MODULE_NAME_CACHESTAT, + update_every); +} + +/** + * Send Cache Stat charts + * + * Send collected data to Netdata. 
+ * + * @return It returns the status for chart creation, if it is necessary to remove a specific dimension, zero is returned + * otherwise function returns 1 to avoid chart recreation + */ +static int ebpf_send_systemd_cachestat_charts() +{ + int ret = 1; + ebpf_cgroup_target_t *ect; + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_HIT_RATIO_CHART); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_cachestat.ratio); + } else + ret = 0; + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_DIRTY_CHART); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_cachestat.dirty); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_HIT_CHART); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_cachestat.hit); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_MISSES_CHART); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_cachestat.miss); + } + } + write_end_chart(); + + return ret; +} + +/** + * Send Directory Cache charts + * + * Send collected data to Netdata. 
+ */ +static void ebpf_send_specific_cachestat_data(char *type, netdata_publish_cachestat_t *npc) +{ + write_begin_chart(type, NETDATA_CACHESTAT_HIT_RATIO_CHART); + write_chart_dimension(cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_RATIO].name, (long long)npc->ratio); + write_end_chart(); + + write_begin_chart(type, NETDATA_CACHESTAT_DIRTY_CHART); + write_chart_dimension(cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_DIRTY].name, (long long)npc->dirty); + write_end_chart(); + + write_begin_chart(type, NETDATA_CACHESTAT_HIT_CHART); + write_chart_dimension(cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_HIT].name, (long long)npc->hit); + write_end_chart(); + + write_begin_chart(type, NETDATA_CACHESTAT_MISSES_CHART); + write_chart_dimension(cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_MISS].name, (long long)npc->miss); + write_end_chart(); +} + +/** + * Create specific cache Stat charts + * + * Create charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. 
+ */ +static void ebpf_create_specific_cachestat_charts(char *type, int update_every) +{ + ebpf_create_chart(type, NETDATA_CACHESTAT_HIT_RATIO_CHART, + "Hit is calculating using total cache added without dirties per total added because of red misses.", + EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_CACHESTAT_CGROUP_SUBMENU, + NETDATA_CGROUP_CACHESTAT_HIT_RATIO_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5200, + ebpf_create_global_dimension, + cachestat_counter_publish_aggregated, 1, update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); + + ebpf_create_chart(type, NETDATA_CACHESTAT_DIRTY_CHART, + "Number of dirty pages added to the page cache.", + EBPF_CACHESTAT_DIMENSION_PAGE, NETDATA_CACHESTAT_CGROUP_SUBMENU, + NETDATA_CGROUP_CACHESTAT_MODIFIED_CACHE_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5201, + ebpf_create_global_dimension, + &cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_DIRTY], 1, + update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); + + ebpf_create_chart(type, NETDATA_CACHESTAT_HIT_CHART, + "Hits are function calls that Netdata counts.", + EBPF_CACHESTAT_DIMENSION_HITS, NETDATA_CACHESTAT_CGROUP_SUBMENU, + NETDATA_CGROUP_CACHESTAT_HIT_FILES_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5202, + ebpf_create_global_dimension, + &cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_HIT], 1, + update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); + + ebpf_create_chart(type, NETDATA_CACHESTAT_MISSES_CHART, + "Misses are function calls that Netdata counts.", + EBPF_CACHESTAT_DIMENSION_MISSES, NETDATA_CACHESTAT_CGROUP_SUBMENU, + NETDATA_CGROUP_CACHESTAT_MISS_FILES_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5203, + ebpf_create_global_dimension, + &cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_MISS], 1, + update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); +} + +/** + * Obsolete specific cache stat 
charts + * + * Obsolete charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_obsolete_specific_cachestat_charts(char *type, int update_every) +{ + ebpf_write_chart_obsolete(type, NETDATA_CACHESTAT_HIT_RATIO_CHART, + "Hit is calculating using total cache added without dirties per total added because of red misses.", + EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_CACHESTAT_HIT_RATIO_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5200, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_CACHESTAT_DIRTY_CHART, + "Number of dirty pages added to the page cache.", + EBPF_CACHESTAT_DIMENSION_PAGE, NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_CACHESTAT_MODIFIED_CACHE_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5201, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_CACHESTAT_HIT_CHART, + "Hits are function calls that Netdata counts.", + EBPF_CACHESTAT_DIMENSION_HITS, NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_CACHESTAT_HIT_FILES_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5202, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_CACHESTAT_MISSES_CHART, + "Misses are function calls that Netdata counts.", + EBPF_CACHESTAT_DIMENSION_MISSES, NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_CACHESTAT_MISS_FILES_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5203, update_every); +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param update_every value to overwrite the update frequency set by the server. 
+*/ +void ebpf_cachestat_send_cgroup_data(int update_every) +{ + if (!ebpf_cgroup_pids) + return; + + pthread_mutex_lock(&mutex_cgroup_shm); + ebpf_cgroup_target_t *ect; + ebpf_cachestat_calc_chart_values(); + + int has_systemd = shm_ebpf_cgroup.header->systemd_enabled; + if (has_systemd) { + static int systemd_charts = 0; + if (!systemd_charts) { + ebpf_create_systemd_cachestat_charts(update_every); + systemd_charts = 1; + } + + systemd_charts = ebpf_send_systemd_cachestat_charts(); + } + + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_CACHESTAT_CHART) && ect->updated) { + ebpf_create_specific_cachestat_charts(ect->name, update_every); + ect->flags |= NETDATA_EBPF_CGROUP_HAS_CACHESTAT_CHART; + } + + if (ect->flags & NETDATA_EBPF_CGROUP_HAS_CACHESTAT_CHART) { + if (ect->updated) { + ebpf_send_specific_cachestat_data(ect->name, &ect->publish_cachestat); + } else { + ebpf_obsolete_specific_cachestat_charts(ect->name, update_every); + ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_CACHESTAT_CHART; + } + } + } + + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** * Main loop for this collector. 
*/ static void cachestat_collector(ebpf_module_t *em) @@ -494,29 +838,40 @@ static void cachestat_collector(ebpf_module_t *em) cachestat_threads.thread = mallocz(sizeof(netdata_thread_t)); cachestat_threads.start_routine = ebpf_cachestat_read_hash; - map_fd = cachestat_data.map_fd; - netdata_thread_create(cachestat_threads.thread, cachestat_threads.name, NETDATA_THREAD_OPTION_JOINABLE, ebpf_cachestat_read_hash, em); netdata_publish_cachestat_t publish; memset(&publish, 0, sizeof(publish)); int apps = em->apps_charts; + int cgroups = em->cgroup_charts; + int update_every = em->update_every; + int counter = update_every - 1; while (!close_ebpf_plugin) { pthread_mutex_lock(&collect_data_mutex); pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); - if (apps) - read_apps_table(); + if (++counter == update_every) { + counter = 0; + if (apps) + read_apps_table(); - pthread_mutex_lock(&lock); + if (cgroups) + ebpf_update_cachestat_cgroup(); - cachestat_send_global(&publish); + pthread_mutex_lock(&lock); - if (apps) - ebpf_cache_send_apps_data(apps_groups_root_target); + cachestat_send_global(&publish); + + if (apps) + ebpf_cache_send_apps_data(apps_groups_root_target); + + if (cgroups) + ebpf_cachestat_send_cgroup_data(update_every); + + pthread_mutex_unlock(&lock); + } - pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); } } @@ -531,8 +886,10 @@ static void cachestat_collector(ebpf_module_t *em) * Create global charts * * Call ebpf_create_chart to create the charts for the collector. 
+ * + * @param em a pointer to `struct ebpf_module` */ -static void ebpf_create_memory_charts() +static void ebpf_create_memory_charts(ebpf_module_t *em) { ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_HIT_RATIO_CHART, "Hit is calculating using total cache added without dirties per total added because of red misses.", @@ -541,7 +898,7 @@ static void ebpf_create_memory_charts() NETDATA_EBPF_CHART_TYPE_LINE, 21100, ebpf_create_global_dimension, - cachestat_counter_publish_aggregated, 1); + cachestat_counter_publish_aggregated, 1, em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_DIRTY_CHART, "Number of dirty pages added to the page cache.", @@ -550,7 +907,8 @@ static void ebpf_create_memory_charts() NETDATA_EBPF_CHART_TYPE_LINE, 21101, ebpf_create_global_dimension, - &cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_DIRTY], 1); + &cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_DIRTY], 1, + em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_HIT_CHART, "Hits are function calls that Netdata counts.", @@ -559,7 +917,8 @@ static void ebpf_create_memory_charts() NETDATA_EBPF_CHART_TYPE_LINE, 21102, ebpf_create_global_dimension, - &cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_HIT], 1); + &cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_HIT], 1, + em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_MISSES_CHART, "Misses are function calls that Netdata counts.", @@ -568,7 +927,8 @@ static void ebpf_create_memory_charts() NETDATA_EBPF_CHART_TYPE_LINE, 21103, ebpf_create_global_dimension, - &cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_MISS], 1); + &cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_MISS], 1, + em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); fflush(stdout); } @@ -579,17 
+939,20 @@ static void ebpf_create_memory_charts() * We are not testing the return, because callocz does this and shutdown the software * case it was not possible to allocate. * - * @param length is the length for the vectors used inside the collector. + * @param apps is apps enabled? */ -static void ebpf_cachestat_allocate_global_vectors(size_t length) +static void ebpf_cachestat_allocate_global_vectors(int apps) { - cachestat_pid = callocz((size_t)pid_max, sizeof(netdata_publish_cachestat_t *)); + if (apps) + cachestat_pid = callocz((size_t)pid_max, sizeof(netdata_publish_cachestat_t *)); + cachestat_vector = callocz((size_t)ebpf_nprocs, sizeof(netdata_cachestat_pid_t)); - cachestat_hash_values = callocz(length, sizeof(netdata_idx_t)); + cachestat_values = callocz((size_t)ebpf_nprocs, sizeof(netdata_idx_t)); - memset(cachestat_counter_aggregated_data, 0, length * sizeof(netdata_syscall_stat_t)); - memset(cachestat_counter_publish_aggregated, 0, length * sizeof(netdata_publish_syscall_t)); + memset(cachestat_hash_values, 0, NETDATA_CACHESTAT_END * sizeof(netdata_idx_t)); + memset(cachestat_counter_aggregated_data, 0, NETDATA_CACHESTAT_END * sizeof(netdata_syscall_stat_t)); + memset(cachestat_counter_publish_aggregated, 0, NETDATA_CACHESTAT_END * sizeof(netdata_publish_syscall_t)); } /***************************************************************** @@ -613,22 +976,16 @@ void *ebpf_cachestat_thread(void *ptr) ebpf_module_t *em = (ebpf_module_t *)ptr; em->maps = cachestat_maps; - fill_ebpf_data(&cachestat_data); - ebpf_update_module(em, &cachestat_config, NETDATA_CACHESTAT_CONFIG_FILE); - ebpf_update_pid_table(&cachestat_maps[0], em); + ebpf_update_pid_table(&cachestat_maps[NETDATA_CACHESTAT_PID_STATS], em); if (!em->enabled) goto endcachestat; pthread_mutex_lock(&lock); - ebpf_cachestat_allocate_global_vectors(NETDATA_CACHESTAT_END); - if (ebpf_update_kernel(&cachestat_data)) { - pthread_mutex_unlock(&lock); - goto endcachestat; - } + 
ebpf_cachestat_allocate_global_vectors(em->apps_charts); - probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects, cachestat_data.map_fd); + probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects); if (!probe_links) { pthread_mutex_unlock(&lock); goto endcachestat; @@ -642,7 +999,7 @@ void *ebpf_cachestat_thread(void *ptr) cachestat_counter_dimension_name, cachestat_counter_dimension_name, algorithms, NETDATA_CACHESTAT_END); - ebpf_create_memory_charts(); + ebpf_create_memory_charts(em); pthread_mutex_unlock(&lock); diff --git a/collectors/ebpf.plugin/ebpf_cachestat.h b/collectors/ebpf.plugin/ebpf_cachestat.h index 694933e0..7904c811 100644 --- a/collectors/ebpf.plugin/ebpf_cachestat.h +++ b/collectors/ebpf.plugin/ebpf_cachestat.h @@ -3,13 +3,17 @@ #ifndef NETDATA_EBPF_CACHESTAT_H #define NETDATA_EBPF_CACHESTAT_H 1 +// Module name +#define NETDATA_EBPF_MODULE_NAME_CACHESTAT "cachestat" + // charts #define NETDATA_CACHESTAT_HIT_RATIO_CHART "cachestat_ratio" #define NETDATA_CACHESTAT_DIRTY_CHART "cachestat_dirties" #define NETDATA_CACHESTAT_HIT_CHART "cachestat_hits" #define NETDATA_CACHESTAT_MISSES_CHART "cachestat_misses" -#define NETDATA_CACHESTAT_SUBMENU "page cache (eBPF)" +#define NETDATA_CACHESTAT_SUBMENU "page_cache" +#define NETDATA_CACHESTAT_CGROUP_SUBMENU "page cache (eBPF)" #define EBPF_CACHESTAT_DIMENSION_PAGE "pages/s" #define EBPF_CACHESTAT_DIMENSION_HITS "hits/s" @@ -20,6 +24,17 @@ // configuration file #define NETDATA_CACHESTAT_CONFIG_FILE "cachestat.conf" +// Contexts +#define NETDATA_CGROUP_CACHESTAT_HIT_RATIO_CONTEXT "cgroup.cachestat_ratio" +#define NETDATA_CGROUP_CACHESTAT_MODIFIED_CACHE_CONTEXT "cgroup.cachestat_dirties" +#define NETDATA_CGROUP_CACHESTAT_HIT_FILES_CONTEXT "cgroup.cachestat_hits" +#define NETDATA_CGROUP_CACHESTAT_MISS_FILES_CONTEXT "cgroup.cachestat_misses" + +#define NETDATA_SYSTEMD_CACHESTAT_HIT_RATIO_CONTEXT "services.cachestat_ratio" +#define 
NETDATA_SYSTEMD_CACHESTAT_MODIFIED_CACHE_CONTEXT "services.cachestat_dirties" +#define NETDATA_SYSTEMD_CACHESTAT_HIT_FILE_CONTEXT "services.cachestat_hits" +#define NETDATA_SYSTEMD_CACHESTAT_MISS_FILES_CONTEXT "services.cachestat_misses" + // variables enum cachestat_counters { NETDATA_KEY_CALLS_ADD_TO_PAGE_CACHE_LRU, @@ -62,4 +77,6 @@ typedef struct netdata_publish_cachestat { extern void *ebpf_cachestat_thread(void *ptr); extern void clean_cachestat_pid_structures(); +extern struct config cachestat_config; + #endif // NETDATA_EBPF_CACHESTAT_H diff --git a/collectors/ebpf.plugin/ebpf_cgroup.c b/collectors/ebpf.plugin/ebpf_cgroup.c new file mode 100644 index 00000000..ecdc46c0 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_cgroup.c @@ -0,0 +1,348 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include <sys/resource.h> + +#include "ebpf.h" +#include "ebpf_cgroup.h" + +ebpf_cgroup_target_t *ebpf_cgroup_pids = NULL; + +// -------------------------------------------------------------------------------------------------------------------- +// Map shared memory + +/** + * Map Shared Memory locally + * + * Map the shared memory for current process + * + * @param fd file descriptor returned after shm_open was called. + * @param length length of the shared memory + * + * @return It returns a pointer to the region mapped. 
+ */ +static inline void *ebpf_cgroup_map_shm_locally(int fd, size_t length) +{ + void *value; + + value = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (!value) { + error("Cannot map shared memory used between eBPF and cgroup, integration between processes won't happen"); + close(shm_fd_ebpf_cgroup); + shm_fd_ebpf_cgroup = -1; + shm_unlink(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME); + } + + return value; +} + +/** + * Map cgroup shared memory + * + * Map cgroup shared memory from cgroup to plugin + */ +void ebpf_map_cgroup_shared_memory() +{ + static int limit_try = 0; + static time_t next_try = 0; + + if (shm_ebpf_cgroup.header || limit_try > NETDATA_EBPF_CGROUP_MAX_TRIES) + return; + + time_t curr_time = time(NULL); + if (curr_time < next_try) + return; + + limit_try++; + next_try = curr_time + NETDATA_EBPF_CGROUP_NEXT_TRY_SEC; + + shm_fd_ebpf_cgroup = shm_open(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME, O_RDWR, 0660); + if (shm_fd_ebpf_cgroup < 0) { + if (limit_try == NETDATA_EBPF_CGROUP_MAX_TRIES) + error("Shared memory was not initialized, integration between processes won't happen."); + + return; + } + + // Map only header + shm_ebpf_cgroup.header = (netdata_ebpf_cgroup_shm_header_t *) ebpf_cgroup_map_shm_locally(shm_fd_ebpf_cgroup, + sizeof(netdata_ebpf_cgroup_shm_header_t)); + if (!shm_ebpf_cgroup.header) { + limit_try = NETDATA_EBPF_CGROUP_MAX_TRIES + 1; + return; + } + + size_t length = shm_ebpf_cgroup.header->body_length; + + munmap(shm_ebpf_cgroup.header, sizeof(netdata_ebpf_cgroup_shm_header_t)); + + shm_ebpf_cgroup.header = (netdata_ebpf_cgroup_shm_header_t *)ebpf_cgroup_map_shm_locally(shm_fd_ebpf_cgroup, length); + if (!shm_ebpf_cgroup.header) { + limit_try = NETDATA_EBPF_CGROUP_MAX_TRIES + 1; + return; + } + shm_ebpf_cgroup.body = (netdata_ebpf_cgroup_shm_body_t *) ((char *)shm_ebpf_cgroup.header + + sizeof(netdata_ebpf_cgroup_shm_header_t)); + + shm_sem_ebpf_cgroup = sem_open(NETDATA_NAMED_SEMAPHORE_EBPF_CGROUP_NAME, O_CREAT, 0660, 
1); + + if (shm_sem_ebpf_cgroup == SEM_FAILED) { + error("Cannot create semaphore, integration between eBPF and cgroup won't happen"); + munmap(shm_ebpf_cgroup.header, length); + shm_ebpf_cgroup.header = NULL; + close(shm_fd_ebpf_cgroup); + shm_fd_ebpf_cgroup = -1; + shm_unlink(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME); + } +} + +// -------------------------------------------------------------------------------------------------------------------- +// Close and Cleanup + +/** + * Close shared memory + */ +void ebpf_close_cgroup_shm() +{ + if (shm_sem_ebpf_cgroup != SEM_FAILED) { + sem_close(shm_sem_ebpf_cgroup); + sem_unlink(NETDATA_NAMED_SEMAPHORE_EBPF_CGROUP_NAME); + shm_sem_ebpf_cgroup = SEM_FAILED; + } + + if (shm_fd_ebpf_cgroup > 0) { + close(shm_fd_ebpf_cgroup); + shm_unlink(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME); + shm_fd_ebpf_cgroup = -1; + } +} + +/** + * Clean Specific cgroup pid + * + * Clean all PIDs associated with cgroup. + * + * @param pt structure pid on target that will have your PRs removed + */ +static inline void ebpf_clean_specific_cgroup_pids(struct pid_on_target2 *pt) +{ + while (pt) { + struct pid_on_target2 *next_pid = pt->next; + + freez(pt); + pt = next_pid; + } +} + +/** + * Cleanup link list + */ +void ebpf_clean_cgroup_pids() +{ + if (!ebpf_cgroup_pids) + return; + + ebpf_cgroup_target_t *ect = ebpf_cgroup_pids; + while (ect) { + ebpf_cgroup_target_t *next_cgroup = ect->next; + + ebpf_clean_specific_cgroup_pids(ect->pids); + freez(ect); + + ect = next_cgroup; + } + ebpf_cgroup_pids = NULL; +} + +/** + * Remove Cgroup Update Target Update List + * + * Remove from cgroup target and update the link list + */ +static void ebpf_remove_cgroup_target_update_list() +{ + ebpf_cgroup_target_t *next, *ect = ebpf_cgroup_pids; + ebpf_cgroup_target_t *prev = ebpf_cgroup_pids; + while (ect) { + next = ect->next; + if (!ect->updated) { + if (ect == ebpf_cgroup_pids) { + ebpf_cgroup_pids = next; + prev = next; + } else { + prev->next = next; + } + + 
ebpf_clean_specific_cgroup_pids(ect->pids); + freez(ect); + } else { + prev = ect; + } + + ect = next; + } +} + +// -------------------------------------------------------------------------------------------------------------------- +// Fill variables + +/** + * Set Target Data + * + * Set local variable values according shared memory information. + * + * @param out local output variable. + * @param ptr input from shared memory. + */ +static inline void ebpf_cgroup_set_target_data(ebpf_cgroup_target_t *out, netdata_ebpf_cgroup_shm_body_t *ptr) +{ + out->hash = ptr->hash; + snprintfz(out->name, 255, "%s", ptr->name); + out->systemd = ptr->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE; + out->updated = 1; +} + +/** + * Find or create + * + * Find the structure inside the link list or allocate and link when it is not present. + * + * @param ptr Input from shared memory. + * + * @return It returns a pointer for the structure associated with the input. + */ +static ebpf_cgroup_target_t * ebpf_cgroup_find_or_create(netdata_ebpf_cgroup_shm_body_t *ptr) +{ + ebpf_cgroup_target_t *ect, *prev; + for (ect = ebpf_cgroup_pids, prev = ebpf_cgroup_pids; ect; prev = ect, ect = ect->next) { + if (ect->hash == ptr->hash && !strcmp(ect->name, ptr->name)) { + ect->updated = 1; + return ect; + } + } + + ebpf_cgroup_target_t *new_ect = callocz(1, sizeof(ebpf_cgroup_target_t)); + + ebpf_cgroup_set_target_data(new_ect, ptr); + if (!ebpf_cgroup_pids) { + ebpf_cgroup_pids = new_ect; + } else { + prev->next = new_ect; + } + + return new_ect; +} + +/** + * Update pid link list + * + * Update PIDs list associated with specific cgroup. + * + * @param ect cgroup structure where pids will be stored + * @param path file with PIDs associated to cgroup. 
+ */ +static void ebpf_update_pid_link_list(ebpf_cgroup_target_t *ect, char *path) +{ + procfile *ff = procfile_open(path, " \t:", PROCFILE_FLAG_DEFAULT); + if (!ff) + return; + + ff = procfile_readall(ff); + if (!ff) + return; + + size_t lines = procfile_lines(ff), l; + for (l = 0; l < lines ;l++) { + int pid = (int)str2l(procfile_lineword(ff, l, 0)); + if (pid) { + struct pid_on_target2 *pt, *prev; + for (pt = ect->pids, prev = ect->pids; pt; prev = pt, pt = pt->next) { + if (pt->pid == pid) + break; + } + + if (!pt) { + struct pid_on_target2 *w = callocz(1, sizeof(struct pid_on_target2)); + w->pid = pid; + if (!ect->pids) + ect->pids = w; + else + prev->next = w; + } + } + } + + procfile_close(ff); +} + +/** + * Set remove var + * + * Set variable remove. If this variable is not reset, the structure will be removed from link list. + */ + void ebpf_reset_updated_var() + { + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + ect->updated = 0; + } + } + +/** + * Parse cgroup shared memory + * + * This function is responsible to copy necessary data from shared memory to local memory. 
+ */ +void ebpf_parse_cgroup_shm_data() +{ + if (shm_ebpf_cgroup.header) { + sem_wait(shm_sem_ebpf_cgroup); + int i, end = shm_ebpf_cgroup.header->cgroup_root_count; + + pthread_mutex_lock(&mutex_cgroup_shm); + + ebpf_remove_cgroup_target_update_list(); + + ebpf_reset_updated_var(); + + for (i = 0; i < end; i++) { + netdata_ebpf_cgroup_shm_body_t *ptr = &shm_ebpf_cgroup.body[i]; + if (ptr->enabled) { + ebpf_cgroup_target_t *ect = ebpf_cgroup_find_or_create(ptr); + ebpf_update_pid_link_list(ect, ptr->path); + } + } + pthread_mutex_unlock(&mutex_cgroup_shm); + + sem_post(shm_sem_ebpf_cgroup); + } +} + +// -------------------------------------------------------------------------------------------------------------------- +// Create charts + +/** + * Create charts on systemd submenu + * + * @param id the chart id + * @param title the value displayed on vertical axis. + * @param units the value displayed on vertical axis. + * @param family Submenu that the chart will be attached on dashboard. + * @param charttype chart type + * @param order the chart order + * @param algorithm the algorithm used by dimension + * @param context add context for chart + * @param module chart module name, this is the eBPF thread. + * @param update_every value to overwrite the update frequency set by the server. 
+ */ +void ebpf_create_charts_on_systemd(char *id, char *title, char *units, char *family, char *charttype, int order, + char *algorithm, char *context, char *module, int update_every) +{ + ebpf_cgroup_target_t *w; + ebpf_write_chart_cmd(NETDATA_SERVICE_FAMILY, id, title, units, family, charttype, context, + order, update_every, module); + + for (w = ebpf_cgroup_pids; w; w = w->next) { + if (unlikely(w->systemd) && unlikely(w->updated)) + fprintf(stdout, "DIMENSION %s '' %s 1 1\n", w->name, algorithm); + } +} diff --git a/collectors/ebpf.plugin/ebpf_cgroup.h b/collectors/ebpf.plugin/ebpf_cgroup.h new file mode 100644 index 00000000..03969194 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_cgroup.h @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_CGROUP_H +#define NETDATA_EBPF_CGROUP_H 1 + +#define NETDATA_EBPF_CGROUP_MAX_TRIES 3 +#define NETDATA_EBPF_CGROUP_NEXT_TRY_SEC 30 + +#include "ebpf.h" +#include "ebpf_apps.h" + +#define NETDATA_SERVICE_FAMILY "services" + +struct pid_on_target2 { + int32_t pid; + int updated; + + netdata_publish_swap_t swap; + netdata_fd_stat_t fd; + netdata_publish_vfs_t vfs; + ebpf_process_stat_t ps; + netdata_dcstat_pid_t dc; + netdata_publish_shm_t shm; + ebpf_bandwidth_t socket; + netdata_cachestat_pid_t cachestat; + + struct pid_on_target2 *next; +}; + +enum ebpf_cgroup_flags { + NETDATA_EBPF_CGROUP_HAS_PROCESS_CHART = 1, + NETDATA_EBPF_CGROUP_HAS_SWAP_CHART = 1<<2, + NETDATA_EBPF_CGROUP_HAS_SOCKET_CHART = 1<<3, + NETDATA_EBPF_CGROUP_HAS_FD_CHART = 1<<4, + NETDATA_EBPF_CGROUP_HAS_VFS_CHART = 1<<5, + NETDATA_EBPF_CGROUP_HAS_OOMKILL_CHART = 1<<6, + NETDATA_EBPF_CGROUP_HAS_CACHESTAT_CHART = 1<<7, + NETDATA_EBPF_CGROUP_HAS_DC_CHART = 1<<8, + NETDATA_EBPF_CGROUP_HAS_SHM_CHART = 1<<9 +}; + +typedef struct ebpf_cgroup_target { + char name[256]; // title + uint32_t hash; + uint32_t flags; + uint32_t systemd; + uint32_t updated; + + netdata_publish_swap_t publish_systemd_swap; + netdata_fd_stat_t 
publish_systemd_fd; + netdata_publish_vfs_t publish_systemd_vfs; + ebpf_process_stat_t publish_systemd_ps; + netdata_publish_dcstat_t publish_dc; + int oomkill; + netdata_publish_shm_t publish_shm; + ebpf_socket_publish_apps_t publish_socket; + netdata_publish_cachestat_t publish_cachestat; + + struct pid_on_target2 *pids; + struct ebpf_cgroup_target *next; +} ebpf_cgroup_target_t; + +extern void ebpf_map_cgroup_shared_memory(); +extern void ebpf_parse_cgroup_shm_data(); +extern void ebpf_close_cgroup_shm(); +extern void ebpf_clean_cgroup_pids(); +extern void ebpf_create_charts_on_systemd(char *id, char *title, char *units, char *family, char *charttype, int order, + char *algorithm, char *context, char *module, int update_every); + +#endif /* NETDATA_EBPF_CGROUP_H */ diff --git a/collectors/ebpf.plugin/ebpf_dcstat.c b/collectors/ebpf.plugin/ebpf_dcstat.c index 01fd9797..7ae82188 100644 --- a/collectors/ebpf.plugin/ebpf_dcstat.c +++ b/collectors/ebpf.plugin/ebpf_dcstat.c @@ -7,16 +7,14 @@ static char *dcstat_counter_dimension_name[NETDATA_DCSTAT_IDX_END] = { "ratio", static netdata_syscall_stat_t dcstat_counter_aggregated_data[NETDATA_DCSTAT_IDX_END]; static netdata_publish_syscall_t dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_END]; -static ebpf_data_t dcstat_data; - netdata_dcstat_pid_t *dcstat_vector = NULL; netdata_publish_dcstat_t **dcstat_pid = NULL; static struct bpf_link **probe_links = NULL; static struct bpf_object *objects = NULL; -static int *map_fd = NULL; static netdata_idx_t dcstat_hash_values[NETDATA_DCSTAT_IDX_END]; +static netdata_idx_t *dcstat_values = NULL; static int read_thread_closed = 1; @@ -30,9 +28,20 @@ struct netdata_static_thread dcstat_threads = {"DCSTAT KERNEL", NULL, NULL, 1, NULL, NULL, NULL}; -static ebpf_local_maps_t dcstat_maps[] = {{.name = "dcstat_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, - .user_input = 0}, - {.name = NULL, .internal_input = 0, .user_input = 0}}; +static ebpf_local_maps_t dcstat_maps[] = 
{{.name = "dcstat_global", .internal_input = NETDATA_DIRECTORY_CACHE_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "dcstat_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, + .user_input = 0, + .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "dcstat_ctrl", .internal_input = NETDATA_CONTROLLER_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; static ebpf_specify_name_t dc_optional_name[] = { {.program_name = "netdata_lookup_fast", .function_to_attach = "lookup_fast", @@ -51,7 +60,7 @@ static ebpf_specify_name_t dc_optional_name[] = { {.program_name = "netdata_look * * Update publish values before to write dimension. * - * @param out strcuture that will receive data. + * @param out structure that will receive data. * @param cache_access number of access to directory cache. 
* @param not_found number of files not found on the file system */ @@ -117,18 +126,21 @@ static void ebpf_dcstat_cleanup(void *ptr) } freez(dcstat_vector); + freez(dcstat_values); ebpf_cleanup_publish_syscall(dcstat_counter_publish_aggregated); ebpf_dcstat_clean_names(); - struct bpf_program *prog; - size_t i = 0 ; - bpf_object__for_each_program(prog, objects) { - bpf_link__destroy(probe_links[i]); - i++; + if (probe_links) { + struct bpf_program *prog; + size_t i = 0 ; + bpf_object__for_each_program(prog, objects) { + bpf_link__destroy(probe_links[i]); + i++; + } + bpf_object__close(objects); } - bpf_object__close(objects); } /***************************************************************** @@ -146,43 +158,42 @@ static void ebpf_dcstat_cleanup(void *ptr) */ void ebpf_dcstat_create_apps_charts(struct ebpf_module *em, void *ptr) { - UNUSED(em); struct target *root = ptr; ebpf_create_charts_on_apps(NETDATA_DC_HIT_CHART, "Percentage of files listed inside directory cache", EBPF_COMMON_DIMENSION_PERCENTAGE, - NETDATA_APPS_DCSTAT_GROUP, + NETDATA_DIRECTORY_CACHE_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, 20100, ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root); + root, em->update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); ebpf_create_charts_on_apps(NETDATA_DC_REFERENCE_CHART, "Count file access.", EBPF_COMMON_DIMENSION_FILES, - NETDATA_APPS_DCSTAT_GROUP, + NETDATA_DIRECTORY_CACHE_SUBMENU, NETDATA_EBPF_CHART_TYPE_STACKED, 20101, ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root); + root, em->update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); ebpf_create_charts_on_apps(NETDATA_DC_REQUEST_NOT_CACHE_CHART, "Access to files that were not present inside directory cache.", EBPF_COMMON_DIMENSION_FILES, - NETDATA_APPS_DCSTAT_GROUP, + NETDATA_DIRECTORY_CACHE_SUBMENU, NETDATA_EBPF_CHART_TYPE_STACKED, 20102, ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root); + root, em->update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); ebpf_create_charts_on_apps(NETDATA_DC_REQUEST_NOT_FOUND_CHART, 
"Number of requests for files that were not found on filesystem.", EBPF_COMMON_DIMENSION_FILES, - NETDATA_APPS_DCSTAT_GROUP, + NETDATA_DIRECTORY_CACHE_SUBMENU, NETDATA_EBPF_CHART_TYPE_STACKED, 20103, ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root); + root, em->update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); } /***************************************************************** @@ -252,7 +263,7 @@ static void read_apps_table() netdata_dcstat_pid_t *cv = dcstat_vector; uint32_t key; struct pid_stat *pids = root_of_pids; - int fd = map_fd[NETDATA_DCSTAT_PID_STATS]; + int fd = dcstat_maps[NETDATA_DCSTAT_PID_STATS].map_fd; size_t length = sizeof(netdata_dcstat_pid_t)*ebpf_nprocs; while (pids) { key = pids->pid; @@ -274,6 +285,43 @@ static void read_apps_table() } /** + * Update cgroup + * + * Update cgroup data based in + */ +static void ebpf_update_dc_cgroup() +{ + netdata_dcstat_pid_t *cv = dcstat_vector; + int fd = dcstat_maps[NETDATA_DCSTAT_PID_STATS].map_fd; + size_t length = sizeof(netdata_dcstat_pid_t)*ebpf_nprocs; + + ebpf_cgroup_target_t *ect; + pthread_mutex_lock(&mutex_cgroup_shm); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + struct pid_on_target2 *pids; + for (pids = ect->pids; pids; pids = pids->next) { + int pid = pids->pid; + netdata_dcstat_pid_t *out = &pids->dc; + if (likely(dcstat_pid) && dcstat_pid[pid]) { + netdata_publish_dcstat_t *in = dcstat_pid[pid]; + + memcpy(out, &in->curr, sizeof(netdata_dcstat_pid_t)); + } else { + memset(cv, 0, length); + if (bpf_map_lookup_elem(fd, &pid, cv)) { + continue; + } + + dcstat_apps_accumulator(cv); + + memcpy(out, cv, sizeof(netdata_dcstat_pid_t)); + } + } + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** * Read global table * * Read the table with number of calls for all functions @@ -282,12 +330,18 @@ static void read_global_table() { uint32_t idx; netdata_idx_t *val = dcstat_hash_values; - netdata_idx_t stored; - int fd = map_fd[NETDATA_DCSTAT_GLOBAL_STATS]; + netdata_idx_t *stored = 
dcstat_values; + int fd = dcstat_maps[NETDATA_DCSTAT_GLOBAL_STATS].map_fd; for (idx = NETDATA_KEY_DC_REFERENCE; idx < NETDATA_DIRECTORY_CACHE_END; idx++) { - if (!bpf_map_lookup_elem(fd, &idx, &stored)) { - val[idx] = stored; + if (!bpf_map_lookup_elem(fd, &idx, stored)) { + int i; + int end = ebpf_nprocs; + netdata_idx_t total = 0; + for (i = 0; i < end; i++) + total += stored[i]; + + val[idx] = total; } } } @@ -311,7 +365,7 @@ void *ebpf_dcstat_read_hash(void *ptr) ebpf_module_t *em = (ebpf_module_t *)ptr; - usec_t step = NETDATA_LATENCY_DCSTAT_SLEEP_MS * em->update_time; + usec_t step = NETDATA_LATENCY_DCSTAT_SLEEP_MS * em->update_every; while (!close_ebpf_plugin) { usec_t dt = heartbeat_next(&hb, step); (void)dt; @@ -350,7 +404,7 @@ void ebpf_dcstat_sum_pids(netdata_publish_dcstat_t *publish, struct pid_on_targe } /** - * Send data to Netdata calling auxiliar functions. + * Send data to Netdata calling auxiliary functions. * * @param root the target list. */ @@ -457,6 +511,324 @@ static void dcstat_send_global(netdata_publish_dcstat_t *publish) } /** + * Create specific directory cache charts + * + * Create charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. 
+ */ +static void ebpf_create_specific_dc_charts(char *type, int update_every) +{ + ebpf_create_chart(type, NETDATA_DC_HIT_CHART, "Percentage of files listed inside directory cache.", + EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_CGROUP_DC_HIT_RATIO_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5700, + ebpf_create_global_dimension, + dcstat_counter_publish_aggregated, 1, update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); + + ebpf_create_chart(type, NETDATA_DC_REFERENCE_CHART, "Count file access.", + EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_CGROUP_DC_REFERENCE_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5701, + ebpf_create_global_dimension, + &dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_REFERENCE], 1, + update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); + + ebpf_create_chart(type, NETDATA_DC_REQUEST_NOT_CACHE_CHART, + "Access to files that were not present inside directory cache.", + EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_CGROUP_DC_NOT_CACHE_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5702, + ebpf_create_global_dimension, + &dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_SLOW], 1, + update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); + + ebpf_create_chart(type, NETDATA_DC_REQUEST_NOT_FOUND_CHART, + "Number of requests for files that were not found on filesystem.", + EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_CGROUP_DC_NOT_FOUND_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5703, + ebpf_create_global_dimension, + &dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_MISS], 1, + update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); +} + +/** + * Obsolete specific directory cache charts + * + * Obsolete charts for cgroup/application. + * + * @param type the chart type. 
+ * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_obsolete_specific_dc_charts(char *type, int update_every) +{ + ebpf_write_chart_obsolete(type, NETDATA_DC_HIT_CHART, + "Percentage of files listed inside directory cache.", + EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_HIT_RATIO_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5700, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_DC_REFERENCE_CHART, + "Count file access.", + EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_REFERENCE_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5701, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_DC_REQUEST_NOT_CACHE_CHART, + "Access to files that were not present inside directory cache.", + EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_NOT_CACHE_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5702, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_DC_REQUEST_NOT_FOUND_CHART, + "Number of requests for files that were not found on filesystem.", + EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_NOT_FOUND_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5703, update_every); +} + +/** + * Cachestat sum PIDs + * + * Sum values for all PIDs associated to a group + * + * @param publish output structure. 
+ * @param root structure with listed IPs + */ +void ebpf_dc_sum_cgroup_pids(netdata_publish_dcstat_t *publish, struct pid_on_target2 *root) +{ + memset(&publish->curr, 0, sizeof(netdata_dcstat_pid_t)); + netdata_dcstat_pid_t *dst = &publish->curr; + while (root) { + netdata_dcstat_pid_t *src = &root->dc; + + dst->cache_access += src->cache_access; + dst->file_system += src->file_system; + dst->not_found += src->not_found; + + root = root->next; + } +} + +/** + * Calc chart values + * + * Do necessary math to plot charts. + */ +void ebpf_dc_calc_chart_values() +{ + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + ebpf_dc_sum_cgroup_pids(&ect->publish_dc, ect->pids); + uint64_t cache = ect->publish_dc.curr.cache_access; + uint64_t not_found = ect->publish_dc.curr.not_found; + + dcstat_update_publish(&ect->publish_dc, cache, not_found); + + ect->publish_dc.cache_access = (long long)ect->publish_dc.curr.cache_access - + (long long)ect->publish_dc.prev.cache_access; + ect->publish_dc.prev.cache_access = ect->publish_dc.curr.cache_access; + + if (ect->publish_dc.curr.not_found < ect->publish_dc.prev.not_found) { + ect->publish_dc.prev.not_found = 0; + } + } +} + +/** + * Create Systemd directory cache Charts + * + * Create charts when systemd is enabled + * + * @param update_every value to overwrite the update frequency set by the server. 
+ **/ +static void ebpf_create_systemd_dc_charts(int update_every) +{ + ebpf_create_charts_on_systemd(NETDATA_DC_HIT_CHART, + "Percentage of files listed inside directory cache.", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + 21200, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + NETDATA_SYSTEMD_DC_HIT_RATIO_CONTEXT, NETDATA_EBPF_MODULE_NAME_DCSTAT, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_DC_REFERENCE_CHART, + "Count file access.", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + 21201, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + NETDATA_SYSTEMD_DC_REFERENCE_CONTEXT, NETDATA_EBPF_MODULE_NAME_DCSTAT, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_DC_REQUEST_NOT_CACHE_CHART, + "Access to files that were not present inside directory cache.", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + 21202, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + NETDATA_SYSTEMD_DC_NOT_CACHE_CONTEXT, NETDATA_EBPF_MODULE_NAME_DCSTAT, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_DC_REQUEST_NOT_FOUND_CHART, + "Number of requests for files that were not found on filesystem.", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + 21202, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + NETDATA_SYSTEMD_DC_NOT_FOUND_CONTEXT, NETDATA_EBPF_MODULE_NAME_DCSTAT, + update_every); +} + +/** + * Send Directory Cache charts + * + * Send collected data to Netdata. 
+ * + * @return It returns the status for chart creation, if it is necessary to remove a specific dimension, zero is returned + * otherwise function returns 1 to avoid chart recreation + */ +static int ebpf_send_systemd_dc_charts() +{ + int ret = 1; + collected_number value; + ebpf_cgroup_target_t *ect; + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_HIT_CHART); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long) ect->publish_dc.ratio); + } else + ret = 0; + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REFERENCE_CHART); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long) ect->publish_dc.cache_access); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REQUEST_NOT_CACHE_CHART); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + value = (collected_number) (!ect->publish_dc.cache_access) ? 0 : + (long long )ect->publish_dc.curr.file_system - (long long)ect->publish_dc.prev.file_system; + ect->publish_dc.prev.file_system = ect->publish_dc.curr.file_system; + + write_chart_dimension(ect->name, (long long) value); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REQUEST_NOT_FOUND_CHART); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + value = (collected_number) (!ect->publish_dc.cache_access) ? 
0 : + (long long)ect->publish_dc.curr.not_found - (long long)ect->publish_dc.prev.not_found; + + ect->publish_dc.prev.not_found = ect->publish_dc.curr.not_found; + + write_chart_dimension(ect->name, (long long) value); + } + } + write_end_chart(); + + return ret; +} + +/** + * Send Directory Cache charts + * + * Send collected data to Netdata. + * + */ +static void ebpf_send_specific_dc_data(char *type, netdata_publish_dcstat_t *pdc) +{ + collected_number value; + write_begin_chart(type, NETDATA_DC_HIT_CHART); + write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_RATIO].name, + (long long) pdc->ratio); + write_end_chart(); + + write_begin_chart(type, NETDATA_DC_REFERENCE_CHART); + write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_REFERENCE].name, + (long long) pdc->cache_access); + write_end_chart(); + + value = (collected_number) (!pdc->cache_access) ? 0 : + (long long )pdc->curr.file_system - (long long)pdc->prev.file_system; + pdc->prev.file_system = pdc->curr.file_system; + + write_begin_chart(type, NETDATA_DC_REQUEST_NOT_CACHE_CHART); + write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_SLOW].name, (long long) value); + write_end_chart(); + + value = (collected_number) (!pdc->cache_access) ? 0 : + (long long)pdc->curr.not_found - (long long)pdc->prev.not_found; + pdc->prev.not_found = pdc->curr.not_found; + + write_begin_chart(type, NETDATA_DC_REQUEST_NOT_FOUND_CHART); + write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_MISS].name, (long long) value); + write_end_chart(); +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param update_every value to overwrite the update frequency set by the server. 
+*/ +void ebpf_dc_send_cgroup_data(int update_every) +{ + if (!ebpf_cgroup_pids) + return; + + pthread_mutex_lock(&mutex_cgroup_shm); + ebpf_cgroup_target_t *ect; + ebpf_dc_calc_chart_values(); + + int has_systemd = shm_ebpf_cgroup.header->systemd_enabled; + if (has_systemd) { + static int systemd_charts = 0; + if (!systemd_charts) { + ebpf_create_systemd_dc_charts(update_every); + systemd_charts = 1; + } + + systemd_charts = ebpf_send_systemd_dc_charts(); + } + + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_DC_CHART) && ect->updated) { + ebpf_create_specific_dc_charts(ect->name, update_every); + ect->flags |= NETDATA_EBPF_CGROUP_HAS_DC_CHART; + } + + if (ect->flags & NETDATA_EBPF_CGROUP_HAS_DC_CHART) { + if (ect->updated) { + ebpf_send_specific_dc_data(ect->name, &ect->publish_dc); + } else { + ebpf_obsolete_specific_dc_charts(ect->name, update_every); + ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_DC_CHART; + } + } + } + + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** * Main loop for this collector. 
*/ static void dcstat_collector(ebpf_module_t *em) @@ -464,29 +836,40 @@ static void dcstat_collector(ebpf_module_t *em) dcstat_threads.thread = mallocz(sizeof(netdata_thread_t)); dcstat_threads.start_routine = ebpf_dcstat_read_hash; - map_fd = dcstat_data.map_fd; - netdata_thread_create(dcstat_threads.thread, dcstat_threads.name, NETDATA_THREAD_OPTION_JOINABLE, ebpf_dcstat_read_hash, em); netdata_publish_dcstat_t publish; memset(&publish, 0, sizeof(publish)); int apps = em->apps_charts; + int cgroups = em->cgroup_charts; + int update_every = em->update_every; + int counter = update_every - 1; while (!close_ebpf_plugin) { pthread_mutex_lock(&collect_data_mutex); pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); - if (apps) - read_apps_table(); + if (++counter == update_every) { + counter = 0; + if (apps) + read_apps_table(); - pthread_mutex_lock(&lock); + if (cgroups) + ebpf_update_dc_cgroup(); - dcstat_send_global(&publish); + pthread_mutex_lock(&lock); - if (apps) - ebpf_dcache_send_apps_data(apps_groups_root_target); + dcstat_send_global(&publish); + + if (apps) + ebpf_dcache_send_apps_data(apps_groups_root_target); + + if (cgroups) + ebpf_dc_send_cgroup_data(update_every); + + pthread_mutex_unlock(&lock); + } - pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); } } @@ -501,26 +884,29 @@ static void dcstat_collector(ebpf_module_t *em) * Create filesystem charts * * Call ebpf_create_chart to create the charts for the collector. + * + * @param update_every value to overwrite the update frequency set by the server. 
*/ -static void ebpf_create_filesystem_charts() +static void ebpf_create_filesystem_charts(int update_every) { ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, NETDATA_DC_HIT_CHART, "Percentage of files listed inside directory cache", - EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_DIRECTORY_FILESYSTEM_SUBMENU, + EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_DIRECTORY_CACHE_SUBMENU, NULL, NETDATA_EBPF_CHART_TYPE_LINE, 21200, ebpf_create_global_dimension, - dcstat_counter_publish_aggregated, 1); + dcstat_counter_publish_aggregated, 1, update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, NETDATA_DC_REFERENCE_CHART, "Variables used to calculate hit ratio.", - EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_FILESYSTEM_SUBMENU, + EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, NULL, NETDATA_EBPF_CHART_TYPE_LINE, 21201, ebpf_create_global_dimension, - &dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_REFERENCE], 3); + &dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_REFERENCE], 3, + update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); fflush(stdout); } @@ -531,15 +917,18 @@ static void ebpf_create_filesystem_charts() * We are not testing the return, because callocz does this and shutdown the software * case it was not possible to allocate. * - * @param length is the length for the vectors used inside the collector. + * @param apps is apps enabled? 
*/ -static void ebpf_dcstat_allocate_global_vectors(size_t length) +static void ebpf_dcstat_allocate_global_vectors(int apps) { - dcstat_pid = callocz((size_t)pid_max, sizeof(netdata_publish_dcstat_t *)); + if (apps) + dcstat_pid = callocz((size_t)pid_max, sizeof(netdata_publish_dcstat_t *)); + dcstat_vector = callocz((size_t)ebpf_nprocs, sizeof(netdata_dcstat_pid_t)); + dcstat_values = callocz((size_t)ebpf_nprocs, sizeof(netdata_idx_t)); - memset(dcstat_counter_aggregated_data, 0, length*sizeof(netdata_syscall_stat_t)); - memset(dcstat_counter_publish_aggregated, 0, length*sizeof(netdata_publish_syscall_t)); + memset(dcstat_counter_aggregated_data, 0, NETDATA_DCSTAT_IDX_END * sizeof(netdata_syscall_stat_t)); + memset(dcstat_counter_publish_aggregated, 0, NETDATA_DCSTAT_IDX_END * sizeof(netdata_publish_syscall_t)); } /***************************************************************** @@ -563,21 +952,19 @@ void *ebpf_dcstat_thread(void *ptr) ebpf_module_t *em = (ebpf_module_t *)ptr; em->maps = dcstat_maps; - fill_ebpf_data(&dcstat_data); - ebpf_update_module(em, &dcstat_config, NETDATA_DIRECTORY_DCSTAT_CONFIG_FILE); - ebpf_update_pid_table(&dcstat_maps[0], em); + ebpf_update_pid_table(&dcstat_maps[NETDATA_DCSTAT_PID_STATS], em); ebpf_update_names(dc_optional_name, em); if (!em->enabled) goto enddcstat; - ebpf_dcstat_allocate_global_vectors(NETDATA_DCSTAT_IDX_END); + ebpf_dcstat_allocate_global_vectors(em->apps_charts); pthread_mutex_lock(&lock); - probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects, dcstat_data.map_fd); + probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects); if (!probe_links) { pthread_mutex_unlock(&lock); goto enddcstat; @@ -592,7 +979,7 @@ void *ebpf_dcstat_thread(void *ptr) dcstat_counter_dimension_name, dcstat_counter_dimension_name, algorithms, NETDATA_DCSTAT_IDX_END); - ebpf_create_filesystem_charts(); + ebpf_create_filesystem_charts(em->update_every); pthread_mutex_unlock(&lock); 
dcstat_collector(em); diff --git a/collectors/ebpf.plugin/ebpf_dcstat.h b/collectors/ebpf.plugin/ebpf_dcstat.h index ad4bd199..c5e6e2bc 100644 --- a/collectors/ebpf.plugin/ebpf_dcstat.h +++ b/collectors/ebpf.plugin/ebpf_dcstat.h @@ -3,6 +3,8 @@ #ifndef NETDATA_EBPF_DCSTAT_H #define NETDATA_EBPF_DCSTAT_H 1 +// Module name +#define NETDATA_EBPF_MODULE_NAME_DCSTAT "dcstat" // charts #define NETDATA_DC_HIT_CHART "dc_hit_ratio" @@ -11,11 +13,21 @@ #define NETDATA_DC_REQUEST_NOT_FOUND_CHART "dc_not_found" #define NETDATA_DIRECTORY_CACHE_SUBMENU "directory cache (eBPF)" -#define NETDATA_DIRECTORY_FILESYSTEM_SUBMENU "Directory Cache (eBPF)" // configuration file #define NETDATA_DIRECTORY_DCSTAT_CONFIG_FILE "dcstat.conf" +// Contexts +#define NETDATA_CGROUP_DC_HIT_RATIO_CONTEXT "cgroup.dc_ratio" +#define NETDATA_CGROUP_DC_REFERENCE_CONTEXT "cgroup.dc_reference" +#define NETDATA_CGROUP_DC_NOT_CACHE_CONTEXT "cgroup.dc_not_cache" +#define NETDATA_CGROUP_DC_NOT_FOUND_CONTEXT "cgroup.dc_not_found" + +#define NETDATA_SYSTEMD_DC_HIT_RATIO_CONTEXT "services.dc_ratio" +#define NETDATA_SYSTEMD_DC_REFERENCE_CONTEXT "services.dc_reference" +#define NETDATA_SYSTEMD_DC_NOT_CACHE_CONTEXT "services.dc_not_cache" +#define NETDATA_SYSTEMD_DC_NOT_FOUND_CONTEXT "services.dc_not_found" + #define NETDATA_LATENCY_DCSTAT_SLEEP_MS 700000ULL enum directory_cache_indexes { @@ -60,5 +72,6 @@ typedef struct netdata_publish_dcstat { extern void *ebpf_dcstat_thread(void *ptr); extern void ebpf_dcstat_create_apps_charts(struct ebpf_module *em, void *ptr); extern void clean_dcstat_pid_structures(); +extern struct config dcstat_config; #endif // NETDATA_EBPF_DCSTAT_H diff --git a/collectors/ebpf.plugin/ebpf_disk.c b/collectors/ebpf.plugin/ebpf_disk.c new file mode 100644 index 00000000..6e139ec9 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_disk.c @@ -0,0 +1,842 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include <sys/resource.h> +#include <stdlib.h> + +#include "ebpf.h" +#include "ebpf_disk.h" + 
+struct config disk_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +static ebpf_local_maps_t disk_maps[] = {{.name = "tbl_disk_iocall", .internal_input = NETDATA_DISK_HISTOGRAM_LENGTH, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; +static avl_tree_lock disk_tree; +netdata_ebpf_disks_t *disk_list = NULL; + +char *tracepoint_block_type = { "block"} ; +char *tracepoint_block_issue = { "block_rq_issue" }; +char *tracepoint_block_rq_complete = { "block_rq_complete" }; + +static struct bpf_link **probe_links = NULL; +static struct bpf_object *objects = NULL; + +static int was_block_issue_enabled = 0; +static int was_block_rq_complete_enabled = 0; + +static char **dimensions = NULL; +static netdata_syscall_stat_t disk_aggregated_data[NETDATA_EBPF_HIST_MAX_BINS]; +static netdata_publish_syscall_t disk_publish_aggregated[NETDATA_EBPF_HIST_MAX_BINS]; + +static int read_thread_closed = 1; + +static netdata_idx_t *disk_hash_values = NULL; +static struct netdata_static_thread disk_threads = {"DISK KERNEL", + NULL, NULL, 1, NULL, + NULL, NULL }; + +ebpf_publish_disk_t *plot_disks = NULL; +pthread_mutex_t plot_mutex; + +/***************************************************************** + * + * FUNCTIONS TO MANIPULATE HARD DISKS + * + *****************************************************************/ + +/** + * Parse start + * + * Parse start address of disk + * + * @param w structure where data is stored + * @param filename variable used to store value + * + * @return It returns 0 on success and -1 otherwise + */ +static inline int ebpf_disk_parse_start(netdata_ebpf_disks_t *w, char *filename) +{ + char content[FILENAME_MAX + 
1]; + int fd = open(filename, O_RDONLY, 0); + if (fd < 0) { + return -1; + } + + ssize_t file_length = read(fd, content, 4095); + if (file_length > 0) { + if (file_length > FILENAME_MAX) + file_length = FILENAME_MAX; + + content[file_length] = '\0'; + w->start = strtoul(content, NULL, 10); + } + close(fd); + + return 0; +} + +/** + * Parse uevent + * + * Parse uevent file + * + * @param w structure where data is stored + * @param filename variable used to store value + * + * @return It returns 0 on success and -1 otherwise + */ +static inline int ebpf_parse_uevent(netdata_ebpf_disks_t *w, char *filename) +{ + char content[FILENAME_MAX + 1]; + int fd = open(filename, O_RDONLY, 0); + if (fd < 0) { + return -1; + } + + ssize_t file_length = read(fd, content, FILENAME_MAX); + if (file_length > 0) { + if (file_length > FILENAME_MAX) + file_length = FILENAME_MAX; + + content[file_length] = '\0'; + + char *s = strstr(content, "PARTNAME=EFI"); + if (s) { + w->main->boot_partition = w; + w->flags |= NETDATA_DISK_HAS_EFI; + w->boot_chart = strdupz("disk_bootsector"); + } + } + close(fd); + + return 0; +} + +/** + * Parse Size + * + * @param w structure where data is stored + * @param filename variable used to store value + * + * @return It returns 0 on success and -1 otherwise + */ +static inline int ebpf_parse_size(netdata_ebpf_disks_t *w, char *filename) +{ + char content[FILENAME_MAX + 1]; + int fd = open(filename, O_RDONLY, 0); + if (fd < 0) { + return -1; + } + + ssize_t file_length = read(fd, content, FILENAME_MAX); + if (file_length > 0) { + if (file_length > FILENAME_MAX) + file_length = FILENAME_MAX; + + content[file_length] = '\0'; + w->end = w->start + strtoul(content, NULL, 10) -1; + } + close(fd); + + return 0; +} + +/** + * Read Disk information + * + * Read disk information from /sys/block + * + * @param w structure where data is stored + * @param name disk name + */ +static void ebpf_read_disk_info(netdata_ebpf_disks_t *w, char *name) +{ + static 
netdata_ebpf_disks_t *main_disk = NULL; + static uint32_t key = 0; + char *path = { "/sys/block" }; + char disk[NETDATA_DISK_NAME_LEN + 1]; + char filename[FILENAME_MAX + 1]; + snprintfz(disk, NETDATA_DISK_NAME_LEN, "%s", name); + size_t length = strlen(disk); + if (!length) { + return; + } + + length--; + size_t curr = length; + while (isdigit((int)disk[length])) { + disk[length--] = '\0'; + } + + // We are looking for partition information, if it is a device we will ignore it. + if (curr == length) { + main_disk = w; + key = MKDEV(w->major, w->minor); + w->bootsector_key = key; + return; + } + w->bootsector_key = key; + w->main = main_disk; + + snprintfz(filename, FILENAME_MAX, "%s/%s/%s/uevent", path, disk, name); + if (ebpf_parse_uevent(w, filename)) + return; + + snprintfz(filename, FILENAME_MAX, "%s/%s/%s/start", path, disk, name); + if (ebpf_disk_parse_start(w, filename)) + return; + + snprintfz(filename, FILENAME_MAX, "%s/%s/%s/size", path, disk, name); + ebpf_parse_size(w, filename); +} + +/** + * New encode dev + * + * New encode algorithm extracted from https://elixir.bootlin.com/linux/v5.10.8/source/include/linux/kdev_t.h#L39 + * + * @param major driver major number + * @param minor driver minor number + * + * @return + */ +static inline uint32_t netdata_new_encode_dev(uint32_t major, uint32_t minor) { + return (minor & 0xff) | (major << 8) | ((minor & ~0xff) << 12); +} + +/** + * Compare disks + * + * Compare major and minor values to add disks to tree. + * + * @param a pointer to netdata_ebpf_disks + * @param b pointer to netdata_ebpf_disks + * + * @return It returns 0 case the values are equal, 1 case a is bigger than b and -1 case a is smaller than b. +*/ +static int ebpf_compare_disks(void *a, void *b) +{ + netdata_ebpf_disks_t *ptr1 = a; + netdata_ebpf_disks_t *ptr2 = b; + + if (ptr1->dev > ptr2->dev) + return 1; + if (ptr1->dev < ptr2->dev) + return -1; + + return 0; +} + +/** + * Update listen table + * + * Update link list when it is necessary. 
+ * + * @param name disk name + * @param major major disk identifier + * @param minor minor disk identifier + * @param current_time current timestamp + */ +static void update_disk_table(char *name, int major, int minor, time_t current_time) +{ + netdata_ebpf_disks_t find; + netdata_ebpf_disks_t *w; + size_t length; + + uint32_t dev = netdata_new_encode_dev(major, minor); + find.dev = dev; + netdata_ebpf_disks_t *ret = (netdata_ebpf_disks_t *) avl_search_lock(&disk_tree, (avl_t *)&find); + if (ret) { // Disk is already present + ret->flags |= NETDATA_DISK_IS_HERE; + ret->last_update = current_time; + return; + } + + netdata_ebpf_disks_t *update_next = disk_list; + if (likely(disk_list)) { + netdata_ebpf_disks_t *move = disk_list; + while (move) { + if (dev == move->dev) + return; + + update_next = move; + move = move->next; + } + + w = callocz(1, sizeof(netdata_ebpf_disks_t)); + length = strlen(name); + if (length >= NETDATA_DISK_NAME_LEN) + length = NETDATA_DISK_NAME_LEN; + + memcpy(w->family, name, length); + w->family[length] = '\0'; + w->major = major; + w->minor = minor; + w->dev = netdata_new_encode_dev(major, minor); + update_next->next = w; + } else { + disk_list = callocz(1, sizeof(netdata_ebpf_disks_t)); + length = strlen(name); + if (length >= NETDATA_DISK_NAME_LEN) + length = NETDATA_DISK_NAME_LEN; + + memcpy(disk_list->family, name, length); + disk_list->family[length] = '\0'; + disk_list->major = major; + disk_list->minor = minor; + disk_list->dev = netdata_new_encode_dev(major, minor); + + w = disk_list; + } + + ebpf_read_disk_info(w, name); + + netdata_ebpf_disks_t *check; + check = (netdata_ebpf_disks_t *) avl_insert_lock(&disk_tree, (avl_t *)w); + if (check != w) + error("Internal error, cannot insert the AVL tree."); + +#ifdef NETDATA_INTERNAL_CHECKS + info("The Latency is monitoring the hard disk %s (Major = %d, Minor = %d, Device = %u)", name, major, minor,w->dev); +#endif + + w->flags |= NETDATA_DISK_IS_HERE; +} + +/** + * Read Local Disks + * 
+ * Parse /proc/partitions to get block disks used to measure latency. + * + * @return It returns 0 on success and -1 otherwise + */ +static int read_local_disks() +{ + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, NETDATA_EBPF_PROC_PARTITIONS); + procfile *ff = procfile_open(filename, " \t:", PROCFILE_FLAG_DEFAULT); + if (!ff) + return -1; + + ff = procfile_readall(ff); + if (!ff) + return -1; + + size_t lines = procfile_lines(ff), l; + time_t current_time = now_realtime_sec(); + for(l = 2; l < lines ;l++) { + size_t words = procfile_linewords(ff, l); + // This is header or end of file + if (unlikely(words < 4)) + continue; + + int major = (int)strtol(procfile_lineword(ff, l, 0), NULL, 10); + // The main goal of this thread is to measure block devices, so any block device with major number + // smaller than 7 according /proc/devices is not "important". + if (major > 7) { + int minor = (int)strtol(procfile_lineword(ff, l, 1), NULL, 10); + update_disk_table(procfile_lineword(ff, l, 3), major, minor, current_time); + } + } + + procfile_close(ff); + + return 0; +} + +/** + * Update disks + * + * @param em main thread structure + */ +void ebpf_update_disks(ebpf_module_t *em) +{ + static time_t update_every = 0; + time_t curr = now_realtime_sec(); + if (curr < update_every) + return; + + update_every = curr + 5 * em->update_every; + + (void)read_local_disks(); +} + +/***************************************************************** + * + * FUNCTIONS TO CLOSE THE THREAD + * + *****************************************************************/ + +/** + * Disk disable tracepoints + * + * Disable tracepoints when the plugin was responsible to enable it. 
+ */ +static void ebpf_disk_disable_tracepoints() +{ + char *default_message = { "Cannot disable the tracepoint" }; + if (!was_block_issue_enabled) { + if (ebpf_disable_tracing_values(tracepoint_block_type, tracepoint_block_issue)) + error("%s %s/%s.", default_message, tracepoint_block_type, tracepoint_block_issue); + } + + if (!was_block_rq_complete_enabled) { + if (ebpf_disable_tracing_values(tracepoint_block_type, tracepoint_block_rq_complete)) + error("%s %s/%s.", default_message, tracepoint_block_type, tracepoint_block_rq_complete); + } +} + +/** + * Cleanup plot disks + * + * Clean disk list + */ +static void ebpf_cleanup_plot_disks() +{ + ebpf_publish_disk_t *move = plot_disks, *next; + while (move) { + next = move->next; + + freez(move); + + move = next; + } +} + +/** + * Cleanup Disk List + */ +static void ebpf_cleanup_disk_list() +{ + netdata_ebpf_disks_t *move = disk_list; + while (move) { + netdata_ebpf_disks_t *next = move->next; + + freez(move->histogram.name); + freez(move->boot_chart); + freez(move); + + move = next; + } +} + +/** + * Clean up the main thread. + * + * @param ptr thread data. 
+ */ +static void ebpf_disk_cleanup(void *ptr) +{ + ebpf_disk_disable_tracepoints(); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) + return; + + heartbeat_t hb; + heartbeat_init(&hb); + uint32_t tick = 2 * USEC_PER_MS; + while (!read_thread_closed) { + usec_t dt = heartbeat_next(&hb, tick); + UNUSED(dt); + } + + if (dimensions) + ebpf_histogram_dimension_cleanup(dimensions, NETDATA_EBPF_HIST_MAX_BINS); + + freez(disk_hash_values); + freez(disk_threads.thread); + pthread_mutex_destroy(&plot_mutex); + + ebpf_cleanup_plot_disks(); + ebpf_cleanup_disk_list(); + + if (probe_links) { + struct bpf_program *prog; + size_t i = 0 ; + bpf_object__for_each_program(prog, objects) { + bpf_link__destroy(probe_links[i]); + i++; + } + bpf_object__close(objects); + } +} + +/***************************************************************** + * + * MAIN LOOP + * + *****************************************************************/ + +/** + * Fill Plot list + * + * @param ptr a pointer for current disk + */ +static void ebpf_fill_plot_disks(netdata_ebpf_disks_t *ptr) +{ + pthread_mutex_lock(&plot_mutex); + ebpf_publish_disk_t *w; + if (likely(plot_disks)) { + ebpf_publish_disk_t *move = plot_disks, *store = plot_disks; + while (move) { + if (move->plot == ptr) { + pthread_mutex_unlock(&plot_mutex); + return; + } + + store = move; + move = move->next; + } + + w = callocz(1, sizeof(ebpf_publish_disk_t)); + w->plot = ptr; + store->next = w; + } else { + plot_disks = callocz(1, sizeof(ebpf_publish_disk_t)); + plot_disks->plot = ptr; + } + pthread_mutex_unlock(&plot_mutex); + + ptr->flags |= NETDATA_DISK_ADDED_TO_PLOT_LIST; +} + +/** + * Read hard disk table + * + * @param table file descriptor for table + * + * Read the table with number of calls for all functions + */ +static void read_hard_disk_tables(int table) +{ + netdata_idx_t *values = disk_hash_values; + block_key_t key = {}; + block_key_t next_key = {}; + + netdata_ebpf_disks_t *ret = NULL; + + while 
(bpf_map_get_next_key(table, &key, &next_key) == 0) { + int test = bpf_map_lookup_elem(table, &key, values); + if (test < 0) { + key = next_key; + continue; + } + + netdata_ebpf_disks_t find; + find.dev = key.dev; + + if (likely(ret)) { + if (find.dev != ret->dev) + ret = (netdata_ebpf_disks_t *)avl_search_lock(&disk_tree, (avl_t *)&find); + } else + ret = (netdata_ebpf_disks_t *)avl_search_lock(&disk_tree, (avl_t *)&find); + + // Disk was inserted after we parse /proc/partitions + if (!ret) { + if (read_local_disks()) { + key = next_key; + continue; + } + + ret = (netdata_ebpf_disks_t *)avl_search_lock(&disk_tree, (avl_t *)&find); + if (!ret) { + // We should never reach this point, but we are adding it to keep a safe code + key = next_key; + continue; + } + } + + uint64_t total = 0; + int i; + int end = (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs; + for (i = 0; i < end; i++) { + total += values[i]; + } + + ret->histogram.histogram[key.bin] = total; + + if (!(ret->flags & NETDATA_DISK_ADDED_TO_PLOT_LIST)) + ebpf_fill_plot_disks(ret); + + key = next_key; + } +} + +/** + * Disk read hash + * + * This is the thread callback. + * This thread is necessary, because we cannot freeze the whole plugin to read the data on very busy socket. + * + * @param ptr It is a NULL value for this thread. + * + * @return It always returns NULL. + */ +void *ebpf_disk_read_hash(void *ptr) +{ + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + + usec_t step = NETDATA_LATENCY_DISK_SLEEP_MS * em->update_every; + while (!close_ebpf_plugin) { + usec_t dt = heartbeat_next(&hb, step); + (void)dt; + + read_hard_disk_tables(disk_maps[NETDATA_DISK_READ].map_fd); + } + + return NULL; +} + +/** + * Obsolete Hard Disk charts + * + * Make Hard disk charts and fill chart name + * + * @param w the structure with necessary information to create the chart + * @param update_every value to overwrite the update frequency set by the server. 
+ */ +static void ebpf_obsolete_hd_charts(netdata_ebpf_disks_t *w, int update_every) +{ + ebpf_write_chart_obsolete(w->histogram.name, w->family, w->histogram.title, EBPF_COMMON_DIMENSION_CALL, + w->family, NETDATA_EBPF_CHART_TYPE_STACKED, "disk.latency_io", + w->histogram.order, update_every); + + w->flags = 0; +} + +/** + * Create Hard Disk charts + * + * Make Hard disk charts and fill chart name + * + * @param w the structure with necessary information to create the chart + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_create_hd_charts(netdata_ebpf_disks_t *w, int update_every) +{ + int order = NETDATA_CHART_PRIO_DISK_LATENCY; + char *family = w->family; + + w->histogram.name = strdupz("disk_latency_io"); + w->histogram.title = NULL; + w->histogram.order = order; + + ebpf_create_chart(w->histogram.name, family, "Disk latency", EBPF_COMMON_DIMENSION_CALL, + family, "disk.latency_io", NETDATA_EBPF_CHART_TYPE_STACKED, order, + ebpf_create_global_dimension, disk_publish_aggregated, NETDATA_EBPF_HIST_MAX_BINS, + update_every, NETDATA_EBPF_MODULE_NAME_DISK); + order++; + + w->flags |= NETDATA_DISK_CHART_CREATED; +} + +/** + * Remove pointer from plot + * + * Remove pointer from plot list when the disk is not present. 
+ */ +static void ebpf_remove_pointer_from_plot_disk(ebpf_module_t *em) +{ + time_t current_time = now_realtime_sec(); + time_t limit = 10 * em->update_every; + pthread_mutex_lock(&plot_mutex); + ebpf_publish_disk_t *move = plot_disks, *prev = plot_disks; + int update_every = em->update_every; + while (move) { + netdata_ebpf_disks_t *ned = move->plot; + uint32_t flags = ned->flags; + + if (!(flags & NETDATA_DISK_IS_HERE) && ((current_time - ned->last_update) > limit)) { + ebpf_obsolete_hd_charts(ned, update_every); + avl_t *ret = (avl_t *)avl_remove_lock(&disk_tree, (avl_t *)ned); + UNUSED(ret); + if (move == plot_disks) { + freez(move); + plot_disks = NULL; + break; + } else { + prev->next = move->next; + ebpf_publish_disk_t *clean = move; + move = move->next; + freez(clean); + continue; + } + } + + prev = move; + move = move->next; + } + pthread_mutex_unlock(&plot_mutex); +} + +/** + * Send Hard disk data + * + * Send hard disk information to Netdata. + * + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_latency_send_hd_data(int update_every) +{ + pthread_mutex_lock(&plot_mutex); + if (!plot_disks) { + pthread_mutex_unlock(&plot_mutex); + return; + } + + ebpf_publish_disk_t *move = plot_disks; + while (move) { + netdata_ebpf_disks_t *ned = move->plot; + uint32_t flags = ned->flags; + if (!(flags & NETDATA_DISK_CHART_CREATED)) { + ebpf_create_hd_charts(ned, update_every); + } + + if ((flags & NETDATA_DISK_CHART_CREATED)) { + write_histogram_chart(ned->histogram.name, ned->family, + ned->histogram.histogram, dimensions, NETDATA_EBPF_HIST_MAX_BINS); + } + + ned->flags &= ~NETDATA_DISK_IS_HERE; + + move = move->next; + } + pthread_mutex_unlock(&plot_mutex); +} + +/** +* Main loop for this collector. 
+*/ +static void disk_collector(ebpf_module_t *em) +{ + disk_hash_values = callocz(ebpf_nprocs, sizeof(netdata_idx_t)); + disk_threads.thread = mallocz(sizeof(netdata_thread_t)); + disk_threads.start_routine = ebpf_disk_read_hash; + + netdata_thread_create(disk_threads.thread, disk_threads.name, NETDATA_THREAD_OPTION_JOINABLE, + ebpf_disk_read_hash, em); + + int update_every = em->update_every; + int counter = update_every - 1; + read_thread_closed = 0; + while (!close_ebpf_plugin) { + pthread_mutex_lock(&collect_data_mutex); + pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); + + if (++counter == update_every) { + counter = 0; + pthread_mutex_lock(&lock); + ebpf_remove_pointer_from_plot_disk(em); + ebpf_latency_send_hd_data(update_every); + + pthread_mutex_unlock(&lock); + } + + pthread_mutex_unlock(&collect_data_mutex); + + ebpf_update_disks(em); + } + read_thread_closed = 1; +} + +/***************************************************************** + * + * EBPF DISK THREAD + * + *****************************************************************/ + +/** + * Enable tracepoints + * + * Enable necessary tracepoints for thread. + * + * @return It returns 0 on success and -1 otherwise + */ +static int ebpf_disk_enable_tracepoints() +{ + int test = ebpf_is_tracepoint_enabled(tracepoint_block_type, tracepoint_block_issue); + if (test == -1) + return -1; + else if (!test) { + if (ebpf_enable_tracing_values(tracepoint_block_type, tracepoint_block_issue)) + return -1; + } + was_block_issue_enabled = test; + + test = ebpf_is_tracepoint_enabled(tracepoint_block_type, tracepoint_block_rq_complete); + if (test == -1) + return -1; + else if (!test) { + if (ebpf_enable_tracing_values(tracepoint_block_type, tracepoint_block_rq_complete)) + return -1; + } + was_block_rq_complete_enabled = test; + + return 0; +} + +/** + * Disk thread + * + * Thread used to generate disk charts. 
+ * + * @param ptr a pointer to `struct ebpf_module` + * + * @return It always return NULL + */ +void *ebpf_disk_thread(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_disk_cleanup, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = disk_maps; + + if (!em->enabled) + goto enddisk; + + if (ebpf_disk_enable_tracepoints()) { + em->enabled = CONFIG_BOOLEAN_NO; + goto enddisk; + } + + avl_init_lock(&disk_tree, ebpf_compare_disks); + if (read_local_disks()) { + em->enabled = CONFIG_BOOLEAN_NO; + goto enddisk; + } + + if (pthread_mutex_init(&plot_mutex, NULL)) { + error("Cannot initialize local mutex"); + goto enddisk; + } + + probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects); + if (!probe_links) { + goto enddisk; + } + + int algorithms[NETDATA_EBPF_HIST_MAX_BINS]; + ebpf_fill_algorithms(algorithms, NETDATA_EBPF_HIST_MAX_BINS, NETDATA_EBPF_INCREMENTAL_IDX); + dimensions = ebpf_fill_histogram_dimension(NETDATA_EBPF_HIST_MAX_BINS); + + ebpf_global_labels(disk_aggregated_data, disk_publish_aggregated, dimensions, dimensions, algorithms, + NETDATA_EBPF_HIST_MAX_BINS); + + disk_collector(em); + +enddisk: + netdata_thread_cleanup_pop(1); + + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_disk.h b/collectors/ebpf.plugin/ebpf_disk.h new file mode 100644 index 00000000..8e58174b --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_disk.h @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_DISK_H +#define NETDATA_EBPF_DISK_H 1 + +// Module name +#define NETDATA_EBPF_MODULE_NAME_DISK "disk" + +#include "libnetdata/avl/avl.h" +#include "libnetdata/ebpf/ebpf.h" + +#define NETDATA_EBPF_PROC_PARTITIONS "/proc/partitions" + +#define NETDATA_LATENCY_DISK_SLEEP_MS 650000ULL + +// Process configuration name +#define NETDATA_DISK_CONFIG_FILE "disk.conf" + +// Decode function extracted from: https://elixir.bootlin.com/linux/v5.10.8/source/include/linux/kdev_t.h#L7 +#define MINORBITS 20 +#define MKDEV(ma,mi) 
(((ma) << MINORBITS) | (mi)) + +enum netdata_latency_disks_flags { + NETDATA_DISK_ADDED_TO_PLOT_LIST = 1, + NETDATA_DISK_CHART_CREATED = 2, + NETDATA_DISK_IS_HERE = 4, + NETDATA_DISK_HAS_EFI = 8 +}; + +/* + * The definition (DISK_NAME_LEN) has been a stable value since Kernel 3.0, + * I decided to bring it as internal definition, to avoid include linux/genhd.h. + */ +#define NETDATA_DISK_NAME_LEN 32 +typedef struct netdata_ebpf_disks { + // Search + avl_t avl; + uint32_t dev; + uint32_t major; + uint32_t minor; + uint32_t bootsector_key; + uint64_t start; // start sector + uint64_t end; // end sector + + // Print information + char family[NETDATA_DISK_NAME_LEN + 1]; + char *boot_chart; + + netdata_ebpf_histogram_t histogram; + + uint32_t flags; + time_t last_update; + + struct netdata_ebpf_disks *main; + struct netdata_ebpf_disks *boot_partition; + struct netdata_ebpf_disks *next; +} netdata_ebpf_disks_t; + +enum ebpf_disk_tables { + NETDATA_DISK_READ +}; + +typedef struct block_key { + uint32_t bin; + uint32_t dev; +} block_key_t; + +typedef struct netdata_ebpf_publish_disk { + netdata_ebpf_disks_t *plot; + struct netdata_ebpf_publish_disk *next; +} ebpf_publish_disk_t; + +extern struct config disk_config; + +extern void *ebpf_disk_thread(void *ptr); + +#endif /* NETDATA_EBPF_DISK_H */ + diff --git a/collectors/ebpf.plugin/ebpf_fd.c b/collectors/ebpf.plugin/ebpf_fd.c new file mode 100644 index 00000000..6eecf584 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_fd.c @@ -0,0 +1,865 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_fd.h" + +static char *fd_dimension_names[NETDATA_FD_SYSCALL_END] = { "open", "close" }; +static char *fd_id_names[NETDATA_FD_SYSCALL_END] = { "do_sys_open", "__close_fd" }; + +static netdata_syscall_stat_t fd_aggregated_data[NETDATA_FD_SYSCALL_END]; +static netdata_publish_syscall_t fd_publish_aggregated[NETDATA_FD_SYSCALL_END]; + +static ebpf_local_maps_t fd_maps[] = {{.name = "tbl_fd_pid", 
.internal_input = ND_EBPF_DEFAULT_PID_SIZE, + .user_input = 0, + .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_fd_global", .internal_input = NETDATA_KEY_END_VECTOR, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "fd_ctrl", .internal_input = NETDATA_CONTROLLER_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; + + +struct config fd_config = { .first_section = NULL, .last_section = NULL, .mutex = NETDATA_MUTEX_INITIALIZER, + .index = {.avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +static struct bpf_link **probe_links = NULL; +static struct bpf_object *objects = NULL; + +struct netdata_static_thread fd_thread = {"FD KERNEL", NULL, NULL, 1, NULL, + NULL, NULL}; +static int read_thread_closed = 1; +static netdata_idx_t fd_hash_values[NETDATA_FD_COUNTER]; +static netdata_idx_t *fd_values = NULL; + +netdata_fd_stat_t *fd_vector = NULL; +netdata_fd_stat_t **fd_pid = NULL; + +/***************************************************************** + * + * FUNCTIONS TO CLOSE THE THREAD + * + *****************************************************************/ + +/** + * Clean PID structures + * + * Clean the allocated structures. + */ +void clean_fd_pid_structures() { + struct pid_stat *pids = root_of_pids; + while (pids) { + freez(fd_pid[pids->pid]); + + pids = pids->next; + } +} + +/** + * Clean up the main thread. + * + * @param ptr thread data. 
+ */ +static void ebpf_fd_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) + return; + + heartbeat_t hb; + heartbeat_init(&hb); + uint32_t tick = 2 * USEC_PER_MS; + while (!read_thread_closed) { + usec_t dt = heartbeat_next(&hb, tick); + UNUSED(dt); + } + + ebpf_cleanup_publish_syscall(fd_publish_aggregated); + freez(fd_thread.thread); + freez(fd_values); + freez(fd_vector); + + if (probe_links) { + struct bpf_program *prog; + size_t i = 0 ; + bpf_object__for_each_program(prog, objects) { + bpf_link__destroy(probe_links[i]); + i++; + } + bpf_object__close(objects); + } +} + +/***************************************************************** + * + * MAIN LOOP + * + *****************************************************************/ + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param em the structure with thread information + */ +static void ebpf_fd_send_data(ebpf_module_t *em) +{ + fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN].ncall = fd_hash_values[NETDATA_KEY_CALLS_DO_SYS_OPEN]; + fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN].nerr = fd_hash_values[NETDATA_KEY_ERROR_DO_SYS_OPEN]; + + fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE].ncall = fd_hash_values[NETDATA_KEY_CALLS_CLOSE_FD]; + fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE].nerr = fd_hash_values[NETDATA_KEY_ERROR_CLOSE_FD]; + + write_count_chart(NETDATA_FILE_OPEN_CLOSE_COUNT, NETDATA_FILESYSTEM_FAMILY, fd_publish_aggregated, + NETDATA_FD_SYSCALL_END); + + if (em->mode < MODE_ENTRY) { + write_err_chart(NETDATA_FILE_OPEN_ERR_COUNT, NETDATA_FILESYSTEM_FAMILY, + fd_publish_aggregated, NETDATA_FD_SYSCALL_END); + } +} + +/** + * Read global counter + * + * Read the table with number of calls for all functions + */ +static void read_global_table() +{ + uint32_t idx; + netdata_idx_t *val = fd_hash_values; + netdata_idx_t *stored = fd_values; + int fd = fd_maps[NETDATA_FD_GLOBAL_STATS].map_fd; + + for (idx = NETDATA_KEY_CALLS_DO_SYS_OPEN; idx < 
NETDATA_FD_COUNTER; idx++) { + if (!bpf_map_lookup_elem(fd, &idx, stored)) { + int i; + int end = ebpf_nprocs; + netdata_idx_t total = 0; + for (i = 0; i < end; i++) + total += stored[i]; + + val[idx] = total; + } + } +} + +/** + * File descriptor read hash + * + * This is the thread callback. + * This thread is necessary, because we cannot freeze the whole plugin to read the data. + * + * @param ptr It is a NULL value for this thread. + * + * @return It always returns NULL. + */ +void *ebpf_fd_read_hash(void *ptr) +{ + read_thread_closed = 0; + + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + usec_t step = NETDATA_FD_SLEEP_MS * em->update_every; + while (!close_ebpf_plugin) { + usec_t dt = heartbeat_next(&hb, step); + (void)dt; + + read_global_table(); + } + + read_thread_closed = 1; + return NULL; +} + +/** + * Apps Accumulator + * + * Sum all values read from kernel and store in the first address. + * + * @param out the vector with read values. + */ +static void fd_apps_accumulator(netdata_fd_stat_t *out) +{ + int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? ebpf_nprocs : 1; + netdata_fd_stat_t *total = &out[0]; + for (i = 1; i < end; i++) { + netdata_fd_stat_t *w = &out[i]; + total->open_call += w->open_call; + total->close_call += w->close_call; + total->open_err += w->open_err; + total->close_err += w->close_err; + } +} + +/** + * Fill PID + * + * Fill PID structures + * + * @param current_pid pid that we are collecting data + * @param out values read from hash tables; + */ +static void fd_fill_pid(uint32_t current_pid, netdata_fd_stat_t *publish) +{ + netdata_fd_stat_t *curr = fd_pid[current_pid]; + if (!curr) { + curr = callocz(1, sizeof(netdata_fd_stat_t)); + fd_pid[current_pid] = curr; + } + + memcpy(curr, &publish[0], sizeof(netdata_fd_stat_t)); +} + +/** + * Read APPS table + * + * Read the apps table and store data inside the structure. 
+ */ +static void read_apps_table() +{ + netdata_fd_stat_t *fv = fd_vector; + uint32_t key; + struct pid_stat *pids = root_of_pids; + int fd = fd_maps[NETDATA_FD_PID_STATS].map_fd; + size_t length = sizeof(netdata_fd_stat_t) * ebpf_nprocs; + while (pids) { + key = pids->pid; + + if (bpf_map_lookup_elem(fd, &key, fv)) { + pids = pids->next; + continue; + } + + fd_apps_accumulator(fv); + + fd_fill_pid(key, fv); + + // We are cleaning to avoid passing data read from one process to other. + memset(fv, 0, length); + + pids = pids->next; + } +} + +/** + * Update cgroup + * + * Update cgroup data based in + */ +static void ebpf_update_fd_cgroup() +{ + ebpf_cgroup_target_t *ect ; + netdata_fd_stat_t *fv = fd_vector; + int fd = fd_maps[NETDATA_FD_PID_STATS].map_fd; + size_t length = sizeof(netdata_fd_stat_t) * ebpf_nprocs; + + pthread_mutex_lock(&mutex_cgroup_shm); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + struct pid_on_target2 *pids; + for (pids = ect->pids; pids; pids = pids->next) { + int pid = pids->pid; + netdata_fd_stat_t *out = &pids->fd; + if (likely(fd_pid) && fd_pid[pid]) { + netdata_fd_stat_t *in = fd_pid[pid]; + + memcpy(out, in, sizeof(netdata_fd_stat_t)); + } else { + memset(fv, 0, length); + if (!bpf_map_lookup_elem(fd, &pid, fv)) { + fd_apps_accumulator(fv); + + memcpy(out, fv, sizeof(netdata_fd_stat_t)); + } + } + } + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Sum PIDs + * + * Sum values for all targets. 
+ * + * @param fd the output + * @param root list of pids + */ +static void ebpf_fd_sum_pids(netdata_fd_stat_t *fd, struct pid_on_target *root) +{ + uint32_t open_call = 0; + uint32_t close_call = 0; + uint32_t open_err = 0; + uint32_t close_err = 0; + + while (root) { + int32_t pid = root->pid; + netdata_fd_stat_t *w = fd_pid[pid]; + if (w) { + open_call += w->open_call; + close_call += w->close_call; + open_err += w->open_err; + close_err += w->close_err; + } + + root = root->next; + } + + // These conditions were added, because we are using incremental algorithm + fd->open_call = (open_call >= fd->open_call) ? open_call : fd->open_call; + fd->close_call = (close_call >= fd->close_call) ? close_call : fd->close_call; + fd->open_err = (open_err >= fd->open_err) ? open_err : fd->open_err; + fd->close_err = (close_err >= fd->close_err) ? close_err : fd->close_err; +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param em the structure with thread information + * @param root the target list. 
+*/ +void ebpf_fd_send_apps_data(ebpf_module_t *em, struct target *root) +{ + struct target *w; + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + ebpf_fd_sum_pids(&w->fd, w->root_pid); + } + } + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->fd.open_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->fd.open_err); + } + } + write_end_chart(); + } + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSED); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->fd.close_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->fd.close_err); + } + } + write_end_chart(); + } +} + +/** + * Sum PIDs + * + * Sum values for all targets. + * + * @param fd structure used to store data + * @param pids input data + */ +static void ebpf_fd_sum_cgroup_pids(netdata_fd_stat_t *fd, struct pid_on_target2 *pids) +{ + netdata_fd_stat_t accumulator; + memset(&accumulator, 0, sizeof(accumulator)); + + while (pids) { + netdata_fd_stat_t *w = &pids->fd; + + accumulator.open_err += w->open_err; + accumulator.open_call += w->open_call; + accumulator.close_call += w->close_call; + accumulator.close_err += w->close_err; + + pids = pids->next; + } + + fd->open_call = (accumulator.open_call >= fd->open_call) ? accumulator.open_call : fd->open_call; + fd->open_err = (accumulator.open_err >= fd->open_err) ? 
accumulator.open_err : fd->open_err; + fd->close_call = (accumulator.close_call >= fd->close_call) ? accumulator.close_call : fd->close_call; + fd->close_err = (accumulator.close_err >= fd->close_err) ? accumulator.close_err : fd->close_err; +} + +/** + * Create specific file descriptor charts + * + * Create charts for cgroup/application. + * + * @param type the chart type. + * @param em the main thread structure. + */ +static void ebpf_create_specific_fd_charts(char *type, ebpf_module_t *em) +{ + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN, "Number of open files", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_CGROUP_FD_OPEN_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5400, + ebpf_create_global_dimension, + &fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, "Fails to open files", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_CGROUP_FD_OPEN_ERR_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5401, + ebpf_create_global_dimension, + &fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN], + 1, em->update_every, + NETDATA_EBPF_MODULE_NAME_SWAP); + } + + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSED, "Files closed", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_CGROUP_FD_CLOSE_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5402, + ebpf_create_global_dimension, + &fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, "Fails to close files", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_CGROUP_FD_CLOSE_ERR_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + 
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5403, + ebpf_create_global_dimension, + &fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE], + 1, em->update_every, + NETDATA_EBPF_MODULE_NAME_SWAP); + } +} + +/** + * Obsolete specific file descriptor charts + * + * Obsolete charts for cgroup/application. + * + * @param type the chart type. + * @param em the main thread structure. + */ +static void ebpf_obsolete_specific_fd_charts(char *type, ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_OPEN, "Number of open files", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_OPEN_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5400, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, "Fails to open files", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_OPEN_ERR_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5401, em->update_every); + } + + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_CLOSED, "Files closed", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_CLOSE_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5402, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, "Fails to close files", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_CLOSE_ERR_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5403, em->update_every); + } +} + +/* + * Send specific file descriptor data + * + * Send data for specific cgroup/apps. 
+ * + * @param type chart type + * @param values structure with values that will be sent to netdata + */ +static void ebpf_send_specific_fd_data(char *type, netdata_fd_stat_t *values, ebpf_module_t *em) +{ + write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN); + write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN].name, (long long)values->open_call); + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR); + write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN].name, (long long)values->open_err); + write_end_chart(); + } + + write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSED); + write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE].name, (long long)values->close_call); + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR); + write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE].name, (long long)values->close_err); + write_end_chart(); + } +} + +/** + * Create systemd file descriptor charts + * + * Create charts when systemd is enabled + * + * @param em the main collector structure + **/ +static void ebpf_create_systemd_fd_charts(ebpf_module_t *em) +{ + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_FILE_OPEN, "Number of open files", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20061, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_FD_OPEN_CONTEXT, + NETDATA_EBPF_MODULE_NAME_PROCESS, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, "Fails to open files", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20062, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_FD_OPEN_ERR_CONTEXT, + NETDATA_EBPF_MODULE_NAME_PROCESS, em->update_every); + } + + 
ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_FILE_CLOSED, "Files closed", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20063, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_FD_CLOSE_CONTEXT, + NETDATA_EBPF_MODULE_NAME_PROCESS, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, "Fails to close files", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, 20064, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_FD_CLOSE_ERR_CONTEXT, + NETDATA_EBPF_MODULE_NAME_PROCESS, em->update_every); + } +} + +/** + * Send Systemd charts + * + * Send collected data to Netdata. + * + * @param em the main collector structure + * + * @return It returns the status for chart creation, if it is necessary to remove a specific dimension zero is returned + * otherwise function returns 1 to avoid chart recreation + */ +static int ebpf_send_systemd_fd_charts(ebpf_module_t *em) +{ + int ret = 1; + ebpf_cgroup_target_t *ect; + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_fd.open_call); + } else + ret = 0; + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_fd.open_err); + } + } + write_end_chart(); + } + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSED); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, 
ect->publish_systemd_fd.close_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, ect->publish_systemd_fd.close_err); + } + } + write_end_chart(); + } + + return ret; +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param em the main collector structure +*/ +static void ebpf_fd_send_cgroup_data(ebpf_module_t *em) +{ + if (!ebpf_cgroup_pids) + return; + + pthread_mutex_lock(&mutex_cgroup_shm); + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + ebpf_fd_sum_cgroup_pids(&ect->publish_systemd_fd, ect->pids); + } + + int has_systemd = shm_ebpf_cgroup.header->systemd_enabled; + if (has_systemd) { + static int systemd_charts = 0; + if (!systemd_charts) { + ebpf_create_systemd_fd_charts(em); + systemd_charts = 1; + } + + systemd_charts = ebpf_send_systemd_fd_charts(em); + } + + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_FD_CHART) && ect->updated) { + ebpf_create_specific_fd_charts(ect->name, em); + ect->flags |= NETDATA_EBPF_CGROUP_HAS_FD_CHART; + } + + if (ect->flags & NETDATA_EBPF_CGROUP_HAS_FD_CHART ) { + if (ect->updated) { + ebpf_send_specific_fd_data(ect->name, &ect->publish_systemd_fd, em); + } else { + ebpf_obsolete_specific_fd_charts(ect->name, em); + ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_FD_CHART; + } + } + } + + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** +* Main loop for this collector. 
+*/ +static void fd_collector(ebpf_module_t *em) +{ + fd_thread.thread = mallocz(sizeof(netdata_thread_t)); + fd_thread.start_routine = ebpf_fd_read_hash; + + netdata_thread_create(fd_thread.thread, fd_thread.name, NETDATA_THREAD_OPTION_JOINABLE, + ebpf_fd_read_hash, em); + + int apps = em->apps_charts; + int cgroups = em->cgroup_charts; + int update_every = em->update_every; + int counter = update_every - 1; + while (!close_ebpf_plugin) { + pthread_mutex_lock(&collect_data_mutex); + pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); + + if (++counter == update_every) { + counter = 0; + if (apps) + read_apps_table(); + + if (cgroups) + ebpf_update_fd_cgroup(); + + pthread_mutex_lock(&lock); + + ebpf_fd_send_data(em); + + if (apps) + ebpf_fd_send_apps_data(em, apps_groups_root_target); + + if (cgroups) + ebpf_fd_send_cgroup_data(em); + + pthread_mutex_unlock(&lock); + } + + pthread_mutex_unlock(&collect_data_mutex); + } +} + +/***************************************************************** + * + * CREATE CHARTS + * + *****************************************************************/ + +/** + * Create apps charts + * + * Call ebpf_create_chart to create the charts on apps submenu. + * + * @param em a pointer to the structure with the default values. 
+ */ +void ebpf_fd_create_apps_charts(struct ebpf_module *em, void *ptr) +{ + struct target *root = ptr; + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_OPEN, + "Number of open files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20061, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, + "Fails to open files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20062, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + } + + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_CLOSED, + "Files closed", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20063, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, + "Fails to close files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20064, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + } +} + +/** + * Create global charts + * + * Call ebpf_create_chart to create the charts for the collector. + * + * @param em a pointer to the structure with the default values. 
+ */ +static void ebpf_create_fd_global_charts(ebpf_module_t *em) +{ + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, + NETDATA_FILE_OPEN_CLOSE_COUNT, + "Open and close calls", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_FILE_GROUP, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_EBPF_FD_CHARTS, + ebpf_create_global_dimension, + fd_publish_aggregated, + NETDATA_FD_SYSCALL_END, + em->update_every, NETDATA_EBPF_MODULE_NAME_FD); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, + NETDATA_FILE_OPEN_ERR_COUNT, + "Open fails", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_FILE_GROUP, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_EBPF_FD_CHARTS + 1, + ebpf_create_global_dimension, + fd_publish_aggregated, + NETDATA_FD_SYSCALL_END, + em->update_every, NETDATA_EBPF_MODULE_NAME_FD); + } +} + +/***************************************************************** + * + * MAIN THREAD + * + *****************************************************************/ + +/** + * Allocate vectors used with this thread. + * + * We are not testing the return, because callocz does this and shutdown the software + * case it was not possible to allocate. + * + * @param apps is apps enabled? 
+ */ +static void ebpf_fd_allocate_global_vectors(int apps) +{ + if (apps) + fd_pid = callocz((size_t)pid_max, sizeof(netdata_fd_stat_t *)); + + fd_vector = callocz((size_t)ebpf_nprocs, sizeof(netdata_fd_stat_t)); + + fd_values = callocz((size_t)ebpf_nprocs, sizeof(netdata_idx_t)); +} + +/** + * Directory Cache thread + * + * Thread used to make dcstat thread + * + * @param ptr a pointer to `struct ebpf_module` + * + * @return It always returns NULL + */ +void *ebpf_fd_thread(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_fd_cleanup, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = fd_maps; + + if (!em->enabled) + goto endfd; + + ebpf_fd_allocate_global_vectors(em->apps_charts); + + probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects); + if (!probe_links) { + goto endfd; + } + + int algorithms[NETDATA_FD_SYSCALL_END] = { + NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX + }; + + ebpf_global_labels(fd_aggregated_data, fd_publish_aggregated, fd_dimension_names, fd_id_names, + algorithms, NETDATA_FD_SYSCALL_END); + + pthread_mutex_lock(&lock); + ebpf_create_fd_global_charts(em); + pthread_mutex_unlock(&lock); + + fd_collector(em); + +endfd: + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_fd.h b/collectors/ebpf.plugin/ebpf_fd.h new file mode 100644 index 00000000..851e040e --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_fd.h @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_FD_H +#define NETDATA_EBPF_FD_H 1 + +// Module name +#define NETDATA_EBPF_MODULE_NAME_FD "filedescriptor" + +#define NETDATA_FD_SLEEP_MS 850000ULL + +// Menu group +#define NETDATA_FILE_GROUP "file_access" + +// Global chart name +#define NETDATA_FILE_OPEN_CLOSE_COUNT "file_descriptor" +#define NETDATA_FILE_OPEN_ERR_COUNT "file_error" + +// Charts created on Apps submenu +#define NETDATA_SYSCALL_APPS_FILE_OPEN "file_open" +#define NETDATA_SYSCALL_APPS_FILE_CLOSED 
"file_closed" +#define NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR "file_open_error" +#define NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR "file_close_error" + +// Process configuration name +#define NETDATA_FD_CONFIG_FILE "fd.conf" + +// Contexts +#define NETDATA_CGROUP_FD_OPEN_CONTEXT "cgroup.fd_open" +#define NETDATA_CGROUP_FD_OPEN_ERR_CONTEXT "cgroup.fd_open_error" +#define NETDATA_CGROUP_FD_CLOSE_CONTEXT "cgroup.fd_close" +#define NETDATA_CGROUP_FD_CLOSE_ERR_CONTEXT "cgroup.fd_close_error" + +#define NETDATA_SYSTEMD_FD_OPEN_CONTEXT "services.fd_open" +#define NETDATA_SYSTEMD_FD_OPEN_ERR_CONTEXT "services.fd_open_error" +#define NETDATA_SYSTEMD_FD_CLOSE_CONTEXT "services.fd_close" +#define NETDATA_SYSTEMD_FD_CLOSE_ERR_CONTEXT "services.fd_close_error" + +typedef struct netdata_fd_stat { + uint64_t pid_tgid; // Unique identifier + uint32_t pid; // Process ID + + uint32_t open_call; // Open syscalls (open and openat) + uint32_t close_call; // Close syscall (close) + + // Errors + uint32_t open_err; + uint32_t close_err; +} netdata_fd_stat_t; + +enum fd_tables { + NETDATA_FD_PID_STATS, + NETDATA_FD_GLOBAL_STATS, + + // Keep this as last and don't skip numbers as it is used as element counter + NETDATA_FD_CONTROLLER +}; + +enum fd_counters { + NETDATA_KEY_CALLS_DO_SYS_OPEN, + NETDATA_KEY_ERROR_DO_SYS_OPEN, + + NETDATA_KEY_CALLS_CLOSE_FD, + NETDATA_KEY_ERROR_CLOSE_FD, + + // Keep this as last and don't skip numbers as it is used as element counter + NETDATA_FD_COUNTER +}; + +enum fd_syscalls { + NETDATA_FD_SYSCALL_OPEN, + NETDATA_FD_SYSCALL_CLOSE, + + // Do not insert nothing after this value + NETDATA_FD_SYSCALL_END +}; + + +extern void *ebpf_fd_thread(void *ptr); +extern void ebpf_fd_create_apps_charts(struct ebpf_module *em, void *ptr); +extern struct config fd_config; +extern netdata_fd_stat_t **fd_pid; +extern void clean_fd_pid_structures(); + +#endif /* NETDATA_EBPF_FD_H */ + diff --git a/collectors/ebpf.plugin/ebpf_filesystem.c b/collectors/ebpf.plugin/ebpf_filesystem.c 
new file mode 100644 index 00000000..ad2c9eff --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_filesystem.c @@ -0,0 +1,661 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf_filesystem.h" + +struct config fs_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +static ebpf_local_maps_t fs_maps[] = {{.name = "tbl_ext4", .internal_input = NETDATA_KEY_CALLS_SYNC, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_xfs", .internal_input = NETDATA_KEY_CALLS_SYNC, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_nfs", .internal_input = NETDATA_KEY_CALLS_SYNC, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_zfs", .internal_input = NETDATA_KEY_CALLS_SYNC, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_btrfs", .internal_input = NETDATA_KEY_CALLS_SYNC, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_ext_addr", .internal_input = 1, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; + +ebpf_filesystem_partitions_t localfs[] = + {{.filesystem = "ext4", + .optional_filesystem = NULL, + .family = "ext4", + .objects = NULL, + .probe_links = NULL, + .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, + .enabled = CONFIG_BOOLEAN_YES, + .addresses = {.function = NULL, .addr = 0}}, + {.filesystem = "xfs", + .optional_filesystem = NULL, + .family = "xfs", + .objects = NULL, + .probe_links = NULL, + .flags = 
NETDATA_FILESYSTEM_FLAG_NO_PARTITION, + .enabled = CONFIG_BOOLEAN_YES, + .addresses = {.function = NULL, .addr = 0}}, + {.filesystem = "nfs", + .optional_filesystem = "nfs4", + .family = "nfs", + .objects = NULL, + .probe_links = NULL, + .flags = NETDATA_FILESYSTEM_ATTR_CHARTS, + .enabled = CONFIG_BOOLEAN_YES, + .addresses = {.function = NULL, .addr = 0}}, + {.filesystem = "zfs", + .optional_filesystem = NULL, + .family = "zfs", + .objects = NULL, + .probe_links = NULL, + .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, + .enabled = CONFIG_BOOLEAN_YES, + .addresses = {.function = NULL, .addr = 0}}, + {.filesystem = "btrfs", + .optional_filesystem = NULL, + .family = "btrfs", + .objects = NULL, + .probe_links = NULL, + .flags = NETDATA_FILESYSTEM_FILL_ADDRESS_TABLE, + .enabled = CONFIG_BOOLEAN_YES, + .addresses = {.function = "btrfs_file_operations", .addr = 0}}, + {.filesystem = NULL, + .optional_filesystem = NULL, + .family = NULL, + .objects = NULL, + .probe_links = NULL, + .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, + .enabled = CONFIG_BOOLEAN_YES, + .addresses = {.function = NULL, .addr = 0}}}; + +struct netdata_static_thread filesystem_threads = {"EBPF FS READ", + NULL, NULL, 1, NULL, + NULL, NULL }; + +static int read_thread_closed = 1; +static netdata_syscall_stat_t filesystem_aggregated_data[NETDATA_EBPF_HIST_MAX_BINS]; +static netdata_publish_syscall_t filesystem_publish_aggregated[NETDATA_EBPF_HIST_MAX_BINS]; + +char **dimensions = NULL; +static netdata_idx_t *filesystem_hash_values = NULL; + +/***************************************************************** + * + * COMMON FUNCTIONS + * + *****************************************************************/ + +/** + * Create Filesystem chart + * + * Create latency charts + * + * @param update_every value to overwrite the update frequency set by the server. 
+ */ +static void ebpf_obsolete_fs_charts(int update_every) +{ + int i; + uint32_t test = NETDATA_FILESYSTEM_FLAG_CHART_CREATED | NETDATA_FILESYSTEM_REMOVE_CHARTS; + for (i = 0; localfs[i].filesystem; i++) { + ebpf_filesystem_partitions_t *efp = &localfs[i]; + uint32_t flags = efp->flags; + if ((flags & test) == test) { + flags &= ~NETDATA_FILESYSTEM_FLAG_CHART_CREATED; + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hread.name, + efp->hread.title, + EBPF_COMMON_DIMENSION_CALL, efp->family_name, + NULL, NETDATA_EBPF_CHART_TYPE_STACKED, efp->hread.order, update_every); + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hwrite.name, + efp->hwrite.title, + EBPF_COMMON_DIMENSION_CALL, efp->family_name, + NULL, NETDATA_EBPF_CHART_TYPE_STACKED, efp->hwrite.order, update_every); + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hopen.name, efp->hopen.title, + EBPF_COMMON_DIMENSION_CALL, efp->family_name, + NULL, NETDATA_EBPF_CHART_TYPE_STACKED, efp->hopen.order, update_every); + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hadditional.name, efp->hadditional.title, + EBPF_COMMON_DIMENSION_CALL, efp->family_name, + NULL, NETDATA_EBPF_CHART_TYPE_STACKED, efp->hadditional.order, + update_every); + } + efp->flags = flags; + } +} + +/** + * Create Filesystem chart + * + * Create latency charts + * + * @param update_every value to overwrite the update frequency set by the server. 
+ */ +static void ebpf_create_fs_charts(int update_every) +{ + static int order = NETDATA_CHART_PRIO_EBPF_FILESYSTEM_CHARTS; + char chart_name[64], title[256], family[64]; + int i; + uint32_t test = NETDATA_FILESYSTEM_FLAG_CHART_CREATED|NETDATA_FILESYSTEM_REMOVE_CHARTS; + for (i = 0; localfs[i].filesystem; i++) { + ebpf_filesystem_partitions_t *efp = &localfs[i]; + uint32_t flags = efp->flags; + if (flags & NETDATA_FILESYSTEM_FLAG_HAS_PARTITION && !(flags & test)) { + snprintfz(title, 255, "%s latency for each read request.", efp->filesystem); + snprintfz(family, 63, "%s_latency", efp->family); + snprintfz(chart_name, 63, "%s_read_latency", efp->filesystem); + efp->hread.name = strdupz(chart_name); + efp->hread.title = strdupz(title); + efp->hread.order = order; + efp->family_name = strdupz(family); + + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hread.name, + title, + EBPF_COMMON_DIMENSION_CALL, family, + NULL, NETDATA_EBPF_CHART_TYPE_STACKED, order, ebpf_create_global_dimension, + filesystem_publish_aggregated, NETDATA_EBPF_HIST_MAX_BINS, + update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM); + order++; + + snprintfz(title, 255, "%s latency for each write request.", efp->filesystem); + snprintfz(chart_name, 63, "%s_write_latency", efp->filesystem); + efp->hwrite.name = strdupz(chart_name); + efp->hwrite.title = strdupz(title); + efp->hwrite.order = order; + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hwrite.name, + title, + EBPF_COMMON_DIMENSION_CALL, family, + NULL, NETDATA_EBPF_CHART_TYPE_STACKED, order, ebpf_create_global_dimension, + filesystem_publish_aggregated, NETDATA_EBPF_HIST_MAX_BINS, + update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM); + order++; + + snprintfz(title, 255, "%s latency for each open request.", efp->filesystem); + snprintfz(chart_name, 63, "%s_open_latency", efp->filesystem); + efp->hopen.name = strdupz(chart_name); + efp->hopen.title = strdupz(title); + efp->hopen.order = order; + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, 
efp->hopen.name, + title, + EBPF_COMMON_DIMENSION_CALL, family, + NULL, NETDATA_EBPF_CHART_TYPE_STACKED, order, ebpf_create_global_dimension, + filesystem_publish_aggregated, NETDATA_EBPF_HIST_MAX_BINS, + update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM); + order++; + + char *type = (efp->flags & NETDATA_FILESYSTEM_ATTR_CHARTS) ? "attribute" : "sync"; + snprintfz(title, 255, "%s latency for each %s request.", efp->filesystem, type); + snprintfz(chart_name, 63, "%s_%s_latency", efp->filesystem, type); + efp->hadditional.name = strdupz(chart_name); + efp->hadditional.title = strdupz(title); + efp->hadditional.order = order; + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hadditional.name, title, + EBPF_COMMON_DIMENSION_CALL, family, + NULL, NETDATA_EBPF_CHART_TYPE_STACKED, order, ebpf_create_global_dimension, + filesystem_publish_aggregated, NETDATA_EBPF_HIST_MAX_BINS, + update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM); + order++; + efp->flags |= NETDATA_FILESYSTEM_FLAG_CHART_CREATED; + } + } +} + +/** + * Initialize eBPF data + * + * @param em main thread structure. + * + * @return it returns 0 on success and -1 otherwise. 
+ */ +int ebpf_filesystem_initialize_ebpf_data(ebpf_module_t *em) +{ + int i; + const char *saved_name = em->thread_name; + for (i = 0; localfs[i].filesystem; i++) { + ebpf_filesystem_partitions_t *efp = &localfs[i]; + if (!efp->probe_links && efp->flags & NETDATA_FILESYSTEM_LOAD_EBPF_PROGRAM) { + em->thread_name = efp->filesystem; + efp->probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &efp->objects); + if (!efp->probe_links) { + em->thread_name = saved_name; + return -1; + } + efp->flags |= NETDATA_FILESYSTEM_FLAG_HAS_PARTITION; + + // Nedeed for filesystems like btrfs + if ((efp->flags & NETDATA_FILESYSTEM_FILL_ADDRESS_TABLE) && (efp->addresses.function)) { + ebpf_load_addresses(&efp->addresses, fs_maps[i + 1].map_fd); + } + } + efp->flags &= ~NETDATA_FILESYSTEM_LOAD_EBPF_PROGRAM; + } + em->thread_name = saved_name; + + if (!dimensions) { + dimensions = ebpf_fill_histogram_dimension(NETDATA_EBPF_HIST_MAX_BINS); + + memset(filesystem_aggregated_data, 0 , NETDATA_EBPF_HIST_MAX_BINS * sizeof(netdata_syscall_stat_t)); + memset(filesystem_publish_aggregated, 0 , NETDATA_EBPF_HIST_MAX_BINS * sizeof(netdata_publish_syscall_t)); + + filesystem_hash_values = callocz(ebpf_nprocs, sizeof(netdata_idx_t)); + } + + return 0; +} + +/** + * Read Local partitions + * + * @return the total of partitions that will be monitored + */ +static int ebpf_read_local_partitions() +{ + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/self/mountinfo", netdata_configured_host_prefix); + procfile *ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) { + snprintfz(filename, FILENAME_MAX, "%s/proc/1/mountinfo", netdata_configured_host_prefix); + ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return 0; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) + return 0; + + int count = 0; + unsigned long l, i, lines = procfile_lines(ff); + for (i = 0; localfs[i].filesystem; i++) { + 
localfs[i].flags |= NETDATA_FILESYSTEM_REMOVE_CHARTS; + } + + for(l = 0; l < lines ; l++) { + // In "normal" situation the expected value is at column 7 + // When `shared` options is added to mount information, the filesystem is at column 8 + // Finally when we have systemd starting netdata, it will be at column 9 + unsigned long index = procfile_linewords(ff, l) - 3; + + char *fs = procfile_lineword(ff, l, index); + + for (i = 0; localfs[i].filesystem; i++) { + ebpf_filesystem_partitions_t *w = &localfs[i]; + if (w->enabled && (!strcmp(fs, w->filesystem) || + (w->optional_filesystem && !strcmp(fs, w->optional_filesystem)))) { + localfs[i].flags |= NETDATA_FILESYSTEM_LOAD_EBPF_PROGRAM; + localfs[i].flags &= ~NETDATA_FILESYSTEM_REMOVE_CHARTS; + count++; + break; + } + } + } + procfile_close(ff); + + return count; +} + +/** + * Update partition + * + * Update the partition structures before to plot + * + * @param em main thread structure + * + * @return 0 on success and -1 otherwise. + */ +static int ebpf_update_partitions(ebpf_module_t *em) +{ + static time_t update_every = 0; + time_t curr = now_realtime_sec(); + if (curr < update_every) + return 0; + + update_every = curr + 5 * em->update_every; + if (!ebpf_read_local_partitions()) { + em->optional = -1; + return -1; + } + + if (ebpf_filesystem_initialize_ebpf_data(em)) { + return -1; + } + + return 0; +} + +/***************************************************************** + * + * CLEANUP FUNCTIONS + * + *****************************************************************/ + +/* + * Cleanup eBPF data + */ +void ebpf_filesystem_cleanup_ebpf_data() +{ + int i; + for (i = 0; localfs[i].filesystem; i++) { + ebpf_filesystem_partitions_t *efp = &localfs[i]; + if (efp->probe_links) { + freez(efp->family_name); + + freez(efp->hread.name); + freez(efp->hread.title); + + freez(efp->hwrite.name); + freez(efp->hwrite.title); + + freez(efp->hopen.name); + freez(efp->hopen.title); + + freez(efp->hadditional.name); + 
freez(efp->hadditional.title); + + struct bpf_link **probe_links = efp->probe_links; + size_t j = 0 ; + struct bpf_program *prog; + bpf_object__for_each_program(prog, efp->objects) { + bpf_link__destroy(probe_links[j]); + j++; + } + bpf_object__close(efp->objects); + } + } +} + +/** + * Clean up the main thread. + * + * @param ptr thread data. + */ +static void ebpf_filesystem_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) + return; + + heartbeat_t hb; + heartbeat_init(&hb); + uint32_t tick = 2*USEC_PER_MS; + while (!read_thread_closed) { + usec_t dt = heartbeat_next(&hb, tick); + UNUSED(dt); + } + + freez(filesystem_threads.thread); + ebpf_cleanup_publish_syscall(filesystem_publish_aggregated); + + ebpf_filesystem_cleanup_ebpf_data(); + if (dimensions) + ebpf_histogram_dimension_cleanup(dimensions, NETDATA_EBPF_HIST_MAX_BINS); + freez(filesystem_hash_values); +} + +/***************************************************************** + * + * MAIN THREAD + * + *****************************************************************/ + +/** + * Select hist + * + * Select a histogram to store data. + * + * @param efp pointer for the structure with pointers. 
+ * @param id histogram selector + * + * @return It returns a pointer for the histogram + */ +static inline netdata_ebpf_histogram_t *select_hist(ebpf_filesystem_partitions_t *efp, uint32_t *idx, uint32_t id) +{ + if (id < NETDATA_KEY_CALLS_READ) { + *idx = id; + return &efp->hread; + } else if (id < NETDATA_KEY_CALLS_WRITE) { + *idx = id - NETDATA_KEY_CALLS_READ; + return &efp->hwrite; + } else if (id < NETDATA_KEY_CALLS_OPEN) { + *idx = id - NETDATA_KEY_CALLS_WRITE; + return &efp->hopen; + } else if (id < NETDATA_KEY_CALLS_SYNC ){ + *idx = id - NETDATA_KEY_CALLS_OPEN; + return &efp->hadditional; + } + + return NULL; +} + +/** + * Read hard disk table + * + * @param table index for the hash table + * + * Read the table with number of calls for all functions + */ +static void read_filesystem_table(ebpf_filesystem_partitions_t *efp, int fd) +{ + netdata_idx_t *values = filesystem_hash_values; + uint32_t key; + uint32_t idx; + for (key = 0; key < NETDATA_KEY_CALLS_SYNC; key++) { + netdata_ebpf_histogram_t *w = select_hist(efp, &idx, key); + if (!w) { + continue; + } + + int test = bpf_map_lookup_elem(fd, &key, values); + if (test < 0) { + continue; + } + + uint64_t total = 0; + int i; + int end = ebpf_nprocs; + for (i = 0; i < end; i++) { + total += values[i]; + } + + if (idx >= NETDATA_EBPF_HIST_MAX_BINS) + idx = NETDATA_EBPF_HIST_MAX_BINS - 1; + w->histogram[idx] = total; + } +} + +/** + * Read hard disk table + * + * @param table index for the hash table + * + * Read the table with number of calls for all functions + */ +static void read_filesystem_tables() +{ + int i; + for (i = 0; localfs[i].filesystem; i++) { + ebpf_filesystem_partitions_t *efp = &localfs[i]; + if (efp->flags & NETDATA_FILESYSTEM_FLAG_HAS_PARTITION) { + read_filesystem_table(efp, fs_maps[i].map_fd); + } + } +} + +/** + * Socket read hash + * + * This is the thread callback. + * This thread is necessary, because we cannot freeze the whole plugin to read the data on very busy socket. 
+ * + * @param ptr It is a NULL value for this thread. + * + * @return It always returns NULL. + */ +void *ebpf_filesystem_read_hash(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + read_thread_closed = 0; + + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = NETDATA_FILESYSTEM_READ_SLEEP_MS * em->update_every; + int update_every = em->update_every; + while (!close_ebpf_plugin) { + usec_t dt = heartbeat_next(&hb, step); + (void)dt; + + (void) ebpf_update_partitions(em); + ebpf_obsolete_fs_charts(update_every); + + // No more partitions, it is not necessary to read tables + if (em->optional) + continue; + + read_filesystem_tables(); + } + + read_thread_closed = 1; + return NULL; +} + +/** + * Send Hard disk data + * + * Send hard disk information to Netdata. + */ +static void ebpf_histogram_send_data() +{ + uint32_t i; + uint32_t test = NETDATA_FILESYSTEM_FLAG_HAS_PARTITION | NETDATA_FILESYSTEM_REMOVE_CHARTS; + for (i = 0; localfs[i].filesystem; i++) { + ebpf_filesystem_partitions_t *efp = &localfs[i]; + if ((efp->flags & test) == NETDATA_FILESYSTEM_FLAG_HAS_PARTITION) { + write_histogram_chart(NETDATA_FILESYSTEM_FAMILY, efp->hread.name, + efp->hread.histogram, dimensions, NETDATA_EBPF_HIST_MAX_BINS); + + write_histogram_chart(NETDATA_FILESYSTEM_FAMILY, efp->hwrite.name, + efp->hwrite.histogram, dimensions, NETDATA_EBPF_HIST_MAX_BINS); + + write_histogram_chart(NETDATA_FILESYSTEM_FAMILY, efp->hopen.name, + efp->hopen.histogram, dimensions, NETDATA_EBPF_HIST_MAX_BINS); + + write_histogram_chart(NETDATA_FILESYSTEM_FAMILY, efp->hadditional.name, + efp->hadditional.histogram, dimensions, NETDATA_EBPF_HIST_MAX_BINS); + } + } +} + +/** + * Main loop for this collector. 
+ * + * @param em main structure for this thread + */ +static void filesystem_collector(ebpf_module_t *em) +{ + filesystem_threads.thread = mallocz(sizeof(netdata_thread_t)); + filesystem_threads.start_routine = ebpf_filesystem_read_hash; + + netdata_thread_create(filesystem_threads.thread, filesystem_threads.name, + NETDATA_THREAD_OPTION_JOINABLE, ebpf_filesystem_read_hash, em); + + int update_every = em->update_every; + int counter = update_every - 1; + while (!close_ebpf_plugin || em->optional) { + pthread_mutex_lock(&collect_data_mutex); + pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); + + if (++counter == update_every) { + counter = 0; + pthread_mutex_lock(&lock); + + ebpf_create_fs_charts(update_every); + ebpf_histogram_send_data(); + + pthread_mutex_unlock(&lock); + } + + pthread_mutex_unlock(&collect_data_mutex); + } +} + +/***************************************************************** + * + * ENTRY THREAD + * + *****************************************************************/ + +/** + * Update Filesystem + * + * Update file system structure using values read from configuration file. + */ +static void ebpf_update_filesystem() +{ + char dist[NETDATA_FS_MAX_DIST_NAME + 1]; + int i; + for (i = 0; localfs[i].filesystem; i++) { + snprintfz(dist, NETDATA_FS_MAX_DIST_NAME, "%sdist", localfs[i].filesystem); + + localfs[i].enabled = appconfig_get_boolean(&fs_config, NETDATA_FILESYSTEM_CONFIG_NAME, dist, + CONFIG_BOOLEAN_YES); + } +} + +/** + * Filesystem thread + * + * Thread used to generate socket charts. 
+ * + * @param ptr a pointer to `struct ebpf_module` + * + * @return It always return NULL + */ +void *ebpf_filesystem_thread(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_filesystem_cleanup, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = fs_maps; + ebpf_update_filesystem(); + + if (!em->enabled) + goto endfilesystem; + + // Initialize optional as zero, to identify when there are not partitions to monitor + em->optional = 0; + + if (ebpf_update_partitions(em)) { + if (em->optional) + info("Netdata cannot monitor the filesystems used on this host."); + + em->enabled = 0; + goto endfilesystem; + } + + int algorithms[NETDATA_EBPF_HIST_MAX_BINS]; + ebpf_fill_algorithms(algorithms, NETDATA_EBPF_HIST_MAX_BINS, NETDATA_EBPF_INCREMENTAL_IDX); + ebpf_global_labels(filesystem_aggregated_data, filesystem_publish_aggregated, dimensions, dimensions, + algorithms, NETDATA_EBPF_HIST_MAX_BINS); + + pthread_mutex_lock(&lock); + ebpf_create_fs_charts(em->update_every); + pthread_mutex_unlock(&lock); + + filesystem_collector(em); + +endfilesystem: + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_filesystem.h b/collectors/ebpf.plugin/ebpf_filesystem.h new file mode 100644 index 00000000..295eec20 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_filesystem.h @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_FILESYSTEM_H +#define NETDATA_EBPF_FILESYSTEM_H 1 + +// Module name +#define NETDATA_EBPF_MODULE_NAME_FILESYSTEM "filesystem" + +#include "ebpf.h" + +#define NETDATA_FS_MAX_DIST_NAME 64UL + +#define NETDATA_FILESYSTEM_CONFIG_NAME "filesystem" +#define NETDATA_FILESYSTEM_READ_SLEEP_MS 600000ULL + +// Process configuration name +#define NETDATA_FILESYSTEM_CONFIG_FILE "filesystem.conf" + +typedef struct netdata_fs_hist { + uint32_t hist_id; + uint32_t bin; +} netdata_fs_hist_t; + +enum filesystem_limit { + NETDATA_KEY_CALLS_READ = 24, + NETDATA_KEY_CALLS_WRITE = 48, + 
NETDATA_KEY_CALLS_OPEN = 72, + NETDATA_KEY_CALLS_SYNC = 96 +}; + +enum netdata_filesystem_flags { + NETDATA_FILESYSTEM_FLAG_NO_PARTITION = 0, + NETDATA_FILESYSTEM_LOAD_EBPF_PROGRAM = 1, + NETDATA_FILESYSTEM_FLAG_HAS_PARTITION = 2, + NETDATA_FILESYSTEM_FLAG_CHART_CREATED = 4, + NETDATA_FILESYSTEM_FILL_ADDRESS_TABLE = 8, + NETDATA_FILESYSTEM_REMOVE_CHARTS = 16, + NETDATA_FILESYSTEM_ATTR_CHARTS = 32 +}; + +enum netdata_filesystem_table { + NETDATA_MAIN_FS_TABLE, + NETDATA_ADDR_FS_TABLE +}; + +typedef struct ebpf_filesystem_partitions { + char *filesystem; + char *optional_filesystem; + char *family; + char *family_name; + struct bpf_object *objects; + struct bpf_link **probe_links; + + netdata_ebpf_histogram_t hread; + netdata_ebpf_histogram_t hwrite; + netdata_ebpf_histogram_t hopen; + netdata_ebpf_histogram_t hadditional; + + uint32_t flags; + uint32_t enabled; + + ebpf_addresses_t addresses; +} ebpf_filesystem_partitions_t; + +extern void *ebpf_filesystem_thread(void *ptr); +extern struct config fs_config; + +#endif /* NETDATA_EBPF_FILESYSTEM_H */ diff --git a/collectors/ebpf.plugin/ebpf_hardirq.c b/collectors/ebpf.plugin/ebpf_hardirq.c new file mode 100644 index 00000000..ff649e9c --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_hardirq.c @@ -0,0 +1,494 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_hardirq.h" + +struct config hardirq_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +#define HARDIRQ_MAP_LATENCY 0 +#define HARDIRQ_MAP_LATENCY_STATIC 1 +static ebpf_local_maps_t hardirq_maps[] = { + { + .name = "tbl_hardirq", + .internal_input = NETDATA_HARDIRQ_MAX_IRQS, + .user_input = 0, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + }, + { + .name = "tbl_hardirq_static", + .internal_input = HARDIRQ_EBPF_STATIC_END, + .user_input = 
0, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + }, + /* end */ + { + .name = NULL, + .internal_input = 0, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + } +}; + +#define HARDIRQ_TP_CLASS_IRQ "irq" +#define HARDIRQ_TP_CLASS_IRQ_VECTORS "irq_vectors" +static ebpf_tracepoint_t hardirq_tracepoints[] = { + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ, .event = "irq_handler_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ, .event = "irq_handler_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "thermal_apic_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "thermal_apic_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "threshold_apic_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "threshold_apic_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "error_apic_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "error_apic_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "deferred_error_apic_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "deferred_error_apic_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "spurious_apic_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "spurious_apic_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "call_function_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "call_function_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "call_function_single_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "call_function_single_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "reschedule_entry"}, + {.enabled = false, .class = 
HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "reschedule_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "local_timer_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "local_timer_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "irq_work_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "irq_work_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "x86_platform_ipi_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "x86_platform_ipi_exit"}, + /* end */ + {.enabled = false, .class = NULL, .event = NULL} +}; + +static hardirq_static_val_t hardirq_static_vals[] = { + { + .idx = HARDIRQ_EBPF_STATIC_APIC_THERMAL, + .name = "apic_thermal", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_APIC_THRESHOLD, + .name = "apic_threshold", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_APIC_ERROR, + .name = "apic_error", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_APIC_DEFERRED_ERROR, + .name = "apic_deferred_error", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_APIC_SPURIOUS, + .name = "apic_spurious", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_FUNC_CALL, + .name = "func_call", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_FUNC_CALL_SINGLE, + .name = "func_call_single", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_RESCHEDULE, + .name = "reschedule", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_LOCAL_TIMER, + .name = "local_timer", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_IRQ_WORK, + .name = "irq_work", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_X86_PLATFORM_IPI, + .name = "x86_platform_ipi", + .latency = 0 + }, +}; + +static struct bpf_link **probe_links = NULL; +static struct bpf_object *objects = NULL; + +static int read_thread_closed = 1; + +// store for "published" data from the reader thread, which the collector +// thread will write to 
netdata agent. +static avl_tree_lock hardirq_pub; + +// tmp store for dynamic hard IRQ values we get from a per-CPU eBPF map. +static hardirq_ebpf_val_t *hardirq_ebpf_vals = NULL; + +// tmp store for static hard IRQ values we get from a per-CPU eBPF map. +static hardirq_ebpf_static_val_t *hardirq_ebpf_static_vals = NULL; + +static struct netdata_static_thread hardirq_threads = {"HARDIRQ KERNEL", + NULL, NULL, 1, NULL, + NULL, NULL }; + +/** + * Clean up the main thread. + * + * @param ptr thread data. + */ +static void hardirq_cleanup(void *ptr) +{ + for (int i = 0; hardirq_tracepoints[i].class != NULL; i++) { + ebpf_disable_tracepoint(&hardirq_tracepoints[i]); + } + + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) { + return; + } + + heartbeat_t hb; + heartbeat_init(&hb); + uint32_t tick = 1 * USEC_PER_MS; + while (!read_thread_closed) { + usec_t dt = heartbeat_next(&hb, tick); + UNUSED(dt); + } + + freez(hardirq_ebpf_vals); + freez(hardirq_ebpf_static_vals); + freez(hardirq_threads.thread); + + if (probe_links) { + struct bpf_program *prog; + size_t i = 0 ; + bpf_object__for_each_program(prog, objects) { + bpf_link__destroy(probe_links[i]); + i++; + } + bpf_object__close(objects); + } +} + +/***************************************************************** + * MAIN LOOP + *****************************************************************/ + +/** + * Compare hard IRQ values. + * + * @param a `hardirq_val_t *`. + * @param b `hardirq_val_t *`. + * + * @return 0 if a==b, 1 if a>b, -1 if a<b. 
+*/ +static int hardirq_val_cmp(void *a, void *b) +{ + hardirq_val_t *ptr1 = a; + hardirq_val_t *ptr2 = b; + + if (ptr1->irq > ptr2->irq) { + return 1; + } + else if (ptr1->irq < ptr2->irq) { + return -1; + } + else { + return 0; + } +} + +static void hardirq_read_latency_map(int mapfd) +{ + hardirq_ebpf_key_t key = {}; + hardirq_ebpf_key_t next_key = {}; + hardirq_val_t search_v = {}; + hardirq_val_t *v = NULL; + + while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) { + // get val for this key. + int test = bpf_map_lookup_elem(mapfd, &key, hardirq_ebpf_vals); + if (unlikely(test < 0)) { + key = next_key; + continue; + } + + // is this IRQ saved yet? + // + // if not, make a new one, mark it as unsaved for now, and continue; we + // will insert it at the end after all of its values are correctly set, + // so that we can safely publish it to the collector within a single, + // short locked operation. + // + // otherwise simply continue; we will only update the latency, which + // can be republished safely without a lock. + // + // NOTE: lock isn't strictly necessary for this initial search, as only + // this thread does writing, but the AVL is using a read-write lock so + // there is no congestion. + bool v_is_new = false; + search_v.irq = key.irq; + v = (hardirq_val_t *)avl_search_lock(&hardirq_pub, (avl_t *)&search_v); + if (unlikely(v == NULL)) { + // latency/name can only be added reliably at a later time. + // when they're added, only then will we AVL insert. + v = callocz(1, sizeof(hardirq_val_t)); + v->irq = key.irq; + v->dim_exists = false; + + v_is_new = true; + } + + // note two things: + // 1. we must add up latency value for this IRQ across all CPUs. + // 2. the name is unfortunately *not* available on all CPU maps - only + // a single map contains the name, so we must find it. we only need + // to copy it though if the IRQ is new for us. 
+ bool name_saved = false; + uint64_t total_latency = 0; + int i; + int end = (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs; + for (i = 0; i < end; i++) { + total_latency += hardirq_ebpf_vals[i].latency/1000; + + // copy name for new IRQs. + if (v_is_new && !name_saved && hardirq_ebpf_vals[i].name[0] != '\0') { + strncpyz( + v->name, + hardirq_ebpf_vals[i].name, + NETDATA_HARDIRQ_NAME_LEN + ); + name_saved = true; + } + } + + // can now safely publish latency for existing IRQs. + v->latency = total_latency; + + // can now safely publish new IRQ. + if (v_is_new) { + avl_t *check = avl_insert_lock(&hardirq_pub, (avl_t *)v); + if (check != (avl_t *)v) { + error("Internal error, cannot insert the AVL tree."); + } + } + + key = next_key; + } +} + +static void hardirq_read_latency_static_map(int mapfd) +{ + uint32_t i; + for (i = 0; i < HARDIRQ_EBPF_STATIC_END; i++) { + uint32_t map_i = hardirq_static_vals[i].idx; + int test = bpf_map_lookup_elem(mapfd, &map_i, hardirq_ebpf_static_vals); + if (unlikely(test < 0)) { + continue; + } + + uint64_t total_latency = 0; + int cpu_i; + int end = (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs; + for (cpu_i = 0; cpu_i < end; cpu_i++) { + total_latency += hardirq_ebpf_static_vals[cpu_i].latency/1000; + } + + hardirq_static_vals[i].latency = total_latency; + } +} + +/** + * Read eBPF maps for hard IRQ. 
+ */ +static void *hardirq_reader(void *ptr) +{ + read_thread_closed = 0; + + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + + usec_t step = NETDATA_HARDIRQ_SLEEP_MS * em->update_every; + while (!close_ebpf_plugin) { + usec_t dt = heartbeat_next(&hb, step); + UNUSED(dt); + + hardirq_read_latency_map(hardirq_maps[HARDIRQ_MAP_LATENCY].map_fd); + hardirq_read_latency_static_map(hardirq_maps[HARDIRQ_MAP_LATENCY_STATIC].map_fd); + } + + read_thread_closed = 1; + return NULL; +} + +static void hardirq_create_charts(int update_every) +{ + ebpf_create_chart( + NETDATA_EBPF_SYSTEM_GROUP, + "hardirq_latency", + "Hardware IRQ latency", + EBPF_COMMON_DIMENSION_MILLISECONDS, + "interrupts", + NULL, + NETDATA_EBPF_CHART_TYPE_STACKED, + NETDATA_CHART_PRIO_HARDIRQ_LATENCY, + NULL, NULL, 0, update_every, + NETDATA_EBPF_MODULE_NAME_HARDIRQ + ); + + fflush(stdout); +} + +static void hardirq_create_static_dims() +{ + uint32_t i; + for (i = 0; i < HARDIRQ_EBPF_STATIC_END; i++) { + ebpf_write_global_dimension( + hardirq_static_vals[i].name, hardirq_static_vals[i].name, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX] + ); + } +} + +// callback for avl tree traversal on `hardirq_pub`. +static int hardirq_write_dims(void *entry, void *data) +{ + UNUSED(data); + + hardirq_val_t *v = entry; + + // IRQs get dynamically added in, so add the dimension if we haven't yet. + if (!v->dim_exists) { + ebpf_write_global_dimension( + v->name, v->name, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX] + ); + v->dim_exists = true; + } + + write_chart_dimension(v->name, v->latency); + + return 1; +} + +static inline void hardirq_write_static_dims() +{ + uint32_t i; + for (i = 0; i < HARDIRQ_EBPF_STATIC_END; i++) { + write_chart_dimension( + hardirq_static_vals[i].name, + hardirq_static_vals[i].latency + ); + } +} + +/** +* Main loop for this collector. 
+*/ +static void hardirq_collector(ebpf_module_t *em) +{ + hardirq_ebpf_vals = callocz( + (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs, + sizeof(hardirq_ebpf_val_t) + ); + hardirq_ebpf_static_vals = callocz( + (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs, + sizeof(hardirq_ebpf_static_val_t) + ); + + avl_init_lock(&hardirq_pub, hardirq_val_cmp); + + // create reader thread. + hardirq_threads.thread = mallocz(sizeof(netdata_thread_t)); + hardirq_threads.start_routine = hardirq_reader; + netdata_thread_create( + hardirq_threads.thread, + hardirq_threads.name, + NETDATA_THREAD_OPTION_JOINABLE, + hardirq_reader, + em + ); + + // create chart and static dims. + pthread_mutex_lock(&lock); + hardirq_create_charts(em->update_every); + hardirq_create_static_dims(); + pthread_mutex_unlock(&lock); + + // loop and read from published data until ebpf plugin is closed. + int update_every = em->update_every; + int counter = update_every - 1; + while (!close_ebpf_plugin) { + pthread_mutex_lock(&collect_data_mutex); + pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); + + if (++counter == update_every) { + counter = 0; + pthread_mutex_lock(&lock); + + // write dims now for all hitherto discovered IRQs. + write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, "hardirq_latency"); + avl_traverse_lock(&hardirq_pub, hardirq_write_dims, NULL); + hardirq_write_static_dims(); + write_end_chart(); + + pthread_mutex_unlock(&lock); + } + + pthread_mutex_unlock(&collect_data_mutex); + } +} + +/***************************************************************** + * EBPF HARDIRQ THREAD + *****************************************************************/ + +/** + * Hard IRQ latency thread. + * + * @param ptr a `ebpf_module_t *`. + * @return always NULL. 
+ */ +void *ebpf_hardirq_thread(void *ptr) +{ + netdata_thread_cleanup_push(hardirq_cleanup, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = hardirq_maps; + + if (!em->enabled) { + goto endhardirq; + } + + if (ebpf_enable_tracepoints(hardirq_tracepoints) == 0) { + em->enabled = CONFIG_BOOLEAN_NO; + goto endhardirq; + } + + probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects); + if (!probe_links) { + goto endhardirq; + } + + hardirq_collector(em); + +endhardirq: + netdata_thread_cleanup_pop(1); + + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_hardirq.h b/collectors/ebpf.plugin/ebpf_hardirq.h new file mode 100644 index 00000000..4c8a7a09 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_hardirq.h @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_HARDIRQ_H +#define NETDATA_EBPF_HARDIRQ_H 1 + +/***************************************************************** + * copied from kernel-collectors repo, with modifications needed + * for inclusion here. 
+ *****************************************************************/ + +#define NETDATA_HARDIRQ_NAME_LEN 32 +#define NETDATA_HARDIRQ_MAX_IRQS 1024L + +typedef struct hardirq_ebpf_key { + int irq; +} hardirq_ebpf_key_t; + +typedef struct hardirq_ebpf_val { + uint64_t latency; + uint64_t ts; + char name[NETDATA_HARDIRQ_NAME_LEN]; +} hardirq_ebpf_val_t; + +enum hardirq_ebpf_static { + HARDIRQ_EBPF_STATIC_APIC_THERMAL, + HARDIRQ_EBPF_STATIC_APIC_THRESHOLD, + HARDIRQ_EBPF_STATIC_APIC_ERROR, + HARDIRQ_EBPF_STATIC_APIC_DEFERRED_ERROR, + HARDIRQ_EBPF_STATIC_APIC_SPURIOUS, + HARDIRQ_EBPF_STATIC_FUNC_CALL, + HARDIRQ_EBPF_STATIC_FUNC_CALL_SINGLE, + HARDIRQ_EBPF_STATIC_RESCHEDULE, + HARDIRQ_EBPF_STATIC_LOCAL_TIMER, + HARDIRQ_EBPF_STATIC_IRQ_WORK, + HARDIRQ_EBPF_STATIC_X86_PLATFORM_IPI, + + HARDIRQ_EBPF_STATIC_END +}; + +typedef struct hardirq_ebpf_static_val { + uint64_t latency; + uint64_t ts; +} hardirq_ebpf_static_val_t; + +/***************************************************************** + * below this is eBPF plugin-specific code. + *****************************************************************/ + +#define NETDATA_EBPF_MODULE_NAME_HARDIRQ "hardirq" +#define NETDATA_HARDIRQ_SLEEP_MS 650000ULL +#define NETDATA_HARDIRQ_CONFIG_FILE "hardirq.conf" + +typedef struct hardirq_val { + // must be at top for simplified AVL tree usage. + // if it's not at the top, we need to use `containerof` for almost all ops. + avl_t avl; + + int irq; + bool dim_exists; // keep this after `int irq` for alignment byte savings. 
+ uint64_t latency; + char name[NETDATA_HARDIRQ_NAME_LEN]; +} hardirq_val_t; + +typedef struct hardirq_static_val { + enum hardirq_ebpf_static idx; + char *name; + uint64_t latency; +} hardirq_static_val_t; + +extern struct config hardirq_config; +extern void *ebpf_hardirq_thread(void *ptr); + +#endif /* NETDATA_EBPF_HARDIRQ_H */ diff --git a/collectors/ebpf.plugin/ebpf_mdflush.c b/collectors/ebpf.plugin/ebpf_mdflush.c new file mode 100644 index 00000000..e2420ec8 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_mdflush.c @@ -0,0 +1,312 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_mdflush.h" + +struct config mdflush_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +#define MDFLUSH_MAP_COUNT 0 +static ebpf_local_maps_t mdflush_maps[] = { + { + .name = "tbl_mdflush", + .internal_input = 1024, + .user_input = 0, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + }, + /* end */ + { + .name = NULL, + .internal_input = 0, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + } +}; + +// store for "published" data from the reader thread, which the collector +// thread will write to netdata agent. +static avl_tree_lock mdflush_pub; + +// tmp store for mdflush values we get from a per-CPU eBPF map. +static mdflush_ebpf_val_t *mdflush_ebpf_vals = NULL; + +static struct bpf_link **probe_links = NULL; +static struct bpf_object *objects = NULL; + +static int read_thread_closed = 1; + +static struct netdata_static_thread mdflush_threads = {"MDFLUSH KERNEL", + NULL, NULL, 1, NULL, + NULL, NULL }; + +/** + * Clean up the main thread. + * + * @param ptr thread data. 
+ */ +static void mdflush_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) { + return; + } + + heartbeat_t hb; + heartbeat_init(&hb); + uint32_t tick = 1 * USEC_PER_MS; + while (!read_thread_closed) { + usec_t dt = heartbeat_next(&hb, tick); + UNUSED(dt); + } + + freez(mdflush_ebpf_vals); + freez(mdflush_threads.thread); + + if (probe_links) { + struct bpf_program *prog; + size_t i = 0 ; + bpf_object__for_each_program(prog, objects) { + bpf_link__destroy(probe_links[i]); + i++; + } + bpf_object__close(objects); + } +} + +/** + * Compare mdflush values. + * + * @param a `netdata_mdflush_t *`. + * @param b `netdata_mdflush_t *`. + * + * @return 0 if a==b, 1 if a>b, -1 if a<b. +*/ +static int mdflush_val_cmp(void *a, void *b) +{ + netdata_mdflush_t *ptr1 = a; + netdata_mdflush_t *ptr2 = b; + + if (ptr1->unit > ptr2->unit) { + return 1; + } + else if (ptr1->unit < ptr2->unit) { + return -1; + } + else { + return 0; + } +} + +static void mdflush_read_count_map() +{ + int mapfd = mdflush_maps[MDFLUSH_MAP_COUNT].map_fd; + mdflush_ebpf_key_t curr_key = (uint32_t)-1; + mdflush_ebpf_key_t key = (uint32_t)-1; + netdata_mdflush_t search_v; + netdata_mdflush_t *v = NULL; + + while (bpf_map_get_next_key(mapfd, &curr_key, &key) == 0) { + curr_key = key; + + // get val for this key. + int test = bpf_map_lookup_elem(mapfd, &key, mdflush_ebpf_vals); + if (unlikely(test < 0)) { + continue; + } + + // is this record saved yet? + // + // if not, make a new one, mark it as unsaved for now, and continue; we + // will insert it at the end after all of its values are correctly set, + // so that we can safely publish it to the collector within a single, + // short locked operation. + // + // otherwise simply continue; we will only update the flush count, + // which can be republished safely without a lock. 
+ // + // NOTE: lock isn't strictly necessary for this initial search, as only + // this thread does writing, but the AVL is using a read-write lock so + // there is no congestion. + bool v_is_new = false; + search_v.unit = key; + v = (netdata_mdflush_t *)avl_search_lock( + &mdflush_pub, + (avl_t *)&search_v + ); + if (unlikely(v == NULL)) { + // flush count can only be added reliably at a later time. + // when they're added, only then will we AVL insert. + v = callocz(1, sizeof(netdata_mdflush_t)); + v->unit = key; + sprintf(v->disk_name, "md%u", key); + v->dim_exists = false; + + v_is_new = true; + } + + // we must add up count value for this record across all CPUs. + uint64_t total_cnt = 0; + int i; + int end = (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs; + for (i = 0; i < end; i++) { + total_cnt += mdflush_ebpf_vals[i]; + } + + // can now safely publish count for existing records. + v->cnt = total_cnt; + + // can now safely publish new record. + if (v_is_new) { + avl_t *check = avl_insert_lock(&mdflush_pub, (avl_t *)v); + if (check != (avl_t *)v) { + error("Internal error, cannot insert the AVL tree."); + } + } + } +} + +/** + * Read eBPF maps for mdflush. + */ +static void *mdflush_reader(void *ptr) +{ + read_thread_closed = 0; + + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + + usec_t step = NETDATA_MDFLUSH_SLEEP_MS * em->update_every; + while (!close_ebpf_plugin) { + usec_t dt = heartbeat_next(&hb, step); + UNUSED(dt); + + mdflush_read_count_map(); + } + + read_thread_closed = 1; + return NULL; +} + +static void mdflush_create_charts(int update_every) +{ + ebpf_create_chart( + "mdstat", + "mdstat_flush", + "MD flushes", + "flushes", + "flush (eBPF)", + "md.flush", + NETDATA_EBPF_CHART_TYPE_STACKED, + NETDATA_CHART_PRIO_MDSTAT_FLUSH, + NULL, NULL, 0, update_every, + NETDATA_EBPF_MODULE_NAME_MDFLUSH + ); + + fflush(stdout); +} + +// callback for avl tree traversal on `mdflush_pub`. 
+static int mdflush_write_dims(void *entry, void *data) +{ + UNUSED(data); + + netdata_mdflush_t *v = entry; + + // records get dynamically added in, so add the dim if we haven't yet. + if (!v->dim_exists) { + ebpf_write_global_dimension( + v->disk_name, v->disk_name, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX] + ); + v->dim_exists = true; + } + + write_chart_dimension(v->disk_name, v->cnt); + + return 1; +} + +/** +* Main loop for this collector. +*/ +static void mdflush_collector(ebpf_module_t *em) +{ + mdflush_ebpf_vals = callocz(ebpf_nprocs, sizeof(mdflush_ebpf_val_t)); + + avl_init_lock(&mdflush_pub, mdflush_val_cmp); + + // create reader thread. + mdflush_threads.thread = mallocz(sizeof(netdata_thread_t)); + mdflush_threads.start_routine = mdflush_reader; + netdata_thread_create( + mdflush_threads.thread, + mdflush_threads.name, + NETDATA_THREAD_OPTION_JOINABLE, + mdflush_reader, + em + ); + + // create chart and static dims. + pthread_mutex_lock(&lock); + mdflush_create_charts(em->update_every); + pthread_mutex_unlock(&lock); + + // loop and read from published data until ebpf plugin is closed. + int update_every = em->update_every; + int counter = update_every - 1; + while (!close_ebpf_plugin) { + pthread_mutex_lock(&collect_data_mutex); + pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); + + if (++counter == update_every) { + counter = 0; + pthread_mutex_lock(&lock); + + // write dims now for all hitherto discovered devices. + write_begin_chart("mdstat", "mdstat_flush"); + avl_traverse_lock(&mdflush_pub, mdflush_write_dims, NULL); + write_end_chart(); + + pthread_mutex_unlock(&lock); + } + + pthread_mutex_unlock(&collect_data_mutex); + } +} + +/** + * mdflush thread. + * + * @param ptr a `ebpf_module_t *`. + * @return always NULL. 
+ */
+void *ebpf_mdflush_thread(void *ptr)
+{
+    // register the cleanup handler first so every path to `endmdflush` runs it.
+    netdata_thread_cleanup_push(mdflush_cleanup, ptr);
+
+    ebpf_module_t *em = (ebpf_module_t *)ptr;
+    em->maps = mdflush_maps;
+
+    // nothing to load or collect when the module is disabled.
+    if (!em->enabled) {
+        goto endmdflush;
+    }
+
+    // a NULL result means the eBPF program could not be loaded.
+    probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects);
+    if (!probe_links) {
+        goto endmdflush;
+    }
+
+    // returns only once `close_ebpf_plugin` is set.
+    mdflush_collector(em);
+
+endmdflush:
+    // presumably follows pthread_cleanup_pop() semantics, i.e. a non-zero
+    // argument runs mdflush_cleanup now - confirm against the wrapper macro.
+    netdata_thread_cleanup_pop(1);
+
+    return NULL;
+}
diff --git a/collectors/ebpf.plugin/ebpf_mdflush.h b/collectors/ebpf.plugin/ebpf_mdflush.h
new file mode 100644
index 00000000..59856ad6
--- /dev/null
+++ b/collectors/ebpf.plugin/ebpf_mdflush.h
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_EBPF_MDFLUSH_H
+#define NETDATA_EBPF_MDFLUSH_H 1
+
+// Module name
+#define NETDATA_EBPF_MODULE_NAME_MDFLUSH "mdflush"
+
+// NOTE: despite the `_MS` suffix, mdflush_reader multiplies this value by
+// `update_every` and passes the result to heartbeat_next() as a `usec_t` step.
+#define NETDATA_MDFLUSH_SLEEP_MS 850000ULL
+
+// charts
+#define NETDATA_MDFLUSH_GLOBAL_CHART "mdflush"
+
+// configuration file
+#define NETDATA_DIRECTORY_MDFLUSH_CONFIG_FILE "mdflush.conf"
+
+// copy of mdflush types from kernel-collectors repo.
+typedef uint32_t mdflush_ebpf_key_t;
+typedef uint64_t mdflush_ebpf_val_t;
+
+// one published record per MD device, stored in the `mdflush_pub` tree.
+typedef struct netdata_mdflush {
+    // must be at top for simplified AVL tree usage.
+    // if it's not at the top, we need to use `containerof` for almost all ops.
+    avl_t avl;
+
+    // key & name of device.
+    // the name is generated by the key, usually as `md<unit>`.
+    uint32_t unit;
+    char disk_name[32];
+
+    // have we defined the dimension for this device yet?
+    bool dim_exists;
+
+    // incremental flush count value.
+    uint64_t cnt;
+} netdata_mdflush_t;
+
+extern void *ebpf_mdflush_thread(void *ptr);
+
+extern struct config mdflush_config;
+
+#endif
diff --git a/collectors/ebpf.plugin/ebpf_mount.c b/collectors/ebpf.plugin/ebpf_mount.c
new file mode 100644
index 00000000..46f32347
--- /dev/null
+++ b/collectors/ebpf.plugin/ebpf_mount.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "ebpf.h"
+#include "ebpf_mount.h"
+
+// maps used by this module; the entry with a NULL name ends the list.
+static ebpf_local_maps_t mount_maps[] = {{.name = "tbl_mount", .internal_input = NETDATA_MOUNT_END,
+                                          .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC,
+                                          .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED},
+                                         {.name = NULL, .internal_input = 0, .user_input = 0,
+                                          .type = NETDATA_EBPF_MAP_CONTROLLER,
+                                          .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}};
+
+// dimension names, in the same order as the call counters of `enum mount_counters`.
+static char *mount_dimension_name[NETDATA_EBPF_MOUNT_SYSCALL] = { "mount", "umount" };
+static netdata_syscall_stat_t mount_aggregated_data[NETDATA_EBPF_MOUNT_SYSCALL];
+static netdata_publish_syscall_t mount_publish_aggregated[NETDATA_EBPF_MOUNT_SYSCALL];
+
+struct config mount_config = { .first_section = NULL, .last_section = NULL, .mutex = NETDATA_MUTEX_INITIALIZER,
+                               .index = {.avl_tree = { .root = NULL, .compar = appconfig_section_compare },
+                                         .rwlock = AVL_LOCK_INITIALIZER } };
+
+// 0 while ebpf_mount_read_hash is running, 1 before it starts and after it returns.
+static int read_thread_closed = 1;
+// buffer with `ebpf_nprocs` slots that bpf_map_lookup_elem fills for one key.
+static netdata_idx_t *mount_values = NULL;
+
+static struct bpf_link **probe_links = NULL;
+static struct bpf_object *objects = NULL;
+
+// totals summed over all slots of `mount_values`, indexed by `enum mount_counters`.
+static netdata_idx_t mount_hash_values[NETDATA_MOUNT_END];
+
+struct netdata_static_thread mount_thread = {"MOUNT KERNEL",
+                                             NULL, NULL, 1, NULL,
+                                             NULL, NULL};
+
+/*****************************************************************
+ *
+ *  FUNCTIONS TO CLOSE THE THREAD
+ *
+ *****************************************************************/
+
+/**
+ * Clean up the main thread.
+ *
+ * @param ptr thread data.
+ */
+static void ebpf_mount_cleanup(void *ptr)
+{
+    ebpf_module_t *em = (ebpf_module_t *)ptr;
+    // a disabled module never allocated or loaded anything.
+    if (!em->enabled)
+        return;
+
+    // NOTE(review): nothing here waits for ebpf_mount_read_hash to finish
+    // (`read_thread_closed` is never read in this file section) before the
+    // buffer it writes into is freed - confirm the reader cannot still run.
+    freez(mount_thread.thread);
+    freez(mount_values);
+
+    if (probe_links) {
+        struct bpf_program *prog;
+        size_t i = 0 ;
+        // one link is destroyed per program found in `objects`.
+        bpf_object__for_each_program(prog, objects) {
+            bpf_link__destroy(probe_links[i]);
+            i++;
+        }
+        bpf_object__close(objects);
+    }
+}
+
+/*****************************************************************
+ *
+ *  MAIN LOOP
+ *
+ *****************************************************************/
+
+/**
+ * Read global table
+ *
+ * Read the table with number of calls for all functions
+ *
+ * For every counter of `enum mount_counters` the values returned by the
+ * lookup are summed over `ebpf_nprocs` slots and stored in
+ * `mount_hash_values`. A counter whose lookup fails keeps its previous total.
+ */
+static void read_global_table()
+{
+    uint32_t idx;
+    netdata_idx_t *val = mount_hash_values;
+    netdata_idx_t *stored = mount_values;
+    int fd = mount_maps[NETDATA_KEY_MOUNT_TABLE].map_fd;
+
+    for (idx = NETDATA_KEY_MOUNT_CALL; idx < NETDATA_MOUNT_END; idx++) {
+        // the lookup returns 0 on success.
+        if (!bpf_map_lookup_elem(fd, &idx, stored)) {
+            int i;
+            int end = ebpf_nprocs;
+            netdata_idx_t total = 0;
+            for (i = 0; i < end; i++)
+                total += stored[i];
+
+            val[idx] = total;
+        }
+    }
+}
+
+/**
+ * Mount read hash
+ *
+ * This is the thread callback.
+ * This thread is necessary, because we cannot freeze the whole plugin to read the data.
+ *
+ * @param ptr a pointer to `ebpf_module_t`; `update_every` is read from it to compute the step between reads.
+ *
+ * @return It always returns NULL.
+ */
+void *ebpf_mount_read_hash(void *ptr)
+{
+    read_thread_closed = 0;
+
+    heartbeat_t hb;
+    heartbeat_init(&hb);
+
+    ebpf_module_t *em = (ebpf_module_t *)ptr;
+
+    // the step is handed to heartbeat_next() as a `usec_t`.
+    usec_t step = NETDATA_LATENCY_MOUNT_SLEEP_MS * em->update_every;
+    while (!close_ebpf_plugin) {
+        usec_t dt = heartbeat_next(&hb, step);
+        (void)dt;
+
+        read_global_table();
+    }
+    read_thread_closed = 1;
+
+    return NULL;
+}
+
+/**
+ * Send data to Netdata calling auxiliary functions.
+*/
+static void ebpf_mount_send_data()
+{
+    int i, j;
+    int end = NETDATA_EBPF_MOUNT_SYSCALL;
+    // `i` walks the call counters and `j` the matching error counters, so
+    // mount pairs with mount-error and umount with umount-error.
+    for (i = NETDATA_KEY_MOUNT_CALL, j = NETDATA_KEY_MOUNT_ERROR; i < end; i++, j++) {
+        mount_publish_aggregated[i].ncall = mount_hash_values[i];
+        mount_publish_aggregated[i].nerr = mount_hash_values[j];
+    }
+
+    write_count_chart(NETDATA_EBPF_MOUNT_CALLS, NETDATA_EBPF_MOUNT_GLOBAL_FAMILY,
+                      mount_publish_aggregated, NETDATA_EBPF_MOUNT_SYSCALL);
+
+    write_err_chart(NETDATA_EBPF_MOUNT_ERRORS, NETDATA_EBPF_MOUNT_GLOBAL_FAMILY,
+                    mount_publish_aggregated, NETDATA_EBPF_MOUNT_SYSCALL);
+}
+
+/**
+* Main loop for this collector.
+*
+* Allocates the reader thread handle and the lookup buffer, starts
+* `ebpf_mount_read_hash` in its own thread and then sends the totals it
+* collects until `close_ebpf_plugin` is set.
+*
+* @param em the module structure; `update_every` is read from it and the
+*           pointer is handed to the reader thread.
+*/
+static void mount_collector(ebpf_module_t *em)
+{
+    mount_thread.thread = mallocz(sizeof(netdata_thread_t));
+    mount_thread.start_routine = ebpf_mount_read_hash;
+    memset(mount_hash_values, 0, sizeof(mount_hash_values));
+
+    // one slot per processor; read_global_table sums over all of them.
+    mount_values = callocz((size_t)ebpf_nprocs, sizeof(netdata_idx_t));
+
+    netdata_thread_create(mount_thread.thread, mount_thread.name, NETDATA_THREAD_OPTION_JOINABLE,
+                          ebpf_mount_read_hash, em);
+
+    int update_every = em->update_every;
+    // start one short of `update_every` so the first wake-up already sends data.
+    int counter = update_every - 1;
+    while (!close_ebpf_plugin) {
+        // block until collect_data_cond_var is signalled; the signalling side
+        // is not part of this file.
+        pthread_mutex_lock(&collect_data_mutex);
+        pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex);
+
+        if (++counter == update_every) {
+            counter = 0;
+            // `lock` is held around everything written for the charts.
+            pthread_mutex_lock(&lock);
+
+            ebpf_mount_send_data();
+
+            pthread_mutex_unlock(&lock);
+        }
+
+        pthread_mutex_unlock(&collect_data_mutex);
+    }
+}
+
+/*****************************************************************
+ *
+ *  INITIALIZE THREAD
+ *
+ *****************************************************************/
+
+/**
+ * Create mount charts
+ *
+ * Call ebpf_create_chart to create the charts for the collector.
+ *
+ * @param update_every value to overwrite the update frequency set by the server.
+ */
+static void ebpf_create_mount_charts(int update_every)
+{
+    // calls chart: one dimension per entry of `mount_publish_aggregated`.
+    ebpf_create_chart(NETDATA_EBPF_MOUNT_GLOBAL_FAMILY, NETDATA_EBPF_MOUNT_CALLS,
+                      "Calls to mount and umount syscalls.",
+                      EBPF_COMMON_DIMENSION_CALL, NETDATA_EBPF_MOUNT_FAMILY,
+                      NULL,
+                      NETDATA_EBPF_CHART_TYPE_LINE,
+                      NETDATA_CHART_PRIO_EBPF_MOUNT_CHARTS,
+                      ebpf_create_global_dimension,
+                      mount_publish_aggregated, NETDATA_EBPF_MOUNT_SYSCALL,
+                      update_every, NETDATA_EBPF_MODULE_NAME_MOUNT);
+
+    // errors chart: same dimensions, placed right after the calls chart.
+    ebpf_create_chart(NETDATA_EBPF_MOUNT_GLOBAL_FAMILY, NETDATA_EBPF_MOUNT_ERRORS,
+                      "Errors to mount and umount syscalls.",
+                      EBPF_COMMON_DIMENSION_CALL, NETDATA_EBPF_MOUNT_FAMILY,
+                      NULL,
+                      NETDATA_EBPF_CHART_TYPE_LINE,
+                      NETDATA_CHART_PRIO_EBPF_MOUNT_CHARTS + 1,
+                      ebpf_create_global_dimension,
+                      mount_publish_aggregated, NETDATA_EBPF_MOUNT_SYSCALL,
+                      update_every, NETDATA_EBPF_MODULE_NAME_MOUNT);
+
+    fflush(stdout);
+}
+
+/*****************************************************************
+ *
+ *  MAIN THREAD
+ *
+ *****************************************************************/
+
+/**
+ * Mount thread
+ *
+ * Entry point of the mount collector thread: loads the eBPF program,
+ * prepares the dimension labels, creates the charts and runs the collector loop.
+ *
+ * @param ptr a pointer to `struct ebpf_module`
+ *
+ * @return It always returns NULL
+ */
+void *ebpf_mount_thread(void *ptr)
+{
+    // register the cleanup handler first so every path to `endmount` runs it.
+    netdata_thread_cleanup_push(ebpf_mount_cleanup, ptr);
+
+    ebpf_module_t *em = (ebpf_module_t *)ptr;
+    em->maps = mount_maps;
+
+    if (!em->enabled)
+        goto endmount;
+
+    // a NULL result means the eBPF program could not be loaded.
+    probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects);
+    if (!probe_links) {
+        goto endmount;
+    }
+
+    // both dimensions (mount, umount) use the incremental algorithm.
+    int algorithms[NETDATA_EBPF_MOUNT_SYSCALL] = { NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX };
+
+    ebpf_global_labels(mount_aggregated_data, mount_publish_aggregated, mount_dimension_name, mount_dimension_name,
+                       algorithms, NETDATA_EBPF_MOUNT_SYSCALL);
+
+    pthread_mutex_lock(&lock);
+    ebpf_create_mount_charts(em->update_every);
+    pthread_mutex_unlock(&lock);
+
+    // returns only once `close_ebpf_plugin` is set.
+    mount_collector(em);
+
+endmount:
+    netdata_thread_cleanup_pop(1);
+
+    return NULL;
+}
diff --git a/collectors/ebpf.plugin/ebpf_mount.h b/collectors/ebpf.plugin/ebpf_mount.h
new file mode 100644
index 00000000..700bea13
--- /dev/null
+++ b/collectors/ebpf.plugin/ebpf_mount.h
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_EBPF_MOUNT_H
+#define NETDATA_EBPF_MOUNT_H 1
+
+// Module name
+#define NETDATA_EBPF_MODULE_NAME_MOUNT "mount"
+
+// number of syscalls monitored (mount and umount); also the number of chart dimensions.
+#define NETDATA_EBPF_MOUNT_SYSCALL 2
+
+// NOTE: despite the `_MS` suffix, ebpf_mount_read_hash multiplies this value by
+// `update_every` and passes the result to heartbeat_next() as a `usec_t` step.
+#define NETDATA_LATENCY_MOUNT_SLEEP_MS 700000ULL
+
+#define NETDATA_EBPF_MOUNT_CALLS "call"
+#define NETDATA_EBPF_MOUNT_ERRORS "error"
+#define NETDATA_EBPF_MOUNT_FAMILY "mount (eBPF)"
+
+// Process configuration name
+#define NETDATA_MOUNT_CONFIG_FILE "mount.conf"
+
+// keys read from the `tbl_mount` map; the call counters come first and the
+// error counters follow in the same order.
+enum mount_counters {
+    NETDATA_KEY_MOUNT_CALL,
+    NETDATA_KEY_UMOUNT_CALL,
+    NETDATA_KEY_MOUNT_ERROR,
+    NETDATA_KEY_UMOUNT_ERROR,
+
+    NETDATA_MOUNT_END
+};
+
+// index into `mount_maps`.
+enum mount_tables {
+    NETDATA_KEY_MOUNT_TABLE
+};
+
+extern struct config mount_config;
+extern void *ebpf_mount_thread(void *ptr);
+
+#endif /* NETDATA_EBPF_MOUNT_H */
diff --git a/collectors/ebpf.plugin/ebpf_oomkill.c b/collectors/ebpf.plugin/ebpf_oomkill.c
new file mode 100644
index 00000000..7f7df36f
--- /dev/null
+++ b/collectors/ebpf.plugin/ebpf_oomkill.c
@@ -0,0 +1,400 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "ebpf.h"
+#include "ebpf_oomkill.h"
+
+struct config oomkill_config = { .first_section = NULL,
+    .last_section = NULL,
+    .mutex = NETDATA_MUTEX_INITIALIZER,
+    .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare },
+        .rwlock = AVL_LOCK_INITIALIZER } };
+
+// index of the kill table inside `oomkill_maps`.
+#define OOMKILL_MAP_KILLCNT 0
+static ebpf_local_maps_t oomkill_maps[] = {
+    {
+        .name = "tbl_oomkill",
+        .internal_input = NETDATA_OOMKILL_MAX_ENTRIES,
+        .user_input = 0,
+        .type = NETDATA_EBPF_MAP_STATIC,
+        .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED
+    },
+    /* end */
+    {
+        .name = NULL,
+        .internal_input = 0,
+        .user_input = 0,
+        .type = NETDATA_EBPF_MAP_CONTROLLER,
+        .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED
+    }
+};
+
+// tracepoints this module asks ebpf_enable_tracepoints() to enable.
+static ebpf_tracepoint_t oomkill_tracepoints[] = {
+    {.enabled = false, .class = "oom", .event = "mark_victim"},
+    /* end */
+    {.enabled = false, .class = NULL, .event = NULL}
+};
+
+static struct bpf_link **probe_links = NULL;
+static struct bpf_object *objects = NULL;
+
+// the single dimension used by the per-cgroup charts.
+static netdata_publish_syscall_t oomkill_publish_aggregated = {.name = "oomkill", .dimension = "oomkill",
+                                                               .algorithm = "absolute",
+                                                               .next = NULL};
+
+/**
+ * Clean up the main thread.
+ *
+ * @param ptr thread data.
+ */
+static void oomkill_cleanup(void *ptr)
+{
+    ebpf_module_t *em = (ebpf_module_t *)ptr;
+    // a disabled module never loaded anything.
+    if (!em->enabled) {
+        return;
+    }
+
+    if (probe_links) {
+        struct bpf_program *prog;
+        size_t i = 0 ;
+        // one link is destroyed per program found in `objects`.
+        bpf_object__for_each_program(prog, objects) {
+            bpf_link__destroy(probe_links[i]);
+            i++;
+        }
+        bpf_object__close(objects);
+    }
+}
+
+/**
+ * Write the apps-chart dimensions for one collection cycle.
+ *
+ * Must be called between write_begin_chart() and write_end_chart().
+ *
+ * @param keys  PIDs read from the eBPF map; entries that are matched to a
+ *              group are overwritten with 0.
+ * @param total number of valid entries in `keys`.
+ */
+static void oomkill_write_data(int32_t *keys, uint32_t total)
+{
+    // for each app, see if it was OOM killed. record as 1 if so otherwise 0.
+    struct target *w;
+    for (w = apps_groups_root_target; w != NULL; w = w->next) {
+        if (likely(w->exposed && w->processes)) {
+            bool was_oomkilled = false;
+            struct pid_on_target *pids = w->root_pid;
+            while (pids) {
+                uint32_t j;
+                for (j = 0; j < total; j++) {
+                    if (pids->pid == keys[j]) {
+                        was_oomkilled = true;
+                        // set to 0 so we consider it "done".
+                        keys[j] = 0;
+                        // the first match is enough for this group.
+                        goto write_dim;
+                    }
+                }
+                pids = pids->next;
+            }
+
+        write_dim:;
+            write_chart_dimension(w->name, was_oomkilled);
+        }
+    }
+
+    // for any remaining keys for which we couldn't find a group, this could be
+    // for various reasons, but the primary one is that the PID has not yet
+    // been picked up by the process thread when parsing the proc filesystem.
+    // since it's been OOM killed, it will never be parsed in the future, so
+    // we have no choice but to dump it into `other`.
+    uint32_t j;
+    uint32_t rem_count = 0;
+    for (j = 0; j < total; j++) {
+        int32_t key = keys[j];
+        if (key != 0) {
+            rem_count += 1;
+        }
+    }
+    if (rem_count > 0) {
+        write_chart_dimension("other", rem_count);
+    }
+}
+
+/**
+ * Create specific OOMkill charts
+ *
+ * Create charts for cgroup/application.
+ *
+ * @param type the chart type.
+ * @param update_every value to overwrite the update frequency set by the server.
+ */
+static void ebpf_create_specific_oomkill_charts(char *type, int update_every)
+{
+    ebpf_create_chart(type, NETDATA_OOMKILL_CHART, "OOM kills. This chart is provided by eBPF plugin.",
+                      EBPF_COMMON_DIMENSION_KILLS, NETDATA_EBPF_MEMORY_GROUP,
+                      NULL, NETDATA_EBPF_CHART_TYPE_LINE,
+                      NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5600,
+                      ebpf_create_global_dimension,
+                      &oomkill_publish_aggregated, 1, update_every, NETDATA_EBPF_MODULE_NAME_OOMKILL);
+}
+
+/**
+ * Create Systemd OOMkill Charts
+ *
+ * Create charts when systemd is enabled
+ *
+ * @param update_every value to overwrite the update frequency set by the server.
+ **/
+static void ebpf_create_systemd_oomkill_charts(int update_every)
+{
+    // NOTE(review): this chart uses the incremental algorithm, while the value
+    // sent for it is a 0/1 flag that is cleared after every send and the apps
+    // and per-cgroup charts use "absolute" - confirm this is intended.
+    ebpf_create_charts_on_systemd(NETDATA_OOMKILL_CHART, "OOM kills. This chart is provided by eBPF plugin.",
+                                  EBPF_COMMON_DIMENSION_KILLS, NETDATA_EBPF_MEMORY_GROUP,
+                                  NETDATA_EBPF_CHART_TYPE_LINE, 20191,
+                                  ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NULL,
+                                  NETDATA_EBPF_MODULE_NAME_OOMKILL, update_every);
+}
+
+/**
+ * Send Systemd charts
+ *
+ * Send collected data to Netdata.
+ *
+ * @return It returns the status for chart creation, if it is necessary to remove a specific dimension, zero is returned
+ *         otherwise function returns 1 to avoid chart recreation
+ */
+static int ebpf_send_systemd_oomkill_charts()
+{
+    int ret = 1;
+    ebpf_cgroup_target_t *ect;
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_OOMKILL_CHART);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            write_chart_dimension(ect->name, (long long) ect->oomkill);
+            // the flag was published, clear it for the next cycle.
+            ect->oomkill = 0;
+        } else if (unlikely(ect->systemd)) {
+            // only a systemd entry that was not updated has a dimension to
+            // remove from this chart. cgroups that are not systemd services
+            // are not part of it, so they must not force its recreation.
+            ret = 0;
+        }
+    }
+    write_end_chart();
+
+    return ret;
+}
+
+/*
+ * Send Specific OOMkill data
+ *
+ * Send data for specific cgroup/apps.
+ *
+ * @param type chart type
+ * @param value value for oomkill
+ */
+static void ebpf_send_specific_oomkill_data(char *type, int value)
+{
+    write_begin_chart(type, NETDATA_OOMKILL_CHART);
+    write_chart_dimension(oomkill_publish_aggregated.name, (long long)value);
+    write_end_chart();
+}
+
+/**
+ * Obsolete specific OOMkill charts
+ *
+ * Mark the chart of a cgroup/application as obsolete.
+ *
+ * @param type the chart type.
+ * @param update_every value to overwrite the update frequency set by the server.
+ */
+static void ebpf_obsolete_specific_oomkill_charts(char *type, int update_every)
+{
+    // title, units, family, chart type and priority repeat the values used by
+    // ebpf_create_specific_oomkill_charts.
+    ebpf_write_chart_obsolete(type, NETDATA_OOMKILL_CHART, "OOM kills. This chart is provided by eBPF plugin.",
+                              EBPF_COMMON_DIMENSION_KILLS, NETDATA_EBPF_MEMORY_GROUP,
+                              NETDATA_EBPF_CHART_TYPE_LINE, NULL,
+                              NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5600, update_every);
+}
+
+/**
+ * Send data to Netdata calling auxiliary functions.
+ *
+ * @param update_every value to overwrite the update frequency set by the server.
+*/
+void ebpf_oomkill_send_cgroup_data(int update_every)
+{
+    if (!ebpf_cgroup_pids)
+        return;
+
+    pthread_mutex_lock(&mutex_cgroup_shm);
+    ebpf_cgroup_target_t *ect;
+
+    int has_systemd = shm_ebpf_cgroup.header->systemd_enabled;
+    if (has_systemd) {
+        // remembers across calls whether the systemd chart is currently defined.
+        static int systemd_charts = 0;
+        if (!systemd_charts) {
+            ebpf_create_systemd_oomkill_charts(update_every);
+            systemd_charts = 1;
+        }
+        // a zero return asks for the chart to be created again on the next call.
+        systemd_charts = ebpf_send_systemd_oomkill_charts();
+    }
+
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        // systemd services were already sent in the shared chart above.
+        if (ect->systemd)
+            continue;
+
+        if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_OOMKILL_CHART) && ect->updated) {
+            ebpf_create_specific_oomkill_charts(ect->name, update_every);
+            ect->flags |= NETDATA_EBPF_CGROUP_HAS_OOMKILL_CHART;
+        }
+
+        if (ect->flags & NETDATA_EBPF_CGROUP_HAS_OOMKILL_CHART && ect->updated) {
+            ebpf_send_specific_oomkill_data(ect->name, ect->oomkill);
+            // clear the flag once it was published, as the systemd path does.
+            // ebpf_update_oomkill_cgroup only runs in cycles that read at
+            // least one kill, so without this a kill would be reported again
+            // in every following cycle.
+            ect->oomkill = 0;
+        } else {
+            ebpf_obsolete_specific_oomkill_charts(ect->name, update_every);
+            ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_OOMKILL_CHART;
+        }
+    }
+
+    pthread_mutex_unlock(&mutex_cgroup_shm);
+}
+
+/**
+ * Read data
+ *
+ * Read OOMKILL events from table.
+ *
+ * @param keys vector where data will be stored; it must have room for
+ *             NETDATA_OOMKILL_MAX_ENTRIES elements.
+ *
+ * @return It returns the number of read elements, at most NETDATA_OOMKILL_MAX_ENTRIES.
+ */
+static uint32_t oomkill_read_data(int32_t *keys)
+{
+    // the first `i` entries of `keys` will contain the currently active PIDs
+    // in the eBPF map.
+    uint32_t i = 0;
+
+    uint32_t curr_key = 0;
+    uint32_t key = 0;
+    int mapfd = oomkill_maps[OOMKILL_MAP_KILLCNT].map_fd;
+    // never write past the caller's buffer: entries can be added to the map
+    // while it is drained here, so one pass may see more keys than the buffer
+    // holds. whatever is left stays in the map and is read in the next cycle.
+    while (i < NETDATA_OOMKILL_MAX_ENTRIES && bpf_map_get_next_key(mapfd, &curr_key, &key) == 0) {
+        curr_key = key;
+
+        keys[i] = (int32_t)key;
+        i += 1;
+
+        // delete this key now that we've recorded its existence. there's no
+        // race here, as the same PID will only get OOM killed once.
+        int test = bpf_map_delete_elem(mapfd, &key);
+        if (unlikely(test < 0)) {
+            // since there's only 1 thread doing these deletions, it should be
+            // impossible to get this condition.
+            error("key unexpectedly not available for deletion.");
+        }
+    }
+
+    return i;
+}
+
+/**
+ * Update cgroup
+ *
+ * Update cgroup data based on the PIDs that had an OOM kill event: every
+ * cgroup is first reset to 0 and then set to 1 when one of its PIDs is
+ * found in `keys`.
+ *
+ * @param keys vector with pids that had oomkill event
+ * @param total number of elements in keys vector.
+ */
+static void ebpf_update_oomkill_cgroup(int32_t *keys, uint32_t total)
+{
+    ebpf_cgroup_target_t *ect;
+    for (ect = ebpf_cgroup_pids; ect; ect = ect->next) {
+        ect->oomkill = 0;
+        struct pid_on_target2 *pids;
+        for (pids = ect->pids; pids; pids = pids->next) {
+            uint32_t j;
+            int32_t pid = pids->pid;
+            for (j = 0; j < total; j++) {
+                if (pid == keys[j]) {
+                    ect->oomkill = 1;
+                    break;
+                }
+            }
+        }
+    }
+}
+
+/**
+* Main loop for this collector.
+*
+* On every publishing cycle the kill table is drained, the cgroup flags are
+* updated and sent (when cgroup charts are enabled) and the apps chart is written.
+*
+* @param em the module structure; `cgroup_charts` and `update_every` are read from it.
+*/
+static void oomkill_collector(ebpf_module_t *em)
+{
+    int cgroups = em->cgroup_charts;
+    int update_every = em->update_every;
+    // start one short of `update_every` so the first wake-up already publishes.
+    int counter = update_every - 1;
+    int32_t keys[NETDATA_OOMKILL_MAX_ENTRIES];
+    memset(keys, 0, sizeof(keys));
+
+    // loop and read until ebpf plugin is closed.
+    while (!close_ebpf_plugin) {
+        pthread_mutex_lock(&collect_data_mutex);
+        pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex);
+
+        if (++counter == update_every) {
+            counter = 0;
+            pthread_mutex_lock(&lock);
+
+            uint32_t count = oomkill_read_data(keys);
+            if (cgroups && count)
+                ebpf_update_oomkill_cgroup(keys, count);
+
+            // write everything from the ebpf map.
+            if (cgroups)
+                ebpf_oomkill_send_cgroup_data(update_every);
+
+            // the cgroup functions must run first: oomkill_write_data zeroes
+            // the keys it matches.
+            write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_OOMKILL_CHART);
+            oomkill_write_data(keys, count);
+            write_end_chart();
+
+            pthread_mutex_unlock(&lock);
+        }
+
+        pthread_mutex_unlock(&collect_data_mutex);
+    }
+}
+
+/**
+ * Create apps charts
+ *
+ * Call ebpf_create_charts_on_apps to create the charts on apps submenu.
+ *
+ * @param em a pointer to the structure with the default values.
+ * @param ptr the root of the apps target list (a `struct target *`).
+ */
+void ebpf_oomkill_create_apps_charts(struct ebpf_module *em, void *ptr)
+{
+    // `ptr` carries the head of the apps target list.
+    struct target *root = ptr;
+    ebpf_create_charts_on_apps(NETDATA_OOMKILL_CHART,
+                               "OOM kills",
+                               EBPF_COMMON_DIMENSION_KILLS,
+                               "mem",
+                               NETDATA_EBPF_CHART_TYPE_STACKED,
+                               20020,
+                               ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
+                               root, em->update_every, NETDATA_EBPF_MODULE_NAME_OOMKILL);
+}
+
+/**
+ * OOM kill tracking thread.
+ *
+ * Enables the required tracepoints, loads the eBPF program and runs the
+ * collector loop. The module is marked as disabled when the tracepoints
+ * cannot be enabled.
+ *
+ * @param ptr a `ebpf_module_t *`.
+ * @return always NULL.
+ */
+void *ebpf_oomkill_thread(void *ptr)
+{
+    // register the cleanup handler first so every path to `endoomkill` runs it.
+    netdata_thread_cleanup_push(oomkill_cleanup, ptr);
+
+    ebpf_module_t *em = (ebpf_module_t *)ptr;
+    em->maps = oomkill_maps;
+
+    if (!em->enabled) {
+        goto endoomkill;
+    }
+
+    // a zero result is treated as failure; with `enabled` cleared,
+    // oomkill_cleanup returns without touching the (never loaded) program.
+    if (ebpf_enable_tracepoints(oomkill_tracepoints) == 0) {
+        em->enabled = CONFIG_BOOLEAN_NO;
+        goto endoomkill;
+    }
+
+    // a NULL result means the eBPF program could not be loaded.
+    probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects);
+    if (!probe_links) {
+        goto endoomkill;
+    }
+
+    // returns only once `close_ebpf_plugin` is set.
+    oomkill_collector(em);
+
+endoomkill:
+    netdata_thread_cleanup_pop(1);
+
+    return NULL;
+}
diff --git a/collectors/ebpf.plugin/ebpf_oomkill.h b/collectors/ebpf.plugin/ebpf_oomkill.h
new file mode 100644
index 00000000..86f9463d
--- /dev/null
+++ b/collectors/ebpf.plugin/ebpf_oomkill.h
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_EBPF_OOMKILL_H
+#define NETDATA_EBPF_OOMKILL_H 1
+
+/*****************************************************************
+ * copied from kernel-collectors repo, with modifications needed
+ * for inclusion here.
+ *****************************************************************/
+
+// size given to the `tbl_oomkill` map and to the key buffer in oomkill_collector.
+#define NETDATA_OOMKILL_MAX_ENTRIES 64
+
+typedef uint8_t oomkill_ebpf_val_t;
+
+/*****************************************************************
+ * below this is eBPF plugin-specific code.
+ *****************************************************************/ + +#define NETDATA_EBPF_MODULE_NAME_OOMKILL "oomkill" +#define NETDATA_OOMKILL_SLEEP_MS 650000ULL +#define NETDATA_OOMKILL_CONFIG_FILE "oomkill.conf" + +#define NETDATA_OOMKILL_CHART "oomkills" + +extern struct config oomkill_config; +extern void *ebpf_oomkill_thread(void *ptr); +extern void ebpf_oomkill_create_apps_charts(struct ebpf_module *em, void *ptr); + +#endif /* NETDATA_EBPF_OOMKILL_H */ diff --git a/collectors/ebpf.plugin/ebpf_process.c b/collectors/ebpf.plugin/ebpf_process.c index 9b15c840..a4a6709e 100644 --- a/collectors/ebpf.plugin/ebpf_process.c +++ b/collectors/ebpf.plugin/ebpf_process.c @@ -11,29 +11,42 @@ * *****************************************************************/ -static char *process_dimension_names[NETDATA_KEY_PUBLISH_PROCESS_END] = { "open", "close", "delete", "read", "write", - "process", "task", "process", "thread" }; -static char *process_id_names[NETDATA_KEY_PUBLISH_PROCESS_END] = { "do_sys_open", "__close_fd", "vfs_unlink", - "vfs_read", "vfs_write", "do_exit", - "release_task", "_do_fork", "sys_clone" }; +static char *process_dimension_names[NETDATA_KEY_PUBLISH_PROCESS_END] = { "process", "task", "process", "thread" }; +static char *process_id_names[NETDATA_KEY_PUBLISH_PROCESS_END] = { "do_exit", "release_task", "_do_fork", "sys_clone" }; static char *status[] = { "process", "zombie" }; static ebpf_local_maps_t process_maps[] = {{.name = "tbl_pid_stats", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, - .user_input = 0}, - {.name = NULL, .internal_input = 0, .user_input = 0}}; + .user_input = 0, + .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_total_stats", .internal_input = NETDATA_KEY_END_VECTOR, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "process_ctrl", .internal_input = NETDATA_CONTROLLER_END, + .user_input = 0, + .type = 
NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; + +char *tracepoint_sched_type = { "sched" } ; +char *tracepoint_sched_process_exit = { "sched_process_exit" }; +char *tracepoint_sched_process_exec = { "sched_process_exec" }; +char *tracepoint_sched_process_fork = { "sched_process_fork" }; +static int was_sched_process_exit_enabled = 0; +static int was_sched_process_exec_enabled = 0; +static int was_sched_process_fork_enabled = 0; static netdata_idx_t *process_hash_values = NULL; static netdata_syscall_stat_t process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_END]; static netdata_publish_syscall_t process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_END]; -static ebpf_data_t process_data; - ebpf_process_stat_t **global_process_stats = NULL; ebpf_process_publish_apps_t **current_apps_data = NULL; int process_enabled = 0; -static int *map_fd = NULL; static struct bpf_object *objects = NULL; static struct bpf_link **probe_links = NULL; @@ -43,6 +56,8 @@ struct config process_config = { .first_section = NULL, .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, .rwlock = AVL_LOCK_INITIALIZER } }; +static struct netdata_static_thread cgroup_thread = {"EBPF CGROUP", NULL, NULL, + 1, NULL, NULL, NULL}; /***************************************************************** * * PROCESS DATA AND SEND TO NETDATA @@ -56,40 +71,30 @@ struct config process_config = { .first_section = NULL, * @param pvc the second output structure with correlated dimensions * @param input the structure with the input data. 
*/ -static void ebpf_update_global_publish( - netdata_publish_syscall_t *publish, netdata_publish_vfs_common_t *pvc, netdata_syscall_stat_t *input) +static void ebpf_update_global_publish(netdata_publish_syscall_t *publish, netdata_publish_vfs_common_t *pvc, + netdata_syscall_stat_t *input) { netdata_publish_syscall_t *move = publish; - int selector = NETDATA_KEY_PUBLISH_PROCESS_OPEN; + int selector = NETDATA_KEY_PUBLISH_PROCESS_EXIT; while (move) { - // Until NETDATA_KEY_PUBLISH_PROCESS_READ we are creating accumulators, so it is possible - // to use incremental charts, but after this we will do some math with the values, so we are storing - // absolute values - if (selector < NETDATA_KEY_PUBLISH_PROCESS_READ) { - move->ncall = input->call; - move->nbyte = input->bytes; - move->nerr = input->ecall; - } else { - move->ncall = (input->call > move->pcall) ? input->call - move->pcall : move->pcall - input->call; - move->nbyte = (input->bytes > move->pbyte) ? input->bytes - move->pbyte : move->pbyte - input->bytes; - move->nerr = (input->ecall > move->nerr) ? input->ecall - move->perr : move->perr - input->ecall; + move->ncall = (input->call > move->pcall) ? input->call - move->pcall : move->pcall - input->call; + move->nbyte = (input->bytes > move->pbyte) ? input->bytes - move->pbyte : move->pbyte - input->bytes; + move->nerr = (input->ecall > move->nerr) ? 
input->ecall - move->perr : move->perr - input->ecall; - move->pcall = input->call; - move->pbyte = input->bytes; - move->perr = input->ecall; - } + move->pcall = input->call; + move->pbyte = input->bytes; + move->perr = input->ecall; input = input->next; move = move->next; selector++; } - pvc->write = -((long)publish[NETDATA_KEY_PUBLISH_PROCESS_WRITE].nbyte); - pvc->read = (long)publish[NETDATA_KEY_PUBLISH_PROCESS_READ].nbyte; - - pvc->running = (long)publish[NETDATA_KEY_PUBLISH_PROCESS_FORK].ncall - (long)publish[NETDATA_KEY_PUBLISH_PROCESS_CLONE].ncall; + pvc->running = (long)publish[NETDATA_KEY_PUBLISH_PROCESS_FORK].ncall - + (long)publish[NETDATA_KEY_PUBLISH_PROCESS_CLONE].ncall; publish[NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK].ncall = -publish[NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK].ncall; - pvc->zombie = (long)publish[NETDATA_KEY_PUBLISH_PROCESS_EXIT].ncall + (long)publish[NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK].ncall; + pvc->zombie = (long)publish[NETDATA_KEY_PUBLISH_PROCESS_EXIT].ncall + + (long)publish[NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK].ncall; } /** @@ -109,7 +114,7 @@ static void write_status_chart(char *family, netdata_publish_vfs_common_t *pvc) } /** - * Send data to Netdata calling auxiliar functions. + * Send data to Netdata calling auxiliary functions. 
* * @param em the structure with thread information */ @@ -118,33 +123,16 @@ static void ebpf_process_send_data(ebpf_module_t *em) netdata_publish_vfs_common_t pvc; ebpf_update_global_publish(process_publish_aggregated, &pvc, process_aggregated_data); - write_count_chart( - NETDATA_FILE_OPEN_CLOSE_COUNT, NETDATA_EBPF_FAMILY, process_publish_aggregated, 2); - - write_count_chart( - NETDATA_VFS_FILE_CLEAN_COUNT, NETDATA_EBPF_FAMILY, &process_publish_aggregated[NETDATA_DEL_START], 1); - - write_count_chart( - NETDATA_VFS_FILE_IO_COUNT, NETDATA_EBPF_FAMILY, &process_publish_aggregated[NETDATA_IN_START_BYTE], 2); - - write_count_chart( - NETDATA_EXIT_SYSCALL, NETDATA_EBPF_FAMILY, &process_publish_aggregated[NETDATA_EXIT_START], 2); - write_count_chart( - NETDATA_PROCESS_SYSCALL, NETDATA_EBPF_FAMILY, &process_publish_aggregated[NETDATA_PROCESS_START], 2); + write_count_chart(NETDATA_EXIT_SYSCALL, NETDATA_EBPF_SYSTEM_GROUP, + &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT], 2); + write_count_chart(NETDATA_PROCESS_SYSCALL, NETDATA_EBPF_SYSTEM_GROUP, + &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK], 2); - write_status_chart(NETDATA_EBPF_FAMILY, &pvc); + write_status_chart(NETDATA_EBPF_SYSTEM_GROUP, &pvc); if (em->mode < MODE_ENTRY) { - write_err_chart( - NETDATA_FILE_OPEN_ERR_COUNT, NETDATA_EBPF_FAMILY, process_publish_aggregated, 2); - write_err_chart( - NETDATA_VFS_FILE_ERR_COUNT, NETDATA_EBPF_FAMILY, &process_publish_aggregated[2], NETDATA_VFS_ERRORS); - write_err_chart( - NETDATA_PROCESS_ERROR_NAME, NETDATA_EBPF_FAMILY, &process_publish_aggregated[NETDATA_PROCESS_START], 2); + write_err_chart(NETDATA_PROCESS_ERROR_NAME, NETDATA_EBPF_SYSTEM_GROUP, + &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK], 2); } - - write_io_chart(NETDATA_VFS_IO_FILE_BYTES, NETDATA_EBPF_FAMILY, - process_id_names[NETDATA_KEY_PUBLISH_PROCESS_WRITE], (long long) pvc.write, - process_id_names[NETDATA_KEY_PUBLISH_PROCESS_READ], (long long)pvc.read); } /** @@ 
-180,7 +168,7 @@ long long ebpf_process_sum_values_for_pids(struct pid_on_target *root, size_t of void ebpf_process_remove_pids() { struct pid_stat *pids = root_of_pids; - int pid_fd = map_fd[0]; + int pid_fd = process_maps[NETDATA_PROCESS_PID_TABLE].map_fd; while (pids) { uint32_t pid = pids->pid; ebpf_process_stat_t *w = global_process_stats[pid]; @@ -197,163 +185,65 @@ void ebpf_process_remove_pids() } /** - * Send data to Netdata calling auxiliar functions. + * Send data to Netdata calling auxiliary functions. * - * @param em the structure with thread information * @param root the target list. */ -void ebpf_process_send_apps_data(ebpf_module_t *em, struct target *root) +void ebpf_process_send_apps_data(struct target *root, ebpf_module_t *em) { struct target *w; collected_number value; - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, call_sys_open)); - write_chart_dimension(w->name, value); - } - } - write_end_chart(); - - if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_process_sum_values_for_pids( - w->root_pid, offsetof(ebpf_process_publish_apps_t, ecall_sys_open)); - write_chart_dimension(w->name, value); - } - } - write_end_chart(); - } - - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSED); + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_PROCESS); for (w = root; w; w = w->next) { if (unlikely(w->exposed && w->processes)) { - value = - ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, call_close_fd)); + value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, create_process)); 
write_chart_dimension(w->name, value); } } write_end_chart(); - if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_process_sum_values_for_pids( - w->root_pid, offsetof(ebpf_process_publish_apps_t, ecall_close_fd)); - write_chart_dimension(w->name, value); - } - } - write_end_chart(); - } - - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_DELETED); + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_THREAD); for (w = root; w; w = w->next) { if (unlikely(w->exposed && w->processes)) { - value = - ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, call_vfs_unlink)); + value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, create_thread)); write_chart_dimension(w->name, value); } } write_end_chart(); - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS); + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_EXIT); for (w = root; w; w = w->next) { if (unlikely(w->exposed && w->processes)) { - value = ebpf_process_sum_values_for_pids( - w->root_pid, offsetof(ebpf_process_publish_apps_t, call_write)); + value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, + call_do_exit)); write_chart_dimension(w->name, value); } } write_end_chart(); - if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_process_sum_values_for_pids( - w->root_pid, offsetof(ebpf_process_publish_apps_t, ecall_write)); - write_chart_dimension(w->name, value); - } - } - write_end_chart(); - } - - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS); + write_begin_chart(NETDATA_APPS_FAMILY, 
NETDATA_SYSCALL_APPS_TASK_CLOSE); for (w = root; w; w = w->next) { if (unlikely(w->exposed && w->processes)) { - value = - ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, call_read)); + value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, + call_release_task)); write_chart_dimension(w->name, value); } } write_end_chart(); if (em->mode < MODE_ENTRY) { - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR); + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_ERROR); for (w = root; w; w = w->next) { if (unlikely(w->exposed && w->processes)) { - value = ebpf_process_sum_values_for_pids( - w->root_pid, offsetof(ebpf_process_publish_apps_t, ecall_read)); + value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, + task_err)); write_chart_dimension(w->name, value); } } write_end_chart(); } - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_process_sum_values_for_pids( - w->root_pid, offsetof(ebpf_process_publish_apps_t, bytes_written)); - write_chart_dimension(w->name, value); - } - } - write_end_chart(); - - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_BYTES); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_process_sum_values_for_pids( - w->root_pid, offsetof(ebpf_process_publish_apps_t, bytes_read)); - write_chart_dimension(w->name, value); - } - } - write_end_chart(); - - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_PROCESS); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = - ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, call_do_fork)); - write_chart_dimension(w->name, value); - } - } - write_end_chart(); - - 
write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_THREAD); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = - ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, call_sys_clone)); - write_chart_dimension(w->name, value); - } - } - write_end_chart(); - - write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_CLOSE); - for (w = root; w; w = w->next) { - if (unlikely(w->exposed && w->processes)) { - value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_publish_apps_t, - call_release_task)); - write_chart_dimension(w->name, value); - } - } - write_end_chart(); - ebpf_process_remove_pids(); } @@ -369,14 +259,15 @@ void ebpf_process_send_apps_data(ebpf_module_t *em, struct target *root) static void read_hash_global_tables() { uint64_t idx; - netdata_idx_t res[NETDATA_GLOBAL_VECTOR]; + netdata_idx_t res[NETDATA_KEY_END_VECTOR]; netdata_idx_t *val = process_hash_values; - for (idx = 0; idx < NETDATA_GLOBAL_VECTOR; idx++) { - if (!bpf_map_lookup_elem(map_fd[1], &idx, val)) { + int fd = process_maps[NETDATA_PROCESS_GLOBAL_TABLE].map_fd; + for (idx = 0; idx < NETDATA_KEY_END_VECTOR; idx++) { + if (!bpf_map_lookup_elem(fd, &idx, val)) { uint64_t total = 0; int i; - int end = (running_on_kernel < NETDATA_KERNEL_V4_15) ? 
1 : ebpf_nprocs; + int end = ebpf_nprocs; for (i = 0; i < end; i++) total += val[i]; @@ -386,28 +277,13 @@ static void read_hash_global_tables() } } - process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_OPEN].call = res[NETDATA_KEY_CALLS_DO_SYS_OPEN]; - process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_CLOSE].call = res[NETDATA_KEY_CALLS_CLOSE_FD]; - process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_UNLINK].call = res[NETDATA_KEY_CALLS_VFS_UNLINK]; - process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_READ].call = res[NETDATA_KEY_CALLS_VFS_READ] + res[NETDATA_KEY_CALLS_VFS_READV]; - process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_WRITE].call = res[NETDATA_KEY_CALLS_VFS_WRITE] + res[NETDATA_KEY_CALLS_VFS_WRITEV]; process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_EXIT].call = res[NETDATA_KEY_CALLS_DO_EXIT]; process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK].call = res[NETDATA_KEY_CALLS_RELEASE_TASK]; process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_FORK].call = res[NETDATA_KEY_CALLS_DO_FORK]; process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_CLONE].call = res[NETDATA_KEY_CALLS_SYS_CLONE]; - process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_OPEN].ecall = res[NETDATA_KEY_ERROR_DO_SYS_OPEN]; - process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_CLOSE].ecall = res[NETDATA_KEY_ERROR_CLOSE_FD]; - process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_UNLINK].ecall = res[NETDATA_KEY_ERROR_VFS_UNLINK]; - process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_READ].ecall = res[NETDATA_KEY_ERROR_VFS_READ] + res[NETDATA_KEY_ERROR_VFS_READV]; - process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_WRITE].ecall = res[NETDATA_KEY_ERROR_VFS_WRITE] + res[NETDATA_KEY_ERROR_VFS_WRITEV]; process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_FORK].ecall = res[NETDATA_KEY_ERROR_DO_FORK]; process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_CLONE].ecall = res[NETDATA_KEY_ERROR_SYS_CLONE]; - - process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_WRITE].bytes = 
(uint64_t)res[NETDATA_KEY_BYTES_VFS_WRITE] + - (uint64_t)res[NETDATA_KEY_BYTES_VFS_WRITEV]; - process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_READ].bytes = (uint64_t)res[NETDATA_KEY_BYTES_VFS_READ] + - (uint64_t)res[NETDATA_KEY_BYTES_VFS_READV]; } /** @@ -431,67 +307,53 @@ static void ebpf_process_update_apps_data() } //Read data - cad->call_sys_open = ps->open_call; - cad->call_close_fd = ps->close_call; - cad->call_vfs_unlink = ps->unlink_call; - cad->call_read = ps->read_call + ps->readv_call; - cad->call_write = ps->write_call + ps->writev_call; cad->call_do_exit = ps->exit_call; cad->call_release_task = ps->release_call; - cad->call_do_fork = ps->fork_call; - cad->call_sys_clone = ps->clone_call; + cad->create_process = ps->create_process; + cad->create_thread = ps->create_thread; - cad->ecall_sys_open = ps->open_err; - cad->ecall_close_fd = ps->close_err; - cad->ecall_vfs_unlink = ps->unlink_err; - cad->ecall_read = ps->read_err + ps->readv_err; - cad->ecall_write = ps->write_err + ps->writev_err; - cad->ecall_do_fork = ps->fork_err; - cad->ecall_sys_clone = ps->clone_err; - - cad->bytes_written = (uint64_t)ps->write_bytes + (uint64_t)ps->write_bytes; - cad->bytes_read = (uint64_t)ps->read_bytes + (uint64_t)ps->readv_bytes; + cad->task_err = ps->task_err; pids = pids->next; } } -/***************************************************************** - * - * FUNCTIONS TO CREATE CHARTS - * - *****************************************************************/ - /** - * Create IO chart + * Update cgroup * - * @param family the chart family - * @param name the chart name - * @param axis the axis label - * @param web the group name used to attach the chart on dashboard - * @param order the order number of the specified chart - * @param algorithm the algorithm used to make the charts. 
+ * Update cgroup data based in */ -static void ebpf_create_io_chart(char *family, char *name, char *axis, char *web, int order, int algorithm) +static void ebpf_update_process_cgroup() { - printf("CHART %s.%s '' 'Bytes written and read' '%s' '%s' '' line %d %d\n", - family, - name, - axis, - web, - order, - update_every); - - printf("DIMENSION %s %s %s 1 1\n", - process_id_names[NETDATA_KEY_PUBLISH_PROCESS_READ], - process_dimension_names[NETDATA_KEY_PUBLISH_PROCESS_READ], - ebpf_algorithms[algorithm]); - printf("DIMENSION %s %s %s 1 1\n", - process_id_names[NETDATA_KEY_PUBLISH_PROCESS_WRITE], - process_dimension_names[NETDATA_KEY_PUBLISH_PROCESS_WRITE], - ebpf_algorithms[algorithm]); + ebpf_cgroup_target_t *ect ; + int pid_fd = process_maps[NETDATA_PROCESS_PID_TABLE].map_fd; + + pthread_mutex_lock(&mutex_cgroup_shm); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + struct pid_on_target2 *pids; + for (pids = ect->pids; pids; pids = pids->next) { + int pid = pids->pid; + ebpf_process_stat_t *out = &pids->ps; + if (global_process_stats[pid]) { + ebpf_process_stat_t *in = global_process_stats[pid]; + + memcpy(out, in, sizeof(ebpf_process_stat_t)); + } else { + if (bpf_map_lookup_elem(pid_fd, &pid, out)) { + memset(out, 0, sizeof(ebpf_process_stat_t)); + } + } + } + } + pthread_mutex_unlock(&mutex_cgroup_shm); } +/***************************************************************** + * + * FUNCTIONS TO CREATE CHARTS + * + *****************************************************************/ + /** * Create process status chart * @@ -500,11 +362,12 @@ static void ebpf_create_io_chart(char *family, char *name, char *axis, char *web * @param axis the axis label * @param web the group name used to attach the chart on dashboard * @param order the order number of the specified chart + * @param update_every value to overwrite the update frequency set by the server. 
*/ static void ebpf_process_status_chart(char *family, char *name, char *axis, - char *web, char *algorithm, int order) + char *web, char *algorithm, int order, int update_every) { - printf("CHART %s.%s '' 'Process not closed' '%s' '%s' '' line %d %d ''\n", + printf("CHART %s.%s '' 'Process not closed' '%s' '%s' '' line %d %d '' 'ebpf.plugin' 'process'\n", family, name, axis, @@ -525,119 +388,49 @@ static void ebpf_process_status_chart(char *family, char *name, char *axis, */ static void ebpf_create_global_charts(ebpf_module_t *em) { - ebpf_create_chart(NETDATA_EBPF_FAMILY, - NETDATA_FILE_OPEN_CLOSE_COUNT, - "Open and close calls", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_FILE_GROUP, - NULL, - NETDATA_EBPF_CHART_TYPE_LINE, - 21000, - ebpf_create_global_dimension, - process_publish_aggregated, - 2); - - if (em->mode < MODE_ENTRY) { - ebpf_create_chart(NETDATA_EBPF_FAMILY, - NETDATA_FILE_OPEN_ERR_COUNT, - "Open fails", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_FILE_GROUP, - NULL, - NETDATA_EBPF_CHART_TYPE_LINE, - 21001, - ebpf_create_global_dimension, - process_publish_aggregated, - 2); - } - - ebpf_create_chart(NETDATA_EBPF_FAMILY, - NETDATA_VFS_FILE_CLEAN_COUNT, - "Remove files", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_VFS_GROUP, - NULL, - NETDATA_EBPF_CHART_TYPE_LINE, - 21002, - ebpf_create_global_dimension, - &process_publish_aggregated[NETDATA_DEL_START], - 1); - - ebpf_create_chart(NETDATA_EBPF_FAMILY, - NETDATA_VFS_FILE_IO_COUNT, - "Calls to IO", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_VFS_GROUP, - NULL, - NETDATA_EBPF_CHART_TYPE_LINE, - 21003, - ebpf_create_global_dimension, - &process_publish_aggregated[NETDATA_IN_START_BYTE], - 2); - - ebpf_create_io_chart(NETDATA_EBPF_FAMILY, - NETDATA_VFS_IO_FILE_BYTES, EBPF_COMMON_DIMENSION_BYTES, - NETDATA_VFS_GROUP, - 21004, - NETDATA_EBPF_ABSOLUTE_IDX); - - if (em->mode < MODE_ENTRY) { - ebpf_create_chart(NETDATA_EBPF_FAMILY, - NETDATA_VFS_FILE_ERR_COUNT, - "Fails to write or read", - EBPF_COMMON_DIMENSION_CALL, - 
NETDATA_VFS_GROUP, - NULL, - NETDATA_EBPF_CHART_TYPE_LINE, - 21005, - ebpf_create_global_dimension, - &process_publish_aggregated[2], - NETDATA_VFS_ERRORS); - } - - ebpf_create_chart(NETDATA_EBPF_FAMILY, + ebpf_create_chart(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_PROCESS_SYSCALL, "Start process", EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NULL, NETDATA_EBPF_CHART_TYPE_LINE, - 21006, + 21002, ebpf_create_global_dimension, - &process_publish_aggregated[NETDATA_PROCESS_START], - 2); + &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK], + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); - ebpf_create_chart(NETDATA_EBPF_FAMILY, + ebpf_create_chart(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_EXIT_SYSCALL, "Exit process", EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NULL, NETDATA_EBPF_CHART_TYPE_LINE, - 21007, + 21003, ebpf_create_global_dimension, - &process_publish_aggregated[NETDATA_EXIT_START], - 2); + &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT], + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); - ebpf_process_status_chart(NETDATA_EBPF_FAMILY, + ebpf_process_status_chart(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_PROCESS_STATUS_NAME, EBPF_COMMON_DIMENSION_DIFFERENCE, NETDATA_PROCESS_GROUP, ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - 21008); + 21004, em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_create_chart(NETDATA_EBPF_FAMILY, + ebpf_create_chart(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_PROCESS_ERROR_NAME, "Fails to create process", EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NULL, NETDATA_EBPF_CHART_TYPE_LINE, - 21009, + 21005, ebpf_create_global_dimension, - &process_publish_aggregated[NETDATA_PROCESS_START], - 2); + &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK], + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); } } @@ -652,137 +445,53 @@ static void ebpf_create_global_charts(ebpf_module_t *em) void ebpf_process_create_apps_charts(struct ebpf_module *em, void *ptr) { struct target *root = ptr; 
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_OPEN, - "Number of open files", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_FILE_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20061, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root); - - if (em->mode < MODE_ENTRY) { - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, - "Fails to open files", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_FILE_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20062, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root); - } - - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_CLOSED, - "Files closed", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_FILE_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20063, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root); - - if (em->mode < MODE_ENTRY) { - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, - "Fails to close files", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_FILE_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20064, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root); - } - - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_DELETED, - "Files deleted", + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_PROCESS, + "Process started", EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_VFS_GROUP, + NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, 20065, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root); + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, - "Write to disk", + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_THREAD, + "Threads started", EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_VFS_GROUP, + NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, 20066, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - apps_groups_root_target); - - if (em->mode < MODE_ENTRY) { - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, - 
"Fails to write", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20067, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root); - } - - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_READ_CALLS, - "Read from disk", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20068, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root); - - if (em->mode < MODE_ENTRY) { - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, - "Fails to read", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20069, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root); - } - - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, - "Bytes written on disk", EBPF_COMMON_DIMENSION_BYTES, - NETDATA_APPS_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20070, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root); - - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_READ_BYTES, - "Bytes read from disk", EBPF_COMMON_DIMENSION_BYTES, - NETDATA_APPS_VFS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20071, - ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root); - - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_PROCESS, - "Process started", - EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_PROCESS_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - 20072, ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root); + root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); - ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_THREAD, - "Threads started", + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_EXIT, + "Tasks starts exit process.", EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_PROCESS_GROUP, + NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, - 20073, + 20067, ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root); + root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); 
ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_CLOSE, "Tasks closed", EBPF_COMMON_DIMENSION_CALL, - NETDATA_APPS_PROCESS_GROUP, + NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, - 20074, + 20068, ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], - root); + root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + + if (em->mode < MODE_ENTRY) { + ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_ERROR, + "Errors to create process or threads.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20069, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root, + em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); + } } /** @@ -790,10 +499,9 @@ void ebpf_process_create_apps_charts(struct ebpf_module *em, void *ptr) * * Call ebpf_create_chart to create the charts on apps submenu. * - * @param em a pointer to the structure with the default values. * @param root a pointer for the targets. */ -static void ebpf_create_apps_charts(ebpf_module_t *em, struct target *root) +static void ebpf_create_apps_charts(struct target *root) { struct target *w; int newly_added = 0; @@ -831,7 +539,7 @@ static void ebpf_create_apps_charts(ebpf_module_t *em, struct target *root) for (counter = 0; ebpf_modules[counter].thread_name; counter++) { ebpf_module_t *current = &ebpf_modules[counter]; if (current->enabled && current->apps_charts && current->apps_routine) - current->apps_routine(em, root); + current->apps_routine(current, root); } } @@ -842,47 +550,440 @@ static void ebpf_create_apps_charts(ebpf_module_t *em, struct target *root) *****************************************************************/ /** + * Cgroup update shm + * + * This is the thread callback. + * This thread is necessary, because we cannot freeze the whole plugin to read the data from shared memory. + * + * @param ptr It is a NULL value for this thread. + * + * @return It always returns NULL. 
+ */
+void *ebpf_cgroup_update_shm(void *ptr)
+{
+    UNUSED(ptr);
+    heartbeat_t hb;
+    heartbeat_init(&hb);
+
+    // The shared memory is re-read once per 30 second heartbeat.
+    usec_t step = 30 * USEC_PER_SEC;
+    while (!close_ebpf_plugin) {
+        usec_t dt = heartbeat_next(&hb, step);
+        (void)dt;
+
+        // The plugin may have been asked to stop while this thread was waiting.
+        if (close_ebpf_plugin)
+            break;
+
+        // Map the shared memory only while it has not been mapped yet.
+        if (!shm_ebpf_cgroup.header)
+            ebpf_map_cgroup_shared_memory();
+
+        ebpf_parse_cgroup_shm_data();
+    }
+
+    return NULL;
+}
+
+/**
+ * Sum PIDs
+ *
+ * Add up the counters of every PID in the list and publish the totals.
+ * A published counter is only replaced when the new total is greater than or
+ * equal to the value already stored, so the published values never decrease.
+ *
+ * @param ps   structure used to store data (it also holds the previously published values)
+ * @param pids input data, the list of PIDs whose counters are summed
+ */
+static void ebpf_process_sum_cgroup_pids(ebpf_process_stat_t *ps, struct pid_on_target2 *pids)
+{
+    ebpf_process_stat_t accumulator;
+    memset(&accumulator, 0, sizeof(accumulator));
+
+    while (pids) {
+        // Named 'in' so it does not shadow the output parameter 'ps'.
+        ebpf_process_stat_t *in = &pids->ps;
+
+        accumulator.exit_call += in->exit_call;
+        accumulator.release_call += in->release_call;
+        accumulator.create_process += in->create_process;
+        accumulator.create_thread += in->create_thread;
+
+        accumulator.task_err += in->task_err;
+
+        pids = pids->next;
+    }
+
+    // Keep the largest value seen so far for every counter.
+    ps->exit_call = (accumulator.exit_call >= ps->exit_call) ? accumulator.exit_call : ps->exit_call;
+    ps->release_call = (accumulator.release_call >= ps->release_call) ? accumulator.release_call : ps->release_call;
+    ps->create_process = (accumulator.create_process >= ps->create_process) ? accumulator.create_process : ps->create_process;
+    ps->create_thread = (accumulator.create_thread >= ps->create_thread) ? accumulator.create_thread : ps->create_thread;
+
+    ps->task_err = (accumulator.task_err >= ps->task_err) ? accumulator.task_err : ps->task_err;
+}
+
+/**
+ * Send Specific Process data
+ *
+ * Send data for specific cgroup/apps.
+ *
+ * One chart is written per counter; every chart carries a single dimension.
+ *
+ * @param type   chart type
+ * @param values structure with values that will be sent to netdata
+ * @param em     the structure with thread information
+ */
+static void ebpf_send_specific_process_data(char *type, ebpf_process_stat_t *values, ebpf_module_t *em)
+{
+    write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_PROCESS);
+    write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK].name,
+                          (long long) values->create_process);
+    write_end_chart();
+
+    write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_THREAD);
+    write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_CLONE].name,
+                          (long long) values->create_thread);
+    write_end_chart();
+
+    // The exit chart publishes exit_call; release_call belongs to the close chart below.
+    write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_EXIT);
+    write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT].name,
+                          (long long) values->exit_call);
+    write_end_chart();
+
+    write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_CLOSE);
+    write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK].name,
+                          (long long) values->release_call);
+    write_end_chart();
+
+    // The error chart is only written when the module mode is lower than MODE_ENTRY.
+    if (em->mode < MODE_ENTRY) {
+        write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_ERROR);
+        // ebpf_create_specific_process_charts() creates the error chart with the
+        // PROCESS_EXIT dimension, so the same dimension name is used here.
+        write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT].name,
+                              (long long) values->task_err);
+        write_end_chart();
+    }
+}
+
+/**
+ * Create specific process charts
+ *
+ * Create charts for cgroup/application
+ *
+ * @param type the chart type.
+ * @param em the structure with thread information
+ */
+static void ebpf_create_specific_process_charts(char *type, ebpf_module_t *em)
+{
+    ebpf_create_chart(type, NETDATA_SYSCALL_APPS_TASK_PROCESS, "Process started",
+                      EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_CGROUP_GROUP,
+                      NETDATA_CGROUP_PROCESS_CREATE_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE,
+                      NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5000,
+                      ebpf_create_global_dimension, &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK],
+                      1, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS);
+
+    ebpf_create_chart(type, NETDATA_SYSCALL_APPS_TASK_THREAD, "Threads started",
+                      EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_CGROUP_GROUP,
+                      NETDATA_CGROUP_THREAD_CREATE_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE,
+                      NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5001,
+                      ebpf_create_global_dimension,
+                      &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_CLONE],
+                      1, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS);
+
+    ebpf_create_chart(type, NETDATA_SYSCALL_APPS_TASK_EXIT, "Tasks starts exit process.",
+                      EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_CGROUP_GROUP,
+                      NETDATA_CGROUP_PROCESS_EXIT_CONTEXT,
+                      NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5002,
+                      ebpf_create_global_dimension,
+                      &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT],
+                      1, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS);
+
+    ebpf_create_chart(type, NETDATA_SYSCALL_APPS_TASK_CLOSE, "Tasks closed",
+                      EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_CGROUP_GROUP,
+                      NETDATA_CGROUP_PROCESS_CLOSE_CONTEXT,
+                      NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5003,
+                      ebpf_create_global_dimension,
+                      &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK],
+                      1, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS);
+
+    // The error chart only exists when the module mode is lower than MODE_ENTRY.
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_chart(type, NETDATA_SYSCALL_APPS_TASK_ERROR, "Errors to create process or threads.",
+                          EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_CGROUP_GROUP,
+                          NETDATA_CGROUP_PROCESS_ERROR_CONTEXT,
+                          NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5004,
+                          ebpf_create_global_dimension,
+                          &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT],
+                          1, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS);
+    }
+}
+
+/**
+ * Obsolete specific process charts
+ *
+ * Obsolete charts for cgroup/application
+ *
+ * The ids, titles, contexts and priorities mirror the ones used by
+ * ebpf_create_specific_process_charts().
+ *
+ * @param type the chart type.
+ * @param em   the structure with thread information
+ */
+static void ebpf_obsolete_specific_process_charts(char *type, ebpf_module_t *em)
+{
+    // NOTE(review): the charts are created with NETDATA_PROCESS_CGROUP_GROUP but are
+    // obsoleted with NETDATA_PROCESS_GROUP; confirm that the difference is intended.
+    ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_PROCESS, "Process started",
+                              EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE,
+                              NETDATA_CGROUP_PROCESS_CREATE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5000,
+                              em->update_every);
+
+    ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_THREAD, "Threads started",
+                              EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE,
+                              NETDATA_CGROUP_THREAD_CREATE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5001,
+                              em->update_every);
+
+    ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_EXIT, "Tasks starts exit process.",
+                              EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE,
+                              NETDATA_CGROUP_PROCESS_EXIT_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5002,
+                              em->update_every);
+
+    ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_CLOSE, "Tasks closed",
+                              EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE,
+                              NETDATA_CGROUP_PROCESS_CLOSE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5003,
+                              em->update_every);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_ERROR, "Errors to create process or threads.",
+                                  EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE,
+                                  NETDATA_CGROUP_PROCESS_ERROR_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5004,
+                                  em->update_every);
+    }
+}
+
+/**
+ * Create Systemd process Charts
+ *
+ * Create charts when systemd is enabled
+ *
+ * @param em the structure with thread information
+ */
+static void ebpf_create_systemd_process_charts(ebpf_module_t *em)
+{
+    ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_TASK_PROCESS, "Process started",
+                                  EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_PROCESS_GROUP,
+                                  NETDATA_EBPF_CHART_TYPE_STACKED, 20065,
+                                  ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_PROCESS_CREATE_CONTEXT,
+                                  NETDATA_EBPF_MODULE_NAME_PROCESS, em->update_every);
+
+    ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_TASK_THREAD, "Threads started",
+                                  EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_PROCESS_GROUP,
+                                  NETDATA_EBPF_CHART_TYPE_STACKED, 20066,
+                                  ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_THREAD_CREATE_CONTEXT,
+                                  NETDATA_EBPF_MODULE_NAME_PROCESS, em->update_every);
+
+    // The exit chart id goes with the exit title and exit context, as in the
+    // apps and cgroup charts; ebpf_send_systemd_process_charts() writes exit_call to it.
+    ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_TASK_EXIT, "Tasks starts exit process.",
+                                  EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_PROCESS_GROUP,
+                                  NETDATA_EBPF_CHART_TYPE_STACKED, 20067,
+                                  ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_PROCESS_EXIT_CONTEXT,
+                                  NETDATA_EBPF_MODULE_NAME_PROCESS, em->update_every);
+
+    // The close chart id goes with the close title and close context; it receives release_call.
+    ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_TASK_CLOSE, "Tasks closed",
+                                  EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_PROCESS_GROUP,
+                                  NETDATA_EBPF_CHART_TYPE_STACKED, 20068,
+                                  ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_PROCESS_CLOSE_CONTEXT,
+                                  NETDATA_EBPF_MODULE_NAME_PROCESS, em->update_every);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_TASK_ERROR, "Errors to create process or threads.",
+                                      EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_PROCESS_GROUP,
+                                      NETDATA_EBPF_CHART_TYPE_STACKED, 20069,
+                                      ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_PROCESS_ERROR_CONTEXT,
+                                      NETDATA_EBPF_MODULE_NAME_PROCESS, em->update_every);
+    }
+}
+
+/**
+ * Send Systemd charts
+ *
+ * Send collected data to
Netdata.
+ *
+ * Every chart receives one dimension per systemd target that was updated.
+ *
+ * @param em the structure with thread information
+ *
+ * @return It returns the status for chart creation, if it is necessary to remove a specific dimension, zero is returned
+ *         otherwise function returns 1 to avoid chart recreation
+ */
+static int ebpf_send_systemd_process_charts(ebpf_module_t *em)
+{
+    int ret = 1;
+    ebpf_cgroup_target_t *ect;
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_PROCESS);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            write_chart_dimension(ect->name, ect->publish_systemd_ps.create_process);
+        } else if (unlikely(ect->systemd)) {
+            // Only a systemd target that was not updated has a dimension to remove.
+            // Targets that are not systemd never have a dimension on these charts,
+            // so they must not force the charts to be created again.
+            ret = 0;
+        }
+    }
+    write_end_chart();
+
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_THREAD);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            write_chart_dimension(ect->name, ect->publish_systemd_ps.create_thread);
+        }
+    }
+    write_end_chart();
+
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_EXIT);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            write_chart_dimension(ect->name, ect->publish_systemd_ps.exit_call);
+        }
+    }
+    write_end_chart();
+
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_CLOSE);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            write_chart_dimension(ect->name, ect->publish_systemd_ps.release_call);
+        }
+    }
+    write_end_chart();
+
+    // The error chart is only written when the module mode is lower than MODE_ENTRY.
+    if (em->mode < MODE_ENTRY) {
+        write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_ERROR);
+        for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+            if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+                write_chart_dimension(ect->name, ect->publish_systemd_ps.task_err);
+            }
+        }
+        write_end_chart();
+    }
+
+    return ret;
+}
+
+/**
+ * Send data to Netdata calling auxiliary functions.
+ * + * @param em the structure with thread information +*/ +static void ebpf_process_send_cgroup_data(ebpf_module_t *em) +{ + if (!ebpf_cgroup_pids) + return; + + pthread_mutex_lock(&mutex_cgroup_shm); + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + ebpf_process_sum_cgroup_pids(&ect->publish_systemd_ps, ect->pids); + } + + int has_systemd = shm_ebpf_cgroup.header->systemd_enabled; + + if (has_systemd) { + static int systemd_chart = 0; + if (!systemd_chart) { + ebpf_create_systemd_process_charts(em); + systemd_chart = 1; + } + + systemd_chart = ebpf_send_systemd_process_charts(em); + } + + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_PROCESS_CHART) && ect->updated) { + ebpf_create_specific_process_charts(ect->name, em); + ect->flags |= NETDATA_EBPF_CGROUP_HAS_PROCESS_CHART; + } + + if (ect->flags & NETDATA_EBPF_CGROUP_HAS_PROCESS_CHART) { + if (ect->updated) { + ebpf_send_specific_process_data(ect->name, &ect->publish_systemd_ps, em); + } else { + ebpf_obsolete_specific_process_charts(ect->name, em); + ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_PROCESS_CHART; + } + } + } + + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Update Cgroup algorithm + * + * Change algorithm from absolute to incremental + */ +void ebpf_process_update_cgroup_algorithm() +{ + int i; + for (i = 0; i < NETDATA_KEY_PUBLISH_PROCESS_END; i++) { + netdata_publish_syscall_t *ptr = &process_publish_aggregated[i]; + freez(ptr->algorithm); + ptr->algorithm = strdupz(ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + } +} + +/** * Main loop for this collector. 
* - * @param step the number of microseconds used with heart beat * @param em the structure with thread information */ -static void process_collector(usec_t step, ebpf_module_t *em) +static void process_collector(ebpf_module_t *em) { + cgroup_thread.thread = mallocz(sizeof(netdata_thread_t)); + cgroup_thread.start_routine = ebpf_cgroup_update_shm; + + netdata_thread_create(cgroup_thread.thread, cgroup_thread.name, NETDATA_THREAD_OPTION_JOINABLE, + ebpf_cgroup_update_shm, em); + heartbeat_t hb; heartbeat_init(&hb); int publish_global = em->global_charts; int apps_enabled = em->apps_charts; - int pid_fd = map_fd[0]; + int cgroups = em->cgroup_charts; + int thread_enabled = em->enabled; + if (cgroups) + ebpf_process_update_cgroup_algorithm(); + + int pid_fd = process_maps[NETDATA_PROCESS_PID_TABLE].map_fd; + int update_every = em->update_every; + int counter = update_every - 1; while (!close_ebpf_plugin) { - usec_t dt = heartbeat_next(&hb, step); + usec_t dt = heartbeat_next(&hb, USEC_PER_SEC); (void)dt; - read_hash_global_tables(); - pthread_mutex_lock(&collect_data_mutex); cleanup_exited_pids(); collect_data_for_all_processes(pid_fd); - ebpf_create_apps_charts(em, apps_groups_root_target); + ebpf_create_apps_charts(apps_groups_root_target); pthread_cond_broadcast(&collect_data_cond_var); pthread_mutex_unlock(&collect_data_mutex); - int publish_apps = 0; - if (apps_enabled && all_pids_count > 0) { - publish_apps = 1; - ebpf_process_update_apps_data(); - } + if (++counter == update_every) { + counter = 0; - pthread_mutex_lock(&lock); - if (publish_global) { - ebpf_process_send_data(em); - } + read_hash_global_tables(); + + int publish_apps = 0; + if (all_pids_count > 0) { + if (apps_enabled) { + publish_apps = 1; + ebpf_process_update_apps_data(); + } - if (publish_apps) { - ebpf_process_send_apps_data(em, apps_groups_root_target); + if (cgroups) { + ebpf_update_process_cgroup(); + } + } + + if (thread_enabled) { + pthread_mutex_lock(&lock); + if (publish_global) { + 
ebpf_process_send_data(em); + } + + if (publish_apps) { + ebpf_process_send_apps_data(apps_groups_root_target, em); + } + + if (cgroups) { + ebpf_process_send_cgroup_data(em); + } + pthread_mutex_unlock(&lock); + } } + pthread_mutex_unlock(&lock); fflush(stdout); @@ -896,7 +997,7 @@ static void process_collector(usec_t step, ebpf_module_t *em) *****************************************************************/ void clean_global_memory() { - int pid_fd = map_fd[0]; + int pid_fd = process_maps[NETDATA_PROCESS_PID_TABLE].map_fd; struct pid_stat *pids = root_of_pids; while (pids) { uint32_t pid = pids->pid; @@ -910,6 +1011,30 @@ void clean_global_memory() { } /** + * Process disable tracepoints + * + * Disable tracepoints when the plugin was responsible to enable it. + */ +static void ebpf_process_disable_tracepoints() +{ + char *default_message = { "Cannot disable the tracepoint" }; + if (!was_sched_process_exit_enabled) { + if (ebpf_disable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_exit)) + error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_exit); + } + + if (!was_sched_process_exec_enabled) { + if (ebpf_disable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_exec)) + error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_exec); + } + + if (!was_sched_process_fork_enabled) { + if (ebpf_disable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_fork)) + error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_fork); + } +} + +/** * Clean up the main thread. * * @param ptr thread data. 
@@ -920,7 +1045,7 @@ static void ebpf_process_cleanup(void *ptr) heartbeat_t hb; heartbeat_init(&hb); - uint32_t tick = 50*USEC_PER_MS; + uint32_t tick = 1 * USEC_PER_SEC; while (!finalized_threads) { usec_t dt = heartbeat_next(&hb, tick); UNUSED(dt); @@ -933,15 +1058,19 @@ static void ebpf_process_cleanup(void *ptr) freez(global_process_stats); freez(current_apps_data); - freez(process_data.map_fd); + ebpf_process_disable_tracepoints(); - struct bpf_program *prog; - size_t i = 0 ; - bpf_object__for_each_program(prog, objects) { - bpf_link__destroy(probe_links[i]); - i++; + if (probe_links) { + struct bpf_program *prog; + size_t i = 0 ; + bpf_object__for_each_program(prog, objects) { + bpf_link__destroy(probe_links[i]); + i++; + } + bpf_object__close(objects); } - bpf_object__close(objects); + + freez(cgroup_thread.thread); } /***************************************************************** @@ -970,7 +1099,7 @@ static void ebpf_process_allocate_global_vectors(size_t length) static void change_syscalls() { static char *lfork = { "do_fork" }; - process_id_names[7] = lfork; + process_id_names[NETDATA_KEY_PUBLISH_PROCESS_FORK] = lfork; } /** @@ -979,9 +1108,7 @@ static void change_syscalls() */ static void set_local_pointers() { - map_fd = process_data.map_fd; - - if (process_data.isrh >= NETDATA_MINIMUM_RH_VERSION && process_data.isrh < NETDATA_RH_8) + if (isrh >= NETDATA_MINIMUM_RH_VERSION && isrh < NETDATA_RH_8) change_syscalls(); } @@ -1020,6 +1147,45 @@ static void wait_for_all_threads_die() } /** + * Enable tracepoints + * + * Enable necessary tracepoints for thread. 
+ * + * @return It returns 0 on success and -1 otherwise + */ +static int ebpf_process_enable_tracepoints() +{ + int test = ebpf_is_tracepoint_enabled(tracepoint_sched_type, tracepoint_sched_process_exit); + if (test == -1) + return -1; + else if (!test) { + if (ebpf_enable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_exit)) + return -1; + } + was_sched_process_exit_enabled = test; + + test = ebpf_is_tracepoint_enabled(tracepoint_sched_type, tracepoint_sched_process_exec); + if (test == -1) + return -1; + else if (!test) { + if (ebpf_enable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_exec)) + return -1; + } + was_sched_process_exec_enabled = test; + + test = ebpf_is_tracepoint_enabled(tracepoint_sched_type, tracepoint_sched_process_fork); + if (test == -1) + return -1; + else if (!test) { + if (ebpf_enable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_fork)) + return -1; + } + was_sched_process_fork_enabled = test; + + return 0; +} + +/** * Process thread * * Thread used to generate process charts. 
@@ -1034,31 +1200,26 @@ void *ebpf_process_thread(void *ptr) ebpf_module_t *em = (ebpf_module_t *)ptr; em->maps = process_maps; + + if (ebpf_process_enable_tracepoints()) { + em->enabled = em->global_charts = em->apps_charts = em->cgroup_charts = CONFIG_BOOLEAN_NO; + } process_enabled = em->enabled; - fill_ebpf_data(&process_data); pthread_mutex_lock(&lock); ebpf_process_allocate_global_vectors(NETDATA_KEY_PUBLISH_PROCESS_END); - if (ebpf_update_kernel(&process_data)) { - pthread_mutex_unlock(&lock); - goto endprocess; - } - - ebpf_update_module(em, &process_config, NETDATA_PROCESS_CONFIG_FILE); ebpf_update_pid_table(&process_maps[0], em); set_local_pointers(); - probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects, process_data.map_fd); + probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects); if (!probe_links) { pthread_mutex_unlock(&lock); goto endprocess; } int algorithms[NETDATA_KEY_PUBLISH_PROCESS_END] = { - NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX,NETDATA_EBPF_INCREMENTAL_IDX, //open, close, unlink - NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX, - NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX + NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX }; ebpf_global_labels( @@ -1071,7 +1232,7 @@ void *ebpf_process_thread(void *ptr) pthread_mutex_unlock(&lock); - process_collector((usec_t)(em->update_time * USEC_PER_SEC), em); + process_collector(em); endprocess: wait_for_all_threads_die(); diff --git a/collectors/ebpf.plugin/ebpf_process.h b/collectors/ebpf.plugin/ebpf_process.h index a731227e..73421049 100644 --- a/collectors/ebpf.plugin/ebpf_process.h +++ b/collectors/ebpf.plugin/ebpf_process.h @@ -3,73 +3,44 @@ #ifndef NETDATA_EBPF_PROCESS_H #define NETDATA_EBPF_PROCESS_H 1 -// Groups used on Dashboard -#define NETDATA_FILE_GROUP "File" -#define NETDATA_VFS_GROUP "VFS" 
-#define NETDATA_PROCESS_GROUP "Process" - -// Internal constants -#define NETDATA_GLOBAL_VECTOR 24 -#define NETDATA_VFS_ERRORS 3 +// Module name +#define NETDATA_EBPF_MODULE_NAME_PROCESS "process" -// Map index -#define NETDATA_DEL_START 2 -#define NETDATA_IN_START_BYTE 3 -#define NETDATA_EXIT_START 5 -#define NETDATA_PROCESS_START 7 +// Groups used on Dashboard +#define NETDATA_PROCESS_GROUP "processes" +#define NETDATA_PROCESS_CGROUP_GROUP "processes (eBPF)" // Global chart name -#define NETDATA_FILE_OPEN_CLOSE_COUNT "file_descriptor" -#define NETDATA_FILE_OPEN_ERR_COUNT "file_error" -#define NETDATA_VFS_FILE_CLEAN_COUNT "deleted_objects" -#define NETDATA_VFS_FILE_IO_COUNT "io" -#define NETDATA_VFS_FILE_ERR_COUNT "io_error" - #define NETDATA_EXIT_SYSCALL "exit" #define NETDATA_PROCESS_SYSCALL "process_thread" #define NETDATA_PROCESS_ERROR_NAME "task_error" #define NETDATA_PROCESS_STATUS_NAME "process_status" -#define NETDATA_VFS_IO_FILE_BYTES "io_bytes" - // Charts created on Apps submenu -#define NETDATA_SYSCALL_APPS_FILE_OPEN "file_open" -#define NETDATA_SYSCALL_APPS_FILE_CLOSED "file_closed" -#define NETDATA_SYSCALL_APPS_FILE_DELETED "file_deleted" -#define NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS "vfs_write_call" -#define NETDATA_SYSCALL_APPS_VFS_READ_CALLS "vfs_read_call" -#define NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES "vfs_write_bytes" -#define NETDATA_SYSCALL_APPS_VFS_READ_BYTES "vfs_read_bytes" #define NETDATA_SYSCALL_APPS_TASK_PROCESS "process_create" #define NETDATA_SYSCALL_APPS_TASK_THREAD "thread_create" +#define NETDATA_SYSCALL_APPS_TASK_EXIT "task_exit" #define NETDATA_SYSCALL_APPS_TASK_CLOSE "task_close" - -// Charts created on Apps submenu, if and only if, the return mode is active - -#define NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR "file_open_error" -#define NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR "file_close_error" -#define NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR "vfs_write_error" -#define NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR "vfs_read_error" 
+#define NETDATA_SYSCALL_APPS_TASK_ERROR "task_error" // Process configuration name #define NETDATA_PROCESS_CONFIG_FILE "process.conf" -// Index from kernel -typedef enum ebpf_process_index { - NETDATA_KEY_CALLS_DO_SYS_OPEN, - NETDATA_KEY_ERROR_DO_SYS_OPEN, - - NETDATA_KEY_CALLS_VFS_WRITE, - NETDATA_KEY_ERROR_VFS_WRITE, - NETDATA_KEY_BYTES_VFS_WRITE, - - NETDATA_KEY_CALLS_VFS_READ, - NETDATA_KEY_ERROR_VFS_READ, - NETDATA_KEY_BYTES_VFS_READ, +// Contexts +#define NETDATA_CGROUP_PROCESS_CREATE_CONTEXT "cgroup.process_create" +#define NETDATA_CGROUP_THREAD_CREATE_CONTEXT "cgroup.thread_create" +#define NETDATA_CGROUP_PROCESS_CLOSE_CONTEXT "cgroup.task_close" +#define NETDATA_CGROUP_PROCESS_EXIT_CONTEXT "cgroup.task_exit" +#define NETDATA_CGROUP_PROCESS_ERROR_CONTEXT "cgroup.task_error" - NETDATA_KEY_CALLS_VFS_UNLINK, - NETDATA_KEY_ERROR_VFS_UNLINK, +#define NETDATA_SYSTEMD_PROCESS_CREATE_CONTEXT "services.process_create" +#define NETDATA_SYSTEMD_THREAD_CREATE_CONTEXT "services.thread_create" +#define NETDATA_SYSTEMD_PROCESS_CLOSE_CONTEXT "services.task_close" +#define NETDATA_SYSTEMD_PROCESS_EXIT_CONTEXT "services.task_exit" +#define NETDATA_SYSTEMD_PROCESS_ERROR_CONTEXT "services.task_error" +// Index from kernel +typedef enum ebpf_process_index { NETDATA_KEY_CALLS_DO_EXIT, NETDATA_KEY_CALLS_RELEASE_TASK, @@ -77,20 +48,10 @@ typedef enum ebpf_process_index { NETDATA_KEY_CALLS_DO_FORK, NETDATA_KEY_ERROR_DO_FORK, - NETDATA_KEY_CALLS_CLOSE_FD, - NETDATA_KEY_ERROR_CLOSE_FD, - NETDATA_KEY_CALLS_SYS_CLONE, NETDATA_KEY_ERROR_SYS_CLONE, - NETDATA_KEY_CALLS_VFS_WRITEV, - NETDATA_KEY_ERROR_VFS_WRITEV, - NETDATA_KEY_BYTES_VFS_WRITEV, - - NETDATA_KEY_CALLS_VFS_READV, - NETDATA_KEY_ERROR_VFS_READV, - NETDATA_KEY_BYTES_VFS_READV - + NETDATA_KEY_END_VECTOR } ebpf_process_index_t; // This enum acts as an index for publish vector. @@ -99,11 +60,6 @@ typedef enum ebpf_process_index { // values (the three initial positions) and absolute values // (the remaining charts). 
typedef enum netdata_publish_process { - NETDATA_KEY_PUBLISH_PROCESS_OPEN, - NETDATA_KEY_PUBLISH_PROCESS_CLOSE, - NETDATA_KEY_PUBLISH_PROCESS_UNLINK, - NETDATA_KEY_PUBLISH_PROCESS_READ, - NETDATA_KEY_PUBLISH_PROCESS_WRITE, NETDATA_KEY_PUBLISH_PROCESS_EXIT, NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK, NETDATA_KEY_PUBLISH_PROCESS_FORK, @@ -114,28 +70,21 @@ typedef enum netdata_publish_process { typedef struct ebpf_process_publish_apps { // Number of calls during the last read - uint64_t call_sys_open; - uint64_t call_close_fd; - uint64_t call_vfs_unlink; - uint64_t call_read; - uint64_t call_write; uint64_t call_do_exit; uint64_t call_release_task; - uint64_t call_do_fork; - uint64_t call_sys_clone; + uint64_t create_process; + uint64_t create_thread; // Number of errors during the last read - uint64_t ecall_sys_open; - uint64_t ecall_close_fd; - uint64_t ecall_vfs_unlink; - uint64_t ecall_read; - uint64_t ecall_write; - uint64_t ecall_do_fork; - uint64_t ecall_sys_clone; - - // Number of bytes during the last read - uint64_t bytes_written; - uint64_t bytes_read; + uint64_t task_err; } ebpf_process_publish_apps_t; +enum ebpf_process_tables { + NETDATA_PROCESS_PID_TABLE, + NETDATA_PROCESS_GLOBAL_TABLE, + NETDATA_PROCESS_CTRL_TABLE +}; + +extern struct config process_config; + #endif /* NETDATA_EBPF_PROCESS_H */ diff --git a/collectors/ebpf.plugin/ebpf_shm.c b/collectors/ebpf.plugin/ebpf_shm.c new file mode 100644 index 00000000..156ae9aa --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_shm.c @@ -0,0 +1,855 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_shm.h" + +static char *shm_dimension_name[NETDATA_SHM_END] = { "get", "at", "dt", "ctl" }; +static netdata_syscall_stat_t shm_aggregated_data[NETDATA_SHM_END]; +static netdata_publish_syscall_t shm_publish_aggregated[NETDATA_SHM_END]; + +static int read_thread_closed = 1; +netdata_publish_shm_t *shm_vector = NULL; + +static netdata_idx_t shm_hash_values[NETDATA_SHM_END]; +static 
netdata_idx_t *shm_values = NULL; + +netdata_publish_shm_t **shm_pid = NULL; + +struct config shm_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +static ebpf_local_maps_t shm_maps[] = {{.name = "tbl_pid_shm", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, + .user_input = 0, + .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "shm_ctrl", .internal_input = NETDATA_CONTROLLER_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_shm", .internal_input = NETDATA_SHM_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0}}; + +static struct bpf_link **probe_links = NULL; +static struct bpf_object *objects = NULL; + +struct netdata_static_thread shm_threads = {"SHM KERNEL", NULL, NULL, 1, + NULL, NULL, NULL}; + +/***************************************************************** + * FUNCTIONS TO CLOSE THE THREAD + *****************************************************************/ + +/** + * Clean shm structure + */ +void clean_shm_pid_structures() { + struct pid_stat *pids = root_of_pids; + while (pids) { + freez(shm_pid[pids->pid]); + + pids = pids->next; + } +} + +/** + * Clean up the main thread. + * + * @param ptr thread data. 
+ */ +static void ebpf_shm_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) { + return; + } + + heartbeat_t hb; + heartbeat_init(&hb); + uint32_t tick = 2 * USEC_PER_MS; + while (!read_thread_closed) { + usec_t dt = heartbeat_next(&hb, tick); + UNUSED(dt); + } + + ebpf_cleanup_publish_syscall(shm_publish_aggregated); + + freez(shm_vector); + freez(shm_values); + + if (probe_links) { + struct bpf_program *prog; + size_t i = 0 ; + bpf_object__for_each_program(prog, objects) { + bpf_link__destroy(probe_links[i]); + i++; + } + bpf_object__close(objects); + } +} + +/***************************************************************** + * COLLECTOR THREAD + *****************************************************************/ + +/** + * Apps Accumulator + * + * Sum all values read from kernel and store in the first address. + * + * @param out the vector with read values. + */ +static void shm_apps_accumulator(netdata_publish_shm_t *out) +{ + int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? 
ebpf_nprocs : 1; + netdata_publish_shm_t *total = &out[0]; + for (i = 1; i < end; i++) { + netdata_publish_shm_t *w = &out[i]; + total->get += w->get; + total->at += w->at; + total->dt += w->dt; + total->ctl += w->ctl; + } +} + +/** + * Fill PID + * + * Fill PID structures + * + * @param current_pid the pid whose data is being stored + * @param publish the values read from the hash table + */ +static void shm_fill_pid(uint32_t current_pid, netdata_publish_shm_t *publish) +{ + netdata_publish_shm_t *curr = shm_pid[current_pid]; + if (!curr) { + curr = callocz(1, sizeof(netdata_publish_shm_t)); + shm_pid[current_pid] = curr; + } + + memcpy(curr, publish, sizeof(netdata_publish_shm_t)); +} + +/** + * Update cgroup + * + * Update the data of every PID inside the cgroups, copying the values already stored in shm_pid + * when they exist and reading them from the kernel hash table otherwise. + */ +static void ebpf_update_shm_cgroup() +{ + netdata_publish_shm_t *cv = shm_vector; + int fd = shm_maps[NETDATA_PID_SHM_TABLE].map_fd; + size_t length = sizeof(netdata_publish_shm_t) * ebpf_nprocs; + ebpf_cgroup_target_t *ect; + + memset(cv, 0, length); + + pthread_mutex_lock(&mutex_cgroup_shm); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + struct pid_on_target2 *pids; + for (pids = ect->pids; pids; pids = pids->next) { + int pid = pids->pid; + netdata_publish_shm_t *out = &pids->shm; + if (likely(shm_pid) && shm_pid[pid]) { + netdata_publish_shm_t *in = shm_pid[pid]; + + memcpy(out, in, sizeof(netdata_publish_shm_t)); + } else { + if (!bpf_map_lookup_elem(fd, &pid, cv)) { + shm_apps_accumulator(cv); + + memcpy(out, cv, sizeof(netdata_publish_shm_t)); + + // now that we've consumed the value, zero it out in the map. + memset(cv, 0, length); + bpf_map_update_elem(fd, &pid, cv, BPF_EXIST); + } + } + } + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Read APPS table + * + * Read the apps table and store data inside the structure. 
+ */ +static void read_apps_table() +{ + netdata_publish_shm_t *cv = shm_vector; + uint32_t key; + struct pid_stat *pids = root_of_pids; + int fd = shm_maps[NETDATA_PID_SHM_TABLE].map_fd; + size_t length = sizeof(netdata_publish_shm_t)*ebpf_nprocs; + while (pids) { + key = pids->pid; + + if (bpf_map_lookup_elem(fd, &key, cv)) { + pids = pids->next; + continue; + } + + shm_apps_accumulator(cv); + + shm_fill_pid(key, cv); + + // now that we've consumed the value, zero it out in the map. + memset(cv, 0, length); + bpf_map_update_elem(fd, &key, cv, BPF_EXIST); + + pids = pids->next; + } +} + +/** +* Send global charts to netdata agent. +*/ +static void shm_send_global() +{ + write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_SHM_GLOBAL_CHART); + write_chart_dimension( + shm_publish_aggregated[NETDATA_KEY_SHMGET_CALL].dimension, + (long long) shm_hash_values[NETDATA_KEY_SHMGET_CALL] + ); + write_chart_dimension( + shm_publish_aggregated[NETDATA_KEY_SHMAT_CALL].dimension, + (long long) shm_hash_values[NETDATA_KEY_SHMAT_CALL] + ); + write_chart_dimension( + shm_publish_aggregated[NETDATA_KEY_SHMDT_CALL].dimension, + (long long) shm_hash_values[NETDATA_KEY_SHMDT_CALL] + ); + write_chart_dimension( + shm_publish_aggregated[NETDATA_KEY_SHMCTL_CALL].dimension, + (long long) shm_hash_values[NETDATA_KEY_SHMCTL_CALL] + ); + write_end_chart(); +} + +/** + * Read global counter + * + * Read the table with number of calls for all functions + */ +static void read_global_table() +{ + netdata_idx_t *stored = shm_values; + netdata_idx_t *val = shm_hash_values; + int fd = shm_maps[NETDATA_SHM_GLOBAL_TABLE].map_fd; + + uint32_t i, end = NETDATA_SHM_END; + for (i = NETDATA_KEY_SHMGET_CALL; i < end; i++) { + if (!bpf_map_lookup_elem(fd, &i, stored)) { + int j; + int last = ebpf_nprocs; + netdata_idx_t total = 0; + for (j = 0; j < last; j++) + total += stored[j]; + + val[i] = total; + } + } +} + +/** + * Shared memory reader thread. + * + * @param ptr a pointer to the ebpf_module_t of this thread; its update_every sets the reading interval. 
+ * @return It always returns NULL. + */ +void *ebpf_shm_read_hash(void *ptr) +{ + read_thread_closed = 0; + + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + usec_t step = NETDATA_SHM_SLEEP_MS * em->update_every; + while (!close_ebpf_plugin) { + usec_t dt = heartbeat_next(&hb, step); + (void)dt; + + read_global_table(); + } + + read_thread_closed = 1; + return NULL; +} + +/** + * Sum values for all targets. + */ +static void ebpf_shm_sum_pids(netdata_publish_shm_t *shm, struct pid_on_target *root) +{ + while (root) { + int32_t pid = root->pid; + netdata_publish_shm_t *w = shm_pid[pid]; + if (w) { + shm->get += w->get; + shm->at += w->at; + shm->dt += w->dt; + shm->ctl += w->ctl; + + // reset for next collection. + w->get = 0; + w->at = 0; + w->dt = 0; + w->ctl = 0; + } + root = root->next; + } +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param root the target list. +*/ +void ebpf_shm_send_apps_data(struct target *root) +{ + struct target *w; + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + ebpf_shm_sum_pids(&w->shm, w->root_pid); + } + } + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SHMGET_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, (long long) w->shm.get); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SHMAT_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, (long long) w->shm.at); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SHMDT_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, (long long) w->shm.dt); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SHMCTL_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) 
{ + write_chart_dimension(w->name, (long long) w->shm.ctl); + } + } + write_end_chart(); +} + +/** + * Sum values for all targets. + */ +static void ebpf_shm_sum_cgroup_pids(netdata_publish_shm_t *shm, struct pid_on_target2 *root) +{ + netdata_publish_shm_t shmv; + memset(&shmv, 0, sizeof(shmv)); + while (root) { + netdata_publish_shm_t *w = &root->shm; + shmv.get += w->get; + shmv.at += w->at; + shmv.dt += w->dt; + shmv.ctl += w->ctl; + + root = root->next; + } + + memcpy(shm, &shmv, sizeof(shmv)); +} + +/** + * Create specific shared memory charts + * + * Create charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_create_specific_shm_charts(char *type, int update_every) +{ + ebpf_create_chart(type, NETDATA_SHMGET_CHART, + "Calls to syscall <code>shmget(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_CGROUP_SHM_GET_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5800, + ebpf_create_global_dimension, + &shm_publish_aggregated[NETDATA_KEY_SHMGET_CALL], + 1, + update_every, + NETDATA_EBPF_MODULE_NAME_SHM); + + ebpf_create_chart(type, NETDATA_SHMAT_CHART, + "Calls to syscall <code>shmat(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_CGROUP_SHM_AT_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5801, + ebpf_create_global_dimension, + &shm_publish_aggregated[NETDATA_KEY_SHMAT_CALL], + 1, + update_every, + NETDATA_EBPF_MODULE_NAME_SHM); + + ebpf_create_chart(type, NETDATA_SHMDT_CHART, + "Calls to syscall <code>shmdt(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_CGROUP_SHM_DT_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5802, + ebpf_create_global_dimension, + &shm_publish_aggregated[NETDATA_KEY_SHMDT_CALL], + 1, + update_every, + 
NETDATA_EBPF_MODULE_NAME_SHM); + + ebpf_create_chart(type, NETDATA_SHMCTL_CHART, + "Calls to syscall <code>shmctl(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_CGROUP_SHM_CTL_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5803, + ebpf_create_global_dimension, + &shm_publish_aggregated[NETDATA_KEY_SHMCTL_CALL], + 1, + update_every, + NETDATA_EBPF_MODULE_NAME_SHM); +} + +/** + * Obsolete specific shared memory charts + * + * Obsolete charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_obsolete_specific_shm_charts(char *type, int update_every) +{ + ebpf_write_chart_obsolete(type, NETDATA_SHMGET_CHART, + "Calls to syscall <code>shmget(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SHM_GET_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5800, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_SHMAT_CHART, + "Calls to syscall <code>shmat(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SHM_AT_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5801, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_SHMDT_CHART, + "Calls to syscall <code>shmdt(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SHM_DT_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5802, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_SHMCTL_CHART, + "Calls to syscall <code>shmctl(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SHM_CTL_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5803, update_every); +} + +/** + * Create Systemd Shared Memory Charts + * + * Create charts when systemd is enabled + * + * 
@param update_every value to overwrite the update frequency set by the server. + **/ +static void ebpf_create_systemd_shm_charts(int update_every) +{ + ebpf_create_charts_on_systemd(NETDATA_SHMGET_CHART, + "Calls to syscall <code>shmget(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20191, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SYSTEMD_SHM_GET_CONTEXT, NETDATA_EBPF_MODULE_NAME_SHM, update_every); + + ebpf_create_charts_on_systemd(NETDATA_SHMAT_CHART, + "Calls to syscall <code>shmat(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20192, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SYSTEMD_SHM_AT_CONTEXT, NETDATA_EBPF_MODULE_NAME_SHM, update_every); + + ebpf_create_charts_on_systemd(NETDATA_SHMDT_CHART, + "Calls to syscall <code>shmdt(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20193, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SYSTEMD_SHM_DT_CONTEXT, NETDATA_EBPF_MODULE_NAME_SHM, update_every); + + ebpf_create_charts_on_systemd(NETDATA_SHMCTL_CHART, + "Calls to syscall <code>shmctl(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20193, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SYSTEMD_SHM_CTL_CONTEXT, NETDATA_EBPF_MODULE_NAME_SHM, update_every); +} + +/** + * Send Systemd charts + * + * Send collected data to Netdata. 
+ * + * @return It returns the status for chart creation, if it is necessary to remove a specific dimension, zero is returned + * otherwise function returns 1 to avoid chart recreation + */ +static int ebpf_send_systemd_shm_charts() +{ + int ret = 1; + ebpf_cgroup_target_t *ect; + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMGET_CHART); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_shm.get); + } else + ret = 0; + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMAT_CHART); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_shm.at); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMDT_CHART); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_shm.dt); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMCTL_CHART); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_shm.ctl); + } + } + write_end_chart(); + + return ret; +} + +/* + * Send Specific Shared memory data + * + * Send data for specific cgroup/apps. 
+ * + * @param type chart type + * @param values structure with values that will be sent to netdata + */ +static void ebpf_send_specific_shm_data(char *type, netdata_publish_shm_t *values) +{ + write_begin_chart(type, NETDATA_SHMGET_CHART); + write_chart_dimension(shm_publish_aggregated[NETDATA_KEY_SHMGET_CALL].name, (long long)values->get); + write_end_chart(); + + write_begin_chart(type, NETDATA_SHMAT_CHART); + write_chart_dimension(shm_publish_aggregated[NETDATA_KEY_SHMAT_CALL].name, (long long)values->at); + write_end_chart(); + + write_begin_chart(type, NETDATA_SHMDT_CHART); + write_chart_dimension(shm_publish_aggregated[NETDATA_KEY_SHMDT_CALL].name, (long long)values->dt); + write_end_chart(); + + write_begin_chart(type, NETDATA_SHMCTL_CHART); + write_chart_dimension(shm_publish_aggregated[NETDATA_KEY_SHMCTL_CALL].name, (long long)values->ctl); + write_end_chart(); +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param update_every value to overwrite the update frequency set by the server. 
+*/ +void ebpf_shm_send_cgroup_data(int update_every) +{ + if (!ebpf_cgroup_pids) + return; + + pthread_mutex_lock(&mutex_cgroup_shm); + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + ebpf_shm_sum_cgroup_pids(&ect->publish_shm, ect->pids); + } + + int has_systemd = shm_ebpf_cgroup.header->systemd_enabled; + if (has_systemd) { + static int systemd_charts = 0; + if (!systemd_charts) { + ebpf_create_systemd_shm_charts(update_every); + systemd_charts = 1; + } + + systemd_charts = ebpf_send_systemd_shm_charts(); + } + + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_SHM_CHART) && ect->updated) { + ebpf_create_specific_shm_charts(ect->name, update_every); + ect->flags |= NETDATA_EBPF_CGROUP_HAS_SHM_CHART; + } + + if (ect->flags & NETDATA_EBPF_CGROUP_HAS_SHM_CHART) { + if (ect->updated) { + ebpf_send_specific_shm_data(ect->name, &ect->publish_shm); + } else { + ebpf_obsolete_specific_shm_charts(ect->name, update_every); + ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_SWAP_CHART; + } + } + } + + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** +* Main loop for this collector. 
+*/ +static void shm_collector(ebpf_module_t *em) +{ + shm_threads.thread = mallocz(sizeof(netdata_thread_t)); + shm_threads.start_routine = ebpf_shm_read_hash; + + netdata_thread_create( + shm_threads.thread, + shm_threads.name, + NETDATA_THREAD_OPTION_JOINABLE, + ebpf_shm_read_hash, + em + ); + + int apps = em->apps_charts; + int cgroups = em->cgroup_charts; + int update_every = em->update_every; + int counter = update_every - 1; + while (!close_ebpf_plugin) { + pthread_mutex_lock(&collect_data_mutex); + pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); + + if (++counter == update_every) { + counter = 0; + if (apps) { + read_apps_table(); + } + + if (cgroups) { + ebpf_update_shm_cgroup(); + } + + pthread_mutex_lock(&lock); + + shm_send_global(); + + if (apps) { + ebpf_shm_send_apps_data(apps_groups_root_target); + } + + if (cgroups) { + ebpf_shm_send_cgroup_data(update_every); + } + + pthread_mutex_unlock(&lock); + } + + pthread_mutex_unlock(&collect_data_mutex); + } +} + +/***************************************************************** + * INITIALIZE THREAD + *****************************************************************/ + +/** + * Create apps charts + * + * Call ebpf_create_chart to create the charts on apps submenu. + * + * @param em a pointer to the structure with the default values. 
+ */ +void ebpf_shm_create_apps_charts(struct ebpf_module *em, void *ptr) +{ + struct target *root = ptr; + ebpf_create_charts_on_apps(NETDATA_SHMGET_CHART, + "Calls to syscall <code>shmget(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20191, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SHM); + + ebpf_create_charts_on_apps(NETDATA_SHMAT_CHART, + "Calls to syscall <code>shmat(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20192, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SHM); + + ebpf_create_charts_on_apps(NETDATA_SHMDT_CHART, + "Calls to syscall <code>shmdt(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20193, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SHM); + + ebpf_create_charts_on_apps(NETDATA_SHMCTL_CHART, + "Calls to syscall <code>shmctl(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20194, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SHM); +} + +/** + * Allocate vectors used with this thread. + * + * We are not testing the return, because callocz does this and shutdown the software + * case it was not possible to allocate. + * + * @param apps is apps enabled? 
+ */ +static void ebpf_shm_allocate_global_vectors(int apps) +{ + if (apps) + shm_pid = callocz((size_t)pid_max, sizeof(netdata_publish_shm_t *)); + + shm_vector = callocz((size_t)ebpf_nprocs, sizeof(netdata_publish_shm_t)); + + shm_values = callocz((size_t)ebpf_nprocs, sizeof(netdata_idx_t)); + + memset(shm_hash_values, 0, sizeof(shm_hash_values)); +} + +/***************************************************************** + * MAIN THREAD + *****************************************************************/ + +/** + * Create global charts + * + * Call ebpf_create_chart to create the charts for the collector. + * + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_create_shm_charts(int update_every) +{ + ebpf_create_chart( + NETDATA_EBPF_SYSTEM_GROUP, + NETDATA_SHM_GLOBAL_CHART, + "Calls to shared memory system calls.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SYSTEM_IPC_SHM_SUBMENU, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_SYSTEM_IPC_SHARED_MEM_CALLS, + ebpf_create_global_dimension, + shm_publish_aggregated, + NETDATA_SHM_END, + update_every, NETDATA_EBPF_MODULE_NAME_SHM + ); + + fflush(stdout); +} + +/** + * Shared memory thread. 
+ * + * @param ptr a pointer to `struct ebpf_module` + * @return It always return NULL + */ +void *ebpf_shm_thread(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_shm_cleanup, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = shm_maps; + + ebpf_update_pid_table(&shm_maps[NETDATA_PID_SHM_TABLE], em); + + if (!em->enabled) { + goto endshm; + } + + probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects); + if (!probe_links) { + goto endshm; + } + + ebpf_shm_allocate_global_vectors(em->apps_charts); + + int algorithms[NETDATA_SHM_END] = { + NETDATA_EBPF_INCREMENTAL_IDX, + NETDATA_EBPF_INCREMENTAL_IDX, + NETDATA_EBPF_INCREMENTAL_IDX, + NETDATA_EBPF_INCREMENTAL_IDX + }; + ebpf_global_labels( + shm_aggregated_data, + shm_publish_aggregated, + shm_dimension_name, + shm_dimension_name, + algorithms, + NETDATA_SHM_END + ); + + pthread_mutex_lock(&lock); + ebpf_create_shm_charts(em->update_every); + pthread_mutex_unlock(&lock); + + shm_collector(em); + +endshm: + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_shm.h b/collectors/ebpf.plugin/ebpf_shm.h new file mode 100644 index 00000000..4e7e183a --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_shm.h @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_SHM_H +#define NETDATA_EBPF_SHM_H 1 + +// Module name +#define NETDATA_EBPF_MODULE_NAME_SHM "shm" + +#define NETDATA_SHM_SLEEP_MS 850000ULL + +// charts +#define NETDATA_SHM_GLOBAL_CHART "shared_memory_calls" +#define NETDATA_SHMGET_CHART "shmget_call" +#define NETDATA_SHMAT_CHART "shmat_call" +#define NETDATA_SHMDT_CHART "shmdt_call" +#define NETDATA_SHMCTL_CHART "shmctl_call" + +// configuration file +#define NETDATA_DIRECTORY_SHM_CONFIG_FILE "shm.conf" + +// Contexts +#define NETDATA_CGROUP_SHM_GET_CONTEXT "cgroup.shmget" +#define NETDATA_CGROUP_SHM_AT_CONTEXT "cgroup.shmat" +#define NETDATA_CGROUP_SHM_DT_CONTEXT "cgroup.shmdt" +#define 
NETDATA_CGROUP_SHM_CTL_CONTEXT "cgroup.shmctl" + +#define NETDATA_SYSTEMD_SHM_GET_CONTEXT "services.shmget" +#define NETDATA_SYSTEMD_SHM_AT_CONTEXT "services.shmat" +#define NETDATA_SYSTEMD_SHM_DT_CONTEXT "services.shmdt" +#define NETDATA_SYSTEMD_SHM_CTL_CONTEXT "services.shmctl" + +typedef struct netdata_publish_shm { + uint64_t get; + uint64_t at; + uint64_t dt; + uint64_t ctl; +} netdata_publish_shm_t; + +enum shm_tables { + NETDATA_PID_SHM_TABLE, + NETDATA_SHM_CONTROLLER, + NETDATA_SHM_GLOBAL_TABLE +}; + +enum shm_counters { + NETDATA_KEY_SHMGET_CALL, + NETDATA_KEY_SHMAT_CALL, + NETDATA_KEY_SHMDT_CALL, + NETDATA_KEY_SHMCTL_CALL, + + // Keep this as last and don't skip numbers as it is used as element counter + NETDATA_SHM_END +}; + +extern netdata_publish_shm_t **shm_pid; + +extern void *ebpf_shm_thread(void *ptr); +extern void ebpf_shm_create_apps_charts(struct ebpf_module *em, void *ptr); +extern void clean_shm_pid_structures(); + +extern struct config shm_config; + +#endif diff --git a/collectors/ebpf.plugin/ebpf_socket.c b/collectors/ebpf.plugin/ebpf_socket.c index cbb4dded..f7710ff2 100644 --- a/collectors/ebpf.plugin/ebpf_socket.c +++ b/collectors/ebpf.plugin/ebpf_socket.c @@ -11,31 +11,49 @@ * *****************************************************************/ -static char *socket_dimension_names[NETDATA_MAX_SOCKET_VECTOR] = { "sent", "received", "close", "sent", - "received", "retransmitted" }; -static char *socket_id_names[NETDATA_MAX_SOCKET_VECTOR] = { "tcp_sendmsg", "tcp_cleanup_rbuf", "tcp_close", - "udp_sendmsg", "udp_recvmsg", "tcp_retransmit_skb" }; +static char *socket_dimension_names[NETDATA_MAX_SOCKET_VECTOR] = { "received", "sent", "close", + "received", "sent", "retransmitted" }; +static char *socket_id_names[NETDATA_MAX_SOCKET_VECTOR] = { "tcp_cleanup_rbuf", "tcp_sendmsg", "tcp_close", + "udp_recvmsg", "udp_sendmsg", "tcp_retransmit_skb" }; static ebpf_local_maps_t socket_maps[] = {{.name = "tbl_bandwidth", .internal_input = 
NETDATA_COMPILED_CONNECTIONS_ALLOWED, - .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED}, + .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_global_sock", + .internal_input = NETDATA_SOCKET_COUNTER, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_lports", + .internal_input = NETDATA_SOCKET_COUNTER, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, {.name = "tbl_conn_ipv4", .internal_input = NETDATA_COMPILED_CONNECTIONS_ALLOWED, - .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED}, + .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, {.name = "tbl_conn_ipv6", .internal_input = NETDATA_COMPILED_CONNECTIONS_ALLOWED, - .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED}, - {.name = "tbl_nv_udp_conn_stats", + .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_nv_udp", .internal_input = NETDATA_COMPILED_UDP_CONNECTIONS_ALLOWED, - .user_input = NETDATA_MAXIMUM_UDP_CONNECTIONS_ALLOWED}, + .user_input = NETDATA_MAXIMUM_UDP_CONNECTIONS_ALLOWED, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "socket_ctrl", .internal_input = NETDATA_CONTROLLER_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, {.name = NULL, .internal_input = 0, .user_input = 0}}; static netdata_idx_t *socket_hash_values = NULL; static netdata_syscall_stat_t socket_aggregated_data[NETDATA_MAX_SOCKET_VECTOR]; static netdata_publish_syscall_t socket_publish_aggregated[NETDATA_MAX_SOCKET_VECTOR]; -static ebpf_data_t socket_data; - ebpf_socket_publish_apps_t **socket_bandwidth_curr = NULL; static ebpf_bandwidth_t *bandwidth_vector = 
NULL; @@ -50,7 +68,6 @@ netdata_socket_t *socket_values; ebpf_network_viewer_port_list_t *listen_ports = NULL; -static int *map_fd = NULL; static struct bpf_object *objects = NULL; static struct bpf_link **probe_links = NULL; @@ -277,7 +294,7 @@ static void ebpf_socket_send_nv_data(netdata_vector_plot_t *ptr) } /** - * Send data to Netdata calling auxiliar functions. + * Send data to Netdata calling auxiliary functions. * * @param em the structure with thread information */ @@ -287,32 +304,26 @@ static void ebpf_socket_send_data(ebpf_module_t *em) netdata_publish_vfs_common_t common_udp; ebpf_update_global_publish(socket_publish_aggregated, &common_tcp, &common_udp, socket_aggregated_data); - // We read bytes from function arguments, but bandiwdth is given in bits, + // We read bytes from function arguments, but bandwidth is given in bits, // so we need to multiply by 8 to convert for the final value. - write_count_chart( - NETDATA_TCP_FUNCTION_COUNT, NETDATA_EBPF_FAMILY, socket_publish_aggregated, 3); - write_io_chart( - NETDATA_TCP_FUNCTION_BITS, NETDATA_EBPF_FAMILY, socket_id_names[0], common_tcp.write*8/1000, - socket_id_names[1], common_tcp.read*8/1000); + write_count_chart(NETDATA_TCP_FUNCTION_COUNT, NETDATA_EBPF_IP_FAMILY, socket_publish_aggregated, 3); + write_io_chart(NETDATA_TCP_FUNCTION_BITS, NETDATA_EBPF_IP_FAMILY, socket_id_names[0], + common_tcp.read * 8/BITS_IN_A_KILOBIT, socket_id_names[1], + common_tcp.write * 8/BITS_IN_A_KILOBIT); if (em->mode < MODE_ENTRY) { - write_err_chart( - NETDATA_TCP_FUNCTION_ERROR, NETDATA_EBPF_FAMILY, socket_publish_aggregated, 2); - } - write_count_chart( - NETDATA_TCP_RETRANSMIT, NETDATA_EBPF_FAMILY, &socket_publish_aggregated[NETDATA_IDX_TCP_RETRANSMIT], - 1); - - write_count_chart( - NETDATA_UDP_FUNCTION_COUNT, NETDATA_EBPF_FAMILY, &socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF], - 2); - write_io_chart( - NETDATA_UDP_FUNCTION_BITS, NETDATA_EBPF_FAMILY, - socket_id_names[3],(long long)common_udp.write*8/100, - 
socket_id_names[4], (long long)common_udp.read*8/1000); + write_err_chart(NETDATA_TCP_FUNCTION_ERROR, NETDATA_EBPF_IP_FAMILY, socket_publish_aggregated, 2); + } + write_count_chart(NETDATA_TCP_RETRANSMIT, NETDATA_EBPF_IP_FAMILY, + &socket_publish_aggregated[NETDATA_IDX_TCP_RETRANSMIT],1); + + write_count_chart(NETDATA_UDP_FUNCTION_COUNT, NETDATA_EBPF_IP_FAMILY, + &socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF],2); + write_io_chart(NETDATA_UDP_FUNCTION_BITS, NETDATA_EBPF_IP_FAMILY, + socket_id_names[3], (long long)common_udp.read * 8/BITS_IN_A_KILOBIT, + socket_id_names[4], (long long)common_udp.write * 8/BITS_IN_A_KILOBIT); if (em->mode < MODE_ENTRY) { - write_err_chart( - NETDATA_UDP_FUNCTION_ERROR, NETDATA_EBPF_FAMILY, &socket_publish_aggregated[NETDATA_UDP_START], - 2); + write_err_chart(NETDATA_UDP_FUNCTION_ERROR, NETDATA_EBPF_IP_FAMILY, + &socket_publish_aggregated[NETDATA_UDP_START], 2); } } @@ -342,7 +353,7 @@ long long ebpf_socket_sum_values_for_pids(struct pid_on_target *root, size_t off } /** - * Send data to Netdata calling auxiliar functions. + * Send data to Netdata calling auxiliary functions. * * @param em the structure with thread information * @param root the target list. 
@@ -445,88 +456,88 @@ void ebpf_socket_send_apps_data(ebpf_module_t *em, struct target *root) */ static void ebpf_create_global_charts(ebpf_module_t *em) { - ebpf_create_chart(NETDATA_EBPF_FAMILY, + ebpf_create_chart(NETDATA_EBPF_IP_FAMILY, NETDATA_TCP_FUNCTION_COUNT, "Calls to internal functions", EBPF_COMMON_DIMENSION_CALL, - NETDATA_SOCKET_GROUP, + NETDATA_SOCKET_KERNEL_FUNCTIONS, NULL, NETDATA_EBPF_CHART_TYPE_LINE, 21070, ebpf_create_global_dimension, socket_publish_aggregated, - 3); + 3, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); - ebpf_create_chart(NETDATA_EBPF_FAMILY, NETDATA_TCP_FUNCTION_BITS, + ebpf_create_chart(NETDATA_EBPF_IP_FAMILY, NETDATA_TCP_FUNCTION_BITS, "TCP bandwidth", EBPF_COMMON_DIMENSION_BITS, - NETDATA_SOCKET_GROUP, + NETDATA_SOCKET_KERNEL_FUNCTIONS, NULL, NETDATA_EBPF_CHART_TYPE_LINE, 21071, ebpf_create_global_dimension, socket_publish_aggregated, - 3); + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); if (em->mode < MODE_ENTRY) { - ebpf_create_chart(NETDATA_EBPF_FAMILY, + ebpf_create_chart(NETDATA_EBPF_IP_FAMILY, NETDATA_TCP_FUNCTION_ERROR, "TCP errors", EBPF_COMMON_DIMENSION_CALL, - NETDATA_SOCKET_GROUP, + NETDATA_SOCKET_KERNEL_FUNCTIONS, NULL, NETDATA_EBPF_CHART_TYPE_LINE, 21072, ebpf_create_global_dimension, socket_publish_aggregated, - 2); + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); } - ebpf_create_chart(NETDATA_EBPF_FAMILY, + ebpf_create_chart(NETDATA_EBPF_IP_FAMILY, NETDATA_TCP_RETRANSMIT, "Packages retransmitted", EBPF_COMMON_DIMENSION_CALL, - NETDATA_SOCKET_GROUP, + NETDATA_SOCKET_KERNEL_FUNCTIONS, NULL, NETDATA_EBPF_CHART_TYPE_LINE, 21073, ebpf_create_global_dimension, &socket_publish_aggregated[NETDATA_IDX_TCP_RETRANSMIT], - 1); + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); - ebpf_create_chart(NETDATA_EBPF_FAMILY, + ebpf_create_chart(NETDATA_EBPF_IP_FAMILY, NETDATA_UDP_FUNCTION_COUNT, "UDP calls", EBPF_COMMON_DIMENSION_CALL, - NETDATA_SOCKET_GROUP, + NETDATA_SOCKET_KERNEL_FUNCTIONS, NULL, 
NETDATA_EBPF_CHART_TYPE_LINE, 21074, ebpf_create_global_dimension, &socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF], - 2); + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); - ebpf_create_chart(NETDATA_EBPF_FAMILY, NETDATA_UDP_FUNCTION_BITS, + ebpf_create_chart(NETDATA_EBPF_IP_FAMILY, NETDATA_UDP_FUNCTION_BITS, "UDP bandwidth", EBPF_COMMON_DIMENSION_BITS, - NETDATA_SOCKET_GROUP, + NETDATA_SOCKET_KERNEL_FUNCTIONS, NULL, NETDATA_EBPF_CHART_TYPE_LINE, 21075, ebpf_create_global_dimension, &socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF], - 2); + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); if (em->mode < MODE_ENTRY) { - ebpf_create_chart(NETDATA_EBPF_FAMILY, + ebpf_create_chart(NETDATA_EBPF_IP_FAMILY, NETDATA_UDP_FUNCTION_ERROR, "UDP errors", EBPF_COMMON_DIMENSION_CALL, - NETDATA_SOCKET_GROUP, + NETDATA_SOCKET_KERNEL_FUNCTIONS, NULL, NETDATA_EBPF_CHART_TYPE_LINE, 21076, ebpf_create_global_dimension, &socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF], - 2); + 2, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); } } @@ -540,7 +551,6 @@ static void ebpf_create_global_charts(ebpf_module_t *em) */ void ebpf_socket_create_apps_charts(struct ebpf_module *em, void *ptr) { - UNUSED(em); struct target *root = ptr;; ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_SENT, "Bytes sent", EBPF_COMMON_DIMENSION_BITS, @@ -548,7 +558,7 @@ void ebpf_socket_create_apps_charts(struct ebpf_module *em, void *ptr) NETDATA_EBPF_CHART_TYPE_STACKED, 20080, ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root); + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_RECV, "bytes received", EBPF_COMMON_DIMENSION_BITS, @@ -556,7 +566,7 @@ void ebpf_socket_create_apps_charts(struct ebpf_module *em, void *ptr) NETDATA_EBPF_CHART_TYPE_STACKED, 20081, ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root); + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); 
ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, "Calls for tcp_sendmsg", @@ -565,7 +575,7 @@ void ebpf_socket_create_apps_charts(struct ebpf_module *em, void *ptr) NETDATA_EBPF_CHART_TYPE_STACKED, 20082, ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root); + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, "Calls for tcp_cleanup_rbuf", @@ -574,7 +584,7 @@ void ebpf_socket_create_apps_charts(struct ebpf_module *em, void *ptr) NETDATA_EBPF_CHART_TYPE_STACKED, 20083, ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root); + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, "Calls for tcp_retransmit", @@ -583,7 +593,7 @@ void ebpf_socket_create_apps_charts(struct ebpf_module *em, void *ptr) NETDATA_EBPF_CHART_TYPE_STACKED, 20084, ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root); + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, "Calls for udp_sendmsg", @@ -592,7 +602,7 @@ void ebpf_socket_create_apps_charts(struct ebpf_module *em, void *ptr) NETDATA_EBPF_CHART_TYPE_STACKED, 20085, ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root); + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, "Calls for udp_recvmsg", @@ -601,7 +611,7 @@ void ebpf_socket_create_apps_charts(struct ebpf_module *em, void *ptr) NETDATA_EBPF_CHART_TYPE_STACKED, 20086, ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], - root); + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); socket_apps_created = 1; } @@ -611,15 +621,16 @@ void ebpf_socket_create_apps_charts(struct ebpf_module *em, void *ptr) * * Create common charts. 
* - * @param id the chart id - * @param title the chart title - * @param units the units label - * @param family the group name used to attach the chart on dashboard - * @param order the chart order - * @param ptr the plot structure with values. + * @param id chart id + * @param title chart title + * @param units units label + * @param family group name used to attach the chart on dashboard + * @param order chart order + * @param update_every value to overwrite the update frequency set by the server. + * @param ptr plot structure with values. */ static void ebpf_socket_create_nv_chart(char *id, char *title, char *units, - char *family, int order, netdata_vector_plot_t *ptr) + char *family, int order, int update_every, netdata_vector_plot_t *ptr) { ebpf_write_chart_cmd(NETDATA_EBPF_FAMILY, id, @@ -628,7 +639,9 @@ static void ebpf_socket_create_nv_chart(char *id, char *title, char *units, family, NETDATA_EBPF_CHART_TYPE_STACKED, NULL, - order); + order, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); uint32_t i; uint32_t end = ptr->last_plot; @@ -653,10 +666,11 @@ static void ebpf_socket_create_nv_chart(char *id, char *title, char *units, * @param units the units label * @param family the group name used to attach the chart on dashboard * @param order the chart order + * @param update_every value to overwrite the update frequency set by the server. * @param ptr the plot structure with values. 
*/ static void ebpf_socket_create_nv_retransmit(char *id, char *title, char *units, - char *family, int order, netdata_vector_plot_t *ptr) + char *family, int order, int update_every, netdata_vector_plot_t *ptr) { ebpf_write_chart_cmd(NETDATA_EBPF_FAMILY, id, @@ -665,7 +679,9 @@ static void ebpf_socket_create_nv_retransmit(char *id, char *title, char *units, family, NETDATA_EBPF_CHART_TYPE_STACKED, NULL, - order); + order, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); uint32_t i; uint32_t end = ptr->last_plot; @@ -684,8 +700,9 @@ static void ebpf_socket_create_nv_retransmit(char *id, char *title, char *units, * Recreate the charts when new sockets are created. * * @param ptr a pointer for inbound or outbound vectors. + * @param update_every value to overwrite the update frequency set by the server. */ -static void ebpf_socket_create_nv_charts(netdata_vector_plot_t *ptr) +static void ebpf_socket_create_nv_charts(netdata_vector_plot_t *ptr, int update_every) { // We do not have new sockets, so we do not need move forward if (ptr->max_plot == ptr->last_plot) @@ -698,34 +715,34 @@ static void ebpf_socket_create_nv_charts(netdata_vector_plot_t *ptr) "Outbound connections (bytes).", EBPF_COMMON_DIMENSION_BYTES, NETDATA_NETWORK_CONNECTIONS_GROUP, 21080, - ptr); + update_every, ptr); ebpf_socket_create_nv_chart(NETDATA_NV_OUTBOUND_PACKETS, "Outbound connections (packets)", EBPF_COMMON_DIMENSION_PACKETS, NETDATA_NETWORK_CONNECTIONS_GROUP, 21082, - ptr); + update_every, ptr); ebpf_socket_create_nv_retransmit(NETDATA_NV_OUTBOUND_RETRANSMIT, "Retransmitted packets", EBPF_COMMON_DIMENSION_CALL, NETDATA_NETWORK_CONNECTIONS_GROUP, 21083, - ptr); + update_every, ptr); } else { ebpf_socket_create_nv_chart(NETDATA_NV_INBOUND_BYTES, "Inbound connections (bytes)", EBPF_COMMON_DIMENSION_BYTES, NETDATA_NETWORK_CONNECTIONS_GROUP, 21084, - ptr); + update_every, ptr); ebpf_socket_create_nv_chart(NETDATA_NV_INBOUND_PACKETS, "Inbound connections (packets)", 
EBPF_COMMON_DIMENSION_PACKETS, NETDATA_NETWORK_CONNECTIONS_GROUP, 21085, - ptr); + update_every, ptr); } ptr->flags |= NETWORK_VIEWER_CHARTS_CREATED; @@ -1437,7 +1454,7 @@ static void read_listen_table() uint16_t key = 0; uint16_t next_key = 0; - int fd = map_fd[NETDATA_SOCKET_LISTEN_TABLE]; + int fd = socket_maps[NETDATA_SOCKET_LPORTS].map_fd; uint8_t value; while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { int test = bpf_map_lookup_elem(fd, &key, &value); @@ -1475,9 +1492,9 @@ void *ebpf_socket_read_hash(void *ptr) read_thread_closed = 0; heartbeat_t hb; heartbeat_init(&hb); - usec_t step = NETDATA_SOCKET_READ_SLEEP_MS * em->update_time; - int fd_ipv4 = map_fd[NETDATA_SOCKET_IPV4_HASH_TABLE]; - int fd_ipv6 = map_fd[NETDATA_SOCKET_IPV6_HASH_TABLE]; + usec_t step = NETDATA_SOCKET_READ_SLEEP_MS * em->update_every; + int fd_ipv4 = socket_maps[NETDATA_SOCKET_TABLE_IPV4].map_fd; + int fd_ipv6 = socket_maps[NETDATA_SOCKET_TABLE_IPV6].map_fd; int network_connection = em->optional; while (!close_ebpf_plugin) { usec_t dt = heartbeat_next(&hb, step); @@ -1504,12 +1521,12 @@ static void read_hash_global_tables() netdata_idx_t res[NETDATA_SOCKET_COUNTER]; netdata_idx_t *val = socket_hash_values; - int fd = map_fd[NETDATA_SOCKET_GLOBAL_HASH_TABLE]; + int fd = socket_maps[NETDATA_SOCKET_GLOBAL].map_fd; for (idx = 0; idx < NETDATA_SOCKET_COUNTER; idx++) { if (!bpf_map_lookup_elem(fd, &idx, val)) { uint64_t total = 0; int i; - int end = (running_on_kernel < NETDATA_KERNEL_V4_15) ? 
1 : ebpf_nprocs; + int end = ebpf_nprocs; for (i = 0; i < end; i++) total += val[i]; @@ -1586,7 +1603,7 @@ void ebpf_socket_bandwidth_accumulator(ebpf_bandwidth_t *out) */ static void ebpf_socket_update_apps_data() { - int fd = map_fd[NETDATA_SOCKET_APPS_HASH_TABLE]; + int fd = socket_maps[NETDATA_SOCKET_TABLE_BANDWIDTH].map_fd; ebpf_bandwidth_t *eb = bandwidth_vector; uint32_t key; struct pid_stat *pids = root_of_pids; @@ -1606,6 +1623,475 @@ static void ebpf_socket_update_apps_data() } } +/** + * Update cgroup + * + * Update cgroup data based in + */ +static void ebpf_update_socket_cgroup() +{ + ebpf_cgroup_target_t *ect ; + + ebpf_bandwidth_t *eb = bandwidth_vector; + int fd = socket_maps[NETDATA_SOCKET_TABLE_BANDWIDTH].map_fd; + + pthread_mutex_lock(&mutex_cgroup_shm); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + struct pid_on_target2 *pids; + for (pids = ect->pids; pids; pids = pids->next) { + int pid = pids->pid; + ebpf_bandwidth_t *out = &pids->socket; + ebpf_socket_publish_apps_t *publish = &ect->publish_socket; + if (likely(socket_bandwidth_curr) && socket_bandwidth_curr[pid]) { + ebpf_socket_publish_apps_t *in = socket_bandwidth_curr[pid]; + + publish->bytes_sent = in->bytes_sent; + publish->bytes_received = in->bytes_received; + publish->call_tcp_sent = in->call_tcp_sent; + publish->call_tcp_received = in->call_tcp_received; + publish->retransmit = in->retransmit; + publish->call_udp_sent = in->call_udp_sent; + publish->call_udp_received = in->call_udp_received; + } else { + if (!bpf_map_lookup_elem(fd, &pid, eb)) { + ebpf_socket_bandwidth_accumulator(eb); + + memcpy(out, eb, sizeof(ebpf_bandwidth_t)); + + publish->bytes_sent = out->bytes_sent; + publish->bytes_received = out->bytes_received; + publish->call_tcp_sent = out->call_tcp_sent; + publish->call_tcp_received = out->call_tcp_received; + publish->retransmit = out->retransmit; + publish->call_udp_sent = out->call_udp_sent; + publish->call_udp_received = out->call_udp_received; + } + } + 
} + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Sum PIDs + * + * Sum values for all targets. + * + * @param fd structure used to store data + * @param pids input data + */ +static void ebpf_socket_sum_cgroup_pids(ebpf_socket_publish_apps_t *socket, struct pid_on_target2 *pids) +{ + ebpf_socket_publish_apps_t accumulator; + memset(&accumulator, 0, sizeof(accumulator)); + + while (pids) { + ebpf_bandwidth_t *w = &pids->socket; + + accumulator.bytes_received += w->bytes_received; + accumulator.bytes_sent += w->bytes_sent; + accumulator.call_tcp_received += w->call_tcp_received; + accumulator.call_tcp_sent += w->call_tcp_sent; + accumulator.retransmit += w->retransmit; + accumulator.call_udp_received += w->call_udp_received; + accumulator.call_udp_sent += w->call_udp_sent; + + pids = pids->next; + } + + socket->bytes_sent = (accumulator.bytes_sent >= socket->bytes_sent) ? accumulator.bytes_sent : socket->bytes_sent; + socket->bytes_received = (accumulator.bytes_received >= socket->bytes_received) ? accumulator.bytes_received : socket->bytes_received; + socket->call_tcp_sent = (accumulator.call_tcp_sent >= socket->call_tcp_sent) ? accumulator.call_tcp_sent : socket->call_tcp_sent; + socket->call_tcp_received = (accumulator.call_tcp_received >= socket->call_tcp_received) ? accumulator.call_tcp_received : socket->call_tcp_received; + socket->retransmit = (accumulator.retransmit >= socket->retransmit) ? accumulator.retransmit : socket->retransmit; + socket->call_udp_sent = (accumulator.call_udp_sent >= socket->call_udp_sent) ? accumulator.call_udp_sent : socket->call_udp_sent; + socket->call_udp_received = (accumulator.call_udp_received >= socket->call_udp_received) ? accumulator.call_udp_received : socket->call_udp_received; +} + +/** + * Create specific socket charts + * + * Create charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. 
+ */ +static void ebpf_create_specific_socket_charts(char *type, int update_every) +{ + ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_RECV, + "Bytes received", + EBPF_COMMON_DIMENSION_CALL, NETDATA_CGROUP_NET_GROUP, + NETDATA_CGROUP_SOCKET_BYTES_RECV_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5300, + ebpf_create_global_dimension, + &socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF], 1, + update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_SENT, + "Bytes sent", + EBPF_COMMON_DIMENSION_CALL, NETDATA_CGROUP_NET_GROUP, + NETDATA_CGROUP_SOCKET_BYTES_SEND_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5301, + ebpf_create_global_dimension, + socket_publish_aggregated, 1, + update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, + "Calls to tcp_cleanup_rbuf.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_CGROUP_NET_GROUP, + NETDATA_CGROUP_SOCKET_TCP_RECV_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5302, + ebpf_create_global_dimension, + &socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF], 1, + update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, + "Calls to tcp_sendmsg.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_CGROUP_NET_GROUP, + NETDATA_CGROUP_SOCKET_TCP_SEND_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5303, + ebpf_create_global_dimension, + socket_publish_aggregated, 1, + update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, + "Calls to tcp_retransmit.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_CGROUP_NET_GROUP, + NETDATA_CGROUP_SOCKET_TCP_RETRANSMIT_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5304, + ebpf_create_global_dimension, + 
&socket_publish_aggregated[NETDATA_IDX_TCP_RETRANSMIT], 1, + update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, + "Calls to udp_sendmsg", + EBPF_COMMON_DIMENSION_CALL, NETDATA_CGROUP_NET_GROUP, + NETDATA_CGROUP_SOCKET_UDP_SEND_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5305, + ebpf_create_global_dimension, + &socket_publish_aggregated[NETDATA_IDX_UDP_SENDMSG], 1, + update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, + "Calls to udp_recvmsg", + EBPF_COMMON_DIMENSION_CALL, NETDATA_CGROUP_NET_GROUP, + NETDATA_CGROUP_SOCKET_UDP_RECV_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5306, + ebpf_create_global_dimension, + &socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF], 1, + update_every, NETDATA_EBPF_MODULE_NAME_SWAP); +} + +/** + * Obsolete specific socket charts + * + * Obsolete charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. 
+ */ +static void ebpf_obsolete_specific_socket_charts(char *type, int update_every) +{ + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_RECV, "Bytes received", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_BYTES_RECV_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5300, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_SENT,"Bytes sent", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_BYTES_SEND_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5301, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, "Calls to tcp_cleanup_rbuf.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_RECV_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5302, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, "Calls to tcp_sendmsg.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_SEND_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5303, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, "Calls to tcp_retransmit.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_RETRANSMIT_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5304, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, "Calls to udp_sendmsg", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_UDP_SEND_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5305, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, "Calls to udp_recvmsg", + EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, 
NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_SERVICES_SOCKET_UDP_RECV_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5306, update_every); +} + +/* + * Send Specific Swap data + * + * Send data for specific cgroup/apps. + * + * @param type chart type + * @param values structure with values that will be sent to netdata + */ +static void ebpf_send_specific_socket_data(char *type, ebpf_socket_publish_apps_t *values) +{ + write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_SENT); + write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_SENDMSG].name, + (long long) values->bytes_sent); + write_end_chart(); + + write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_RECV); + write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF].name, + (long long) values->bytes_received); + write_end_chart(); + + write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS); + write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_SENDMSG].name, + (long long) values->call_tcp_sent); + write_end_chart(); + + write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS); + write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF].name, + (long long) values->call_tcp_received); + write_end_chart(); + + write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT); + write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_RETRANSMIT].name, + (long long) values->retransmit); + write_end_chart(); + + write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS); + write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_UDP_SENDMSG].name, + (long long) values->call_udp_sent); + write_end_chart(); + + write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS); + write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF].name, + (long long) values->call_udp_received); + write_end_chart(); +} + +/** + * Create Systemd Socket Charts + * + * Create charts when systemd is enabled + * + * @param 
update_every value to overwrite the update frequency set by the server. + **/ +static void ebpf_create_systemd_socket_charts(int update_every) +{ + ebpf_create_charts_on_systemd(NETDATA_NET_APPS_BANDWIDTH_RECV, + "Bytes received", EBPF_COMMON_DIMENSION_BITS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20080, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SERVICES_SOCKET_BYTES_RECV_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_NET_APPS_BANDWIDTH_SENT, + "Bytes sent", EBPF_COMMON_DIMENSION_BITS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20081, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SERVICES_SOCKET_BYTES_SEND_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, + "Calls to tcp_cleanup_rbuf.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20082, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SERVICES_SOCKET_TCP_RECV_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, + "Calls to tcp_sendmsg.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20083, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SERVICES_SOCKET_TCP_SEND_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, + "Calls to tcp_retransmit", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20084, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SERVICES_SOCKET_TCP_RETRANSMIT_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, + "Calls to udp_sendmsg", + EBPF_COMMON_DIMENSION_CALL, + 
NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20085, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SERVICES_SOCKET_UDP_SEND_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET, + update_every); + + ebpf_create_charts_on_systemd(NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, + "Calls to udp_recvmsg", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20086, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + NETDATA_SERVICES_SOCKET_UDP_RECV_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET, + update_every); +} + +/** + * Send Systemd charts + * + * Send collected data to Netdata. + * + * @return It returns the status for chart creation, if it is necessary to remove a specific dimension, zero is returned + * otherwise function returns 1 to avoid chart recreation + */ +static int ebpf_send_systemd_socket_charts() +{ + int ret = 1; + ebpf_cgroup_target_t *ect; + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_SENT); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_socket.bytes_sent); + } else + ret = 0; + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_RECV); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_socket.bytes_received); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_socket.call_tcp_sent); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + 
if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_socket.call_tcp_received); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_socket.retransmit); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_socket.call_udp_sent); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long)ect->publish_socket.call_udp_received); + } + } + write_end_chart(); + + return ret; +} + +/** + * Update Cgroup algorithm + * + * Change algorithm from absolute to incremental + */ +void ebpf_socket_update_cgroup_algorithm() +{ + int i; + for (i = 0; i < NETDATA_MAX_SOCKET_VECTOR; i++) { + netdata_publish_syscall_t *ptr = &socket_publish_aggregated[i]; + freez(ptr->algorithm); + ptr->algorithm = strdupz(ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + } +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param update_every value to overwrite the update frequency set by the server. 
+*/ +static void ebpf_socket_send_cgroup_data(int update_every) +{ + if (!ebpf_cgroup_pids) + return; + + pthread_mutex_lock(&mutex_cgroup_shm); + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + ebpf_socket_sum_cgroup_pids(&ect->publish_socket, ect->pids); + } + + int has_systemd = shm_ebpf_cgroup.header->systemd_enabled; + if (has_systemd) { + static int systemd_charts = 0; + if (!systemd_charts) { + ebpf_create_systemd_socket_charts(update_every); + systemd_charts = 1; + } + systemd_charts = ebpf_send_systemd_socket_charts(); + } + + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_SOCKET_CHART)) { + ebpf_create_specific_socket_charts(ect->name, update_every); + ect->flags |= NETDATA_EBPF_CGROUP_HAS_SOCKET_CHART; + } + + if (ect->flags & NETDATA_EBPF_CGROUP_HAS_SOCKET_CHART && ect->updated) { + ebpf_send_specific_socket_data(ect->name, &ect->publish_socket); + } else { + ebpf_obsolete_specific_socket_charts(ect->name, update_every); + ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_SOCKET_CHART; + } + } + + pthread_mutex_unlock(&mutex_cgroup_shm); +} + /***************************************************************** * * FUNCTIONS WITH THE MAIN LOOP @@ -1624,7 +2110,6 @@ struct netdata_static_thread socket_threads = {"EBPF SOCKET READ", */ static void socket_collector(usec_t step, ebpf_module_t *em) { - UNUSED(em); UNUSED(step); heartbeat_t hb; heartbeat_init(&hb); @@ -1634,49 +2119,63 @@ static void socket_collector(usec_t step, ebpf_module_t *em) netdata_thread_create(socket_threads.thread, socket_threads.name, NETDATA_THREAD_OPTION_JOINABLE, ebpf_socket_read_hash, em); + int cgroups = em->cgroup_charts; + if (cgroups) + ebpf_socket_update_cgroup_algorithm(); + int socket_apps_enabled = ebpf_modules[EBPF_MODULE_SOCKET_IDX].apps_charts; int socket_global_enabled = ebpf_modules[EBPF_MODULE_SOCKET_IDX].global_charts; int network_connection = 
em->optional; + int update_every = em->update_every; + int counter = update_every - 1; while (!close_ebpf_plugin) { pthread_mutex_lock(&collect_data_mutex); pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); - if (socket_global_enabled) - read_hash_global_tables(); + if (++counter == update_every) { + counter = 0; + if (socket_global_enabled) + read_hash_global_tables(); - if (socket_apps_enabled) - ebpf_socket_update_apps_data(); + if (socket_apps_enabled) + ebpf_socket_update_apps_data(); - calculate_nv_plot(); + if (cgroups) + ebpf_update_socket_cgroup(); - pthread_mutex_lock(&lock); - if (socket_global_enabled) - ebpf_socket_send_data(em); + calculate_nv_plot(); - if (socket_apps_enabled) - ebpf_socket_send_apps_data(em, apps_groups_root_target); + pthread_mutex_lock(&lock); + if (socket_global_enabled) + ebpf_socket_send_data(em); - fflush(stdout); + if (socket_apps_enabled) + ebpf_socket_send_apps_data(em, apps_groups_root_target); - if (network_connection) { - // We are calling fflush many times, because when we have a lot of dimensions - // we began to have not expected outputs and Netdata closed the plugin. - pthread_mutex_lock(&nv_mutex); - ebpf_socket_create_nv_charts(&inbound_vectors); - fflush(stdout); - ebpf_socket_send_nv_data(&inbound_vectors); + if (cgroups) + ebpf_socket_send_cgroup_data(update_every); - ebpf_socket_create_nv_charts(&outbound_vectors); fflush(stdout); - ebpf_socket_send_nv_data(&outbound_vectors); - wait_to_plot = 0; - pthread_mutex_unlock(&nv_mutex); + if (network_connection) { + // We are calling fflush many times, because when we have a lot of dimensions + // we began to have not expected outputs and Netdata closed the plugin. 
+ pthread_mutex_lock(&nv_mutex); + ebpf_socket_create_nv_charts(&inbound_vectors, update_every); + fflush(stdout); + ebpf_socket_send_nv_data(&inbound_vectors); + + ebpf_socket_create_nv_charts(&outbound_vectors, update_every); + fflush(stdout); + ebpf_socket_send_nv_data(&outbound_vectors); + wait_to_plot = 0; + pthread_mutex_unlock(&nv_mutex); + + } + pthread_mutex_unlock(&lock); } pthread_mutex_unlock(&collect_data_mutex); - pthread_mutex_unlock(&lock); - } } @@ -1885,17 +2384,18 @@ static void ebpf_socket_cleanup(void *ptr) clean_hostnames(network_viewer_opt.excluded_hostnames); pthread_mutex_destroy(&nv_mutex); - freez(socket_data.map_fd); freez(socket_threads.thread); - struct bpf_program *prog; - size_t i = 0 ; - bpf_object__for_each_program(prog, objects) { - bpf_link__destroy(probe_links[i]); - i++; + if (probe_links) { + struct bpf_program *prog; + size_t i = 0 ; + bpf_object__for_each_program(prog, objects) { + bpf_link__destroy(probe_links[i]); + i++; + } + bpf_object__close(objects); } - bpf_object__close(objects); finalized_threads = 1; } @@ -1910,15 +2410,17 @@ static void ebpf_socket_cleanup(void *ptr) * We are not testing the return, because callocz does this and shutdown the software * case it was not possible to allocate. * - * @param length is the length for the vectors used inside the collector. + * @param apps is apps enabled? 
*/ -static void ebpf_socket_allocate_global_vectors(size_t length) +static void ebpf_socket_allocate_global_vectors(int apps) { - memset(socket_aggregated_data, 0 ,length * sizeof(netdata_syscall_stat_t)); - memset(socket_publish_aggregated, 0 ,length * sizeof(netdata_publish_syscall_t)); + memset(socket_aggregated_data, 0 ,NETDATA_MAX_SOCKET_VECTOR * sizeof(netdata_syscall_stat_t)); + memset(socket_publish_aggregated, 0 ,NETDATA_MAX_SOCKET_VECTOR * sizeof(netdata_publish_syscall_t)); socket_hash_values = callocz(ebpf_nprocs, sizeof(netdata_idx_t)); - socket_bandwidth_curr = callocz((size_t)pid_max, sizeof(ebpf_socket_publish_apps_t *)); + if (apps) + socket_bandwidth_curr = callocz((size_t)pid_max, sizeof(ebpf_socket_publish_apps_t *)); + bandwidth_vector = callocz((size_t)ebpf_nprocs, sizeof(ebpf_bandwidth_t)); socket_values = callocz((size_t)ebpf_nprocs, sizeof(netdata_socket_t)); @@ -1927,14 +2429,6 @@ static void ebpf_socket_allocate_global_vectors(size_t length) } /** - * Set local function pointers, this function will never be compiled with static libraries - */ -static void set_local_pointers() -{ - map_fd = socket_data.map_fd; -} - -/** * Initialize Inbound and Outbound * * Initialize the common outbound and inbound sockets. 
@@ -2860,9 +3354,7 @@ void *ebpf_socket_thread(void *ptr) ebpf_module_t *em = (ebpf_module_t *)ptr; em->maps = socket_maps; - fill_ebpf_data(&socket_data); - ebpf_update_module(em, &socket_config, NETDATA_NETWORK_CONFIG_FILE); parse_network_viewer_section(&socket_config); parse_service_name_section(&socket_config); parse_table_size_options(&socket_config); @@ -2876,16 +3368,13 @@ void *ebpf_socket_thread(void *ptr) } pthread_mutex_lock(&lock); - ebpf_socket_allocate_global_vectors(NETDATA_MAX_SOCKET_VECTOR); + ebpf_socket_allocate_global_vectors(em->apps_charts); initialize_inbound_outbound(); - if (ebpf_update_kernel(&socket_data)) { - pthread_mutex_unlock(&lock); - goto endsocket; - } + if (running_on_kernel < NETDATA_EBPF_KERNEL_5_0) + em->mode = MODE_ENTRY; - set_local_pointers(); - probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects, socket_data.map_fd); + probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects); if (!probe_links) { pthread_mutex_unlock(&lock); goto endsocket; @@ -2904,7 +3393,7 @@ void *ebpf_socket_thread(void *ptr) finalized_threads = 0; pthread_mutex_unlock(&lock); - socket_collector((usec_t)(em->update_time * USEC_PER_SEC), em); + socket_collector((usec_t)(em->update_every * USEC_PER_SEC), em); endsocket: netdata_thread_cleanup_pop(1); diff --git a/collectors/ebpf.plugin/ebpf_socket.h b/collectors/ebpf.plugin/ebpf_socket.h index 8dd42250..e3c000c7 100644 --- a/collectors/ebpf.plugin/ebpf_socket.h +++ b/collectors/ebpf.plugin/ebpf_socket.h @@ -4,15 +4,11 @@ #include <stdint.h> #include "libnetdata/avl/avl.h" +// Module name +#define NETDATA_EBPF_MODULE_NAME_SOCKET "socket" + // Vector indexes #define NETDATA_UDP_START 3 -#define NETDATA_RETRANSMIT_START 5 - -#define NETDATA_SOCKET_APPS_HASH_TABLE 0 -#define NETDATA_SOCKET_IPV4_HASH_TABLE 1 -#define NETDATA_SOCKET_IPV6_HASH_TABLE 2 -#define NETDATA_SOCKET_GLOBAL_HASH_TABLE 4 -#define NETDATA_SOCKET_LISTEN_TABLE 5 #define 
NETDATA_SOCKET_READ_SLEEP_MS 800000ULL @@ -32,9 +28,12 @@ enum ebpf_socket_table_list { NETDATA_SOCKET_TABLE_BANDWIDTH, + NETDATA_SOCKET_GLOBAL, + NETDATA_SOCKET_LPORTS, NETDATA_SOCKET_TABLE_IPV4, NETDATA_SOCKET_TABLE_IPV6, - NETDATA_SOCKET_TABLE_UDP + NETDATA_SOCKET_TABLE_UDP, + NETDATA_SOCKET_TABLE_CTRL }; enum ebpf_socket_publish_index { @@ -74,8 +73,9 @@ typedef enum ebpf_socket_idx { NETDATA_SOCKET_COUNTER } ebpf_socket_index_t; -#define NETDATA_SOCKET_GROUP "Socket" -#define NETDATA_NETWORK_CONNECTIONS_GROUP "Network connections" +#define NETDATA_SOCKET_KERNEL_FUNCTIONS "kernel" +#define NETDATA_NETWORK_CONNECTIONS_GROUP "network connections" +#define NETDATA_CGROUP_NET_GROUP "network (eBPF)" // Global chart name #define NETDATA_TCP_FUNCTION_COUNT "tcp_functions" @@ -113,6 +113,23 @@ typedef enum ebpf_socket_idx { #define NETDATA_MINIMUM_IPV4_CIDR 0 #define NETDATA_MAXIMUM_IPV4_CIDR 32 +// Contexts +#define NETDATA_CGROUP_SOCKET_BYTES_RECV_CONTEXT "cgroup.net_bytes_recv" +#define NETDATA_CGROUP_SOCKET_BYTES_SEND_CONTEXT "cgroup.net_bytes_send" +#define NETDATA_CGROUP_SOCKET_TCP_RECV_CONTEXT "cgroup.net_tcp_recv" +#define NETDATA_CGROUP_SOCKET_TCP_SEND_CONTEXT "cgroup.net_tcp_send" +#define NETDATA_CGROUP_SOCKET_TCP_RETRANSMIT_CONTEXT "cgroup.net_retransmit" +#define NETDATA_CGROUP_SOCKET_UDP_RECV_CONTEXT "cgroup.net_udp_recv" +#define NETDATA_CGROUP_SOCKET_UDP_SEND_CONTEXT "cgroup.net_udp_send" + +#define NETDATA_SERVICES_SOCKET_BYTES_RECV_CONTEXT "services.net_bytes_recv" +#define NETDATA_SERVICES_SOCKET_BYTES_SEND_CONTEXT "services.net_bytes_send" +#define NETDATA_SERVICES_SOCKET_TCP_RECV_CONTEXT "services.net_tcp_recv" +#define NETDATA_SERVICES_SOCKET_TCP_SEND_CONTEXT "services.net_tcp_send" +#define NETDATA_SERVICES_SOCKET_TCP_RETRANSMIT_CONTEXT "services.net_retransmit" +#define NETDATA_SERVICES_SOCKET_UDP_RECV_CONTEXT "services.net_udp_recv" +#define NETDATA_SERVICES_SOCKET_UDP_SEND_CONTEXT "services.net_udp_send" + typedef struct 
ebpf_socket_publish_apps { // Data read uint64_t bytes_sent; // Bytes sent @@ -312,5 +329,6 @@ extern void parse_service_name_section(struct config *cfg); extern void clean_socket_apps_structures(); extern ebpf_socket_publish_apps_t **socket_bandwidth_curr; +extern struct config socket_config; #endif diff --git a/collectors/ebpf.plugin/ebpf_softirq.c b/collectors/ebpf.plugin/ebpf_softirq.c new file mode 100644 index 00000000..119c1222 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_softirq.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_softirq.h" + +struct config softirq_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +#define SOFTIRQ_MAP_LATENCY 0 +static ebpf_local_maps_t softirq_maps[] = { + { + .name = "tbl_softirq", + .internal_input = NETDATA_SOFTIRQ_MAX_IRQS, + .user_input = 0, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + }, + /* end */ + { + .name = NULL, + .internal_input = 0, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + } +}; + +#define SOFTIRQ_TP_CLASS_IRQ "irq" +static ebpf_tracepoint_t softirq_tracepoints[] = { + {.enabled = false, .class = SOFTIRQ_TP_CLASS_IRQ, .event = "softirq_entry"}, + {.enabled = false, .class = SOFTIRQ_TP_CLASS_IRQ, .event = "softirq_exit"}, + /* end */ + {.enabled = false, .class = NULL, .event = NULL} +}; + +// these must be in the order defined by the kernel: +// https://elixir.bootlin.com/linux/v5.12.19/source/include/trace/events/irq.h#L13 +static softirq_val_t softirq_vals[] = { + {.name = "HI", .latency = 0}, + {.name = "TIMER", .latency = 0}, + {.name = "NET_TX", .latency = 0}, + {.name = "NET_RX", .latency = 0}, + {.name = "BLOCK", .latency = 0}, + {.name = "IRQ_POLL", .latency = 0}, + {.name = "TASKLET", 
.latency = 0}, + {.name = "SCHED", .latency = 0}, + {.name = "HRTIMER", .latency = 0}, + {.name = "RCU", .latency = 0}, +}; + +// tmp store for soft IRQ values we get from a per-CPU eBPF map. +static softirq_ebpf_val_t *softirq_ebpf_vals = NULL; + +static struct bpf_link **probe_links = NULL; +static struct bpf_object *objects = NULL; + +static int read_thread_closed = 1; + +static struct netdata_static_thread softirq_threads = {"SOFTIRQ KERNEL", + NULL, NULL, 1, NULL, + NULL, NULL }; + +/** + * Clean up the main thread. + * + * @param ptr thread data. + */ +static void softirq_cleanup(void *ptr) +{ + for (int i = 0; softirq_tracepoints[i].class != NULL; i++) { + ebpf_disable_tracepoint(&softirq_tracepoints[i]); + } + + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) { + return; + } + + heartbeat_t hb; + heartbeat_init(&hb); + uint32_t tick = 1 * USEC_PER_MS; + while (!read_thread_closed) { + usec_t dt = heartbeat_next(&hb, tick); + UNUSED(dt); + } + + freez(softirq_ebpf_vals); + freez(softirq_threads.thread); + + if (probe_links) { + struct bpf_program *prog; + size_t i = 0 ; + bpf_object__for_each_program(prog, objects) { + bpf_link__destroy(probe_links[i]); + i++; + } + bpf_object__close(objects); + } +} + +/***************************************************************** + * MAIN LOOP + *****************************************************************/ + +static void softirq_read_latency_map() +{ + int fd = softirq_maps[SOFTIRQ_MAP_LATENCY].map_fd; + int i; + for (i = 0; i < NETDATA_SOFTIRQ_MAX_IRQS; i++) { + int test = bpf_map_lookup_elem(fd, &i, softirq_ebpf_vals); + if (unlikely(test < 0)) { + continue; + } + + uint64_t total_latency = 0; + int cpu_i; + int end = ebpf_nprocs; + for (cpu_i = 0; cpu_i < end; cpu_i++) { + total_latency += softirq_ebpf_vals[cpu_i].latency/1000; + } + + softirq_vals[i].latency = total_latency; + } +} + +/** + * Read eBPF maps for soft IRQ. 
+ */ +static void *softirq_reader(void *ptr) +{ + read_thread_closed = 0; + + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + + usec_t step = NETDATA_SOFTIRQ_SLEEP_MS * em->update_every; + while (!close_ebpf_plugin) { + usec_t dt = heartbeat_next(&hb, step); + UNUSED(dt); + + softirq_read_latency_map(); + } + + read_thread_closed = 1; + return NULL; +} + +static void softirq_create_charts(int update_every) +{ + ebpf_create_chart( + NETDATA_EBPF_SYSTEM_GROUP, + "softirq_latency", + "Software IRQ latency", + EBPF_COMMON_DIMENSION_MILLISECONDS, + "softirqs", + NULL, + NETDATA_EBPF_CHART_TYPE_STACKED, + NETDATA_CHART_PRIO_SYSTEM_SOFTIRQS+1, + NULL, NULL, 0, update_every, + NETDATA_EBPF_MODULE_NAME_SOFTIRQ + ); + + fflush(stdout); +} + +static void softirq_create_dims() +{ + uint32_t i; + for (i = 0; i < NETDATA_SOFTIRQ_MAX_IRQS; i++) { + ebpf_write_global_dimension( + softirq_vals[i].name, softirq_vals[i].name, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX] + ); + } +} + +static inline void softirq_write_dims() +{ + uint32_t i; + for (i = 0; i < NETDATA_SOFTIRQ_MAX_IRQS; i++) { + write_chart_dimension(softirq_vals[i].name, softirq_vals[i].latency); + } +} + +/** +* Main loop for this collector. +*/ +static void softirq_collector(ebpf_module_t *em) +{ + softirq_ebpf_vals = callocz(ebpf_nprocs, sizeof(softirq_ebpf_val_t)); + + // create reader thread. + softirq_threads.thread = mallocz(sizeof(netdata_thread_t)); + softirq_threads.start_routine = softirq_reader; + netdata_thread_create( + softirq_threads.thread, + softirq_threads.name, + NETDATA_THREAD_OPTION_JOINABLE, + softirq_reader, + em + ); + + // create chart and static dims. + pthread_mutex_lock(&lock); + softirq_create_charts(em->update_every); + softirq_create_dims(); + pthread_mutex_unlock(&lock); + + // loop and read from published data until ebpf plugin is closed. 
+ int update_every = em->update_every; + int counter = update_every - 1; + while (!close_ebpf_plugin) { + pthread_mutex_lock(&collect_data_mutex); + pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); + + if (++counter == update_every) { + counter = 0; + pthread_mutex_lock(&lock); + + // write dims now for all hitherto discovered IRQs. + write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, "softirq_latency"); + softirq_write_dims(); + write_end_chart(); + + pthread_mutex_unlock(&lock); + } + pthread_mutex_unlock(&collect_data_mutex); + } +} + +/***************************************************************** + * EBPF SOFTIRQ THREAD + *****************************************************************/ + +/** + * Soft IRQ latency thread. + * + * @param ptr a `ebpf_module_t *`. + * @return always NULL. + */ +void *ebpf_softirq_thread(void *ptr) +{ + netdata_thread_cleanup_push(softirq_cleanup, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = softirq_maps; + + if (!em->enabled) { + goto endsoftirq; + } + + if (ebpf_enable_tracepoints(softirq_tracepoints) == 0) { + em->enabled = CONFIG_BOOLEAN_NO; + goto endsoftirq; + } + + probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects); + if (!probe_links) { + goto endsoftirq; + } + + softirq_collector(em); + +endsoftirq: + netdata_thread_cleanup_pop(1); + + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_softirq.h b/collectors/ebpf.plugin/ebpf_softirq.h new file mode 100644 index 00000000..a2275189 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_softirq.h @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_SOFTIRQ_H +#define NETDATA_EBPF_SOFTIRQ_H 1 + +/***************************************************************** + * copied from kernel-collectors repo, with modifications needed + * for inclusion here. 
+ *****************************************************************/ + +#define NETDATA_SOFTIRQ_MAX_IRQS 10 + +typedef struct softirq_ebpf_val { + uint64_t latency; + uint64_t ts; +} softirq_ebpf_val_t; + +/***************************************************************** + * below this is eBPF plugin-specific code. + *****************************************************************/ + +#define NETDATA_EBPF_MODULE_NAME_SOFTIRQ "softirq" +#define NETDATA_SOFTIRQ_SLEEP_MS 650000ULL +#define NETDATA_SOFTIRQ_CONFIG_FILE "softirq.conf" + +typedef struct sofirq_val { + uint64_t latency; + char *name; +} softirq_val_t; + +extern struct config softirq_config; +extern void *ebpf_softirq_thread(void *ptr); + +#endif /* NETDATA_EBPF_SOFTIRQ_H */ diff --git a/collectors/ebpf.plugin/ebpf_swap.c b/collectors/ebpf.plugin/ebpf_swap.c new file mode 100644 index 00000000..34750c79 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_swap.c @@ -0,0 +1,698 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_swap.h" + +static char *swap_dimension_name[NETDATA_SWAP_END] = { "read", "write" }; +static netdata_syscall_stat_t swap_aggregated_data[NETDATA_SWAP_END]; +static netdata_publish_syscall_t swap_publish_aggregated[NETDATA_SWAP_END]; + +static int read_thread_closed = 1; +netdata_publish_swap_t *swap_vector = NULL; + +static netdata_idx_t swap_hash_values[NETDATA_SWAP_END]; +static netdata_idx_t *swap_values = NULL; + +netdata_publish_swap_t **swap_pid = NULL; + +struct config swap_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +static ebpf_local_maps_t swap_maps[] = {{.name = "tbl_pid_swap", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, + .user_input = 0, + .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "swap_ctrl", 
.internal_input = NETDATA_CONTROLLER_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_swap", .internal_input = NETDATA_SWAP_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0}}; + +static struct bpf_link **probe_links = NULL; +static struct bpf_object *objects = NULL; + +struct netdata_static_thread swap_threads = {"SWAP KERNEL", NULL, NULL, 1, + NULL, NULL, NULL}; + +/***************************************************************** + * + * FUNCTIONS TO CLOSE THE THREAD + * + *****************************************************************/ + +/** + * Clean swap structure + */ +void clean_swap_pid_structures() { + struct pid_stat *pids = root_of_pids; + while (pids) { + freez(swap_pid[pids->pid]); + + pids = pids->next; + } +} + +/** + * Clean up the main thread. + * + * @param ptr thread data. + */ +static void ebpf_swap_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) + return; + + heartbeat_t hb; + heartbeat_init(&hb); + uint32_t tick = 2 * USEC_PER_MS; + while (!read_thread_closed) { + usec_t dt = heartbeat_next(&hb, tick); + UNUSED(dt); + } + + ebpf_cleanup_publish_syscall(swap_publish_aggregated); + + freez(swap_vector); + freez(swap_values); + + if (probe_links) { + struct bpf_program *prog; + size_t i = 0 ; + bpf_object__for_each_program(prog, objects) { + bpf_link__destroy(probe_links[i]); + i++; + } + bpf_object__close(objects); + } +} + +/***************************************************************** + * + * COLLECTOR THREAD + * + *****************************************************************/ + +/** + * Apps Accumulator + * + * Sum all values read from kernel and store in the first address. + * + * @param out the vector with read values. 
+ */ +static void swap_apps_accumulator(netdata_publish_swap_t *out) +{ + int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? ebpf_nprocs : 1; + netdata_publish_swap_t *total = &out[0]; + for (i = 1; i < end; i++) { + netdata_publish_swap_t *w = &out[i]; + total->write += w->write; + total->read += w->read; + } +} + +/** + * Fill PID + * + * Fill PID structures + * + * @param current_pid pid that we are collecting data + * @param out values read from hash tables; + */ +static void swap_fill_pid(uint32_t current_pid, netdata_publish_swap_t *publish) +{ + netdata_publish_swap_t *curr = swap_pid[current_pid]; + if (!curr) { + curr = callocz(1, sizeof(netdata_publish_swap_t)); + swap_pid[current_pid] = curr; + } + + memcpy(curr, publish, sizeof(netdata_publish_swap_t)); +} + +/** + * Update cgroup + * + * Update cgroup data based in + */ +static void ebpf_update_swap_cgroup() +{ + ebpf_cgroup_target_t *ect ; + netdata_publish_swap_t *cv = swap_vector; + int fd = swap_maps[NETDATA_PID_SWAP_TABLE].map_fd; + size_t length = sizeof(netdata_publish_swap_t)*ebpf_nprocs; + pthread_mutex_lock(&mutex_cgroup_shm); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + struct pid_on_target2 *pids; + for (pids = ect->pids; pids; pids = pids->next) { + int pid = pids->pid; + netdata_publish_swap_t *out = &pids->swap; + if (likely(swap_pid) && swap_pid[pid]) { + netdata_publish_swap_t *in = swap_pid[pid]; + + memcpy(out, in, sizeof(netdata_publish_swap_t)); + } else { + memset(cv, 0, length); + if (!bpf_map_lookup_elem(fd, &pid, cv)) { + swap_apps_accumulator(cv); + + memcpy(out, cv, sizeof(netdata_publish_swap_t)); + } + } + } + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Read APPS table + * + * Read the apps table and store data inside the structure. 
+ */ +static void read_apps_table() +{ + netdata_publish_swap_t *cv = swap_vector; + uint32_t key; + struct pid_stat *pids = root_of_pids; + int fd = swap_maps[NETDATA_PID_SWAP_TABLE].map_fd; + size_t length = sizeof(netdata_publish_swap_t)*ebpf_nprocs; + while (pids) { + key = pids->pid; + + if (bpf_map_lookup_elem(fd, &key, cv)) { + pids = pids->next; + continue; + } + + swap_apps_accumulator(cv); + + swap_fill_pid(key, cv); + + // We are cleaning to avoid passing data read from one process to other. + memset(cv, 0, length); + + pids = pids->next; + } +} + +/** +* Send global +* +* Send global charts to Netdata +*/ +static void swap_send_global() +{ + write_io_chart(NETDATA_MEM_SWAP_CHART, NETDATA_EBPF_SYSTEM_GROUP, + swap_publish_aggregated[NETDATA_KEY_SWAP_WRITEPAGE_CALL].dimension, + (long long) swap_hash_values[NETDATA_KEY_SWAP_WRITEPAGE_CALL], + swap_publish_aggregated[NETDATA_KEY_SWAP_READPAGE_CALL].dimension, + (long long) swap_hash_values[NETDATA_KEY_SWAP_READPAGE_CALL]); +} + +/** + * Read global counter + * + * Read the table with number of calls to all functions + */ +static void read_global_table() +{ + netdata_idx_t *stored = swap_values; + netdata_idx_t *val = swap_hash_values; + int fd = swap_maps[NETDATA_SWAP_GLOBAL_TABLE].map_fd; + + uint32_t i, end = NETDATA_SWAP_END; + for (i = NETDATA_KEY_SWAP_READPAGE_CALL; i < end; i++) { + if (!bpf_map_lookup_elem(fd, &i, stored)) { + int j; + int last = ebpf_nprocs; + netdata_idx_t total = 0; + for (j = 0; j < last; j++) + total += stored[j]; + + val[i] = total; + } + } +} + +/** + * Swap read hash + * + * This is the thread callback. + * + * @param ptr It is a NULL value for this thread. + * + * @return It always returns NULL. 
+ */ +void *ebpf_swap_read_hash(void *ptr) +{ + read_thread_closed = 0; + + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + usec_t step = NETDATA_SWAP_SLEEP_MS * em->update_every; + while (!close_ebpf_plugin) { + usec_t dt = heartbeat_next(&hb, step); + (void)dt; + + read_global_table(); + } + + read_thread_closed = 1; + return NULL; +} + +/** + * Sum PIDs + * + * Sum values for all targets. + * + * @param swap + * @param root + */ +static void ebpf_swap_sum_pids(netdata_publish_swap_t *swap, struct pid_on_target *root) +{ + uint64_t local_read = 0; + uint64_t local_write = 0; + + while (root) { + int32_t pid = root->pid; + netdata_publish_swap_t *w = swap_pid[pid]; + if (w) { + local_write += w->write; + local_read += w->read; + } + root = root->next; + } + + // These conditions were added, because we are using incremental algorithm + swap->write = (local_write >= swap->write) ? local_write : swap->write; + swap->read = (local_read >= swap->read) ? local_read : swap->read; +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param root the target list. +*/ +void ebpf_swap_send_apps_data(struct target *root) +{ + struct target *w; + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + ebpf_swap_sum_pids(&w->swap, w->root_pid); + } + } + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_MEM_SWAP_READ_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, (long long) w->swap.read); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_MEM_SWAP_WRITE_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, (long long) w->swap.write); + } + } + write_end_chart(); +} + +/** + * Sum PIDs + * + * Sum values for all targets. 
+ * + * @param swap + * @param root + */ +static void ebpf_swap_sum_cgroup_pids(netdata_publish_swap_t *swap, struct pid_on_target2 *pids) +{ + uint64_t local_read = 0; + uint64_t local_write = 0; + + while (pids) { + netdata_publish_swap_t *w = &pids->swap; + local_write += w->write; + local_read += w->read; + + pids = pids->next; + } + + // These conditions were added, because we are using incremental algorithm + swap->write = (local_write >= swap->write) ? local_write : swap->write; + swap->read = (local_read >= swap->read) ? local_read : swap->read; +} + +/** + * Send Systemd charts + * + * Send collected data to Netdata. + * + * @return It returns the status for chart creation, if it is necessary to remove a specific dimension, zero is returned + * otherwise function returns 1 to avoid chart recreation + */ +static int ebpf_send_systemd_swap_charts() +{ + int ret = 1; + ebpf_cgroup_target_t *ect; + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_MEM_SWAP_READ_CHART); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long) ect->publish_systemd_swap.read); + } else + ret = 0; + } + write_end_chart(); + + write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_MEM_SWAP_WRITE_CHART); + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (unlikely(ect->systemd) && unlikely(ect->updated)) { + write_chart_dimension(ect->name, (long long) ect->publish_systemd_swap.write); + } + } + write_end_chart(); + + return ret; +} + +/** + * Create specific swap charts + * + * Create charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. 
+ */ +static void ebpf_create_specific_swap_charts(char *type, int update_every) +{ + ebpf_create_chart(type, NETDATA_MEM_SWAP_READ_CHART, + "Calls to function <code>swap_readpage</code>.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, + NETDATA_CGROUP_SWAP_READ_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5100, + ebpf_create_global_dimension, + swap_publish_aggregated, 1, update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + ebpf_create_chart(type, NETDATA_MEM_SWAP_WRITE_CHART, + "Calls to function <code>swap_writepage</code>.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, + NETDATA_CGROUP_SWAP_WRITE_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5101, + ebpf_create_global_dimension, + &swap_publish_aggregated[NETDATA_KEY_SWAP_WRITEPAGE_CALL], 1, + update_every, NETDATA_EBPF_MODULE_NAME_SWAP); +} + +/** + * Create specific swap charts + * + * Create charts for cgroup/application. + * + * @param type the chart type. + * @param update_every value to overwrite the update frequency set by the server. + */ +static void ebpf_obsolete_specific_swap_charts(char *type, int update_every) +{ + ebpf_write_chart_obsolete(type, NETDATA_MEM_SWAP_READ_CHART,"Calls to function <code>swap_readpage</code>.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SWAP_READ_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5100, update_every); + + ebpf_write_chart_obsolete(type, NETDATA_MEM_SWAP_WRITE_CHART, "Calls to function <code>swap_writepage</code>.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SWAP_WRITE_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5101, update_every); +} + +/* + * Send Specific Swap data + * + * Send data for specific cgroup/apps. 
+ * + * @param type chart type + * @param values structure with values that will be sent to netdata + */ +static void ebpf_send_specific_swap_data(char *type, netdata_publish_swap_t *values) +{ + write_begin_chart(type, NETDATA_MEM_SWAP_READ_CHART); + write_chart_dimension(swap_publish_aggregated[NETDATA_KEY_SWAP_READPAGE_CALL].name, (long long) values->read); + write_end_chart(); + + write_begin_chart(type, NETDATA_MEM_SWAP_WRITE_CHART); + write_chart_dimension(swap_publish_aggregated[NETDATA_KEY_SWAP_WRITEPAGE_CALL].name, (long long) values->write); + write_end_chart(); +} + +/** + * Create Systemd Swap Charts + * + * Create charts when systemd is enabled + * + * @param update_every value to overwrite the update frequency set by the server. + **/ +static void ebpf_create_systemd_swap_charts(int update_every) +{ + ebpf_create_charts_on_systemd(NETDATA_MEM_SWAP_READ_CHART, + "Calls to <code>swap_readpage</code>.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, 20191, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_SWAP_READ_CONTEXT, + NETDATA_EBPF_MODULE_NAME_SWAP, update_every); + + ebpf_create_charts_on_systemd(NETDATA_MEM_SWAP_WRITE_CHART, + "Calls to function <code>swap_writepage</code>.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, 20192, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_SWAP_WRITE_CONTEXT, + NETDATA_EBPF_MODULE_NAME_SWAP, update_every); +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param update_every value to overwrite the update frequency set by the server. 
+*/ +void ebpf_swap_send_cgroup_data(int update_every) +{ + if (!ebpf_cgroup_pids) + return; + + pthread_mutex_lock(&mutex_cgroup_shm); + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + ebpf_swap_sum_cgroup_pids(&ect->publish_systemd_swap, ect->pids); + } + + int has_systemd = shm_ebpf_cgroup.header->systemd_enabled; + + if (has_systemd) { + static int systemd_charts = 0; + if (!systemd_charts) { + ebpf_create_systemd_swap_charts(update_every); + systemd_charts = 1; + fflush(stdout); + } + + systemd_charts = ebpf_send_systemd_swap_charts(); + } + + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_SWAP_CHART) && ect->updated) { + ebpf_create_specific_swap_charts(ect->name, update_every); + ect->flags |= NETDATA_EBPF_CGROUP_HAS_SWAP_CHART; + } + + if (ect->flags & NETDATA_EBPF_CGROUP_HAS_SWAP_CHART) { + if (ect->updated) { + ebpf_send_specific_swap_data(ect->name, &ect->publish_systemd_swap); + } else { + ebpf_obsolete_specific_swap_charts(ect->name, update_every); + ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_SWAP_CHART; + } + } + } + + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** +* Main loop for this collector. 
+*/ +static void swap_collector(ebpf_module_t *em) +{ + swap_threads.thread = mallocz(sizeof(netdata_thread_t)); + swap_threads.start_routine = ebpf_swap_read_hash; + + netdata_thread_create(swap_threads.thread, swap_threads.name, NETDATA_THREAD_OPTION_JOINABLE, + ebpf_swap_read_hash, em); + + int apps = em->apps_charts; + int cgroup = em->cgroup_charts; + int update_every = em->update_every; + int counter = update_every - 1; + while (!close_ebpf_plugin) { + pthread_mutex_lock(&collect_data_mutex); + pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); + + if (++counter == update_every) { + counter = 0; + if (apps) + read_apps_table(); + + if (cgroup) + ebpf_update_swap_cgroup(); + + pthread_mutex_lock(&lock); + + swap_send_global(); + + if (apps) + ebpf_swap_send_apps_data(apps_groups_root_target); + + if (cgroup) + ebpf_swap_send_cgroup_data(update_every); + + pthread_mutex_unlock(&lock); + } + pthread_mutex_unlock(&collect_data_mutex); + } +} + +/***************************************************************** + * + * INITIALIZE THREAD + * + *****************************************************************/ + +/** + * Create apps charts + * + * Call ebpf_create_chart to create the charts on apps submenu. + * + * @param em a pointer to the structure with the default values. 
+ */ +void ebpf_swap_create_apps_charts(struct ebpf_module *em, void *ptr) +{ + struct target *root = ptr; + ebpf_create_charts_on_apps(NETDATA_MEM_SWAP_READ_CHART, + "Calls to function <code>swap_readpage</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20191, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + ebpf_create_charts_on_apps(NETDATA_MEM_SWAP_WRITE_CHART, + "Calls to function <code>swap_writepage</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20192, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); +} + +/** + * Allocate vectors used with this thread. + * + * We are not testing the return, because callocz does this and shutdown the software + * case it was not possible to allocate. + * + * @param apps is apps enabled? + */ +static void ebpf_swap_allocate_global_vectors(int apps) +{ + if (apps) + swap_pid = callocz((size_t)pid_max, sizeof(netdata_publish_swap_t *)); + + swap_vector = callocz((size_t)ebpf_nprocs, sizeof(netdata_publish_swap_t)); + + swap_values = callocz((size_t)ebpf_nprocs, sizeof(netdata_idx_t)); + + memset(swap_hash_values, 0, sizeof(swap_hash_values)); +} + +/***************************************************************** + * + * MAIN THREAD + * + *****************************************************************/ + +/** + * Create global charts + * + * Call ebpf_create_chart to create the charts for the collector. + * + * @param update_every value to overwrite the update frequency set by the server. 
+ */ +static void ebpf_create_swap_charts(int update_every) +{ + ebpf_create_chart(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_MEM_SWAP_CHART, + "Calls to internal functions used to access swap.", + EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_SWAP_SUBMENU, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + 202, + ebpf_create_global_dimension, + swap_publish_aggregated, NETDATA_SWAP_END, + update_every, NETDATA_EBPF_MODULE_NAME_SWAP); +} + +/** + * SWAP thread + * + * Thread used to make swap thread + * + * @param ptr a pointer to `struct ebpf_module` + * + * @return It always return NULL + */ +void *ebpf_swap_thread(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_swap_cleanup, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = swap_maps; + + ebpf_update_pid_table(&swap_maps[NETDATA_PID_SWAP_TABLE], em); + + if (!em->enabled) + goto endswap; + + probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects); + if (!probe_links) { + goto endswap; + } + + ebpf_swap_allocate_global_vectors(em->apps_charts); + + int algorithms[NETDATA_SWAP_END] = { NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX }; + ebpf_global_labels(swap_aggregated_data, swap_publish_aggregated, swap_dimension_name, swap_dimension_name, + algorithms, NETDATA_SWAP_END); + + pthread_mutex_lock(&lock); + ebpf_create_swap_charts(em->update_every); + pthread_mutex_unlock(&lock); + + swap_collector(em); + +endswap: + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_swap.h b/collectors/ebpf.plugin/ebpf_swap.h new file mode 100644 index 00000000..1dba9c17 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_swap.h @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_SWAP_H +#define NETDATA_EBPF_SWAP_H 1 + +// Module name +#define NETDATA_EBPF_MODULE_NAME_SWAP "swap" + +#define NETDATA_SWAP_SLEEP_MS 850000ULL + +// charts +#define NETDATA_MEM_SWAP_CHART "swapcalls" +#define NETDATA_MEM_SWAP_READ_CHART "swap_read_call" 
+#define NETDATA_MEM_SWAP_WRITE_CHART "swap_write_call" +#define NETDATA_SWAP_SUBMENU "swap" + +// configuration file +#define NETDATA_DIRECTORY_SWAP_CONFIG_FILE "swap.conf" + +// Contexts +#define NETDATA_CGROUP_SWAP_READ_CONTEXT "cgroup.swap_read" +#define NETDATA_CGROUP_SWAP_WRITE_CONTEXT "cgroup.swap_write" +#define NETDATA_SYSTEMD_SWAP_READ_CONTEXT "services.swap_read" +#define NETDATA_SYSTEMD_SWAP_WRITE_CONTEXT "services.swap_write" + +typedef struct netdata_publish_swap { + uint64_t read; + uint64_t write; +} netdata_publish_swap_t; + +enum swap_tables { + NETDATA_PID_SWAP_TABLE, + NETDATA_SWAP_CONTROLLER, + NETDATA_SWAP_GLOBAL_TABLE +}; + +enum swap_counters { + NETDATA_KEY_SWAP_READPAGE_CALL, + NETDATA_KEY_SWAP_WRITEPAGE_CALL, + + // Keep this as last and don't skip numbers as it is used as element counter + NETDATA_SWAP_END +}; + +extern netdata_publish_swap_t **swap_pid; + +extern void *ebpf_swap_thread(void *ptr); +extern void ebpf_swap_create_apps_charts(struct ebpf_module *em, void *ptr); +extern void clean_swap_pid_structures(); + +extern struct config swap_config; + +#endif diff --git a/collectors/ebpf.plugin/ebpf_sync.c b/collectors/ebpf.plugin/ebpf_sync.c index f0db1cc4..4bd62bca 100644 --- a/collectors/ebpf.plugin/ebpf_sync.c +++ b/collectors/ebpf.plugin/ebpf_sync.c @@ -3,8 +3,6 @@ #include "ebpf.h" #include "ebpf_sync.h" -static ebpf_data_t sync_data; - static char *sync_counter_dimension_name[NETDATA_SYNC_IDX_END] = { "sync", "syncfs", "msync", "fsync", "fdatasync", "sync_file_range" }; static netdata_syscall_stat_t sync_counter_aggregated_data[NETDATA_SYNC_IDX_END]; @@ -17,6 +15,28 @@ static netdata_idx_t sync_hash_values[NETDATA_SYNC_IDX_END]; struct netdata_static_thread sync_threads = {"SYNC KERNEL", NULL, NULL, 1, NULL, NULL, NULL}; +static ebpf_local_maps_t sync_maps[] = {{.name = "tbl_sync", .internal_input = NETDATA_SYNC_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = 
"tbl_syncfs", .internal_input = NETDATA_SYNC_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_msync", .internal_input = NETDATA_SYNC_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_fsync", .internal_input = NETDATA_SYNC_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_fdatasync", .internal_input = NETDATA_SYNC_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_syncfr", .internal_input = NETDATA_SYNC_END, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; + struct config sync_config = { .first_section = NULL, .last_section = NULL, .mutex = NETDATA_MUTEX_INITIALIZER, @@ -53,15 +73,8 @@ static int ebpf_sync_initialize_syscall(ebpf_module_t *em) for (i = 0; local_syscalls[i].syscall; i++) { ebpf_sync_syscalls_t *w = &local_syscalls[i]; if (!w->probe_links && w->enabled) { - fill_ebpf_data(&w->kernel_info); - if (ebpf_update_kernel(&w->kernel_info)) { - em->thread_name = saved_name; - error("Cannot update the kernel for eBPF module %s", w->syscall); - return -1; - } - em->thread_name = w->syscall; - w->probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &w->objects, w->kernel_info.map_fd); + w->probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &w->objects); if (!w->probe_links) { em->thread_name = saved_name; return -1; @@ -95,7 +108,7 @@ static void read_global_table() int i; for (i = 0; local_syscalls[i].syscall; i++) { if (local_syscalls[i].enabled) { - int fd = local_syscalls[i].kernel_info.map_fd[NETDATA_SYNC_GLOBLAL_TABLE]; + int fd = sync_maps[i].map_fd; if 
(!bpf_map_lookup_elem(fd, &idx, &stored)) { sync_hash_values[i] = stored; } @@ -119,7 +132,7 @@ void *ebpf_sync_read_hash(void *ptr) heartbeat_t hb; heartbeat_init(&hb); - usec_t step = NETDATA_EBPF_SYNC_SLEEP_MS * em->update_time; + usec_t step = NETDATA_EBPF_SYNC_SLEEP_MS * em->update_every; while (!close_ebpf_plugin) { usec_t dt = heartbeat_next(&hb, step); @@ -197,15 +210,20 @@ static void sync_collector(ebpf_module_t *em) netdata_thread_create(sync_threads.thread, sync_threads.name, NETDATA_THREAD_OPTION_JOINABLE, ebpf_sync_read_hash, em); + int update_every = em->update_every; + int counter = update_every - 1; while (!close_ebpf_plugin) { pthread_mutex_lock(&collect_data_mutex); pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); - pthread_mutex_lock(&lock); + if (++counter == update_every) { + counter = 0; + pthread_mutex_lock(&lock); - sync_send_data(); + sync_send_data(); - pthread_mutex_unlock(&lock); + pthread_mutex_unlock(&lock); + } pthread_mutex_unlock(&collect_data_mutex); } } @@ -228,8 +246,6 @@ void ebpf_sync_cleanup_objects() for (i = 0; local_syscalls[i].syscall; i++) { ebpf_sync_syscalls_t *w = &local_syscalls[i]; if (w->probe_links) { - freez(w->kernel_info.map_fd); - struct bpf_program *prog; size_t j = 0 ; bpf_object__for_each_program(prog, w->objects) { @@ -280,15 +296,19 @@ static void ebpf_sync_cleanup(void *ptr) * @param order order number of the specified chart * @param idx the first index with data. * @param end the last index with data. + * @param update_every value to overwrite the update frequency set by the server. 
*/ static void ebpf_create_sync_chart(char *id, char *title, int order, int idx, - int end) + int end, + int update_every) { ebpf_write_chart_cmd(NETDATA_EBPF_MEMORY_GROUP, id, title, EBPF_COMMON_DIMENSION_CALL, - NETDATA_EBPF_SYNC_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NULL, order); + NETDATA_EBPF_SYNC_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NULL, order, + update_every, + NETDATA_EBPF_MODULE_NAME_SYNC); netdata_publish_syscall_t *move = &sync_counter_publish_aggregated[idx]; @@ -305,28 +325,30 @@ static void ebpf_create_sync_chart(char *id, * Create global charts * * Call ebpf_create_chart to create the charts for the collector. + * + * @param update_every value to overwrite the update frequency set by the server. */ -static void ebpf_create_sync_charts() +static void ebpf_create_sync_charts(int update_every) { if (local_syscalls[NETDATA_SYNC_FSYNC_IDX].enabled || local_syscalls[NETDATA_SYNC_FDATASYNC_IDX].enabled) ebpf_create_sync_chart(NETDATA_EBPF_FILE_SYNC_CHART, "Monitor calls for <code>fsync(2)</code> and <code>fdatasync(2)</code>.", 21300, - NETDATA_SYNC_FSYNC_IDX, NETDATA_SYNC_FDATASYNC_IDX); + NETDATA_SYNC_FSYNC_IDX, NETDATA_SYNC_FDATASYNC_IDX, update_every); if (local_syscalls[NETDATA_SYNC_MSYNC_IDX].enabled) ebpf_create_sync_chart(NETDATA_EBPF_MSYNC_CHART, "Monitor calls for <code>msync(2)</code>.", 21301, - NETDATA_SYNC_MSYNC_IDX, NETDATA_SYNC_MSYNC_IDX); + NETDATA_SYNC_MSYNC_IDX, NETDATA_SYNC_MSYNC_IDX, update_every); if (local_syscalls[NETDATA_SYNC_SYNC_IDX].enabled || local_syscalls[NETDATA_SYNC_SYNCFS_IDX].enabled) ebpf_create_sync_chart(NETDATA_EBPF_SYNC_CHART, "Monitor calls for <code>sync(2)</code> and <code>syncfs(2)</code>.", 21302, - NETDATA_SYNC_SYNC_IDX, NETDATA_SYNC_SYNCFS_IDX); + NETDATA_SYNC_SYNC_IDX, NETDATA_SYNC_SYNCFS_IDX, update_every); if (local_syscalls[NETDATA_SYNC_SYNC_FILE_RANGE_IDX].enabled) ebpf_create_sync_chart(NETDATA_EBPF_FILE_SEGMENT_CHART, "Monitor calls for <code>sync_file_range(2)</code>.", 21303, - 
NETDATA_SYNC_SYNC_FILE_RANGE_IDX, NETDATA_SYNC_SYNC_FILE_RANGE_IDX); + NETDATA_SYNC_SYNC_FILE_RANGE_IDX, NETDATA_SYNC_SYNC_FILE_RANGE_IDX, update_every); } /** @@ -357,9 +379,8 @@ void *ebpf_sync_thread(void *ptr) netdata_thread_cleanup_push(ebpf_sync_cleanup, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; - fill_ebpf_data(&sync_data); + em->maps = sync_maps; - ebpf_update_module(em, &sync_config, NETDATA_SYNC_CONFIG_FILE); ebpf_sync_parse_syscalls(); if (!em->enabled) @@ -378,7 +399,7 @@ void *ebpf_sync_thread(void *ptr) algorithms, NETDATA_SYNC_IDX_END); pthread_mutex_lock(&lock); - ebpf_create_sync_charts(); + ebpf_create_sync_charts(em->update_every); pthread_mutex_unlock(&lock); sync_collector(em); diff --git a/collectors/ebpf.plugin/ebpf_sync.h b/collectors/ebpf.plugin/ebpf_sync.h index 45831821..1f811d34 100644 --- a/collectors/ebpf.plugin/ebpf_sync.h +++ b/collectors/ebpf.plugin/ebpf_sync.h @@ -3,6 +3,9 @@ #ifndef NETDATA_EBPF_SYNC_H #define NETDATA_EBPF_SYNC_H 1 +// Module name +#define NETDATA_EBPF_MODULE_NAME_SYNC "sync" + // charts #define NETDATA_EBPF_SYNC_CHART "sync" #define NETDATA_EBPF_MSYNC_CHART "memory_map" @@ -34,8 +37,6 @@ typedef struct ebpf_sync_syscalls { struct bpf_object *objects; struct bpf_link **probe_links; - - ebpf_data_t kernel_info; } ebpf_sync_syscalls_t; enum netdata_sync_charts { @@ -50,5 +51,6 @@ enum netdata_sync_table { }; extern void *ebpf_sync_thread(void *ptr); +extern struct config sync_config; #endif /* NETDATA_EBPF_SYNC_H */ diff --git a/collectors/ebpf.plugin/ebpf_vfs.c b/collectors/ebpf.plugin/ebpf_vfs.c new file mode 100644 index 00000000..060469ec --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_vfs.c @@ -0,0 +1,1601 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include <sys/resource.h> + +#include "ebpf.h" +#include "ebpf_vfs.h" + +static char *vfs_dimension_names[NETDATA_KEY_PUBLISH_VFS_END] = { "delete", "read", "write", + "fsync", "open", "create" }; +static char *vfs_id_names[NETDATA_KEY_PUBLISH_VFS_END] 
= { "vfs_unlink", "vfs_read", "vfs_write", + "vfs_fsync", "vfs_open", "vfs_create"}; + +static netdata_idx_t *vfs_hash_values = NULL; +static netdata_syscall_stat_t vfs_aggregated_data[NETDATA_KEY_PUBLISH_VFS_END]; +static netdata_publish_syscall_t vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_END]; +netdata_publish_vfs_t **vfs_pid = NULL; +netdata_publish_vfs_t *vfs_vector = NULL; + +static ebpf_local_maps_t vfs_maps[] = {{.name = "tbl_vfs_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, + .user_input = 0, .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "tbl_vfs_stats", .internal_input = NETDATA_VFS_COUNTER, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = "vfs_ctrl", .internal_input = NETDATA_CONTROLLER_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0}}; + +struct config vfs_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +static struct bpf_object *objects = NULL; +static struct bpf_link **probe_links = NULL; + +struct netdata_static_thread vfs_threads = {"VFS KERNEL", + NULL, NULL, 1, NULL, + NULL, NULL}; + +static int read_thread_closed = 1; + +/***************************************************************** + * + * FUNCTIONS TO CLOSE THE THREAD + * + *****************************************************************/ + +/** + * Clean PID structures + * + * Clean the allocated structures. + */ +void clean_vfs_pid_structures() { + struct pid_stat *pids = root_of_pids; + while (pids) { + freez(vfs_pid[pids->pid]); + + pids = pids->next; + } +} + +/** +* Clean up the main thread. +* +* @param ptr thread data. 
+**/ +static void ebpf_vfs_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) + return; + + heartbeat_t hb; + heartbeat_init(&hb); + uint32_t tick = 50 * USEC_PER_MS; + while (!read_thread_closed) { + usec_t dt = heartbeat_next(&hb, tick); + UNUSED(dt); + } + + freez(vfs_hash_values); + freez(vfs_vector); + + if (probe_links) { + struct bpf_program *prog; + size_t i = 0 ; + bpf_object__for_each_program(prog, objects) { + bpf_link__destroy(probe_links[i]); + i++; + } + bpf_object__close(objects); + } +} + +/***************************************************************** + * + * FUNCTIONS WITH THE MAIN LOOP + * + *****************************************************************/ + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param em the structure with thread information +*/ +static void ebpf_vfs_send_data(ebpf_module_t *em) +{ + netdata_publish_vfs_common_t pvc; + + pvc.write = (long)vfs_aggregated_data[NETDATA_KEY_PUBLISH_VFS_WRITE].bytes; + pvc.read = (long)vfs_aggregated_data[NETDATA_KEY_PUBLISH_VFS_READ].bytes; + + write_count_chart(NETDATA_VFS_FILE_CLEAN_COUNT, NETDATA_FILESYSTEM_FAMILY, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_UNLINK], 1); + + write_count_chart(NETDATA_VFS_FILE_IO_COUNT, NETDATA_FILESYSTEM_FAMILY, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ], 2); + + if (em->mode < MODE_ENTRY) { + write_err_chart(NETDATA_VFS_FILE_ERR_COUNT, NETDATA_FILESYSTEM_FAMILY, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ], 2); + } + + write_io_chart(NETDATA_VFS_IO_FILE_BYTES, NETDATA_FILESYSTEM_FAMILY, vfs_id_names[NETDATA_KEY_PUBLISH_VFS_WRITE], + (long long)pvc.write, vfs_id_names[NETDATA_KEY_PUBLISH_VFS_READ], (long long)pvc.read); + + write_count_chart(NETDATA_VFS_FSYNC, NETDATA_FILESYSTEM_FAMILY, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC], 1); + + if (em->mode < MODE_ENTRY) { + write_err_chart(NETDATA_VFS_FSYNC_ERR, NETDATA_FILESYSTEM_FAMILY, + 
&vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC], 1); + } + + write_count_chart(NETDATA_VFS_OPEN, NETDATA_FILESYSTEM_FAMILY, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN], 1); + + if (em->mode < MODE_ENTRY) { + write_err_chart(NETDATA_VFS_OPEN_ERR, NETDATA_FILESYSTEM_FAMILY, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN], 1); + } + + write_count_chart(NETDATA_VFS_CREATE, NETDATA_FILESYSTEM_FAMILY, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE], 1); + + if (em->mode < MODE_ENTRY) { + write_err_chart( + NETDATA_VFS_CREATE_ERR, + NETDATA_FILESYSTEM_FAMILY, + &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE], + 1); + } +} + +/** + * Read the hash table and store data to allocated vectors. + */ +static void read_global_table() +{ + uint64_t idx; + netdata_idx_t res[NETDATA_VFS_COUNTER]; + + netdata_idx_t *val = vfs_hash_values; + int fd = vfs_maps[NETDATA_VFS_ALL].map_fd; + for (idx = 0; idx < NETDATA_VFS_COUNTER; idx++) { + uint64_t total = 0; + if (!bpf_map_lookup_elem(fd, &idx, val)) { + int i; + int end = ebpf_nprocs; + for (i = 0; i < end; i++) + total += val[i]; + } + res[idx] = total; + } + + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_UNLINK].ncall = res[NETDATA_KEY_CALLS_VFS_UNLINK]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ].ncall = res[NETDATA_KEY_CALLS_VFS_READ] + + res[NETDATA_KEY_CALLS_VFS_READV]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE].ncall = res[NETDATA_KEY_CALLS_VFS_WRITE] + + res[NETDATA_KEY_CALLS_VFS_WRITEV]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC].ncall = res[NETDATA_KEY_CALLS_VFS_FSYNC]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN].ncall = res[NETDATA_KEY_CALLS_VFS_OPEN]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE].ncall = res[NETDATA_KEY_CALLS_VFS_CREATE]; + + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_UNLINK].nerr = res[NETDATA_KEY_ERROR_VFS_UNLINK]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ].nerr = 
res[NETDATA_KEY_ERROR_VFS_READ] + + res[NETDATA_KEY_ERROR_VFS_READV]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE].nerr = res[NETDATA_KEY_ERROR_VFS_WRITE] + + res[NETDATA_KEY_ERROR_VFS_WRITEV]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC].nerr = res[NETDATA_KEY_ERROR_VFS_FSYNC]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN].nerr = res[NETDATA_KEY_ERROR_VFS_OPEN]; + vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE].nerr = res[NETDATA_KEY_ERROR_VFS_CREATE]; + + vfs_aggregated_data[NETDATA_KEY_PUBLISH_VFS_WRITE].bytes = (uint64_t)res[NETDATA_KEY_BYTES_VFS_WRITE] + + (uint64_t)res[NETDATA_KEY_BYTES_VFS_WRITEV]; + vfs_aggregated_data[NETDATA_KEY_PUBLISH_VFS_READ].bytes = (uint64_t)res[NETDATA_KEY_BYTES_VFS_READ] + + (uint64_t)res[NETDATA_KEY_BYTES_VFS_READV]; +} + +/** + * Sum PIDs + * + * Sum values for all targets. + * + * @param swap output structure + * @param root link list with structure to be used + */ +static void ebpf_vfs_sum_pids(netdata_publish_vfs_t *vfs, struct pid_on_target *root) +{ + netdata_publish_vfs_t accumulator; + memset(&accumulator, 0, sizeof(accumulator)); + + while (root) { + int32_t pid = root->pid; + netdata_publish_vfs_t *w = vfs_pid[pid]; + if (w) { + accumulator.write_call += w->write_call; + accumulator.writev_call += w->writev_call; + accumulator.read_call += w->read_call; + accumulator.readv_call += w->readv_call; + accumulator.unlink_call += w->unlink_call; + accumulator.fsync_call += w->fsync_call; + accumulator.open_call += w->open_call; + accumulator.create_call += w->create_call; + + accumulator.write_bytes += w->write_bytes; + accumulator.writev_bytes += w->writev_bytes; + accumulator.read_bytes += w->read_bytes; + accumulator.readv_bytes += w->readv_bytes; + + accumulator.write_err += w->write_err; + accumulator.writev_err += w->writev_err; + accumulator.read_err += w->read_err; + accumulator.readv_err += w->readv_err; + accumulator.unlink_err += w->unlink_err; + accumulator.fsync_err += 
w->fsync_err; + accumulator.open_err += w->open_err; + accumulator.create_err += w->create_err; + } + root = root->next; + } + + // These conditions were added, because we are using incremental algorithm + vfs->write_call = (accumulator.write_call >= vfs->write_call) ? accumulator.write_call : vfs->write_call; + vfs->writev_call = (accumulator.writev_call >= vfs->writev_call) ? accumulator.writev_call : vfs->writev_call; + vfs->read_call = (accumulator.read_call >= vfs->read_call) ? accumulator.read_call : vfs->read_call; + vfs->readv_call = (accumulator.readv_call >= vfs->readv_call) ? accumulator.readv_call : vfs->readv_call; + vfs->unlink_call = (accumulator.unlink_call >= vfs->unlink_call) ? accumulator.unlink_call : vfs->unlink_call; + vfs->fsync_call = (accumulator.fsync_call >= vfs->fsync_call) ? accumulator.fsync_call : vfs->fsync_call; + vfs->open_call = (accumulator.open_call >= vfs->open_call) ? accumulator.open_call : vfs->open_call; + vfs->create_call = (accumulator.create_call >= vfs->create_call) ? accumulator.create_call : vfs->create_call; + + vfs->write_bytes = (accumulator.write_bytes >= vfs->write_bytes) ? accumulator.write_bytes : vfs->write_bytes; + vfs->writev_bytes = (accumulator.writev_bytes >= vfs->writev_bytes) ? accumulator.writev_bytes : vfs->writev_bytes; + vfs->read_bytes = (accumulator.read_bytes >= vfs->read_bytes) ? accumulator.read_bytes : vfs->read_bytes; + vfs->readv_bytes = (accumulator.readv_bytes >= vfs->readv_bytes) ? accumulator.readv_bytes : vfs->readv_bytes; + + vfs->write_err = (accumulator.write_err >= vfs->write_err) ? accumulator.write_err : vfs->write_err; + vfs->writev_err = (accumulator.writev_err >= vfs->writev_err) ? accumulator.writev_err : vfs->writev_err; + vfs->read_err = (accumulator.read_err >= vfs->read_err) ? accumulator.read_err : vfs->read_err; + vfs->readv_err = (accumulator.readv_err >= vfs->readv_err) ? 
accumulator.readv_err : vfs->readv_err; + vfs->unlink_err = (accumulator.unlink_err >= vfs->unlink_err) ? accumulator.unlink_err : vfs->unlink_err; + vfs->fsync_err = (accumulator.fsync_err >= vfs->fsync_err) ? accumulator.fsync_err : vfs->fsync_err; + vfs->open_err = (accumulator.open_err >= vfs->open_err) ? accumulator.open_err : vfs->open_err; + vfs->create_err = (accumulator.create_err >= vfs->create_err) ? accumulator.create_err : vfs->create_err; +} + +/** + * Send data to Netdata calling auxiliary functions. + * + * @param em the structure with thread information + * @param root the target list. + */ +void ebpf_vfs_send_apps_data(ebpf_module_t *em, struct target *root) +{ + struct target *w; + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + ebpf_vfs_sum_pids(&w->vfs, w->root_pid); + } + } + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_DELETED); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.unlink_call); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.write_call + w->vfs.writev_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.write_err + w->vfs.writev_err); + } + } + write_end_chart(); + } + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.read_call + w->vfs.readv_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_APPS_FAMILY, 
NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.read_err + w->vfs.readv_err); + } + } + write_end_chart(); + } + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.write_bytes + w->vfs.writev_bytes); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_BYTES); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.read_bytes + w->vfs.readv_bytes); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.fsync_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.fsync_err); + } + } + write_end_chart(); + } + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.open_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.open_err); + } + } + write_end_chart(); + } + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, 
w->vfs.create_call); + } + } + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + write_chart_dimension(w->name, w->vfs.create_err); + } + } + write_end_chart(); + } +} + +/** + * Apps Accumulator + * + * Sum all values read from kernel and store in the first address. + * + * @param out the vector with read values. + */ +static void vfs_apps_accumulator(netdata_publish_vfs_t *out) +{ + int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? ebpf_nprocs : 1; + netdata_publish_vfs_t *total = &out[0]; + for (i = 1; i < end; i++) { + netdata_publish_vfs_t *w = &out[i]; + + total->write_call += w->write_call; + total->writev_call += w->writev_call; + total->read_call += w->read_call; + total->readv_call += w->readv_call; + total->unlink_call += w->unlink_call; + + total->write_bytes += w->write_bytes; + total->writev_bytes += w->writev_bytes; + total->read_bytes += w->read_bytes; + total->readv_bytes += w->readv_bytes; + + total->write_err += w->write_err; + total->writev_err += w->writev_err; + total->read_err += w->read_err; + total->readv_err += w->readv_err; + total->unlink_err += w->unlink_err; + } +} + +/** + * Fill PID + * + * Fill PID structures + * + * @param current_pid pid that we are collecting data + * @param out values read from hash tables; + */ +static void vfs_fill_pid(uint32_t current_pid, netdata_publish_vfs_t *publish) +{ + netdata_publish_vfs_t *curr = vfs_pid[current_pid]; + if (!curr) { + curr = callocz(1, sizeof(netdata_publish_vfs_t)); + vfs_pid[current_pid] = curr; + } + + memcpy(curr, &publish[0], sizeof(netdata_publish_vfs_t)); +} + +/** + * Read the hash table and store data to allocated vectors. 
+ */ +static void ebpf_vfs_read_apps() +{ + struct pid_stat *pids = root_of_pids; + netdata_publish_vfs_t *vv = vfs_vector; + int fd = vfs_maps[NETDATA_VFS_PID].map_fd; + size_t length = sizeof(netdata_publish_vfs_t) * ebpf_nprocs; + while (pids) { + uint32_t key = pids->pid; + + if (bpf_map_lookup_elem(fd, &key, vv)) { + pids = pids->next; + continue; + } + + vfs_apps_accumulator(vv); + + vfs_fill_pid(key, vv); + + // We are cleaning to avoid passing data read from one process to other. + memset(vv, 0, length); + + pids = pids->next; + } +} + +/** + * Update cgroup + * + * Update cgroup data based in + */ +static void read_update_vfs_cgroup() +{ + ebpf_cgroup_target_t *ect ; + netdata_publish_vfs_t *vv = vfs_vector; + int fd = vfs_maps[NETDATA_VFS_PID].map_fd; + size_t length = sizeof(netdata_publish_vfs_t) * ebpf_nprocs; + + pthread_mutex_lock(&mutex_cgroup_shm); + for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { + struct pid_on_target2 *pids; + for (pids = ect->pids; pids; pids = pids->next) { + int pid = pids->pid; + netdata_publish_vfs_t *out = &pids->vfs; + if (likely(vfs_pid) && vfs_pid[pid]) { + netdata_publish_vfs_t *in = vfs_pid[pid]; + + memcpy(out, in, sizeof(netdata_publish_vfs_t)); + } else { + memset(vv, 0, length); + if (!bpf_map_lookup_elem(fd, &pid, vv)) { + vfs_apps_accumulator(vv); + + memcpy(out, vv, sizeof(netdata_publish_vfs_t)); + } + } + } + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * VFS read hash + * + * This is the thread callback. + * This thread is necessary, because we cannot freeze the whole plugin to read the data. + * + * @param ptr It is a NULL value for this thread. + * + * @return It always returns NULL. 
+ */ +void *ebpf_vfs_read_hash(void *ptr) +{ + read_thread_closed = 0; + + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + + usec_t step = NETDATA_LATENCY_VFS_SLEEP_MS * em->update_every; + while (!close_ebpf_plugin) { + usec_t dt = heartbeat_next(&hb, step); + (void)dt; + + read_global_table(); + } + + read_thread_closed = 1; + + return NULL; +} + +/** + * Sum PIDs + * + * Sum values for all targets. + * + * @param vfs structure used to store data + * @param pids input data + */ +static void ebpf_vfs_sum_cgroup_pids(netdata_publish_vfs_t *vfs, struct pid_on_target2 *pids) + { + netdata_publish_vfs_t accumulator; + memset(&accumulator, 0, sizeof(accumulator)); + + while (pids) { + netdata_publish_vfs_t *w = &pids->vfs; + + accumulator.write_call += w->write_call; + accumulator.writev_call += w->writev_call; + accumulator.read_call += w->read_call; + accumulator.readv_call += w->readv_call; + accumulator.unlink_call += w->unlink_call; + accumulator.fsync_call += w->fsync_call; + accumulator.open_call += w->open_call; + accumulator.create_call += w->create_call; + + accumulator.write_bytes += w->write_bytes; + accumulator.writev_bytes += w->writev_bytes; + accumulator.read_bytes += w->read_bytes; + accumulator.readv_bytes += w->readv_bytes; + + accumulator.write_err += w->write_err; + accumulator.writev_err += w->writev_err; + accumulator.read_err += w->read_err; + accumulator.readv_err += w->readv_err; + accumulator.unlink_err += w->unlink_err; + accumulator.fsync_err += w->fsync_err; + accumulator.open_err += w->open_err; + accumulator.create_err += w->create_err; + + pids = pids->next; + } + + // These conditions were added, because we are using incremental algorithm + vfs->write_call = (accumulator.write_call >= vfs->write_call) ? accumulator.write_call : vfs->write_call; + vfs->writev_call = (accumulator.writev_call >= vfs->writev_call) ? 
accumulator.writev_call : vfs->writev_call; + vfs->read_call = (accumulator.read_call >= vfs->read_call) ? accumulator.read_call : vfs->read_call; + vfs->readv_call = (accumulator.readv_call >= vfs->readv_call) ? accumulator.readv_call : vfs->readv_call; + vfs->unlink_call = (accumulator.unlink_call >= vfs->unlink_call) ? accumulator.unlink_call : vfs->unlink_call; + vfs->fsync_call = (accumulator.fsync_call >= vfs->fsync_call) ? accumulator.fsync_call : vfs->fsync_call; + vfs->open_call = (accumulator.open_call >= vfs->open_call) ? accumulator.open_call : vfs->open_call; + vfs->create_call = (accumulator.create_call >= vfs->create_call) ? accumulator.create_call : vfs->create_call; + + vfs->write_bytes = (accumulator.write_bytes >= vfs->write_bytes) ? accumulator.write_bytes : vfs->write_bytes; + vfs->writev_bytes = (accumulator.writev_bytes >= vfs->writev_bytes) ? accumulator.writev_bytes : vfs->writev_bytes; + vfs->read_bytes = (accumulator.read_bytes >= vfs->read_bytes) ? accumulator.read_bytes : vfs->read_bytes; + vfs->readv_bytes = (accumulator.readv_bytes >= vfs->readv_bytes) ? accumulator.readv_bytes : vfs->readv_bytes; + + vfs->write_err = (accumulator.write_err >= vfs->write_err) ? accumulator.write_err : vfs->write_err; + vfs->writev_err = (accumulator.writev_err >= vfs->writev_err) ? accumulator.writev_err : vfs->writev_err; + vfs->read_err = (accumulator.read_err >= vfs->read_err) ? accumulator.read_err : vfs->read_err; + vfs->readv_err = (accumulator.readv_err >= vfs->readv_err) ? accumulator.readv_err : vfs->readv_err; + vfs->unlink_err = (accumulator.unlink_err >= vfs->unlink_err) ? accumulator.unlink_err : vfs->unlink_err; + vfs->fsync_err = (accumulator.fsync_err >= vfs->fsync_err) ? accumulator.fsync_err : vfs->fsync_err; + vfs->open_err = (accumulator.open_err >= vfs->open_err) ? accumulator.open_err : vfs->open_err; + vfs->create_err = (accumulator.create_err >= vfs->create_err) ? 
accumulator.create_err : vfs->create_err; +} + +/** + * Create specific VFS charts + * + * Create charts for cgroup/application. + * + * @param type the chart type. + * @param em the main thread structure. + */ +static void ebpf_create_specific_vfs_charts(char *type, ebpf_module_t *em) +{ + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_FILE_DELETED,"Files deleted", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_UNLINK_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5500, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_UNLINK], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, "Write to disk", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_WRITE_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5501, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, "Fails to write", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_WRITE_ERROR_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5502, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + } + + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS, "Read from disk", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_READ_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5503, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(type, 
NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, "Fails to read", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_READ_ERROR_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5504, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + } + + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, "Bytes written on disk", + EBPF_COMMON_DIMENSION_BYTES, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_WRITE_BYTES_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5505, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_BYTES, "Bytes read from disk", + EBPF_COMMON_DIMENSION_BYTES, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_READ_BYTES_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5506, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC, "Calls for <code>vfs_fsync</code>", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NULL, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5507, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, "Sync error", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NULL, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5508, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + } + + 
ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN, "Calls for <code>vfs_open</code>", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NULL, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5509, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, "Open error", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NULL, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5510, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + } + + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE, "Calls for <code>vfs_create</code>", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NULL, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5511, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + if (em->mode < MODE_ENTRY) { + ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, "Create error", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NULL, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5512, + ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE], + 1, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + } +} + +/** + * Obsolete specific VFS charts + * + * Obsolete charts for cgroup/application. + * + * @param type the chart type. + * @param em the main thread structure. 
+ */ +static void ebpf_obsolete_specific_vfs_charts(char *type, ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_DELETED, "Files deleted", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_UNLINK_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5500, em->update_every); + + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, "Write to disk", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_WRITE_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5501, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, "Fails to write", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_WRITE_ERROR_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5502, em->update_every); + } + + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS, "Read from disk", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_READ_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5503, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, "Fails to read", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_READ_ERROR_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5504, em->update_every); + } + + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, "Bytes written on disk", + EBPF_COMMON_DIMENSION_BYTES, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_WRITE_BYTES_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5505, em->update_every); + + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_READ_BYTES, "Bytes read from disk", + EBPF_COMMON_DIMENSION_BYTES, NETDATA_VFS_GROUP, + 
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_READ_BYTES_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5506, em->update_every); + + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_FSYNC, "Calls for <code>vfs_fsync</code>", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NULL, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5507, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, "Sync error", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NULL, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5508, em->update_every); + } + + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_OPEN, "Calls for <code>vfs_open</code>", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NULL, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5509, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, "Open error", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NULL, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5510, em->update_every); + } + + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_CREATE, "Calls for <code>vfs_create</code>", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NULL, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5511, em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, "Create error", + EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, NULL, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5512, em->update_every); + } +} + +/* + * Send specific VFS data + * + * Send data for specific cgroup/apps. 
+ * + * @param type chart type + * @param values structure with values that will be sent to netdata + */ +static void ebpf_send_specific_vfs_data(char *type, netdata_publish_vfs_t *values, ebpf_module_t *em) +{ + write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_DELETED); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_UNLINK].name, (long long)values->unlink_call); + write_end_chart(); + + write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE].name, + (long long)values->write_call + (long long)values->writev_call); + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE].name, + (long long)values->write_err + (long long)values->writev_err); + write_end_chart(); + } + + write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ].name, + (long long)values->read_call + (long long)values->readv_call); + write_end_chart(); + + if (em->mode < MODE_ENTRY) { + write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ].name, + (long long)values->read_err + (long long)values->readv_err); + write_end_chart(); + } + + write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE].name, + (long long)values->write_bytes + (long long)values->writev_bytes); + write_end_chart(); + + write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_BYTES); + write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ].name, + (long long)values->read_bytes + (long long)values->readv_bytes); + write_end_chart(); + + write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC); + 
write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC].name,
+                          (long long)values->fsync_call);
+    write_end_chart();
+
+    if (em->mode < MODE_ENTRY) {
+        write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR);
+        write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC].name,
+                              (long long)values->fsync_err);
+        write_end_chart();
+    }
+
+    write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN);
+    write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN].name,
+                          (long long)values->open_call);
+    write_end_chart();
+
+    if (em->mode < MODE_ENTRY) {
+        write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR);
+        write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN].name,
+                              (long long)values->open_err);
+        write_end_chart();
+    }
+
+    write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE);
+    write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE].name,
+                          (long long)values->create_call);
+    write_end_chart();
+
+    if (em->mode < MODE_ENTRY) {
+        write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR);
+        write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE].name,
+                              (long long)values->create_err);
+        write_end_chart();
+    }
+}
+
+/**
+ * Create Systemd VFS Charts
+ *
+ * Create the VFS charts used when systemd is enabled.
+ *
+ * One stacked chart is requested per VFS metric through ebpf_create_charts_on_systemd(),
+ * using the fixed chart orders 20065-20077. The error charts (write, read, fsync, open and
+ * create errors) are only requested when em->mode < MODE_ENTRY.
+ *
+ * The unlink, write and read charts pass a NETDATA_SYSTEMD_VFS_*_CONTEXT string; the fsync,
+ * open and create charts pass NULL in that position.
+ *
+ * @param em the main collector structure; em->mode and em->update_every are read.
+ **/
+static void ebpf_create_systemd_vfs_charts(ebpf_module_t *em)
+{
+    ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_FILE_DELETED, "Files deleted",
+                                  EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP,
+                                  NETDATA_EBPF_CHART_TYPE_STACKED, 20065,
+                                  ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_UNLINK_CONTEXT,
+                                  NETDATA_EBPF_MODULE_NAME_VFS, em->update_every);
+
+    ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, "Write to disk",
+                                  EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP,
+                                  NETDATA_EBPF_CHART_TYPE_STACKED, 20066,
+                                  ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_WRITE_CONTEXT,
+                                  NETDATA_EBPF_MODULE_NAME_VFS, em->update_every);
+
+    if (em->mode < MODE_ENTRY) { // error charts are skipped when em->mode >= MODE_ENTRY
+        ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, "Fails to write",
+                                      EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP,
+                                      NETDATA_EBPF_CHART_TYPE_STACKED, 20067,
+                                      ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
+                                      NETDATA_SYSTEMD_VFS_WRITE_ERROR_CONTEXT,
+                                      NETDATA_EBPF_MODULE_NAME_VFS, em->update_every);
+    }
+
+    ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_READ_CALLS, "Read from disk",
+                                  EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP,
+                                  NETDATA_EBPF_CHART_TYPE_STACKED, 20068,
+                                  ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_READ_CONTEXT,
+                                  NETDATA_EBPF_MODULE_NAME_VFS, em->update_every);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, "Fails to read",
+                                      EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP,
+                                      NETDATA_EBPF_CHART_TYPE_STACKED, 20069,
+                                      ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
+                                      NETDATA_SYSTEMD_VFS_READ_ERROR_CONTEXT,
+                                      NETDATA_EBPF_MODULE_NAME_VFS, em->update_every);
+    }
+
+    ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, "Bytes written on disk",
+                                  EBPF_COMMON_DIMENSION_BYTES, NETDATA_VFS_CGROUP_GROUP,
+                                  NETDATA_EBPF_CHART_TYPE_STACKED, 20070,
+                                  ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_WRITE_BYTES_CONTEXT,
+                                  NETDATA_EBPF_MODULE_NAME_VFS, em->update_every);
+
+    ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_READ_BYTES, "Bytes read from disk",
+                                  EBPF_COMMON_DIMENSION_BYTES, NETDATA_VFS_CGROUP_GROUP,
+                                  NETDATA_EBPF_CHART_TYPE_STACKED, 20071,
+                                  ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_READ_BYTES_CONTEXT,
+                                  NETDATA_EBPF_MODULE_NAME_VFS, em->update_every);
+
+    ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_FSYNC, "Calls to <code>vfs_fsync</code>",
+                                  EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP,
+                                  NETDATA_EBPF_CHART_TYPE_STACKED, 20072,
+                                  ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NULL, // NOTE(review): no context string here, unlike the charts above - confirm intended
+                                  NETDATA_EBPF_MODULE_NAME_VFS, em->update_every);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, "Sync error",
+                                      EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP,
+                                      NETDATA_EBPF_CHART_TYPE_STACKED, 20073,
+                                      ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NULL,
+                                      NETDATA_EBPF_MODULE_NAME_VFS, em->update_every);
+    }
+    ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_OPEN, "Calls to <code>vfs_open</code>",
+                                  EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP,
+                                  NETDATA_EBPF_CHART_TYPE_STACKED, 20074,
+                                  ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NULL,
+                                  NETDATA_EBPF_MODULE_NAME_VFS, em->update_every);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, "Open error",
+                                      EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP,
+                                      NETDATA_EBPF_CHART_TYPE_STACKED, 20075,
+                                      ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NULL,
+                                      NETDATA_EBPF_MODULE_NAME_VFS, em->update_every);
+    }
+
+    ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_CREATE, "Calls to <code>vfs_create</code>",
+                                  EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP,
+                                  NETDATA_EBPF_CHART_TYPE_STACKED, 20076,
+                                  ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NULL,
+                                  NETDATA_EBPF_MODULE_NAME_VFS, em->update_every);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, "Create error",
+                                      EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP,
+                                      NETDATA_EBPF_CHART_TYPE_STACKED, 20077,
+                                      ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NULL,
+                                      NETDATA_EBPF_MODULE_NAME_VFS, em->update_every);
+    }
+}
+
+/**
+ * Send Systemd charts
+ *
+ * Send collected data to Netdata.
+ *
+ * Every chart gets one dimension per cgroup target that is a systemd service (ect->systemd)
+ * and was updated in this iteration (ect->updated). Error charts are only written when
+ * em->mode < MODE_ENTRY.
+ *
+ * @param em the main collector structure
+ *
+ * @return It returns the status for chart creation: zero when a systemd service was not updated
+ *         (its dimension has to be removed, so the caller recreates the charts), otherwise 1 to
+ *         avoid chart recreation.
+ */
+static int ebpf_send_systemd_vfs_charts(ebpf_module_t *em)
+{
+    int ret = 1;
+    ebpf_cgroup_target_t *ect;
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_DELETED);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            write_chart_dimension(ect->name, ect->publish_systemd_vfs.unlink_call);
+        } else if (unlikely(ect->systemd)) {
+            // Only a stale systemd service requires chart recreation. Targets that are not
+            // systemd services never own a dimension here, so they must not reset the status;
+            // otherwise the charts would be recreated on every iteration.
+            ret = 0;
+        }
+    }
+    write_end_chart();
+
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            write_chart_dimension(ect->name, ect->publish_systemd_vfs.write_call +
+                                  ect->publish_systemd_vfs.writev_call);
+        }
+    }
+    write_end_chart();
+
+    if (em->mode < MODE_ENTRY) {
+        write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR);
+        for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+            if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+                write_chart_dimension(ect->name, ect->publish_systemd_vfs.write_err +
+                                      ect->publish_systemd_vfs.writev_err);
+            }
+        }
+        write_end_chart();
+    }
+
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            write_chart_dimension(ect->name, ect->publish_systemd_vfs.read_call +
+                                  ect->publish_systemd_vfs.readv_call);
+        }
+    }
+    write_end_chart();
+
+    if (em->mode < MODE_ENTRY) {
+        write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR);
+        for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+            if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+                write_chart_dimension(ect->name, ect->publish_systemd_vfs.read_err +
+                                      ect->publish_systemd_vfs.readv_err);
+            }
+        }
+        write_end_chart();
+    }
+
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            write_chart_dimension(ect->name, ect->publish_systemd_vfs.write_bytes +
+                                  ect->publish_systemd_vfs.writev_bytes);
+        }
+    }
+    write_end_chart();
+
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_BYTES);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            write_chart_dimension(ect->name, ect->publish_systemd_vfs.read_bytes +
+                                  ect->publish_systemd_vfs.readv_bytes);
+        }
+    }
+    write_end_chart();
+
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            write_chart_dimension(ect->name, ect->publish_systemd_vfs.fsync_call);
+        }
+    }
+    write_end_chart();
+
+    if (em->mode < MODE_ENTRY) {
+        write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR);
+        for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+            if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+                write_chart_dimension(ect->name, ect->publish_systemd_vfs.fsync_err);
+            }
+        }
+        write_end_chart();
+    }
+
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            write_chart_dimension(ect->name, ect->publish_systemd_vfs.open_call);
+        }
+    }
+    write_end_chart();
+
+    if (em->mode < MODE_ENTRY) {
+        write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR);
+        for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+            if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+                write_chart_dimension(ect->name, ect->publish_systemd_vfs.open_err);
+            }
+        }
+        write_end_chart();
+    }
+
+    write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE);
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+            write_chart_dimension(ect->name, ect->publish_systemd_vfs.create_call);
+        }
+    }
+    write_end_chart();
+
+    if (em->mode < MODE_ENTRY) {
+        write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR);
+        for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+            if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+                write_chart_dimension(ect->name, ect->publish_systemd_vfs.create_err);
+            }
+        }
+        write_end_chart();
+    }
+
+    return ret;
+}
+
+/**
+ * Send data to Netdata calling auxiliary functions.
+ *
+ * Holding mutex_cgroup_shm, this function sums the per-PID values of every cgroup target,
+ * publishes the systemd charts when shm_ebpf_cgroup.header->systemd_enabled is set, and then
+ * creates, updates or obsoletes the specific charts of every target that is not a systemd
+ * service, tracking chart existence with NETDATA_EBPF_CGROUP_HAS_VFS_CHART.
+ *
+ * @param em the main collector structure
+*/
+static void ebpf_vfs_send_cgroup_data(ebpf_module_t *em)
+{
+    if (!ebpf_cgroup_pids)
+        return;
+
+    pthread_mutex_lock(&mutex_cgroup_shm);
+    ebpf_cgroup_target_t *ect;
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        ebpf_vfs_sum_cgroup_pids(&ect->publish_systemd_vfs, ect->pids);
+    }
+
+    int has_systemd = shm_ebpf_cgroup.header->systemd_enabled;
+    if (has_systemd) {
+        // Persists across calls: zero means the systemd charts must be (re)created first.
+        static int systemd_charts = 0;
+        if (!systemd_charts) {
+            ebpf_create_systemd_vfs_charts(em);
+            systemd_charts = 1;
+        }
+
+        systemd_charts = ebpf_send_systemd_vfs_charts(em);
+    }
+
+    for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+        if (ect->systemd)
+            continue;
+
+        if (!(ect->flags & NETDATA_EBPF_CGROUP_HAS_VFS_CHART) && ect->updated) {
+            ebpf_create_specific_vfs_charts(ect->name, em);
+            ect->flags |= NETDATA_EBPF_CGROUP_HAS_VFS_CHART;
+        }
+
+        if (ect->flags & NETDATA_EBPF_CGROUP_HAS_VFS_CHART) {
+            if (ect->updated) {
+                ebpf_send_specific_vfs_data(ect->name, &ect->publish_systemd_vfs, em);
+            } else {
+                // The target stopped being updated: mark its charts obsolete and forget them.
+                ebpf_obsolete_specific_vfs_charts(ect->name, em);
+                ect->flags &= ~NETDATA_EBPF_CGROUP_HAS_VFS_CHART;
+            }
+        }
+    }
+
+    pthread_mutex_unlock(&mutex_cgroup_shm);
+}
+
+/**
+ * Main loop for this collector.
+ *
+ * Creates the joinable thread that runs ebpf_vfs_read_hash and then loops until
+ * close_ebpf_plugin is set. Every iteration waits on collect_data_cond_var; on every
+ * update_every-th wakeup it reads the apps and cgroup data (when enabled) and, holding `lock`,
+ * sends the global, apps and cgroup charts.
+ *
+ * @param em the structure with thread information
+ */
+static void vfs_collector(ebpf_module_t *em)
+{
+    vfs_threads.thread = mallocz(sizeof(netdata_thread_t));
+    vfs_threads.start_routine = ebpf_vfs_read_hash;
+
+    netdata_thread_create(vfs_threads.thread, vfs_threads.name, NETDATA_THREAD_OPTION_JOINABLE,
+                          ebpf_vfs_read_hash, em);
+
+    int apps = em->apps_charts;
+    int cgroups = em->cgroup_charts;
+    int update_every = em->update_every;
+    int counter = update_every - 1; // the first wakeup already reaches update_every and publishes
+    while (!close_ebpf_plugin) {
+        pthread_mutex_lock(&collect_data_mutex);
+        pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); // NOTE(review): no predicate is re-checked after the wait - confirm spurious wakeups are acceptable
+
+        if (++counter == update_every) {
+            counter = 0;
+            if (apps)
+                ebpf_vfs_read_apps();
+
+            if (cgroups)
+                read_update_vfs_cgroup();
+
+            pthread_mutex_lock(&lock);
+
+            ebpf_vfs_send_data(em);
+            fflush(stdout); // NOTE(review): flushed before the apps/cgroup data below are written - confirm intended
+
+            if (apps)
+                ebpf_vfs_send_apps_data(em, apps_groups_root_target);
+
+            if (cgroups)
+                ebpf_vfs_send_cgroup_data(em);
+
+            pthread_mutex_unlock(&lock);
+        }
+        pthread_mutex_unlock(&collect_data_mutex);
+    }
+}
+
+/*****************************************************************
+ *
+ *  FUNCTIONS TO CREATE CHARTS
+ *
+ *****************************************************************/
+
+/**
+ * Create IO chart
+ *
+ * @param family the chart family
+ * @param name the chart name
+ * @param axis the axis label
+ * @param web the group name used to attach the chart on dashboard
+ * @param order the order number of the specified chart
+ * @param algorithm the algorithm used to make the charts.
+ * @param update_every value to overwrite the update frequency set by the server.
+ */
+static void ebpf_create_io_chart(char *family, char *name, char *axis, char *web,
+                                 int order, int algorithm, int update_every)
+{
+    printf("CHART %s.%s '' 'Bytes written and read' '%s' '%s' '' line %d %d '' 'ebpf.plugin' 'filesystem'\n",
+           family,
+           name,
+           axis,
+           web,
+           order,
+           update_every);
+
+    printf("DIMENSION %s %s %s 1 1\n", // read dimension: multiplier 1, divisor 1
+           vfs_id_names[NETDATA_KEY_PUBLISH_VFS_READ],
+           vfs_dimension_names[NETDATA_KEY_PUBLISH_VFS_READ],
+           ebpf_algorithms[algorithm]);
+    printf("DIMENSION %s %s %s -1 1\n", // write dimension: multiplier -1, divisor 1
+           vfs_id_names[NETDATA_KEY_PUBLISH_VFS_WRITE],
+           vfs_dimension_names[NETDATA_KEY_PUBLISH_VFS_WRITE],
+           ebpf_algorithms[algorithm]);
+}
+
+/**
+ * Create global charts
+ *
+ * Call ebpf_create_chart to create the charts for the collector, plus ebpf_create_io_chart for
+ * the bytes chart. The error charts (IO, fsync, open and create errors) are only created when
+ * em->mode < MODE_ENTRY.
+ *
+ * @param em a pointer to the structure with the default values.
+ */
+static void ebpf_create_global_charts(ebpf_module_t *em)
+{
+    ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY,
+                      NETDATA_VFS_FILE_CLEAN_COUNT,
+                      "Remove files",
+                      EBPF_COMMON_DIMENSION_CALL,
+                      NETDATA_VFS_GROUP,
+                      NULL,
+                      NETDATA_EBPF_CHART_TYPE_LINE,
+                      NETDATA_CHART_PRIO_FILESYSTEM_VFS_CLEAN,
+                      ebpf_create_global_dimension,
+                      &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_UNLINK],
+                      1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+
+    ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY,
+                      NETDATA_VFS_FILE_IO_COUNT,
+                      "Calls to IO",
+                      EBPF_COMMON_DIMENSION_CALL,
+                      NETDATA_VFS_GROUP,
+                      NULL,
+                      NETDATA_EBPF_CHART_TYPE_LINE,
+                      NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_COUNT,
+                      ebpf_create_global_dimension,
+                      &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ],
+                      2, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); // presumably 2 entries starting at READ (READ and WRITE) - verify against ebpf_create_chart
+
+    ebpf_create_io_chart(NETDATA_FILESYSTEM_FAMILY,
+                         NETDATA_VFS_IO_FILE_BYTES, EBPF_COMMON_DIMENSION_BYTES,
+                         NETDATA_VFS_GROUP,
+                         NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_BYTES,
+                         NETDATA_EBPF_INCREMENTAL_IDX, em->update_every);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY,
+                          NETDATA_VFS_FILE_ERR_COUNT,
+                          "Fails to write or read",
+                          EBPF_COMMON_DIMENSION_CALL,
+                          NETDATA_VFS_GROUP,
+                          NULL,
+                          NETDATA_EBPF_CHART_TYPE_LINE,
+                          NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_EBYTES,
+                          ebpf_create_global_dimension,
+                          &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ],
+                          2, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+    }
+
+    ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY,
+                      NETDATA_VFS_FSYNC,
+                      "Calls for <code>vfs_fsync</code>",
+                      EBPF_COMMON_DIMENSION_CALL,
+                      NETDATA_VFS_GROUP,
+                      NULL,
+                      NETDATA_EBPF_CHART_TYPE_LINE,
+                      NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_FSYNC,
+                      ebpf_create_global_dimension,
+                      &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC],
+                      1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY,
+                          NETDATA_VFS_FSYNC_ERR,
+                          "Fails to synchronize",
+                          EBPF_COMMON_DIMENSION_CALL,
+                          NETDATA_VFS_GROUP,
+                          NULL,
+                          NETDATA_EBPF_CHART_TYPE_LINE,
+                          NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_EFSYNC,
+                          ebpf_create_global_dimension,
+                          &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC],
+                          1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+    }
+
+    ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY,
+                      NETDATA_VFS_OPEN,
+                      "Calls for <code>vfs_open</code>",
+                      EBPF_COMMON_DIMENSION_CALL,
+                      NETDATA_VFS_GROUP,
+                      NULL,
+                      NETDATA_EBPF_CHART_TYPE_LINE,
+                      NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_OPEN,
+                      ebpf_create_global_dimension,
+                      &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN],
+                      1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY,
+                          NETDATA_VFS_OPEN_ERR,
+                          "Fails to open a file",
+                          EBPF_COMMON_DIMENSION_CALL,
+                          NETDATA_VFS_GROUP,
+                          NULL,
+                          NETDATA_EBPF_CHART_TYPE_LINE,
+                          NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_EOPEN,
+                          ebpf_create_global_dimension,
+                          &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN],
+                          1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+    }
+
+    ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY,
+                      NETDATA_VFS_CREATE,
+                      "Calls for <code>vfs_create</code>",
+                      EBPF_COMMON_DIMENSION_CALL,
+                      NETDATA_VFS_GROUP,
+                      NULL,
+                      NETDATA_EBPF_CHART_TYPE_LINE,
+                      NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_CREATE,
+                      ebpf_create_global_dimension,
+                      &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE],
+                      1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY,
+                          NETDATA_VFS_CREATE_ERR,
+                          "Fails to create a file.",
+                          EBPF_COMMON_DIMENSION_CALL,
+                          NETDATA_VFS_GROUP,
+                          NULL,
+                          NETDATA_EBPF_CHART_TYPE_LINE,
+                          NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_ECREATE,
+                          ebpf_create_global_dimension,
+                          &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE],
+                          1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+    }
+}
+
+/**
+ * Create VFS apps charts
+ *
+ * Call ebpf_create_charts_on_apps to create the VFS charts on apps submenu, using the fixed
+ * chart orders 20065-20077. The error charts are only created when em->mode < MODE_ENTRY.
+ *
+ * @param em   a pointer to the structure with the default values.
+ * @param ptr  a pointer for the targets; it is used as a `struct target *`.
+ **/
+void ebpf_vfs_create_apps_charts(struct ebpf_module *em, void *ptr)
+{
+    struct target *root = ptr;
+
+    ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_DELETED,
+                               "Files deleted",
+                               EBPF_COMMON_DIMENSION_CALL,
+                               NETDATA_VFS_GROUP,
+                               NETDATA_EBPF_CHART_TYPE_STACKED,
+                               20065,
+                               ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
+                               root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+
+    ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS,
+                               "Write to disk",
+                               EBPF_COMMON_DIMENSION_CALL,
+                               NETDATA_VFS_GROUP,
+                               NETDATA_EBPF_CHART_TYPE_STACKED,
+                               20066,
+                               ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
+                               root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR,
+                                   "Fails to write",
+                                   EBPF_COMMON_DIMENSION_CALL,
+                                   NETDATA_VFS_GROUP,
+                                   NETDATA_EBPF_CHART_TYPE_STACKED,
+                                   20067,
+                                   ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
+                                   root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+    }
+
+    ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_READ_CALLS,
+                               "Read from disk",
+                               EBPF_COMMON_DIMENSION_CALL,
+                               NETDATA_VFS_GROUP,
+                               NETDATA_EBPF_CHART_TYPE_STACKED,
+                               20068,
+                               ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
+                               root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR,
+                                   "Fails to read",
+                                   EBPF_COMMON_DIMENSION_CALL,
+                                   NETDATA_VFS_GROUP,
+                                   NETDATA_EBPF_CHART_TYPE_STACKED,
+                                   20069,
+                                   ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
+                                   root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+    }
+
+    ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES,
+                               "Bytes written on disk", EBPF_COMMON_DIMENSION_BYTES,
+                               NETDATA_VFS_GROUP,
+                               NETDATA_EBPF_CHART_TYPE_STACKED,
+                               20070,
+                               ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
+                               root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+
+    ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_READ_BYTES,
+                               "Bytes read from disk", EBPF_COMMON_DIMENSION_BYTES,
+                               NETDATA_VFS_GROUP,
+                               NETDATA_EBPF_CHART_TYPE_STACKED,
+                               20071,
+                               ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
+                               root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+
+    ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_FSYNC,
+                               "Calls for <code>vfs_fsync</code>", EBPF_COMMON_DIMENSION_CALL,
+                               NETDATA_VFS_GROUP,
+                               NETDATA_EBPF_CHART_TYPE_STACKED,
+                               20072,
+                               ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
+                               root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR,
+                                   "Sync error",
+                                   EBPF_COMMON_DIMENSION_CALL,
+                                   NETDATA_VFS_GROUP,
+                                   NETDATA_EBPF_CHART_TYPE_STACKED,
+                                   20073,
+                                   ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
+                                   root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+    }
+
+    ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_OPEN,
+                               "Calls for <code>vfs_open</code>", EBPF_COMMON_DIMENSION_CALL,
+                               NETDATA_VFS_GROUP,
+                               NETDATA_EBPF_CHART_TYPE_STACKED,
+                               20074,
+                               ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
+                               root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR,
+                                   "Open error",
+                                   EBPF_COMMON_DIMENSION_CALL,
+                                   NETDATA_VFS_GROUP,
+                                   NETDATA_EBPF_CHART_TYPE_STACKED,
+                                   20075,
+                                   ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
+                                   root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+    }
+
+    ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_CREATE,
+                               "Calls for <code>vfs_create</code>", EBPF_COMMON_DIMENSION_CALL,
+                               NETDATA_VFS_GROUP,
+                               NETDATA_EBPF_CHART_TYPE_STACKED,
+                               20076,
+                               ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
+                               root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+
+    if (em->mode < MODE_ENTRY) {
+        ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR,
+                                   "Create error",
+                                   EBPF_COMMON_DIMENSION_CALL,
+                                   NETDATA_VFS_GROUP,
+                                   NETDATA_EBPF_CHART_TYPE_STACKED,
+                                   20077,
+                                   ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
+                                   root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+    }
+}
+
+/*****************************************************************
+ *
+ *  FUNCTIONS TO START THREAD
+ *
+ *****************************************************************/
+
+/**
+ * Allocate vectors used with this thread.
+ * We are not testing the return, because callocz does this and shutdown the software
+ * case it was not possible to allocate.
+ *
+ * @param apps is apps enabled?
+ */
+static void ebpf_vfs_allocate_global_vectors(int apps)
+{
+    memset(vfs_aggregated_data, 0, sizeof(vfs_aggregated_data));
+    memset(vfs_publish_aggregated, 0, sizeof(vfs_publish_aggregated));
+
+    vfs_hash_values = callocz(ebpf_nprocs, sizeof(netdata_idx_t));
+    vfs_vector = callocz(ebpf_nprocs, sizeof(netdata_publish_vfs_t));
+
+    if (apps) // the per-PID table (pid_max pointers) is only allocated when apps charts are enabled
+        vfs_pid = callocz((size_t)pid_max, sizeof(netdata_publish_vfs_t *));
+}
+
+/*****************************************************************
+ *
+ *  EBPF VFS THREAD
+ *
+ *****************************************************************/
+
+/**
+ * VFS thread
+ *
+ * Thread used to generate VFS charts.
+ *
+ * It registers ebpf_vfs_cleanup as cleanup handler, allocates the global vectors and, when the
+ * module is enabled and the eBPF program was loaded, creates the global charts and runs
+ * vfs_collector(). Every exit path goes through the `endvfs` label, where the cleanup handler
+ * is popped and executed.
+ *
+ * @param ptr a pointer to `struct ebpf_module`
+ *
+ * @return It always return NULL
+ */
+void *ebpf_vfs_thread(void *ptr)
+{
+    netdata_thread_cleanup_push(ebpf_vfs_cleanup, ptr);
+
+    ebpf_module_t *em = (ebpf_module_t *)ptr;
+    em->maps = vfs_maps;
+
+    ebpf_update_pid_table(&vfs_maps[NETDATA_VFS_PID], em);
+
+    ebpf_vfs_allocate_global_vectors(em->apps_charts); // runs before the `enabled` check, so vectors are allocated even for a disabled module
+
+    if (!em->enabled)
+        goto endvfs;
+
+    probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects);
+    if (!probe_links) {
+        goto endvfs;
+    }
+
+    int algorithms[NETDATA_KEY_PUBLISH_VFS_END] = {
+        NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX,NETDATA_EBPF_INCREMENTAL_IDX,
+        NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX,NETDATA_EBPF_INCREMENTAL_IDX
+    };
+
+    ebpf_global_labels(vfs_aggregated_data, vfs_publish_aggregated, vfs_dimension_names,
+                       vfs_id_names, algorithms, NETDATA_KEY_PUBLISH_VFS_END);
+
+    pthread_mutex_lock(&lock);
+    ebpf_create_global_charts(em);
+    pthread_mutex_unlock(&lock);
+
+    vfs_collector(em);
+
+endvfs:
+    netdata_thread_cleanup_pop(1);
+    return NULL;
+}
diff --git a/collectors/ebpf.plugin/ebpf_vfs.h b/collectors/ebpf.plugin/ebpf_vfs.h
new file mode 100644
index 00000000..0a972c98
--- /dev/null
+++ b/collectors/ebpf.plugin/ebpf_vfs.h
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: 
GPL-3.0-or-later + +#ifndef NETDATA_EBPF_VFS_H +#define NETDATA_EBPF_VFS_H 1 + +// Module name +#define NETDATA_EBPF_MODULE_NAME_VFS "vfs" + +#define NETDATA_DIRECTORY_VFS_CONFIG_FILE "vfs.conf" + +#define NETDATA_LATENCY_VFS_SLEEP_MS 750000ULL + +// Global chart name +#define NETDATA_VFS_FILE_CLEAN_COUNT "vfs_deleted_objects" +#define NETDATA_VFS_FILE_IO_COUNT "vfs_io" +#define NETDATA_VFS_FILE_ERR_COUNT "vfs_io_error" +#define NETDATA_VFS_IO_FILE_BYTES "vfs_io_bytes" +#define NETDATA_VFS_FSYNC "vfs_fsync" +#define NETDATA_VFS_FSYNC_ERR "vfs_fsync_error" +#define NETDATA_VFS_OPEN "vfs_open" +#define NETDATA_VFS_OPEN_ERR "vfs_open_error" +#define NETDATA_VFS_CREATE "vfs_create" +#define NETDATA_VFS_CREATE_ERR "vfs_create_error" + +// Charts created on Apps submenu +#define NETDATA_SYSCALL_APPS_FILE_DELETED "file_deleted" +#define NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS "vfs_write_call" +#define NETDATA_SYSCALL_APPS_VFS_READ_CALLS "vfs_read_call" +#define NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES "vfs_write_bytes" +#define NETDATA_SYSCALL_APPS_VFS_READ_BYTES "vfs_read_bytes" +#define NETDATA_SYSCALL_APPS_VFS_FSYNC "vfs_fsync" +#define NETDATA_SYSCALL_APPS_VFS_OPEN "vfs_open" +#define NETDATA_SYSCALL_APPS_VFS_CREATE "vfs_create" + +#define NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR "vfs_write_error" +#define NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR "vfs_read_error" +#define NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR "vfs_fsync_error" +#define NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR "vfs_open_error" +#define NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR "vfs_create_error" + +// Group used on Dashboard +#define NETDATA_VFS_GROUP "vfs" +#define NETDATA_VFS_CGROUP_GROUP "vfs (eBPF)" + +// Contexts +#define NETDATA_CGROUP_VFS_UNLINK_CONTEXT "cgroup.vfs_unlink" +#define NETDATA_CGROUP_VFS_WRITE_CONTEXT "cgroup.vfs_write" +#define NETDATA_CGROUP_VFS_WRITE_ERROR_CONTEXT "cgroup.vfs_write_error" +#define NETDATA_CGROUP_VFS_READ_CONTEXT "cgroup.vfs_read" +#define 
NETDATA_CGROUP_VFS_READ_ERROR_CONTEXT "cgroup.vfs_read_error" +#define NETDATA_CGROUP_VFS_WRITE_BYTES_CONTEXT "cgroup.vfs_write_bytes" +#define NETDATA_CGROUP_VFS_READ_BYTES_CONTEXT "cgroup.vfs_read_bytes" + +#define NETDATA_SYSTEMD_VFS_UNLINK_CONTEXT "services.vfs_unlink" +#define NETDATA_SYSTEMD_VFS_WRITE_CONTEXT "services.vfs_write" +#define NETDATA_SYSTEMD_VFS_WRITE_ERROR_CONTEXT "services.vfs_write_error" +#define NETDATA_SYSTEMD_VFS_READ_CONTEXT "services.vfs_read" +#define NETDATA_SYSTEMD_VFS_READ_ERROR_CONTEXT "services.vfs_read_error" +#define NETDATA_SYSTEMD_VFS_WRITE_BYTES_CONTEXT "services.vfs_write_bytes" +#define NETDATA_SYSTEMD_VFS_READ_BYTES_CONTEXT "services.vfs_read_bytes" + +typedef struct netdata_publish_vfs { + uint64_t pid_tgid; + uint32_t pid; + uint32_t pad; + + //Counter + uint32_t write_call; + uint32_t writev_call; + uint32_t read_call; + uint32_t readv_call; + uint32_t unlink_call; + uint32_t fsync_call; + uint32_t open_call; + uint32_t create_call; + + //Accumulator + uint64_t write_bytes; + uint64_t writev_bytes; + uint64_t readv_bytes; + uint64_t read_bytes; + + //Counter + uint32_t write_err; + uint32_t writev_err; + uint32_t read_err; + uint32_t readv_err; + uint32_t unlink_err; + uint32_t fsync_err; + uint32_t open_err; + uint32_t create_err; +} netdata_publish_vfs_t; + +enum netdata_publish_vfs_list { + NETDATA_KEY_PUBLISH_VFS_UNLINK, + NETDATA_KEY_PUBLISH_VFS_READ, + NETDATA_KEY_PUBLISH_VFS_WRITE, + NETDATA_KEY_PUBLISH_VFS_FSYNC, + NETDATA_KEY_PUBLISH_VFS_OPEN, + NETDATA_KEY_PUBLISH_VFS_CREATE, + + NETDATA_KEY_PUBLISH_VFS_END +}; + +enum vfs_counters { + NETDATA_KEY_CALLS_VFS_WRITE, + NETDATA_KEY_ERROR_VFS_WRITE, + NETDATA_KEY_BYTES_VFS_WRITE, + + NETDATA_KEY_CALLS_VFS_WRITEV, + NETDATA_KEY_ERROR_VFS_WRITEV, + NETDATA_KEY_BYTES_VFS_WRITEV, + + NETDATA_KEY_CALLS_VFS_READ, + NETDATA_KEY_ERROR_VFS_READ, + NETDATA_KEY_BYTES_VFS_READ, + + NETDATA_KEY_CALLS_VFS_READV, + NETDATA_KEY_ERROR_VFS_READV, + NETDATA_KEY_BYTES_VFS_READV, + + 
NETDATA_KEY_CALLS_VFS_UNLINK, + NETDATA_KEY_ERROR_VFS_UNLINK, + + NETDATA_KEY_CALLS_VFS_FSYNC, + NETDATA_KEY_ERROR_VFS_FSYNC, + + NETDATA_KEY_CALLS_VFS_OPEN, + NETDATA_KEY_ERROR_VFS_OPEN, + + NETDATA_KEY_CALLS_VFS_CREATE, + NETDATA_KEY_ERROR_VFS_CREATE, + + // Keep this as last and don't skip numbers as it is used as element counter + NETDATA_VFS_COUNTER +}; + +enum netdata_vfs_tables { + NETDATA_VFS_PID, + NETDATA_VFS_ALL +}; + +extern netdata_publish_vfs_t **vfs_pid; + +extern void *ebpf_vfs_thread(void *ptr); +extern void ebpf_vfs_create_apps_charts(struct ebpf_module *em, void *ptr); +extern void clean_vfs_pid_structures(); + +extern struct config vfs_config; + +#endif /* NETDATA_EBPF_VFS_H */ diff --git a/collectors/ebpf.plugin/reset_netdata_trace.sh.in b/collectors/ebpf.plugin/reset_netdata_trace.sh.in deleted file mode 100644 index 51d981ee..00000000 --- a/collectors/ebpf.plugin/reset_netdata_trace.sh.in +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -KPROBE_FILE="/sys/kernel/debug/tracing/kprobe_events" - -DATA="$(grep _netdata_ $KPROBE_FILE| cut -d' ' -f1 | cut -d: -f2)" - -for I in $DATA; do - echo "-:$I" > $KPROBE_FILE 2>/dev/null; -done diff --git a/collectors/freebsd.plugin/freebsd_getifaddrs.c b/collectors/freebsd.plugin/freebsd_getifaddrs.c index 1a84902d..0c0c1e7a 100644 --- a/collectors/freebsd.plugin/freebsd_getifaddrs.c +++ b/collectors/freebsd.plugin/freebsd_getifaddrs.c @@ -144,7 +144,7 @@ int do_getifaddrs(int update_every, usec_t dt) { (void)dt; #define DEFAULT_EXCLUDED_INTERFACES "lo*" -#define DEFAULT_PHYSICAL_INTERFACES "igb* ix* cxl* em* ixl* ixlv* bge* ixgbe* vtnet* vmx* re*" +#define DEFAULT_PHYSICAL_INTERFACES "igb* ix* cxl* em* ixl* ixlv* bge* ixgbe* vtnet* vmx* re* igc* dwc*" #define CONFIG_SECTION_GETIFADDRS "plugin:freebsd:getifaddrs" static int enable_new_interfaces = -1; diff --git a/collectors/freebsd.plugin/freebsd_kstat_zfs.c b/collectors/freebsd.plugin/freebsd_kstat_zfs.c index 7d609eaf..8b5cc579 100644 --- 
a/collectors/freebsd.plugin/freebsd_kstat_zfs.c +++ b/collectors/freebsd.plugin/freebsd_kstat_zfs.c @@ -213,8 +213,8 @@ int do_kstat_zfs_misc_arcstats(int update_every, usec_t dt) { // missing mib: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.arc_need_free", mibs.arc_need_free, arcstats.arc_need_free); // missing mib: GETSYSCTL_SIMPLE("kstat.zfs.misc.arcstats.arc_sys_free", mibs.arc_sys_free, arcstats.arc_sys_free); - generate_charts_arcstats("freebsd", "zfs", show_zero_charts, update_every); - generate_charts_arc_summary("freebsd", "zfs", show_zero_charts, update_every); + generate_charts_arcstats("freebsd.plugin", "zfs", show_zero_charts, update_every); + generate_charts_arc_summary("freebsd.plugin", "zfs", show_zero_charts, update_every); return 0; } @@ -252,7 +252,7 @@ int do_kstat_zfs_misc_zio_trim(int update_every, usec_t dt) { NULL, "Successfully TRIMmed bytes", "bytes", - "freebsd", + "freebsd.plugin", "zfs", 2320, update_every, @@ -280,7 +280,7 @@ int do_kstat_zfs_misc_zio_trim(int update_every, usec_t dt) { NULL, "TRIM requests", "requests", - "freebsd", + "freebsd.plugin", "zfs", 2321, update_every, @@ -301,4 +301,4 @@ int do_kstat_zfs_misc_zio_trim(int update_every, usec_t dt) { } return 0; -}
\ No newline at end of file +} diff --git a/collectors/freebsd.plugin/freebsd_sysctl.c b/collectors/freebsd.plugin/freebsd_sysctl.c index 7d48e76d..3dc1fbfb 100644 --- a/collectors/freebsd.plugin/freebsd_sysctl.c +++ b/collectors/freebsd.plugin/freebsd_sysctl.c @@ -641,52 +641,58 @@ int do_hw_intcnt(int update_every, usec_t dt) { static int mib_hw_intrnames[2] = {0, 0}; static char *intrnames = NULL; - size = nintr * (MAXCOMLEN + 1); - if (unlikely(nintr != old_nintr)) - intrnames = reallocz(intrnames, size); - if (unlikely(GETSYSCTL_WSIZE("hw.intrnames", mib_hw_intrnames, intrnames, size))) { + if (unlikely(GETSYSCTL_SIZE("hw.intrnames", mib_hw_intrnames, size))) { error("DISABLED: system.intr chart"); error("DISABLED: system.interrupts chart"); error("DISABLED: hw.intrcnt module"); return 1; } else { + if (unlikely(nintr != old_nintr)) + intrnames = reallocz(intrnames, size); + if (unlikely(GETSYSCTL_WSIZE("hw.intrnames", mib_hw_intrnames, intrnames, size))) { + error("DISABLED: system.intr chart"); + error("DISABLED: system.interrupts chart"); + error("DISABLED: hw.intrcnt module"); + return 1; + } else { - // -------------------------------------------------------------------- - - static RRDSET *st_interrupts = NULL; - - if (unlikely(!st_interrupts)) - st_interrupts = rrdset_create_localhost( - "system", - "interrupts", - NULL, - "interrupts", - NULL, - "System interrupts", - "interrupts/s", - "freebsd.plugin", - "hw.intrcnt", - NETDATA_CHART_PRIO_SYSTEM_INTERRUPTS, - update_every, - RRDSET_TYPE_STACKED - ); - else - rrdset_next(st_interrupts); - - for (i = 0; i < nintr; i++) { - void *p; - - p = intrnames + i * (MAXCOMLEN + 1); - if (unlikely((intrcnt[i] != 0) && (*(char *) p != 0))) { - RRDDIM *rd_interrupts = rrddim_find_active(st_interrupts, p); - - if (unlikely(!rd_interrupts)) - rd_interrupts = rrddim_add(st_interrupts, p, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - - rrddim_set_by_pointer(st_interrupts, rd_interrupts, intrcnt[i]); + // 
-------------------------------------------------------------------- + + static RRDSET *st_interrupts = NULL; + + if (unlikely(!st_interrupts)) + st_interrupts = rrdset_create_localhost( + "system", + "interrupts", + NULL, + "interrupts", + NULL, + "System interrupts", + "interrupts/s", + "freebsd.plugin", + "hw.intrcnt", + NETDATA_CHART_PRIO_SYSTEM_INTERRUPTS, + update_every, + RRDSET_TYPE_STACKED + ); + else + rrdset_next(st_interrupts); + + for (i = 0; i < nintr; i++) { + void *p; + + p = intrnames + i * (strlen(intrnames) + 1); + if (unlikely((intrcnt[i] != 0) && (*(char *) p != 0))) { + RRDDIM *rd_interrupts = rrddim_find_active(st_interrupts, p); + + if (unlikely(!rd_interrupts)) + rd_interrupts = rrddim_add(st_interrupts, p, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_interrupts, rd_interrupts, intrcnt[i]); + } } + rrdset_done(st_interrupts); } - rrdset_done(st_interrupts); } } diff --git a/collectors/freebsd.plugin/plugin_freebsd.c b/collectors/freebsd.plugin/plugin_freebsd.c index 17fec412..97ca1d9a 100644 --- a/collectors/freebsd.plugin/plugin_freebsd.c +++ b/collectors/freebsd.plugin/plugin_freebsd.c @@ -146,7 +146,7 @@ void *freebsd_main(void *ptr) NULL, "Netdata FreeBSD plugin CPU usage", "milliseconds/s", - "freebsd", + "freebsd.plugin", "stats", 132000, localhost->rrd_update_every, @@ -178,7 +178,7 @@ void *freebsd_main(void *ptr) NULL, "Netdata FreeBSD plugin modules durations", "milliseconds/run", - "freebsd", + "freebsd.plugin", "stats", 132001, localhost->rrd_update_every, diff --git a/collectors/freeipmi.plugin/README.md b/collectors/freeipmi.plugin/README.md index 02a61dd2..3ff6f409 100644 --- a/collectors/freeipmi.plugin/README.md +++ b/collectors/freeipmi.plugin/README.md @@ -15,6 +15,11 @@ Netdata has a [freeipmi](https://www.gnu.org/software/freeipmi/) plugin. 2. re-install Netdata from source. The installer will detect that the required libraries are now available and will also build `freeipmi.plugin`. 
+> ❗ In some distributions `libipmimonitoring.pc` is located in an unregistered directory. +> In that case you should find the file and link it to the standard pkg-config directory. Usually, running +> `sudo ln -s /usr/lib/x86_64-linux-gnu/pkgconfig/libipmimonitoring.pc/libipmimonitoring.pc /usr/lib/pkgconfig/libipmimonitoring.pc` +> resolves the issue. + Keep in mind IPMI requires root access, so the plugin is setuid to root. If you just installed the required IPMI tools, please run at least once the command `ipmimonitoring` and verify it returns sensors information. This command initialises IPMI configuration, so that the Netdata plugin will be able to work. diff --git a/collectors/freeipmi.plugin/freeipmi_plugin.c b/collectors/freeipmi.plugin/freeipmi_plugin.c index e9702e78..0141a6a7 100644 --- a/collectors/freeipmi.plugin/freeipmi_plugin.c +++ b/collectors/freeipmi.plugin/freeipmi_plugin.c @@ -15,7 +15,7 @@ * UCRL-CODE-222073 */ -#include "../../libnetdata/libnetdata.h" +#include "libnetdata/libnetdata.h" #include <stdio.h> #include <stdlib.h> diff --git a/collectors/idlejitter.plugin/plugin_idlejitter.h b/collectors/idlejitter.plugin/plugin_idlejitter.h index 62fabea1..6da78a08 100644 --- a/collectors/idlejitter.plugin/plugin_idlejitter.h +++ b/collectors/idlejitter.plugin/plugin_idlejitter.h @@ -3,7 +3,7 @@ #ifndef NETDATA_PLUGIN_IDLEJITTER_H #define NETDATA_PLUGIN_IDLEJITTER_H 1 -#include "../../daemon/common.h" +#include "daemon/common.h" #define NETDATA_PLUGIN_HOOK_IDLEJITTER \ { \ diff --git a/collectors/macos.plugin/macos_fw.c b/collectors/macos.plugin/macos_fw.c index d0b3e0fd..1fa2d39c 100644 --- a/collectors/macos.plugin/macos_fw.c +++ b/collectors/macos.plugin/macos_fw.c @@ -155,7 +155,7 @@ int do_macos_iokit(int update_every, usec_t dt) { , "disk.io" , "Disk I/O Bandwidth" , "KiB/s" - , "macos" + , "macos.plugin" , "iokit" , 2000 , update_every @@ -193,7 +193,7 @@ int do_macos_iokit(int update_every, usec_t dt) { , "disk.ops" , "Disk Completed I/O 
Operations" , "operations/s" - , "macos" + , "macos.plugin" , "iokit" , 2001 , update_every @@ -232,7 +232,7 @@ int do_macos_iokit(int update_every, usec_t dt) { , "disk.util" , "Disk Utilization Time" , "% of time working" - , "macos" + , "macos.plugin" , "iokit" , 2004 , update_every @@ -270,7 +270,7 @@ int do_macos_iokit(int update_every, usec_t dt) { , "disk.iotime" , "Disk Total I/O Time" , "milliseconds/s" - , "macos" + , "macos.plugin" , "iokit" , 2022 , update_every @@ -307,7 +307,7 @@ int do_macos_iokit(int update_every, usec_t dt) { , "disk.await" , "Average Completed I/O Operation Time" , "milliseconds/operation" - , "macos" + , "macos.plugin" , "iokit" , 2005 , update_every @@ -338,7 +338,7 @@ int do_macos_iokit(int update_every, usec_t dt) { , "disk.avgsz" , "Average Completed I/O Operation Bandwidth" , "KiB/operation" - , "macos" + , "macos.plugin" , "iokit" , 2006 , update_every @@ -369,7 +369,7 @@ int do_macos_iokit(int update_every, usec_t dt) { , "disk.svctm" , "Average Service Time" , "milliseconds/operation" - , "macos" + , "macos.plugin" , "iokit" , 2007 , update_every @@ -411,7 +411,7 @@ int do_macos_iokit(int update_every, usec_t dt) { , NULL , "Disk I/O" , "KiB/s" - , "macos" + , "macos.plugin" , "iokit" , 150 , update_every @@ -464,7 +464,7 @@ int do_macos_iokit(int update_every, usec_t dt) { , "disk.space" , title , "GiB" - , "macos" + , "macos.plugin" , "iokit" , 2023 , update_every @@ -497,7 +497,7 @@ int do_macos_iokit(int update_every, usec_t dt) { , "disk.inodes" , title , "inodes" - , "macos" + , "macos.plugin" , "iokit" , 2024 , update_every @@ -543,7 +543,7 @@ int do_macos_iokit(int update_every, usec_t dt) { , "net.net" , "Bandwidth" , "kilobits/s" - , "macos" + , "macos.plugin" , "iokit" , 7000 , update_every @@ -571,7 +571,7 @@ int do_macos_iokit(int update_every, usec_t dt) { , "net.packets" , "Packets" , "packets/s" - , "macos" + , "macos.plugin" , "iokit" , 7001 , update_every @@ -604,7 +604,7 @@ int do_macos_iokit(int 
update_every, usec_t dt) { , "net.errors" , "Interface Errors" , "errors/s" - , "macos" + , "macos.plugin" , "iokit" , 7002 , update_every @@ -633,7 +633,7 @@ int do_macos_iokit(int update_every, usec_t dt) { , "net.drops" , "Interface Drops" , "drops/s" - , "macos" + , "macos.plugin" , "iokit" , 7003 , update_every @@ -660,7 +660,7 @@ int do_macos_iokit(int update_every, usec_t dt) { , "net.events" , "Network Interface Events" , "events/s" - , "macos" + , "macos.plugin" , "iokit" , 7006 , update_every diff --git a/collectors/macos.plugin/macos_mach_smi.c b/collectors/macos.plugin/macos_mach_smi.c index 973b90a2..f2c4623c 100644 --- a/collectors/macos.plugin/macos_mach_smi.c +++ b/collectors/macos.plugin/macos_mach_smi.c @@ -65,7 +65,7 @@ int do_macos_mach_smi(int update_every, usec_t dt) { , "system.cpu" , "Total CPU utilization" , "percentage" - , "macos" + , "macos.plugin" , "mach_smi" , 100 , update_every @@ -119,7 +119,7 @@ int do_macos_mach_smi(int update_every, usec_t dt) { , NULL , "System RAM" , "MiB" - , "macos" + , "macos.plugin" , "mach_smi" , 200 , update_every @@ -166,7 +166,7 @@ int do_macos_mach_smi(int update_every, usec_t dt) { , NULL , "Swap I/O" , "KiB/s" - , "macos" + , "macos.plugin" , "mach_smi" , 250 , update_every @@ -197,7 +197,7 @@ int do_macos_mach_smi(int update_every, usec_t dt) { , NULL , "Memory Page Faults" , "faults/s" - , "macos" + , "macos.plugin" , "mach_smi" , NETDATA_CHART_PRIO_MEM_SYSTEM_PGFAULTS , update_every diff --git a/collectors/macos.plugin/macos_sysctl.c b/collectors/macos.plugin/macos_sysctl.c index 84f75418..b744ebbc 100644 --- a/collectors/macos.plugin/macos_sysctl.c +++ b/collectors/macos.plugin/macos_sysctl.c @@ -240,7 +240,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "System Load Average" , "load" - , "macos" + , "macos.plugin" , "sysctl" , 100 , (update_every < MIN_LOADAVG_UPDATE_EVERY) ? 
MIN_LOADAVG_UPDATE_EVERY : update_every @@ -280,7 +280,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "System Swap" , "MiB" - , "macos" + , "macos.plugin" , "sysctl" , 201 , update_every @@ -342,7 +342,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv4 Bandwidth" , "kilobits/s" - , "macos" + , "macos.plugin" , "sysctl" , 500 , update_every @@ -392,7 +392,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv4 TCP Packets" , "packets/s" - , "macos" + , "macos.plugin" , "sysctl" , 2600 , update_every @@ -422,7 +422,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv4 TCP Errors" , "packets/s" - , "macos" + , "macos.plugin" , "sysctl" , 2700 , update_every @@ -455,7 +455,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv4 TCP Handshake Issues" , "events/s" - , "macos" + , "macos.plugin" , "sysctl" , 2900 , update_every @@ -496,7 +496,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "TCP Connection Aborts" , "connections/s" - , "macos" + , "macos.plugin" , "sysctl" , 3010 , update_every @@ -533,7 +533,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "TCP Out-Of-Order Queue" , "packets/s" - , "macos" + , "macos.plugin" , "sysctl" , 3050 , update_every @@ -567,7 +567,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "TCP SYN Cookies" , "packets/s" - , "macos" + , "macos.plugin" , "sysctl" , 3100 , update_every @@ -605,7 +605,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv4 ECN Statistics" , "packets/s" - , "macos" + , "macos.plugin" , "sysctl" , 8700 , update_every @@ -648,7 +648,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv4 UDP Packets" , "packets/s" - , "macos" + , "macos.plugin" , "sysctl" , 2601 , update_every @@ -678,7 +678,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv4 UDP Errors" , "events/s" - , "macos" + , "macos.plugin" , "sysctl" , 2701 , update_every @@ 
-739,7 +739,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv4 ICMP Packets" , "packets/s" - , "macos" + , "macos.plugin" , "sysctl" , 2602 , update_every @@ -768,7 +768,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv4 ICMP Errors" , "packets/s" - , "macos" + , "macos.plugin" , "sysctl" , 2603 , update_every @@ -801,7 +801,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv4 ICMP Messages" , "packets/s" - , "macos" + , "macos.plugin" , "sysctl" , 2604 , update_every @@ -850,7 +850,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv4 Packets" , "packets/s" - , "macos" + , "macos.plugin" , "sysctl" , 3000 , update_every @@ -884,7 +884,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv4 Fragments Sent" , "packets/s" - , "macos" + , "macos.plugin" , "sysctl" , 3010 , update_every @@ -917,7 +917,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv4 Fragments Reassembly" , "packets/s" - , "macos" + , "macos.plugin" , "sysctl" , 3011 , update_every @@ -950,7 +950,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv4 Errors" , "packets/s" - , "macos" + , "macos.plugin" , "sysctl" , 3002 , update_every @@ -1010,7 +1010,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv6 Packets" , "packets/s" - , "macos" + , "macos.plugin" , "sysctl" , 3000 , update_every @@ -1049,7 +1049,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv6 Fragments Sent" , "packets/s" - , "macos" + , "macos.plugin" , "sysctl" , 3010 , update_every @@ -1088,7 +1088,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv6 Fragments Reassembly" , "packets/s" - , "macos" + , "macos.plugin" , "sysctl" , 3011 , update_every @@ -1134,7 +1134,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv6 Errors" , "packets/s" - , "macos" + , "macos.plugin" , "sysctl" , 3002 , update_every @@ -1196,7 +1196,7 @@ int 
do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv6 ICMP Messages" , "messages/s" - , "macos" + , "macos.plugin" , "sysctl" , 10000 , update_every @@ -1230,7 +1230,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv6 ICMP Redirects" , "redirects/s" - , "macos" + , "macos.plugin" , "sysctl" , 10050 , update_every @@ -1273,7 +1273,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv6 ICMP Errors" , "errors/s" - , "macos" + , "macos.plugin" , "sysctl" , 10100 , update_every @@ -1326,7 +1326,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv6 ICMP Echo" , "messages/s" - , "macos" + , "macos.plugin" , "sysctl" , 10200 , update_every @@ -1366,7 +1366,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv6 Router Messages" , "messages/s" - , "macos" + , "macos.plugin" , "sysctl" , 10400 , update_every @@ -1406,7 +1406,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv6 Neighbor Messages" , "messages/s" - , "macos" + , "macos.plugin" , "sysctl" , 10500 , update_every @@ -1452,7 +1452,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "IPv6 ICMP Types" , "messages/s" - , "macos" + , "macos.plugin" , "sysctl" , 10700 , update_every @@ -1506,7 +1506,7 @@ int do_macos_sysctl(int update_every, usec_t dt) { , NULL , "System Uptime" , "seconds" - , "macos" + , "macos.plugin" , "sysctl" , 1000 , update_every diff --git a/collectors/macos.plugin/plugin_macos.c b/collectors/macos.plugin/plugin_macos.c index 1a64ed81..4566c09e 100644 --- a/collectors/macos.plugin/plugin_macos.c +++ b/collectors/macos.plugin/plugin_macos.c @@ -92,7 +92,7 @@ void *macos_main(void *ptr) NULL, "Netdata macOS plugin CPU usage", "milliseconds/s", - "macos", + "macos.plugin", "stats", 132000, localhost->rrd_update_every, @@ -124,7 +124,7 @@ void *macos_main(void *ptr) NULL, "Netdata macOS plugin modules durations", "milliseconds/run", - "macos", + "macos.plugin", "stats", 132001, 
localhost->rrd_update_every, diff --git a/collectors/macos.plugin/plugin_macos.h b/collectors/macos.plugin/plugin_macos.h index 0815c59c..a66ec085 100644 --- a/collectors/macos.plugin/plugin_macos.h +++ b/collectors/macos.plugin/plugin_macos.h @@ -4,7 +4,7 @@ #ifndef NETDATA_PLUGIN_MACOS_H #define NETDATA_PLUGIN_MACOS_H 1 -#include "../../daemon/common.h" +#include "daemon/common.h" #if (TARGET_OS == OS_MACOS) diff --git a/collectors/nfacct.plugin/plugin_nfacct.c b/collectors/nfacct.plugin/plugin_nfacct.c index acdd0586..7876c231 100644 --- a/collectors/nfacct.plugin/plugin_nfacct.c +++ b/collectors/nfacct.plugin/plugin_nfacct.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#include "../../libnetdata/libnetdata.h" +#include "libnetdata/libnetdata.h" #include <linux/netfilter/nfnetlink_conntrack.h> #include <libmnl/libmnl.h> #include <libnetfilter_acct/libnetfilter_acct.h> diff --git a/collectors/node.d.plugin/named/named.node.js b/collectors/node.d.plugin/named/named.node.js index 04cded8b..668a044c 100644 --- a/collectors/node.d.plugin/named/named.node.js +++ b/collectors/node.d.plugin/named/named.node.js @@ -233,7 +233,7 @@ var named = { x = keys[len]; // we maintain an index of the values found - // mapping them to objects splitted + // mapping them to objects split look = named.lookups.nsstats[x]; if(typeof look === 'undefined') { @@ -418,7 +418,7 @@ var named = { var y = ykeys[ylen]; // we maintain an index of the values found - // mapping them to objects splitted + // mapping them to objects split look = named.lookups.resolver_stats[y]; if(typeof look === 'undefined') { diff --git a/collectors/perf.plugin/perf_plugin.c b/collectors/perf.plugin/perf_plugin.c index 135e7798..151ba907 100644 --- a/collectors/perf.plugin/perf_plugin.c +++ b/collectors/perf.plugin/perf_plugin.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#include "../../libnetdata/libnetdata.h" +#include "libnetdata/libnetdata.h" #include <linux/perf_event.h> @@ 
-9,10 +9,11 @@ // Hardware counters #define NETDATA_CHART_PRIO_PERF_CPU_CYCLES 8800 #define NETDATA_CHART_PRIO_PERF_INSTRUCTIONS 8801 -#define NETDATA_CHART_PRIO_PERF_BRANCH_INSTRUCTIONS 8802 -#define NETDATA_CHART_PRIO_PERF_CACHE 8803 -#define NETDATA_CHART_PRIO_PERF_BUS_CYCLES 8804 -#define NETDATA_CHART_PRIO_PERF_FRONT_BACK_CYCLES 8805 +#define NETDATA_CHART_PRIO_PERF_IPC 8802 +#define NETDATA_CHART_PRIO_PERF_BRANCH_INSTRUCTIONS 8803 +#define NETDATA_CHART_PRIO_PERF_CACHE 8804 +#define NETDATA_CHART_PRIO_PERF_BUS_CYCLES 8805 +#define NETDATA_CHART_PRIO_PERF_FRONT_BACK_CYCLES 8806 // Software counters #define NETDATA_CHART_PRIO_PERF_MIGRATIONS 8810 @@ -436,6 +437,7 @@ static void perf_send_metrics() { static int // Hardware counters cpu_cycles_chart_generated = 0, instructions_chart_generated = 0, + ipc_chart_generated = 0, branch_chart_generated = 0, cache_chart_generated = 0, bus_cycles_chart_generated = 0, @@ -461,7 +463,7 @@ static void perf_send_metrics() { if(unlikely(!cpu_cycles_chart_generated)) { cpu_cycles_chart_generated = 1; - printf("CHART %s.%s '' 'CPU cycles' 'cycles/s' %s '' line %d %d %s\n" + printf("CHART %s.%s '' 'CPU cycles' 'cycles/s' %s '' line %d %d '' %s\n" , RRD_TYPE_PERF , "cpu_cycles" , RRD_FAMILY_HW @@ -501,7 +503,7 @@ static void perf_send_metrics() { if(unlikely(!instructions_chart_generated)) { instructions_chart_generated = 1; - printf("CHART %s.%s '' 'Instructions' 'instructions/s' %s '' line %d %d %s\n" + printf("CHART %s.%s '' 'Instructions' 'instructions/s' %s '' line %d %d '' %s\n" , RRD_TYPE_PERF , "instructions" , RRD_FAMILY_HW @@ -527,11 +529,43 @@ static void perf_send_metrics() { // ------------------------------------------------------------------------ + if(likely(perf_events[EV_ID_INSTRUCTIONS].updated) && likely(perf_events[EV_ID_CPU_CYCLES].updated)) { + if(unlikely(!ipc_chart_generated)) { + ipc_chart_generated = 1; + + printf("CHART %s.%s '' '%s' 'instructions/cycle' %s '' line %d %d '' %s\n" + , RRD_TYPE_PERF + , 
"instructions_per_cycle" + , "Instructions per Cycle(IPC)" + , RRD_FAMILY_HW + , NETDATA_CHART_PRIO_PERF_IPC + , update_every + , PLUGIN_PERF_NAME + ); + printf("DIMENSION %s '' absolute 1 100\n", "ipc"); + } + + printf("BEGIN %s.%s\n" + , RRD_TYPE_PERF + , "instructions_per_cycle" + ); + + calculated_number result = ((calculated_number)perf_events[EV_ID_INSTRUCTIONS].value / + (calculated_number)perf_events[EV_ID_CPU_CYCLES].value) * 100.0; + printf("SET %s = %lld\n" + , "ipc" + , (collected_number) result + ); + printf("END\n"); + } + + // ------------------------------------------------------------------------ + if(likely(perf_events[EV_ID_BRANCH_INSTRUCTIONS].updated || perf_events[EV_ID_BRANCH_MISSES].updated)) { if(unlikely(!branch_chart_generated)) { branch_chart_generated = 1; - printf("CHART %s.%s '' 'Branch instructions' 'instructions/s' %s '' line %d %d %s\n" + printf("CHART %s.%s '' 'Branch instructions' 'instructions/s' %s '' line %d %d '' %s\n" , RRD_TYPE_PERF , "branch_instructions" , RRD_FAMILY_HW @@ -571,7 +605,7 @@ static void perf_send_metrics() { if(unlikely(!cache_chart_generated)) { cache_chart_generated = 1; - printf("CHART %s.%s '' 'Cache operations' 'operations/s' %s '' line %d %d %s\n" + printf("CHART %s.%s '' 'Cache operations' 'operations/s' %s '' line %d %d '' %s\n" , RRD_TYPE_PERF , "cache" , RRD_FAMILY_HW @@ -611,7 +645,7 @@ static void perf_send_metrics() { if(unlikely(!bus_cycles_chart_generated)) { bus_cycles_chart_generated = 1; - printf("CHART %s.%s '' 'Bus cycles' 'cycles/s' %s '' line %d %d %s\n" + printf("CHART %s.%s '' 'Bus cycles' 'cycles/s' %s '' line %d %d '' %s\n" , RRD_TYPE_PERF , "bus_cycles" , RRD_FAMILY_HW @@ -641,7 +675,7 @@ static void perf_send_metrics() { if(unlikely(!stalled_cycles_chart_generated)) { stalled_cycles_chart_generated = 1; - printf("CHART %s.%s '' 'Stalled frontend and backend cycles' 'cycles/s' %s '' line %d %d %s\n" + printf("CHART %s.%s '' 'Stalled frontend and backend cycles' 'cycles/s' %s '' 
line %d %d '' %s\n" , RRD_TYPE_PERF , "stalled_cycles" , RRD_FAMILY_HW @@ -681,7 +715,7 @@ static void perf_send_metrics() { if(unlikely(!migrations_chart_generated)) { migrations_chart_generated = 1; - printf("CHART %s.%s '' 'CPU migrations' 'migrations' %s '' line %d %d %s\n" + printf("CHART %s.%s '' 'CPU migrations' 'migrations' %s '' line %d %d '' %s\n" , RRD_TYPE_PERF , "migrations" , RRD_FAMILY_SW @@ -711,7 +745,7 @@ static void perf_send_metrics() { if(unlikely(!alignment_chart_generated)) { alignment_chart_generated = 1; - printf("CHART %s.%s '' 'Alignment faults' 'faults' %s '' line %d %d %s\n" + printf("CHART %s.%s '' 'Alignment faults' 'faults' %s '' line %d %d '' %s\n" , RRD_TYPE_PERF , "alignment_faults" , RRD_FAMILY_SW @@ -741,7 +775,7 @@ static void perf_send_metrics() { if(unlikely(!emulation_chart_generated)) { emulation_chart_generated = 1; - printf("CHART %s.%s '' 'Emulation faults' 'faults' %s '' line %d %d %s\n" + printf("CHART %s.%s '' 'Emulation faults' 'faults' %s '' line %d %d '' %s\n" , RRD_TYPE_PERF , "emulation_faults" , RRD_FAMILY_SW @@ -772,7 +806,7 @@ static void perf_send_metrics() { if(unlikely(!L1D_chart_generated)) { L1D_chart_generated = 1; - printf("CHART %s.%s '' 'L1D cache operations' 'events/s' %s '' line %d %d %s\n" + printf("CHART %s.%s '' 'L1D cache operations' 'events/s' %s '' line %d %d '' %s\n" , RRD_TYPE_PERF , "l1d_cache" , RRD_FAMILY_CACHE @@ -828,7 +862,7 @@ static void perf_send_metrics() { if(unlikely(!L1D_prefetch_chart_generated)) { L1D_prefetch_chart_generated = 1; - printf("CHART %s.%s '' 'L1D prefetch cache operations' 'prefetches/s' %s '' line %d %d %s\n" + printf("CHART %s.%s '' 'L1D prefetch cache operations' 'prefetches/s' %s '' line %d %d '' %s\n" , RRD_TYPE_PERF , "l1d_cache_prefetch" , RRD_FAMILY_CACHE @@ -858,7 +892,7 @@ static void perf_send_metrics() { if(unlikely(!L1I_chart_generated)) { L1I_chart_generated = 1; - printf("CHART %s.%s '' 'L1I cache operations' 'events/s' %s '' line %d %d %s\n" + 
printf("CHART %s.%s '' 'L1I cache operations' 'events/s' %s '' line %d %d '' %s\n" , RRD_TYPE_PERF , "l1i_cache" , RRD_FAMILY_CACHE @@ -899,7 +933,7 @@ static void perf_send_metrics() { if(unlikely(!LL_chart_generated)) { LL_chart_generated = 1; - printf("CHART %s.%s '' 'LL cache operations' 'events/s' %s '' line %d %d %s\n" + printf("CHART %s.%s '' 'LL cache operations' 'events/s' %s '' line %d %d '' %s\n" , RRD_TYPE_PERF , "ll_cache" , RRD_FAMILY_CACHE @@ -956,7 +990,7 @@ static void perf_send_metrics() { if(unlikely(!DTLB_chart_generated)) { DTLB_chart_generated = 1; - printf("CHART %s.%s '' 'DTLB cache operations' 'events/s' %s '' line %d %d %s\n" + printf("CHART %s.%s '' 'DTLB cache operations' 'events/s' %s '' line %d %d '' %s\n" , RRD_TYPE_PERF , "dtlb_cache" , RRD_FAMILY_CACHE @@ -1012,7 +1046,7 @@ static void perf_send_metrics() { if(unlikely(!ITLB_chart_generated)) { ITLB_chart_generated = 1; - printf("CHART %s.%s '' 'ITLB cache operations' 'events/s' %s '' line %d %d %s\n" + printf("CHART %s.%s '' 'ITLB cache operations' 'events/s' %s '' line %d %d '' %s\n" , RRD_TYPE_PERF , "itlb_cache" , RRD_FAMILY_CACHE @@ -1052,7 +1086,7 @@ static void perf_send_metrics() { if(unlikely(!PBU_chart_generated)) { PBU_chart_generated = 1; - printf("CHART %s.%s '' 'PBU cache operations' 'events/s' %s '' line %d %d %s\n" + printf("CHART %s.%s '' 'PBU cache operations' 'events/s' %s '' line %d %d '' %s\n" , RRD_TYPE_PERF , "pbu_cache" , RRD_FAMILY_CACHE diff --git a/collectors/plugins.d/plugins_d.h b/collectors/plugins.d/plugins_d.h index fd99b358..b9e30e12 100644 --- a/collectors/plugins.d/plugins_d.h +++ b/collectors/plugins.d/plugins_d.h @@ -3,7 +3,7 @@ #ifndef NETDATA_PLUGINS_D_H #define NETDATA_PLUGINS_D_H 1 -#include "../../daemon/common.h" +#include "daemon/common.h" #define NETDATA_PLUGIN_HOOK_PLUGINSD \ { \ diff --git a/collectors/plugins.d/pluginsd_parser.c b/collectors/plugins.d/pluginsd_parser.c index 2d0788d8..c0dcedb6 100644 --- 
a/collectors/plugins.d/pluginsd_parser.c +++ b/collectors/plugins.d/pluginsd_parser.c @@ -152,6 +152,24 @@ PARSER_RC pluginsd_label_action(void *user, char *key, char *value, LABEL_SOURCE return PARSER_RC_OK; } +PARSER_RC pluginsd_clabel_action(void *user, char *key, char *value, LABEL_SOURCE source) +{ + ((PARSER_USER_OBJECT *) user)->chart_labels = add_label_to_list(((PARSER_USER_OBJECT *) user)->chart_labels, key, value, source); + + return PARSER_RC_OK; +} + +PARSER_RC pluginsd_clabel_commit_action(void *user, RRDHOST *host, struct label *new_labels) +{ + RRDSET *st = ((PARSER_USER_OBJECT *)user)->st; + if (unlikely(!st)) { + error("requested CLABEL_COMMIT on host '%s', without a BEGIN, ignoring it.", host->hostname); + return PARSER_RC_OK; + } + + rrdset_update_labels(st, new_labels); + return PARSER_RC_OK; +} PARSER_RC pluginsd_overwrite_action(void *user, RRDHOST *host, struct label *new_labels) { @@ -560,6 +578,38 @@ PARSER_RC pluginsd_label(char **words, void *user, PLUGINSD_ACTION *plugins_act return PARSER_RC_OK; } +PARSER_RC pluginsd_clabel(char **words, void *user, PLUGINSD_ACTION *plugins_action) +{ + if (!words[1] || !words[2] || !words[3]) { + error("Ignoring malformed or empty CHART LABEL command."); + return PARSER_RC_OK; + } + + if (plugins_action->clabel_action) { + PARSER_RC rc = plugins_action->clabel_action(user, words[1], words[2], strtol(words[3], NULL, 10)); + return rc; + } + + return PARSER_RC_OK; +} + +PARSER_RC pluginsd_clabel_commit(char **words, void *user, PLUGINSD_ACTION *plugins_action) +{ + UNUSED(words); + + RRDHOST *host = ((PARSER_USER_OBJECT *) user)->host; + debug(D_PLUGINSD, "requested to commit chart labels"); + + struct label *chart_labels = ((PARSER_USER_OBJECT *)user)->chart_labels; + ((PARSER_USER_OBJECT *)user)->chart_labels = NULL; + + if (plugins_action->clabel_commit_action) { + return plugins_action->clabel_commit_action(user, host, chart_labels); + } + + return PARSER_RC_OK; +} + PARSER_RC pluginsd_overwrite(char 
**words, void *user, PLUGINSD_ACTION *plugins_action) { UNUSED(words); diff --git a/collectors/plugins.d/pluginsd_parser.h b/collectors/plugins.d/pluginsd_parser.h index 61e9c9ba..fb4a45b7 100644 --- a/collectors/plugins.d/pluginsd_parser.h +++ b/collectors/plugins.d/pluginsd_parser.h @@ -3,7 +3,7 @@ #ifndef NETDATA_PLUGINSD_PARSER_H #define NETDATA_PLUGINSD_PARSER_H -#include "../../parser/parser.h" +#include "parser/parser.h" typedef struct parser_user_object { @@ -14,6 +14,7 @@ typedef struct parser_user_object { struct plugind *cd; int trust_durations; struct label *new_labels; + struct label *chart_labels; size_t count; int enabled; uint8_t st_exists; @@ -35,6 +36,8 @@ extern PARSER_RC pluginsd_dimension_action(void *user, RRDSET *st, char *id, cha long multiplier, long divisor, char *options, RRD_ALGORITHM algorithm_type); extern PARSER_RC pluginsd_label_action(void *user, char *key, char *value, LABEL_SOURCE source); extern PARSER_RC pluginsd_overwrite_action(void *user, RRDHOST *host, struct label *new_labels); +extern PARSER_RC pluginsd_clabel_commit_action(void *user, RRDHOST *host, struct label *new_labels); +extern PARSER_RC pluginsd_clabel_action(void *user, char *key, char *value, LABEL_SOURCE source); #endif //NETDATA_PLUGINSD_PARSER_H diff --git a/collectors/proc.plugin/README.md b/collectors/proc.plugin/README.md index 7fff1ec0..a9ce2dfa 100644 --- a/collectors/proc.plugin/README.md +++ b/collectors/proc.plugin/README.md @@ -553,7 +553,7 @@ Each port will have its counters metrics monitored, grouped in the following cha - **Errors Statistics** Many errors counters are provided, presenting statistics for: - - Packets: malformated, sent/received discarded by card/switch, missing ressource + - Packets: malformed, sent/received discarded by card/switch, missing resource - Link: downed, recovered, integrity error, minor error - Other events: Tick Wait to send, buffer overrun diff --git a/collectors/proc.plugin/plugin_proc.h 
b/collectors/proc.plugin/plugin_proc.h index b0d60cd8..18714b54 100644 --- a/collectors/proc.plugin/plugin_proc.h +++ b/collectors/proc.plugin/plugin_proc.h @@ -3,7 +3,7 @@ #ifndef NETDATA_PLUGIN_PROC_H #define NETDATA_PLUGIN_PROC_H 1 -#include "../../daemon/common.h" +#include "daemon/common.h" #if (TARGET_OS == OS_LINUX) diff --git a/collectors/proc.plugin/proc_interrupts.c b/collectors/proc.plugin/proc_interrupts.c index 73b11717..2db980a0 100644 --- a/collectors/proc.plugin/proc_interrupts.c +++ b/collectors/proc.plugin/proc_interrupts.c @@ -65,7 +65,7 @@ int do_proc_interrupts(int update_every, usec_t dt) { if(unlikely(!ff)) { char filename[FILENAME_MAX + 1]; snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/interrupts"); - ff = procfile_open(config_get(CONFIG_SECTION_PLUGIN_PROC_INTERRUPTS, "filename to monitor", filename), " \t", PROCFILE_FLAG_DEFAULT); + ff = procfile_open(config_get(CONFIG_SECTION_PLUGIN_PROC_INTERRUPTS, "filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); } if(unlikely(!ff)) return 1; diff --git a/collectors/proc.plugin/proc_mdstat.c b/collectors/proc.plugin/proc_mdstat.c index 46f0134e..bdc298d6 100644 --- a/collectors/proc.plugin/proc_mdstat.c +++ b/collectors/proc.plugin/proc_mdstat.c @@ -560,8 +560,8 @@ int do_proc_mdstat(int update_every, usec_t dt) id, NULL, family, - "md.rate", - "Approximate Time Unit Finish", + "md.expected_time_until_operation_finish", + "Approximate Time Until Finish", "seconds", PLUGIN_PROC_NAME, PLUGIN_PROC_MODULE_MDSTAT_NAME, @@ -591,7 +591,7 @@ int do_proc_mdstat(int update_every, usec_t dt) id, NULL, family, - "md.rate", + "md.operation_speed", "Operation Speed", "KiB/s", PLUGIN_PROC_NAME, diff --git a/collectors/proc.plugin/proc_net_dev.c b/collectors/proc.plugin/proc_net_dev.c index bbf8a590..e06da69a 100644 --- a/collectors/proc.plugin/proc_net_dev.c +++ b/collectors/proc.plugin/proc_net_dev.c @@ -979,7 +979,7 @@ int do_proc_net_dev(int update_every, usec_t 
dt) { , NULL , d->chart_family , "net.carrier" - , "Inteface Physical Link State" + , "Interface Physical Link State" , "state" , PLUGIN_PROC_NAME , PLUGIN_PROC_MODULE_NETDEV_NAME diff --git a/collectors/proc.plugin/proc_net_rpc_nfsd.c b/collectors/proc.plugin/proc_net_rpc_nfsd.c index 29ef7a39..48f218e4 100644 --- a/collectors/proc.plugin/proc_net_rpc_nfsd.c +++ b/collectors/proc.plugin/proc_net_rpc_nfsd.c @@ -226,7 +226,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { (void)dt; static procfile *ff = NULL; static int do_rc = -1, do_fh = -1, do_io = -1, do_th = -1, do_ra = -1, do_net = -1, do_rpc = -1, do_proc2 = -1, do_proc3 = -1, do_proc4 = -1, do_proc4ops = -1; - static int ra_warning = 0, th_warning = 0, proc2_warning = 0, proc3_warning = 0, proc4_warning = 0, proc4ops_warning = 0; + static int ra_warning = 0, proc2_warning = 0, proc3_warning = 0, proc4_warning = 0, proc4ops_warning = 0; if(unlikely(!ff)) { char filename[FILENAME_MAX + 1]; @@ -270,9 +270,9 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { char *type; unsigned long long rc_hits = 0, rc_misses = 0, rc_nocache = 0; - unsigned long long fh_stale = 0, fh_total_lookups = 0, fh_anonymous_lookups = 0, fh_dir_not_in_dcache = 0, fh_non_dir_not_in_dcache = 0; + unsigned long long fh_stale = 0; unsigned long long io_read = 0, io_write = 0; - unsigned long long th_threads = 0, th_fullcnt = 0, th_hist10 = 0, th_hist20 = 0, th_hist30 = 0, th_hist40 = 0, th_hist50 = 0, th_hist60 = 0, th_hist70 = 0, th_hist80 = 0, th_hist90 = 0, th_hist100 = 0; + unsigned long long th_threads = 0; unsigned long long ra_size = 0, ra_hist10 = 0, ra_hist20 = 0, ra_hist30 = 0, ra_hist40 = 0, ra_hist50 = 0, ra_hist60 = 0, ra_hist70 = 0, ra_hist80 = 0, ra_hist90 = 0, ra_hist100 = 0, ra_none = 0; unsigned long long net_count = 0, net_udp_count = 0, net_tcp_count = 0, net_tcp_connections = 0; unsigned long long rpc_calls = 0, rpc_bad_format = 0, rpc_bad_auth = 0, rpc_bad_client = 0; @@ -304,13 +304,10 @@ int 
do_proc_net_rpc_nfsd(int update_every, usec_t dt) { } fh_stale = str2ull(procfile_lineword(ff, l, 1)); - fh_total_lookups = str2ull(procfile_lineword(ff, l, 2)); - fh_anonymous_lookups = str2ull(procfile_lineword(ff, l, 3)); - fh_dir_not_in_dcache = str2ull(procfile_lineword(ff, l, 4)); - fh_non_dir_not_in_dcache = str2ull(procfile_lineword(ff, l, 5)); + + // other file handler metrics were never used and are always zero - unsigned long long sum = fh_stale + fh_total_lookups + fh_anonymous_lookups + fh_dir_not_in_dcache + fh_non_dir_not_in_dcache; - if(sum == 0ULL) do_fh = -1; + if(fh_stale == 0ULL) do_fh = -1; else do_fh = 2; } else if(do_io == 1 && strcmp(type, "io") == 0) { @@ -333,29 +330,11 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { } th_threads = str2ull(procfile_lineword(ff, l, 1)); - th_fullcnt = str2ull(procfile_lineword(ff, l, 2)); - th_hist10 = (unsigned long long)(atof(procfile_lineword(ff, l, 3)) * 1000.0); - th_hist20 = (unsigned long long)(atof(procfile_lineword(ff, l, 4)) * 1000.0); - th_hist30 = (unsigned long long)(atof(procfile_lineword(ff, l, 5)) * 1000.0); - th_hist40 = (unsigned long long)(atof(procfile_lineword(ff, l, 6)) * 1000.0); - th_hist50 = (unsigned long long)(atof(procfile_lineword(ff, l, 7)) * 1000.0); - th_hist60 = (unsigned long long)(atof(procfile_lineword(ff, l, 8)) * 1000.0); - th_hist70 = (unsigned long long)(atof(procfile_lineword(ff, l, 9)) * 1000.0); - th_hist80 = (unsigned long long)(atof(procfile_lineword(ff, l, 10)) * 1000.0); - th_hist90 = (unsigned long long)(atof(procfile_lineword(ff, l, 11)) * 1000.0); - th_hist100 = (unsigned long long)(atof(procfile_lineword(ff, l, 12)) * 1000.0); - - // threads histogram has been disabled on recent kernels - // http://permalink.gmane.org/gmane.linux.nfs/24528 - unsigned long long sum = th_hist10 + th_hist20 + th_hist30 + th_hist40 + th_hist50 + th_hist60 + th_hist70 + th_hist80 + th_hist90 + th_hist100; - if(sum == 0ULL) { - if(!th_warning) { - info("Disabling 
/proc/net/rpc/nfsd threads histogram. It seems unused on this machine. It will be enabled automatically when found with data in it."); - th_warning = 1; - } - do_th = -1; - } - else do_th = 2; + + // thread histogram has been disabled since 2009 (kernel 2.6.30) + // https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=8bbfa9f3889b643fc7de82c0c761ef17097f8faf + + do_th = 2; } else if(do_ra == 1 && strcmp(type, "ra") == 0) { if(unlikely(words < 13)) { @@ -363,6 +342,9 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { continue; } + // readahead cache has been disabled since 2019 (kernel 5.4) + // https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/fs/nfsd/vfs.c?id=501cb1849f865960501d19d54e6a5af306f9b6fd + ra_size = str2ull(procfile_lineword(ff, l, 1)); ra_hist10 = str2ull(procfile_lineword(ff, l, 2)); ra_hist20 = str2ull(procfile_lineword(ff, l, 3)); @@ -408,9 +390,9 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { } rpc_calls = str2ull(procfile_lineword(ff, l, 1)); - rpc_bad_format = str2ull(procfile_lineword(ff, l, 2)); - rpc_bad_auth = str2ull(procfile_lineword(ff, l, 3)); - rpc_bad_client = str2ull(procfile_lineword(ff, l, 4)); + rpc_bad_format = str2ull(procfile_lineword(ff, l, 3)); + rpc_bad_auth = str2ull(procfile_lineword(ff, l, 4)); + rpc_bad_client = str2ull(procfile_lineword(ff, l, 5)); unsigned long long sum = rpc_calls + rpc_bad_format + rpc_bad_auth + rpc_bad_client; if(sum == 0ULL) do_rpc = -1; @@ -542,11 +524,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { if(do_fh == 2) { static RRDSET *st = NULL; - static RRDDIM *rd_stale = NULL, - *rd_total_lookups = NULL, - *rd_anonymous_lookups = NULL, - *rd_dir_not_in_dcache = NULL, - *rd_non_dir_not_in_dcache = NULL; + static RRDDIM *rd_stale = NULL; if(unlikely(!st)) { st = rrdset_create_localhost( @@ -566,18 +544,10 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { rrdset_flag_set(st, RRDSET_FLAG_DETAIL); rd_stale = 
rrddim_add(st, "stale", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - rd_total_lookups = rrddim_add(st, "total_lookups", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_anonymous_lookups = rrddim_add(st, "anonymous_lookups", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_dir_not_in_dcache = rrddim_add(st, "dir_not_in_dcache", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_non_dir_not_in_dcache = rrddim_add(st, "non_dir_not_in_dcache", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); } else rrdset_next(st); rrddim_set_by_pointer(st, rd_stale, fh_stale); - rrddim_set_by_pointer(st, rd_total_lookups, fh_total_lookups); - rrddim_set_by_pointer(st, rd_anonymous_lookups, fh_anonymous_lookups); - rrddim_set_by_pointer(st, rd_dir_not_in_dcache, fh_dir_not_in_dcache); - rrddim_set_by_pointer(st, rd_non_dir_not_in_dcache, fh_non_dir_not_in_dcache); rrdset_done(st); } @@ -617,116 +587,32 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { // -------------------------------------------------------------------- if(do_th == 2) { - { - static RRDSET *st = NULL; - static RRDDIM *rd_threads = NULL; - - if(unlikely(!st)) { - st = rrdset_create_localhost( - "nfsd" - , "threads" - , NULL - , "threads" - , NULL - , "NFS Server Threads" - , "threads" - , PLUGIN_PROC_NAME - , PLUGIN_PROC_MODULE_NFSD_NAME - , NETDATA_CHART_PRIO_NFSD_THREADS - , update_every - , RRDSET_TYPE_LINE - ); - - rd_threads = rrddim_add(st, "threads", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - } - else rrdset_next(st); - - rrddim_set_by_pointer(st, rd_threads, th_threads); - rrdset_done(st); - } + static RRDSET *st = NULL; + static RRDDIM *rd_threads = NULL; - { - static RRDSET *st = NULL; - static RRDDIM *rd_full_count = NULL; - - if(unlikely(!st)) { - st = rrdset_create_localhost( - "nfsd" - , "threads_fullcnt" - , NULL - , "threads" - , NULL - , "NFS Server Threads Full Count" - , "events" - , PLUGIN_PROC_NAME - , PLUGIN_PROC_MODULE_NFSD_NAME - , NETDATA_CHART_PRIO_NFSD_THREADS_FULLCNT - , update_every - , RRDSET_TYPE_LINE - ); - 
- rd_full_count = rrddim_add(st, "full_count", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } - else rrdset_next(st); + if(unlikely(!st)) { + st = rrdset_create_localhost( + "nfsd" + , "threads" + , NULL + , "threads" + , NULL + , "NFS Server Threads" + , "threads" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_NFSD_NAME + , NETDATA_CHART_PRIO_NFSD_THREADS + , update_every + , RRDSET_TYPE_LINE + ); - rrddim_set_by_pointer(st, rd_full_count, th_fullcnt); - rrdset_done(st); + rd_threads = rrddim_add(st, "threads", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); } + else rrdset_next(st); + + rrddim_set_by_pointer(st, rd_threads, th_threads); + rrdset_done(st); - { - static RRDSET *st = NULL; - static RRDDIM *rd_th_hist10 = NULL, - *rd_th_hist20 = NULL, - *rd_th_hist30 = NULL, - *rd_th_hist40 = NULL, - *rd_th_hist50 = NULL, - *rd_th_hist60 = NULL, - *rd_th_hist70 = NULL, - *rd_th_hist80 = NULL, - *rd_th_hist90 = NULL, - *rd_th_hist100 = NULL; - - if(unlikely(!st)) { - st = rrdset_create_localhost( - "nfsd" - , "threads_histogram" - , NULL - , "threads" - , NULL - , "NFS Server Threads Usage Histogram" - , "percentage" - , PLUGIN_PROC_NAME - , PLUGIN_PROC_MODULE_NFSD_NAME - , NETDATA_CHART_PRIO_NFSD_THREADS_HISTOGRAM - , update_every - , RRDSET_TYPE_LINE - ); - - rd_th_hist10 = rrddim_add(st, "0%-10%", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); - rd_th_hist20 = rrddim_add(st, "10%-20%", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); - rd_th_hist30 = rrddim_add(st, "20%-30%", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); - rd_th_hist40 = rrddim_add(st, "30%-40%", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); - rd_th_hist50 = rrddim_add(st, "40%-50%", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); - rd_th_hist60 = rrddim_add(st, "50%-60%", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); - rd_th_hist70 = rrddim_add(st, "60%-70%", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); - rd_th_hist80 = rrddim_add(st, "70%-80%", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); - rd_th_hist90 = rrddim_add(st, "80%-90%", NULL, 1, 1000, 
RRD_ALGORITHM_ABSOLUTE); - rd_th_hist100 = rrddim_add(st, "90%-100%", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); - } - else rrdset_next(st); - - rrddim_set_by_pointer(st, rd_th_hist10, th_hist10); - rrddim_set_by_pointer(st, rd_th_hist20, th_hist20); - rrddim_set_by_pointer(st, rd_th_hist30, th_hist30); - rrddim_set_by_pointer(st, rd_th_hist40, th_hist40); - rrddim_set_by_pointer(st, rd_th_hist50, th_hist50); - rrddim_set_by_pointer(st, rd_th_hist60, th_hist60); - rrddim_set_by_pointer(st, rd_th_hist70, th_hist70); - rrddim_set_by_pointer(st, rd_th_hist80, th_hist80); - rrddim_set_by_pointer(st, rd_th_hist90, th_hist90); - rrddim_set_by_pointer(st, rd_th_hist100, th_hist100); - rrdset_done(st); - } } // -------------------------------------------------------------------- @@ -978,7 +864,7 @@ int do_proc_net_rpc_nfsd(int update_every, usec_t dt) { "nfsd" , "proc4ops" , NULL - , "nfsv2ops" + , "nfsv4ops" , NULL , "NFS v4 Server Operations" , "operations/s" diff --git a/collectors/proc.plugin/proc_net_stat_synproxy.c b/collectors/proc.plugin/proc_net_stat_synproxy.c index f5030f99..c74c5374 100644 --- a/collectors/proc.plugin/proc_net_stat_synproxy.c +++ b/collectors/proc.plugin/proc_net_stat_synproxy.c @@ -10,11 +10,10 @@ int do_proc_net_stat_synproxy(int update_every, usec_t dt) { (void)dt; - static int do_entries = -1, do_cookies = -1, do_syns = -1, do_reopened = -1; + static int do_cookies = -1, do_syns = -1, do_reopened = -1; static procfile *ff = NULL; - if(unlikely(do_entries == -1)) { - do_entries = config_get_boolean_ondemand("plugin:proc:/proc/net/stat/synproxy", "SYNPROXY entries", CONFIG_BOOLEAN_AUTO); + if(unlikely(do_cookies == -1)) { do_cookies = config_get_boolean_ondemand("plugin:proc:/proc/net/stat/synproxy", "SYNPROXY cookies", CONFIG_BOOLEAN_AUTO); do_syns = config_get_boolean_ondemand("plugin:proc:/proc/net/stat/synproxy", "SYNPROXY SYN received", CONFIG_BOOLEAN_AUTO); do_reopened = config_get_boolean_ondemand("plugin:proc:/proc/net/stat/synproxy", 
"SYNPROXY connections reopened", CONFIG_BOOLEAN_AUTO); @@ -39,7 +38,7 @@ int do_proc_net_stat_synproxy(int update_every, usec_t dt) { return 1; } - unsigned long long entries = 0, syn_received = 0, cookie_invalid = 0, cookie_valid = 0, cookie_retrans = 0, conn_reopened = 0; + unsigned long long syn_received = 0, cookie_invalid = 0, cookie_valid = 0, cookie_retrans = 0, conn_reopened = 0; // synproxy gives its values per CPU for(l = 1; l < lines ;l++) { @@ -47,7 +46,6 @@ int do_proc_net_stat_synproxy(int update_every, usec_t dt) { if(unlikely(words < 6)) continue; - entries += strtoull(procfile_lineword(ff, l, 0), NULL, 16); syn_received += strtoull(procfile_lineword(ff, l, 1), NULL, 16); cookie_invalid += strtoull(procfile_lineword(ff, l, 2), NULL, 16); cookie_valid += strtoull(procfile_lineword(ff, l, 3), NULL, 16); @@ -55,38 +53,7 @@ int do_proc_net_stat_synproxy(int update_every, usec_t dt) { conn_reopened += strtoull(procfile_lineword(ff, l, 5), NULL, 16); } - unsigned long long events = entries + syn_received + cookie_invalid + cookie_valid + cookie_retrans + conn_reopened; - - // -------------------------------------------------------------------- - - if(do_entries == CONFIG_BOOLEAN_YES || (do_entries == CONFIG_BOOLEAN_AUTO && - (events || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { - do_entries = CONFIG_BOOLEAN_YES; - - static RRDSET *st = NULL; - if(unlikely(!st)) { - st = rrdset_create_localhost( - RRD_TYPE_NET_STAT_NETFILTER - , RRD_TYPE_NET_STAT_SYNPROXY "_entries" - , NULL - , RRD_TYPE_NET_STAT_SYNPROXY - , NULL - , "SYNPROXY Entries Used" - , "entries" - , PLUGIN_PROC_NAME - , PLUGIN_PROC_MODULE_SYNPROXY_NAME - , NETDATA_CHART_PRIO_SYNPROXY_ENTRIES - , update_every - , RRDSET_TYPE_LINE - ); - - rrddim_add(st, "entries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - } - else rrdset_next(st); - - rrddim_set(st, "entries", entries); - rrdset_done(st); - } + unsigned long long events = syn_received + cookie_invalid + cookie_valid + cookie_retrans + 
conn_reopened; // -------------------------------------------------------------------- diff --git a/collectors/proc.plugin/proc_pagetypeinfo.c b/collectors/proc.plugin/proc_pagetypeinfo.c index 3ce29222..e1026cf5 100644 --- a/collectors/proc.plugin/proc_pagetypeinfo.c +++ b/collectors/proc.plugin/proc_pagetypeinfo.c @@ -139,7 +139,7 @@ int do_proc_pagetypeinfo(int update_every, usec_t dt) { return 1; } - // 4th line is the "Free pages count per migrate type at order". Just substract these 8 words. + // 4th line is the "Free pages count per migrate type at order". Just subtract these 8 words. pageorders_cnt = procfile_linewords(ff, 3); if (pageorders_cnt < 9) { error("PLUGIN: PROC_PAGETYPEINFO: Unable to parse Line 4 of %s", ff_path); diff --git a/collectors/proc.plugin/proc_spl_kstat_zfs.c b/collectors/proc.plugin/proc_spl_kstat_zfs.c index ce95c2d3..fedc0343 100644 --- a/collectors/proc.plugin/proc_spl_kstat_zfs.c +++ b/collectors/proc.plugin/proc_spl_kstat_zfs.c @@ -6,7 +6,7 @@ #define ZFS_PROC_ARCSTATS "/proc/spl/kstat/zfs/arcstats" #define ZFS_PROC_POOLS "/proc/spl/kstat/zfs" -#define STATE_SIZE 8 +#define STATE_SIZE 9 #define MAX_CHART_ID 256 extern struct arcstats arcstats; diff --git a/collectors/proc.plugin/sys_class_infiniband.c b/collectors/proc.plugin/sys_class_infiniband.c index 69e27f81..1a75ce13 100644 --- a/collectors/proc.plugin/sys_class_infiniband.c +++ b/collectors/proc.plugin/sys_class_infiniband.c @@ -37,7 +37,7 @@ GEN(port_rcv_constraint_errors, errors, "Pkts rcvd discarded ", 1, __VA_ARGS__) \ GEN(port_xmit_discards, errors, "Pkts sent discarded", 1, __VA_ARGS__) \ GEN(port_xmit_wait, errors, "Tick Wait to send", 1, __VA_ARGS__) \ - GEN(VL15_dropped, errors, "Pkts missed ressource", 1, __VA_ARGS__) \ + GEN(VL15_dropped, errors, "Pkts missed resource", 1, __VA_ARGS__) \ GEN(excessive_buffer_overrun_errors, errors, "Buffer overrun", 1, __VA_ARGS__) \ GEN(link_downed, errors, "Link Downed", 1, __VA_ARGS__) \ GEN(link_error_recovery, errors, 
"Link recovered", 1, __VA_ARGS__) \ diff --git a/collectors/proc.plugin/zfs_common.h b/collectors/proc.plugin/zfs_common.h index 148f9e47..9d61de2f 100644 --- a/collectors/proc.plugin/zfs_common.h +++ b/collectors/proc.plugin/zfs_common.h @@ -3,7 +3,7 @@ #ifndef NETDATA_ZFS_COMMON_H #define NETDATA_ZFS_COMMON_H 1 -#include "../../daemon/common.h" +#include "daemon/common.h" #define ZFS_FAMILY_SIZE "size" #define ZFS_FAMILY_EFFICIENCY "efficiency" diff --git a/collectors/python.d.plugin/anomalies/README.md b/collectors/python.d.plugin/anomalies/README.md index 9d24e868..c58c858b 100644 --- a/collectors/python.d.plugin/anomalies/README.md +++ b/collectors/python.d.plugin/anomalies/README.md @@ -82,8 +82,8 @@ The default configuration should look something like this. Here you can see each # JOBS (data collection sources) # Pull data from local Netdata node. -local: - name: 'local' +anomalies: + name: 'Anomalies' # Host to pull data from. host: '127.0.0.1:19999' diff --git a/collectors/python.d.plugin/anomalies/anomalies.chart.py b/collectors/python.d.plugin/anomalies/anomalies.chart.py index 61b51d9c..8ca3df68 100644 --- a/collectors/python.d.plugin/anomalies/anomalies.chart.py +++ b/collectors/python.d.plugin/anomalies/anomalies.chart.py @@ -188,7 +188,7 @@ class Service(SimpleService): self.custom_model_scalers[model] = MinMaxScaler() def reinitialize(self): - """Reinitialize charts, models and data to a begining state. + """Reinitialize charts, models and data to a beginning state. 
""" self.charts_init() self.custom_models_init() @@ -385,7 +385,7 @@ class Service(SimpleService): def get_data(self): - # initialize to whats available right now + # initialize to what's available right now if self.reinitialize_at_every_step or len(self.host_charts_dict[self.host]) == 0: self.charts_init() self.custom_models_init() diff --git a/collectors/python.d.plugin/anomalies/anomalies.conf b/collectors/python.d.plugin/anomalies/anomalies.conf index 0dc40ef2..ef867709 100644 --- a/collectors/python.d.plugin/anomalies/anomalies.conf +++ b/collectors/python.d.plugin/anomalies/anomalies.conf @@ -31,8 +31,8 @@ # JOBS (data collection sources) # Pull data from local Netdata node. -local: - name: 'local' +anomalies: + name: 'Anomalies' # Host to pull data from. host: '127.0.0.1:19999' diff --git a/collectors/python.d.plugin/changefinder/README.md b/collectors/python.d.plugin/changefinder/README.md index e1c1d4ba..051639d1 100644 --- a/collectors/python.d.plugin/changefinder/README.md +++ b/collectors/python.d.plugin/changefinder/README.md @@ -12,8 +12,8 @@ on your Netdata charts and/or dimensions. Instead of this collector just _collecting_ data, it also does some computation on the data it collects to return a changepoint score for each chart or dimension you configure it to work on. This is -an [online](https://en.wikipedia.org/wiki/Online_machine_learning) machine learning algorithim so there is no batch step -to train the model, instead it evolves over time as more data arrives. That makes this particualr algorithim quite cheap +an [online](https://en.wikipedia.org/wiki/Online_machine_learning) machine learning algorithm so there is no batch step +to train the model, instead it evolves over time as more data arrives. That makes this particular algorithm quite cheap to compute at each step of data collection (see the notes section below for more details) and it should scale fairly well to work on lots of charts or hosts (if running on a parent node for example). 
@@ -28,7 +28,7 @@ Two charts are available: This chart shows the percentile of the score that is output from the ChangeFinder library (it is turned off by default but available with `show_scores: true`). -A high observed score is more likley to be a valid changepoint worth exploring, even more so when multiple charts or +A high observed score is more likely to be a valid changepoint worth exploring, even more so when multiple charts or dimensions have high changepoint scores at the same time or very close together. ### ChangeFinder Flags (`changefinder.flags`) @@ -36,11 +36,11 @@ dimensions have high changepoint scores at the same time or very close together. This chart shows `1` or `0` if the latest score has a percentile value that exceeds the `cf_threshold` threshold. By default, any scores that are in the 99th or above percentile will raise a flag on this chart. -The raw changefinder score itself can be a little noisey and so limiting ourselves to just periods where it surpasses +The raw changefinder score itself can be a little noisy and so limiting ourselves to just periods where it surpasses the 99th percentile can help manage the "[signal to noise ratio](https://en.wikipedia.org/wiki/Signal-to-noise_ratio)" better. -The `cf_threshold` paramater might be one you want to play around with to tune things specifically for the workloads on +The `cf_threshold` parameter might be one you want to play around with to tune things specifically for the workloads on your node and the specific charts you want to monitor. For example, maybe the 95th percentile might work better for you than the 99th percentile. @@ -164,7 +164,7 @@ sudo su -s /bin/bash netdata - It may take an hour or two (depending on your choice of `n_score_samples`) for the collector to 'settle' into it's typical behaviour in terms of the trained models and scores you will see in the normal running of your node. 
Mainly this is because it can take a while to build up a proper distribution of previous scores in over to convert the raw - score returned by the ChangeFinder algorithim into a percentile based on the most recent `n_score_samples` that have + score returned by the ChangeFinder algorithm into a percentile based on the most recent `n_score_samples` that have already been produced. So when you first turn the collector on, it will have a lot of flags in the beginning and then should 'settle down' once it has built up enough history. This is a typical characteristic of online machine learning approaches which need some initial window of time before they can be useful. diff --git a/collectors/python.d.plugin/elasticsearch/elasticsearch.chart.py b/collectors/python.d.plugin/elasticsearch/elasticsearch.chart.py index dddf50b4..93614b08 100644 --- a/collectors/python.d.plugin/elasticsearch/elasticsearch.chart.py +++ b/collectors/python.d.plugin/elasticsearch/elasticsearch.chart.py @@ -506,7 +506,9 @@ CHARTS = { def convert_index_store_size_to_bytes(size): - # can be b, kb, mb, gb + # can be b, kb, mb, gb or None + if size is None: + return -1 if size.endswith('kb'): return round(float(size[:-2]) * 1024) elif size.endswith('mb'): @@ -520,6 +522,12 @@ def convert_index_store_size_to_bytes(size): return -1 +def convert_index_null_value(value): + if value is None: + return -1 + return value + + def convert_index_health(health): if health == 'green': return 0 @@ -634,6 +642,30 @@ class Service(UrlService): # "docs.count": "10", # "docs.deleted": "3", # "store.size": "650b" + # }, + # { + # "status":"open", + # "index":".kibana_3", + # "health":"red", + # "uuid":"umAdNrq6QaOXrmZjAowTNw", + # "store.size":null, + # "pri.store.size":null, + # "docs.count":null, + # "rep":"0", + # "pri":"1", + # "docs.deleted":null + # }, + # { + # "health" : "green", + # "status" : "close", + # "index" : "siem-events-2021.09.12", + # "uuid" : "mTQ-Yl5TS7S3lGoRORE-Pg", + # "pri" : "4", + # "rep" : 
"0", + # "docs.count" : null, + # "docs.deleted" : null, + # "store.size" : null, + # "pri.store.size" : null # } # ] raw_data = self._get_raw_data(url) @@ -654,10 +686,12 @@ class Service(UrlService): continue v = { - '{0}_index_docs_count'.format(name): idx['docs.count'], '{0}_index_replica'.format(name): idx['rep'], '{0}_index_health'.format(name): convert_index_health(idx['health']), } + docs_count = convert_index_null_value(idx['docs.count']) + if docs_count != -1: + v['{0}_index_docs_count'.format(name)] = idx['docs.count'] size = convert_index_store_size_to_bytes(idx['store.size']) if size != -1: v['{0}_index_store_size'.format(name)] = size diff --git a/collectors/python.d.plugin/go_expvar/go_expvar.chart.py b/collectors/python.d.plugin/go_expvar/go_expvar.chart.py index f9bbdc16..dca01081 100644 --- a/collectors/python.d.plugin/go_expvar/go_expvar.chart.py +++ b/collectors/python.d.plugin/go_expvar/go_expvar.chart.py @@ -237,7 +237,7 @@ class Service(UrlService): gc_pauses = memstats['PauseNs'] try: gc_pause_avg = sum(gc_pauses) / len([x for x in gc_pauses if x > 0]) - # no GC cycles have occured yet + # no GC cycles have occurred yet except ZeroDivisionError: gc_pause_avg = 0 diff --git a/collectors/python.d.plugin/httpcheck/README.md b/collectors/python.d.plugin/httpcheck/README.md index 55aad52f..59c60f54 100644 --- a/collectors/python.d.plugin/httpcheck/README.md +++ b/collectors/python.d.plugin/httpcheck/README.md @@ -25,7 +25,7 @@ Following charts are drawn per job: ## Configuration -Edit the `python.d/httpcheck.conf` configuration file using `edit-config` from the Netdata [config +Edit the [`python.d/httpcheck.conf`](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/httpcheck/httpcheck.conf) configuration file using `edit-config` from the Netdata [config directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. 
```bash diff --git a/collectors/python.d.plugin/mongodb/mongodb.chart.py b/collectors/python.d.plugin/mongodb/mongodb.chart.py index 2e6fb220..bec94d3e 100644 --- a/collectors/python.d.plugin/mongodb/mongodb.chart.py +++ b/collectors/python.d.plugin/mongodb/mongodb.chart.py @@ -250,10 +250,10 @@ CHARTS = { ] }, 'cursors': { - 'options': [None, 'Currently openned cursors, cursors with timeout disabled and timed out cursors', + 'options': [None, 'Currently opened cursors, cursors with timeout disabled and timed out cursors', 'cursors', 'database performance', 'mongodb.cursors', 'stacked'], 'lines': [ - ['cursor_total', 'openned', 'absolute', 1, 1], + ['cursor_total', 'opened', 'absolute', 1, 1], ['noTimeout', None, 'absolute', 1, 1], ['timedOut', None, 'incremental', 1, 1] ] diff --git a/collectors/python.d.plugin/mysql/README.md b/collectors/python.d.plugin/mysql/README.md index d8d3c1d0..63d2c1e5 100644 --- a/collectors/python.d.plugin/mysql/README.md +++ b/collectors/python.d.plugin/mysql/README.md @@ -17,7 +17,7 @@ To create the `netdata` user, execute the following in the MySQL shell: ```sh create user 'netdata'@'localhost'; -grant usage on *.* to 'netdata'@'localhost'; +grant usage, replication client on *.* to 'netdata'@'localhost'; flush privileges; ``` The `netdata` user will have the ability to connect to the MySQL server on `localhost` without a password. 
diff --git a/collectors/python.d.plugin/mysql/mysql.chart.py b/collectors/python.d.plugin/mysql/mysql.chart.py index 1737e16b..e8c03cb0 100644 --- a/collectors/python.d.plugin/mysql/mysql.chart.py +++ b/collectors/python.d.plugin/mysql/mysql.chart.py @@ -398,7 +398,7 @@ CHARTS = { ] }, 'innodb_os_log_fsync_writes': { - 'options': [None, 'InnoDB OS Log Operations', 'operations/s', 'innodb', 'mysql.innodb_os_log', 'line'], + 'options': [None, 'InnoDB OS Log Operations', 'operations/s', 'innodb', 'mysql.innodb_os_log_fsyncs', 'line'], 'lines': [ ['Innodb_os_log_fsyncs', 'fsyncs', 'incremental'], ] @@ -445,7 +445,7 @@ CHARTS = { }, 'innodb_buffer_pool_flush_pages_requests': { 'options': [None, 'InnoDB Buffer Pool Flush Pages Requests', 'requests/s', 'innodb', - 'mysql.innodb_buffer_pool_pages', 'line'], + 'mysql.innodb_buffer_pool_pages_flushed', 'line'], 'lines': [ ['Innodb_buffer_pool_pages_flushed', 'flush pages', 'incremental'], ] diff --git a/collectors/python.d.plugin/postgres/README.md b/collectors/python.d.plugin/postgres/README.md index dc9b1846..0515ec57 100644 --- a/collectors/python.d.plugin/postgres/README.md +++ b/collectors/python.d.plugin/postgres/README.md @@ -12,6 +12,8 @@ Collects database health and performance metrics. - `python-psycopg2` package. You have to install it manually and make sure that it is available to the `netdata` user, either using `pip`, the package manager of your Linux distribution, or any other method you prefer. +- PostgreSQL v9.4+ + Following charts are drawn: 1. **Database size** MB @@ -68,6 +70,23 @@ Following charts are drawn: - locks +12. **Standby delta** KB + + - sent delta + - write delta + - flush delta + - replay delta + +13. **Standby lag** seconds + + - write lag + - flush lag + - replay lag + +14. 
**Average number of blocking transactions in db** processes + + - blocking + ## Configuration Edit the `python.d/postgres.conf` configuration file using `edit-config` from the Netdata [config diff --git a/collectors/python.d.plugin/postgres/postgres.chart.py b/collectors/python.d.plugin/postgres/postgres.chart.py index bd28dd9b..29026a6a 100644 --- a/collectors/python.d.plugin/postgres/postgres.chart.py +++ b/collectors/python.d.plugin/postgres/postgres.chart.py @@ -45,14 +45,18 @@ QUERY_NAME_INDEX_STATS = 'INDEX_STATS' QUERY_NAME_DATABASE = 'DATABASE' QUERY_NAME_BGWRITER = 'BGWRITER' QUERY_NAME_LOCKS = 'LOCKS' +QUERY_NAME_BLOCKERS = 'BLOCKERS' QUERY_NAME_DATABASES = 'DATABASES' QUERY_NAME_STANDBY = 'STANDBY' QUERY_NAME_REPLICATION_SLOT = 'REPLICATION_SLOT' QUERY_NAME_STANDBY_DELTA = 'STANDBY_DELTA' +QUERY_NAME_STANDBY_LAG = 'STANDBY_LAG' QUERY_NAME_REPSLOT_FILES = 'REPSLOT_FILES' QUERY_NAME_IF_SUPERUSER = 'IF_SUPERUSER' QUERY_NAME_SERVER_VERSION = 'SERVER_VERSION' QUERY_NAME_AUTOVACUUM = 'AUTOVACUUM' +QUERY_NAME_FORCED_AUTOVACUUM = 'FORCED_AUTOVACUUM' +QUERY_NAME_TX_WRAPAROUND = 'TX_WRAPAROUND' QUERY_NAME_DIFF_LSN = 'DIFF_LSN' QUERY_NAME_WAL_WRITES = 'WAL_WRITES' @@ -123,6 +127,9 @@ METRICS = { 'ShareLock', 'RowExclusiveLock' ], + QUERY_NAME_BLOCKERS: [ + 'blocking_pids_avg' + ], QUERY_NAME_AUTOVACUUM: [ 'analyze', 'vacuum_analyze', @@ -130,12 +137,24 @@ METRICS = { 'vacuum_freeze', 'brin_summarize' ], + QUERY_NAME_FORCED_AUTOVACUUM: [ + 'percent_towards_forced_vacuum' + ], + QUERY_NAME_TX_WRAPAROUND: [ + 'oldest_current_xid', + 'percent_towards_wraparound' + ], QUERY_NAME_STANDBY_DELTA: [ 'sent_delta', 'write_delta', 'flush_delta', 'replay_delta' ], + QUERY_NAME_STANDBY_LAG: [ + 'write_lag', + 'flush_lag', + 'replay_lag' + ], QUERY_NAME_REPSLOT_FILES: [ 'replslot_wal_keep', 'replslot_files' @@ -177,7 +196,7 @@ FROM FROM pg_catalog.pg_ls_dir('pg_wal') AS wal(name) WHERE name ~ '^[0-9A-F]{24}$' ORDER BY - (pg_stat_file('pg_wal/'||name)).modification, + 
(pg_stat_file('pg_wal/'||name, true)).modification, wal.name DESC) sub; """, V96: """ @@ -204,7 +223,7 @@ FROM FROM pg_catalog.pg_ls_dir('pg_xlog') AS wal(name) WHERE name ~ '^[0-9A-F]{24}$' ORDER BY - (pg_stat_file('pg_xlog/'||name)).modification, + (pg_stat_file('pg_xlog/'||name, true)).modification, wal.name DESC) sub; """, } @@ -263,7 +282,7 @@ FROM ( FROM pg_catalog.pg_stat_activity WHERE backend_type IN ('client backend', 'background worker') UNION ALL - SELECT 'r', COUNT(1) + SELECT 'r', COUNT(1) FROM pg_catalog.pg_stat_replication ) as s; """, @@ -277,7 +296,7 @@ FROM ( FROM pg_catalog.pg_stat_activity WHERE query NOT LIKE 'autovacuum: %%' UNION ALL - SELECT 'r', COUNT(1) + SELECT 'r', COUNT(1) FROM pg_catalog.pg_stat_replication ) as s; """, @@ -291,7 +310,7 @@ FROM ( FROM pg_catalog.pg_stat_activity WHERE current_query NOT LIKE 'autovacuum: %%' UNION ALL - SELECT 'r', COUNT(1) + SELECT 'r', COUNT(1) FROM pg_catalog.pg_stat_replication ) as s; """, @@ -386,6 +405,48 @@ ORDER BY datname, mode; """, } +QUERY_BLOCKERS = { + DEFAULT: """ +WITH B AS ( +SELECT DISTINCT + pg_database.datname as database_name, + pg_locks.pid, + cardinality(pg_blocking_pids(pg_locks.pid)) AS blocking_pids +FROM pg_locks +INNER JOIN pg_database ON pg_database.oid = pg_locks.database +WHERE NOT pg_locks.granted) +SELECT database_name, AVG(blocking_pids) AS blocking_pids_avg +FROM B +GROUP BY database_name +""", + V96: """ +WITH B AS ( +SELECT DISTINCT + pg_database.datname as database_name, + blocked_locks.pid AS blocked_pid, + COUNT(blocking_locks.pid) AS blocking_pids +FROM pg_catalog.pg_locks blocked_locks +INNER JOIN pg_database ON pg_database.oid = blocked_locks.database +JOIN pg_catalog.pg_locks blocking_locks + ON blocking_locks.locktype = blocked_locks.locktype + AND blocking_locks.database IS NOT DISTINCT FROM blocked_locks.database + AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation + AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page + 
AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple + AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid + AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid + AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid + AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid + AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid + AND blocking_locks.pid != blocked_locks.pid +WHERE NOT blocked_locks.GRANTED +GROUP BY database_name, blocked_pid) +SELECT database_name, AVG(blocking_pids) AS blocking_pids_avg +FROM B +GROUP BY database_name +""" +} + QUERY_DATABASES = { DEFAULT: """ SELECT @@ -394,17 +455,18 @@ FROM pg_stat_database WHERE has_database_privilege( (SELECT current_user), datname, 'connect') - AND NOT datname ~* '^template\d'; + AND NOT datname ~* '^template\d' +ORDER BY datname; """, } QUERY_STANDBY = { DEFAULT: """ SELECT - application_name -FROM pg_stat_replication -WHERE application_name IS NOT NULL -GROUP BY application_name; + COALESCE(prs.slot_name, psr.application_name) application_name +FROM pg_stat_replication psr +LEFT OUTER JOIN pg_replication_slots prs on psr.pid = prs.active_pid +WHERE application_name IS NOT NULL; """, } @@ -418,7 +480,7 @@ FROM pg_replication_slots; QUERY_STANDBY_DELTA = { DEFAULT: """ SELECT - application_name, + COALESCE(prs.slot_name, psr.application_name) application_name, pg_wal_lsn_diff( CASE pg_is_in_recovery() WHEN true THEN pg_last_wal_receive_lsn() @@ -443,12 +505,13 @@ SELECT ELSE pg_current_wal_lsn() END, replay_lsn) AS replay_delta -FROM pg_stat_replication +FROM pg_stat_replication psr +LEFT OUTER JOIN pg_replication_slots prs on psr.pid = prs.active_pid WHERE application_name IS NOT NULL; """, V96: """ SELECT - application_name, + COALESCE(prs.slot_name, psr.application_name) application_name, pg_xlog_location_diff( CASE pg_is_in_recovery() WHEN true THEN pg_last_xlog_receive_location() @@ -473,11 
+536,25 @@ SELECT ELSE pg_current_xlog_location() END, replay_location) AS replay_delta -FROM pg_stat_replication +FROM pg_stat_replication psr +LEFT OUTER JOIN pg_replication_slots prs on psr.pid = prs.active_pid WHERE application_name IS NOT NULL; """, } +QUERY_STANDBY_LAG = { + DEFAULT: """ +SELECT + COALESCE(prs.slot_name, psr.application_name) application_name, + COALESCE(EXTRACT(EPOCH FROM write_lag)::bigint, 0) AS write_lag, + COALESCE(EXTRACT(EPOCH FROM flush_lag)::bigint, 0) AS flush_lag, + COALESCE(EXTRACT(EPOCH FROM replay_lag)::bigint, 0) AS replay_lag +FROM pg_stat_replication psr +LEFT OUTER JOIN pg_replication_slots prs on psr.pid = prs.active_pid +WHERE application_name IS NOT NULL; +""" +} + QUERY_REPSLOT_FILES = { DEFAULT: """ WITH wal_size AS ( @@ -500,8 +577,20 @@ FROM slot_type, COALESCE ( floor( - (pg_wal_lsn_diff(pg_current_wal_lsn (),slot.restart_lsn) - - (pg_walfile_name_offset (restart_lsn)).file_offset) / (s.val) + CASE WHEN pg_is_in_recovery() + THEN ( + pg_wal_lsn_diff(pg_last_wal_receive_lsn(), slot.restart_lsn) + -- this is needed to account for whole WAL retention and + -- not only size retention + + (pg_wal_lsn_diff(restart_lsn, '0/0') %% s.val) + ) / s.val + ELSE ( + pg_wal_lsn_diff(pg_current_wal_lsn(), slot.restart_lsn) + -- this is needed to account for whole WAL retention and + -- not only size retention + + (pg_walfile_name_offset(restart_lsn)).file_offset + ) / s.val + END ),0) AS replslot_wal_keep FROM pg_replication_slots slot LEFT JOIN ( @@ -539,8 +628,20 @@ FROM slot_type, COALESCE ( floor( - (pg_wal_lsn_diff(pg_current_wal_lsn (),slot.restart_lsn) - - (pg_walfile_name_offset (restart_lsn)).file_offset) / (s.val) + CASE WHEN pg_is_in_recovery() + THEN ( + pg_wal_lsn_diff(pg_last_wal_receive_lsn(), slot.restart_lsn) + -- this is needed to account for whole WAL retention and + -- not only size retention + + (pg_wal_lsn_diff(restart_lsn, '0/0') %% s.val) + ) / s.val + ELSE ( + pg_wal_lsn_diff(pg_current_wal_lsn(), 
slot.restart_lsn) + -- this is needed to account for whole WAL retention and + -- not only size retention + + (pg_walfile_name_offset(restart_lsn)).file_offset + ) / s.val + END ),0) AS replslot_wal_keep FROM pg_replication_slots slot LEFT JOIN ( @@ -586,6 +687,43 @@ WHERE query NOT LIKE '%%pg_stat_activity%%'; """, } +QUERY_FORCED_AUTOVACUUM = { + DEFAULT: """ +WITH max_age AS ( + SELECT setting AS autovacuum_freeze_max_age + FROM pg_catalog.pg_settings + WHERE name = 'autovacuum_freeze_max_age' ) +, per_database_stats AS ( + SELECT datname + , m.autovacuum_freeze_max_age::int + , age(d.datfrozenxid) AS oldest_current_xid + FROM pg_catalog.pg_database d + JOIN max_age m ON (true) + WHERE d.datallowconn ) +SELECT max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_forced_autovacuum +FROM per_database_stats; +""", +} + +QUERY_TX_WRAPAROUND = { + DEFAULT: """ +WITH max_age AS ( + SELECT 2000000000 as max_old_xid + FROM pg_catalog.pg_settings + WHERE name = 'autovacuum_freeze_max_age' ) +, per_database_stats AS ( + SELECT datname + , m.max_old_xid::int + , age(d.datfrozenxid) AS oldest_current_xid + FROM pg_catalog.pg_database d + JOIN max_age m ON (true) + WHERE d.datallowconn ) +SELECT max(oldest_current_xid) AS oldest_current_xid + , max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound +FROM per_database_stats; +""", +} + QUERY_DIFF_LSN = { DEFAULT: """ SELECT @@ -632,6 +770,10 @@ def query_factory(name, version=NO_VERSION): return QUERY_BGWRITER[DEFAULT] elif name == QUERY_NAME_LOCKS: return QUERY_LOCKS[DEFAULT] + elif name == QUERY_NAME_BLOCKERS: + if version < 90600: + return QUERY_BLOCKERS[V96] + return QUERY_BLOCKERS[DEFAULT] elif name == QUERY_NAME_DATABASES: return QUERY_DATABASES[DEFAULT] elif name == QUERY_NAME_STANDBY: @@ -644,6 +786,10 @@ def query_factory(name, version=NO_VERSION): return QUERY_SHOW_VERSION[DEFAULT] elif name == QUERY_NAME_AUTOVACUUM: return QUERY_AUTOVACUUM[DEFAULT] 
+ elif name == QUERY_NAME_FORCED_AUTOVACUUM: + return QUERY_FORCED_AUTOVACUUM[DEFAULT] + elif name == QUERY_NAME_TX_WRAPAROUND: + return QUERY_TX_WRAPAROUND[DEFAULT] elif name == QUERY_NAME_WAL: if version < 100000: return QUERY_WAL[V96] @@ -656,6 +802,8 @@ def query_factory(name, version=NO_VERSION): if version < 100000: return QUERY_STANDBY_DELTA[V96] return QUERY_STANDBY_DELTA[DEFAULT] + elif name == QUERY_NAME_STANDBY_LAG: + return QUERY_STANDBY_LAG[DEFAULT] elif name == QUERY_NAME_REPSLOT_FILES: if version < 110000: return QUERY_REPSLOT_FILES[V10] @@ -676,6 +824,7 @@ ORDER = [ 'db_stat_tuple_write', 'db_stat_transactions', 'db_stat_connections', + 'db_stat_blocking_pids_avg', 'database_size', 'backend_process', 'backend_usage', @@ -695,7 +844,11 @@ ORDER = [ 'stat_bgwriter_maxwritten', 'replication_slot', 'standby_delta', - 'autovacuum' + 'standby_lag', + 'autovacuum', + 'forced_autovacuum', + 'tx_wraparound_oldest_current_xid', + 'tx_wraparound_percent_towards_wraparound' ] CHARTS = { @@ -752,6 +905,13 @@ CHARTS = { ['temp_files', 'files', 'incremental'] ] }, + 'db_stat_blocking_pids_avg': { + 'options': [None, 'Average number of blocking transactions in db', 'processes', 'db statistics', + 'postgres.db_stat_blocking_pids_avg', 'line'], + 'lines': [ + ['blocking_pids_avg', 'blocking', 'absolute'] + ] + }, 'database_size': { 'options': [None, 'Database size', 'MiB', 'database size', 'postgres.db_size', 'stacked'], 'lines': [ @@ -875,6 +1035,24 @@ CHARTS = { ['brin_summarize', 'brin summarize', 'absolute'] ] }, + 'forced_autovacuum': { + 'options': [None, 'Percent towards forced autovacuum', 'percent', 'autovacuum', 'postgres.forced_autovacuum', 'line'], + 'lines': [ + ['percent_towards_forced_autovacuum', 'percent', 'absolute'] + ] + }, + 'tx_wraparound_oldest_current_xid': { + 'options': [None, 'Oldest current XID', 'xid', 'tx_wraparound', 'postgres.tx_wraparound_oldest_current_xid', 'line'], + 'lines': [ + ['oldest_current_xid', 'xid', 'absolute'] + ] + }, + 
'tx_wraparound_percent_towards_wraparound': { + 'options': [None, 'Percent towards wraparound', 'percent', 'tx_wraparound', 'postgres.percent_towards_wraparound', 'line'], + 'lines': [ + ['percent_towards_wraparound', 'percent', 'absolute'] + ] + }, 'standby_delta': { 'options': [None, 'Standby delta', 'KiB', 'replication delta', 'postgres.standby_delta', 'line'], 'lines': [ @@ -884,6 +1062,14 @@ CHARTS = { ['replay_delta', 'replay delta', 'absolute', 1, 1024] ] }, + 'standby_lag': { + 'options': [None, 'Standby lag', 'seconds', 'replication lag', 'postgres.standby_lag', 'line'], + 'lines': [ + ['write_lag', 'write lag', 'absolute'], + ['flush_lag', 'flush lag', 'absolute'], + ['replay_lag', 'replay lag', 'absolute'] + ] + }, 'replication_slot': { 'options': [None, 'Replication slot files', 'files', 'replication slot', 'postgres.replication_slot', 'line'], 'lines': [ @@ -1073,6 +1259,7 @@ class Service(SimpleService): self.queries[query_factory(QUERY_NAME_BGWRITER)] = METRICS[QUERY_NAME_BGWRITER] self.queries[query_factory(QUERY_NAME_DIFF_LSN, self.server_version)] = METRICS[QUERY_NAME_WAL_WRITES] self.queries[query_factory(QUERY_NAME_STANDBY_DELTA, self.server_version)] = METRICS[QUERY_NAME_STANDBY_DELTA] + self.queries[query_factory(QUERY_NAME_BLOCKERS, self.server_version)] = METRICS[QUERY_NAME_BLOCKERS] if self.do_index_stats: self.queries[query_factory(QUERY_NAME_INDEX_STATS)] = METRICS[QUERY_NAME_INDEX_STATS] @@ -1092,6 +1279,12 @@ class Service(SimpleService): if self.server_version >= 90400: self.queries[query_factory(QUERY_NAME_AUTOVACUUM)] = METRICS[QUERY_NAME_AUTOVACUUM] + self.queries[query_factory(QUERY_NAME_FORCED_AUTOVACUUM)] = METRICS[QUERY_NAME_FORCED_AUTOVACUUM] + self.queries[query_factory(QUERY_NAME_TX_WRAPAROUND)] = METRICS[QUERY_NAME_TX_WRAPAROUND] + + if self.server_version >= 100000: + self.queries[query_factory(QUERY_NAME_STANDBY_LAG)] = METRICS[QUERY_NAME_STANDBY_LAG] + def create_dynamic_charts(self): for database_name in 
self.databases[::-1]: dim = [ @@ -1116,11 +1309,19 @@ class Service(SimpleService): ) for application_name in self.secondaries[::-1]: - add_replication_delta_chart( + add_replication_standby_chart( order=self.order, definitions=self.definitions, name='standby_delta', application_name=application_name, + chart_family='replication delta', + ) + add_replication_standby_chart( + order=self.order, + definitions=self.definitions, + name='standby_lag', + application_name=application_name, + chart_family='replication lag', ) for slot_name in self.replication_slots[::-1]: @@ -1199,7 +1400,7 @@ def add_database_stat_chart(order, definitions, name, database_name): 'lines': create_lines(database_name, chart_template['lines'])} -def add_replication_delta_chart(order, definitions, name, application_name): +def add_replication_standby_chart(order, definitions, name, application_name, chart_family): def create_lines(standby, lines): result = list() for line in lines: @@ -1213,7 +1414,7 @@ def add_replication_delta_chart(order, definitions, name, application_name): order.insert(position, chart_name) name, title, units, _, context, chart_type = chart_template['options'] definitions[chart_name] = { - 'options': [name, title + ': ' + application_name, units, 'replication delta', context, chart_type], + 'options': [name, title + ': ' + application_name, units, chart_family, context, chart_type], 'lines': create_lines(application_name, chart_template['lines'])} diff --git a/collectors/python.d.plugin/postgres/postgres.conf b/collectors/python.d.plugin/postgres/postgres.conf index 1970a7a2..7e354d99 100644 --- a/collectors/python.d.plugin/postgres/postgres.conf +++ b/collectors/python.d.plugin/postgres/postgres.conf @@ -97,14 +97,7 @@ # the client (Netdata) is not considered local, unless it runs from inside # the same container. 
# -# Postgres supported versions are : -# - 9.3 (without autovacuum) -# - 9.4 -# - 9.5 -# - 9.6 -# - 10 -# -# Superuser access is needed for theses charts: +# Superuser access is needed for these charts: # Write-Ahead Logs # Archive Write-Ahead Logs # diff --git a/collectors/python.d.plugin/python.d.plugin.in b/collectors/python.d.plugin/python.d.plugin.in index 9d575d86..b263f229 100644 --- a/collectors/python.d.plugin/python.d.plugin.in +++ b/collectors/python.d.plugin/python.d.plugin.in @@ -500,27 +500,31 @@ class Plugin: self.saver = None self.runs = 0 - def load_config(self): - paths = [ - DIRS.plugin_user_config, - DIRS.plugin_stock_config, - ] - self.log.debug("looking for '{0}' in {1}".format(self.config_name, paths)) - abs_path = multi_path_find(self.config_name, *paths) - if not abs_path: - self.log.warning("'{0}' was not found, using defaults".format(self.config_name)) - return True - - self.log.debug("loading '{0}'".format(abs_path)) + def load_config_file(self, filepath, expected): + self.log.debug("looking for '{0}'".format(filepath)) + if not os.path.isfile(filepath): + log = self.log.info if not expected else self.log.error + log("'{0}' was not found".format(filepath)) + return dict() try: - config = load_config(abs_path) + config = load_config(filepath) except Exception as error: - self.log.error("error on loading '{0}' : {1}".format(abs_path, repr(error))) - return False + self.log.error("error on loading '{0}' : {1}".format(filepath, repr(error))) + return dict() + self.log.debug("'{0}' is loaded".format(filepath)) + return config - self.log.debug("'{0}' is loaded".format(abs_path)) - self.config.update(config) - return True + def load_config(self): + user_config = self.load_config_file( + filepath=os.path.join(DIRS.plugin_user_config, self.config_name), + expected=False, + ) + stock_config = self.load_config_file( + filepath=os.path.join(DIRS.plugin_stock_config, self.config_name), + expected=True, + ) + self.config.update(stock_config) + 
self.config.update(user_config) def load_job_statuses(self): self.log.debug("looking for '{0}' in {1}".format(self.jobs_status_dump_name, DIRS.var_lib)) @@ -593,8 +597,7 @@ class Plugin: return jobs def setup(self): - if not self.load_config(): - return False + self.load_config() if not self.config['enabled']: self.log.info('disabled in the configuration file') diff --git a/collectors/python.d.plugin/smartd_log/smartd_log.chart.py b/collectors/python.d.plugin/smartd_log/smartd_log.chart.py index 402035f1..75b8c8c4 100644 --- a/collectors/python.d.plugin/smartd_log/smartd_log.chart.py +++ b/collectors/python.d.plugin/smartd_log/smartd_log.chart.py @@ -154,7 +154,7 @@ CHARTS = { 'algo': INCREMENTAL, }, 'write_total_err_corrected': { - 'options': [None, 'Write Error Corrected', 'errors', 'errors', 'smartd_log.read_total_err_corrected', 'line'], + 'options': [None, 'Write Error Corrected', 'errors', 'errors', 'smartd_log.write_total_err_corrected', 'line'], 'lines': [], 'attrs': [ATTR_WRITE_ERR_COR], 'algo': INCREMENTAL, diff --git a/collectors/python.d.plugin/varnish/varnish.chart.py b/collectors/python.d.plugin/varnish/varnish.chart.py index 534d7092..506ad026 100644 --- a/collectors/python.d.plugin/varnish/varnish.chart.py +++ b/collectors/python.d.plugin/varnish/varnish.chart.py @@ -197,7 +197,7 @@ class VarnishVersion: class Parser: _backend_new = re.compile(r'VBE.([\d\w_.]+)\(.*?\).(beresp[\w_]+)\s+(\d+)') - _backend_old = re.compile(r'VBE\.[\d\w-]+\.([\w\d_]+).(beresp[\w_]+)\s+(\d+)') + _backend_old = re.compile(r'VBE\.[\d\w-]+\.([\w\d_-]+).(beresp[\w_]+)\s+(\d+)') _default = re.compile(r'([A-Z]+\.)?([\d\w_.]+)\s+(\d+)') def __init__(self): diff --git a/collectors/python.d.plugin/zscores/README.md b/collectors/python.d.plugin/zscores/README.md index 0b447237..7fb189f6 100644 --- a/collectors/python.d.plugin/zscores/README.md +++ b/collectors/python.d.plugin/zscores/README.md @@ -43,7 +43,7 @@ looking at first (for more background information on why 3 stddev see 
[here](https://en.wikipedia.org/wiki/68%E2%80%9395%E2%80%9399.7_rule#:~:text=In%20the%20empirical%20sciences%20the,99.7%25%20probability%20as%20near%20certainty.)) . -In the example below we basically took a sledge hammer to our system so its not suprising that lots of charts light up +In the example below we basically took a sledge hammer to our system so its not surprising that lots of charts light up after we run the stress command. In a more realistic setting you might just see a handful of charts with strange zscores and that could be a good indication of where to look first. @@ -101,9 +101,9 @@ information about each one and what it does. host: '127.0.0.1:19999' # What charts to pull data for - A regex like 'system\..*|' or 'system\..*|apps.cpu|apps.mem' etc. charts_regex: 'system\..*' -# length of time to base calulcations off for mean and stddev +# length of time to base calculations off for mean and stddev train_secs: 14400 # use last 4 hours to work out the mean and stddev for the zscore -# offset preceeding latest data to ignore when calculating mean and stddev +# offset preceding latest data to ignore when calculating mean and stddev offset_secs: 300 # ignore last 5 minutes of data when calculating the mean and stddev # recalculate the mean and stddev every n steps of the collector train_every_n: 900 # recalculate mean and stddev every 15 minutes @@ -114,11 +114,11 @@ z_clip: 10 # cap each zscore at 10 so as to avoid really large individual zscore # set z_abs: 'true' to make all zscores be absolute values only. 
z_abs: 'true' # burn in period in which to initially calculate mean and stddev on every step -burn_in: 2 # on startup of the collector continually update the mean and stddev in case any gaps or inital calculations fail to return +burn_in: 2 # on startup of the collector continually update the mean and stddev in case any gaps or initial calculations fail to return # mode can be to get a zscore 'per_dim' or 'per_chart' mode: 'per_chart' # 'per_chart' means individual dimension level smoothed zscores will be aggregated to one zscore per chart per time step # per_chart_agg is how you aggregate from dimension to chart when mode='per_chart' -per_chart_agg: 'mean' # 'absmax' will take the max absolute value accross all dimensions but will maintain the sign. 'mean' will just average. +per_chart_agg: 'mean' # 'absmax' will take the max absolute value across all dimensions but will maintain the sign. 'mean' will just average. ``` ## Notes @@ -128,7 +128,7 @@ per_chart_agg: 'mean' # 'absmax' will take the max absolute value accross all di calls to the netdata rest api to get the required data for each chart when calculating the mean and stddev. - It may take a few hours or so for the collector to 'settle' into it's typical behaviour in terms of the scores you will see in the normal running of your system. -- The zscore you see for each chart when using `mode: 'per_chart'` as actually an aggregated zscore accross all the +- The zscore you see for each chart when using `mode: 'per_chart'` as actually an aggregated zscore across all the dimensions on the underlying chart. - If you set `mode: 'per_dim'` then you will see a zscore for each dimension on each chart as opposed to one per chart. 
- As this collector does some calculations itself in python you may want to try it out first on a test or development diff --git a/collectors/python.d.plugin/zscores/zscores.chart.py b/collectors/python.d.plugin/zscores/zscores.chart.py index 48397d8d..1099b937 100644 --- a/collectors/python.d.plugin/zscores/zscores.chart.py +++ b/collectors/python.d.plugin/zscores/zscores.chart.py @@ -24,11 +24,11 @@ ORDER = [ CHARTS = { 'z': { - 'options': ['z', 'Z Score', 'z', 'Z Score', 'z', 'line'], + 'options': ['z', 'Z Score', 'z', 'Z Score', 'zscores.z', 'line'], 'lines': [] }, '3stddev': { - 'options': ['3stddev', 'Z Score >3', 'count', '3 Stddev', '3stddev', 'stacked'], + 'options': ['3stddev', 'Z Score >3', 'count', '3 Stddev', 'zscores.3stddev', 'stacked'], 'lines': [] }, } diff --git a/collectors/python.d.plugin/zscores/zscores.conf b/collectors/python.d.plugin/zscores/zscores.conf index fab18c78..07d62ebe 100644 --- a/collectors/python.d.plugin/zscores/zscores.conf +++ b/collectors/python.d.plugin/zscores/zscores.conf @@ -83,7 +83,7 @@ local: # length of time to base calculations off for mean and stddev train_secs: 14400 # use last 4 hours to work out the mean and stddev for the zscore - # offset preceeding latest data to ignore when calculating mean and stddev + # offset preceding latest data to ignore when calculating mean and stddev offset_secs: 300 # ignore last 5 minutes of data when calculating the mean and stddev # recalculate the mean and stddev every n steps of the collector @@ -99,10 +99,10 @@ local: z_abs: 'true' # burn in period in which to initially calculate mean and stddev on every step - burn_in: 2 # on startup of the collector continually update the mean and stddev in case any gaps or inital calculations fail to return + burn_in: 2 # on startup of the collector continually update the mean and stddev in case any gaps or initial calculations fail to return # mode can be to get a zscore 'per_dim' or 'per_chart' mode: 'per_chart' # 'per_chart' means 
individual dimension level smoothed zscores will be aggregated to one zscore per chart per time step # per_chart_agg is how you aggregate from dimension to chart when mode='per_chart' - per_chart_agg: 'mean' # 'absmax' will take the max absolute value accross all dimensions but will maintain the sign. 'mean' will just average. + per_chart_agg: 'mean' # 'absmax' will take the max absolute value across all dimensions but will maintain the sign. 'mean' will just average. diff --git a/collectors/slabinfo.plugin/slabinfo.c b/collectors/slabinfo.plugin/slabinfo.c index 863f440e..58d9c4ff 100644 --- a/collectors/slabinfo.plugin/slabinfo.c +++ b/collectors/slabinfo.plugin/slabinfo.c @@ -51,6 +51,8 @@ char *netdata_configured_host_prefix = ""; int running = 1; int debug = 0; +size_t lines_discovered = 0; +int redraw_chart = 0; // ---------------------------------------------------------------------------- @@ -187,6 +189,10 @@ struct slabinfo *read_file_slabinfo() { // Iterate on all lines to populate / update the slabinfo struct size_t lines = procfile_lines(ff), l; + if (unlikely(lines != lines_discovered)) { + lines_discovered = lines; + redraw_chart = 1; + } slabdebug(" Read %lu lines from procfile", (unsigned long)lines); for(l = 2; l < lines; l++) { @@ -254,7 +260,8 @@ unsigned int do_slab_stats(int update_every) { sactive = read_file_slabinfo(); // Init Charts - if (unlikely(loops == 0)) { + if (unlikely(redraw_chart)) { + redraw_chart = 0; // Memory Usage printf("CHART %s.%s '' 'Memory Usage' 'B' '%s' '' line %d %d %s\n" , CHART_TYPE diff --git a/collectors/statsd.plugin/README.md b/collectors/statsd.plugin/README.md index f3050ceb..ba4ada51 100644 --- a/collectors/statsd.plugin/README.md +++ b/collectors/statsd.plugin/README.md @@ -21,7 +21,7 @@ Netdata statsd is fast. It can collect more than **1.200.000 metrics per second* # Available StatsD collectors -Netdata ships with collectors implemented using the StatsD collector. 
They are configuration files (as you will read bellow), but they function as a collector, in the sense that configuration file organize the metrics of a data source into pre-defined charts. +Netdata ships with collectors implemented using the StatsD collector. They are configuration files (as you will read below), but they function as a collector, in the sense that configuration file organize the metrics of a data source into pre-defined charts. On these charts, we can have alarms as with any metric and chart. @@ -64,7 +64,7 @@ Netdata fully supports the StatsD protocol. All StatsD client libraries can be u - Timers use `|ms` - Histograms use `|h` - The only difference between the two, is the `units` of the charts, as timers report *miliseconds*. + The only difference between the two, is the `units` of the charts, as timers report *milliseconds*. [Sampling rate](#sampling-rates) is supported. @@ -102,7 +102,7 @@ When sending multiple packets over UDP, it is important not to exceed the networ Netdata will accept UDP packets up to 9000 bytes, but the underlying network will not exceed MTU. -> You can read more about the network maxium transmission unit(MTU) in this cloudflare [article](https://www.cloudflare.com/en-gb/learning/network-layer/what-is-mtu/). +> You can read more about the network maximum transmission unit(MTU) in this cloudflare [article](https://www.cloudflare.com/en-gb/learning/network-layer/what-is-mtu/). 
## Configuration diff --git a/collectors/statsd.plugin/statsd.c b/collectors/statsd.plugin/statsd.c index e30cc6e2..9e152b09 100644 --- a/collectors/statsd.plugin/statsd.c +++ b/collectors/statsd.plugin/statsd.c @@ -196,13 +196,13 @@ typedef struct statsd_app_chart_dimension { } STATSD_APP_CHART_DIM; typedef struct statsd_app_chart { - const char *source; const char *id; const char *name; const char *title; const char *family; const char *context; const char *units; + const char *module; long priority; RRDSET_TYPE chart_type; STATSD_APP_CHART_DIM *dimensions; @@ -1214,10 +1214,15 @@ static int statsd_readfile(const char *filename, STATSD_APP *app, STATSD_APP_CHA chart->next = app->charts; app->charts = chart; - { - char lineandfile[FILENAME_MAX + 1]; - snprintfz(lineandfile, FILENAME_MAX, "%zu@%s", line, filename); - chart->source = strdupz(lineandfile); + if (!strncmp( + filename, + netdata_configured_stock_config_dir, + strlen(netdata_configured_stock_config_dir))) { + char tmpfilename[FILENAME_MAX + 1]; + strncpyz(tmpfilename, filename, FILENAME_MAX); + chart->module = strdupz(basename(tmpfilename)); + } else { + chart->module = strdupz("synthetic_chart"); } } } @@ -1996,7 +2001,7 @@ static inline void statsd_update_app_chart(STATSD_APP *app, STATSD_APP_CHART *ch , chart->title // title , chart->units // units , PLUGIN_STATSD_NAME // plugin - , chart->source // module + , chart->module // module , chart->priority // priority , statsd.update_every // update every , chart->chart_type // chart type @@ -2175,8 +2180,8 @@ void *statsd_main(void *ptr) { statsd.histogram_percentile = 95.0; } { - char buffer[100 + 1]; - snprintf(buffer, 100, "%0.1f%%", statsd.histogram_percentile); + char buffer[314 + 1]; + snprintfz(buffer, 314, "%0.1f%%", statsd.histogram_percentile); statsd.histogram_percentile_str = strdupz(buffer); } @@ -2436,7 +2441,7 @@ void *statsd_main(void *ptr) { char title[100 + 1]; snprintfz(id, 100, "plugin_statsd_collector%d_cpu", i + 1); - 
snprintfz(title, 100, "NetData statsd collector thread No %d CPU usage", i + 1); + snprintfz(title, 100, "Netdata statsd collector thread No %d CPU usage", i + 1); statsd.collection_threads_status[i].st_cpu = rrdset_create_localhost( "netdata" diff --git a/collectors/statsd.plugin/statsd.h b/collectors/statsd.plugin/statsd.h index b741be76..37d6a08b 100644 --- a/collectors/statsd.plugin/statsd.h +++ b/collectors/statsd.plugin/statsd.h @@ -3,7 +3,7 @@ #ifndef NETDATA_STATSD_H #define NETDATA_STATSD_H 1 -#include "../../daemon/common.h" +#include "daemon/common.h" #define STATSD_LISTEN_PORT 8125 #define STATSD_LISTEN_BACKLOG 4096 diff --git a/collectors/tc.plugin/plugin_tc.h b/collectors/tc.plugin/plugin_tc.h index c6465841..d51fcf67 100644 --- a/collectors/tc.plugin/plugin_tc.h +++ b/collectors/tc.plugin/plugin_tc.h @@ -3,7 +3,7 @@ #ifndef NETDATA_PLUGIN_TC_H #define NETDATA_PLUGIN_TC_H 1 -#include "../../daemon/common.h" +#include "daemon/common.h" #if (TARGET_OS == OS_LINUX) diff --git a/collectors/timex.plugin/plugin_timex.h b/collectors/timex.plugin/plugin_timex.h index 6025641a..f8378616 100644 --- a/collectors/timex.plugin/plugin_timex.h +++ b/collectors/timex.plugin/plugin_timex.h @@ -3,7 +3,7 @@ #ifndef NETDATA_PLUGIN_TIMEX_H #define NETDATA_PLUGIN_TIMEX_H -#include "../../daemon/common.h" +#include "daemon/common.h" #if (TARGET_OS == OS_LINUX) diff --git a/collectors/xenstat.plugin/xenstat_plugin.c b/collectors/xenstat.plugin/xenstat_plugin.c index a322dd1c..abcb5a1c 100644 --- a/collectors/xenstat.plugin/xenstat_plugin.c +++ b/collectors/xenstat.plugin/xenstat_plugin.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#include "../../libnetdata/libnetdata.h" +#include "libnetdata/libnetdata.h" #include <xenstat.h> #include <libxl.h> |