diff options
Diffstat (limited to '')
87 files changed, 2961 insertions, 2908 deletions
diff --git a/collectors/COLLECTORS.md b/collectors/COLLECTORS.md index 02dfd50a1..1be50fd14 100644 --- a/collectors/COLLECTORS.md +++ b/collectors/COLLECTORS.md @@ -21,13 +21,19 @@ If you want to use a Python version of a collector, you need to explicitly [disa version](/docs/collect/enable-configure.md), and enable the Python version. Netdata then skips the Go version and attempts to load the Python version and its accompanying configuration file. -If you don't see the app/service you'd like to monitor in this list, check out our [GitHub -issues](https://github.com/netdata/netdata/issues). Use the search bar to look for previous discussions about that -collector—we may be looking for contributions from users such as yourself! If you don't see the collector there, make a -[feature request](https://community.netdata.cloud/c/feature-requests/7/none) on our community forums. +If you don't see the app/service you'd like to monitor in this list: -- [Supported collectors list](#supported-collectors-list) - - [Service and application collectors](#service-and-application-collectors) +- Check out our [GitHub issues](https://github.com/netdata/netdata/issues). Use the search bar to look for previous + discussions about that collector—we may be looking for assistance from users such as yourself! +- If you don't see the collector there, you can make + a [feature request](https://github.com/netdata/netdata/issues/new/choose) on GitHub. +- If you have basic software development skills, you can add your own plugin + in [Go](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin#how-to-develop-a-collector) + or [Python](https://learn.netdata.cloud/guides/python-collector) + +Supported Collectors List: + +- [Service and application collectors](#service-and-application-collectors) - [Generic](#generic) - [APM (application performance monitoring)](#apm-application-performance-monitoring) - [Containers and VMs](#containers-and-vms) @@ -43,7 +49,7 @@ collector—we may be looking for contributions from users such as yourself! If - [Search](#search) - [Storage](#storage) - [Web](#web) - - [System collectors](#system-collectors) +- [System collectors](#system-collectors) - [Applications](#applications) - [Disks and filesystems](#disks-and-filesystems) - [eBPF](#ebpf) @@ -54,10 +60,10 @@ collector—we may be looking for contributions from users such as yourself! If - [Processes](#processes) - [Resources](#resources) - [Users](#users) - - [Netdata collectors](#netdata-collectors) - - [Orchestrators](#orchestrators) - - [Third-party collectors](#third-party-collectors) - - [Etc](#etc) +- [Netdata collectors](#netdata-collectors) +- [Orchestrators](#orchestrators) +- [Third-party collectors](#third-party-collectors) +- [Etc](#etc) ## Service and application collectors @@ -125,7 +131,8 @@ configure any of these collectors according to your setup and infrastructure. - [OracleDB](/collectors/python.d.plugin/oracledb/README.md): Monitor database performance and health metrics. - [Pika](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/pika/): Gather metric, such as clients, memory usage, queries, and more from the Redis interface-compatible database. -- [Postgres](/collectors/python.d.plugin/postgres/README.md): Collect database health and performance metrics. +- [Postgres](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/postgres): Collect database health + and performance metrics. - [ProxySQL](/collectors/python.d.plugin/proxysql/README.md): Monitor database backend and frontend performance metrics. - [Redis](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/redis/): Monitor status from any @@ -168,9 +175,8 @@ configure any of these collectors according to your setup and infrastructure. plugins metrics from an endpoint provided by `in_monitor plugin`. - [Logstash](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/logstash/): Monitor JVM threads, memory usage, garbage collection statistics, and more. -- [OpenVPN status logs](/collectors/python.d.plugin/ovpn_status_log/README.md): Parse server log files and provide - summary - (client, traffic) metrics. +- [OpenVPN status logs](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/openvpn_status_log): Parse + server log files and provide summary (client, traffic) metrics. - [Squid web server logs](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/squidlog/): Tail Squid access logs to return the volume of requests, types of requests, bandwidth, and much more. - [Web server logs (Go version for Apache, @@ -199,8 +205,8 @@ configure any of these collectors according to your setup and infrastructure. - [Bind 9](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/bind/): Collect nameserver summary performance statistics via a web interface (`statistics-channels` feature). -- [Chrony](/collectors/python.d.plugin/chrony/README.md): Monitor the precision and statistics of a local `chronyd` - server. +- [Chrony](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/chrony): Monitor the precision and + statistics of a local `chronyd` server. - [CoreDNS](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/coredns/): Measure DNS query round trip time. - [Dnsmasq](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/dnsmasq_dhcp/): Automatically @@ -250,13 +256,14 @@ configure any of these collectors according to your setup and infrastructure. - [AM2320](/collectors/python.d.plugin/am2320/README.md): Monitor sensor temperature and humidity. - [Access point](/collectors/charts.d.plugin/ap/README.md): Monitor client, traffic and signal metrics using the `aw` - tool. + tool. - [APC UPS](/collectors/charts.d.plugin/apcupsd/README.md): Capture status information using the `apcaccess` tool. - [Energi Core](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/energid): Monitor - blockchain indexes, memory usage, network usage, and transactions of wallet instances. + blockchain indexes, memory usage, network usage, and transactions of wallet instances. - [UPS/PDU](/collectors/charts.d.plugin/nut/README.md): Read the status of UPS/PDU devices using the `upsc` tool. -- [SNMP devices](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/snmp): Gather data using the SNMP protocol. -- [1-Wire sensors](/collectors/python.d.plugin/w1sensor/README.md): Monitor sensor temperature. +- [SNMP devices](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/snmp): Gather data using the SNMP + protocol. +- [1-Wire sensors](/collectors/python.d.plugin/w1sensor/README.md): Monitor sensor temperature. ### Search @@ -481,9 +488,9 @@ Plugin orchestrators organize and run many of the above collectors. If you're interested in developing a new collector that you'd like to contribute to Netdata, we highly recommend using the `go.d.plugin`. -- [go.d.plugin](https://github.com/netdata/go.d.plugin): An orchestrator for data collection modules written in `go`. -- [python.d.plugin](python.d.plugin/README.md): An orchestrator for data collection modules written in `python` v2/v3. -- [charts.d.plugin](charts.d.plugin/README.md): An orchestrator for data collection modules written in `bash` v4+. +- [go.d.plugin](https://github.com/netdata/go.d.plugin): An orchestrator for data collection modules written in `go`. +- [python.d.plugin](python.d.plugin/README.md): An orchestrator for data collection modules written in `python` v2/v3. +- [charts.d.plugin](charts.d.plugin/README.md): An orchestrator for data collection modules written in `bash` v4+. ## Third-party collectors diff --git a/collectors/apps.plugin/apps_groups.conf b/collectors/apps.plugin/apps_groups.conf index 1d1af4b70..89db6c4e8 100644 --- a/collectors/apps.plugin/apps_groups.conf +++ b/collectors/apps.plugin/apps_groups.conf @@ -121,7 +121,7 @@ columndb: clickhouse-server* # ----------------------------------------------------------------------------- # email servers -email: dovecot imapd pop3d amavis* zmstat* zmmailboxdmgr saslauthd opendkim postfwd2 smtp* lmtp* sendmail postfix master pickup qmgr showq tlsmgr postscreen oqmgr +email: dovecot imapd pop3d amavis* zmstat-* zmdiaglog zmmailboxdmgr saslauthd opendkim postfwd2 smtp* lmtp* sendmail postfix master pickup qmgr showq tlsmgr postscreen oqmgr # ----------------------------------------------------------------------------- # network, routing, VPN @@ -227,7 +227,7 @@ dnsdist: dnsdist # installation / compilation / debugging build: cc1 cc1plus as gcc* cppcheck ld make cmake automake autoconf autoreconf -build: git gdb valgrind* +build: cargo rustc bazel buck git gdb valgrind* # ----------------------------------------------------------------------------- # antivirus diff --git a/collectors/apps.plugin/apps_plugin.c b/collectors/apps.plugin/apps_plugin.c index 8a115d061..8521e078e 100644 --- a/collectors/apps.plugin/apps_plugin.c +++ b/collectors/apps.plugin/apps_plugin.c @@ -173,7 +173,8 @@ static kernel_uint_t global_gtime = 0; // the normalization ratios, as calculated by normalize_utilization() -double utime_fix_ratio = 1.0, +NETDATA_DOUBLE + utime_fix_ratio = 1.0, stime_fix_ratio = 1.0, gtime_fix_ratio = 1.0, minflt_fix_ratio = 1.0, @@ -501,7 +502,8 @@ struct file_descriptor { static int all_files_len = 0, all_files_size = 0; - long double currentmaxfds = 0; + +long currentmaxfds = 0; // ---------------------------------------------------------------------------- // read users and groups from files @@ -3021,7 +3023,7 @@ static inline void aggregate_pid_fds_on_targets(struct pid_stat *p) { reallocate_target_fds(u); reallocate_target_fds(g); - long double currentfds = 0; + long currentfds = 0; size_t c, size = p->fds_size; struct pid_fd *fds = p->fds; for(c = 0; c < size ;c++) { @@ -3373,7 +3375,7 @@ static void normalize_utilization(struct target *root) { gtime_fix_ratio = cutime_fix_ratio = cstime_fix_ratio = - cgtime_fix_ratio = 1.0; //(double)(global_utime + global_stime) / (double)(utime + cutime + stime + cstime); + cgtime_fix_ratio = 1.0; //(NETDATA_DOUBLE)(global_utime + global_stime) / (NETDATA_DOUBLE)(utime + cutime + stime + cstime); } else if((global_utime + global_stime > utime + stime) && (cutime || cstime)) { // children resources are too high @@ -3383,7 +3385,7 @@ static void normalize_utilization(struct target *root) { gtime_fix_ratio = 1.0; cutime_fix_ratio = cstime_fix_ratio = - cgtime_fix_ratio = (double)((global_utime + global_stime) - (utime + stime)) / (double)(cutime + cstime); + cgtime_fix_ratio = (NETDATA_DOUBLE)((global_utime + global_stime) - (utime + stime)) / (NETDATA_DOUBLE)(cutime + cstime); } else if(utime || stime) { // even running processes are unrealistic @@ -3391,7 +3393,7 @@ static void normalize_utilization(struct target *root) { // lower the running processes resources utime_fix_ratio = stime_fix_ratio = - gtime_fix_ratio = (double)(global_utime + global_stime) / (double)(utime + stime); + gtime_fix_ratio = (NETDATA_DOUBLE)(global_utime + global_stime) / (NETDATA_DOUBLE)(utime + stime); cutime_fix_ratio = cstime_fix_ratio = cgtime_fix_ratio = 0.0; @@ -3439,14 +3441,14 @@ static void normalize_utilization(struct target *root) { if(utime || stime || gtime) majflt_fix_ratio = - minflt_fix_ratio = (double)(utime * utime_fix_ratio + stime * stime_fix_ratio + gtime * gtime_fix_ratio) / (double)(utime + stime + gtime); + minflt_fix_ratio = (NETDATA_DOUBLE)(utime * utime_fix_ratio + stime * stime_fix_ratio + gtime * gtime_fix_ratio) / (NETDATA_DOUBLE)(utime + stime + gtime); else minflt_fix_ratio = majflt_fix_ratio = 1.0; if(cutime || cstime || cgtime) cmajflt_fix_ratio = - cminflt_fix_ratio = (double)(cutime * cutime_fix_ratio + cstime * cstime_fix_ratio + cgtime * cgtime_fix_ratio) / (double)(cutime + cstime + cgtime); + cminflt_fix_ratio = (NETDATA_DOUBLE)(cutime * cutime_fix_ratio + cstime * cstime_fix_ratio + cgtime * cgtime_fix_ratio) / (NETDATA_DOUBLE)(cutime + cstime + cgtime); else cminflt_fix_ratio = cmajflt_fix_ratio = 1.0; diff --git a/collectors/cgroups.plugin/cgroup-name.sh b/collectors/cgroups.plugin/cgroup-name.sh index 00d7e614c..d1277b745 100755 --- a/collectors/cgroups.plugin/cgroup-name.sh +++ b/collectors/cgroups.plugin/cgroup-name.sh @@ -45,11 +45,24 @@ fatal() { exit 1 } +function parse_docker_like_inspect_output() { + local output="${1}" + eval "$(grep -E "^(NOMAD_NAMESPACE|NOMAD_JOB_NAME|NOMAD_TASK_NAME|NOMAD_SHORT_ALLOC_ID|CONT_NAME)=" <<<"$output")" + if [ -n "$NOMAD_NAMESPACE" ] && [ -n "$NOMAD_JOB_NAME" ] && [ -n "$NOMAD_TASK_NAME" ] && [ -n "$NOMAD_SHORT_ALLOC_ID" ]; then + echo "${NOMAD_NAMESPACE}-${NOMAD_JOB_NAME}-${NOMAD_TASK_NAME}-${NOMAD_SHORT_ALLOC_ID}" + else + echo "${CONT_NAME}" | sed 's|^/||' + fi +} + function docker_like_get_name_command() { local command="${1}" local id="${2}" - info "Running command: ${command} ps --filter=id=\"${id}\" --format=\"{{.Names}}\"" - NAME="$(${command} ps --filter=id="${id}" --format="{{.Names}}")" + info "Running command: ${command} inspect --format='{{range .Config.Env}}{{println .}}{{end}}CONT_NAME={{ .Name}}' \"${id}\"" + if OUTPUT="$(${command} inspect --format='{{range .Config.Env}}{{println .}}{{end}}CONT_NAME={{ .Name}}' "${id}")" && + [ -n "$OUTPUT" ]; then + NAME="$(parse_docker_like_inspect_output "$OUTPUT")" + fi return 0 } @@ -61,7 +74,7 @@ function docker_like_get_name_api() { warning "No ${host_var} is set" return 1 fi - if ! command -v jq > /dev/null 2>&1; then + if ! command -v jq >/dev/null 2>&1; then warning "Can't find jq command line tool. jq is required for netdata to retrieve container name using ${host} API, falling back to docker ps" return 1 fi @@ -72,7 +85,9 @@ function docker_like_get_name_api() { info "Running API command: curl \"${host}${path}\"" JSON=$(curl -sS "${host}${path}") fi - NAME=$(echo "${JSON}" | jq -r .Name,.Config.Hostname | grep -v null | head -n1 | sed 's|^/||') + if OUTPUT=$(echo "${JSON}" | jq -r '.Config.Env[],"CONT_NAME=\(.Name)"') && [ -n "$OUTPUT" ]; then + NAME="$(parse_docker_like_inspect_output "$OUTPUT")" + fi return 0 } @@ -185,6 +200,13 @@ function k8s_get_kubepod_name() { # | | |-- kubepods-besteffort-pod10fb5647_c724_400c_b9cc_0e6eae3110e7.slice # | | | |-- docker-36e5eb5056dfdf6dbb75c0c44a1ecf23217fe2c50d606209d8130fcbb19fb5a7.scope # + # kind v0.14.0 + # |-- kubelet.slice + # | |-- kubelet-kubepods.slice + # | | |-- kubelet-kubepods-besteffort.slice + # | | | |-- kubelet-kubepods-besteffort-pod7881ed9e_c63e_4425_b5e0_ac55a08ae939.slice + # | | | | |-- cri-containerd-00c7939458bffc416bb03451526e9fde13301d6654cfeadf5b4964a7fb5be1a9.scope + # # NOTE: cgroups plugin # - uses '_' to join dir names (so it is <parent>_<child>_<child>_...) # - replaces '.' with '-' @@ -193,7 +215,7 @@ function k8s_get_kubepod_name() { local cgroup_path="${1}" local id="${2}" - if [[ ! $id =~ ^kubepods ]]; then + if [[ ! $id =~ ^.*kubepods.* ]]; then warning "${fn}: '${id}' is not kubepod cgroup." return 1 fi @@ -371,7 +393,7 @@ function k8s_get_kubepod_name() { name+="_$(get_lbl_val "$labels" pod_name)" labels=$(add_lbl_prefix "$labels" "k8s_") name+=" $labels" - else + else return 2 fi fi @@ -393,7 +415,7 @@ function k8s_get_name() { local id="${2}" NAME=$(k8s_get_kubepod_name "$cgroup_path" "$id") - + case "$?" in 0) NAME="k8s_${NAME}" @@ -511,6 +533,11 @@ if [ -z "${NAME}" ]; then #shellcheck disable=SC1117 DOCKERID="$(echo "${CGROUP}" | sed "s|^.*ecs[-_/].*[-_/]\([a-fA-F0-9]\+\)[-_\.]\?.*$|\1|")" docker_validate_id "${DOCKERID}" + elif [[ ${CGROUP} =~ system.slice_containerd.service_cpuset_[a-fA-F0-9]+[-_\.]?.*$ ]]; then + # docker containers under containerd + #shellcheck disable=SC1117 + DOCKERID="$(echo "${CGROUP}" | sed "s|^.*ystem.slice_containerd.service_cpuset_\([a-fA-F0-9]\+\)[-_\.]\?.*$|\1|")" + docker_validate_id "${DOCKERID}" elif [[ ${CGROUP} =~ ^.*libpod-[a-fA-F0-9]+.*$ ]]; then # Podman PODMANID="$(echo "${CGROUP}" | sed "s|^.*libpod-\([a-fA-F0-9]\+\).*$|\1|")" @@ -522,16 +549,13 @@ if [ -z "${NAME}" ]; then elif [[ ${CGROUP} =~ machine.slice_machine.*-lxc ]]; then # libvirtd / lxc containers - # examples: - # before: machine.slice machine-lxc/x2d969/x2dhubud0xians01.scope - # after: lxc/hubud0xians01 - # before: machine.slice_machine-lxc/x2d969/x2dhubud0xians01.scope/libvirt_init.scope - # after: lxc/hubud0xians01/libvirt_init - NAME="lxc/$(echo "${CGROUP}" | sed 's/machine.slice_machine.*-lxc//; s/\/x2d[[:digit:]]*//; s/\/x2d//g; s/\.scope//g')" + # machine.slice machine-lxc/x2d969/x2dhubud0xians01.scope => lxc/hubud0xians01 + # machine.slice_machine-lxc/x2d969/x2dhubud0xians01.scope/libvirt_init.scope => lxc/hubud0xians01/libvirt_init + NAME="lxc/$(echo "${CGROUP}" | sed 's/machine.slice_machine.*-lxc//; s/[\/_]x2d[[:digit:]]*//; s/[\/_]x2d//g; s/\.scope//g')" elif [[ ${CGROUP} =~ machine.slice_machine.*-qemu ]]; then # libvirtd / qemu virtual machines - # NAME="$(echo ${CGROUP} | sed 's/machine.slice_machine.*-qemu//; s/\/x2d//; s/\/x2d/\-/g; s/\.scope//g')" - NAME="qemu_$(echo "${CGROUP}" | sed 's/machine.slice_machine.*-qemu//; s/\/x2d[[:digit:]]*//; s/\/x2d//g; s/\.scope//g')" + # machine.slice_machine-qemu_x2d1_x2dopnsense.scope => qemu_opnsense + NAME="qemu_$(echo "${CGROUP}" | sed 's/machine.slice_machine.*-qemu//; s/[\/_]x2d[[:digit:]]*//; s/[\/_]x2d//g; s/\.scope//g')" elif [[ ${CGROUP} =~ machine_.*\.libvirt-qemu ]]; then # libvirtd / qemu virtual machines diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c index 5676ef8ca..330562173 100644 --- a/collectors/cgroups.plugin/sys_fs_cgroup.c +++ b/collectors/cgroups.plugin/sys_fs_cgroup.c @@ -6,6 +6,12 @@ #define PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME "systemd" #define PLUGIN_CGROUPS_MODULE_CGROUPS_NAME "/sys/fs/cgroup" +#ifdef NETDATA_INTERNAL_CHECKS +#define CGROUP_PROCFILE_FLAG PROCFILE_FLAG_DEFAULT +#else +#define CGROUP_PROCFILE_FLAG PROCFILE_FLAG_NO_ERROR_ON_FILE_IO +#endif + // main cgroups thread worker jobs #define WORKER_CGROUPS_LOCK 0 #define WORKER_CGROUPS_READ 1 @@ -446,12 +452,17 @@ void read_cgroup_plugin_configuration() { // ---------------------------------------------------------------- " /machine.slice/*.service " // #3367 systemd-nspawn - " /kubepods/pod*/* " // k8s containers - " /kubepods/*/pod*/* " // k8s containers // ---------------------------------------------------------------- - " !/kubepods* " // all other k8s cgroups + " */kubepods/pod*/* " // k8s containers + " */kubepods/*/pod*/* " // k8s containers + " */*-kubepods-pod*/* " // k8s containers + " */*-kubepods-*-pod*/* " // k8s containers + " !*kubepods* !*kubelet* " // all other k8s cgroups + + // ---------------------------------------------------------------- + " !*/vcpu* " // libvirtd adds these sub-cgroups " !*/emulator " // libvirtd adds these sub-cgroups " !*.mount " @@ -523,9 +534,11 @@ void read_cgroup_plugin_configuration() { " *docker* " " *lxc* " " *qemu* " - " /kubepods/pod*/* " // k8s containers - " /kubepods/*/pod*/* " // k8s containers - " !/kubepods* " // all other k8s cgroups + " */kubepods/pod*/* " // k8s containers + " */kubepods/*/pod*/* " // k8s containers + " */*-kubepods-pod*/* " // k8s containers + " */*-kubepods-*-pod*/* " // k8s containers + " !*kubepods* !*kubelet* " // all other k8s cgroups " *.libvirt-qemu " // #3010 " * " ), NULL, SIMPLE_PATTERN_EXACT); @@ -755,6 +768,12 @@ struct cgroup_network_interface { struct cgroup_network_interface *next; }; +enum cgroups_container_orchestrator { + CGROUPS_ORCHESTRATOR_UNSET, + CGROUPS_ORCHESTRATOR_UNKNOWN, + CGROUPS_ORCHESTRATOR_K8S +}; + // *** WARNING *** The fields are not thread safe. Take care of safe usage. struct cgroup { uint32_t options; @@ -776,7 +795,9 @@ struct cgroup { char *chart_title; - struct label *chart_labels; + DICTIONARY *chart_labels; + + int container_orchestrator; struct cpuacct_stat cpuacct_stat; struct cpuacct_usage cpuacct_usage; @@ -835,7 +856,7 @@ struct cgroup { unsigned long long cpu_cfs_quota; RRDSETVAR *chart_var_cpu_limit; - calculated_number prev_cpu_usage; + NETDATA_DOUBLE prev_cpu_usage; char *filename_memory_limit; unsigned long long memory_limit; @@ -922,6 +943,10 @@ static inline int is_cgroup_systemd_service(struct cgroup *cg) { } // --------------------------------------------------------------------------------------------- +static int k8s_is_kubepod(struct cgroup *cg) { + return cg->container_orchestrator == CGROUPS_ORCHESTRATOR_K8S; +} + static int k8s_is_container(const char *id) { // examples: // https://github.com/netdata/netdata/blob/0fc101679dcd12f1cb8acdd07bb4c85d8e553e53/collectors/cgroups.plugin/cgroup-name.sh#L121-L147 @@ -949,7 +974,7 @@ static int k8s_get_container_first_proc_comm(const char *id, char *comm) { char filename[FILENAME_MAX + 1]; snprintfz(filename, FILENAME_MAX, "%s/%s/cgroup.procs", cgroup_cpuacct_base, id); - ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_DEFAULT); + ff = procfile_reopen(ff, filename, NULL, CGROUP_PROCFILE_FLAG); if (unlikely(!ff)) { debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename); return 1; @@ -1012,7 +1037,7 @@ static unsigned long long calc_percentage(unsigned long long value, unsigned lon if (total == 0) { return 0; } - return (calculated_number)value / (calculated_number)total * 100; + return (NETDATA_DOUBLE)value / (NETDATA_DOUBLE)total * 100; } static int calc_cgroup_depth(const char *id) { @@ -1031,7 +1056,7 @@ static inline void cgroup_read_cpuacct_stat(struct cpuacct_stat *cp) { static procfile *ff = NULL; if(likely(cp->filename)) { - ff = procfile_reopen(ff, cp->filename, NULL, PROCFILE_FLAG_DEFAULT); + ff = procfile_reopen(ff, cp->filename, NULL, CGROUP_PROCFILE_FLAG); if(unlikely(!ff)) { cp->updated = 0; cgroups_check = 1; @@ -1078,7 +1103,7 @@ static inline void cgroup_read_cpuacct_cpu_stat(struct cpuacct_cpu_throttling *c } static procfile *ff = NULL; - ff = procfile_reopen(ff, cp->filename, NULL, PROCFILE_FLAG_DEFAULT); + ff = procfile_reopen(ff, cp->filename, NULL, CGROUP_PROCFILE_FLAG); if (unlikely(!ff)) { cp->updated = 0; cgroups_check = 1; @@ -1134,7 +1159,7 @@ static inline void cgroup2_read_cpuacct_cpu_stat(struct cpuacct_stat *cp, struct return; } - ff = procfile_reopen(ff, cp->filename, NULL, PROCFILE_FLAG_DEFAULT); + ff = procfile_reopen(ff, cp->filename, NULL, CGROUP_PROCFILE_FLAG); if (unlikely(!ff)) { cp->updated = 0; cgroups_check = 1; @@ -1217,7 +1242,7 @@ static inline void cgroup_read_cpuacct_usage(struct cpuacct_usage *ca) { static procfile *ff = NULL; if(likely(ca->filename)) { - ff = procfile_reopen(ff, ca->filename, NULL, PROCFILE_FLAG_DEFAULT); + ff = procfile_reopen(ff, ca->filename, NULL, CGROUP_PROCFILE_FLAG); if(unlikely(!ff)) { ca->updated = 0; cgroups_check = 1; @@ -1280,7 +1305,7 @@ static inline void cgroup_read_blkio(struct blkio *io) { if(likely(io->filename)) { static procfile *ff = NULL; - ff = procfile_reopen(ff, io->filename, NULL, PROCFILE_FLAG_DEFAULT); + ff = procfile_reopen(ff, io->filename, NULL, CGROUP_PROCFILE_FLAG); if(unlikely(!ff)) { io->updated = 0; cgroups_check = 1; @@ -1352,7 +1377,7 @@ static inline void cgroup2_read_blkio(struct blkio *io, unsigned int word_offset if(likely(io->filename)) { static procfile *ff = NULL; - ff = procfile_reopen(ff, io->filename, NULL, PROCFILE_FLAG_DEFAULT); + ff = procfile_reopen(ff, io->filename, NULL, CGROUP_PROCFILE_FLAG); if(unlikely(!ff)) { io->updated = 0; cgroups_check = 1; @@ -1397,7 +1422,7 @@ static inline void cgroup2_read_pressure(struct pressure *res) { static procfile *ff = NULL; if (likely(res->filename)) { - ff = procfile_reopen(ff, res->filename, " =", PROCFILE_FLAG_DEFAULT); + ff = procfile_reopen(ff, res->filename, " =", CGROUP_PROCFILE_FLAG); if (unlikely(!ff)) { res->updated = 0; cgroups_check = 1; @@ -1454,7 +1479,7 @@ static inline void cgroup_read_memory(struct memory *mem, char parent_cg_is_unif goto memory_next; } - ff = procfile_reopen(ff, mem->filename_detailed, NULL, PROCFILE_FLAG_DEFAULT); + ff = procfile_reopen(ff, mem->filename_detailed, NULL, CGROUP_PROCFILE_FLAG); if(unlikely(!ff)) { mem->updated_detailed = 0; cgroups_check = 1; @@ -1677,7 +1702,8 @@ static inline void read_cgroup_network_interfaces(struct cgroup *cg) { info("CGROUP: cgroup '%s' has network interface '%s' as '%s'", cg->id, i->host_device, i->container_device); // register a device rename to proc_net_dev.c - netdev_rename_device_add(i->host_device, i->container_device, cg->chart_id, cg->chart_labels); + netdev_rename_device_add( + i->host_device, i->container_device, cg->chart_id, cg->chart_labels, k8s_is_kubepod(cg) ? "k8s." : ""); } } @@ -1735,34 +1761,24 @@ static inline void substitute_dots_in_id(char *s) { } } -char *k8s_parse_resolved_name(struct label **labels, char *data) { - char *name = mystrsep(&data, " "); - - if (!data) { - return name; - } - - while (data) { - char *key = mystrsep(&data, "="); - - char *value; - if (data && *data == ',') { - value = ""; - *data++ = '\0'; - } else { - value = mystrsep(&data, ","); - } - value = strip_double_quotes(value, 1); +// ---------------------------------------------------------------------------- +// parse k8s labels - if (!key || *key == '\0' || !value || *value == '\0') - continue; +char *k8s_parse_resolved_name_and_labels(DICTIONARY *labels, char *data) { + // the first word, up to the first space is the name + char *name = mystrsep(&data, " "); - *labels = add_label_to_list(*labels, key, value, LABEL_SOURCE_KUBERNETES); + // the rest are key=value pairs separated by comma + while(data) { + char *pair = mystrsep(&data, ","); + rrdlabels_add_pair(labels, pair, RRDLABEL_SRC_AUTO| RRDLABEL_SRC_K8S); } return name; } +// ---------------------------------------------------------------------------- + static inline void free_pressure(struct pressure *res) { if (res->some.share_time.st) rrdset_is_obsolete(res->some.share_time.st); if (res->some.total_time.st) rrdset_is_obsolete(res->some.total_time.st); @@ -1834,7 +1850,7 @@ static inline void cgroup_free(struct cgroup *cg) { freez(cg->chart_id); freez(cg->chart_title); - free_label_list(cg->chart_labels); + rrdlabels_destroy(cg->chart_labels); freez(cg); @@ -1870,31 +1886,33 @@ static inline void discovery_rename_cgroup(struct cgroup *cg) { case 0: cg->pending_renames = 0; break; + case 3: cg->pending_renames = 0; cg->processed = 1; break; } - if (cg->pending_renames || cg->processed) { - return; - } - if (!(new_name && *new_name && *new_name != '\n')) { - return; - } - new_name = trim(new_name); - if (!(new_name)) { - return; - } + if(cg->pending_renames || cg->processed) return; + if(!new_name || !*new_name || *new_name == '\n') return; + if(!(new_name = trim(new_name))) return; + char *name = new_name; if (!strncmp(new_name, "k8s_", 4)) { - free_label_list(cg->chart_labels); - name = k8s_parse_resolved_name(&cg->chart_labels, new_name); + if(!cg->chart_labels) cg->chart_labels = rrdlabels_create(); + + // read the new labels and remove the obsolete ones + rrdlabels_unmark_all(cg->chart_labels); + name = k8s_parse_resolved_name_and_labels(cg->chart_labels, new_name); + rrdlabels_remove_all_unmarked(cg->chart_labels); } + freez(cg->chart_title); cg->chart_title = cgroup_title_strdupz(name); + freez(cg->chart_id); cg->chart_id = cgroup_chart_id_strdupz(name); + substitute_dots_in_id(cg->chart_id); cg->hash_chart = simple_hash(cg->chart_id); } @@ -2584,6 +2602,14 @@ static inline void discovery_process_first_time_seen_cgroup(struct cgroup *cg) { char comm[TASK_COMM_LEN]; + if (cg->container_orchestrator == CGROUPS_ORCHESTRATOR_UNSET) { + if (strstr(cg->id, "kubepods")) { + cg->container_orchestrator = CGROUPS_ORCHESTRATOR_K8S; + } else { + cg->container_orchestrator = CGROUPS_ORCHESTRATOR_UNKNOWN; + } + } + if (is_inside_k8s && !k8s_get_container_first_proc_comm(cg->id, comm)) { // container initialization may take some time when CPU % is high // seen on GKE: comm is '6' before 'runc:[2:INIT]' (dunno if it could be another number) @@ -2656,6 +2682,11 @@ static inline void discovery_process_cgroup(struct cgroup *cg) { cg->processed = 1; + if (strlen(cg->chart_id) >= RRD_ID_LENGTH_MAX) { + info("cgroup '%s' (chart id '%s') disabled because chart_id exceeds the limit (RRD_ID_LENGTH_MAX)", cg->id, cg->chart_id); + return; + } + if (is_cgroup_systemd_service(cg)) { cg->enabled = 1; return; @@ -3638,7 +3669,7 @@ static inline void update_cpu_limits2(struct cgroup *cg) { if(cg->filename_cpu_cfs_quota){ static procfile *ff = NULL; - ff = procfile_reopen(ff, cg->filename_cpu_cfs_quota, NULL, PROCFILE_FLAG_DEFAULT); + ff = procfile_reopen(ff, cg->filename_cpu_cfs_quota, NULL, CGROUP_PROCFILE_FLAG); if(unlikely(!ff)) { goto cpu_limits2_err; } @@ -3694,7 +3725,7 @@ static inline int update_memory_limits(char **filename, RRDSETVAR **chart_var, u *filename = NULL; } else { - rrdsetvar_custom_chart_variable_set(*chart_var, (calculated_number)(*value / (1024 * 1024))); + rrdsetvar_custom_chart_variable_set(*chart_var, (NETDATA_DOUBLE)(*value / (1024 * 1024))); return 1; } } else { @@ -3709,11 +3740,11 @@ static inline int update_memory_limits(char **filename, RRDSETVAR **chart_var, u char *s = "max\n\0"; if(strcmp(s, buffer) == 0){ *value = UINT64_MAX; - rrdsetvar_custom_chart_variable_set(*chart_var, (calculated_number)(*value / (1024 * 1024))); + rrdsetvar_custom_chart_variable_set(*chart_var, (NETDATA_DOUBLE)(*value / (1024 * 1024))); return 1; } *value = str2ull(buffer); - rrdsetvar_custom_chart_variable_set(*chart_var, (calculated_number)(*value / (1024 * 1024))); + rrdsetvar_custom_chart_variable_set(*chart_var, (NETDATA_DOUBLE)(*value / (1024 * 1024))); return 1; } } @@ -3772,7 +3803,7 @@ void update_cgroup_charts(int update_every) { , "cpu" , NULL , "cpu" - , "cgroup.cpu" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu" : "cgroup.cpu" , title , "percentage" , PLUGIN_CGROUPS_NAME @@ -3782,7 +3813,7 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_STACKED ); - rrdset_update_labels(cg->st_cpu, cg->chart_labels); + rrdset_update_rrdlabels(cg->st_cpu, cg->chart_labels); if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { rrddim_add(cg->st_cpu, "user", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL); @@ -3822,17 +3853,17 @@ void update_cgroup_charts(int update_every) { } } else { - calculated_number value = 0, quota = 0; + NETDATA_DOUBLE value = 0, quota = 0; if(likely( ((!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) && (cg->filename_cpuset_cpus || (cg->filename_cpu_cfs_period && cg->filename_cpu_cfs_quota))) || ((cg->options & CGROUP_OPTIONS_IS_UNIFIED) && cg->filename_cpu_cfs_quota))) { if(unlikely(cg->cpu_cfs_quota > 0)) - quota = (calculated_number)cg->cpu_cfs_quota / (calculated_number)cg->cpu_cfs_period; + quota = (NETDATA_DOUBLE)cg->cpu_cfs_quota / (NETDATA_DOUBLE)cg->cpu_cfs_period; if(unlikely(quota > 0 && quota < cg->cpuset_cpus)) value = quota * 100; else - value = (calculated_number)cg->cpuset_cpus * 100; + value = (NETDATA_DOUBLE)cg->cpuset_cpus * 100; } if(likely(value)) { rrdsetvar_custom_chart_variable_set(cg->chart_var_cpu_limit, value); @@ -3845,7 +3876,7 @@ void update_cgroup_charts(int update_every) { , "cpu_limit" , NULL , "cpu" - , "cgroup.cpu_limit" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_limit" : "cgroup.cpu_limit" , title , "percentage" , PLUGIN_CGROUPS_NAME @@ -3855,20 +3886,20 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_LINE ); - rrdset_update_labels(cg->st_cpu_limit, cg->chart_labels); + rrdset_update_rrdlabels(cg->st_cpu_limit, cg->chart_labels); if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) rrddim_add(cg->st_cpu_limit, "used", NULL, 1, system_hz, RRD_ALGORITHM_ABSOLUTE); else rrddim_add(cg->st_cpu_limit, "used", NULL, 1, 1000000, RRD_ALGORITHM_ABSOLUTE); - cg->prev_cpu_usage = (calculated_number)(cg->cpuacct_stat.user + cg->cpuacct_stat.system) * 100; + cg->prev_cpu_usage = (NETDATA_DOUBLE)(cg->cpuacct_stat.user + cg->cpuacct_stat.system) * 100; } else rrdset_next(cg->st_cpu_limit); - calculated_number cpu_usage = 0; - cpu_usage = (calculated_number)(cg->cpuacct_stat.user + cg->cpuacct_stat.system) * 100; - calculated_number cpu_used = 100 * (cpu_usage - cg->prev_cpu_usage) / (value * update_every); + NETDATA_DOUBLE cpu_usage = 0; + cpu_usage = (NETDATA_DOUBLE)(cg->cpuacct_stat.user + cg->cpuacct_stat.system) * 100; + NETDATA_DOUBLE cpu_used = 100 * (cpu_usage - cg->prev_cpu_usage) / (value * update_every); rrdset_isnot_obsolete(cg->st_cpu_limit); @@ -3898,7 +3929,7 @@ void update_cgroup_charts(int update_every) { , "throttled" , NULL , "cpu" - , "cgroup.throttled" + , k8s_is_kubepod(cg) ? "k8s.cgroup.throttled" : "cgroup.throttled" , title , "percentage" , PLUGIN_CGROUPS_NAME @@ -3908,7 +3939,7 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_LINE ); - rrdset_update_labels(cg->st_cpu_nr_throttled, cg->chart_labels); + rrdset_update_rrdlabels(cg->st_cpu_nr_throttled, cg->chart_labels); rrddim_add(cg->st_cpu_nr_throttled, "throttled", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); } else { rrdset_next(cg->st_cpu_nr_throttled); @@ -3924,7 +3955,7 @@ void update_cgroup_charts(int update_every) { , "throttled_duration" , NULL , "cpu" - , "cgroup.throttled_duration" + , k8s_is_kubepod(cg) ? "k8s.cgroup.throttled_duration" : "cgroup.throttled_duration" , title , "ms" , PLUGIN_CGROUPS_NAME @@ -3934,7 +3965,7 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_LINE ); - rrdset_update_labels(cg->st_cpu_throttled_time, cg->chart_labels); + rrdset_update_rrdlabels(cg->st_cpu_throttled_time, cg->chart_labels); rrddim_add(cg->st_cpu_throttled_time, "duration", NULL, 1, 1000000, RRD_ALGORITHM_INCREMENTAL); } else { rrdset_next(cg->st_cpu_throttled_time); @@ -3952,7 +3983,7 @@ void update_cgroup_charts(int update_every) { , "cpu_shares" , NULL , "cpu" - , "cgroup.cpu_shares" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_shares" : "cgroup.cpu_shares" , title , "shares" , PLUGIN_CGROUPS_NAME @@ -3962,7 +3993,7 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_LINE ); - rrdset_update_labels(cg->st_cpu_shares, cg->chart_labels); + rrdset_update_rrdlabels(cg->st_cpu_shares, cg->chart_labels); rrddim_add(cg->st_cpu_shares, "shares", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); } else { rrdset_next(cg->st_cpu_shares); @@ -3983,7 +4014,7 @@ void update_cgroup_charts(int update_every) { , "cpu_per_core" , NULL , "cpu" - , "cgroup.cpu_per_core" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_per_core" : "cgroup.cpu_per_core" , title , "percentage" , PLUGIN_CGROUPS_NAME @@ -3993,7 +4024,7 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_STACKED ); - rrdset_update_labels(cg->st_cpu_per_core, cg->chart_labels); + rrdset_update_rrdlabels(cg->st_cpu_per_core, cg->chart_labels); for(i = 0; i < cg->cpuacct_usage.cpus; i++) { snprintfz(id, RRD_ID_LENGTH_MAX, "cpu%u", i); @@ -4019,7 +4050,7 @@ void update_cgroup_charts(int update_every) { , "mem" , NULL , "mem" - , "cgroup.mem" + , k8s_is_kubepod(cg) ? "k8s.cgroup.mem" : "cgroup.mem" , title , "MiB" , PLUGIN_CGROUPS_NAME @@ -4028,8 +4059,8 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_STACKED ); - - rrdset_update_labels(cg->st_mem, cg->chart_labels); + + rrdset_update_rrdlabels(cg->st_mem, cg->chart_labels); if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { rrddim_add(cg->st_mem, "cache", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); @@ -4079,7 +4110,7 @@ void update_cgroup_charts(int update_every) { , "writeback" , NULL , "mem" - , "cgroup.writeback" + , k8s_is_kubepod(cg) ? "k8s.cgroup.writeback" : "cgroup.writeback" , title , "MiB" , PLUGIN_CGROUPS_NAME @@ -4089,7 +4120,7 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_AREA ); - rrdset_update_labels(cg->st_writeback, cg->chart_labels); + rrdset_update_rrdlabels(cg->st_writeback, cg->chart_labels); if(cg->memory.detailed_has_dirty) rrddim_add(cg->st_writeback, "dirty", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); @@ -4114,7 +4145,7 @@ void update_cgroup_charts(int update_every) { , "mem_activity" , NULL , "mem" - , "cgroup.mem_activity" + , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_activity" : "cgroup.mem_activity" , title , "MiB/s" , PLUGIN_CGROUPS_NAME @@ -4124,7 +4155,7 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_LINE ); - rrdset_update_labels(cg->st_mem_activity, cg->chart_labels); + rrdset_update_rrdlabels(cg->st_mem_activity, cg->chart_labels); rrddim_add(cg->st_mem_activity, "pgpgin", "in", system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); rrddim_add(cg->st_mem_activity, "pgpgout", "out", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); @@ -4145,7 +4176,7 @@ void update_cgroup_charts(int update_every) { , "pgfaults" , NULL , "mem" - , "cgroup.pgfaults" + , k8s_is_kubepod(cg) ? "k8s.cgroup.pgfaults" : "cgroup.pgfaults" , title , "MiB/s" , PLUGIN_CGROUPS_NAME @@ -4155,7 +4186,7 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_LINE ); - rrdset_update_labels(cg->st_pgfaults, cg->chart_labels); + rrdset_update_rrdlabels(cg->st_pgfaults, cg->chart_labels); rrddim_add(cg->st_pgfaults, "pgfault", NULL, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); rrddim_add(cg->st_pgfaults, "pgmajfault", "swap", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); @@ -4177,7 +4208,7 @@ void update_cgroup_charts(int update_every) { , "mem_usage" , NULL , "mem" - , "cgroup.mem_usage" + , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_usage" : "cgroup.mem_usage" , title , "MiB" , PLUGIN_CGROUPS_NAME @@ -4187,7 +4218,7 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_STACKED ); - rrdset_update_labels(cg->st_mem_usage, cg->chart_labels); + rrdset_update_rrdlabels(cg->st_mem_usage, cg->chart_labels); rrddim_add(cg->st_mem_usage, "ram", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); rrddim_add(cg->st_mem_usage, "swap", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); @@ -4244,7 +4275,7 @@ void update_cgroup_charts(int update_every) { , "mem_usage_limit" , NULL , "mem" - , "cgroup.mem_usage_limit" + , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_usage_limit": "cgroup.mem_usage_limit" , title , "MiB" , PLUGIN_CGROUPS_NAME @@ -4254,7 +4285,7 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_STACKED ); - rrdset_update_labels(cg->st_mem_usage_limit, cg->chart_labels); + rrdset_update_rrdlabels(cg->st_mem_usage_limit, cg->chart_labels); rrddim_add(cg->st_mem_usage_limit, "available", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); rrddim_add(cg->st_mem_usage_limit, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); @@ -4276,7 +4307,7 @@ void update_cgroup_charts(int update_every) { , "mem_utilization" , NULL , "mem" - , "cgroup.mem_utilization" + , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_utilization" : "cgroup.mem_utilization" , title , "percentage" , PLUGIN_CGROUPS_NAME @@ -4286,7 +4317,7 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_AREA ); - rrdset_update_labels(cg->st_mem_utilization, cg->chart_labels); + rrdset_update_rrdlabels(cg->st_mem_utilization, cg->chart_labels); rrddim_add(cg->st_mem_utilization, "utilization", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); } else @@ -4325,7 +4356,7 @@ void update_cgroup_charts(int update_every) { , "mem_failcnt" , NULL , "mem" - , "cgroup.mem_failcnt" + , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_failcnt" : "cgroup.mem_failcnt" , title , "count" , PLUGIN_CGROUPS_NAME @@ -4334,8 +4365,8 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); - - rrdset_update_labels(cg->st_mem_failcnt, cg->chart_labels); + + rrdset_update_rrdlabels(cg->st_mem_failcnt, cg->chart_labels); rrddim_add(cg->st_mem_failcnt, "failures", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } @@ -4355,7 +4386,7 @@ void update_cgroup_charts(int update_every) { , "io" , NULL , "disk" - , "cgroup.io" + , k8s_is_kubepod(cg) ? "k8s.cgroup.io" : "cgroup.io" , title , "KiB/s" , PLUGIN_CGROUPS_NAME @@ -4365,7 +4396,7 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_AREA ); - rrdset_update_labels(cg->st_io, cg->chart_labels); + rrdset_update_rrdlabels(cg->st_io, cg->chart_labels); rrddim_add(cg->st_io, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); rrddim_add(cg->st_io, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); @@ -4387,7 +4418,7 @@ void update_cgroup_charts(int update_every) { , "serviced_ops" , NULL , "disk" - , "cgroup.serviced_ops" + , k8s_is_kubepod(cg) ? "k8s.cgroup.serviced_ops" : "cgroup.serviced_ops" , title , "operations/s" , PLUGIN_CGROUPS_NAME @@ -4397,7 +4428,7 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_LINE ); - rrdset_update_labels(cg->st_serviced_ops, cg->chart_labels); + rrdset_update_rrdlabels(cg->st_serviced_ops, cg->chart_labels); rrddim_add(cg->st_serviced_ops, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); rrddim_add(cg->st_serviced_ops, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); @@ -4419,7 +4450,7 @@ void update_cgroup_charts(int update_every) { , "throttle_io" , NULL , "disk" - , "cgroup.throttle_io" + , k8s_is_kubepod(cg) ? "k8s.cgroup.throttle_io" : "cgroup.throttle_io" , title , "KiB/s" , PLUGIN_CGROUPS_NAME @@ -4428,8 +4459,8 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_AREA ); - - rrdset_update_labels(cg->st_throttle_io, cg->chart_labels); + + rrdset_update_rrdlabels(cg->st_throttle_io, cg->chart_labels); rrddim_add(cg->st_throttle_io, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); rrddim_add(cg->st_throttle_io, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); @@ -4451,7 +4482,7 @@ void update_cgroup_charts(int update_every) { , "throttle_serviced_ops" , NULL , "disk" - , "cgroup.throttle_serviced_ops" + , k8s_is_kubepod(cg) ? "k8s.cgroup.throttle_serviced_ops" : "cgroup.throttle_serviced_ops" , title , "operations/s" , PLUGIN_CGROUPS_NAME @@ -4460,8 +4491,8 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); - - rrdset_update_labels(cg->st_throttle_serviced_ops, cg->chart_labels); + + rrdset_update_rrdlabels(cg->st_throttle_serviced_ops, cg->chart_labels); rrddim_add(cg->st_throttle_serviced_ops, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); rrddim_add(cg->st_throttle_serviced_ops, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); @@ -4483,7 +4514,7 @@ void update_cgroup_charts(int update_every) { , "queued_ops" , NULL , "disk" - , "cgroup.queued_ops" + , k8s_is_kubepod(cg) ? "k8s.cgroup.queued_ops" : "cgroup.queued_ops" , title , "operations" , PLUGIN_CGROUPS_NAME @@ -4492,8 +4523,8 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); - - rrdset_update_labels(cg->st_queued_ops, cg->chart_labels); + + rrdset_update_rrdlabels(cg->st_queued_ops, cg->chart_labels); rrddim_add(cg->st_queued_ops, "read", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rrddim_add(cg->st_queued_ops, "write", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE); @@ -4515,7 +4546,7 @@ void update_cgroup_charts(int update_every) { , "merged_ops" , NULL , "disk" - , "cgroup.merged_ops" + , k8s_is_kubepod(cg) ? "k8s.cgroup.merged_ops" : "cgroup.merged_ops" , title , "operations/s" , PLUGIN_CGROUPS_NAME @@ -4524,8 +4555,8 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); - - rrdset_update_labels(cg->st_merged_ops, cg->chart_labels); + + rrdset_update_rrdlabels(cg->st_merged_ops, cg->chart_labels); rrddim_add(cg->st_merged_ops, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); rrddim_add(cg->st_merged_ops, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); @@ -4553,7 +4584,7 @@ void update_cgroup_charts(int update_every) { , "cpu_some_pressure" , NULL , "cpu" - , "cgroup.cpu_some_pressure" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_some_pressure" : "cgroup.cpu_some_pressure" , title , "percentage" , PLUGIN_CGROUPS_NAME @@ -4562,7 +4593,7 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); - rrdset_update_labels(chart = pcs->share_time.st, cg->chart_labels); + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); @@ -4577,7 +4608,7 @@ void update_cgroup_charts(int update_every) { , "cpu_some_pressure_stall_time" , NULL , "cpu" - , "cgroup.cpu_some_pressure_stall_time" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_some_pressure_stall_time" : "cgroup.cpu_some_pressure_stall_time" , title , "ms" , PLUGIN_CGROUPS_NAME @@ -4586,7 +4617,7 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); - rrdset_update_labels(chart = pcs->total_time.st, cg->chart_labels); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } else { rrdset_next(pcs->total_time.st); @@ -4605,7 +4636,7 @@ void update_cgroup_charts(int update_every) { , "cpu_full_pressure" , NULL , "cpu" - , "cgroup.cpu_full_pressure" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_full_pressure" : "cgroup.cpu_full_pressure" , title , "percentage" , PLUGIN_CGROUPS_NAME @@ -4614,7 +4645,7 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); - rrdset_update_labels(chart = pcs->share_time.st, cg->chart_labels); + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); @@ -4629,7 +4660,7 @@ void update_cgroup_charts(int update_every) { , "cpu_full_pressure_stall_time" , NULL , "cpu" - , "cgroup.cpu_full_pressure_stall_time" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_full_pressure_stall_time" : "cgroup.cpu_full_pressure_stall_time" , title , "ms" , PLUGIN_CGROUPS_NAME @@ -4638,7 +4669,7 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); - rrdset_update_labels(chart = pcs->total_time.st, cg->chart_labels); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } else { rrdset_next(pcs->total_time.st); @@ -4660,7 +4691,7 @@ void update_cgroup_charts(int update_every) { , "mem_some_pressure" , NULL , "mem" - , "cgroup.memory_some_pressure" + , k8s_is_kubepod(cg) ? "k8s.cgroup.memory_some_pressure" : "cgroup.memory_some_pressure" , title , "percentage" , PLUGIN_CGROUPS_NAME @@ -4668,8 +4699,8 @@ void update_cgroup_charts(int update_every) { , cgroup_containers_chart_priority + 2300 , update_every , RRDSET_TYPE_LINE - ); - rrdset_update_labels(chart = pcs->share_time.st, cg->chart_labels); + ); + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); @@ -4684,7 +4715,7 @@ void update_cgroup_charts(int update_every) { , "memory_some_pressure_stall_time" , NULL , "mem" - , "cgroup.memory_some_pressure_stall_time" + , k8s_is_kubepod(cg) ? "k8s.cgroup.memory_some_pressure_stall_time" : "cgroup.memory_some_pressure_stall_time" , title , "ms" , PLUGIN_CGROUPS_NAME @@ -4693,7 +4724,7 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); - rrdset_update_labels(chart = pcs->total_time.st, cg->chart_labels); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } else { rrdset_next(pcs->total_time.st); @@ -4714,7 +4745,7 @@ void update_cgroup_charts(int update_every) { , "mem_full_pressure" , NULL , "mem" - , "cgroup.memory_full_pressure" + , k8s_is_kubepod(cg) ? "k8s.cgroup.memory_full_pressure" : "cgroup.memory_full_pressure" , title , "percentage" , PLUGIN_CGROUPS_NAME @@ -4723,8 +4754,8 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); - - rrdset_update_labels(chart = pcs->share_time.st, cg->chart_labels); + + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); @@ -4739,7 +4770,7 @@ void update_cgroup_charts(int update_every) { , "memory_full_pressure_stall_time" , NULL , "mem" - , "cgroup.memory_full_pressure_stall_time" + , k8s_is_kubepod(cg) ? "k8s.cgroup.memory_full_pressure_stall_time" : "cgroup.memory_full_pressure_stall_time" , title , "ms" , PLUGIN_CGROUPS_NAME @@ -4748,7 +4779,7 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); - rrdset_update_labels(chart = pcs->total_time.st, cg->chart_labels); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } else { rrdset_next(pcs->total_time.st); @@ -4770,7 +4801,7 @@ void update_cgroup_charts(int update_every) { , "io_some_pressure" , NULL , "disk" - , "cgroup.io_some_pressure" + , k8s_is_kubepod(cg) ? "k8s.cgroup.io_some_pressure" : "cgroup.io_some_pressure" , title , "percentage" , PLUGIN_CGROUPS_NAME @@ -4779,7 +4810,7 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); - rrdset_update_labels(chart = pcs->share_time.st, cg->chart_labels); + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); @@ -4794,7 +4825,7 @@ void update_cgroup_charts(int update_every) { , "io_some_pressure_stall_time" , NULL , "disk" - , "cgroup.io_some_pressure_stall_time" + , k8s_is_kubepod(cg) ? "k8s.cgroup.io_some_pressure_stall_time" : "cgroup.io_some_pressure_stall_time" , title , "ms" , PLUGIN_CGROUPS_NAME @@ -4803,7 +4834,7 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); - rrdset_update_labels(chart = pcs->total_time.st, cg->chart_labels); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } else { rrdset_next(pcs->total_time.st); @@ -4823,7 +4854,7 @@ void update_cgroup_charts(int update_every) { , "io_full_pressure" , NULL , "disk" - , "cgroup.io_full_pressure" + , k8s_is_kubepod(cg) ? "k8s.cgroup.io_full_pressure" : "cgroup.io_full_pressure" , title , "percentage" , PLUGIN_CGROUPS_NAME @@ -4832,7 +4863,7 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); - rrdset_update_labels(chart = pcs->share_time.st, cg->chart_labels); + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); @@ -4847,7 +4878,7 @@ void update_cgroup_charts(int update_every) { , "io_full_pressure_stall_time" , NULL , "disk" - , "cgroup.io_full_pressure_stall_time" + , k8s_is_kubepod(cg) ? "k8s.cgroup.io_full_pressure_stall_time" : "cgroup.io_full_pressure_stall_time" , title , "ms" , PLUGIN_CGROUPS_NAME @@ -4856,7 +4887,7 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); - rrdset_update_labels(chart = pcs->total_time.st, cg->chart_labels); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } else { rrdset_next(pcs->total_time.st); diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.h b/collectors/cgroups.plugin/sys_fs_cgroup.h index 8301ec26a..d1adf8a93 100644 --- a/collectors/cgroups.plugin/sys_fs_cgroup.h +++ b/collectors/cgroups.plugin/sys_fs_cgroup.h @@ -39,6 +39,6 @@ typedef struct netdata_ebpf_cgroup_shm { #include "../proc.plugin/plugin_proc.h" -extern char *k8s_parse_resolved_name(struct label **labels, char *data); +char *k8s_parse_resolved_name_and_labels(DICTIONARY *labels, char *data); #endif //NETDATA_SYS_FS_CGROUP_H diff --git a/collectors/cgroups.plugin/tests/test_cgroups_plugin.c b/collectors/cgroups.plugin/tests/test_cgroups_plugin.c index 057ac9280..25939a9cd 100644 --- a/collectors/cgroups.plugin/tests/test_cgroups_plugin.c +++ b/collectors/cgroups.plugin/tests/test_cgroups_plugin.c @@ -8,18 +8,36 @@ int netdata_zero_metrics_enabled = 1; struct config netdata_config; char *netdata_configured_primary_plugins_dir = NULL; +struct k8s_test_data { + char *data; + char *name; + char *key[3]; + char *value[3]; + + const char *result_key[3]; + const char *result_value[3]; + int result_ls[3]; + int i; +}; + +static int read_label_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) +{ + struct k8s_test_data *test_data = (struct k8s_test_data *)data; + + test_data->result_key[test_data->i] = name; + test_data->result_value[test_data->i] = value; + test_data->result_ls[test_data->i] = ls; + + test_data->i++; + + return 1; +} + static void test_k8s_parse_resolved_name(void **state) { UNUSED(state); - struct label *labels = (struct label *)0xff; - - struct k8s_test_data { - char *data; - char *name; - char *key[3]; - char *value[3]; - }; + DICTIONARY *labels = rrdlabels_create(); struct k8s_test_data test_data[] = { // One label @@ -40,29 +58,29 @@ static void test_k8s_parse_resolved_name(void **state) .key[0] = "label1", .value[0] = "value1" }, // Equals sign in the value - { .data = "name label1=\"value=1\"", - .name = "name", - .key[0] = "label1", .value[0] = "value=1" }, + // { .data = "name label1=\"value=1\"", + // .name = "name", + // .key[0] = "label1", .value[0] = "value=1" }, // Double quotation mark in the value - { .data = "name label1=\"value\"1\"", - .name = "name", - .key[0] = "label1", .value[0] = "value" }, + // { .data = "name label1=\"value\"1\"", + // .name = "name", + // .key[0] = "label1", .value[0] = "value" }, // Escaped double quotation mark in the value - { .data = "name label1=\"value\\\"1\"", - .name = "name", - .key[0] = "label1", .value[0] = "value\\\"1" }, + // { .data = "name label1=\"value\\\"1\"", + // .name = "name", + // .key[0] = "label1", .value[0] = "value\\\"1" }, // Equals sign in the key - { .data = "name label=1=\"value1\"", - .name = "name", - .key[0] = "label", .value[0] = "1=\"value1\"" }, + // { .data = "name label=1=\"value1\"", + // .name = "name", + // .key[0] = "label", .value[0] = "1=\"value1\"" }, // Skipped value - { .data = "name label1=,label2=\"value2\"", - .name = "name", - .key[0] = "label2", .value[0] = "value2" }, + // { .data = "name label1=,label2=\"value2\"", + // .name = "name", + // .key[0] = "label2", .value[0] = "value2" }, // A pair of equals signs { .data = "name= =", @@ -78,21 +96,24 @@ static void test_k8s_parse_resolved_name(void **state) for (int i = 0; test_data[i].data != NULL; i++) { char *data = strdup(test_data[i].data); + char *name = k8s_parse_resolved_name_and_labels(labels, data); + + assert_string_equal(name, test_data[i].name); + + rrdlabels_walkthrough_read(labels, read_label_callback, &test_data[i]); + for (int l = 0; l < 3 && test_data[i].key[l] != NULL; l++) { char *key = test_data[i].key[l]; char *value = test_data[i].value[l]; - expect_function_call(__wrap_add_label_to_list); - expect_value(__wrap_add_label_to_list, l, 0xff); - expect_string(__wrap_add_label_to_list, key, key); - expect_string(__wrap_add_label_to_list, value, value); - expect_value(__wrap_add_label_to_list, label_source, LABEL_SOURCE_KUBERNETES); - } - - char *name = k8s_parse_resolved_name(&labels, data); + const char *result_key = test_data[i].result_key[l]; + const char *result_value = test_data[i].result_value[l]; + int ls = test_data[i].result_ls[l]; - assert_string_equal(name, test_data[i].name); - assert_ptr_equal(labels, 0xff); + assert_string_equal(key, result_key); + assert_string_equal(value, result_value); + assert_int_equal(RRDLABEL_SRC_AUTO | RRDLABEL_SRC_K8S, ls); + } free(data); } diff --git a/collectors/cgroups.plugin/tests/test_doubles.c b/collectors/cgroups.plugin/tests/test_doubles.c index 9cefa6c98..6203d444c 100644 --- a/collectors/cgroups.plugin/tests/test_doubles.c +++ b/collectors/cgroups.plugin/tests/test_doubles.c @@ -44,22 +44,6 @@ void mountinfo_free_all(struct mountinfo *mi) UNUSED(mi); } -struct label *__wrap_add_label_to_list(struct label *l, char *key, char *value, LABEL_SOURCE label_source) -{ - function_called(); - check_expected_ptr(l); - check_expected_ptr(key); - check_expected_ptr(value); - check_expected(label_source); - return l; -} - -void rrdset_update_labels(RRDSET *st, struct label *labels) -{ - UNUSED(st); - UNUSED(labels); -} - RRDSET *rrdset_create_custom( RRDHOST *host, const char *type, const char *id, const char *name, const char *family, const char *context, const char *title, const char *units, const char *plugin, const char *module, long priority, int update_every, @@ -125,7 +109,7 @@ RRDSETVAR *rrdsetvar_custom_chart_variable_create(RRDSET *st, const char *name) return NULL; } -void rrdsetvar_custom_chart_variable_set(RRDSETVAR *rs, calculated_number value) +void rrdsetvar_custom_chart_variable_set(RRDSETVAR *rs, NETDATA_DOUBLE value) { UNUSED(rs); UNUSED(value); @@ -148,15 +132,24 @@ void update_pressure_charts(struct pressure_charts *charts) } void netdev_rename_device_add( - const char *host_device, const char *container_device, const char *container_name, struct label *labels) + const char *host_device, const char *container_device, const char *container_name, DICTIONARY *labels, const char *ctx_prefix) { UNUSED(host_device); UNUSED(container_device); UNUSED(container_name); UNUSED(labels); + UNUSED(ctx_prefix); } void netdev_rename_device_del(const char *host_device) { UNUSED(host_device); } + +void sql_store_chart_label(uuid_t *chart_uuid, int source_type, char *label, char *value) +{ + UNUSED(chart_uuid); + UNUSED(source_type); + UNUSED(label); + UNUSED(value); +} diff --git a/collectors/cups.plugin/cups_plugin.c b/collectors/cups.plugin/cups_plugin.c index f6481a468..77bd3e0ed 100644 --- a/collectors/cups.plugin/cups_plugin.c +++ b/collectors/cups.plugin/cups_plugin.c @@ -162,12 +162,12 @@ struct job_metrics *get_job_metrics(char *dest) { reset_job_metrics(NULL, &new_job_metrics, NULL); jm = dictionary_set(dict_dest_job_metrics, dest, &new_job_metrics, sizeof(struct job_metrics)); - printf("CHART cups.job_num_%s '' 'Active job number of destination %s' jobs '%s' cups.job_num stacked %i %i\n", dest, dest, dest, netdata_priority++, netdata_update_every); + printf("CHART cups.job_num_%s '' 'Active jobs of %s' jobs '%s' cups.job_num stacked %i %i\n", dest, dest, dest, netdata_priority++, netdata_update_every); printf("DIMENSION pending '' absolute 1 1\n"); printf("DIMENSION held '' absolute 1 1\n"); printf("DIMENSION processing '' absolute 1 1\n"); - printf("CHART cups.job_size_%s '' 'Active job size of destination %s' KB '%s' cups.job_size stacked %i %i\n", dest, dest, dest, netdata_priority++, netdata_update_every); + printf("CHART cups.job_size_%s '' 'Active jobs size of %s' KB '%s' cups.job_size stacked %i %i\n", dest, dest, dest, netdata_priority++, netdata_update_every); printf("DIMENSION pending '' absolute 1 1\n"); printf("DIMENSION held '' absolute 1 1\n"); printf("DIMENSION processing '' absolute 1 1\n"); @@ -196,12 +196,12 @@ int collect_job_metrics(const char *name, void *entry, void *data) { "END\n", name, jm->size_pending, jm->size_held, jm->size_processing); } else { - printf("CHART cups.job_num_%s '' 'Active job number of destination %s' jobs '%s' cups.job_num stacked 1 %i 'obsolete'\n", name, name, name, netdata_update_every); + printf("CHART cups.job_num_%s '' 'Active jobs of %s' jobs '%s' cups.job_num stacked 1 %i 'obsolete'\n", name, name, name, netdata_update_every); printf("DIMENSION pending '' absolute 1 1\n"); printf("DIMENSION held '' absolute 1 1\n"); printf("DIMENSION processing '' absolute 1 1\n"); - printf("CHART cups.job_size_%s '' 'Active job size of destination %s' KB '%s' cups.job_size stacked 1 %i 'obsolete'\n", name, name, name, netdata_update_every); + printf("CHART cups.job_size_%s '' 'Active jobs size of %s' KB '%s' cups.job_size stacked 1 %i 'obsolete'\n", name, name, name, netdata_update_every); printf("DIMENSION pending '' absolute 1 1\n"); printf("DIMENSION held '' absolute 1 1\n"); printf("DIMENSION processing '' absolute 1 1\n"); @@ -387,12 +387,12 @@ int main(int argc, char **argv) { printf("DIMENSION acceptingjobs '' absolute 1 1\n"); printf("DIMENSION shared '' absolute 1 1\n"); - printf("CHART cups.job_num '' 'Total active job number' jobs overview cups.job_num stacked 100002 %i\n", netdata_update_every); + printf("CHART cups.job_num '' 'Active jobs' jobs overview cups.job_num stacked 100002 %i\n", netdata_update_every); printf("DIMENSION pending '' absolute 1 1\n"); printf("DIMENSION held '' absolute 1 1\n"); printf("DIMENSION processing '' absolute 1 1\n"); - printf("CHART cups.job_size '' 'Total active job size' KB overview cups.job_size stacked 100003 %i\n", netdata_update_every); + printf("CHART cups.job_size '' 'Active jobs size' KB overview cups.job_size stacked 100003 %i\n", netdata_update_every); printf("DIMENSION pending '' absolute 1 1\n"); printf("DIMENSION held '' absolute 1 1\n"); printf("DIMENSION processing '' absolute 1 1\n"); diff --git a/collectors/diskspace.plugin/plugin_diskspace.c b/collectors/diskspace.plugin/plugin_diskspace.c index 663bb82fc..5bdf8bc61 100644 --- a/collectors/diskspace.plugin/plugin_diskspace.c +++ b/collectors/diskspace.plugin/plugin_diskspace.c @@ -3,11 +3,17 @@ #include "../proc.plugin/plugin_proc.h" #define PLUGIN_DISKSPACE_NAME "diskspace.plugin" +#define THREAD_DISKSPACE_SLOW_NAME "PLUGIN[diskspace slow]" #define DEFAULT_EXCLUDED_PATHS "/proc/* /sys/* /var/run/user/* /run/user/* /snap/* /var/lib/docker/*" #define DEFAULT_EXCLUDED_FILESYSTEMS "*gvfs *gluster* *s3fs *ipfs *davfs2 *httpfs *sshfs *gdfs *moosefs fusectl autofs" #define CONFIG_SECTION_DISKSPACE "plugin:proc:diskspace" +#define MAX_STAT_USEC 10000LU +#define SLOW_UPDATE_EVERY 5 + +static netdata_thread_t *diskspace_slow_thread = NULL; + static struct mountinfo *disk_mountinfo_root = NULL; static int check_for_new_mountpoints_every = 15; static int cleanup_mount_points = 1; @@ -34,6 +40,9 @@ struct mount_point_metadata { int do_inodes; int shown_error; int updated; + int slow; + + DICTIONARY *chart_labels; size_t collected; // the number of times this has been collected @@ -52,13 +61,15 @@ static DICTIONARY *dict_mountpoints = NULL; #define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete(st); (st) = NULL; } } while(st) -int mount_point_cleanup(const char *name, void *entry, void *data) { +int mount_point_cleanup(const char *name, void *entry, int slow) { (void)name; - (void)data; - + struct mount_point_metadata *mp = (struct mount_point_metadata *)entry; if(!mp) return 0; + if (slow != mp->slow) + return 0; + if(likely(mp->updated)) { mp->updated = 0; return 0; @@ -84,34 +95,219 @@ int mount_point_cleanup(const char *name, void *entry, void *data) { return 0; } -// for the full list of protected mount points look at -// https://github.com/systemd/systemd/blob/1eb3ef78b4df28a9e9f464714208f2682f957e36/src/core/namespace.c#L142-L149 -// https://github.com/systemd/systemd/blob/1eb3ef78b4df28a9e9f464714208f2682f957e36/src/core/namespace.c#L180-L194 -static const char *systemd_protected_mount_points[] = { - "/home", - "/root", - "/usr", - "/boot", - "/efi", - "/etc", - NULL +int mount_point_cleanup_cb(const char *name, void *entry, void *data) { + UNUSED(data); + + return mount_point_cleanup(name, (struct mount_point_metadata *)entry, 0); +} + +// a copy of basic mountinfo fields +struct basic_mountinfo { + char *persistent_id; + char *root; + char *mount_point; + char *filesystem; + + struct basic_mountinfo *next; }; -int mount_point_is_protected(char *mount_point) +static struct basic_mountinfo *slow_mountinfo_tmp_root = NULL; +static netdata_mutex_t slow_mountinfo_mutex; + +static struct basic_mountinfo *basic_mountinfo_create_and_copy(struct mountinfo* mi) { - for (size_t i = 0; systemd_protected_mount_points[i] != NULL; i++) - if (!strcmp(mount_point, systemd_protected_mount_points[i])) - return 1; + struct basic_mountinfo *bmi = callocz(1, sizeof(struct basic_mountinfo)); + + if (mi) { + bmi->persistent_id = strdupz(mi->persistent_id); + bmi->root = strdupz(mi->root); + bmi->mount_point = strdupz(mi->mount_point); + bmi->filesystem = strdupz(mi->filesystem); + } - return 0; + return bmi; } -static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) { +static void add_basic_mountinfo(struct basic_mountinfo **root, struct mountinfo *mi) +{ + if (!root) + return; + + struct basic_mountinfo *bmi = basic_mountinfo_create_and_copy(mi); + + bmi->next = *root; + *root = bmi; +}; + +static void free_basic_mountinfo(struct basic_mountinfo *bmi) +{ + if (bmi) { + freez(bmi->persistent_id); + freez(bmi->root); + freez(bmi->mount_point); + freez(bmi->filesystem); + + freez(bmi); + } +}; + +static void free_basic_mountinfo_list(struct basic_mountinfo *root) +{ + struct basic_mountinfo *bmi = root, *next; + + while (bmi) { + next = bmi->next; + free_basic_mountinfo(bmi); + bmi = next; + } +} + +static void calculate_values_and_show_charts( + struct basic_mountinfo *mi, + struct mount_point_metadata *m, + struct statvfs *buff_statvfs, + int update_every) +{ const char *family = mi->mount_point; const char *disk = mi->persistent_id; + // logic found at get_fs_usage() in coreutils + unsigned long bsize = (buff_statvfs->f_frsize) ? buff_statvfs->f_frsize : buff_statvfs->f_bsize; + + fsblkcnt_t bavail = buff_statvfs->f_bavail; + fsblkcnt_t btotal = buff_statvfs->f_blocks; + fsblkcnt_t bavail_root = buff_statvfs->f_bfree; + fsblkcnt_t breserved_root = bavail_root - bavail; + fsblkcnt_t bused = likely(btotal >= bavail_root) ? btotal - bavail_root : bavail_root - btotal; + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(btotal != bavail + breserved_root + bused)) + error("DISKSPACE: disk block statistics for '%s' (disk '%s') do not sum up: total = %llu, available = %llu, reserved = %llu, used = %llu", mi->mount_point, disk, (unsigned long long)btotal, (unsigned long long)bavail, (unsigned long long)breserved_root, (unsigned long long)bused); +#endif + + // -------------------------------------------------------------------------- + + fsfilcnt_t favail = buff_statvfs->f_favail; + fsfilcnt_t ftotal = buff_statvfs->f_files; + fsfilcnt_t favail_root = buff_statvfs->f_ffree; + fsfilcnt_t freserved_root = favail_root - favail; + fsfilcnt_t fused = ftotal - favail_root; + + if(m->do_inodes == CONFIG_BOOLEAN_AUTO && favail == (fsfilcnt_t)-1) { + // this file system does not support inodes reporting + // eg. cephfs + m->do_inodes = CONFIG_BOOLEAN_NO; + } + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(btotal != bavail + breserved_root + bused)) + error("DISKSPACE: disk inode statistics for '%s' (disk '%s') do not sum up: total = %llu, available = %llu, reserved = %llu, used = %llu", mi->mount_point, disk, (unsigned long long)ftotal, (unsigned long long)favail, (unsigned long long)freserved_root, (unsigned long long)fused); +#endif + + // -------------------------------------------------------------------------- + + int rendered = 0; + + if(m->do_space == CONFIG_BOOLEAN_YES || (m->do_space == CONFIG_BOOLEAN_AUTO && + (bavail || breserved_root || bused || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + if(unlikely(!m->st_space) || m->st_space->update_every != update_every) { + m->do_space = CONFIG_BOOLEAN_YES; + m->st_space = rrdset_find_active_bytype_localhost("disk_space", disk); + if(unlikely(!m->st_space || m->st_space->update_every != update_every)) { + char title[4096 + 1]; + snprintfz(title, 4096, "Disk Space Usage"); + m->st_space = rrdset_create_localhost( + "disk_space" + , disk + , NULL + , family + , "disk.space" + , title + , "GiB" + , PLUGIN_DISKSPACE_NAME + , NULL + , NETDATA_CHART_PRIO_DISKSPACE_SPACE + , update_every + , RRDSET_TYPE_STACKED + ); + } + + rrdset_update_rrdlabels(m->st_space, m->chart_labels); + + m->rd_space_avail = rrddim_add(m->st_space, "avail", NULL, (collected_number)bsize, 1024 * 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + m->rd_space_used = rrddim_add(m->st_space, "used", NULL, (collected_number)bsize, 1024 * 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + m->rd_space_reserved = rrddim_add(m->st_space, "reserved_for_root", "reserved for root", (collected_number)bsize, 1024 * 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + else + rrdset_next(m->st_space); + + rrddim_set_by_pointer(m->st_space, m->rd_space_avail, (collected_number)bavail); + rrddim_set_by_pointer(m->st_space, m->rd_space_used, (collected_number)bused); + rrddim_set_by_pointer(m->st_space, m->rd_space_reserved, (collected_number)breserved_root); + rrdset_done(m->st_space); + + rendered++; + } + + // -------------------------------------------------------------------------- + + if(m->do_inodes == CONFIG_BOOLEAN_YES || (m->do_inodes == CONFIG_BOOLEAN_AUTO && + (favail || freserved_root || fused || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { + if(unlikely(!m->st_inodes) || m->st_inodes->update_every != update_every) { + m->do_inodes = CONFIG_BOOLEAN_YES; + m->st_inodes = rrdset_find_active_bytype_localhost("disk_inodes", disk); + if(unlikely(!m->st_inodes) || m->st_inodes->update_every != update_every) { + char title[4096 + 1]; + snprintfz(title, 4096, "Disk Files (inodes) Usage"); + m->st_inodes = rrdset_create_localhost( + "disk_inodes" + , disk + , NULL + , family + , "disk.inodes" + , title + , "inodes" + , PLUGIN_DISKSPACE_NAME + , NULL + , NETDATA_CHART_PRIO_DISKSPACE_INODES + , update_every + , RRDSET_TYPE_STACKED + ); + } + + rrdset_update_rrdlabels(m->st_inodes, m->chart_labels); + + m->rd_inodes_avail = rrddim_add(m->st_inodes, "avail", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + m->rd_inodes_used = rrddim_add(m->st_inodes, "used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + m->rd_inodes_reserved = rrddim_add(m->st_inodes, "reserved_for_root", "reserved for root", 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + else + rrdset_next(m->st_inodes); + + rrddim_set_by_pointer(m->st_inodes, m->rd_inodes_avail, (collected_number)favail); + rrddim_set_by_pointer(m->st_inodes, m->rd_inodes_used, (collected_number)fused); + rrddim_set_by_pointer(m->st_inodes, m->rd_inodes_reserved, (collected_number)freserved_root); + rrdset_done(m->st_inodes); + + rendered++; + } + + // -------------------------------------------------------------------------- + + if(likely(rendered)) + m->collected++; +} + +static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) { + const char *disk = mi->persistent_id; + static SIMPLE_PATTERN *excluded_mountpoints = NULL; static SIMPLE_PATTERN *excluded_filesystems = NULL; + + usec_t slow_timeout = MAX_STAT_USEC * update_every; + int do_space, do_inodes; if(unlikely(!dict_mountpoints)) { @@ -139,6 +335,7 @@ static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) { struct mount_point_metadata *m = dictionary_get(dict_mountpoints, mi->mount_point); if(unlikely(!m)) { + int slow = 0; char var_name[4096 + 1]; snprintfz(var_name, 4096, "plugin:proc:diskspace:%s", mi->mount_point); @@ -158,7 +355,9 @@ static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) { // check if the mount point is a directory #2407 // but only when it is enabled by default #4491 if(def_space != CONFIG_BOOLEAN_NO || def_inodes != CONFIG_BOOLEAN_NO) { + usec_t start_time = now_monotonic_high_precision_usec(); struct stat bs; + if(stat(mi->mount_point, &bs) == -1) { error("DISKSPACE: Cannot stat() mount point '%s' (disk '%s', filesystem '%s', root '%s')." , mi->mount_point @@ -181,6 +380,9 @@ static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) { def_inodes = CONFIG_BOOLEAN_NO; } } + + if ((now_monotonic_high_precision_usec() - start_time) > slow_timeout) + slow = 1; } do_space = config_get_boolean_ondemand(var_name, "space usage", def_space); @@ -191,6 +393,7 @@ static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) { .do_inodes = do_inodes, .shown_error = 0, .updated = 0, + .slow = 0, .collected = 0, @@ -205,7 +408,19 @@ static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) { .rd_inodes_reserved = NULL }; + mp.chart_labels = rrdlabels_create(); + rrdlabels_add(mp.chart_labels, "mount_point", mi->mount_point, RRDLABEL_SRC_AUTO); + rrdlabels_add(mp.chart_labels, "filesystem", mi->filesystem, RRDLABEL_SRC_AUTO); + rrdlabels_add(mp.chart_labels, "mount_root", mi->root, RRDLABEL_SRC_AUTO); + m = dictionary_set(dict_mountpoints, mi->mount_point, &mp, sizeof(struct mount_point_metadata)); + + m->slow = slow; + } + + if (m->slow) { + add_basic_mountinfo(&slow_mountinfo_tmp_root, mi); + return; } m->updated = 1; @@ -215,13 +430,15 @@ static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) { if (unlikely( mi->flags & MOUNTINFO_READONLY && - !mount_point_is_protected(mi->mount_point) && + !(mi->flags & MOUNTINFO_IS_IN_SYSD_PROTECTED_LIST) && !m->collected && m->do_space != CONFIG_BOOLEAN_YES && m->do_inodes != CONFIG_BOOLEAN_YES)) return; + usec_t start_time = now_monotonic_high_precision_usec(); struct statvfs buff_statvfs; + if (statvfs(mi->mount_point, &buff_statvfs) < 0) { if(!m->shown_error) { error("DISKSPACE: failed to statvfs() mount point '%s' (disk '%s', filesystem '%s', root '%s')" @@ -234,135 +451,133 @@ static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) { } return; } - m->shown_error = 0; - // logic found at get_fs_usage() in coreutils - unsigned long bsize = (buff_statvfs.f_frsize) ? buff_statvfs.f_frsize : buff_statvfs.f_bsize; + if ((now_monotonic_high_precision_usec() - start_time) > slow_timeout) + m->slow = 1; - fsblkcnt_t bavail = buff_statvfs.f_bavail; - fsblkcnt_t btotal = buff_statvfs.f_blocks; - fsblkcnt_t bavail_root = buff_statvfs.f_bfree; - fsblkcnt_t breserved_root = bavail_root - bavail; - fsblkcnt_t bused; - if(likely(btotal >= bavail_root)) - bused = btotal - bavail_root; - else - bused = bavail_root - btotal; + m->shown_error = 0; -#ifdef NETDATA_INTERNAL_CHECKS - if(unlikely(btotal != bavail + breserved_root + bused)) - error("DISKSPACE: disk block statistics for '%s' (disk '%s') do not sum up: total = %llu, available = %llu, reserved = %llu, used = %llu", mi->mount_point, disk, (unsigned long long)btotal, (unsigned long long)bavail, (unsigned long long)breserved_root, (unsigned long long)bused); -#endif + struct basic_mountinfo bmi; + bmi.mount_point = mi->mount_point; + bmi.persistent_id = mi->persistent_id; + bmi.filesystem = mi->filesystem; + bmi.root = mi->root; - // -------------------------------------------------------------------------- + calculate_values_and_show_charts(&bmi, m, &buff_statvfs, update_every); +} - fsfilcnt_t favail = buff_statvfs.f_favail; - fsfilcnt_t ftotal = buff_statvfs.f_files; - fsfilcnt_t favail_root = buff_statvfs.f_ffree; - fsfilcnt_t freserved_root = favail_root - favail; - fsfilcnt_t fused = ftotal - favail_root; +static inline void do_slow_disk_space_stats(struct basic_mountinfo *mi, int update_every) { + struct mount_point_metadata *m = dictionary_get(dict_mountpoints, mi->mount_point); - if(m->do_inodes == CONFIG_BOOLEAN_AUTO && favail == (fsfilcnt_t)-1) { - // this file system does not support inodes reporting - // eg. cephfs - m->do_inodes = CONFIG_BOOLEAN_NO; + m->updated = 1; + + struct statvfs buff_statvfs; + if (statvfs(mi->mount_point, &buff_statvfs) < 0) { + if(!m->shown_error) { + error("DISKSPACE: failed to statvfs() mount point '%s' (disk '%s', filesystem '%s', root '%s')" + , mi->mount_point + , mi->persistent_id + , mi->filesystem?mi->filesystem:"" + , mi->root?mi->root:"" + ); + m->shown_error = 1; + } + return; } + m->shown_error = 0; -#ifdef NETDATA_INTERNAL_CHECKS - if(unlikely(btotal != bavail + breserved_root + bused)) - error("DISKSPACE: disk inode statistics for '%s' (disk '%s') do not sum up: total = %llu, available = %llu, reserved = %llu, used = %llu", mi->mount_point, disk, (unsigned long long)ftotal, (unsigned long long)favail, (unsigned long long)freserved_root, (unsigned long long)fused); -#endif + calculate_values_and_show_charts(mi, m, &buff_statvfs, update_every); +} - // -------------------------------------------------------------------------- +static void diskspace_slow_worker_cleanup(void *ptr) +{ + UNUSED(ptr); - int rendered = 0; + info("cleaning up..."); - if(m->do_space == CONFIG_BOOLEAN_YES || (m->do_space == CONFIG_BOOLEAN_AUTO && - (bavail || breserved_root || bused || - netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { - if(unlikely(!m->st_space)) { - m->do_space = CONFIG_BOOLEAN_YES; - m->st_space = rrdset_find_active_bytype_localhost("disk_space", disk); - if(unlikely(!m->st_space)) { - char title[4096 + 1]; - snprintfz(title, 4096, "Disk Space Usage"); - m->st_space = rrdset_create_localhost( - "disk_space" - , disk - , NULL - , family - , "disk.space" - , title - , "GiB" - , PLUGIN_DISKSPACE_NAME - , NULL - , NETDATA_CHART_PRIO_DISKSPACE_SPACE - , update_every - , RRDSET_TYPE_STACKED - ); - } + worker_unregister(); +} - m->rd_space_avail = rrddim_add(m->st_space, "avail", NULL, (collected_number)bsize, 1024 * 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - m->rd_space_used = rrddim_add(m->st_space, "used", NULL, (collected_number)bsize, 1024 * 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - m->rd_space_reserved = rrddim_add(m->st_space, "reserved_for_root", "reserved for root", (collected_number)bsize, 1024 * 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - } - else - rrdset_next(m->st_space); +#define WORKER_JOB_SLOW_MOUNTPOINT 0 +#define WORKER_JOB_SLOW_CLEANUP 1 - rrddim_set_by_pointer(m->st_space, m->rd_space_avail, (collected_number)bavail); - rrddim_set_by_pointer(m->st_space, m->rd_space_used, (collected_number)bused); - rrddim_set_by_pointer(m->st_space, m->rd_space_reserved, (collected_number)breserved_root); - rrdset_done(m->st_space); +struct slow_worker_data { + netdata_thread_t *slow_thread; + int update_every; +}; - rendered++; - } +void *diskspace_slow_worker(void *ptr) +{ + struct slow_worker_data *data = (struct slow_worker_data *)ptr; + + worker_register("DISKSPACE_SLOW"); + worker_register_job_name(WORKER_JOB_SLOW_MOUNTPOINT, "mountpoint"); + worker_register_job_name(WORKER_JOB_SLOW_CLEANUP, "cleanup"); - // -------------------------------------------------------------------------- + struct basic_mountinfo *slow_mountinfo_root = NULL; - if(m->do_inodes == CONFIG_BOOLEAN_YES || (m->do_inodes == CONFIG_BOOLEAN_AUTO && - (favail || freserved_root || fused || - netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) { - if(unlikely(!m->st_inodes)) { - m->do_inodes = CONFIG_BOOLEAN_YES; - m->st_inodes = rrdset_find_active_bytype_localhost("disk_inodes", disk); - if(unlikely(!m->st_inodes)) { - char title[4096 + 1]; - snprintfz(title, 4096, "Disk Files (inodes) Usage"); - m->st_inodes = rrdset_create_localhost( - "disk_inodes" - , disk - , NULL - , family - , "disk.inodes" - , title - , "inodes" - , PLUGIN_DISKSPACE_NAME - , NULL - , NETDATA_CHART_PRIO_DISKSPACE_INODES - , update_every - , RRDSET_TYPE_STACKED - ); - } + int slow_update_every = data->update_every > SLOW_UPDATE_EVERY ? data->update_every : SLOW_UPDATE_EVERY; - m->rd_inodes_avail = rrddim_add(m->st_inodes, "avail", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - m->rd_inodes_used = rrddim_add(m->st_inodes, "used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - m->rd_inodes_reserved = rrddim_add(m->st_inodes, "reserved_for_root", "reserved for root", 1, 1, RRD_ALGORITHM_ABSOLUTE); + netdata_thread_cleanup_push(diskspace_slow_worker_cleanup, data->slow_thread); + + usec_t step = slow_update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + + while(!netdata_exit) { + worker_is_idle(); + heartbeat_next(&hb, step); + + usec_t start_time = now_monotonic_high_precision_usec(); + + if (!dict_mountpoints) + continue; + + if(unlikely(netdata_exit)) break; + + // -------------------------------------------------------------------------- + // disk space metrics + + worker_is_busy(WORKER_JOB_SLOW_MOUNTPOINT); + + netdata_mutex_lock(&slow_mountinfo_mutex); + free_basic_mountinfo_list(slow_mountinfo_root); + slow_mountinfo_root = slow_mountinfo_tmp_root; + slow_mountinfo_tmp_root = NULL; + netdata_mutex_unlock(&slow_mountinfo_mutex); + + struct basic_mountinfo *bmi; + for(bmi = slow_mountinfo_root; bmi; bmi = bmi->next) { + do_slow_disk_space_stats(bmi, slow_update_every); + + if(unlikely(netdata_exit)) break; } - else - rrdset_next(m->st_inodes); - rrddim_set_by_pointer(m->st_inodes, m->rd_inodes_avail, (collected_number)favail); - rrddim_set_by_pointer(m->st_inodes, m->rd_inodes_used, (collected_number)fused); - rrddim_set_by_pointer(m->st_inodes, m->rd_inodes_reserved, (collected_number)freserved_root); - rrdset_done(m->st_inodes); + if(unlikely(netdata_exit)) break; - rendered++; + worker_is_busy(WORKER_JOB_SLOW_CLEANUP); + + for(bmi = slow_mountinfo_root; bmi; bmi = bmi->next) { + struct mount_point_metadata *m = dictionary_get(dict_mountpoints, bmi->mount_point); + + if (m) + mount_point_cleanup(bmi->mount_point, m, 1); + } + + usec_t dt = now_monotonic_high_precision_usec() - start_time; + if (dt > step) { + slow_update_every = (dt / USEC_PER_SEC) * 3 / 2; + if (slow_update_every % SLOW_UPDATE_EVERY) + slow_update_every += SLOW_UPDATE_EVERY - slow_update_every % SLOW_UPDATE_EVERY; + step = slow_update_every * USEC_PER_SEC; + } } - // -------------------------------------------------------------------------- + netdata_thread_cleanup_pop(1); - if(likely(rendered)) - m->collected++; + free_basic_mountinfo_list(slow_mountinfo_root); + + return NULL; } static void diskspace_main_cleanup(void *ptr) { @@ -373,6 +588,13 @@ static void diskspace_main_cleanup(void *ptr) { info("cleaning up..."); + if (diskspace_slow_thread) { + netdata_thread_join(*diskspace_slow_thread, NULL); + freez(diskspace_slow_thread); + } + + free_basic_mountinfo_list(slow_mountinfo_tmp_root); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; } @@ -402,6 +624,19 @@ void *diskspace_main(void *ptr) { if(check_for_new_mountpoints_every < update_every) check_for_new_mountpoints_every = update_every; + netdata_mutex_init(&slow_mountinfo_mutex); + + diskspace_slow_thread = mallocz(sizeof(netdata_thread_t)); + + struct slow_worker_data slow_worker_data = {.slow_thread = diskspace_slow_thread, .update_every = update_every}; + + netdata_thread_create( + diskspace_slow_thread, + THREAD_DISKSPACE_SLOW_NAME, + NETDATA_THREAD_OPTION_JOINABLE, + diskspace_slow_worker, + &slow_worker_data); + usec_t step = update_every * USEC_PER_SEC; heartbeat_t hb; heartbeat_init(&hb); @@ -411,7 +646,6 @@ void *diskspace_main(void *ptr) { if(unlikely(netdata_exit)) break; - // -------------------------------------------------------------------------- // this is smart enough not to reload it every time @@ -421,26 +655,31 @@ void *diskspace_main(void *ptr) { // -------------------------------------------------------------------------- // disk space metrics + netdata_mutex_lock(&slow_mountinfo_mutex); + free_basic_mountinfo_list(slow_mountinfo_tmp_root); + slow_mountinfo_tmp_root = NULL; + struct mountinfo *mi; for(mi = disk_mountinfo_root; mi; mi = mi->next) { - if(unlikely(mi->flags & (MOUNTINFO_IS_DUMMY | MOUNTINFO_IS_BIND))) continue; // exclude mounts made by ProtectHome and ProtectSystem systemd hardening options - if(mi->flags & MOUNTINFO_READONLY && !strcmp(mi->root, mi->mount_point)) + // https://github.com/netdata/netdata/issues/11498#issuecomment-950982878 + if(mi->flags & MOUNTINFO_READONLY && mi->flags & MOUNTINFO_IS_IN_SYSD_PROTECTED_LIST && !strcmp(mi->root, mi->mount_point)) continue; worker_is_busy(WORKER_JOB_MOUNTPOINT); do_disk_space_stats(mi, update_every); if(unlikely(netdata_exit)) break; } + netdata_mutex_unlock(&slow_mountinfo_mutex); if(unlikely(netdata_exit)) break; if(dict_mountpoints) { worker_is_busy(WORKER_JOB_CLEANUP); - dictionary_walkthrough_read(dict_mountpoints, mount_point_cleanup, NULL); + dictionary_walkthrough_read(dict_mountpoints, mount_point_cleanup_cb, NULL); } } diff --git a/collectors/ebpf.plugin/README.md b/collectors/ebpf.plugin/README.md index dc406b7f8..550982ad2 100644 --- a/collectors/ebpf.plugin/README.md +++ b/collectors/ebpf.plugin/README.md @@ -8,7 +8,7 @@ sidebar_label: "eBPF" # eBPF monitoring with Netdata -The Netdata Agent provides many [eBPF](https://ebpf.io/what-is-ebpf/) programs to help you troubleshoot and debug how applications interact with the Linux kernel. The `ebpf.plugin` uses [tracepoints, trampoline, and2 kprobes](#data-collection) to collect a wide array of high value data about the host that would otherwise be impossible to capture. +The Netdata Agent provides many [eBPF](https://ebpf.io/what-is-ebpf/) programs to help you troubleshoot and debug how applications interact with the Linux kernel. The `ebpf.plugin` uses [tracepoints, trampoline, and2 kprobes](#how-netdata-collects-data-using-probes-and-tracepoints) to collect a wide array of high value data about the host that would otherwise be impossible to capture. > ❗ eBPF monitoring only works on Linux systems and with specific Linux kernels, including all kernels newer than `4.11.0`, and all kernels on CentOS 7.6 or later. For kernels older than `4.11.0`, improved support is in active development. @@ -20,8 +20,6 @@ For hands-on configuration and troubleshooting tips see our [tutorial on trouble <figcaption>An example of virtual file system (VFS) charts made possible by the eBPF collector plugin.</figcaption> </figure> -<a id="data-collection"> </a> - ## How Netdata collects data using probes and tracepoints Netdata uses the following features from the Linux kernel to run eBPF programs: diff --git a/collectors/ebpf.plugin/ebpf.c b/collectors/ebpf.plugin/ebpf.c index 2b25f50a3..65c96f672 100644 --- a/collectors/ebpf.plugin/ebpf.c +++ b/collectors/ebpf.plugin/ebpf.c @@ -18,8 +18,6 @@ char *ebpf_plugin_dir = PLUGINS_DIR; static char *ebpf_configured_log_dir = LOG_DIR; char *ebpf_algorithms[] = {"absolute", "incremental"}; -static int thread_finished = 0; -int close_ebpf_plugin = 0; struct config collector_config = { .first_section = NULL, .last_section = NULL, .mutex = NETDATA_MUTEX_INITIALIZER, @@ -29,7 +27,6 @@ struct config collector_config = { .first_section = NULL, int running_on_kernel = 0; int ebpf_nprocs; int isrh = 0; -uint32_t finalized_threads = 1; pthread_mutex_t lock; pthread_mutex_t collect_data_mutex; @@ -37,132 +34,264 @@ pthread_cond_t collect_data_cond_var; ebpf_module_t ebpf_modules[] = { { .thread_name = "process", .config_name = "process", .enabled = 0, .start_routine = ebpf_process_thread, - .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_process_create_apps_charts, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &process_config, .config_file = NETDATA_PROCESS_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_10, - .load = EBPF_LOAD_LEGACY, .targets = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL}, { .thread_name = "socket", .config_name = "socket", .enabled = 0, .start_routine = ebpf_socket_thread, - .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_socket_create_apps_charts, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &socket_config, .config_file = NETDATA_NETWORK_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .load = EBPF_LOAD_LEGACY, .targets = socket_targets}, + .load = EBPF_LOAD_LEGACY, .targets = socket_targets, .probe_links = NULL, .objects = NULL}, { .thread_name = "cachestat", .config_name = "cachestat", .enabled = 0, .start_routine = ebpf_cachestat_thread, - .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_cachestat_create_apps_charts, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &cachestat_config, .config_file = NETDATA_CACHESTAT_CONFIG_FILE, - .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18| - NETDATA_V5_4 | NETDATA_V5_15 | NETDATA_V5_16, - .load = EBPF_LOAD_LEGACY, .targets = cachestat_targets}, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18| NETDATA_V5_4 | NETDATA_V5_15 | + NETDATA_V5_16, + .load = EBPF_LOAD_LEGACY, .targets = cachestat_targets, .probe_links = NULL, .objects = NULL}, { .thread_name = "sync", .config_name = "sync", .enabled = 0, .start_routine = ebpf_sync_thread, - .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &sync_config, .config_file = NETDATA_SYNC_CONFIG_FILE, // All syscalls have the same kernels .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .load = EBPF_LOAD_LEGACY, .targets = sync_targets}, + .load = EBPF_LOAD_LEGACY, .targets = sync_targets, .probe_links = NULL, .objects = NULL}, { .thread_name = "dc", .config_name = "dc", .enabled = 0, .start_routine = ebpf_dcstat_thread, - .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_dcstat_create_apps_charts, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &dcstat_config, .config_file = NETDATA_DIRECTORY_DCSTAT_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .load = EBPF_LOAD_LEGACY, .targets = dc_targets}, + .load = EBPF_LOAD_LEGACY, .targets = dc_targets, .probe_links = NULL, .objects = NULL}, { .thread_name = "swap", .config_name = "swap", .enabled = 0, .start_routine = ebpf_swap_thread, - .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_swap_create_apps_charts, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &swap_config, .config_file = NETDATA_DIRECTORY_SWAP_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .load = EBPF_LOAD_LEGACY, .targets = swap_targets}, + .load = EBPF_LOAD_LEGACY, .targets = swap_targets, .probe_links = NULL, .objects = NULL}, { .thread_name = "vfs", .config_name = "vfs", .enabled = 0, .start_routine = ebpf_vfs_thread, - .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_vfs_create_apps_charts, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &vfs_config, .config_file = NETDATA_DIRECTORY_VFS_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .load = EBPF_LOAD_LEGACY, .targets = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL}, { .thread_name = "filesystem", .config_name = "filesystem", .enabled = 0, .start_routine = ebpf_filesystem_thread, - .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &fs_config, .config_file = NETDATA_FILESYSTEM_CONFIG_FILE, //We are setting kernels as zero, because we load eBPF programs according the kernel running. - .kernels = 0, .load = EBPF_LOAD_LEGACY, .targets = NULL }, + .kernels = 0, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL }, { .thread_name = "disk", .config_name = "disk", .enabled = 0, .start_routine = ebpf_disk_thread, - .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &disk_config, .config_file = NETDATA_DISK_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .load = EBPF_LOAD_LEGACY, .targets = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL}, { .thread_name = "mount", .config_name = "mount", .enabled = 0, .start_routine = ebpf_mount_thread, - .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &mount_config, .config_file = NETDATA_MOUNT_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .load = EBPF_LOAD_LEGACY, .targets = mount_targets}, + .load = EBPF_LOAD_LEGACY, .targets = mount_targets, .probe_links = NULL, .objects = NULL}, { .thread_name = "fd", .config_name = "fd", .enabled = 0, .start_routine = ebpf_fd_thread, - .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_fd_create_apps_charts, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &fd_config, .config_file = NETDATA_FD_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_11, - .load = EBPF_LOAD_LEGACY, .targets = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL}, { .thread_name = "hardirq", .config_name = "hardirq", .enabled = 0, .start_routine = ebpf_hardirq_thread, - .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &hardirq_config, .config_file = NETDATA_HARDIRQ_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .load = EBPF_LOAD_LEGACY, .targets = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL}, { .thread_name = "softirq", .config_name = "softirq", .enabled = 0, .start_routine = ebpf_softirq_thread, - .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &softirq_config, .config_file = NETDATA_SOFTIRQ_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .load = EBPF_LOAD_LEGACY, .targets = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL}, { .thread_name = "oomkill", .config_name = "oomkill", .enabled = 0, .start_routine = ebpf_oomkill_thread, - .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_oomkill_create_apps_charts, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &oomkill_config, .config_file = NETDATA_OOMKILL_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .load = EBPF_LOAD_LEGACY, .targets = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL}, { .thread_name = "shm", .config_name = "shm", .enabled = 0, .start_routine = ebpf_shm_thread, - .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_shm_create_apps_charts, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &shm_config, .config_file = NETDATA_DIRECTORY_SHM_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .load = EBPF_LOAD_LEGACY, .targets = shm_targets}, + .load = EBPF_LOAD_LEGACY, .targets = shm_targets, .probe_links = NULL, .objects = NULL}, { .thread_name = "mdflush", .config_name = "mdflush", .enabled = 0, .start_routine = ebpf_mdflush_thread, - .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &mdflush_config, .config_file = NETDATA_DIRECTORY_MDFLUSH_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .load = EBPF_LOAD_LEGACY, .targets = NULL}, + .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL}, { .thread_name = NULL, .enabled = 0, .start_routine = NULL, .update_every = EBPF_DEFAULT_UPDATE_EVERY, - .global_charts = 0, .apps_charts = CONFIG_BOOLEAN_NO, .cgroup_charts = CONFIG_BOOLEAN_NO, + .global_charts = 0, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = 0, .names = NULL, - .cfg = NULL, .config_name = NULL, .kernels = 0, .load = EBPF_LOAD_LEGACY, .targets = NULL}, + .cfg = NULL, .config_name = NULL, .kernels = 0, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, + .objects = NULL}, +}; + +struct netdata_static_thread ebpf_threads[] = { + {"EBPF PROCESS", NULL, NULL, 1, + NULL, NULL, NULL}, + {"EBPF SOCKET" , NULL, NULL, 1, + NULL, NULL, NULL}, + {"EBPF CACHESTAT" , NULL, NULL, 1, + NULL, NULL, NULL}, + {"EBPF SYNC" , NULL, NULL, 1, + NULL, NULL, NULL}, + {"EBPF DCSTAT" , NULL, NULL, 1, + NULL, NULL, NULL}, + {"EBPF SWAP" , NULL, NULL, 1, + NULL, NULL, NULL}, + {"EBPF VFS" , NULL, NULL, 1, + NULL, NULL, NULL}, + {"EBPF FILESYSTEM" , NULL, NULL, 1, + NULL, NULL, NULL}, + {"EBPF DISK" , NULL, NULL, 1, + NULL, NULL, NULL}, + {"EBPF MOUNT" , NULL, NULL, 1, + NULL, NULL, NULL}, + {"EBPF FD" , NULL, NULL, 1, + NULL, NULL, NULL}, + {"EBPF HARDIRQ" , NULL, NULL, 1, + NULL, NULL, NULL}, + {"EBPF SOFTIRQ" , NULL, NULL, 1, + NULL, NULL, NULL}, + {"EBPF OOMKILL" , NULL, NULL, 1, + NULL, NULL, NULL}, + {"EBPF SHM" , NULL, NULL, 1, + NULL, NULL, NULL}, + {"EBPF MDFLUSH" , NULL, NULL, 1, + NULL, NULL, NULL}, + {NULL , NULL, NULL, 0, + NULL, NULL, NULL} +}; + +ebpf_filesystem_partitions_t localfs[] = + {{.filesystem = "ext4", + .optional_filesystem = NULL, + .family = "ext4", + .objects = NULL, + .probe_links = NULL, + .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, + .enabled = CONFIG_BOOLEAN_YES, + .addresses = {.function = NULL, .addr = 0}, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4}, + {.filesystem = "xfs", + .optional_filesystem = NULL, + .family = "xfs", + .objects = NULL, + .probe_links = NULL, + .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, + .enabled = CONFIG_BOOLEAN_YES, + .addresses = {.function = NULL, .addr = 0}, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4}, + {.filesystem = "nfs", + .optional_filesystem = "nfs4", + .family = "nfs", + .objects = NULL, + .probe_links = NULL, + .flags = NETDATA_FILESYSTEM_ATTR_CHARTS, + .enabled = CONFIG_BOOLEAN_YES, + .addresses = {.function = NULL, .addr = 0}, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4}, + {.filesystem = "zfs", + .optional_filesystem = NULL, + .family = "zfs", + .objects = NULL, + .probe_links = NULL, + .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, + .enabled = CONFIG_BOOLEAN_YES, + .addresses = {.function = NULL, .addr = 0}, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4}, + {.filesystem = "btrfs", + .optional_filesystem = NULL, + .family = "btrfs", + .objects = NULL, + .probe_links = NULL, + .flags = NETDATA_FILESYSTEM_FILL_ADDRESS_TABLE, + .enabled = CONFIG_BOOLEAN_YES, + .addresses = {.function = "btrfs_file_operations", .addr = 0}, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_10}, + {.filesystem = NULL, + .optional_filesystem = NULL, + .family = NULL, + .objects = NULL, + .probe_links = NULL, + .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, + .enabled = CONFIG_BOOLEAN_YES, + .addresses = {.function = NULL, .addr = 0}, + .kernels = 0}}; + +ebpf_sync_syscalls_t local_syscalls[] = { + {.syscall = NETDATA_SYSCALLS_SYNC, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, +#ifdef LIBBPF_MAJOR_VERSION + .sync_obj = NULL +#endif + }, + {.syscall = NETDATA_SYSCALLS_SYNCFS, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, +#ifdef LIBBPF_MAJOR_VERSION + .sync_obj = NULL +#endif + }, + {.syscall = NETDATA_SYSCALLS_MSYNC, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, +#ifdef LIBBPF_MAJOR_VERSION + .sync_obj = NULL +#endif + }, + {.syscall = NETDATA_SYSCALLS_FSYNC, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, +#ifdef LIBBPF_MAJOR_VERSION + .sync_obj = NULL +#endif + }, + {.syscall = NETDATA_SYSCALLS_FDATASYNC, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, +#ifdef LIBBPF_MAJOR_VERSION + .sync_obj = NULL +#endif + }, + {.syscall = NETDATA_SYSCALLS_SYNC_FILE_RANGE, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, +#ifdef LIBBPF_MAJOR_VERSION + .sync_obj = NULL +#endif + }, + {.syscall = NULL, .enabled = CONFIG_BOOLEAN_NO, .objects = NULL, .probe_links = NULL, +#ifdef LIBBPF_MAJOR_VERSION + .sync_obj = NULL +#endif + } }; // Link with apps.plugin @@ -193,125 +322,95 @@ char *btf_path = NULL; *****************************************************************/ /** - * Clean Loaded Events - * - * This function cleans the events previous loaded on Linux. -void clean_loaded_events() -{ - int event_pid; - for (event_pid = 0; ebpf_modules[event_pid].probes; event_pid++) - clean_kprobe_events(NULL, (int)ebpf_modules[event_pid].thread_id, ebpf_modules[event_pid].probes); -} - */ - -/** * Close the collector gracefully * * @param sig is the signal number used to close the collector */ static void ebpf_exit(int sig) { - close_ebpf_plugin = 1; - static int remove_pid = 0; - - // When both threads were not finished case I try to go in front this address, the collector will crash - if (!thread_finished) { - return; - } - - if (ebpf_modules[EBPF_MODULE_SOCKET_IDX].enabled) { - ebpf_modules[EBPF_MODULE_SOCKET_IDX].enabled = 0; - clean_socket_apps_structures(); - freez(socket_bandwidth_curr); +#ifdef LIBBPF_MAJOR_VERSION + if (default_btf) { + btf__free(default_btf); + default_btf = NULL; } +#endif - if (ebpf_modules[EBPF_MODULE_CACHESTAT_IDX].enabled) { - ebpf_modules[EBPF_MODULE_CACHESTAT_IDX].enabled = 0; - clean_cachestat_pid_structures(); - freez(cachestat_pid); - } + char filename[FILENAME_MAX + 1]; + ebpf_pid_file(filename, FILENAME_MAX); + if (unlink(filename)) + error("Cannot remove PID file %s", filename); - if (ebpf_modules[EBPF_MODULE_DCSTAT_IDX].enabled) { - ebpf_modules[EBPF_MODULE_DCSTAT_IDX].enabled = 0; - clean_dcstat_pid_structures(); - freez(dcstat_pid); - } + exit(sig); +} - if (ebpf_modules[EBPF_MODULE_SWAP_IDX].enabled) { - ebpf_modules[EBPF_MODULE_SWAP_IDX].enabled = 0; - clean_swap_pid_structures(); - freez(swap_pid); - } +/** + * Unload loegacy code + * + * @param objects objects loaded from eBPF programs + * @param probe_links links from loader + */ +static void ebpf_unload_legacy_code(struct bpf_object *objects, struct bpf_link **probe_links) +{ + if (!probe_links || !objects) + return; - if (ebpf_modules[EBPF_MODULE_VFS_IDX].enabled) { - ebpf_modules[EBPF_MODULE_VFS_IDX].enabled = 0; - clean_vfs_pid_structures(); - freez(vfs_pid); + struct bpf_program *prog; + size_t j = 0 ; + bpf_object__for_each_program(prog, objects) { + bpf_link__destroy(probe_links[j]); + j++; } + freez(probe_links); + if (objects) + bpf_object__close(objects); +} - if (ebpf_modules[EBPF_MODULE_FD_IDX].enabled) { - ebpf_modules[EBPF_MODULE_FD_IDX].enabled = 0; - clean_fd_pid_structures(); - freez(fd_pid); +int ebpf_exit_plugin = 0; +/** + * Close the collector gracefully + * + * @param sig is the signal number used to close the collector + */ +static void ebpf_stop_threads(int sig) +{ + ebpf_exit_plugin = 1; + int i; + for (i = 0; ebpf_threads[i].name != NULL; i++); + + usec_t max = 2 * USEC_PER_SEC, step = 100000; + while (i && max) { + max -= step; + sleep_usec(step); + i = 0; + int j; + for (j = 0; ebpf_threads[j].name != NULL; j++) { + if (ebpf_threads[j].enabled != NETDATA_MAIN_THREAD_EXITED) + i++; + } } - if (ebpf_modules[EBPF_MODULE_SHM_IDX].enabled) { - ebpf_modules[EBPF_MODULE_SHM_IDX].enabled = 0; - clean_shm_pid_structures(); - freez(shm_pid); + //Unload threads(except sync and filesystem) + for (i = 0; ebpf_threads[i].name != NULL; i++) { + if (ebpf_threads[i].enabled == NETDATA_MAIN_THREAD_EXITED && i != EBPF_MODULE_FILESYSTEM_IDX && + i != EBPF_MODULE_SYNC_IDX) + ebpf_unload_legacy_code(ebpf_modules[i].objects, ebpf_modules[i].probe_links); } - ebpf_close_cgroup_shm(); - - ebpf_clean_cgroup_pids(); - /* - int ret = fork(); - if (ret < 0) // error - error("Cannot fork(), so I won't be able to clean %skprobe_events", NETDATA_DEBUGFS); - else if (!ret) { // child - int i; - for (i = getdtablesize(); i >= 0; --i) - close(i); - - int fd = open("/dev/null", O_RDWR, 0); - if (fd != -1) { - dup2(fd, STDIN_FILENO); - dup2(fd, STDOUT_FILENO); - dup2(fd, STDERR_FILENO); - } - - if (fd > 2) - close(fd); - - int sid = setsid(); - if (sid >= 0) { - debug(D_EXIT, "Wait for father %d die", getpid()); - sleep_usec(200000); // Sleep 200 milliseconds to father dies. - clean_loaded_events(); - } else { - error("Cannot become session id leader, so I won't try to clean kprobe_events.\n"); + //Unload filesystem + if (ebpf_threads[EBPF_MODULE_FILESYSTEM_IDX].enabled == NETDATA_MAIN_THREAD_EXITED) { + for (i = 0; localfs[i].filesystem != NULL; i++) { + ebpf_unload_legacy_code(localfs[i].objects, localfs[i].probe_links); } - } else { // parent - exit(0); - } - */ - -#ifdef LIBBPF_MAJOR_VERSION - if (default_btf) { - btf__free(default_btf); - default_btf = NULL; } -#endif - if (!remove_pid) { - remove_pid = 1; - char filename[FILENAME_MAX + 1]; - ebpf_pid_file(filename, FILENAME_MAX); - if (unlink(filename)) - error("Cannot remove PID file %s", filename); + //Unload Sync + if (ebpf_threads[EBPF_MODULE_SYNC_IDX].enabled == NETDATA_MAIN_THREAD_EXITED) { + for (i = 0; local_syscalls[i].syscall != NULL; i++) { + ebpf_unload_legacy_code(local_syscalls[i].objects, local_syscalls[i].probe_links); + } } - exit(sig); + ebpf_exit(sig); } /***************************************************************** @@ -702,7 +801,7 @@ static inline void ebpf_enable_specific_chart(struct ebpf_module *em, int disabl // oomkill stores data inside apps submenu, so it always need to have apps_enabled for plugin to create // its chart, without this comparison eBPF.plugin will try to store invalid data when apps is disabled. if (!disable_apps || !strcmp(em->thread_name, "oomkill")) { - em->apps_charts = CONFIG_BOOLEAN_YES; + em->apps_charts = NETDATA_EBPF_APPS_FLAG_YES; } if (!disable_cgroup) { @@ -767,7 +866,7 @@ static inline void ebpf_disable_apps() { int i; for (i = 0; ebpf_modules[i].thread_name; i++) { - ebpf_modules[i].apps_charts = 0; + ebpf_modules[i].apps_charts = NETDATA_EBPF_APPS_FLAG_NO; } } @@ -1074,7 +1173,6 @@ int ebpf_start_pthread_variables() pthread_mutex_init(&collect_data_mutex, NULL); if (pthread_cond_init(&collect_data_cond_var, NULL)) { - thread_finished++; error("Cannot start conditional variable to control Apps charts."); return -1; } @@ -1094,7 +1192,7 @@ static inline uint32_t ebpf_am_i_collect_pids() uint32_t ret = 0; int i; for (i = 0; ebpf_modules[i].thread_name; i++) { - ret |= ebpf_modules[i].cgroup_charts | ebpf_modules[i].apps_charts; + ret |= ebpf_modules[i].cgroup_charts | (ebpf_modules[i].apps_charts & NETDATA_EBPF_APPS_FLAG_YES); } return ret; @@ -1718,7 +1816,6 @@ static void ebpf_parse_args(int argc, char **argv) &apps_groups_default_target, &apps_groups_root_target, ebpf_stock_config_dir, "groups")) { error("Cannot read process groups '%s/apps_groups.conf'. There are no internal defaults. Failing.", ebpf_stock_config_dir); - thread_finished++; ebpf_exit(1); } } else @@ -1875,6 +1972,19 @@ static void ebpf_manage_pid(pid_t pid) } /** + * Set start routine + * + * Set static routine before threads to be created. + */ + static void ebpf_set_static_routine() + { + int i; + for (i = 0; ebpf_modules[i].thread_name; i++) { + ebpf_threads[i].start_routine = ebpf_modules[i].start_routine; + } + } + +/** * Entry point * * @param argc the number of arguments @@ -1918,16 +2028,19 @@ int main(int argc, char **argv) return 4; } - signal(SIGINT, ebpf_exit); - signal(SIGTERM, ebpf_exit); - signal(SIGPIPE, ebpf_exit); + signal(SIGINT, ebpf_stop_threads); + signal(SIGQUIT, ebpf_stop_threads); + signal(SIGTERM, ebpf_stop_threads); + signal(SIGPIPE, ebpf_stop_threads); if (ebpf_start_pthread_variables()) { - thread_finished++; error("Cannot start mutex to control overall charts."); ebpf_exit(5); } + netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX"); + if(verify_netdata_host_prefix() == -1) ebpf_exit(6); + ebpf_allocate_common_vectors(); #ifdef LIBBPF_MAJOR_VERSION @@ -1940,44 +2053,7 @@ int main(int argc, char **argv) read_local_ports("/proc/net/udp", IPPROTO_UDP); read_local_ports("/proc/net/udp6", IPPROTO_UDP); - struct netdata_static_thread ebpf_threads[] = { - {"EBPF PROCESS", NULL, NULL, 1, - NULL, NULL, ebpf_modules[EBPF_MODULE_PROCESS_IDX].start_routine}, - {"EBPF SOCKET" , NULL, NULL, 1, - NULL, NULL, ebpf_modules[EBPF_MODULE_SOCKET_IDX].start_routine}, - {"EBPF CACHESTAT" , NULL, NULL, 1, - NULL, NULL, ebpf_modules[EBPF_MODULE_CACHESTAT_IDX].start_routine}, - {"EBPF SYNC" , NULL, NULL, 1, - NULL, NULL, ebpf_modules[EBPF_MODULE_SYNC_IDX].start_routine}, - {"EBPF DCSTAT" , NULL, NULL, 1, - NULL, NULL, ebpf_modules[EBPF_MODULE_DCSTAT_IDX].start_routine}, - {"EBPF SWAP" , NULL, NULL, 1, - NULL, NULL, ebpf_modules[EBPF_MODULE_SWAP_IDX].start_routine}, - {"EBPF VFS" , NULL, NULL, 1, - NULL, NULL, ebpf_modules[EBPF_MODULE_VFS_IDX].start_routine}, - {"EBPF FILESYSTEM" , NULL, NULL, 1, - NULL, NULL, ebpf_modules[EBPF_MODULE_FILESYSTEM_IDX].start_routine}, - {"EBPF DISK" , NULL, NULL, 1, - NULL, NULL, ebpf_modules[EBPF_MODULE_DISK_IDX].start_routine}, - {"EBPF MOUNT" , NULL, NULL, 1, - NULL, NULL, ebpf_modules[EBPF_MODULE_MOUNT_IDX].start_routine}, - {"EBPF FD" , NULL, NULL, 1, - NULL, NULL, ebpf_modules[EBPF_MODULE_FD_IDX].start_routine}, - {"EBPF HARDIRQ" , NULL, NULL, 1, - NULL, NULL, ebpf_modules[EBPF_MODULE_HARDIRQ_IDX].start_routine}, - {"EBPF SOFTIRQ" , NULL, NULL, 1, - NULL, NULL, ebpf_modules[EBPF_MODULE_SOFTIRQ_IDX].start_routine}, - {"EBPF OOMKILL" , NULL, NULL, 1, - NULL, NULL, ebpf_modules[EBPF_MODULE_OOMKILL_IDX].start_routine}, - {"EBPF SHM" , NULL, NULL, 1, - NULL, NULL, ebpf_modules[EBPF_MODULE_SHM_IDX].start_routine}, - {"EBPF MDFLUSH" , NULL, NULL, 1, - NULL, NULL, ebpf_modules[EBPF_MODULE_MDFLUSH_IDX].start_routine}, - {NULL , NULL, NULL, 0, - NULL, NULL, NULL} - }; - - //clean_loaded_events(); + ebpf_set_static_routine(); int i; for (i = 0; ebpf_threads[i].name != NULL; i++) { @@ -1986,16 +2062,16 @@ int main(int argc, char **argv) ebpf_module_t *em = &ebpf_modules[i]; em->thread_id = i; - netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_JOINABLE, st->start_routine, em); + netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, em); } - for (i = 0; ebpf_threads[i].name != NULL; i++) { - struct netdata_static_thread *st = &ebpf_threads[i]; - netdata_thread_join(*st->thread, NULL); + usec_t step = 60 * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + //Plugin will be killed when it receives a signal + for (;;) { + (void)heartbeat_next(&hb, step); } - thread_finished++; - ebpf_exit(0); - return 0; } diff --git a/collectors/ebpf.plugin/ebpf.h b/collectors/ebpf.plugin/ebpf.h index 337e4f471..c23ca332d 100644 --- a/collectors/ebpf.plugin/ebpf.h +++ b/collectors/ebpf.plugin/ebpf.h @@ -108,6 +108,12 @@ typedef struct ebpf_tracepoint { char *event; } ebpf_tracepoint_t; +enum ebpf_threads_status { + NETDATA_THREAD_EBPF_RUNNING, + NETDATA_THREAD_EBPF_STOPPING, + NETDATA_THREAD_EBPF_STOPPED +}; + // Copied from musl header #ifndef offsetof #if __GNUC__ > 3 @@ -162,7 +168,6 @@ extern void *ebpf_socket_thread(void *ptr); // Common variables extern pthread_mutex_t lock; -extern int close_ebpf_plugin; extern int ebpf_nprocs; extern int running_on_kernel; extern int isrh; @@ -265,7 +270,6 @@ extern int shm_fd_ebpf_cgroup; extern sem_t *shm_sem_ebpf_cgroup; extern pthread_mutex_t mutex_cgroup_shm; extern size_t all_pids_count; -extern uint32_t finalized_threads; extern ebpf_plugin_stats_t plugin_statistics; extern struct btf *default_btf; @@ -281,6 +285,9 @@ extern void ebpf_write_chart_obsolete(char *type, char *id, char *title, char *u char *charttype, char *context, int order, int update_every); extern void write_histogram_chart(char *family, char *name, const netdata_idx_t *hist, char **dimensions, uint32_t end); void ebpf_update_disabled_plugin_stats(ebpf_module_t *em); +extern ebpf_filesystem_partitions_t localfs[]; +extern ebpf_sync_syscalls_t local_syscalls[]; +extern int ebpf_exit_plugin; #define EBPF_MAX_SYNCHRONIZATION_TIME 300 diff --git a/collectors/ebpf.plugin/ebpf_apps.c b/collectors/ebpf.plugin/ebpf_apps.c index 2c65db8d1..7519e0640 100644 --- a/collectors/ebpf.plugin/ebpf_apps.c +++ b/collectors/ebpf.plugin/ebpf_apps.c @@ -134,7 +134,7 @@ size_t zero_all_targets(struct target *root) while (pid_on_target) { struct pid_on_target *pid_on_target_to_free = pid_on_target; pid_on_target = pid_on_target->next; - free(pid_on_target_to_free); + freez(pid_on_target_to_free); } w->root_pid = NULL; @@ -1119,7 +1119,7 @@ void collect_data_for_all_processes(int tbl_pid_stats_fd) key = pids->pid; ebpf_process_stat_t *w = global_process_stats[key]; if (!w) { - w = mallocz(sizeof(ebpf_process_stat_t)); + w = callocz(1, sizeof(ebpf_process_stat_t)); global_process_stats[key] = w; } diff --git a/collectors/ebpf.plugin/ebpf_apps.h b/collectors/ebpf.plugin/ebpf_apps.h index 259e642ad..f65a137b5 100644 --- a/collectors/ebpf.plugin/ebpf_apps.h +++ b/collectors/ebpf.plugin/ebpf_apps.h @@ -433,8 +433,6 @@ extern size_t read_bandwidth_statistic_using_pid_on_target(ebpf_bandwidth_t **ep extern void collect_data_for_all_processes(int tbl_pid_stats_fd); -extern void clean_global_memory(); - extern ebpf_process_stat_t **global_process_stats; extern ebpf_process_publish_apps_t **current_apps_data; extern netdata_publish_cachestat_t **cachestat_pid; diff --git a/collectors/ebpf.plugin/ebpf_cachestat.c b/collectors/ebpf.plugin/ebpf_cachestat.c index b565f635f..14669bf68 100644 --- a/collectors/ebpf.plugin/ebpf_cachestat.c +++ b/collectors/ebpf.plugin/ebpf_cachestat.c @@ -5,9 +5,6 @@ netdata_publish_cachestat_t **cachestat_pid; -static struct bpf_link **probe_links = NULL; -static struct bpf_object *objects = NULL; - static char *cachestat_counter_dimension_name[NETDATA_CACHESTAT_END] = { "ratio", "dirty", "hit", "miss" }; static netdata_syscall_stat_t cachestat_counter_aggregated_data[NETDATA_CACHESTAT_END]; @@ -18,8 +15,6 @@ netdata_cachestat_pid_t *cachestat_vector = NULL; static netdata_idx_t cachestat_hash_values[NETDATA_CACHESTAT_END]; static netdata_idx_t *cachestat_values = NULL; -static int read_thread_closed = 1; - struct netdata_static_thread cachestat_threads = {"CACHESTAT KERNEL", NULL, NULL, 1, NULL, NULL, NULL}; @@ -44,6 +39,7 @@ struct config cachestat_config = { .first_section = NULL, .mutex = NETDATA_MUTEX_INITIALIZER, .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, .rwlock = AVL_LOCK_INITIALIZER } }; +static enum ebpf_threads_status ebpf_cachestat_exited = NETDATA_THREAD_EBPF_RUNNING; netdata_ebpf_targets_t cachestat_targets[] = { {.name = "add_to_page_cache_lru", .mode = EBPF_LOAD_TRAMPOLINE}, {.name = "mark_page_accessed", .mode = EBPF_LOAD_TRAMPOLINE}, @@ -294,56 +290,48 @@ static inline int ebpf_cachestat_load_and_attach(struct cachestat_bpf *obj, ebpf *****************************************************************/ /** - * Clean PID structures + * Cachestat exit. * - * Clean the allocated structures. + * Cancel child and exit. + * + * @param ptr thread data. */ -void clean_cachestat_pid_structures() { - struct pid_stat *pids = root_of_pids; - while (pids) { - freez(cachestat_pid[pids->pid]); - - pids = pids->next; +static void ebpf_cachestat_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) { + em->enabled = NETDATA_MAIN_THREAD_EXITED; + return; } + + ebpf_cachestat_exited = NETDATA_THREAD_EBPF_STOPPING; } /** - * Clean up the main thread. + * Cachestat cleanup + * + * Clean up allocated addresses. * * @param ptr thread data. */ static void ebpf_cachestat_cleanup(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - if (!em->enabled) + if (ebpf_cachestat_exited != NETDATA_THREAD_EBPF_STOPPED) return; - heartbeat_t hb; - heartbeat_init(&hb); - uint32_t tick = 2*USEC_PER_MS; - while (!read_thread_closed) { - usec_t dt = heartbeat_next(&hb, tick); - UNUSED(dt); - } - ebpf_cleanup_publish_syscall(cachestat_counter_publish_aggregated); freez(cachestat_vector); freez(cachestat_values); + freez(cachestat_threads.thread); - if (probe_links) { - struct bpf_program *prog; - size_t i = 0 ; - bpf_object__for_each_program(prog, objects) { - bpf_link__destroy(probe_links[i]); - i++; - } - bpf_object__close(objects); - } #ifdef LIBBPF_MAJOR_VERSION - else if (bpf_obj) + if (bpf_obj) cachestat_bpf__destroy(bpf_obj); #endif + cachestat_threads.enabled = NETDATA_MAIN_THREAD_EXITED; + em->enabled = NETDATA_MAIN_THREAD_EXITED; } /***************************************************************** @@ -367,23 +355,23 @@ void cachestat_update_publish(netdata_publish_cachestat_t *out, uint64_t mpa, ui uint64_t apcl, uint64_t apd) { // Adapted algorithm from https://github.com/iovisor/bcc/blob/master/tools/cachestat.py#L126-L138 - calculated_number total = (calculated_number) (((long long)mpa) - ((long long)mbd)); + NETDATA_DOUBLE total = (NETDATA_DOUBLE) (((long long)mpa) - ((long long)mbd)); if (total < 0) total = 0; - calculated_number misses = (calculated_number) ( ((long long) apcl) - ((long long) apd) ); + NETDATA_DOUBLE misses = (NETDATA_DOUBLE) ( ((long long) apcl) - ((long long) apd) ); if (misses < 0) misses = 0; // If hits are < 0, then its possible misses are overestimate due to possibly page cache read ahead adding // more pages than needed. In this case just assume misses as total and reset hits. - calculated_number hits = total - misses; + NETDATA_DOUBLE hits = total - misses; if (hits < 0 ) { misses = total; hits = 0; } - calculated_number ratio = (total > 0) ? hits/total : 1; + NETDATA_DOUBLE ratio = (total > 0) ? hits/total : 1; out->ratio = (long long )(ratio*100); out->hit = (long long)hits; @@ -607,6 +595,8 @@ void ebpf_cachestat_create_apps_charts(struct ebpf_module *em, void *ptr) 20093, ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], root, em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT); + + em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; } /***************************************************************** @@ -652,22 +642,25 @@ static void read_global_table() */ void *ebpf_cachestat_read_hash(void *ptr) { - read_thread_closed = 0; - + netdata_thread_cleanup_push(ebpf_cachestat_cleanup, ptr); heartbeat_t hb; heartbeat_init(&hb); ebpf_module_t *em = (ebpf_module_t *)ptr; usec_t step = NETDATA_LATENCY_CACHESTAT_SLEEP_MS * em->update_every; - while (!close_ebpf_plugin) { + while (ebpf_cachestat_exited == NETDATA_THREAD_EBPF_RUNNING) { usec_t dt = heartbeat_next(&hb, step); (void)dt; + if (ebpf_cachestat_exited == NETDATA_THREAD_EBPF_STOPPING) + break; read_global_table(); } - read_thread_closed = 1; + ebpf_cachestat_exited = NETDATA_THREAD_EBPF_STOPPED; + + netdata_thread_cleanup_pop(1); return NULL; } @@ -892,7 +885,7 @@ static int ebpf_send_systemd_cachestat_charts() for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long)ect->publish_cachestat.ratio); - } else + } else if (unlikely(ect->systemd)) ret = 0; } write_end_chart(); @@ -1081,43 +1074,44 @@ void ebpf_cachestat_send_cgroup_data(int update_every) */ static void cachestat_collector(ebpf_module_t *em) { - cachestat_threads.thread = mallocz(sizeof(netdata_thread_t)); + cachestat_threads.thread = callocz(1, sizeof(netdata_thread_t)); cachestat_threads.start_routine = ebpf_cachestat_read_hash; - netdata_thread_create(cachestat_threads.thread, cachestat_threads.name, NETDATA_THREAD_OPTION_JOINABLE, + netdata_thread_create(cachestat_threads.thread, cachestat_threads.name, NETDATA_THREAD_OPTION_DEFAULT, ebpf_cachestat_read_hash, em); netdata_publish_cachestat_t publish; memset(&publish, 0, sizeof(publish)); - int apps = em->apps_charts; int cgroups = em->cgroup_charts; int update_every = em->update_every; - int counter = update_every - 1; - while (!close_ebpf_plugin) { + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = update_every * USEC_PER_SEC; + //This will be cancelled by its parent + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + netdata_apps_integration_flags_t apps = em->apps_charts; pthread_mutex_lock(&collect_data_mutex); - pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); - - if (++counter == update_every) { - counter = 0; - if (apps) - read_apps_table(); + if (apps) + read_apps_table(); - if (cgroups) - ebpf_update_cachestat_cgroup(); + if (cgroups) + ebpf_update_cachestat_cgroup(); - pthread_mutex_lock(&lock); + pthread_mutex_lock(&lock); - cachestat_send_global(&publish); + cachestat_send_global(&publish); - if (apps) - ebpf_cache_send_apps_data(apps_groups_root_target); + if (apps & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) + ebpf_cache_send_apps_data(apps_groups_root_target); - if (cgroups) - ebpf_cachestat_send_cgroup_data(update_every); - - pthread_mutex_unlock(&lock); - } + if (cgroups) + ebpf_cachestat_send_cgroup_data(update_every); + pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); } } @@ -1234,8 +1228,8 @@ static int ebpf_cachestat_load_bpf(ebpf_module_t *em) { int ret = 0; if (em->load == EBPF_LOAD_LEGACY) { - probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &objects); - if (!probe_links) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { ret = -1; } } @@ -1266,7 +1260,7 @@ static int ebpf_cachestat_load_bpf(ebpf_module_t *em) */ void *ebpf_cachestat_thread(void *ptr) { - netdata_thread_cleanup_push(ebpf_cachestat_cleanup, ptr); + netdata_thread_cleanup_push(ebpf_cachestat_exit, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; em->maps = cachestat_maps; diff --git a/collectors/ebpf.plugin/ebpf_cachestat.h b/collectors/ebpf.plugin/ebpf_cachestat.h index b386e383c..fdd88464a 100644 --- a/collectors/ebpf.plugin/ebpf_cachestat.h +++ b/collectors/ebpf.plugin/ebpf_cachestat.h @@ -82,7 +82,6 @@ typedef struct netdata_publish_cachestat { } netdata_publish_cachestat_t; extern void *ebpf_cachestat_thread(void *ptr); -extern void clean_cachestat_pid_structures(); extern struct config cachestat_config; extern netdata_ebpf_targets_t cachestat_targets[]; diff --git a/collectors/ebpf.plugin/ebpf_cgroup.c b/collectors/ebpf.plugin/ebpf_cgroup.c index e6b483baf..24469c642 100644 --- a/collectors/ebpf.plugin/ebpf_cgroup.c +++ b/collectors/ebpf.plugin/ebpf_cgroup.c @@ -134,26 +134,6 @@ static inline void ebpf_clean_specific_cgroup_pids(struct pid_on_target2 *pt) } /** - * Cleanup link list - */ -void ebpf_clean_cgroup_pids() -{ - if (!ebpf_cgroup_pids) - return; - - ebpf_cgroup_target_t *ect = ebpf_cgroup_pids; - while (ect) { - ebpf_cgroup_target_t *next_cgroup = ect->next; - - ebpf_clean_specific_cgroup_pids(ect->pids); - freez(ect); - - ect = next_cgroup; - } - ebpf_cgroup_pids = NULL; -} - -/** * Remove Cgroup Update Target Update List * * Remove from cgroup target and update the link list diff --git a/collectors/ebpf.plugin/ebpf_cgroup.h b/collectors/ebpf.plugin/ebpf_cgroup.h index 03969194a..cca9a950d 100644 --- a/collectors/ebpf.plugin/ebpf_cgroup.h +++ b/collectors/ebpf.plugin/ebpf_cgroup.h @@ -63,7 +63,6 @@ typedef struct ebpf_cgroup_target { extern void ebpf_map_cgroup_shared_memory(); extern void ebpf_parse_cgroup_shm_data(); extern void ebpf_close_cgroup_shm(); -extern void ebpf_clean_cgroup_pids(); extern void ebpf_create_charts_on_systemd(char *id, char *title, char *units, char *family, char *charttype, int order, char *algorithm, char *context, char *module, int update_every); diff --git a/collectors/ebpf.plugin/ebpf_dcstat.c b/collectors/ebpf.plugin/ebpf_dcstat.c index 619d8520b..8cf063ca1 100644 --- a/collectors/ebpf.plugin/ebpf_dcstat.c +++ b/collectors/ebpf.plugin/ebpf_dcstat.c @@ -10,14 +10,9 @@ static netdata_publish_syscall_t dcstat_counter_publish_aggregated[NETDATA_DCSTA netdata_dcstat_pid_t *dcstat_vector = NULL; netdata_publish_dcstat_t **dcstat_pid = NULL; -static struct bpf_link **probe_links = NULL; -static struct bpf_object *objects = NULL; - static netdata_idx_t dcstat_hash_values[NETDATA_DCSTAT_IDX_END]; static netdata_idx_t *dcstat_values = NULL; -static int read_thread_closed = 1; - struct config dcstat_config = { .first_section = NULL, .last_section = NULL, .mutex = NETDATA_MUTEX_INITIALIZER, @@ -27,6 +22,7 @@ struct config dcstat_config = { .first_section = NULL, struct netdata_static_thread dcstat_threads = {"DCSTAT KERNEL", NULL, NULL, 1, NULL, NULL, NULL}; +static enum ebpf_threads_status ebpf_dcstat_exited = NETDATA_THREAD_EBPF_RUNNING; static ebpf_local_maps_t dcstat_maps[] = {{.name = "dcstat_global", .internal_input = NETDATA_DIRECTORY_CACHE_END, .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, @@ -239,8 +235,8 @@ static inline int ebpf_dc_load_and_attach(struct dc_bpf *obj, ebpf_module_t *em) */ void dcstat_update_publish(netdata_publish_dcstat_t *out, uint64_t cache_access, uint64_t not_found) { - calculated_number successful_access = (calculated_number) (((long long)cache_access) - ((long long)not_found)); - calculated_number ratio = (cache_access) ? successful_access/(calculated_number)cache_access : 0; + NETDATA_DOUBLE successful_access = (NETDATA_DOUBLE) (((long long)cache_access) - ((long long)not_found)); + NETDATA_DOUBLE ratio = (cache_access) ? successful_access/(NETDATA_DOUBLE)cache_access : 0; out->ratio = (long long )(ratio*100); } @@ -252,20 +248,6 @@ void dcstat_update_publish(netdata_publish_dcstat_t *out, uint64_t cache_access, *****************************************************************/ /** - * Clean PID structures - * - * Clean the allocated structures. - */ -void clean_dcstat_pid_structures() { - struct pid_stat *pids = root_of_pids; - while (pids) { - freez(dcstat_pid[pids->pid]); - - pids = pids->next; - } -} - -/** * Clean names * * Clean the optional names allocated during startup. @@ -280,6 +262,24 @@ void ebpf_dcstat_clean_names() } /** + * DCstat exit + * + * Cancel child and exit. + * + * @param ptr thread data. + */ +static void ebpf_dcstat_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) { + em->enabled = NETDATA_MAIN_THREAD_EXITED; + return; + } + + ebpf_dcstat_exited = NETDATA_THREAD_EBPF_STOPPING; +} + +/** * Clean up the main thread. * * @param ptr thread data. @@ -287,37 +287,24 @@ void ebpf_dcstat_clean_names() static void ebpf_dcstat_cleanup(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - if (!em->enabled) + if (ebpf_dcstat_exited != NETDATA_THREAD_EBPF_STOPPED) return; - heartbeat_t hb; - heartbeat_init(&hb); - uint32_t tick = 2 * USEC_PER_MS; - while (!read_thread_closed) { - usec_t dt = heartbeat_next(&hb, tick); - UNUSED(dt); - } - freez(dcstat_vector); freez(dcstat_values); + freez(dcstat_threads.thread); ebpf_cleanup_publish_syscall(dcstat_counter_publish_aggregated); ebpf_dcstat_clean_names(); - if (probe_links) { - struct bpf_program *prog; - size_t i = 0 ; - bpf_object__for_each_program(prog, objects) { - bpf_link__destroy(probe_links[i]); - i++; - } - bpf_object__close(objects); - } #ifdef LIBBPF_MAJOR_VERSION - else if (bpf_obj) + if (bpf_obj) dc_bpf__destroy(bpf_obj); #endif + + dcstat_threads.enabled = NETDATA_MAIN_THREAD_EXITED; + em->enabled = NETDATA_MAIN_THREAD_EXITED; } /***************************************************************** @@ -371,6 +358,8 @@ void ebpf_dcstat_create_apps_charts(struct ebpf_module *em, void *ptr) 20103, ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], root, em->update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT); + + em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; } /***************************************************************** @@ -535,22 +524,25 @@ static void read_global_table() */ void *ebpf_dcstat_read_hash(void *ptr) { - read_thread_closed = 0; - + netdata_thread_cleanup_push(ebpf_dcstat_cleanup, ptr); heartbeat_t hb; heartbeat_init(&hb); ebpf_module_t *em = (ebpf_module_t *)ptr; usec_t step = NETDATA_LATENCY_DCSTAT_SLEEP_MS * em->update_every; - while (!close_ebpf_plugin) { + while (ebpf_dcstat_exited == NETDATA_THREAD_EBPF_RUNNING) { usec_t dt = heartbeat_next(&hb, step); (void)dt; + if (ebpf_dcstat_exited == NETDATA_THREAD_EBPF_STOPPING) + break; read_global_table(); } - read_thread_closed = 1; + ebpf_dcstat_exited = NETDATA_THREAD_EBPF_STOPPED; + + netdata_thread_cleanup_pop(1); return NULL; } @@ -881,7 +873,7 @@ static int ebpf_send_systemd_dc_charts() for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long) ect->publish_dc.ratio); - } else + } else if (unlikely(ect->systemd)) ret = 0; } write_end_chart(); @@ -1013,40 +1005,40 @@ static void dcstat_collector(ebpf_module_t *em) dcstat_threads.thread = mallocz(sizeof(netdata_thread_t)); dcstat_threads.start_routine = ebpf_dcstat_read_hash; - netdata_thread_create(dcstat_threads.thread, dcstat_threads.name, NETDATA_THREAD_OPTION_JOINABLE, + netdata_thread_create(dcstat_threads.thread, dcstat_threads.name, NETDATA_THREAD_OPTION_DEFAULT, ebpf_dcstat_read_hash, em); netdata_publish_dcstat_t publish; memset(&publish, 0, sizeof(publish)); - int apps = em->apps_charts; int cgroups = em->cgroup_charts; int update_every = em->update_every; - int counter = update_every - 1; - while (!close_ebpf_plugin) { - pthread_mutex_lock(&collect_data_mutex); - pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); - - if (++counter == update_every) { - counter = 0; - if (apps) - read_apps_table(); + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; - if (cgroups) - ebpf_update_dc_cgroup(); + netdata_apps_integration_flags_t apps = em->apps_charts; + pthread_mutex_lock(&collect_data_mutex); + if (apps) + read_apps_table(); - pthread_mutex_lock(&lock); + if (cgroups) + ebpf_update_dc_cgroup(); - dcstat_send_global(&publish); + pthread_mutex_lock(&lock); - if (apps) - ebpf_dcache_send_apps_data(apps_groups_root_target); + dcstat_send_global(&publish); - if (cgroups) - ebpf_dc_send_cgroup_data(update_every); + if (apps & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) + ebpf_dcache_send_apps_data(apps_groups_root_target); - pthread_mutex_unlock(&lock); - } + if (cgroups) + ebpf_dc_send_cgroup_data(update_every); + pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); } } @@ -1125,8 +1117,8 @@ static int ebpf_dcstat_load_bpf(ebpf_module_t *em) { int ret = 0; if (em->load == EBPF_LOAD_LEGACY) { - probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &objects); - if (!probe_links) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { ret = -1; } } @@ -1157,7 +1149,7 @@ static int ebpf_dcstat_load_bpf(ebpf_module_t *em) */ void *ebpf_dcstat_thread(void *ptr) { - netdata_thread_cleanup_push(ebpf_dcstat_cleanup, ptr); + netdata_thread_cleanup_push(ebpf_dcstat_exit, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; em->maps = dcstat_maps; diff --git a/collectors/ebpf.plugin/ebpf_dcstat.h b/collectors/ebpf.plugin/ebpf_dcstat.h index 940864737..5c4a80cd7 100644 --- a/collectors/ebpf.plugin/ebpf_dcstat.h +++ b/collectors/ebpf.plugin/ebpf_dcstat.h @@ -77,7 +77,6 @@ typedef struct netdata_publish_dcstat { extern void *ebpf_dcstat_thread(void *ptr); extern void ebpf_dcstat_create_apps_charts(struct ebpf_module *em, void *ptr); -extern void clean_dcstat_pid_structures(); extern struct config dcstat_config; extern netdata_ebpf_targets_t dc_targets[]; diff --git a/collectors/ebpf.plugin/ebpf_disk.c b/collectors/ebpf.plugin/ebpf_disk.c index 3ddf50b93..96b1705ce 100644 --- a/collectors/ebpf.plugin/ebpf_disk.c +++ b/collectors/ebpf.plugin/ebpf_disk.c @@ -25,9 +25,6 @@ char *tracepoint_block_type = { "block"} ; char *tracepoint_block_issue = { "block_rq_issue" }; char *tracepoint_block_rq_complete = { "block_rq_complete" }; -static struct bpf_link **probe_links = NULL; -static struct bpf_object *objects = NULL; - static int was_block_issue_enabled = 0; static int was_block_rq_complete_enabled = 0; @@ -35,12 +32,11 @@ static char **dimensions = NULL; static netdata_syscall_stat_t disk_aggregated_data[NETDATA_EBPF_HIST_MAX_BINS]; static netdata_publish_syscall_t disk_publish_aggregated[NETDATA_EBPF_HIST_MAX_BINS]; -static int read_thread_closed = 1; - static netdata_idx_t *disk_hash_values = NULL; static struct netdata_static_thread disk_threads = {"DISK KERNEL", NULL, NULL, 1, NULL, NULL, NULL }; +static enum ebpf_threads_status ebpf_disk_exited = NETDATA_THREAD_EBPF_RUNNING; ebpf_publish_disk_t *plot_disks = NULL; pthread_mutex_t plot_mutex; @@ -428,25 +424,37 @@ static void ebpf_cleanup_disk_list() } /** - * Clean up the main thread. + * Disk exit. + * + * Cancel child and exit. * * @param ptr thread data. */ -static void ebpf_disk_cleanup(void *ptr) +static void ebpf_disk_exit(void *ptr) { - ebpf_disk_disable_tracepoints(); + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) { + em->enabled = NETDATA_MAIN_THREAD_EXITED; + return; + } + ebpf_disk_exited = NETDATA_THREAD_EBPF_STOPPING; +} + +/** + * Disk Cleanup + * + * Clean up allocated memory. + * + * @param ptr thread data. + */ +static void ebpf_disk_cleanup(void *ptr) +{ ebpf_module_t *em = (ebpf_module_t *)ptr; - if (!em->enabled) + if (ebpf_disk_exited != NETDATA_THREAD_EBPF_STOPPED) return; - heartbeat_t hb; - heartbeat_init(&hb); - uint32_t tick = 2 * USEC_PER_MS; - while (!read_thread_closed) { - usec_t dt = heartbeat_next(&hb, tick); - UNUSED(dt); - } + ebpf_disk_disable_tracepoints(); if (dimensions) ebpf_histogram_dimension_cleanup(dimensions, NETDATA_EBPF_HIST_MAX_BINS); @@ -458,15 +466,8 @@ static void ebpf_disk_cleanup(void *ptr) ebpf_cleanup_plot_disks(); ebpf_cleanup_disk_list(); - if (probe_links) { - struct bpf_program *prog; - size_t i = 0 ; - bpf_object__for_each_program(prog, objects) { - bpf_link__destroy(probe_links[i]); - i++; - } - bpf_object__close(objects); - } + disk_threads.enabled = NETDATA_MAIN_THREAD_EXITED; + em->enabled = NETDATA_MAIN_THREAD_EXITED; } /***************************************************************** @@ -582,19 +583,25 @@ static void read_hard_disk_tables(int table) */ void *ebpf_disk_read_hash(void *ptr) { + netdata_thread_cleanup_push(ebpf_disk_cleanup, ptr); heartbeat_t hb; heartbeat_init(&hb); ebpf_module_t *em = (ebpf_module_t *)ptr; usec_t step = NETDATA_LATENCY_DISK_SLEEP_MS * em->update_every; - while (!close_ebpf_plugin) { + while (ebpf_disk_exited == NETDATA_THREAD_EBPF_RUNNING) { usec_t dt = heartbeat_next(&hb, step); (void)dt; + if (ebpf_disk_exited == NETDATA_THREAD_EBPF_STOPPING) + break; read_hard_disk_tables(disk_maps[NETDATA_DISK_READ].map_fd); } + ebpf_disk_exited = NETDATA_THREAD_EBPF_STOPPED; + + netdata_thread_cleanup_pop(1); return NULL; } @@ -724,30 +731,26 @@ static void disk_collector(ebpf_module_t *em) disk_threads.thread = mallocz(sizeof(netdata_thread_t)); disk_threads.start_routine = ebpf_disk_read_hash; - netdata_thread_create(disk_threads.thread, disk_threads.name, NETDATA_THREAD_OPTION_JOINABLE, + netdata_thread_create(disk_threads.thread, disk_threads.name, NETDATA_THREAD_OPTION_DEFAULT, ebpf_disk_read_hash, em); int update_every = em->update_every; - int counter = update_every - 1; - read_thread_closed = 0; - while (!close_ebpf_plugin) { - pthread_mutex_lock(&collect_data_mutex); - pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); - - if (++counter == update_every) { - counter = 0; - pthread_mutex_lock(&lock); - ebpf_remove_pointer_from_plot_disk(em); - ebpf_latency_send_hd_data(update_every); - - pthread_mutex_unlock(&lock); - } + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; - pthread_mutex_unlock(&collect_data_mutex); + pthread_mutex_lock(&lock); + ebpf_remove_pointer_from_plot_disk(em); + ebpf_latency_send_hd_data(update_every); + + pthread_mutex_unlock(&lock); ebpf_update_disks(em); } - read_thread_closed = 1; } /***************************************************************** @@ -797,7 +800,7 @@ static int ebpf_disk_enable_tracepoints() */ void *ebpf_disk_thread(void *ptr) { - netdata_thread_cleanup_push(ebpf_disk_cleanup, ptr); + netdata_thread_cleanup_push(ebpf_disk_exit, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; em->maps = disk_maps; @@ -822,8 +825,8 @@ void *ebpf_disk_thread(void *ptr) goto enddisk; } - probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &objects); - if (!probe_links) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { em->enabled = 0; goto enddisk; } diff --git a/collectors/ebpf.plugin/ebpf_fd.c b/collectors/ebpf.plugin/ebpf_fd.c index 10a50c4eb..b4e577dad 100644 --- a/collectors/ebpf.plugin/ebpf_fd.c +++ b/collectors/ebpf.plugin/ebpf_fd.c @@ -29,12 +29,9 @@ struct config fd_config = { .first_section = NULL, .last_section = NULL, .mutex .index = {.avl_tree = { .root = NULL, .compar = appconfig_section_compare }, .rwlock = AVL_LOCK_INITIALIZER } }; -static struct bpf_link **probe_links = NULL; -static struct bpf_object *objects = NULL; - struct netdata_static_thread fd_thread = {"FD KERNEL", NULL, NULL, 1, NULL, NULL, NULL}; -static int read_thread_closed = 1; +static enum ebpf_threads_status ebpf_fd_exited = NETDATA_THREAD_EBPF_RUNNING; static netdata_idx_t fd_hash_values[NETDATA_FD_COUNTER]; static netdata_idx_t *fd_values = NULL; @@ -48,17 +45,21 @@ netdata_fd_stat_t **fd_pid = NULL; *****************************************************************/ /** - * Clean PID structures + * FD Exit + * + * Cancel child thread and exit. * - * Clean the allocated structures. + * @param ptr thread data. */ -void clean_fd_pid_structures() { - struct pid_stat *pids = root_of_pids; - while (pids) { - freez(fd_pid[pids->pid]); - - pids = pids->next; +static void ebpf_fd_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) { + em->enabled = NETDATA_MAIN_THREAD_EXITED; + return; } + + ebpf_fd_exited = NETDATA_THREAD_EBPF_STOPPING; } /** @@ -69,31 +70,16 @@ void clean_fd_pid_structures() { static void ebpf_fd_cleanup(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - if (!em->enabled) + if (ebpf_fd_exited != NETDATA_THREAD_EBPF_STOPPED) return; - heartbeat_t hb; - heartbeat_init(&hb); - uint32_t tick = 2 * USEC_PER_MS; - while (!read_thread_closed) { - usec_t dt = heartbeat_next(&hb, tick); - UNUSED(dt); - } - ebpf_cleanup_publish_syscall(fd_publish_aggregated); freez(fd_thread.thread); freez(fd_values); freez(fd_vector); - if (probe_links) { - struct bpf_program *prog; - size_t i = 0 ; - bpf_object__for_each_program(prog, objects) { - bpf_link__destroy(probe_links[i]); - i++; - } - bpf_object__close(objects); - } + fd_thread.enabled = NETDATA_MAIN_THREAD_EXITED; + em->enabled = NETDATA_MAIN_THREAD_EXITED; } /***************************************************************** @@ -161,21 +147,24 @@ static void read_global_table() */ void *ebpf_fd_read_hash(void *ptr) { - read_thread_closed = 0; - + netdata_thread_cleanup_push(ebpf_fd_cleanup, ptr); heartbeat_t hb; heartbeat_init(&hb); ebpf_module_t *em = (ebpf_module_t *)ptr; usec_t step = NETDATA_FD_SLEEP_MS * em->update_every; - while (!close_ebpf_plugin) { + while (ebpf_fd_exited == NETDATA_THREAD_EBPF_RUNNING) { usec_t dt = heartbeat_next(&hb, step); (void)dt; + if (ebpf_fd_exited == NETDATA_THREAD_EBPF_STOPPING) + break; read_global_table(); } - read_thread_closed = 1; + ebpf_fd_exited = NETDATA_THREAD_EBPF_STOPPED; + + netdata_thread_cleanup_pop(1); return NULL; } @@ -572,7 +561,7 @@ static int ebpf_send_systemd_fd_charts(ebpf_module_t *em) for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_fd.open_call); - } else + } else if (unlikely(ect->systemd)) ret = 0; } write_end_chart(); @@ -665,38 +654,37 @@ static void fd_collector(ebpf_module_t *em) fd_thread.thread = mallocz(sizeof(netdata_thread_t)); fd_thread.start_routine = ebpf_fd_read_hash; - netdata_thread_create(fd_thread.thread, fd_thread.name, NETDATA_THREAD_OPTION_JOINABLE, + netdata_thread_create(fd_thread.thread, fd_thread.name, NETDATA_THREAD_OPTION_DEFAULT, ebpf_fd_read_hash, em); - int apps = em->apps_charts; int cgroups = em->cgroup_charts; - int update_every = em->update_every; - int counter = update_every - 1; - while (!close_ebpf_plugin) { - pthread_mutex_lock(&collect_data_mutex); - pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = em->update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; - if (++counter == update_every) { - counter = 0; - if (apps) - read_apps_table(); + netdata_apps_integration_flags_t apps = em->apps_charts; + pthread_mutex_lock(&collect_data_mutex); + if (apps) + read_apps_table(); - if (cgroups) - ebpf_update_fd_cgroup(); + if (cgroups) + ebpf_update_fd_cgroup(); - pthread_mutex_lock(&lock); + pthread_mutex_lock(&lock); - ebpf_fd_send_data(em); + ebpf_fd_send_data(em); - if (apps) - ebpf_fd_send_apps_data(em, apps_groups_root_target); + if (apps & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) + ebpf_fd_send_apps_data(em, apps_groups_root_target); - if (cgroups) - ebpf_fd_send_cgroup_data(em); - - pthread_mutex_unlock(&lock); - } + if (cgroups) + ebpf_fd_send_cgroup_data(em); + pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); } } @@ -756,6 +744,8 @@ void ebpf_fd_create_apps_charts(struct ebpf_module *em, void *ptr) ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); } + + em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; } /** @@ -831,7 +821,7 @@ static void ebpf_fd_allocate_global_vectors(int apps) */ void *ebpf_fd_thread(void *ptr) { - netdata_thread_cleanup_push(ebpf_fd_cleanup, ptr); + netdata_thread_cleanup_push(ebpf_fd_exit, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; em->maps = fd_maps; @@ -841,8 +831,8 @@ void *ebpf_fd_thread(void *ptr) ebpf_fd_allocate_global_vectors(em->apps_charts); - probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &objects); - if (!probe_links) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { em->enabled = CONFIG_BOOLEAN_NO; goto endfd; } diff --git a/collectors/ebpf.plugin/ebpf_fd.h b/collectors/ebpf.plugin/ebpf_fd.h index 851e040e5..8742558df 100644 --- a/collectors/ebpf.plugin/ebpf_fd.h +++ b/collectors/ebpf.plugin/ebpf_fd.h @@ -79,7 +79,6 @@ extern void *ebpf_fd_thread(void *ptr); extern void ebpf_fd_create_apps_charts(struct ebpf_module *em, void *ptr); extern struct config fd_config; extern netdata_fd_stat_t **fd_pid; -extern void clean_fd_pid_structures(); #endif /* NETDATA_EBPF_FD_H */ diff --git a/collectors/ebpf.plugin/ebpf_filesystem.c b/collectors/ebpf.plugin/ebpf_filesystem.c index 415a42dbc..bc767fbc9 100644 --- a/collectors/ebpf.plugin/ebpf_filesystem.c +++ b/collectors/ebpf.plugin/ebpf_filesystem.c @@ -30,67 +30,11 @@ static ebpf_local_maps_t fs_maps[] = {{.name = "tbl_ext4", .internal_input = NET .type = NETDATA_EBPF_MAP_CONTROLLER, .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; -ebpf_filesystem_partitions_t localfs[] = - {{.filesystem = "ext4", - .optional_filesystem = NULL, - .family = "ext4", - .objects = NULL, - .probe_links = NULL, - .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, - .enabled = CONFIG_BOOLEAN_YES, - .addresses = {.function = NULL, .addr = 0}, - .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4}, - {.filesystem = "xfs", - .optional_filesystem = NULL, - .family = "xfs", - .objects = NULL, - .probe_links = NULL, - .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, - .enabled = CONFIG_BOOLEAN_YES, - .addresses = {.function = NULL, .addr = 0}, - .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4}, - {.filesystem = "nfs", - .optional_filesystem = "nfs4", - .family = "nfs", - .objects = NULL, - .probe_links = NULL, - .flags = NETDATA_FILESYSTEM_ATTR_CHARTS, - .enabled = CONFIG_BOOLEAN_YES, - .addresses = {.function = NULL, .addr = 0}, - .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4}, - {.filesystem = "zfs", - .optional_filesystem = NULL, - .family = "zfs", - .objects = NULL, - .probe_links = NULL, - .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, - .enabled = CONFIG_BOOLEAN_YES, - .addresses = {.function = NULL, .addr = 0}, - .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4}, - {.filesystem = "btrfs", - .optional_filesystem = NULL, - .family = "btrfs", - .objects = NULL, - .probe_links = NULL, - .flags = NETDATA_FILESYSTEM_FILL_ADDRESS_TABLE, - .enabled = CONFIG_BOOLEAN_YES, - .addresses = {.function = "btrfs_file_operations", .addr = 0}, - .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_10}, - {.filesystem = NULL, - .optional_filesystem = NULL, - .family = NULL, - .objects = NULL, - .probe_links = NULL, - .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, - .enabled = CONFIG_BOOLEAN_YES, - .addresses = {.function = NULL, .addr = 0}, - .kernels = 0}}; - struct netdata_static_thread filesystem_threads = {"EBPF FS READ", NULL, NULL, 1, NULL, NULL, NULL }; +static enum ebpf_threads_status ebpf_fs_exited = NETDATA_THREAD_EBPF_RUNNING; -static int read_thread_closed = 1; static netdata_syscall_stat_t filesystem_aggregated_data[NETDATA_EBPF_HIST_MAX_BINS]; static netdata_publish_syscall_t filesystem_publish_aggregated[NETDATA_EBPF_HIST_MAX_BINS]; @@ -381,30 +325,44 @@ void ebpf_filesystem_cleanup_ebpf_data() bpf_link__destroy(probe_links[j]); j++; } - bpf_object__close(efp->objects); + freez(probe_links); + if (efp->objects) + bpf_object__close(efp->objects); } } } /** - * Clean up the main thread. + * Filesystem exit + * + * Cancel child thread. * * @param ptr thread data. */ -static void ebpf_filesystem_cleanup(void *ptr) +static void ebpf_filesystem_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - if (!em->enabled) + if (!em->enabled) { + em->enabled = NETDATA_MAIN_THREAD_EXITED; return; - - heartbeat_t hb; - heartbeat_init(&hb); - uint32_t tick = 2*USEC_PER_MS; - while (!read_thread_closed) { - usec_t dt = heartbeat_next(&hb, tick); - UNUSED(dt); } + ebpf_fs_exited = NETDATA_THREAD_EBPF_STOPPING; +} + +/** + * File system cleanup + * + * Clean up allocated thread. + * + * @param ptr thread data. + */ +static void ebpf_filesystem_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (ebpf_fs_exited != NETDATA_THREAD_EBPF_STOPPED) + return; + freez(filesystem_threads.thread); ebpf_cleanup_publish_syscall(filesystem_publish_aggregated); @@ -412,6 +370,9 @@ static void ebpf_filesystem_cleanup(void *ptr) if (dimensions) ebpf_histogram_dimension_cleanup(dimensions, NETDATA_EBPF_HIST_MAX_BINS); freez(filesystem_hash_values); + + filesystem_threads.enabled = NETDATA_MAIN_THREAD_EXITED; + em->enabled = NETDATA_MAIN_THREAD_EXITED; } /***************************************************************** @@ -515,16 +476,18 @@ static void read_filesystem_tables() */ void *ebpf_filesystem_read_hash(void *ptr) { + netdata_thread_cleanup_push(ebpf_filesystem_cleanup, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; - read_thread_closed = 0; heartbeat_t hb; heartbeat_init(&hb); usec_t step = NETDATA_FILESYSTEM_READ_SLEEP_MS * em->update_every; int update_every = em->update_every; - while (!close_ebpf_plugin) { + while (ebpf_fs_exited == NETDATA_THREAD_EBPF_RUNNING) { usec_t dt = heartbeat_next(&hb, step); (void)dt; + if (ebpf_fs_exited == NETDATA_THREAD_EBPF_STOPPING) + break; (void) ebpf_update_partitions(em); ebpf_obsolete_fs_charts(update_every); @@ -536,7 +499,9 @@ void *ebpf_filesystem_read_hash(void *ptr) read_filesystem_tables(); } - read_thread_closed = 1; + ebpf_fs_exited = NETDATA_THREAD_EBPF_STOPPED; + + netdata_thread_cleanup_pop(1); return NULL; } @@ -578,25 +543,23 @@ static void filesystem_collector(ebpf_module_t *em) filesystem_threads.start_routine = ebpf_filesystem_read_hash; netdata_thread_create(filesystem_threads.thread, filesystem_threads.name, - NETDATA_THREAD_OPTION_JOINABLE, ebpf_filesystem_read_hash, em); + NETDATA_THREAD_OPTION_DEFAULT, ebpf_filesystem_read_hash, em); int update_every = em->update_every; - int counter = update_every - 1; - while (!close_ebpf_plugin || em->optional) { - pthread_mutex_lock(&collect_data_mutex); - pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); - - if (++counter == update_every) { - counter = 0; - pthread_mutex_lock(&lock); + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; - ebpf_create_fs_charts(update_every); - ebpf_histogram_send_data(); + pthread_mutex_lock(&lock); - pthread_mutex_unlock(&lock); - } + ebpf_create_fs_charts(update_every); + ebpf_histogram_send_data(); - pthread_mutex_unlock(&collect_data_mutex); + pthread_mutex_unlock(&lock); } } @@ -634,7 +597,7 @@ static void ebpf_update_filesystem() */ void *ebpf_filesystem_thread(void *ptr) { - netdata_thread_cleanup_push(ebpf_filesystem_cleanup, ptr); + netdata_thread_cleanup_push(ebpf_filesystem_exit, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; em->maps = fs_maps; diff --git a/collectors/ebpf.plugin/ebpf_filesystem.h b/collectors/ebpf.plugin/ebpf_filesystem.h index 8b7c54c58..f6a10c874 100644 --- a/collectors/ebpf.plugin/ebpf_filesystem.h +++ b/collectors/ebpf.plugin/ebpf_filesystem.h @@ -43,26 +43,6 @@ enum netdata_filesystem_table { NETDATA_ADDR_FS_TABLE }; -typedef struct ebpf_filesystem_partitions { - char *filesystem; - char *optional_filesystem; - char *family; - char *family_name; - struct bpf_object *objects; - struct bpf_link **probe_links; - - netdata_ebpf_histogram_t hread; - netdata_ebpf_histogram_t hwrite; - netdata_ebpf_histogram_t hopen; - netdata_ebpf_histogram_t hadditional; - - uint32_t flags; - uint32_t enabled; - - ebpf_addresses_t addresses; - uint64_t kernels; -} ebpf_filesystem_partitions_t; - extern void *ebpf_filesystem_thread(void *ptr); extern struct config fs_config; diff --git a/collectors/ebpf.plugin/ebpf_hardirq.c b/collectors/ebpf.plugin/ebpf_hardirq.c index 25b2a0ec6..41a881647 100644 --- a/collectors/ebpf.plugin/ebpf_hardirq.c +++ b/collectors/ebpf.plugin/ebpf_hardirq.c @@ -125,11 +125,6 @@ static hardirq_static_val_t hardirq_static_vals[] = { }, }; -static struct bpf_link **probe_links = NULL; -static struct bpf_object *objects = NULL; - -static int read_thread_closed = 1; - // store for "published" data from the reader thread, which the collector // thread will write to netdata agent. static avl_tree_lock hardirq_pub; @@ -143,44 +138,49 @@ static hardirq_ebpf_static_val_t *hardirq_ebpf_static_vals = NULL; static struct netdata_static_thread hardirq_threads = {"HARDIRQ KERNEL", NULL, NULL, 1, NULL, NULL, NULL }; +static enum ebpf_threads_status ebpf_hardirq_exited = NETDATA_THREAD_EBPF_RUNNING; /** - * Clean up the main thread. + * Hardirq Exit + * + * Cancel child and exit. * * @param ptr thread data. */ -static void hardirq_cleanup(void *ptr) +static void hardirq_exit(void *ptr) { - for (int i = 0; hardirq_tracepoints[i].class != NULL; i++) { - ebpf_disable_tracepoint(&hardirq_tracepoints[i]); - } - ebpf_module_t *em = (ebpf_module_t *)ptr; if (!em->enabled) { + em->enabled = NETDATA_MAIN_THREAD_EXITED; return; } - heartbeat_t hb; - heartbeat_init(&hb); - uint32_t tick = 1 * USEC_PER_MS; - while (!read_thread_closed) { - usec_t dt = heartbeat_next(&hb, tick); - UNUSED(dt); - } + ebpf_hardirq_exited = NETDATA_THREAD_EBPF_STOPPING; +} +/** + * Hardirq clean up + * + * Clean up allocated memory. + * + * @param ptr thread data. + */ +static void hardirq_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + + if (ebpf_hardirq_exited != NETDATA_THREAD_EBPF_STOPPED) + return; + + freez(hardirq_threads.thread); + for (int i = 0; hardirq_tracepoints[i].class != NULL; i++) { + ebpf_disable_tracepoint(&hardirq_tracepoints[i]); + } freez(hardirq_ebpf_vals); freez(hardirq_ebpf_static_vals); - freez(hardirq_threads.thread); - if (probe_links) { - struct bpf_program *prog; - size_t i = 0 ; - bpf_object__for_each_program(prog, objects) { - bpf_link__destroy(probe_links[i]); - i++; - } - bpf_object__close(objects); - } + hardirq_threads.enabled = NETDATA_MAIN_THREAD_EXITED; + em->enabled = NETDATA_MAIN_THREAD_EXITED; } /***************************************************************** @@ -316,23 +316,25 @@ static void hardirq_read_latency_static_map(int mapfd) */ static void *hardirq_reader(void *ptr) { - read_thread_closed = 0; - + netdata_thread_cleanup_push(hardirq_cleanup, ptr); heartbeat_t hb; heartbeat_init(&hb); ebpf_module_t *em = (ebpf_module_t *)ptr; usec_t step = NETDATA_HARDIRQ_SLEEP_MS * em->update_every; - while (!close_ebpf_plugin) { + while (ebpf_hardirq_exited == NETDATA_THREAD_EBPF_RUNNING) { usec_t dt = heartbeat_next(&hb, step); UNUSED(dt); + if (ebpf_hardirq_exited == NETDATA_THREAD_EBPF_STOPPING) + break; hardirq_read_latency_map(hardirq_maps[HARDIRQ_MAP_LATENCY].map_fd); hardirq_read_latency_static_map(hardirq_maps[HARDIRQ_MAP_LATENCY_STATIC].map_fd); } + ebpf_hardirq_exited = NETDATA_THREAD_EBPF_STOPPED; - read_thread_closed = 1; + netdata_thread_cleanup_pop(1); return NULL; } @@ -419,7 +421,7 @@ static void hardirq_collector(ebpf_module_t *em) netdata_thread_create( hardirq_threads.thread, hardirq_threads.name, - NETDATA_THREAD_OPTION_JOINABLE, + NETDATA_THREAD_OPTION_DEFAULT, hardirq_reader, em ); @@ -432,26 +434,24 @@ static void hardirq_collector(ebpf_module_t *em) pthread_mutex_unlock(&lock); // loop and read from published data until ebpf plugin is closed. - int update_every = em->update_every; - int counter = update_every - 1; - while (!close_ebpf_plugin) { - pthread_mutex_lock(&collect_data_mutex); - pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); - - if (++counter == update_every) { - counter = 0; - pthread_mutex_lock(&lock); - - // write dims now for all hitherto discovered IRQs. - write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, "hardirq_latency"); - avl_traverse_lock(&hardirq_pub, hardirq_write_dims, NULL); - hardirq_write_static_dims(); - write_end_chart(); - - pthread_mutex_unlock(&lock); - } - - pthread_mutex_unlock(&collect_data_mutex); + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = em->update_every * USEC_PER_SEC; + //This will be cancelled by its parent + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + pthread_mutex_lock(&lock); + + // write dims now for all hitherto discovered IRQs. + write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, "hardirq_latency"); + avl_traverse_lock(&hardirq_pub, hardirq_write_dims, NULL); + hardirq_write_static_dims(); + write_end_chart(); + + pthread_mutex_unlock(&lock); } } @@ -467,7 +467,7 @@ static void hardirq_collector(ebpf_module_t *em) */ void *ebpf_hardirq_thread(void *ptr) { - netdata_thread_cleanup_push(hardirq_cleanup, ptr); + netdata_thread_cleanup_push(hardirq_exit, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; em->maps = hardirq_maps; @@ -481,8 +481,8 @@ void *ebpf_hardirq_thread(void *ptr) goto endhardirq; } - probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &objects); - if (!probe_links) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { em->enabled = CONFIG_BOOLEAN_NO; goto endhardirq; } diff --git a/collectors/ebpf.plugin/ebpf_mdflush.c b/collectors/ebpf.plugin/ebpf_mdflush.c index 9f75543d7..4dca04505 100644 --- a/collectors/ebpf.plugin/ebpf_mdflush.c +++ b/collectors/ebpf.plugin/ebpf_mdflush.c @@ -35,47 +35,47 @@ static avl_tree_lock mdflush_pub; // tmp store for mdflush values we get from a per-CPU eBPF map. static mdflush_ebpf_val_t *mdflush_ebpf_vals = NULL; -static struct bpf_link **probe_links = NULL; -static struct bpf_object *objects = NULL; - -static int read_thread_closed = 1; - static struct netdata_static_thread mdflush_threads = {"MDFLUSH KERNEL", NULL, NULL, 1, NULL, NULL, NULL }; +static enum ebpf_threads_status ebpf_mdflush_exited = NETDATA_THREAD_EBPF_RUNNING; /** - * Clean up the main thread. + * MDflush exit + * + * Cancel thread and exit. * * @param ptr thread data. */ -static void mdflush_cleanup(void *ptr) +static void mdflush_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; if (!em->enabled) { + em->enabled = NETDATA_MAIN_THREAD_EXITED; return; } - heartbeat_t hb; - heartbeat_init(&hb); - uint32_t tick = 1 * USEC_PER_MS; - while (!read_thread_closed) { - usec_t dt = heartbeat_next(&hb, tick); - UNUSED(dt); - } + ebpf_mdflush_exited = NETDATA_THREAD_EBPF_STOPPING; +} + +/** + * CLeanup + * + * Clean allocated memory. + * + * @param ptr thread data. + */ +static void mdflush_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (ebpf_mdflush_exited != NETDATA_THREAD_EBPF_STOPPED) + return; freez(mdflush_ebpf_vals); freez(mdflush_threads.thread); - if (probe_links) { - struct bpf_program *prog; - size_t i = 0 ; - bpf_object__for_each_program(prog, objects) { - bpf_link__destroy(probe_links[i]); - i++; - } - bpf_object__close(objects); - } + mdflush_threads.enabled = NETDATA_MAIN_THREAD_EXITED; + em->enabled = NETDATA_MAIN_THREAD_EXITED; } /** @@ -175,22 +175,25 @@ static void mdflush_read_count_map() */ static void *mdflush_reader(void *ptr) { - read_thread_closed = 0; - + netdata_thread_cleanup_push(mdflush_cleanup, ptr); heartbeat_t hb; heartbeat_init(&hb); ebpf_module_t *em = (ebpf_module_t *)ptr; usec_t step = NETDATA_MDFLUSH_SLEEP_MS * em->update_every; - while (!close_ebpf_plugin) { + while (ebpf_mdflush_exited == NETDATA_THREAD_EBPF_RUNNING) { usec_t dt = heartbeat_next(&hb, step); UNUSED(dt); + if (ebpf_mdflush_exited == NETDATA_THREAD_EBPF_STOPPING) + break; mdflush_read_count_map(); } - read_thread_closed = 1; + ebpf_mdflush_exited = NETDATA_THREAD_EBPF_STOPPED; + + netdata_thread_cleanup_pop(1); return NULL; } @@ -248,7 +251,7 @@ static void mdflush_collector(ebpf_module_t *em) netdata_thread_create( mdflush_threads.thread, mdflush_threads.name, - NETDATA_THREAD_OPTION_JOINABLE, + NETDATA_THREAD_OPTION_DEFAULT, mdflush_reader, em ); @@ -260,25 +263,20 @@ static void mdflush_collector(ebpf_module_t *em) pthread_mutex_unlock(&lock); // loop and read from published data until ebpf plugin is closed. - int update_every = em->update_every; - int counter = update_every - 1; - while (!close_ebpf_plugin) { - pthread_mutex_lock(&collect_data_mutex); - pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); - - if (++counter == update_every) { - counter = 0; - pthread_mutex_lock(&lock); - - // write dims now for all hitherto discovered devices. - write_begin_chart("mdstat", "mdstat_flush"); - avl_traverse_lock(&mdflush_pub, mdflush_write_dims, NULL); - write_end_chart(); - - pthread_mutex_unlock(&lock); - } - - pthread_mutex_unlock(&collect_data_mutex); + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = em->update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + // write dims now for all hitherto discovered devices. + write_begin_chart("mdstat", "mdstat_flush"); + avl_traverse_lock(&mdflush_pub, mdflush_write_dims, NULL); + write_end_chart(); + + pthread_mutex_unlock(&lock); } } @@ -290,7 +288,7 @@ static void mdflush_collector(ebpf_module_t *em) */ void *ebpf_mdflush_thread(void *ptr) { - netdata_thread_cleanup_push(mdflush_cleanup, ptr); + netdata_thread_cleanup_push(mdflush_exit, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; em->maps = mdflush_maps; @@ -306,8 +304,8 @@ void *ebpf_mdflush_thread(void *ptr) goto endmdflush; } - probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &objects); - if (!probe_links) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { em->enabled = CONFIG_BOOLEAN_NO; goto endmdflush; } diff --git a/collectors/ebpf.plugin/ebpf_mount.c b/collectors/ebpf.plugin/ebpf_mount.c index 1ba1e135c..bca467bce 100644 --- a/collectors/ebpf.plugin/ebpf_mount.c +++ b/collectors/ebpf.plugin/ebpf_mount.c @@ -18,12 +18,8 @@ struct config mount_config = { .first_section = NULL, .last_section = NULL, .mut .index = {.avl_tree = { .root = NULL, .compar = appconfig_section_compare }, .rwlock = AVL_LOCK_INITIALIZER } }; -static int read_thread_closed = 1; static netdata_idx_t *mount_values = NULL; -static struct bpf_link **probe_links = NULL; -static struct bpf_object *objects = NULL; - static netdata_idx_t mount_hash_values[NETDATA_MOUNT_END]; struct netdata_static_thread mount_thread = {"MOUNT KERNEL", @@ -33,6 +29,7 @@ struct netdata_static_thread mount_thread = {"MOUNT KERNEL", netdata_ebpf_targets_t mount_targets[] = { {.name = "mount", .mode = EBPF_LOAD_TRAMPOLINE}, {.name = "umount", .mode = EBPF_LOAD_TRAMPOLINE}, {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; +static enum ebpf_threads_status ebpf_mount_exited = NETDATA_THREAD_EBPF_RUNNING; #ifdef LIBBPF_MAJOR_VERSION #include "includes/mount.skel.h" // BTF code @@ -227,33 +224,46 @@ static inline int ebpf_mount_load_and_attach(struct mount_bpf *obj, ebpf_module_ *****************************************************************/ /** - * Clean up the main thread. + * Mount Exit + * + * Cancel child thread. + * + * @param ptr thread data. + */ +static void ebpf_mount_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) { + em->enabled = NETDATA_MAIN_THREAD_EXITED; + return; + } + + ebpf_mount_exited = NETDATA_THREAD_EBPF_STOPPING; +} + +/** + * Mount cleanup + * + * Clean up allocated memory. * * @param ptr thread data. */ static void ebpf_mount_cleanup(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - if (!em->enabled) + if (ebpf_mount_exited != NETDATA_THREAD_EBPF_STOPPED) return; freez(mount_thread.thread); freez(mount_values); - if (probe_links) { - struct bpf_program *prog; - size_t i = 0 ; - bpf_object__for_each_program(prog, objects) { - bpf_link__destroy(probe_links[i]); - i++; - } - bpf_object__close(objects); - } #ifdef LIBBPF_MAJOR_VERSION - else if (bpf_obj) + if (bpf_obj) mount_bpf__destroy(bpf_obj); #endif + mount_thread.enabled = NETDATA_MAIN_THREAD_EXITED; + em->enabled = NETDATA_MAIN_THREAD_EXITED; } /***************************************************************** @@ -299,22 +309,26 @@ static void read_global_table() */ void *ebpf_mount_read_hash(void *ptr) { - read_thread_closed = 0; - + netdata_thread_cleanup_push(ebpf_mount_cleanup, ptr); heartbeat_t hb; heartbeat_init(&hb); ebpf_module_t *em = (ebpf_module_t *)ptr; usec_t step = NETDATA_LATENCY_MOUNT_SLEEP_MS * em->update_every; - while (!close_ebpf_plugin) { + //This will be cancelled by its parent + while (ebpf_mount_exited == NETDATA_THREAD_EBPF_RUNNING) { usec_t dt = heartbeat_next(&hb, step); (void)dt; + if (ebpf_mount_exited == NETDATA_THREAD_EBPF_STOPPING) + break; read_global_table(); } - read_thread_closed = 1; + ebpf_mount_exited = NETDATA_THREAD_EBPF_STOPPED; + + netdata_thread_cleanup_pop(1); return NULL; } @@ -348,25 +362,22 @@ static void mount_collector(ebpf_module_t *em) mount_values = callocz((size_t)ebpf_nprocs, sizeof(netdata_idx_t)); - netdata_thread_create(mount_thread.thread, mount_thread.name, NETDATA_THREAD_OPTION_JOINABLE, + netdata_thread_create(mount_thread.thread, mount_thread.name, NETDATA_THREAD_OPTION_DEFAULT, ebpf_mount_read_hash, em); - int update_every = em->update_every; - int counter = update_every - 1; - while (!close_ebpf_plugin) { - pthread_mutex_lock(&collect_data_mutex); - pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); - - if (++counter == update_every) { - counter = 0; - pthread_mutex_lock(&lock); + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = em->update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; - ebpf_mount_send_data(); + pthread_mutex_lock(&lock); - pthread_mutex_unlock(&lock); - } + ebpf_mount_send_data(); - pthread_mutex_unlock(&collect_data_mutex); + pthread_mutex_unlock(&lock); } } @@ -425,8 +436,8 @@ static int ebpf_mount_load_bpf(ebpf_module_t *em) { int ret = 0; if (em->load == EBPF_LOAD_LEGACY) { - probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &objects); - if (!probe_links) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { em->enabled = CONFIG_BOOLEAN_NO; ret = -1; } @@ -458,7 +469,7 @@ static int ebpf_mount_load_bpf(ebpf_module_t *em) */ void *ebpf_mount_thread(void *ptr) { - netdata_thread_cleanup_push(ebpf_mount_cleanup, ptr); + netdata_thread_cleanup_push(ebpf_mount_exit, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; em->maps = mount_maps; diff --git a/collectors/ebpf.plugin/ebpf_oomkill.c b/collectors/ebpf.plugin/ebpf_oomkill.c index 463a32904..33f505b0e 100644 --- a/collectors/ebpf.plugin/ebpf_oomkill.c +++ b/collectors/ebpf.plugin/ebpf_oomkill.c @@ -34,9 +34,6 @@ static ebpf_tracepoint_t oomkill_tracepoints[] = { {.enabled = false, .class = NULL, .event = NULL} }; -static struct bpf_link **probe_links = NULL; -static struct bpf_object *objects = NULL; - static netdata_publish_syscall_t oomkill_publish_aggregated = {.name = "oomkill", .dimension = "oomkill", .algorithm = "absolute", .next = NULL}; @@ -49,19 +46,8 @@ static netdata_publish_syscall_t oomkill_publish_aggregated = {.name = "oomkill" static void oomkill_cleanup(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - if (!em->enabled) { - return; - } - if (probe_links) { - struct bpf_program *prog; - size_t i = 0 ; - bpf_object__for_each_program(prog, objects) { - bpf_link__destroy(probe_links[i]); - i++; - } - bpf_object__close(objects); - } + em->enabled = NETDATA_MAIN_THREAD_EXITED; } static void oomkill_write_data(int32_t *keys, uint32_t total) @@ -159,7 +145,7 @@ static int ebpf_send_systemd_oomkill_charts() if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long) ect->oomkill); ect->oomkill = 0; - } else + } else if (unlikely(ect->systemd)) ret = 0; } write_end_chart(); @@ -312,34 +298,36 @@ static void oomkill_collector(ebpf_module_t *em) { int cgroups = em->cgroup_charts; int update_every = em->update_every; - int counter = update_every - 1; int32_t keys[NETDATA_OOMKILL_MAX_ENTRIES]; memset(keys, 0, sizeof(keys)); // loop and read until ebpf plugin is closed. - while (!close_ebpf_plugin) { - pthread_mutex_lock(&collect_data_mutex); - pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; - if (++counter == update_every) { - counter = 0; - pthread_mutex_lock(&lock); + pthread_mutex_lock(&collect_data_mutex); + pthread_mutex_lock(&lock); - uint32_t count = oomkill_read_data(keys); - if (cgroups && count) - ebpf_update_oomkill_cgroup(keys, count); + uint32_t count = oomkill_read_data(keys); + if (cgroups && count) + ebpf_update_oomkill_cgroup(keys, count); - // write everything from the ebpf map. - if (cgroups) - ebpf_oomkill_send_cgroup_data(update_every); + // write everything from the ebpf map. + if (cgroups) + ebpf_oomkill_send_cgroup_data(update_every); + if (em->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_OOMKILL_CHART); oomkill_write_data(keys, count); write_end_chart(); - - pthread_mutex_unlock(&lock); } + pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); } } @@ -362,6 +350,8 @@ void ebpf_oomkill_create_apps_charts(struct ebpf_module *em, void *ptr) 20020, ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], root, em->update_every, NETDATA_EBPF_MODULE_NAME_OOMKILL); + + em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; } /** @@ -395,8 +385,8 @@ void *ebpf_oomkill_thread(void *ptr) goto endoomkill; } - probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &objects); - if (!probe_links) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { em->enabled = CONFIG_BOOLEAN_NO; goto endoomkill; } diff --git a/collectors/ebpf.plugin/ebpf_process.c b/collectors/ebpf.plugin/ebpf_process.c index f894f0707..f6b379a51 100644 --- a/collectors/ebpf.plugin/ebpf_process.c +++ b/collectors/ebpf.plugin/ebpf_process.c @@ -47,9 +47,6 @@ ebpf_process_publish_apps_t **current_apps_data = NULL; int process_enabled = 0; -static struct bpf_object *objects = NULL; -static struct bpf_link **probe_links = NULL; - struct config process_config = { .first_section = NULL, .last_section = NULL, .mutex = NETDATA_MUTEX_INITIALIZER, @@ -58,6 +55,7 @@ struct config process_config = { .first_section = NULL, static struct netdata_static_thread cgroup_thread = {"EBPF CGROUP", NULL, NULL, 1, NULL, NULL, NULL}; +static enum ebpf_threads_status ebpf_process_exited = NETDATA_THREAD_EBPF_RUNNING; static char *threads_stat[NETDATA_EBPF_THREAD_STAT_END] = {"total", "running"}; static char *load_event_stat[NETDATA_EBPF_LOAD_STAT_END] = {"legacy", "co-re"}; @@ -177,11 +175,9 @@ void ebpf_process_remove_pids() uint32_t pid = pids->pid; ebpf_process_stat_t *w = global_process_stats[pid]; if (w) { - if (w->removeme) { - freez(w); - global_process_stats[pid] = NULL; - bpf_map_delete_elem(pid_fd, &pid); - } + freez(w); + global_process_stats[pid] = NULL; + bpf_map_delete_elem(pid_fd, &pid); } pids = pids->next; @@ -568,6 +564,8 @@ void ebpf_process_create_apps_charts(struct ebpf_module *em, void *ptr) root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS); } + + em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; } /** @@ -624,6 +622,72 @@ static void ebpf_create_apps_charts(struct target *root) /***************************************************************** * + * FUNCTIONS TO CLOSE THE THREAD + * + *****************************************************************/ + +/** + * Process disable tracepoints + * + * Disable tracepoints when the plugin was responsible to enable it. + */ +static void ebpf_process_disable_tracepoints() +{ + char *default_message = { "Cannot disable the tracepoint" }; + if (!was_sched_process_exit_enabled) { + if (ebpf_disable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_exit)) + error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_exit); + } + + if (!was_sched_process_exec_enabled) { + if (ebpf_disable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_exec)) + error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_exec); + } + + if (!was_sched_process_fork_enabled) { + if (ebpf_disable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_fork)) + error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_fork); + } +} + +/** + * Process Exit + * + * Cancel child thread. + * + * @param ptr thread data. + */ +static void ebpf_process_exit(void *ptr) +{ + (void)ptr; + ebpf_process_exited = NETDATA_THREAD_EBPF_STOPPING; +} + +/** + * Process cleanup + * + * Cleanup allocated memory. + * + * @param ptr thread data. + */ +static void ebpf_process_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (ebpf_process_exited != NETDATA_THREAD_EBPF_STOPPED) + return; + + ebpf_cleanup_publish_syscall(process_publish_aggregated); + freez(process_hash_values); + freez(cgroup_thread.thread); + + ebpf_process_disable_tracepoints(); + + cgroup_thread.enabled = NETDATA_MAIN_THREAD_EXITED; + em->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +/***************************************************************** + * * FUNCTIONS WITH THE MAIN LOOP * *****************************************************************/ @@ -640,24 +704,33 @@ static void ebpf_create_apps_charts(struct target *root) */ void *ebpf_cgroup_update_shm(void *ptr) { - UNUSED(ptr); + netdata_thread_cleanup_push(ebpf_process_cleanup, ptr); heartbeat_t hb; heartbeat_init(&hb); - usec_t step = 30 * USEC_PER_SEC; - while (!close_ebpf_plugin) { + usec_t step = 3 * USEC_PER_SEC; + int counter = NETDATA_EBPF_CGROUP_UPDATE - 1; + //This will be cancelled by its parent + while (ebpf_process_exited == NETDATA_THREAD_EBPF_RUNNING) { usec_t dt = heartbeat_next(&hb, step); (void)dt; - - if (close_ebpf_plugin) + if (ebpf_process_exited == NETDATA_THREAD_EBPF_STOPPING) break; - if (!shm_ebpf_cgroup.header) - ebpf_map_cgroup_shared_memory(); + // We are using a small heartbeat time to wake up thread, + // but we should not update so frequently the shared memory data + if (++counter >= NETDATA_EBPF_CGROUP_UPDATE) { + counter = 0; + if (!shm_ebpf_cgroup.header) + ebpf_map_cgroup_shared_memory(); - ebpf_parse_cgroup_shm_data(); + ebpf_parse_cgroup_shm_data(); + } } + ebpf_process_exited = NETDATA_THREAD_EBPF_STOPPED; + + netdata_thread_cleanup_pop(1); return NULL; } @@ -884,7 +957,7 @@ static int ebpf_send_systemd_process_charts(ebpf_module_t *em) for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_ps.create_process); - } else + } else if (unlikely(ect->systemd)) ret = 0; } write_end_chart(); @@ -1019,32 +1092,36 @@ static void process_collector(ebpf_module_t *em) cgroup_thread.thread = mallocz(sizeof(netdata_thread_t)); cgroup_thread.start_routine = ebpf_cgroup_update_shm; - netdata_thread_create(cgroup_thread.thread, cgroup_thread.name, NETDATA_THREAD_OPTION_JOINABLE, + netdata_thread_create(cgroup_thread.thread, cgroup_thread.name, NETDATA_THREAD_OPTION_DEFAULT, ebpf_cgroup_update_shm, em); heartbeat_t hb; heartbeat_init(&hb); int publish_global = em->global_charts; - int apps_enabled = em->apps_charts; int cgroups = em->cgroup_charts; int thread_enabled = em->enabled; if (cgroups) ebpf_process_update_cgroup_algorithm(); + int update_apps_every = (int) EBPF_CFG_UPDATE_APPS_EVERY_DEFAULT; int pid_fd = process_maps[NETDATA_PROCESS_PID_TABLE].map_fd; int update_every = em->update_every; int counter = update_every - 1; - while (!close_ebpf_plugin) { + int update_apps_list = update_apps_every - 1; + while (!ebpf_exit_plugin) { usec_t dt = heartbeat_next(&hb, USEC_PER_SEC); (void)dt; + if (ebpf_exit_plugin) + break; pthread_mutex_lock(&collect_data_mutex); - cleanup_exited_pids(); - collect_data_for_all_processes(pid_fd); + if (++update_apps_list == update_apps_every) { + update_apps_list = 0; + cleanup_exited_pids(); + collect_data_for_all_processes(pid_fd); - ebpf_create_apps_charts(apps_groups_root_target); - - pthread_cond_broadcast(&collect_data_cond_var); + ebpf_create_apps_charts(apps_groups_root_target); + } pthread_mutex_unlock(&collect_data_mutex); if (++counter == update_every) { @@ -1052,10 +1129,10 @@ static void process_collector(ebpf_module_t *em) read_hash_global_tables(); - int publish_apps = 0; + netdata_apps_integration_flags_t apps_enabled = em->apps_charts; + pthread_mutex_lock(&collect_data_mutex); if (all_pids_count > 0) { if (apps_enabled) { - publish_apps = 1; ebpf_process_update_apps_data(); } @@ -1072,7 +1149,7 @@ static void process_collector(ebpf_module_t *em) ebpf_process_send_data(em); } - if (publish_apps) { + if (apps_enabled & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { ebpf_process_send_apps_data(apps_groups_root_target, em); } @@ -1081,6 +1158,7 @@ static void process_collector(ebpf_module_t *em) } } pthread_mutex_unlock(&lock); + pthread_mutex_unlock(&collect_data_mutex); } fflush(stdout); @@ -1089,89 +1167,6 @@ static void process_collector(ebpf_module_t *em) /***************************************************************** * - * FUNCTIONS TO CLOSE THE THREAD - * - *****************************************************************/ - -void clean_global_memory() { - int pid_fd = process_maps[NETDATA_PROCESS_PID_TABLE].map_fd; - struct pid_stat *pids = root_of_pids; - while (pids) { - uint32_t pid = pids->pid; - freez(global_process_stats[pid]); - - bpf_map_delete_elem(pid_fd, &pid); - freez(current_apps_data[pid]); - - pids = pids->next; - } -} - -/** - * Process disable tracepoints - * - * Disable tracepoints when the plugin was responsible to enable it. - */ -static void ebpf_process_disable_tracepoints() -{ - char *default_message = { "Cannot disable the tracepoint" }; - if (!was_sched_process_exit_enabled) { - if (ebpf_disable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_exit)) - error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_exit); - } - - if (!was_sched_process_exec_enabled) { - if (ebpf_disable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_exec)) - error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_exec); - } - - if (!was_sched_process_fork_enabled) { - if (ebpf_disable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_fork)) - error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_fork); - } -} - -/** - * Clean up the main thread. - * - * @param ptr thread data. - */ -static void ebpf_process_cleanup(void *ptr) -{ - UNUSED(ptr); - - heartbeat_t hb; - heartbeat_init(&hb); - uint32_t tick = 1 * USEC_PER_SEC; - while (!finalized_threads) { - usec_t dt = heartbeat_next(&hb, tick); - UNUSED(dt); - } - - ebpf_cleanup_publish_syscall(process_publish_aggregated); - freez(process_hash_values); - - clean_global_memory(); - freez(global_process_stats); - freez(current_apps_data); - - ebpf_process_disable_tracepoints(); - - if (probe_links) { - struct bpf_program *prog; - size_t i = 0 ; - bpf_object__for_each_program(prog, objects) { - bpf_link__destroy(probe_links[i]); - i++; - } - bpf_object__close(objects); - } - - freez(cgroup_thread.thread); -} - -/***************************************************************** - * * FUNCTIONS TO START THREAD * *****************************************************************/ @@ -1293,7 +1288,7 @@ static int ebpf_process_enable_tracepoints() */ void *ebpf_process_thread(void *ptr) { - netdata_thread_cleanup_push(ebpf_process_cleanup, ptr); + netdata_thread_cleanup_push(ebpf_process_exit, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; em->maps = process_maps; @@ -1309,8 +1304,8 @@ void *ebpf_process_thread(void *ptr) ebpf_update_pid_table(&process_maps[0], em); set_local_pointers(); - probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &objects); - if (!probe_links) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { em->enabled = CONFIG_BOOLEAN_NO; pthread_mutex_unlock(&lock); goto endprocess; diff --git a/collectors/ebpf.plugin/ebpf_process.h b/collectors/ebpf.plugin/ebpf_process.h index b0377b5db..43df34d48 100644 --- a/collectors/ebpf.plugin/ebpf_process.h +++ b/collectors/ebpf.plugin/ebpf_process.h @@ -39,6 +39,8 @@ #define NETDATA_SYSTEMD_PROCESS_EXIT_CONTEXT "services.task_exit" #define NETDATA_SYSTEMD_PROCESS_ERROR_CONTEXT "services.task_error" +#define NETDATA_EBPF_CGROUP_UPDATE 10 + // Statistical information enum netdata_ebpf_thread_stats{ NETDATA_EBPF_THREAD_STAT_TOTAL, diff --git a/collectors/ebpf.plugin/ebpf_shm.c b/collectors/ebpf.plugin/ebpf_shm.c index 939741e75..bd928cbdc 100644 --- a/collectors/ebpf.plugin/ebpf_shm.c +++ b/collectors/ebpf.plugin/ebpf_shm.c @@ -7,7 +7,6 @@ static char *shm_dimension_name[NETDATA_SHM_END] = { "get", "at", "dt", "ctl" }; static netdata_syscall_stat_t shm_aggregated_data[NETDATA_SHM_END]; static netdata_publish_syscall_t shm_publish_aggregated[NETDATA_SHM_END]; -static int read_thread_closed = 1; netdata_publish_shm_t *shm_vector = NULL; static netdata_idx_t shm_hash_values[NETDATA_SHM_END]; @@ -35,11 +34,9 @@ static ebpf_local_maps_t shm_maps[] = {{.name = "tbl_pid_shm", .internal_input = .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, {.name = NULL, .internal_input = 0, .user_input = 0}}; -static struct bpf_link **probe_links = NULL; -static struct bpf_object *objects = NULL; - struct netdata_static_thread shm_threads = {"SHM KERNEL", NULL, NULL, 1, NULL, NULL, NULL}; +static enum ebpf_threads_status ebpf_shm_exited = NETDATA_THREAD_EBPF_RUNNING; netdata_ebpf_targets_t shm_targets[] = { {.name = "shmget", .mode = EBPF_LOAD_TRAMPOLINE}, {.name = "shmat", .mode = EBPF_LOAD_TRAMPOLINE}, @@ -243,55 +240,48 @@ static inline int ebpf_shm_load_and_attach(struct shm_bpf *obj, ebpf_module_t *e *****************************************************************/ /** - * Clean shm structure + * SHM Exit + * + * Cancel child thread. + * + * @param ptr thread data. */ -void clean_shm_pid_structures() { - struct pid_stat *pids = root_of_pids; - while (pids) { - freez(shm_pid[pids->pid]); - - pids = pids->next; +static void ebpf_shm_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) { + em->enabled = NETDATA_MAIN_THREAD_EXITED; + return; } + + ebpf_shm_exited = NETDATA_THREAD_EBPF_STOPPING; } /** - * Clean up the main thread. + * SHM Cleanup + * + * Clean up allocated memory. * * @param ptr thread data. */ static void ebpf_shm_cleanup(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - if (!em->enabled) { + if (ebpf_shm_exited != NETDATA_THREAD_EBPF_STOPPED) return; - } - - heartbeat_t hb; - heartbeat_init(&hb); - uint32_t tick = 2 * USEC_PER_MS; - while (!read_thread_closed) { - usec_t dt = heartbeat_next(&hb, tick); - UNUSED(dt); - } ebpf_cleanup_publish_syscall(shm_publish_aggregated); freez(shm_vector); freez(shm_values); - if (probe_links) { - struct bpf_program *prog; - size_t i = 0 ; - bpf_object__for_each_program(prog, objects) { - bpf_link__destroy(probe_links[i]); - i++; - } - bpf_object__close(objects); - } #ifdef LIBBPF_MAJOR_VERSION - else if (bpf_obj) + if (bpf_obj) shm_bpf__destroy(bpf_obj); #endif + + shm_threads.enabled = NETDATA_MAIN_THREAD_EXITED; + em->enabled = NETDATA_MAIN_THREAD_EXITED; } /***************************************************************** @@ -467,21 +457,24 @@ static void read_global_table() */ void *ebpf_shm_read_hash(void *ptr) { - read_thread_closed = 0; - + netdata_thread_cleanup_push(ebpf_shm_cleanup, ptr); heartbeat_t hb; heartbeat_init(&hb); ebpf_module_t *em = (ebpf_module_t *)ptr; usec_t step = NETDATA_SHM_SLEEP_MS * em->update_every; - while (!close_ebpf_plugin) { + while (ebpf_shm_exited == NETDATA_THREAD_EBPF_RUNNING) { usec_t dt = heartbeat_next(&hb, step); (void)dt; + if (ebpf_shm_exited == NETDATA_THREAD_EBPF_STOPPING) + break; read_global_table(); } - read_thread_closed = 1; + ebpf_shm_exited = NETDATA_THREAD_EBPF_STOPPED; + + netdata_thread_cleanup_pop(1); return NULL; } @@ -740,7 +733,7 @@ static int ebpf_send_systemd_shm_charts() for (ect = ebpf_cgroup_pids; ect; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long)ect->publish_shm.get); - } else + } else if (unlikely(ect->systemd)) ret = 0; } write_end_chart(); @@ -859,44 +852,44 @@ static void shm_collector(ebpf_module_t *em) netdata_thread_create( shm_threads.thread, shm_threads.name, - NETDATA_THREAD_OPTION_JOINABLE, + NETDATA_THREAD_OPTION_DEFAULT, ebpf_shm_read_hash, em ); - int apps = em->apps_charts; int cgroups = em->cgroup_charts; int update_every = em->update_every; - int counter = update_every - 1; - while (!close_ebpf_plugin) { - pthread_mutex_lock(&collect_data_mutex); - pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); - - if (++counter == update_every) { - counter = 0; - if (apps) { - read_apps_table(); - } + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; - if (cgroups) { - ebpf_update_shm_cgroup(); - } + netdata_apps_integration_flags_t apps = em->apps_charts; + pthread_mutex_lock(&collect_data_mutex); + if (apps) { + read_apps_table(); + } - pthread_mutex_lock(&lock); + if (cgroups) { + ebpf_update_shm_cgroup(); + } - shm_send_global(); + pthread_mutex_lock(&lock); - if (apps) { - ebpf_shm_send_apps_data(apps_groups_root_target); - } + shm_send_global(); - if (cgroups) { - ebpf_shm_send_cgroup_data(update_every); - } + if (apps & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + ebpf_shm_send_apps_data(apps_groups_root_target); + } - pthread_mutex_unlock(&lock); + if (cgroups) { + ebpf_shm_send_cgroup_data(update_every); } + pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); } } @@ -950,6 +943,8 @@ void ebpf_shm_create_apps_charts(struct ebpf_module *em, void *ptr) 20194, ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], root, em->update_every, NETDATA_EBPF_MODULE_NAME_SHM); + + em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; } /** @@ -1014,8 +1009,8 @@ static int ebpf_shm_load_bpf(ebpf_module_t *em) { int ret = 0; if (em->load == EBPF_LOAD_LEGACY) { - probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &objects); - if (!probe_links) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { em->enabled = CONFIG_BOOLEAN_NO; ret = -1; } @@ -1045,7 +1040,7 @@ static int ebpf_shm_load_bpf(ebpf_module_t *em) */ void *ebpf_shm_thread(void *ptr) { - netdata_thread_cleanup_push(ebpf_shm_cleanup, ptr); + netdata_thread_cleanup_push(ebpf_shm_exit, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; em->maps = shm_maps; diff --git a/collectors/ebpf.plugin/ebpf_shm.h b/collectors/ebpf.plugin/ebpf_shm.h index f0559e431..8e118a6f5 100644 --- a/collectors/ebpf.plugin/ebpf_shm.h +++ b/collectors/ebpf.plugin/ebpf_shm.h @@ -56,7 +56,6 @@ extern netdata_publish_shm_t **shm_pid; extern void *ebpf_shm_thread(void *ptr); extern void ebpf_shm_create_apps_charts(struct ebpf_module *em, void *ptr); -extern void clean_shm_pid_structures(); extern netdata_ebpf_targets_t shm_targets[]; extern struct config shm_config; diff --git a/collectors/ebpf.plugin/ebpf_socket.c b/collectors/ebpf.plugin/ebpf_socket.c index 7b2d4a5bf..ba63934d5 100644 --- a/collectors/ebpf.plugin/ebpf_socket.c +++ b/collectors/ebpf.plugin/ebpf_socket.c @@ -61,10 +61,8 @@ static netdata_publish_syscall_t socket_publish_aggregated[NETDATA_MAX_SOCKET_VE ebpf_socket_publish_apps_t **socket_bandwidth_curr = NULL; static ebpf_bandwidth_t *bandwidth_vector = NULL; -static int socket_apps_created = 0; pthread_mutex_t nv_mutex; int wait_to_plot = 0; -int read_thread_closed = 1; netdata_vector_plot_t inbound_vectors = { .plot = NULL, .next = 0, .last = 0 }; netdata_vector_plot_t outbound_vectors = { .plot = NULL, .next = 0, .last = 0 }; @@ -72,9 +70,6 @@ netdata_socket_t *socket_values; ebpf_network_viewer_port_list_t *listen_ports = NULL; -static struct bpf_object *objects = NULL; -static struct bpf_link **probe_links = NULL; - struct config socket_config = { .first_section = NULL, .last_section = NULL, .mutex = NETDATA_MUTEX_INITIALIZER, @@ -92,6 +87,11 @@ netdata_ebpf_targets_t socket_targets[] = { {.name = "inet_csk_accept", .mode = {.name = "tcp_v6_connect", .mode = EBPF_LOAD_TRAMPOLINE}, {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; +struct netdata_static_thread socket_threads = {"EBPF SOCKET READ", + NULL, NULL, 1, NULL, + NULL, NULL }; +static enum ebpf_threads_status ebpf_socket_exited = NETDATA_THREAD_EBPF_RUNNING; + #ifdef LIBBPF_MAJOR_VERSION #include "includes/socket.skel.h" // BTF code @@ -424,6 +424,225 @@ static inline int ebpf_socket_load_and_attach(struct socket_bpf *obj, ebpf_modul return ret; } #endif + +/***************************************************************** + * + * FUNCTIONS TO CLOSE THE THREAD + * + *****************************************************************/ + +/** + * Clean internal socket plot + * + * Clean all structures allocated with strdupz. + * + * @param ptr the pointer with addresses to clean. + */ +static inline void clean_internal_socket_plot(netdata_socket_plot_t *ptr) +{ + freez(ptr->dimension_recv); + freez(ptr->dimension_sent); + freez(ptr->resolved_name); + freez(ptr->dimension_retransmit); +} + +/** + * Clean socket plot + * + * Clean the allocated data for inbound and outbound vectors. + */ +static void clean_allocated_socket_plot() +{ + uint32_t i; + uint32_t end = inbound_vectors.last; + netdata_socket_plot_t *plot = inbound_vectors.plot; + for (i = 0; i < end; i++) { + clean_internal_socket_plot(&plot[i]); + } + + clean_internal_socket_plot(&plot[inbound_vectors.last]); + + end = outbound_vectors.last; + plot = outbound_vectors.plot; + for (i = 0; i < end; i++) { + clean_internal_socket_plot(&plot[i]); + } + clean_internal_socket_plot(&plot[outbound_vectors.last]); +} + +/** + * Clean network ports allocated during initialization. + * + * @param ptr a pointer to the link list. + */ +static void clean_network_ports(ebpf_network_viewer_port_list_t *ptr) +{ + if (unlikely(!ptr)) + return; + + while (ptr) { + ebpf_network_viewer_port_list_t *next = ptr->next; + freez(ptr->value); + freez(ptr); + ptr = next; + } +} + +/** + * Clean service names + * + * Clean the allocated link list that stores names. + * + * @param names the link list. + */ +static void clean_service_names(ebpf_network_viewer_dim_name_t *names) +{ + if (unlikely(!names)) + return; + + while (names) { + ebpf_network_viewer_dim_name_t *next = names->next; + freez(names->name); + freez(names); + names = next; + } +} + +/** + * Clean hostnames + * + * @param hostnames the hostnames to clean + */ +static void clean_hostnames(ebpf_network_viewer_hostname_list_t *hostnames) +{ + if (unlikely(!hostnames)) + return; + + while (hostnames) { + ebpf_network_viewer_hostname_list_t *next = hostnames->next; + freez(hostnames->value); + simple_pattern_free(hostnames->value_pattern); + freez(hostnames); + hostnames = next; + } +} + +/** + * Cleanup publish syscall + * + * @param nps list of structures to clean + */ +void ebpf_cleanup_publish_syscall(netdata_publish_syscall_t *nps) +{ + while (nps) { + freez(nps->algorithm); + nps = nps->next; + } +} + +/** + * Clean port Structure + * + * Clean the allocated list. + * + * @param clean the list that will be cleaned + */ +void clean_port_structure(ebpf_network_viewer_port_list_t **clean) +{ + ebpf_network_viewer_port_list_t *move = *clean; + while (move) { + ebpf_network_viewer_port_list_t *next = move->next; + freez(move->value); + freez(move); + + move = next; + } + *clean = NULL; +} + +/** + * Clean IP structure + * + * Clean the allocated list. + * + * @param clean the list that will be cleaned + */ +static void clean_ip_structure(ebpf_network_viewer_ip_list_t **clean) +{ + ebpf_network_viewer_ip_list_t *move = *clean; + while (move) { + ebpf_network_viewer_ip_list_t *next = move->next; + freez(move->value); + freez(move); + + move = next; + } + *clean = NULL; +} + +/** + * Socket exit + * + * Clean up the main thread. + * + * @param ptr thread data. + */ +static void ebpf_socket_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) { + em->enabled = NETDATA_MAIN_THREAD_EXITED; + return; + } + + ebpf_socket_exited = NETDATA_THREAD_EBPF_STOPPING; +} + +/** + * Socket cleanup + * + * Clean up allocated addresses. + * + * @param ptr thread data. + */ +void ebpf_socket_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (ebpf_socket_exited != NETDATA_THREAD_EBPF_STOPPED) + return; + + ebpf_cleanup_publish_syscall(socket_publish_aggregated); + freez(socket_hash_values); + + freez(bandwidth_vector); + + freez(socket_values); + clean_allocated_socket_plot(); + freez(inbound_vectors.plot); + freez(outbound_vectors.plot); + + clean_port_structure(&listen_ports); + + ebpf_modules[EBPF_MODULE_SOCKET_IDX].enabled = 0; + + clean_network_ports(network_viewer_opt.included_port); + clean_network_ports(network_viewer_opt.excluded_port); + clean_service_names(network_viewer_opt.names); + clean_hostnames(network_viewer_opt.included_hostnames); + clean_hostnames(network_viewer_opt.excluded_hostnames); + + pthread_mutex_destroy(&nv_mutex); + + freez(socket_threads.thread); + +#ifdef LIBBPF_MAJOR_VERSION + if (bpf_obj) + socket_bpf__destroy(bpf_obj); +#endif + socket_threads.enabled = NETDATA_MAIN_THREAD_EXITED; + em->enabled = NETDATA_MAIN_THREAD_EXITED; +} + /***************************************************************** * * PROCESS DATA AND SEND TO NETDATA @@ -737,8 +956,6 @@ long long ebpf_socket_sum_values_for_pids(struct pid_on_target *root, size_t off void ebpf_socket_send_apps_data(ebpf_module_t *em, struct target *root) { UNUSED(em); - if (!socket_apps_created) - return; struct target *w; collected_number value; @@ -1052,7 +1269,7 @@ void ebpf_socket_create_apps_charts(struct ebpf_module *em, void *ptr) ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); - socket_apps_created = 1; + em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; } /** @@ -1814,15 +2031,6 @@ static void read_socket_hash_table(int fd, int family, int network_connection) key = next_key; } - - test = bpf_map_lookup_elem(fd, &next_key, values); - if (test < 0) { - return; - } - - if (network_connection) { - hash_accumulator(values, &next_key, family, end); - } } /** @@ -1929,18 +2137,20 @@ static void read_listen_table() */ void *ebpf_socket_read_hash(void *ptr) { + netdata_thread_cleanup_push(ebpf_socket_cleanup, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; - read_thread_closed = 0; heartbeat_t hb; heartbeat_init(&hb); usec_t step = NETDATA_SOCKET_READ_SLEEP_MS * em->update_every; int fd_ipv4 = socket_maps[NETDATA_SOCKET_TABLE_IPV4].map_fd; int fd_ipv6 = socket_maps[NETDATA_SOCKET_TABLE_IPV6].map_fd; int network_connection = em->optional; - while (!close_ebpf_plugin) { + while (ebpf_socket_exited == NETDATA_THREAD_EBPF_RUNNING) { usec_t dt = heartbeat_next(&hb, step); (void)dt; + if (ebpf_socket_exited == NETDATA_THREAD_EBPF_STOPPING) + break; pthread_mutex_lock(&nv_mutex); read_listen_table(); @@ -1950,7 +2160,9 @@ void *ebpf_socket_read_hash(void *ptr) pthread_mutex_unlock(&nv_mutex); } - read_thread_closed = 1; + ebpf_socket_exited = NETDATA_THREAD_EBPF_STOPPED; + + netdata_thread_cleanup_pop(1); return NULL; } @@ -2502,7 +2714,7 @@ static int ebpf_send_systemd_socket_charts() for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long)ect->publish_socket.call_tcp_v4_connection); - } else + } else if (unlikely(ect->systemd)) ret = 0; } write_end_chart(); @@ -2643,10 +2855,6 @@ static void ebpf_socket_send_cgroup_data(int update_every) * *****************************************************************/ -struct netdata_static_thread socket_threads = {"EBPF SOCKET READ", - NULL, NULL, 1, NULL, - NULL, ebpf_socket_read_hash }; - /** * Main loop for this collector. * @@ -2655,297 +2863,74 @@ struct netdata_static_thread socket_threads = {"EBPF SOCKET READ", */ static void socket_collector(usec_t step, ebpf_module_t *em) { - UNUSED(step); heartbeat_t hb; heartbeat_init(&hb); socket_threads.thread = mallocz(sizeof(netdata_thread_t)); + socket_threads.start_routine = ebpf_socket_read_hash; netdata_thread_create(socket_threads.thread, socket_threads.name, - NETDATA_THREAD_OPTION_JOINABLE, ebpf_socket_read_hash, em); + NETDATA_THREAD_OPTION_DEFAULT, ebpf_socket_read_hash, em); int cgroups = em->cgroup_charts; if (cgroups) ebpf_socket_update_cgroup_algorithm(); - int socket_apps_enabled = ebpf_modules[EBPF_MODULE_SOCKET_IDX].apps_charts; - int socket_global_enabled = ebpf_modules[EBPF_MODULE_SOCKET_IDX].global_charts; + int socket_global_enabled = em->global_charts; int network_connection = em->optional; int update_every = em->update_every; - int counter = update_every - 1; - while (!close_ebpf_plugin) { + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + netdata_apps_integration_flags_t socket_apps_enabled = em->apps_charts; pthread_mutex_lock(&collect_data_mutex); - pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); + if (socket_global_enabled) + read_hash_global_tables(); - if (++counter == update_every) { - counter = 0; - if (socket_global_enabled) - read_hash_global_tables(); + if (socket_apps_enabled) + ebpf_socket_update_apps_data(); - if (socket_apps_enabled) - ebpf_socket_update_apps_data(); + if (cgroups) + ebpf_update_socket_cgroup(); - if (cgroups) - ebpf_update_socket_cgroup(); + calculate_nv_plot(); - calculate_nv_plot(); + pthread_mutex_lock(&lock); + if (socket_global_enabled) + ebpf_socket_send_data(em); - pthread_mutex_lock(&lock); - if (socket_global_enabled) - ebpf_socket_send_data(em); + if (socket_apps_enabled & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) + ebpf_socket_send_apps_data(em, apps_groups_root_target); - if (socket_apps_enabled) - ebpf_socket_send_apps_data(em, apps_groups_root_target); + if (cgroups) + ebpf_socket_send_cgroup_data(update_every); - if (cgroups) - ebpf_socket_send_cgroup_data(update_every); + fflush(stdout); + if (network_connection) { + // We are calling fflush many times, because when we have a lot of dimensions + // we began to have not expected outputs and Netdata closed the plugin. + pthread_mutex_lock(&nv_mutex); + ebpf_socket_create_nv_charts(&inbound_vectors, update_every); fflush(stdout); + ebpf_socket_send_nv_data(&inbound_vectors); - if (network_connection) { - // We are calling fflush many times, because when we have a lot of dimensions - // we began to have not expected outputs and Netdata closed the plugin. - pthread_mutex_lock(&nv_mutex); - ebpf_socket_create_nv_charts(&inbound_vectors, update_every); - fflush(stdout); - ebpf_socket_send_nv_data(&inbound_vectors); - - ebpf_socket_create_nv_charts(&outbound_vectors, update_every); - fflush(stdout); - ebpf_socket_send_nv_data(&outbound_vectors); - wait_to_plot = 0; - pthread_mutex_unlock(&nv_mutex); + ebpf_socket_create_nv_charts(&outbound_vectors, update_every); + fflush(stdout); + ebpf_socket_send_nv_data(&outbound_vectors); + wait_to_plot = 0; + pthread_mutex_unlock(&nv_mutex); - } - pthread_mutex_unlock(&lock); } - + pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); } } /***************************************************************** * - * FUNCTIONS TO CLOSE THE THREAD - * - *****************************************************************/ - - -/** - * Clean internal socket plot - * - * Clean all structures allocated with strdupz. - * - * @param ptr the pointer with addresses to clean. - */ -static inline void clean_internal_socket_plot(netdata_socket_plot_t *ptr) -{ - freez(ptr->dimension_recv); - freez(ptr->dimension_sent); - freez(ptr->resolved_name); - freez(ptr->dimension_retransmit); -} - -/** - * Clean socket plot - * - * Clean the allocated data for inbound and outbound vectors. - */ -static void clean_allocated_socket_plot() -{ - uint32_t i; - uint32_t end = inbound_vectors.last; - netdata_socket_plot_t *plot = inbound_vectors.plot; - for (i = 0; i < end; i++) { - clean_internal_socket_plot(&plot[i]); - } - - clean_internal_socket_plot(&plot[inbound_vectors.last]); - - end = outbound_vectors.last; - plot = outbound_vectors.plot; - for (i = 0; i < end; i++) { - clean_internal_socket_plot(&plot[i]); - } - clean_internal_socket_plot(&plot[outbound_vectors.last]); -} - -/** - * Clean network ports allocated during initialization. - * - * @param ptr a pointer to the link list. - */ -static void clean_network_ports(ebpf_network_viewer_port_list_t *ptr) -{ - if (unlikely(!ptr)) - return; - - while (ptr) { - ebpf_network_viewer_port_list_t *next = ptr->next; - freez(ptr->value); - freez(ptr); - ptr = next; - } -} - -/** - * Clean service names - * - * Clean the allocated link list that stores names. - * - * @param names the link list. - */ -static void clean_service_names(ebpf_network_viewer_dim_name_t *names) -{ - if (unlikely(!names)) - return; - - while (names) { - ebpf_network_viewer_dim_name_t *next = names->next; - freez(names->name); - freez(names); - names = next; - } -} - -/** - * Clean hostnames - * - * @param hostnames the hostnames to clean - */ -static void clean_hostnames(ebpf_network_viewer_hostname_list_t *hostnames) -{ - if (unlikely(!hostnames)) - return; - - while (hostnames) { - ebpf_network_viewer_hostname_list_t *next = hostnames->next; - freez(hostnames->value); - simple_pattern_free(hostnames->value_pattern); - freez(hostnames); - hostnames = next; - } -} - -void clean_socket_apps_structures() { - struct pid_stat *pids = root_of_pids; - while (pids) { - freez(socket_bandwidth_curr[pids->pid]); - - pids = pids->next; - } -} - -/** - * Cleanup publish syscall - * - * @param nps list of structures to clean - */ -void ebpf_cleanup_publish_syscall(netdata_publish_syscall_t *nps) -{ - while (nps) { - freez(nps->algorithm); - nps = nps->next; - } -} - -/** - * Clean port Structure - * - * Clean the allocated list. - * - * @param clean the list that will be cleaned - */ -void clean_port_structure(ebpf_network_viewer_port_list_t **clean) -{ - ebpf_network_viewer_port_list_t *move = *clean; - while (move) { - ebpf_network_viewer_port_list_t *next = move->next; - freez(move->value); - freez(move); - - move = next; - } - *clean = NULL; -} - -/** - * Clean IP structure - * - * Clean the allocated list. - * - * @param clean the list that will be cleaned - */ -static void clean_ip_structure(ebpf_network_viewer_ip_list_t **clean) -{ - ebpf_network_viewer_ip_list_t *move = *clean; - while (move) { - ebpf_network_viewer_ip_list_t *next = move->next; - freez(move->value); - freez(move); - - move = next; - } - *clean = NULL; -} - -/** - * Clean up the main thread. - * - * @param ptr thread data. - */ -static void ebpf_socket_cleanup(void *ptr) -{ - ebpf_module_t *em = (ebpf_module_t *)ptr; - if (!em->enabled) - return; - - heartbeat_t hb; - heartbeat_init(&hb); - uint32_t tick = 2*USEC_PER_MS; - while (!read_thread_closed) { - usec_t dt = heartbeat_next(&hb, tick); - UNUSED(dt); - } - - ebpf_cleanup_publish_syscall(socket_publish_aggregated); - freez(socket_hash_values); - - freez(bandwidth_vector); - - freez(socket_values); - clean_allocated_socket_plot(); - freez(inbound_vectors.plot); - freez(outbound_vectors.plot); - - clean_port_structure(&listen_ports); - - ebpf_modules[EBPF_MODULE_SOCKET_IDX].enabled = 0; - - clean_network_ports(network_viewer_opt.included_port); - clean_network_ports(network_viewer_opt.excluded_port); - clean_service_names(network_viewer_opt.names); - clean_hostnames(network_viewer_opt.included_hostnames); - clean_hostnames(network_viewer_opt.excluded_hostnames); - - pthread_mutex_destroy(&nv_mutex); - - freez(socket_threads.thread); - - if (probe_links) { - struct bpf_program *prog; - size_t i = 0 ; - bpf_object__for_each_program(prog, objects) { - bpf_link__destroy(probe_links[i]); - i++; - } - bpf_object__close(objects); - } - finalized_threads = 1; -} - -/***************************************************************** - * * FUNCTIONS TO START THREAD * *****************************************************************/ @@ -3891,8 +3876,8 @@ static int ebpf_socket_load_bpf(ebpf_module_t *em) int ret = 0; if (em->load == EBPF_LOAD_LEGACY) { - probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &objects); - if (!probe_links) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { ret = -1; } } @@ -3924,7 +3909,7 @@ static int ebpf_socket_load_bpf(ebpf_module_t *em) */ void *ebpf_socket_thread(void *ptr) { - netdata_thread_cleanup_push(ebpf_socket_cleanup, ptr); + netdata_thread_cleanup_push(ebpf_socket_exit, ptr); memset(&inbound_vectors.tree, 0, sizeof(avl_tree_lock)); memset(&outbound_vectors.tree, 0, sizeof(avl_tree_lock)); @@ -3946,7 +3931,6 @@ void *ebpf_socket_thread(void *ptr) error("Cannot initialize local mutex"); goto endsocket; } - pthread_mutex_lock(&lock); ebpf_socket_allocate_global_vectors(em->apps_charts); initialize_inbound_outbound(); @@ -3973,11 +3957,11 @@ void *ebpf_socket_thread(void *ptr) socket_aggregated_data, socket_publish_aggregated, socket_dimension_names, socket_id_names, algorithms, NETDATA_MAX_SOCKET_VECTOR); + pthread_mutex_lock(&lock); ebpf_create_global_charts(em); ebpf_update_stats(&plugin_statistics, em); - finalized_threads = 0; pthread_mutex_unlock(&lock); socket_collector((usec_t)(em->update_every * USEC_PER_SEC), em); diff --git a/collectors/ebpf.plugin/ebpf_socket.h b/collectors/ebpf.plugin/ebpf_socket.h index 672001301..711225acf 100644 --- a/collectors/ebpf.plugin/ebpf_socket.h +++ b/collectors/ebpf.plugin/ebpf_socket.h @@ -279,7 +279,7 @@ typedef struct netdata_socket { uint32_t retransmit; // It is never used with UDP uint16_t protocol; uint16_t reserved; -} netdata_socket_t __attribute__((__aligned__(8))); +} netdata_socket_t; typedef struct netdata_plot_values { // Values used in the previous iteration @@ -307,7 +307,7 @@ typedef struct netdata_socket_idx { uint16_t sport; union netdata_ip_t daddr; uint16_t dport; -} netdata_socket_idx_t __attribute__((__aligned__(8))); +} netdata_socket_idx_t; // Next values were defined according getnameinfo(3) #define NETDATA_MAX_NETWORK_COMBINED_LENGTH 1018 @@ -362,7 +362,6 @@ extern void update_listen_table(uint16_t value, uint16_t proto, netdata_passive_ extern void parse_network_viewer_section(struct config *cfg); extern void fill_ip_list(ebpf_network_viewer_ip_list_t **out, ebpf_network_viewer_ip_list_t *in, char *table); extern void parse_service_name_section(struct config *cfg); -extern void clean_socket_apps_structures(); extern ebpf_socket_publish_apps_t **socket_bandwidth_curr; extern struct config socket_config; diff --git a/collectors/ebpf.plugin/ebpf_softirq.c b/collectors/ebpf.plugin/ebpf_softirq.c index f5e79279f..ed13f027f 100644 --- a/collectors/ebpf.plugin/ebpf_softirq.c +++ b/collectors/ebpf.plugin/ebpf_softirq.c @@ -54,51 +54,51 @@ static softirq_val_t softirq_vals[] = { // tmp store for soft IRQ values we get from a per-CPU eBPF map. static softirq_ebpf_val_t *softirq_ebpf_vals = NULL; -static struct bpf_link **probe_links = NULL; -static struct bpf_object *objects = NULL; - -static int read_thread_closed = 1; - static struct netdata_static_thread softirq_threads = {"SOFTIRQ KERNEL", NULL, NULL, 1, NULL, NULL, NULL }; +static enum ebpf_threads_status ebpf_softirq_exited = NETDATA_THREAD_EBPF_RUNNING; /** - * Clean up the main thread. + * Exit + * + * Cancel thread. * * @param ptr thread data. */ -static void softirq_cleanup(void *ptr) +static void softirq_exit(void *ptr) { - for (int i = 0; softirq_tracepoints[i].class != NULL; i++) { - ebpf_disable_tracepoint(&softirq_tracepoints[i]); - } - ebpf_module_t *em = (ebpf_module_t *)ptr; if (!em->enabled) { + em->enabled = NETDATA_MAIN_THREAD_EXITED; return; } - heartbeat_t hb; - heartbeat_init(&hb); - uint32_t tick = 1 * USEC_PER_MS; - while (!read_thread_closed) { - usec_t dt = heartbeat_next(&hb, tick); - UNUSED(dt); - } + ebpf_softirq_exited = NETDATA_THREAD_EBPF_STOPPING; +} + +/** + * Cleanup + * + * Clean up allocated memory. + * + * @param ptr thread data. + */ +static void softirq_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (ebpf_softirq_exited != NETDATA_THREAD_EBPF_STOPPED) + return; - freez(softirq_ebpf_vals); freez(softirq_threads.thread); - if (probe_links) { - struct bpf_program *prog; - size_t i = 0 ; - bpf_object__for_each_program(prog, objects) { - bpf_link__destroy(probe_links[i]); - i++; - } - bpf_object__close(objects); + for (int i = 0; softirq_tracepoints[i].class != NULL; i++) { + ebpf_disable_tracepoint(&softirq_tracepoints[i]); } + freez(softirq_ebpf_vals); + + softirq_threads.enabled = NETDATA_MAIN_THREAD_EXITED; + em->enabled = NETDATA_MAIN_THREAD_EXITED; } /***************************************************************** @@ -131,22 +131,24 @@ static void softirq_read_latency_map() */ static void *softirq_reader(void *ptr) { - read_thread_closed = 0; - + netdata_thread_cleanup_push(softirq_exit, ptr); heartbeat_t hb; heartbeat_init(&hb); ebpf_module_t *em = (ebpf_module_t *)ptr; usec_t step = NETDATA_SOFTIRQ_SLEEP_MS * em->update_every; - while (!close_ebpf_plugin) { + while (ebpf_softirq_exited == NETDATA_THREAD_EBPF_RUNNING) { usec_t dt = heartbeat_next(&hb, step); UNUSED(dt); + if (ebpf_softirq_exited == NETDATA_THREAD_EBPF_STOPPING) + break; softirq_read_latency_map(); } + ebpf_softirq_exited = NETDATA_THREAD_EBPF_STOPPED; - read_thread_closed = 1; + netdata_thread_cleanup_pop(1); return NULL; } @@ -200,7 +202,7 @@ static void softirq_collector(ebpf_module_t *em) netdata_thread_create( softirq_threads.thread, softirq_threads.name, - NETDATA_THREAD_OPTION_JOINABLE, + NETDATA_THREAD_OPTION_DEFAULT, softirq_reader, em ); @@ -213,24 +215,23 @@ static void softirq_collector(ebpf_module_t *em) pthread_mutex_unlock(&lock); // loop and read from published data until ebpf plugin is closed. - int update_every = em->update_every; - int counter = update_every - 1; - while (!close_ebpf_plugin) { - pthread_mutex_lock(&collect_data_mutex); - pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); - - if (++counter == update_every) { - counter = 0; - pthread_mutex_lock(&lock); - - // write dims now for all hitherto discovered IRQs. - write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, "softirq_latency"); - softirq_write_dims(); - write_end_chart(); - - pthread_mutex_unlock(&lock); - } - pthread_mutex_unlock(&collect_data_mutex); + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = em->update_every * USEC_PER_SEC; + //This will be cancelled by its parent + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + + pthread_mutex_lock(&lock); + + // write dims now for all hitherto discovered IRQs. + write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, "softirq_latency"); + softirq_write_dims(); + write_end_chart(); + + pthread_mutex_unlock(&lock); } } @@ -260,8 +261,8 @@ void *ebpf_softirq_thread(void *ptr) goto endsoftirq; } - probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &objects); - if (!probe_links) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { em->enabled = CONFIG_BOOLEAN_NO; goto endsoftirq; } diff --git a/collectors/ebpf.plugin/ebpf_swap.c b/collectors/ebpf.plugin/ebpf_swap.c index 7d8423358..71d0c402b 100644 --- a/collectors/ebpf.plugin/ebpf_swap.c +++ b/collectors/ebpf.plugin/ebpf_swap.c @@ -7,7 +7,6 @@ static char *swap_dimension_name[NETDATA_SWAP_END] = { "read", "write" }; static netdata_syscall_stat_t swap_aggregated_data[NETDATA_SWAP_END]; static netdata_publish_syscall_t swap_publish_aggregated[NETDATA_SWAP_END]; -static int read_thread_closed = 1; netdata_publish_swap_t *swap_vector = NULL; static netdata_idx_t swap_hash_values[NETDATA_SWAP_END]; @@ -35,11 +34,9 @@ static ebpf_local_maps_t swap_maps[] = {{.name = "tbl_pid_swap", .internal_input .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, {.name = NULL, .internal_input = 0, .user_input = 0}}; -static struct bpf_link **probe_links = NULL; -static struct bpf_object *objects = NULL; - struct netdata_static_thread swap_threads = {"SWAP KERNEL", NULL, NULL, 1, NULL, NULL, NULL}; +static enum ebpf_threads_status ebpf_swap_exited = NETDATA_THREAD_EBPF_RUNNING; netdata_ebpf_targets_t swap_targets[] = { {.name = "swap_readpage", .mode = EBPF_LOAD_TRAMPOLINE}, {.name = "swap_writepage", .mode = EBPF_LOAD_TRAMPOLINE}, @@ -196,54 +193,48 @@ static inline int ebpf_swap_load_and_attach(struct swap_bpf *obj, ebpf_module_t *****************************************************************/ /** - * Clean swap structure + * Swap exit + * + * Cancel thread and exit. + * + * @param ptr thread data. */ -void clean_swap_pid_structures() { - struct pid_stat *pids = root_of_pids; - while (pids) { - freez(swap_pid[pids->pid]); - - pids = pids->next; +static void ebpf_swap_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) { + em->enabled = NETDATA_MAIN_THREAD_EXITED; + return; } + + ebpf_swap_exited = NETDATA_THREAD_EBPF_STOPPING; } /** - * Clean up the main thread. + * Swap cleanup + * + * Clean up allocated memory. * * @param ptr thread data. */ static void ebpf_swap_cleanup(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - if (!em->enabled) + if (ebpf_swap_exited != NETDATA_THREAD_EBPF_STOPPED) return; - heartbeat_t hb; - heartbeat_init(&hb); - uint32_t tick = 2 * USEC_PER_MS; - while (!read_thread_closed) { - usec_t dt = heartbeat_next(&hb, tick); - UNUSED(dt); - } - ebpf_cleanup_publish_syscall(swap_publish_aggregated); freez(swap_vector); freez(swap_values); + freez(swap_threads.thread); - if (probe_links) { - struct bpf_program *prog; - size_t i = 0 ; - bpf_object__for_each_program(prog, objects) { - bpf_link__destroy(probe_links[i]); - i++; - } - bpf_object__close(objects); - } #ifdef LIBBPF_MAJOR_VERSION - else if (bpf_obj) + if (bpf_obj) swap_bpf__destroy(bpf_obj); #endif + swap_threads.enabled = NETDATA_MAIN_THREAD_EXITED; + em->enabled = NETDATA_MAIN_THREAD_EXITED; } /***************************************************************** @@ -404,21 +395,24 @@ static void read_global_table() */ void *ebpf_swap_read_hash(void *ptr) { - read_thread_closed = 0; - + netdata_thread_cleanup_push(ebpf_swap_cleanup, ptr); heartbeat_t hb; heartbeat_init(&hb); ebpf_module_t *em = (ebpf_module_t *)ptr; usec_t step = NETDATA_SWAP_SLEEP_MS * em->update_every; - while (!close_ebpf_plugin) { + while (ebpf_swap_exited == NETDATA_THREAD_EBPF_RUNNING) { usec_t dt = heartbeat_next(&hb, step); (void)dt; + if (ebpf_swap_exited == NETDATA_THREAD_EBPF_STOPPING) + break; read_global_table(); } - read_thread_closed = 1; + ebpf_swap_exited = NETDATA_THREAD_EBPF_STOPPED; + + netdata_thread_cleanup_pop(1); return NULL; } @@ -523,7 +517,7 @@ static int ebpf_send_systemd_swap_charts() for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, (long long) ect->publish_systemd_swap.read); - } else + } else if (unlikely(ect->systemd)) ret = 0; } write_end_chart(); @@ -690,37 +684,38 @@ static void swap_collector(ebpf_module_t *em) swap_threads.thread = mallocz(sizeof(netdata_thread_t)); swap_threads.start_routine = ebpf_swap_read_hash; - netdata_thread_create(swap_threads.thread, swap_threads.name, NETDATA_THREAD_OPTION_JOINABLE, + netdata_thread_create(swap_threads.thread, swap_threads.name, NETDATA_THREAD_OPTION_DEFAULT, ebpf_swap_read_hash, em); - int apps = em->apps_charts; int cgroup = em->cgroup_charts; int update_every = em->update_every; - int counter = update_every - 1; - while (!close_ebpf_plugin) { - pthread_mutex_lock(&collect_data_mutex); - pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; - if (++counter == update_every) { - counter = 0; - if (apps) - read_apps_table(); + netdata_apps_integration_flags_t apps = em->apps_charts; + pthread_mutex_lock(&collect_data_mutex); + if (apps) + read_apps_table(); - if (cgroup) - ebpf_update_swap_cgroup(); + if (cgroup) + ebpf_update_swap_cgroup(); - pthread_mutex_lock(&lock); + pthread_mutex_lock(&lock); - swap_send_global(); + swap_send_global(); - if (apps) - ebpf_swap_send_apps_data(apps_groups_root_target); + if (apps & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) + ebpf_swap_send_apps_data(apps_groups_root_target); - if (cgroup) - ebpf_swap_send_cgroup_data(update_every); + if (cgroup) + ebpf_swap_send_cgroup_data(update_every); - pthread_mutex_unlock(&lock); - } + pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); } } @@ -758,6 +753,7 @@ void ebpf_swap_create_apps_charts(struct ebpf_module *em, void *ptr) 20192, ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], root, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; } /** @@ -817,8 +813,8 @@ static int ebpf_swap_load_bpf(ebpf_module_t *em) { int ret = 0; if (em->load == EBPF_LOAD_LEGACY) { - probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &objects); - if (!probe_links) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { ret = -1; } } @@ -849,7 +845,7 @@ static int ebpf_swap_load_bpf(ebpf_module_t *em) */ void *ebpf_swap_thread(void *ptr) { - netdata_thread_cleanup_push(ebpf_swap_cleanup, ptr); + netdata_thread_cleanup_push(ebpf_swap_exit, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; em->maps = swap_maps; diff --git a/collectors/ebpf.plugin/ebpf_swap.h b/collectors/ebpf.plugin/ebpf_swap.h index 31bda16a2..80c2c8e94 100644 --- a/collectors/ebpf.plugin/ebpf_swap.h +++ b/collectors/ebpf.plugin/ebpf_swap.h @@ -46,7 +46,6 @@ extern netdata_publish_swap_t **swap_pid; extern void *ebpf_swap_thread(void *ptr); extern void ebpf_swap_create_apps_charts(struct ebpf_module *em, void *ptr); -extern void clean_swap_pid_structures(); extern struct config swap_config; extern netdata_ebpf_targets_t swap_targets[]; diff --git a/collectors/ebpf.plugin/ebpf_sync.c b/collectors/ebpf.plugin/ebpf_sync.c index b45ec86c1..0e56f541d 100644 --- a/collectors/ebpf.plugin/ebpf_sync.c +++ b/collectors/ebpf.plugin/ebpf_sync.c @@ -8,8 +8,6 @@ static char *sync_counter_dimension_name[NETDATA_SYNC_IDX_END] = { "sync", "sync static netdata_syscall_stat_t sync_counter_aggregated_data[NETDATA_SYNC_IDX_END]; static netdata_publish_syscall_t sync_counter_publish_aggregated[NETDATA_SYNC_IDX_END]; -static int read_thread_closed = 1; - static netdata_idx_t sync_hash_values[NETDATA_SYNC_IDX_END]; struct netdata_static_thread sync_threads = {"SYNC KERNEL", NULL, NULL, 1, @@ -43,44 +41,6 @@ struct config sync_config = { .first_section = NULL, .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, .rwlock = AVL_LOCK_INITIALIZER } }; -ebpf_sync_syscalls_t local_syscalls[] = { - {.syscall = NETDATA_SYSCALLS_SYNC, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, -#ifdef LIBBPF_MAJOR_VERSION - .sync_obj = NULL -#endif - }, - {.syscall = NETDATA_SYSCALLS_SYNCFS, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, -#ifdef LIBBPF_MAJOR_VERSION - .sync_obj = NULL -#endif - }, - {.syscall = NETDATA_SYSCALLS_MSYNC, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, -#ifdef LIBBPF_MAJOR_VERSION - .sync_obj = NULL -#endif - }, - {.syscall = NETDATA_SYSCALLS_FSYNC, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, -#ifdef LIBBPF_MAJOR_VERSION - .sync_obj = NULL -#endif - }, - {.syscall = NETDATA_SYSCALLS_FDATASYNC, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, -#ifdef LIBBPF_MAJOR_VERSION - .sync_obj = NULL -#endif - }, - {.syscall = NETDATA_SYSCALLS_SYNC_FILE_RANGE, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, -#ifdef LIBBPF_MAJOR_VERSION - .sync_obj = NULL -#endif - }, - {.syscall = NULL, .enabled = CONFIG_BOOLEAN_NO, .objects = NULL, .probe_links = NULL, -#ifdef LIBBPF_MAJOR_VERSION - .sync_obj = NULL -#endif - } -}; - netdata_ebpf_targets_t sync_targets[] = { {.name = NETDATA_SYSCALLS_SYNC, .mode = EBPF_LOAD_TRAMPOLINE}, {.name = NETDATA_SYSCALLS_SYNCFS, .mode = EBPF_LOAD_TRAMPOLINE}, {.name = NETDATA_SYSCALLS_MSYNC, .mode = EBPF_LOAD_TRAMPOLINE}, @@ -88,6 +48,7 @@ netdata_ebpf_targets_t sync_targets[] = { {.name = NETDATA_SYSCALLS_SYNC, .mode {.name = NETDATA_SYSCALLS_FDATASYNC, .mode = EBPF_LOAD_TRAMPOLINE}, {.name = NETDATA_SYSCALLS_SYNC_FILE_RANGE, .mode = EBPF_LOAD_TRAMPOLINE}, {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; +static enum ebpf_threads_status ebpf_sync_exited = NETDATA_THREAD_EBPF_RUNNING; #ifdef LIBBPF_MAJOR_VERSION @@ -218,6 +179,76 @@ static inline int ebpf_sync_load_and_attach(struct sync_bpf *obj, ebpf_module_t /***************************************************************** * + * CLEANUP THREAD + * + *****************************************************************/ + +/** + * Cleanup Objects + * + * Cleanup loaded objects when thread was initialized. + */ +void ebpf_sync_cleanup_objects() +{ + int i; + for (i = 0; local_syscalls[i].syscall; i++) { + ebpf_sync_syscalls_t *w = &local_syscalls[i]; + if (w->probe_links) { + struct bpf_program *prog; + size_t j = 0 ; + bpf_object__for_each_program(prog, w->objects) { + bpf_link__destroy(w->probe_links[j]); + j++; + } + freez(w->probe_links); + if (w->objects) + bpf_object__close(w->objects); + } +#ifdef LIBBPF_MAJOR_VERSION + else if (w->sync_obj) + sync_bpf__destroy(w->sync_obj); +#endif + } +} + +/** + * Exit + * + * Clean up the main thread. + * + * @param ptr thread data. + */ +static void ebpf_sync_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) { + em->enabled = NETDATA_MAIN_THREAD_EXITED; + return; + } + + ebpf_sync_exited = NETDATA_THREAD_EBPF_STOPPING; +} + +/** + * Clean up the main thread. + * + * @param ptr thread data. + */ +static void ebpf_sync_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (ebpf_sync_exited != NETDATA_THREAD_EBPF_STOPPED) + return; + + ebpf_sync_cleanup_objects(); + freez(sync_threads.thread); + + sync_threads.enabled = NETDATA_MAIN_THREAD_EXITED; + em->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +/***************************************************************** + * * INITIALIZE THREAD * *****************************************************************/ @@ -334,21 +365,25 @@ static void read_global_table() */ void *ebpf_sync_read_hash(void *ptr) { + netdata_thread_cleanup_push(ebpf_sync_cleanup, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; - read_thread_closed = 0; heartbeat_t hb; heartbeat_init(&hb); usec_t step = NETDATA_EBPF_SYNC_SLEEP_MS * em->update_every; - while (!close_ebpf_plugin) { + while (ebpf_sync_exited == NETDATA_THREAD_EBPF_RUNNING) { usec_t dt = heartbeat_next(&hb, step); (void)dt; + if (ebpf_sync_exited == NETDATA_THREAD_EBPF_STOPPING) + break; read_global_table(); } - read_thread_closed = 1; + ebpf_sync_exited = NETDATA_THREAD_EBPF_STOPPED; + + netdata_thread_cleanup_pop(1); return NULL; } @@ -414,82 +449,25 @@ static void sync_collector(ebpf_module_t *em) sync_threads.thread = mallocz(sizeof(netdata_thread_t)); sync_threads.start_routine = ebpf_sync_read_hash; - netdata_thread_create(sync_threads.thread, sync_threads.name, NETDATA_THREAD_OPTION_JOINABLE, + netdata_thread_create(sync_threads.thread, sync_threads.name, NETDATA_THREAD_OPTION_DEFAULT, ebpf_sync_read_hash, em); - int update_every = em->update_every; - int counter = update_every - 1; - while (!close_ebpf_plugin) { - pthread_mutex_lock(&collect_data_mutex); - pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); - - if (++counter == update_every) { - counter = 0; - pthread_mutex_lock(&lock); - - sync_send_data(); - - pthread_mutex_unlock(&lock); - } - pthread_mutex_unlock(&collect_data_mutex); - } -} + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = em->update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; + pthread_mutex_lock(&lock); -/***************************************************************** - * - * CLEANUP THREAD - * - *****************************************************************/ + sync_send_data(); -/** - * Cleanup Objects - * - * Cleanup loaded objects when thread was initialized. - */ -void ebpf_sync_cleanup_objects() -{ - int i; - for (i = 0; local_syscalls[i].syscall; i++) { - ebpf_sync_syscalls_t *w = &local_syscalls[i]; - if (w->probe_links) { - struct bpf_program *prog; - size_t j = 0 ; - bpf_object__for_each_program(prog, w->objects) { - bpf_link__destroy(w->probe_links[j]); - j++; - } - bpf_object__close(w->objects); - } -#ifdef LIBBPF_MAJOR_VERSION - else if (w->sync_obj) - sync_bpf__destroy(w->sync_obj); -#endif + pthread_mutex_unlock(&lock); } } -/** - * Clean up the main thread. - * - * @param ptr thread data. - */ -static void ebpf_sync_cleanup(void *ptr) -{ - ebpf_module_t *em = (ebpf_module_t *)ptr; - if (!em->enabled) - return; - - heartbeat_t hb; - heartbeat_init(&hb); - uint32_t tick = 2*USEC_PER_MS; - while (!read_thread_closed) { - usec_t dt = heartbeat_next(&hb, tick); - UNUSED(dt); - } - - ebpf_sync_cleanup_objects(); - freez(sync_threads.thread); -} /***************************************************************** * @@ -587,7 +565,7 @@ static void ebpf_sync_parse_syscalls() */ void *ebpf_sync_thread(void *ptr) { - netdata_thread_cleanup_push(ebpf_sync_cleanup, ptr); + netdata_thread_cleanup_push(ebpf_sync_exit, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; em->maps = sync_maps; diff --git a/collectors/ebpf.plugin/ebpf_sync.h b/collectors/ebpf.plugin/ebpf_sync.h index 2bc18c544..a52434c17 100644 --- a/collectors/ebpf.plugin/ebpf_sync.h +++ b/collectors/ebpf.plugin/ebpf_sync.h @@ -41,23 +41,6 @@ typedef enum sync_syscalls_index { NETDATA_SYNC_IDX_END } sync_syscalls_index_t; -typedef struct ebpf_sync_syscalls { - char *syscall; - int enabled; - uint32_t flags; - - // BTF structure - struct bpf_object *objects; - struct bpf_link **probe_links; - - // BPF structure -#ifdef LIBBPF_MAJOR_VERSION - struct sync_bpf *sync_obj; -#else - void *sync_obj; -#endif -} ebpf_sync_syscalls_t; - enum netdata_sync_charts { NETDATA_SYNC_CALL, diff --git a/collectors/ebpf.plugin/ebpf_vfs.c b/collectors/ebpf.plugin/ebpf_vfs.c index e14165fb7..6746cc624 100644 --- a/collectors/ebpf.plugin/ebpf_vfs.c +++ b/collectors/ebpf.plugin/ebpf_vfs.c @@ -34,33 +34,32 @@ struct config vfs_config = { .first_section = NULL, .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, .rwlock = AVL_LOCK_INITIALIZER } }; -static struct bpf_object *objects = NULL; -static struct bpf_link **probe_links = NULL; - struct netdata_static_thread vfs_threads = {"VFS KERNEL", NULL, NULL, 1, NULL, NULL, NULL}; - -static int read_thread_closed = 1; +static enum ebpf_threads_status ebpf_vfs_exited = NETDATA_THREAD_EBPF_RUNNING; /***************************************************************** * * FUNCTIONS TO CLOSE THE THREAD * *****************************************************************/ - /** - * Clean PID structures + * Exit * - * Clean the allocated structures. - */ -void clean_vfs_pid_structures() { - struct pid_stat *pids = root_of_pids; - while (pids) { - freez(vfs_pid[pids->pid]); - - pids = pids->next; + * Cancel thread and exit. + * + * @param ptr thread data. +**/ +static void ebpf_vfs_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) { + em->enabled = NETDATA_MAIN_THREAD_EXITED; + return; } + + ebpf_vfs_exited = NETDATA_THREAD_EBPF_STOPPING; } /** @@ -71,29 +70,15 @@ void clean_vfs_pid_structures() { static void ebpf_vfs_cleanup(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - if (!em->enabled) + if (ebpf_vfs_exited != NETDATA_THREAD_EBPF_STOPPED) return; - heartbeat_t hb; - heartbeat_init(&hb); - uint32_t tick = 50 * USEC_PER_MS; - while (!read_thread_closed) { - usec_t dt = heartbeat_next(&hb, tick); - UNUSED(dt); - } - freez(vfs_hash_values); freez(vfs_vector); + freez(vfs_threads.thread); - if (probe_links) { - struct bpf_program *prog; - size_t i = 0 ; - bpf_object__for_each_program(prog, objects) { - bpf_link__destroy(probe_links[i]); - i++; - } - bpf_object__close(objects); - } + vfs_threads.enabled = NETDATA_MAIN_THREAD_EXITED; + em->enabled = NETDATA_MAIN_THREAD_EXITED; } /***************************************************************** @@ -526,23 +511,26 @@ static void read_update_vfs_cgroup() */ void *ebpf_vfs_read_hash(void *ptr) { - read_thread_closed = 0; - + netdata_thread_cleanup_push(ebpf_vfs_cleanup, ptr); heartbeat_t hb; heartbeat_init(&hb); ebpf_module_t *em = (ebpf_module_t *)ptr; usec_t step = NETDATA_LATENCY_VFS_SLEEP_MS * em->update_every; - while (!close_ebpf_plugin) { + //This will be cancelled by its parent + while (ebpf_vfs_exited == NETDATA_THREAD_EBPF_RUNNING) { usec_t dt = heartbeat_next(&hb, step); (void)dt; + if (ebpf_vfs_exited == NETDATA_THREAD_EBPF_STOPPING) + break; read_global_table(); } - read_thread_closed = 1; + ebpf_vfs_exited = NETDATA_THREAD_EBPF_STOPPED; + netdata_thread_cleanup_pop(1); return NULL; } @@ -1000,7 +988,7 @@ static int ebpf_send_systemd_vfs_charts(ebpf_module_t *em) for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { if (unlikely(ect->systemd) && unlikely(ect->updated)) { write_chart_dimension(ect->name, ect->publish_systemd_vfs.unlink_call); - } else + } else if (unlikely(ect->systemd)) ret = 0; } write_end_chart(); @@ -1181,38 +1169,38 @@ static void vfs_collector(ebpf_module_t *em) vfs_threads.thread = mallocz(sizeof(netdata_thread_t)); vfs_threads.start_routine = ebpf_vfs_read_hash; - netdata_thread_create(vfs_threads.thread, vfs_threads.name, NETDATA_THREAD_OPTION_JOINABLE, + netdata_thread_create(vfs_threads.thread, vfs_threads.name, NETDATA_THREAD_OPTION_DEFAULT, ebpf_vfs_read_hash, em); - int apps = em->apps_charts; int cgroups = em->cgroup_charts; - int update_every = em->update_every; - int counter = update_every - 1; - while (!close_ebpf_plugin) { - pthread_mutex_lock(&collect_data_mutex); - pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = em->update_every * USEC_PER_SEC; + while (!ebpf_exit_plugin) { + (void)heartbeat_next(&hb, step); + if (ebpf_exit_plugin) + break; - if (++counter == update_every) { - counter = 0; - if (apps) - ebpf_vfs_read_apps(); + netdata_apps_integration_flags_t apps = em->apps_charts; + pthread_mutex_lock(&collect_data_mutex); + if (apps) + ebpf_vfs_read_apps(); - if (cgroups) - read_update_vfs_cgroup(); + if (cgroups) + read_update_vfs_cgroup(); - pthread_mutex_lock(&lock); + pthread_mutex_lock(&lock); - ebpf_vfs_send_data(em); - fflush(stdout); + ebpf_vfs_send_data(em); + fflush(stdout); - if (apps) - ebpf_vfs_send_apps_data(em, apps_groups_root_target); + if (apps & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) + ebpf_vfs_send_apps_data(em, apps_groups_root_target); - if (cgroups) - ebpf_vfs_send_cgroup_data(em); + if (cgroups) + ebpf_vfs_send_cgroup_data(em); - pthread_mutex_unlock(&lock); - } + pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); } } @@ -1520,6 +1508,8 @@ void ebpf_vfs_create_apps_charts(struct ebpf_module *em, void *ptr) ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); } + + em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED; } /***************************************************************** @@ -1564,7 +1554,7 @@ static void ebpf_vfs_allocate_global_vectors(int apps) */ void *ebpf_vfs_thread(void *ptr) { - netdata_thread_cleanup_push(ebpf_vfs_cleanup, ptr); + netdata_thread_cleanup_push(ebpf_vfs_exit, ptr); ebpf_module_t *em = (ebpf_module_t *)ptr; em->maps = vfs_maps; @@ -1576,8 +1566,8 @@ void *ebpf_vfs_thread(void *ptr) if (!em->enabled) goto endvfs; - probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &objects); - if (!probe_links) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { em->enabled = CONFIG_BOOLEAN_NO; goto endvfs; } diff --git a/collectors/ebpf.plugin/ebpf_vfs.h b/collectors/ebpf.plugin/ebpf_vfs.h index 67542ad44..87a21e39c 100644 --- a/collectors/ebpf.plugin/ebpf_vfs.h +++ b/collectors/ebpf.plugin/ebpf_vfs.h @@ -156,7 +156,6 @@ extern netdata_publish_vfs_t **vfs_pid; extern void *ebpf_vfs_thread(void *ptr); extern void ebpf_vfs_create_apps_charts(struct ebpf_module *em, void *ptr); -extern void clean_vfs_pid_structures(); extern struct config vfs_config; diff --git a/collectors/freebsd.plugin/freebsd_sysctl.c b/collectors/freebsd.plugin/freebsd_sysctl.c index c43743c35..016a71e37 100644 --- a/collectors/freebsd.plugin/freebsd_sysctl.c +++ b/collectors/freebsd.plugin/freebsd_sysctl.c @@ -178,15 +178,15 @@ int do_vm_loadavg(int update_every, usec_t dt){ int do_vm_vmtotal(int update_every, usec_t dt) { (void)dt; - static int do_all_processes = -1, do_processes = -1, do_committed = -1; + static int do_all_processes = -1, do_processes = -1, do_mem_real = -1; if (unlikely(do_all_processes == -1)) { do_all_processes = config_get_boolean("plugin:freebsd:vm.vmtotal", "enable total processes", 1); do_processes = config_get_boolean("plugin:freebsd:vm.vmtotal", "processes running", 1); - do_committed = config_get_boolean("plugin:freebsd:vm.vmtotal", "committed memory", 1); + do_mem_real = config_get_boolean("plugin:freebsd:vm.vmtotal", "real memory", 1); } - if (likely(do_all_processes | do_processes | do_committed)) { + if (likely(do_all_processes | do_processes | do_mem_real)) { static int mib[2] = {0, 0}; struct vmtotal vmtotal_data; @@ -195,8 +195,8 @@ int do_vm_vmtotal(int update_every, usec_t dt) { error("DISABLED: system.active_processes chart"); do_processes = 0; error("DISABLED: system.processes chart"); - do_committed = 0; - error("DISABLED: mem.committed chart"); + do_mem_real = 0; + error("DISABLED: mem.real chart"); error("DISABLED: vm.vmtotal module"); return 1; } else { @@ -264,18 +264,18 @@ int do_vm_vmtotal(int update_every, usec_t dt) { // -------------------------------------------------------------------- - if (likely(do_committed)) { + if (likely(do_mem_real)) { static RRDSET *st = NULL; static RRDDIM *rd = NULL; if (unlikely(!st)) { st = rrdset_create_localhost( "mem", - "committed", + "real", NULL, "system", NULL, - "Committed (Allocated) Memory", + "Total Real Memory In Use", "MiB", "freebsd.plugin", "vm.vmtotal", @@ -285,7 +285,7 @@ int do_vm_vmtotal(int update_every, usec_t dt) { ); rrdset_flag_set(st, RRDSET_FLAG_DETAIL); - rd = rrddim_add(st, "Committed_AS", NULL, system_pagesize, MEGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); + rd = rrddim_add(st, "used", NULL, system_pagesize, MEGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); } else rrdset_next(st); @@ -1005,9 +1005,9 @@ int do_system_ram(int update_every, usec_t dt) { // -------------------------------------------------------------------- - static RRDSET *st = NULL; + static RRDSET *st = NULL, *st_mem_available = NULL; static RRDDIM *rd_free = NULL, *rd_active = NULL, *rd_inactive = NULL, *rd_wired = NULL, - *rd_cache = NULL, *rd_buffers = NULL; + *rd_cache = NULL, *rd_buffers = NULL, *rd_avail = NULL; #if defined(NETDATA_COLLECT_LAUNDRY) static RRDDIM *rd_laundry = NULL; @@ -1044,9 +1044,9 @@ int do_system_ram(int update_every, usec_t dt) { rrddim_set_by_pointer(st, rd_free, vmmeter_data.v_free_count); rrddim_set_by_pointer(st, rd_active, vmmeter_data.v_active_count); rrddim_set_by_pointer(st, rd_inactive, vmmeter_data.v_inactive_count); - rrddim_set_by_pointer(st, rd_wired, vmmeter_data.v_wire_count * system_pagesize - zfs_arcstats_shrinkable_cache_size_bytes); + rrddim_set_by_pointer(st, rd_wired, (unsigned long long)vmmeter_data.v_wire_count * (unsigned long long)system_pagesize - zfs_arcstats_shrinkable_cache_size_bytes); #if __FreeBSD_version < 1200016 - rrddim_set_by_pointer(st, rd_cache, vmmeter_data.v_cache_count * system_pagesize + zfs_arcstats_shrinkable_cache_size_bytes); + rrddim_set_by_pointer(st, rd_cache, (unsigned long long)vmmeter_data.v_cache_count * (unsigned long long)system_pagesize + zfs_arcstats_shrinkable_cache_size_bytes); #else rrddim_set_by_pointer(st, rd_cache, zfs_arcstats_shrinkable_cache_size_bytes); #endif @@ -1055,6 +1055,34 @@ int do_system_ram(int update_every, usec_t dt) { #endif rrddim_set_by_pointer(st, rd_buffers, vfs_bufspace_count); rrdset_done(st); + + if (unlikely(!st_mem_available)) { + st_mem_available = rrdset_create_localhost( + "mem", + "available", + NULL, + "system", + NULL, + "Available RAM for applications", + "MiB", + "freebsd.plugin", + "system.ram", + NETDATA_CHART_PRIO_MEM_SYSTEM_AVAILABLE, + update_every, + RRDSET_TYPE_AREA + ); + + rd_avail = rrddim_add(st_mem_available, "MemAvailable", "avail", system_pagesize, MEGA_FACTOR, RRD_ALGORITHM_ABSOLUTE); + } + else rrdset_next(st_mem_available); + +#if __FreeBSD_version < 1200016 + rrddim_set_by_pointer(st_mem_available, rd_avail, vmmeter_data.v_inactive_count + vmmeter_data.v_free_count + vmmeter_data.v_cache_count + zfs_arcstats_shrinkable_cache_size_bytes / system_pagesize); +#else + rrddim_set_by_pointer(st_mem_available, rd_avail, vmmeter_data.v_inactive_count + vmmeter_data.v_free_count + zfs_arcstats_shrinkable_cache_size_bytes / system_pagesize); +#endif + + rrdset_done(st_mem_available); } return 0; diff --git a/collectors/perf.plugin/perf_plugin.c b/collectors/perf.plugin/perf_plugin.c index 80e042edc..b2f7d2e17 100644 --- a/collectors/perf.plugin/perf_plugin.c +++ b/collectors/perf.plugin/perf_plugin.c @@ -520,8 +520,8 @@ static void perf_send_metrics() { , "instructions_per_cycle" ); - calculated_number result = ((calculated_number)perf_events[EV_ID_INSTRUCTIONS].value / - (calculated_number)perf_events[EV_ID_CPU_CYCLES].value) * 100.0; + NETDATA_DOUBLE result = ((NETDATA_DOUBLE)perf_events[EV_ID_INSTRUCTIONS].value / + (NETDATA_DOUBLE)perf_events[EV_ID_CPU_CYCLES].value) * 100.0; printf("SET %s = %lld\n" , "ipc" , (collected_number) result diff --git a/collectors/plugins.d/README.md b/collectors/plugins.d/README.md index c84384215..0741636b9 100644 --- a/collectors/plugins.d/README.md +++ b/collectors/plugins.d/README.md @@ -116,15 +116,17 @@ For example, if your plugin wants to monitor `squid`, you can search for it on p Any program that can print a few values to its standard output can become a Netdata external plugin. -Netdata parses 7 lines starting with: - -- `CHART` - create or update a chart -- `DIMENSION` - add or update a dimension to the chart just created -- `BEGIN` - initialize data collection for a chart -- `SET` - set the value of a dimension for the initialized chart -- `END` - complete data collection for the initialized chart -- `FLUSH` - ignore the last collected values -- `DISABLE` - disable this plugin +Netdata parses 9 lines starting with: + +- `CHART` - create or update a chart +- `DIMENSION` - add or update a dimension to the chart just created +- `BEGIN` - initialize data collection for a chart +- `SET` - set the value of a dimension for the initialized chart +- `END` - complete data collection for the initialized chart +- `FLUSH` - ignore the last collected values +- `DISABLE` - disable this plugin +- `CLABEL` - add a label to a chart +- `CLABEL_COMMIT` - commit added labels to the chart. a single program can produce any number of charts with any number of dimensions each. @@ -151,6 +153,7 @@ available for the plugin to use. |`NETDATA_USER_CONFIG_DIR`|The directory where all Netdata-related user configuration should be stored. If the plugin requires custom user configuration, this is the place the user has saved it (normally under `/etc/netdata`).| |`NETDATA_STOCK_CONFIG_DIR`|The directory where all Netdata -related stock configuration should be stored. If the plugin is shipped with configuration files, this is the place they can be found (normally under `/usr/lib/netdata/conf.d`).| |`NETDATA_PLUGINS_DIR`|The directory where all Netdata plugins are stored.| +|`NETDATA_USER_PLUGINS_DIRS`|The list of directories where custom plugins are stored.| |`NETDATA_WEB_DIR`|The directory where the web files of Netdata are saved.| |`NETDATA_CACHE_DIR`|The directory where the cache files of Netdata are stored. Use this directory if the plugin requires a place to store data. A new directory should be created for the plugin for this purpose, inside this directory.| |`NETDATA_LOG_DIR`|The directory where the log files are stored. By default the `stderr` output of the plugin will be saved in the `error.log` file of Netdata.| @@ -319,6 +322,46 @@ The `value` is floating point (Netdata used `long double`). Variables are transferred to upstream Netdata servers (streaming and database replication). +#### CLABEL + +> CLABEL name value source + +`CLABEL` defines a label used to organize and identify a chart. + +Name and value accept characters according to the following table: + +| Character | Symbol | Label Name | Label Value | +|---------------------|:------:|:----------:|:-----------:| +| UTF-8 character | UTF-8 | _ | keep | +| Lower case letter | [a-z] | keep | keep | +| Upper case letter | [A-Z] | keep | [a-z] | +| Digit | [0-9] | keep | keep | +| Underscore | _ | keep | keep | +| Minus | - | keep | keep | +| Plus | + | _ | keep | +| Colon | : | _ | keep | +| Semicolon | ; | _ | : | +| Equal | = | _ | : | +| Period | . | keep | keep | +| Comma | , | . | . | +| Slash | / | keep | keep | +| Backslash | \ | / | / | +| At | @ | _ | keep | +| Space | ' ' | _ | keep | +| Opening parenthesis | ( | _ | keep | +| Closing parenthesis | ) | _ | keep | +| Anything else | | _ | _ | + +The `source` is an integer field that can have the following values: +- `1`: The value was set automatically. +- `2`: The value was set manually. +- `4`: This is a K8 label. +- `8`: This is a label defined using `netdata` agent cloud link. + +#### CLABEL_COMMIT + +`CLABEL_COMMIT` indicates that all labels were defined and the chart can be updated. + ## Data collection data collection is defined as a series of `BEGIN` -> `SET` -> `END` lines diff --git a/collectors/plugins.d/plugins_d.c b/collectors/plugins.d/plugins_d.c index 2916f1c13..377ec1401 100644 --- a/collectors/plugins.d/plugins_d.c +++ b/collectors/plugins.d/plugins_d.c @@ -39,7 +39,7 @@ inline int config_isspace(char c) inline int quoted_strings_splitter(char *str, char **words, int max_words, int (*custom_isspace)(char), char *recover_input, char **recover_location, int max_recover) { char *s = str, quote = 0; - int i = 0, j, rec = 0; + int i = 0, rec = 0; char *recover = recover_input; // skip all white space @@ -112,9 +112,7 @@ inline int quoted_strings_splitter(char *str, char **words, int max_words, int ( } // terminate the words - j = i; - while (likely(j < max_words)) - words[j++] = NULL; + memset(&words[i], 0, (max_words - i) * sizeof (char *)); return i; } diff --git a/collectors/plugins.d/pluginsd_parser.c b/collectors/plugins.d/pluginsd_parser.c index f014a29d0..88e07fab7 100644 --- a/collectors/plugins.d/pluginsd_parser.c +++ b/collectors/plugins.d/pluginsd_parser.c @@ -96,7 +96,7 @@ PARSER_RC pluginsd_disable_action(void *user) } -PARSER_RC pluginsd_variable_action(void *user, RRDHOST *host, RRDSET *st, char *name, int global, calculated_number value) +PARSER_RC pluginsd_variable_action(void *user, RRDHOST *host, RRDSET *st, char *name, int global, NETDATA_DOUBLE value) { UNUSED(user); @@ -146,35 +146,41 @@ PARSER_RC pluginsd_dimension_action(void *user, RRDSET *st, char *id, char *name if (likely(unhide_dimension)) { rrddim_flag_clear(rd, RRDDIM_FLAG_HIDDEN); if (rrddim_flag_check(rd, RRDDIM_FLAG_META_HIDDEN)) { - (void)sql_set_dimension_option(&rd->state->metric_uuid, NULL); + (void)sql_set_dimension_option(&rd->metric_uuid, NULL); rrddim_flag_clear(rd, RRDDIM_FLAG_META_HIDDEN); } } else { rrddim_flag_set(rd, RRDDIM_FLAG_HIDDEN); if (!rrddim_flag_check(rd, RRDDIM_FLAG_META_HIDDEN)) { - (void)sql_set_dimension_option(&rd->state->metric_uuid, "hidden"); + (void)sql_set_dimension_option(&rd->metric_uuid, "hidden"); rrddim_flag_set(rd, RRDDIM_FLAG_META_HIDDEN); } } return PARSER_RC_OK; } -PARSER_RC pluginsd_label_action(void *user, char *key, char *value, LABEL_SOURCE source) +PARSER_RC pluginsd_label_action(void *user, char *key, char *value, RRDLABEL_SRC source) { - ((PARSER_USER_OBJECT *) user)->new_labels = add_label_to_list(((PARSER_USER_OBJECT *) user)->new_labels, key, value, source); + if(unlikely(!((PARSER_USER_OBJECT *) user)->new_host_labels)) + ((PARSER_USER_OBJECT *) user)->new_host_labels = rrdlabels_create(); + + rrdlabels_add(((PARSER_USER_OBJECT *)user)->new_host_labels, key, value, source); return PARSER_RC_OK; } -PARSER_RC pluginsd_clabel_action(void *user, char *key, char *value, LABEL_SOURCE source) +PARSER_RC pluginsd_clabel_action(void *user, char *key, char *value, RRDLABEL_SRC source) { - ((PARSER_USER_OBJECT *) user)->chart_labels = add_label_to_list(((PARSER_USER_OBJECT *) user)->chart_labels, key, value, source); + if(unlikely(!((PARSER_USER_OBJECT *) user)->new_chart_labels)) + ((PARSER_USER_OBJECT *) user)->new_chart_labels = rrdlabels_create(); + + rrdlabels_add(((PARSER_USER_OBJECT *)user)->new_chart_labels, key, value, source); return PARSER_RC_OK; } -PARSER_RC pluginsd_clabel_commit_action(void *user, RRDHOST *host, struct label *new_labels) +PARSER_RC pluginsd_clabel_commit_action(void *user, RRDHOST *host, DICTIONARY *new_chart_labels) { RRDSET *st = ((PARSER_USER_OBJECT *)user)->st; if (unlikely(!st)) { @@ -182,21 +188,21 @@ PARSER_RC pluginsd_clabel_commit_action(void *user, RRDHOST *host, struct label return PARSER_RC_OK; } - rrdset_update_labels(st, new_labels); + rrdset_update_rrdlabels(st, new_chart_labels); + return PARSER_RC_OK; } -PARSER_RC pluginsd_overwrite_action(void *user, RRDHOST *host, struct label *new_labels) +PARSER_RC pluginsd_overwrite_action(void *user, RRDHOST *host, DICTIONARY *new_host_labels) { UNUSED(user); - if (!host->labels.head) { - host->labels.head = new_labels; - } else { - rrdhost_rdlock(host); - replace_label_list(&host->labels, new_labels); - rrdhost_unlock(host); - } + if(!host->host_labels) + host->host_labels = rrdlabels_create(); + + rrdlabels_migrate_to_these(host->host_labels, new_host_labels); + sql_store_host_labels(host); + return PARSER_RC_OK; } @@ -468,7 +474,7 @@ PARSER_RC pluginsd_variable(char **words, void *user, PLUGINSD_ACTION *plugins_ { char *name = words[1]; char *value = words[2]; - calculated_number v; + NETDATA_DOUBLE v; RRDSET *st = ((PARSER_USER_OBJECT *) user)->st; RRDHOST *host = ((PARSER_USER_OBJECT *) user)->host; @@ -508,7 +514,7 @@ PARSER_RC pluginsd_variable(char **words, void *user, PLUGINSD_ACTION *plugins_ } char *endptr = NULL; - v = (calculated_number)str2ld(value, &endptr); + v = (NETDATA_DOUBLE)str2ndd(value, &endptr); if (unlikely(endptr && *endptr)) { if (endptr == value) error( @@ -615,14 +621,15 @@ PARSER_RC pluginsd_clabel_commit(char **words, void *user, PLUGINSD_ACTION *plu RRDHOST *host = ((PARSER_USER_OBJECT *) user)->host; debug(D_PLUGINSD, "requested to commit chart labels"); - struct label *chart_labels = ((PARSER_USER_OBJECT *)user)->chart_labels; - ((PARSER_USER_OBJECT *)user)->chart_labels = NULL; + PARSER_RC rc = PARSER_RC_OK; - if (plugins_action->clabel_commit_action) { - return plugins_action->clabel_commit_action(user, host, chart_labels); - } + if (plugins_action->clabel_commit_action) + rc = plugins_action->clabel_commit_action(user, host, ((PARSER_USER_OBJECT *)user)->new_chart_labels); - return PARSER_RC_OK; + rrdlabels_destroy(((PARSER_USER_OBJECT *)user)->new_chart_labels); + ((PARSER_USER_OBJECT *)user)->new_chart_labels = NULL; + + return rc; } PARSER_RC pluginsd_overwrite(char **words, void *user, PLUGINSD_ACTION *plugins_action) @@ -630,16 +637,17 @@ PARSER_RC pluginsd_overwrite(char **words, void *user, PLUGINSD_ACTION *plugins UNUSED(words); RRDHOST *host = ((PARSER_USER_OBJECT *) user)->host; - debug(D_PLUGINSD, "requested a OVERWRITE a variable"); + debug(D_PLUGINSD, "requested to OVERWRITE host labels"); - struct label *new_labels = ((PARSER_USER_OBJECT *)user)->new_labels; - ((PARSER_USER_OBJECT *)user)->new_labels = NULL; + PARSER_RC rc = PARSER_RC_OK; - if (plugins_action->overwrite_action) { - return plugins_action->overwrite_action(user, host, new_labels); - } + if (plugins_action->overwrite_action) + rc = plugins_action->overwrite_action(user, host, ((PARSER_USER_OBJECT *)user)->new_host_labels); - return PARSER_RC_OK; + rrdlabels_destroy(((PARSER_USER_OBJECT *)user)->new_host_labels); + ((PARSER_USER_OBJECT *)user)->new_host_labels = NULL; + + return rc; } PARSER_RC pluginsd_guid(char **words, void *user, PLUGINSD_ACTION *plugins_action) diff --git a/collectors/plugins.d/pluginsd_parser.h b/collectors/plugins.d/pluginsd_parser.h index fb4a45b7a..924d48b7b 100644 --- a/collectors/plugins.d/pluginsd_parser.h +++ b/collectors/plugins.d/pluginsd_parser.h @@ -13,8 +13,8 @@ typedef struct parser_user_object { void *opaque; struct plugind *cd; int trust_durations; - struct label *new_labels; - struct label *chart_labels; + DICTIONARY *new_host_labels; + DICTIONARY *new_chart_labels; size_t count; int enabled; uint8_t st_exists; @@ -30,14 +30,13 @@ extern PARSER_RC pluginsd_chart_action(void *user, char *type, char *id, char *n char *title, char *units, char *plugin, char *module, int priority, int update_every, RRDSET_TYPE chart_type, char *options); extern PARSER_RC pluginsd_disable_action(void *user); -extern PARSER_RC pluginsd_variable_action(void *user, RRDHOST *host, RRDSET *st, char *name, int global, - calculated_number value); +extern PARSER_RC pluginsd_variable_action(void *user, RRDHOST *host, RRDSET *st, char *name, int global, NETDATA_DOUBLE value); extern PARSER_RC pluginsd_dimension_action(void *user, RRDSET *st, char *id, char *name, char *algorithm, long multiplier, long divisor, char *options, RRD_ALGORITHM algorithm_type); -extern PARSER_RC pluginsd_label_action(void *user, char *key, char *value, LABEL_SOURCE source); -extern PARSER_RC pluginsd_overwrite_action(void *user, RRDHOST *host, struct label *new_labels); -extern PARSER_RC pluginsd_clabel_commit_action(void *user, RRDHOST *host, struct label *new_labels); -extern PARSER_RC pluginsd_clabel_action(void *user, char *key, char *value, LABEL_SOURCE source); +extern PARSER_RC pluginsd_label_action(void *user, char *key, char *value, RRDLABEL_SRC source); +extern PARSER_RC pluginsd_overwrite_action(void *user, RRDHOST *host, DICTIONARY *new_host_labels); +extern PARSER_RC pluginsd_clabel_commit_action(void *user, RRDHOST *host, DICTIONARY *new_chart_labels); +extern PARSER_RC pluginsd_clabel_action(void *user, char *key, char *value, RRDLABEL_SRC source); #endif //NETDATA_PLUGINSD_PARSER_H diff --git a/collectors/proc.plugin/plugin_proc.h b/collectors/proc.plugin/plugin_proc.h index 1e3b82965..8cb5431e5 100644 --- a/collectors/proc.plugin/plugin_proc.h +++ b/collectors/proc.plugin/plugin_proc.h @@ -54,7 +54,12 @@ extern unsigned long long zfs_arcstats_shrinkable_cache_size_bytes; // netdev renames extern void netdev_rename_device_add( - const char *host_device, const char *container_device, const char *container_name, struct label *labels); + const char *host_device, + const char *container_device, + const char *container_name, + DICTIONARY *labels, + const char *ctx_prefix); + extern void netdev_rename_device_del(const char *host_device); #include "proc_self_mountinfo.h" diff --git a/collectors/proc.plugin/proc_diskstats.c b/collectors/proc.plugin/proc_diskstats.c index cfaf2134a..be4a481cd 100644 --- a/collectors/proc.plugin/proc_diskstats.c +++ b/collectors/proc.plugin/proc_diskstats.c @@ -201,7 +201,7 @@ static unsigned long long int bcache_read_number_with_units(const char *filename static int unknown_units_error = 10; char *end = NULL; - long double value = str2ld(buffer, &end); + NETDATA_DOUBLE value = str2ndd(buffer, &end); if(end && *end) { if(*end == 'k') return (unsigned long long int)(value * 1024.0); @@ -830,6 +830,30 @@ static struct disk *get_disk(unsigned long major, unsigned long minor, char *dis return d; } +static void add_labels_to_disk(struct disk *d, RRDSET *st) { + rrdlabels_add(st->state->chart_labels, "device", d->disk, RRDLABEL_SRC_AUTO); + rrdlabels_add(st->state->chart_labels, "mount_point", d->mount_point, RRDLABEL_SRC_AUTO); + + switch (d->type) { + default: + case DISK_TYPE_UNKNOWN: + rrdlabels_add(st->state->chart_labels, "device_type", "unknown", RRDLABEL_SRC_AUTO); + break; + + case DISK_TYPE_PHYSICAL: + rrdlabels_add(st->state->chart_labels, "device_type", "physical", RRDLABEL_SRC_AUTO); + break; + + case DISK_TYPE_PARTITION: + rrdlabels_add(st->state->chart_labels, "device_type", "partition", RRDLABEL_SRC_AUTO); + break; + + case DISK_TYPE_VIRTUAL: + rrdlabels_add(st->state->chart_labels, "device_type", "virtual", RRDLABEL_SRC_AUTO); + break; + } +} + int do_proc_diskstats(int update_every, usec_t dt) { static procfile *ff = NULL; @@ -1067,6 +1091,8 @@ int do_proc_diskstats(int update_every, usec_t dt) { d->rd_io_reads = rrddim_add(d->st_io, "reads", NULL, d->sector_size, 1024, RRD_ALGORITHM_INCREMENTAL); d->rd_io_writes = rrddim_add(d->st_io, "writes", NULL, d->sector_size * -1, 1024, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_io); } else rrdset_next(d->st_io); @@ -1094,8 +1120,9 @@ int do_proc_diskstats(int update_every, usec_t dt) { , RRDSET_TYPE_AREA ); - d->rd_io_discards = - rrddim_add(d->st_ext_io, "discards", NULL, d->sector_size, 1024, RRD_ALGORITHM_INCREMENTAL); + d->rd_io_discards = rrddim_add(d->st_ext_io, "discards", NULL, d->sector_size, 1024, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_ext_io); } else rrdset_next(d->st_ext_io); @@ -1130,6 +1157,8 @@ int do_proc_diskstats(int update_every, usec_t dt) { d->rd_ops_reads = rrddim_add(d->st_ops, "reads", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_ops_writes = rrddim_add(d->st_ops, "writes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_ops); } else rrdset_next(d->st_ops); @@ -1162,7 +1191,10 @@ int do_proc_diskstats(int update_every, usec_t dt) { d->rd_ops_discards = rrddim_add(d->st_ext_ops, "discards", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); if (do_fl_stats) d->rd_ops_flushes = rrddim_add(d->st_ext_ops, "flushes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } else + + add_labels_to_disk(d, d->st_ext_ops); + } + else rrdset_next(d->st_ext_ops); last_discards = rrddim_set_by_pointer(d->st_ext_ops, d->rd_ops_discards, discards); @@ -1196,6 +1228,8 @@ int do_proc_diskstats(int update_every, usec_t dt) { rrdset_flag_set(d->st_qops, RRDSET_FLAG_DETAIL); d->rd_qops_operations = rrddim_add(d->st_qops, "operations", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_qops); } else rrdset_next(d->st_qops); @@ -1228,6 +1262,8 @@ int do_proc_diskstats(int update_every, usec_t dt) { rrdset_flag_set(d->st_backlog, RRDSET_FLAG_DETAIL); d->rd_backlog_backlog = rrddim_add(d->st_backlog, "backlog", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_backlog); } else rrdset_next(d->st_backlog); @@ -1259,8 +1295,9 @@ int do_proc_diskstats(int update_every, usec_t dt) { rrdset_flag_set(d->st_busy, RRDSET_FLAG_DETAIL); - d->rd_busy_busy = - rrddim_add(d->st_busy, "busy", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + d->rd_busy_busy = rrddim_add(d->st_busy, "busy", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_busy); } else rrdset_next(d->st_busy); @@ -1288,6 +1325,8 @@ int do_proc_diskstats(int update_every, usec_t dt) { rrdset_flag_set(d->st_util, RRDSET_FLAG_DETAIL); d->rd_util_utilization = rrddim_add(d->st_util, "utilization", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_util); } else rrdset_next(d->st_util); @@ -1326,6 +1365,8 @@ int do_proc_diskstats(int update_every, usec_t dt) { d->rd_mops_reads = rrddim_add(d->st_mops, "reads", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_mops_writes = rrddim_add(d->st_mops, "writes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_mops); } else rrdset_next(d->st_mops); @@ -1358,7 +1399,10 @@ int do_proc_diskstats(int update_every, usec_t dt) { rrdset_flag_set(d->st_ext_mops, RRDSET_FLAG_DETAIL); d->rd_mops_discards = rrddim_add(d->st_ext_mops, "discards", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } else + + add_labels_to_disk(d, d->st_ext_mops); + } + else rrdset_next(d->st_ext_mops); rrddim_set_by_pointer(d->st_ext_mops, d->rd_mops_discards, mdiscards); @@ -1391,6 +1435,8 @@ int do_proc_diskstats(int update_every, usec_t dt) { d->rd_iotime_reads = rrddim_add(d->st_iotime, "reads", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_iotime_writes = rrddim_add(d->st_iotime, "writes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_iotime); } else rrdset_next(d->st_iotime); @@ -1422,9 +1468,11 @@ int do_proc_diskstats(int update_every, usec_t dt) { d->rd_iotime_discards = rrddim_add(d->st_ext_iotime, "discards", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); if (do_fl_stats) - d->rd_iotime_flushes = - rrddim_add(d->st_ext_iotime, "flushes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } else + d->rd_iotime_flushes = rrddim_add(d->st_ext_iotime, "flushes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_ext_iotime); + } + else rrdset_next(d->st_ext_iotime); last_discardms = rrddim_set_by_pointer(d->st_ext_iotime, d->rd_iotime_discards, discardms); @@ -1465,6 +1513,8 @@ int do_proc_diskstats(int update_every, usec_t dt) { d->rd_await_reads = rrddim_add(d->st_await, "reads", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); d->rd_await_writes = rrddim_add(d->st_await, "writes", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_await); } else rrdset_next(d->st_await); @@ -1494,18 +1544,22 @@ int do_proc_diskstats(int update_every, usec_t dt) { d->rd_await_discards = rrddim_add(d->st_ext_await, "discards", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); if (do_fl_stats) - d->rd_await_flushes = - rrddim_add(d->st_ext_await, "flushes", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - } else + d->rd_await_flushes = rrddim_add(d->st_ext_await, "flushes", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_ext_await); + } + else rrdset_next(d->st_ext_await); rrddim_set_by_pointer( d->st_ext_await, d->rd_await_discards, (discards - last_discards) ? (discardms - last_discardms) / (discards - last_discards) : 0); + if (do_fl_stats) rrddim_set_by_pointer( d->st_ext_await, d->rd_await_flushes, (flushes - last_flushes) ? (flushms - last_flushms) / (flushes - last_flushes) : 0); + rrdset_done(d->st_ext_await); } @@ -1534,6 +1588,8 @@ int do_proc_diskstats(int update_every, usec_t dt) { d->rd_avgsz_reads = rrddim_add(d->st_avgsz, "reads", NULL, d->sector_size, 1024, RRD_ALGORITHM_ABSOLUTE); d->rd_avgsz_writes = rrddim_add(d->st_avgsz, "writes", NULL, d->sector_size * -1, 1024, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_avgsz); } else rrdset_next(d->st_avgsz); @@ -1561,9 +1617,11 @@ int do_proc_diskstats(int update_every, usec_t dt) { rrdset_flag_set(d->st_ext_avgsz, RRDSET_FLAG_DETAIL); - d->rd_avgsz_discards = - rrddim_add(d->st_ext_avgsz, "discards", NULL, d->sector_size, 1024, RRD_ALGORITHM_ABSOLUTE); - } else + d->rd_avgsz_discards = rrddim_add(d->st_ext_avgsz, "discards", NULL, d->sector_size, 1024, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_ext_avgsz); + } + else rrdset_next(d->st_ext_avgsz); rrddim_set_by_pointer( @@ -1599,8 +1657,11 @@ int do_proc_diskstats(int update_every, usec_t dt) { rrdset_flag_set(d->st_svctm, RRDSET_FLAG_DETAIL); d->rd_svctm_svctm = rrddim_add(d->st_svctm, "svctm", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_svctm); } - else rrdset_next(d->st_svctm); + else + rrdset_next(d->st_svctm); rrddim_set_by_pointer(d->st_svctm, d->rd_svctm_svctm, ((reads - last_reads) + (writes - last_writes)) ? (busy_ms - last_busy_ms) / ((reads - last_reads) + (writes - last_writes)) : 0); rrdset_done(d->st_svctm); @@ -1705,8 +1766,11 @@ int do_proc_diskstats(int update_every, usec_t dt) { d->rd_bcache_hit_ratio_1hour = rrddim_add(d->st_bcache_hit_ratio, "1hour", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); d->rd_bcache_hit_ratio_1day = rrddim_add(d->st_bcache_hit_ratio, "1day", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); d->rd_bcache_hit_ratio_total = rrddim_add(d->st_bcache_hit_ratio, "ever", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_bcache_hit_ratio); } - else rrdset_next(d->st_bcache_hit_ratio); + else + rrdset_next(d->st_bcache_hit_ratio); rrddim_set_by_pointer(d->st_bcache_hit_ratio, d->rd_bcache_hit_ratio_5min, stats_five_minute_cache_hit_ratio); rrddim_set_by_pointer(d->st_bcache_hit_ratio, d->rd_bcache_hit_ratio_1hour, stats_hour_cache_hit_ratio); @@ -1735,8 +1799,11 @@ int do_proc_diskstats(int update_every, usec_t dt) { d->rd_bcache_rate_congested = rrddim_add(d->st_bcache_rates, "congested", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); d->rd_bcache_rate_writeback = rrddim_add(d->st_bcache_rates, "writeback", NULL, -1, 1024, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_bcache_rates); } - else rrdset_next(d->st_bcache_rates); + else + rrdset_next(d->st_bcache_rates); rrddim_set_by_pointer(d->st_bcache_rates, d->rd_bcache_rate_writeback, writeback_rate); rrddim_set_by_pointer(d->st_bcache_rates, d->rd_bcache_rate_congested, cache_congested); @@ -1761,8 +1828,11 @@ int do_proc_diskstats(int update_every, usec_t dt) { ); d->rd_bcache_dirty_size = rrddim_add(d->st_bcache_size, "dirty", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_bcache_size); } - else rrdset_next(d->st_bcache_size); + else + rrdset_next(d->st_bcache_size); rrddim_set_by_pointer(d->st_bcache_size, d->rd_bcache_dirty_size, dirty_data); rrdset_done(d->st_bcache_size); @@ -1786,8 +1856,11 @@ int do_proc_diskstats(int update_every, usec_t dt) { ); d->rd_bcache_available_percent = rrddim_add(d->st_bcache_usage, "avail", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_disk(d, d->st_bcache_usage); } - else rrdset_next(d->st_bcache_usage); + else + rrdset_next(d->st_bcache_usage); rrddim_set_by_pointer(d->st_bcache_usage, d->rd_bcache_available_percent, cache_available_percent); rrdset_done(d->st_bcache_usage); @@ -1813,8 +1886,11 @@ int do_proc_diskstats(int update_every, usec_t dt) { d->rd_bcache_cache_read_races = rrddim_add(d->st_bcache_cache_read_races, "races", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_bcache_cache_io_errors = rrddim_add(d->st_bcache_cache_read_races, "errors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_bcache_cache_read_races); } - else rrdset_next(d->st_bcache_cache_read_races); + else + rrdset_next(d->st_bcache_cache_read_races); rrddim_set_by_pointer(d->st_bcache_cache_read_races, d->rd_bcache_cache_read_races, cache_read_races); rrddim_set_by_pointer(d->st_bcache_cache_read_races, d->rd_bcache_cache_io_errors, cache_io_errors); @@ -1849,8 +1925,11 @@ int do_proc_diskstats(int update_every, usec_t dt) { d->rd_bcache_misses = rrddim_add(d->st_bcache, "misses", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_bcache_miss_collisions = rrddim_add(d->st_bcache, "collisions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_bcache_readaheads = rrddim_add(d->st_bcache, "readaheads", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_bcache); } - else rrdset_next(d->st_bcache); + else + rrdset_next(d->st_bcache); rrddim_set_by_pointer(d->st_bcache, d->rd_bcache_hits, stats_total_cache_hits); rrddim_set_by_pointer(d->st_bcache, d->rd_bcache_misses, stats_total_cache_misses); @@ -1884,6 +1963,8 @@ int do_proc_diskstats(int update_every, usec_t dt) { d->rd_bcache_bypass_hits = rrddim_add(d->st_bcache_bypass, "hits", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_bcache_bypass_misses = rrddim_add(d->st_bcache_bypass, "misses", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_disk(d, d->st_bcache_bypass); } else rrdset_next(d->st_bcache_bypass); diff --git a/collectors/proc.plugin/proc_interrupts.c b/collectors/proc.plugin/proc_interrupts.c index 78883c475..46290554b 100644 --- a/collectors/proc.plugin/proc_interrupts.c +++ b/collectors/proc.plugin/proc_interrupts.c @@ -225,6 +225,10 @@ int do_proc_interrupts(int update_every, usec_t dt) { , update_every , RRDSET_TYPE_STACKED ); + + char core[50+1]; + snprintfz(core, 50, "cpu%d", c); + rrdlabels_add(core_st[c]->state->chart_labels, "cpu", core, RRDLABEL_SRC_AUTO); } else rrdset_next(core_st[c]); diff --git a/collectors/proc.plugin/proc_mdstat.c b/collectors/proc.plugin/proc_mdstat.c index bdc298d6b..c8015827e 100644 --- a/collectors/proc.plugin/proc_mdstat.c +++ b/collectors/proc.plugin/proc_mdstat.c @@ -77,6 +77,11 @@ static inline void make_chart_obsolete(char *name, const char *id_modifier) } } +static void add_labels_to_mdstat(struct raid *raid, RRDSET *st) { + rrdlabels_add(st->state->chart_labels, "device", raid->name, RRDLABEL_SRC_AUTO); + rrdlabels_add(st->state->chart_labels, "raid_level", raid->level, RRDLABEL_SRC_AUTO); +} + int do_proc_mdstat(int update_every, usec_t dt) { (void)dt; @@ -263,7 +268,7 @@ int do_proc_mdstat(int update_every, usec_t dt) word = procfile_lineword(ff, l, 3); remove_trailing_chars(word, '%'); - unsigned long long percentage = (unsigned long long)(str2ld(word, NULL) * 100); + unsigned long long percentage = (unsigned long long)(str2ndd(word, NULL) * 100); // possible operations: check, resync, recovery, reshape // 4-th character is unique for each operation so it is checked switch (procfile_lineword(ff, l, 1)[3]) { @@ -287,7 +292,7 @@ int do_proc_mdstat(int update_every, usec_t dt) word += 7; // skip leading "finish=" if (likely(s > word)) - raid->finish_in = (unsigned long long)(str2ld(word, NULL) * 60); + raid->finish_in = (unsigned long long)(str2ndd(word, NULL) * 60); word = procfile_lineword(ff, l, 6); s = remove_trailing_chars(word, 'K'); // remove trailing "K/sec" @@ -407,7 +412,8 @@ int do_proc_mdstat(int update_every, usec_t dt) RRDSET_TYPE_LINE); rrdset_isnot_obsolete(st_mdstat_health); - } else + } + else rrdset_next(st_mdstat_health); if (!redundant_num) { @@ -458,7 +464,10 @@ int do_proc_mdstat(int update_every, usec_t dt) RRDSET_TYPE_STACKED); rrdset_isnot_obsolete(raid->st_disks); - } else + + add_labels_to_mdstat(raid, raid->st_disks); + } + else rrdset_next(raid->st_disks); if (unlikely(!raid->rd_inuse && !(raid->rd_inuse = rrddim_find_active(raid->st_disks, "inuse")))) @@ -495,7 +504,10 @@ int do_proc_mdstat(int update_every, usec_t dt) RRDSET_TYPE_LINE); rrdset_isnot_obsolete(raid->st_mismatch_cnt); - } else + + add_labels_to_mdstat(raid, raid->st_mismatch_cnt); + } + else rrdset_next(raid->st_mismatch_cnt); if (unlikely(!raid->rd_mismatch_cnt && !(raid->rd_mismatch_cnt = rrddim_find_active(raid->st_mismatch_cnt, "count")))) @@ -529,7 +541,10 @@ int do_proc_mdstat(int update_every, usec_t dt) RRDSET_TYPE_LINE); rrdset_isnot_obsolete(raid->st_operation); - } else + + add_labels_to_mdstat(raid, raid->st_operation); + } + else rrdset_next(raid->st_operation); if(unlikely(!raid->rd_check && !(raid->rd_check = rrddim_find_active(raid->st_operation, "check")))) @@ -569,7 +584,10 @@ int do_proc_mdstat(int update_every, usec_t dt) update_every, RRDSET_TYPE_LINE); rrdset_isnot_obsolete(raid->st_finish); - } else + + add_labels_to_mdstat(raid, raid->st_finish); + } + else rrdset_next(raid->st_finish); if(unlikely(!raid->rd_finish_in && !(raid->rd_finish_in = rrddim_find_active(raid->st_finish, "finish_in")))) @@ -601,7 +619,10 @@ int do_proc_mdstat(int update_every, usec_t dt) RRDSET_TYPE_LINE); rrdset_isnot_obsolete(raid->st_speed); - } else + + add_labels_to_mdstat(raid, raid->st_speed); + } + else rrdset_next(raid->st_speed); if (unlikely(!raid->rd_speed && !(raid->rd_speed = rrddim_find_active(raid->st_speed, "speed")))) @@ -635,7 +656,10 @@ int do_proc_mdstat(int update_every, usec_t dt) RRDSET_TYPE_LINE); rrdset_isnot_obsolete(raid->st_nonredundant); - } else + + add_labels_to_mdstat(raid, raid->st_nonredundant); + } + else rrdset_next(raid->st_nonredundant); if (unlikely(!raid->rd_nonredundant && !(raid->rd_nonredundant = rrddim_find_active(raid->st_nonredundant, "available")))) diff --git a/collectors/proc.plugin/proc_net_dev.c b/collectors/proc.plugin/proc_net_dev.c index 74076ff76..79572f442 100644 --- a/collectors/proc.plugin/proc_net_dev.c +++ b/collectors/proc.plugin/proc_net_dev.c @@ -7,19 +7,39 @@ #define STATE_LENGTH_MAX 32 -// As defined in https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-class-net -const char *operstate_names[] = { "unknown", "notpresent", "down", "lowerlayerdown", "testing", "dormant", "up" }; +enum { + NETDEV_DUPLEX_UNKNOWN, + NETDEV_DUPLEX_HALF, + NETDEV_DUPLEX_FULL +}; + +enum { + NETDEV_OPERSTATE_UNKNOWN, + NETDEV_OPERSTATE_NOTPRESENT, + NETDEV_OPERSTATE_DOWN, + NETDEV_OPERSTATE_LOWERLAYERDOWN, + NETDEV_OPERSTATE_TESTING, + NETDEV_OPERSTATE_DORMANT, + NETDEV_OPERSTATE_UP +}; static inline int get_operstate(char *operstate) { - int i; - - for (i = 0; i < (int) (sizeof(operstate_names) / sizeof(char *)); i++) { - if (!strcmp(operstate, operstate_names[i])) { - return i; - } - } - return 0; + // As defined in https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-class-net + if (!strcmp(operstate, "up")) + return NETDEV_OPERSTATE_UP; + if (!strcmp(operstate, "down")) + return NETDEV_OPERSTATE_DOWN; + if (!strcmp(operstate, "notpresent")) + return NETDEV_OPERSTATE_NOTPRESENT; + if (!strcmp(operstate, "lowerlayerdown")) + return NETDEV_OPERSTATE_LOWERLAYERDOWN; + if (!strcmp(operstate, "testing")) + return NETDEV_OPERSTATE_TESTING; + if (!strcmp(operstate, "dormant")) + return NETDEV_OPERSTATE_DORMANT; + + return NETDEV_OPERSTATE_UNKNOWN; } // ---------------------------------------------------------------------------- @@ -90,7 +110,7 @@ static struct netdev { const char *chart_family; - struct label *chart_labels; + DICTIONARY *chart_labels; int flipped; unsigned long priority; @@ -153,9 +173,18 @@ static struct netdev { RRDDIM *rd_tcompressed; RRDDIM *rd_speed; - RRDDIM *rd_duplex; - RRDDIM *rd_operstate; - RRDDIM *rd_carrier; + RRDDIM *rd_duplex_full; + RRDDIM *rd_duplex_half; + RRDDIM *rd_duplex_unknown; + RRDDIM *rd_operstate_unknown; + RRDDIM *rd_operstate_notpresent; + RRDDIM *rd_operstate_down; + RRDDIM *rd_operstate_lowerlayerdown; + RRDDIM *rd_operstate_testing; + RRDDIM *rd_operstate_dormant; + RRDDIM *rd_operstate_up; + RRDDIM *rd_carrier_up; + RRDDIM *rd_carrier_down; RRDDIM *rd_mtu; char *filename_speed; @@ -219,11 +248,21 @@ static void netdev_charts_release(struct netdev *d) { d->rd_tcompressed = NULL; d->rd_speed = NULL; - d->rd_duplex = NULL; - d->rd_operstate = NULL; - d->rd_carrier = NULL; + d->rd_duplex_full = NULL; + d->rd_duplex_half = NULL; + d->rd_duplex_unknown = NULL; + d->rd_carrier_up = NULL; + d->rd_carrier_down = NULL; d->rd_mtu = NULL; + d->rd_operstate_unknown = NULL; + d->rd_operstate_notpresent = NULL; + d->rd_operstate_down = NULL; + d->rd_operstate_lowerlayerdown = NULL; + d->rd_operstate_testing = NULL; + d->rd_operstate_dormant = NULL; + d->rd_operstate_up = NULL; + d->chart_var_speed = NULL; } @@ -273,7 +312,7 @@ static void netdev_free_chart_strings(struct netdev *d) { static void netdev_free(struct netdev *d) { netdev_charts_release(d); netdev_free_chart_strings(d); - free_label_list(d->chart_labels); + rrdlabels_destroy(d->chart_labels); freez((void *)d->name); freez((void *)d->filename_speed); @@ -294,8 +333,9 @@ static struct netdev_rename { const char *container_device; const char *container_name; + const char *ctx_prefix; - struct label *chart_labels; + DICTIONARY *chart_labels; int processed; @@ -317,7 +357,11 @@ static struct netdev_rename *netdev_rename_find(const char *host_device, uint32_ // other threads can call this function to register a rename to a netdev void netdev_rename_device_add( - const char *host_device, const char *container_device, const char *container_name, struct label *labels) + const char *host_device, + const char *container_device, + const char *container_name, + DICTIONARY *labels, + const char *ctx_prefix) { netdata_mutex_lock(&netdev_rename_mutex); @@ -328,7 +372,9 @@ void netdev_rename_device_add( r->host_device = strdupz(host_device); r->container_device = strdupz(container_device); r->container_name = strdupz(container_name); - update_label_list(&r->chart_labels, labels); + r->ctx_prefix = strdupz(ctx_prefix); + r->chart_labels = rrdlabels_create(); + rrdlabels_migrate_to_these(r->chart_labels, labels); r->hash = hash; r->next = netdev_rename_root; r->processed = 0; @@ -344,7 +390,7 @@ void netdev_rename_device_add( r->container_device = strdupz(container_device); r->container_name = strdupz(container_name); - update_label_list(&r->chart_labels, labels); + rrdlabels_migrate_to_these(r->chart_labels, labels); r->processed = 0; netdev_pending_renames++; @@ -377,7 +423,8 @@ void netdev_rename_device_del(const char *host_device) { freez((void *) r->host_device); freez((void *) r->container_name); freez((void *) r->container_device); - free_label_list(r->chart_labels); + freez((void *) r->ctx_prefix); + rrdlabels_destroy(r->chart_labels); freez((void *) r); break; } @@ -433,23 +480,35 @@ static inline void netdev_rename_cgroup(struct netdev *d, struct netdev_rename * snprintfz(buffer, RRD_ID_LENGTH_MAX, "net_mtu_%s", r->container_device); d->chart_id_net_mtu = strdupz(buffer); - d->chart_ctx_net_bytes = strdupz("cgroup.net_net"); - d->chart_ctx_net_compressed = strdupz("cgroup.net_compressed"); - d->chart_ctx_net_drops = strdupz("cgroup.net_drops"); - d->chart_ctx_net_errors = strdupz("cgroup.net_errors"); - d->chart_ctx_net_events = strdupz("cgroup.net_events"); - d->chart_ctx_net_fifo = strdupz("cgroup.net_fifo"); - d->chart_ctx_net_packets = strdupz("cgroup.net_packets"); - d->chart_ctx_net_speed = strdupz("cgroup.net_speed"); - d->chart_ctx_net_duplex = strdupz("cgroup.net_duplex"); - d->chart_ctx_net_operstate = strdupz("cgroup.net_operstate"); - d->chart_ctx_net_carrier = strdupz("cgroup.net_carrier"); - d->chart_ctx_net_mtu = strdupz("cgroup.net_mtu"); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_net", r->ctx_prefix); + d->chart_ctx_net_bytes = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_compressed", r->ctx_prefix); + d->chart_ctx_net_compressed = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_drops", r->ctx_prefix); + d->chart_ctx_net_drops = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_errors", r->ctx_prefix); + d->chart_ctx_net_errors = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_events", r->ctx_prefix); + d->chart_ctx_net_events = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_fifo", r->ctx_prefix); + d->chart_ctx_net_fifo = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_packets", r->ctx_prefix); + d->chart_ctx_net_packets = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_speed", r->ctx_prefix); + d->chart_ctx_net_speed = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_duplex", r->ctx_prefix); + d->chart_ctx_net_duplex = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_operstate", r->ctx_prefix); + d->chart_ctx_net_operstate = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_carrier", r->ctx_prefix); + d->chart_ctx_net_carrier = strdupz(buffer); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%scgroup.net_mtu", r->ctx_prefix); + d->chart_ctx_net_mtu = strdupz(buffer); snprintfz(buffer, RRD_ID_LENGTH_MAX, "net %s", r->container_device); d->chart_family = strdupz(buffer); - update_label_list(&d->chart_labels, r->chart_labels); + rrdlabels_copy(d->chart_labels, r->chart_labels); d->priority = NETDATA_CHART_PRIO_CGROUP_NET_IFACE; d->flipped = 1; @@ -542,6 +601,7 @@ static struct netdev *get_netdev(const char *name) { d->name = strdupz(name); d->hash = simple_hash(d->name); d->len = strlen(d->name); + d->chart_labels = rrdlabels_create(); d->chart_type_net_bytes = strdupz("net"); d->chart_type_net_compressed = strdupz("net_compressed"); @@ -702,11 +762,15 @@ int do_proc_net_dev(int update_every, usec_t dt) { char buffer[FILENAME_MAX + 1]; snprintfz(buffer, FILENAME_MAX, path_to_sys_devices_virtual_net, d->name); - if(likely(access(buffer, R_OK) == 0)) { + if (likely(access(buffer, R_OK) == 0)) { d->virtual = 1; + rrdlabels_add(d->chart_labels, "interface_type", "virtual", RRDLABEL_SRC_AUTO|RRDLABEL_FLAG_PERMANENT); } - else + else { d->virtual = 0; + rrdlabels_add(d->chart_labels, "interface_type", "real", RRDLABEL_SRC_AUTO|RRDLABEL_FLAG_PERMANENT); + } + rrdlabels_add(d->chart_labels, "device", name, RRDLABEL_SRC_AUTO|RRDLABEL_FLAG_PERMANENT); if(likely(!d->virtual)) { // set the filename to get the interface speed @@ -813,11 +877,11 @@ int do_proc_net_dev(int update_every, usec_t dt) { } else { // values can be unknown, half or full -- just check the first letter for speed if (buffer[0] == 'f') - d->duplex = 2; + d->duplex = NETDEV_DUPLEX_FULL; else if (buffer[0] == 'h') - d->duplex = 1; + d->duplex = NETDEV_DUPLEX_HALF; else - d->duplex = 0; + d->duplex = NETDEV_DUPLEX_UNKNOWN; } } else { d->duplex = 0; @@ -881,7 +945,7 @@ int do_proc_net_dev(int update_every, usec_t dt) { , RRDSET_TYPE_AREA ); - rrdset_update_labels(d->st_bandwidth, d->chart_labels); + rrdset_update_rrdlabels(d->st_bandwidth, d->chart_labels); d->rd_rbytes = rrddim_add(d->st_bandwidth, "received", NULL, 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); d->rd_tbytes = rrddim_add(d->st_bandwidth, "sent", NULL, -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); @@ -926,7 +990,7 @@ int do_proc_net_dev(int update_every, usec_t dt) { d->filename_speed = NULL; } else { - rrdsetvar_custom_chart_variable_set(d->chart_var_speed, (calculated_number) d->speed * KILOBITS_IN_A_MEGABIT); + rrdsetvar_custom_chart_variable_set(d->chart_var_speed, (NETDATA_DOUBLE) d->speed * KILOBITS_IN_A_MEGABIT); if(d->do_speed != CONFIG_BOOLEAN_NO) { if(unlikely(!d->st_speed)) { @@ -947,7 +1011,7 @@ int do_proc_net_dev(int update_every, usec_t dt) { rrdset_flag_set(d->st_speed, RRDSET_FLAG_DETAIL); - rrdset_update_labels(d->st_speed, d->chart_labels); + rrdset_update_rrdlabels(d->st_speed, d->chart_labels); d->rd_speed = rrddim_add(d->st_speed, "speed", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); } @@ -982,13 +1046,17 @@ int do_proc_net_dev(int update_every, usec_t dt) { rrdset_flag_set(d->st_duplex, RRDSET_FLAG_DETAIL); - rrdset_update_labels(d->st_duplex, d->chart_labels); + rrdset_update_rrdlabels(d->st_duplex, d->chart_labels); - d->rd_duplex = rrddim_add(d->st_duplex, "duplex", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_duplex_full = rrddim_add(d->st_duplex, "full", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_duplex_half = rrddim_add(d->st_duplex, "half", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_duplex_unknown = rrddim_add(d->st_duplex, "unknown", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); } else rrdset_next(d->st_duplex); - rrddim_set_by_pointer(d->st_duplex, d->rd_duplex, (collected_number)d->duplex); + rrddim_set_by_pointer(d->st_duplex, d->rd_duplex_full, (collected_number)(d->duplex == NETDEV_DUPLEX_FULL)); + rrddim_set_by_pointer(d->st_duplex, d->rd_duplex_half, (collected_number)(d->duplex == NETDEV_DUPLEX_HALF)); + rrddim_set_by_pointer(d->st_duplex, d->rd_duplex_unknown, (collected_number)(d->duplex == NETDEV_DUPLEX_UNKNOWN)); rrdset_done(d->st_duplex); } @@ -1013,13 +1081,25 @@ int do_proc_net_dev(int update_every, usec_t dt) { rrdset_flag_set(d->st_operstate, RRDSET_FLAG_DETAIL); - rrdset_update_labels(d->st_operstate, d->chart_labels); + rrdset_update_rrdlabels(d->st_operstate, d->chart_labels); - d->rd_operstate = rrddim_add(d->st_operstate, "state", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_operstate_up = rrddim_add(d->st_operstate, "up", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_operstate_down = rrddim_add(d->st_operstate, "down", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_operstate_notpresent = rrddim_add(d->st_operstate, "notpresent", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_operstate_lowerlayerdown = rrddim_add(d->st_operstate, "lowerlayerdown", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_operstate_testing = rrddim_add(d->st_operstate, "testing", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_operstate_dormant = rrddim_add(d->st_operstate, "dormant", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_operstate_unknown = rrddim_add(d->st_operstate, "unknown", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); } else rrdset_next(d->st_operstate); - rrddim_set_by_pointer(d->st_operstate, d->rd_operstate, (collected_number)d->operstate); + rrddim_set_by_pointer(d->st_operstate, d->rd_operstate_up, (collected_number)(d->operstate == NETDEV_OPERSTATE_UP)); + rrddim_set_by_pointer(d->st_operstate, d->rd_operstate_down, (collected_number)(d->operstate == NETDEV_OPERSTATE_DOWN)); + rrddim_set_by_pointer(d->st_operstate, d->rd_operstate_notpresent, (collected_number)(d->operstate == NETDEV_OPERSTATE_NOTPRESENT)); + rrddim_set_by_pointer(d->st_operstate, d->rd_operstate_lowerlayerdown, (collected_number)(d->operstate == NETDEV_OPERSTATE_LOWERLAYERDOWN)); + rrddim_set_by_pointer(d->st_operstate, d->rd_operstate_testing, (collected_number)(d->operstate == NETDEV_OPERSTATE_TESTING)); + rrddim_set_by_pointer(d->st_operstate, d->rd_operstate_dormant, (collected_number)(d->operstate == NETDEV_OPERSTATE_DORMANT)); + rrddim_set_by_pointer(d->st_operstate, d->rd_operstate_unknown, (collected_number)(d->operstate == NETDEV_OPERSTATE_UNKNOWN)); rrdset_done(d->st_operstate); } @@ -1044,13 +1124,15 @@ int do_proc_net_dev(int update_every, usec_t dt) { rrdset_flag_set(d->st_carrier, RRDSET_FLAG_DETAIL); - rrdset_update_labels(d->st_carrier, d->chart_labels); + rrdset_update_rrdlabels(d->st_carrier, d->chart_labels); - d->rd_carrier = rrddim_add(d->st_carrier, "carrier", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_carrier_up = rrddim_add(d->st_carrier, "up", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + d->rd_carrier_down = rrddim_add(d->st_carrier, "down", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); } else rrdset_next(d->st_carrier); - rrddim_set_by_pointer(d->st_carrier, d->rd_carrier, (collected_number)d->carrier); + rrddim_set_by_pointer(d->st_carrier, d->rd_carrier_up, (collected_number)(d->carrier == 1)); + rrddim_set_by_pointer(d->st_carrier, d->rd_carrier_down, (collected_number)(d->carrier != 1)); rrdset_done(d->st_carrier); } @@ -1075,7 +1157,7 @@ int do_proc_net_dev(int update_every, usec_t dt) { rrdset_flag_set(d->st_mtu, RRDSET_FLAG_DETAIL); - rrdset_update_labels(d->st_mtu, d->chart_labels); + rrdset_update_rrdlabels(d->st_mtu, d->chart_labels); d->rd_mtu = rrddim_add(d->st_mtu, "mtu", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); } @@ -1111,7 +1193,7 @@ int do_proc_net_dev(int update_every, usec_t dt) { rrdset_flag_set(d->st_packets, RRDSET_FLAG_DETAIL); - rrdset_update_labels(d->st_packets, d->chart_labels); + rrdset_update_rrdlabels(d->st_packets, d->chart_labels); d->rd_rpackets = rrddim_add(d->st_packets, "received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_tpackets = rrddim_add(d->st_packets, "sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); @@ -1159,7 +1241,7 @@ int do_proc_net_dev(int update_every, usec_t dt) { rrdset_flag_set(d->st_errors, RRDSET_FLAG_DETAIL); - rrdset_update_labels(d->st_errors, d->chart_labels); + rrdset_update_rrdlabels(d->st_errors, d->chart_labels); d->rd_rerrors = rrddim_add(d->st_errors, "inbound", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_terrors = rrddim_add(d->st_errors, "outbound", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); @@ -1205,7 +1287,7 @@ int do_proc_net_dev(int update_every, usec_t dt) { rrdset_flag_set(d->st_drops, RRDSET_FLAG_DETAIL); - rrdset_update_labels(d->st_drops, d->chart_labels); + rrdset_update_rrdlabels(d->st_drops, d->chart_labels); d->rd_rdrops = rrddim_add(d->st_drops, "inbound", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_tdrops = rrddim_add(d->st_drops, "outbound", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); @@ -1251,7 +1333,7 @@ int do_proc_net_dev(int update_every, usec_t dt) { rrdset_flag_set(d->st_fifo, RRDSET_FLAG_DETAIL); - rrdset_update_labels(d->st_fifo, d->chart_labels); + rrdset_update_rrdlabels(d->st_fifo, d->chart_labels); d->rd_rfifo = rrddim_add(d->st_fifo, "receive", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_tfifo = rrddim_add(d->st_fifo, "transmit", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); @@ -1297,7 +1379,7 @@ int do_proc_net_dev(int update_every, usec_t dt) { rrdset_flag_set(d->st_compressed, RRDSET_FLAG_DETAIL); - rrdset_update_labels(d->st_compressed, d->chart_labels); + rrdset_update_rrdlabels(d->st_compressed, d->chart_labels); d->rd_rcompressed = rrddim_add(d->st_compressed, "received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_tcompressed = rrddim_add(d->st_compressed, "sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); @@ -1343,7 +1425,7 @@ int do_proc_net_dev(int update_every, usec_t dt) { rrdset_flag_set(d->st_events, RRDSET_FLAG_DETAIL); - rrdset_update_labels(d->st_events, d->chart_labels); + rrdset_update_rrdlabels(d->st_events, d->chart_labels); d->rd_rframe = rrddim_add(d->st_events, "frames", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_tcollisions = rrddim_add(d->st_events, "collisions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); diff --git a/collectors/proc.plugin/proc_net_wireless.c b/collectors/proc.plugin/proc_net_wireless.c index cb2443b1e..c6ee4ff70 100644 --- a/collectors/proc.plugin/proc_net_wireless.c +++ b/collectors/proc.plugin/proc_net_wireless.c @@ -24,9 +24,9 @@ static struct netwireless { kernel_uint_t status; // Quality - calculated_number link; - calculated_number level; - calculated_number noise; + NETDATA_DOUBLE link; + NETDATA_DOUBLE level; + NETDATA_DOUBLE noise; // Discarded packets kernel_uint_t nwid; @@ -198,6 +198,10 @@ static void configure_device(int do_status, int do_quality, int do_discarded_pac wireless_dev->chart_id_net_missed_beacon = strdupz(buffer); } +static void add_labels_to_wireless(struct netwireless *w, RRDSET *st) { + rrdlabels_add(st->state->chart_labels, "device", w->name, RRDLABEL_SRC_AUTO); +} + int do_proc_net_wireless(int update_every, usec_t dt) { UNUSED(dt); @@ -209,21 +213,11 @@ int do_proc_net_wireless(int update_every, usec_t dt) char filename[FILENAME_MAX + 1]; snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/net/wireless"); - proc_net_wireless_filename = config_get(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS,"filename to monitor", - filename); - - do_status = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS, - "status for all interfaces", CONFIG_BOOLEAN_AUTO); - - do_quality = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS, - "quality for all interfaces", CONFIG_BOOLEAN_AUTO); - - do_discarded_packets = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS, - "discarded packets for all interfaces", - CONFIG_BOOLEAN_AUTO); - - do_beacon = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS, - "missed beacon for all interface", CONFIG_BOOLEAN_AUTO); + proc_net_wireless_filename = config_get(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS,"filename to monitor", filename); + do_status = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS, "status for all interfaces", CONFIG_BOOLEAN_AUTO); + do_quality = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS, "quality for all interfaces", CONFIG_BOOLEAN_AUTO); + do_discarded_packets = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS, "discarded packets for all interfaces", CONFIG_BOOLEAN_AUTO); + do_beacon = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS, "missed beacon for all interface", CONFIG_BOOLEAN_AUTO); } if (unlikely(!ff)) { @@ -255,25 +249,28 @@ int do_proc_net_wireless(int update_every, usec_t dt) wireless_dev->status = str2kernel_uint_t(procfile_lineword(ff, l, 1)); if (unlikely(!wireless_dev->st_status)) { - wireless_dev->st_status = rrdset_create_localhost("wireless", - wireless_dev->chart_id_net_status, - NULL, - wireless_dev->name, - "wireless.status", - "Internal status reported by interface.", - "status", - PLUGIN_PROC_NAME, - PLUGIN_PROC_MODULE_NETWIRELESS_NAME, - NETDATA_CHART_PRIO_WIRELESS_IFACE, - update_every, - RRDSET_TYPE_LINE); + wireless_dev->st_status = rrdset_create_localhost( + "wireless", + wireless_dev->chart_id_net_status, + NULL, + wireless_dev->name, + "wireless.status", + "Internal status reported by interface.", + "status", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_NETWIRELESS_NAME, + NETDATA_CHART_PRIO_WIRELESS_IFACE, + update_every, + RRDSET_TYPE_LINE); + rrdset_flag_set(wireless_dev->st_status, RRDSET_FLAG_DETAIL); - wireless_dev->rd_status = rrddim_add(wireless_dev->st_status, "status", NULL, 1, - 1, RRD_ALGORITHM_ABSOLUTE); - } else { - rrdset_next(wireless_dev->st_status); + wireless_dev->rd_status = rrddim_add(wireless_dev->st_status, "status", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_wireless(wireless_dev, wireless_dev->st_status); } + else + rrdset_next(wireless_dev->st_status); rrddim_set_by_pointer(wireless_dev->st_status, wireless_dev->rd_status, (collected_number)wireless_dev->status); @@ -281,83 +278,86 @@ int do_proc_net_wireless(int update_every, usec_t dt) } if (likely(do_quality != CONFIG_BOOLEAN_NO)) { - wireless_dev->link = str2ld(procfile_lineword(ff, l, 2), NULL); - wireless_dev->level = str2ld(procfile_lineword(ff, l, 3), NULL); - wireless_dev->noise = str2ld(procfile_lineword(ff, l, 4), NULL); + wireless_dev->link = str2ndd(procfile_lineword(ff, l, 2), NULL); + wireless_dev->level = str2ndd(procfile_lineword(ff, l, 3), NULL); + wireless_dev->noise = str2ndd(procfile_lineword(ff, l, 4), NULL); if (unlikely(!wireless_dev->st_link)) { - wireless_dev->st_link = rrdset_create_localhost("wireless", - wireless_dev->chart_id_net_link, - NULL, - wireless_dev->name, - "wireless.link_quality", - "Overall quality of the link. This is an aggregate value, and depends on the driver and hardware.", - "value", - PLUGIN_PROC_NAME, - PLUGIN_PROC_MODULE_NETWIRELESS_NAME, - NETDATA_CHART_PRIO_WIRELESS_IFACE + 1, - update_every, - RRDSET_TYPE_LINE); + wireless_dev->st_link = rrdset_create_localhost( + "wireless", + wireless_dev->chart_id_net_link, + NULL, + wireless_dev->name, + "wireless.link_quality", + "Overall quality of the link. This is an aggregate value, and depends on the driver and hardware.", + "value", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_NETWIRELESS_NAME, + NETDATA_CHART_PRIO_WIRELESS_IFACE + 1, + update_every, + RRDSET_TYPE_LINE); rrdset_flag_set(wireless_dev->st_link, RRDSET_FLAG_DETAIL); - wireless_dev->rd_link = rrddim_add(wireless_dev->st_link, "link_quality", NULL, 1, 1, - RRD_ALGORITHM_ABSOLUTE); - } else { - rrdset_next(wireless_dev->st_link); + wireless_dev->rd_link = rrddim_add(wireless_dev->st_link, "link_quality", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_wireless(wireless_dev, wireless_dev->st_link); } + else + rrdset_next(wireless_dev->st_link); if (unlikely(!wireless_dev->st_level)) { - wireless_dev->st_level = rrdset_create_localhost("wireless", - wireless_dev->chart_id_net_level, - NULL, - wireless_dev->name, - "wireless.signal_level", - "The signal level is the wireless signal power level received by the wireless client. The closer the value is to 0, the stronger the signal.", - "dBm", - PLUGIN_PROC_NAME, - PLUGIN_PROC_MODULE_NETWIRELESS_NAME, - NETDATA_CHART_PRIO_WIRELESS_IFACE + 2, - update_every, - RRDSET_TYPE_LINE); + wireless_dev->st_level = rrdset_create_localhost( + "wireless", + wireless_dev->chart_id_net_level, + NULL, + wireless_dev->name, + "wireless.signal_level", + "The signal level is the wireless signal power level received by the wireless client. The closer the value is to 0, the stronger the signal.", + "dBm", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_NETWIRELESS_NAME, + NETDATA_CHART_PRIO_WIRELESS_IFACE + 2, + update_every, + RRDSET_TYPE_LINE); rrdset_flag_set(wireless_dev->st_level, RRDSET_FLAG_DETAIL); - wireless_dev->rd_level = rrddim_add(wireless_dev->st_level, "signal_level", NULL, 1, 1, - RRD_ALGORITHM_ABSOLUTE); - } else { - rrdset_next(wireless_dev->st_level); + wireless_dev->rd_level = rrddim_add(wireless_dev->st_level, "signal_level", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_wireless(wireless_dev, wireless_dev->st_level); } + else + rrdset_next(wireless_dev->st_level); if (unlikely(!wireless_dev->st_noise)) { - wireless_dev->st_noise = rrdset_create_localhost("wireless", - wireless_dev->chart_id_net_noise, - NULL, - wireless_dev->name, - "wireless.noise_level", - "The noise level indicates the amount of background noise in your environment. The closer the value to 0, the greater the noise level.", - "dBm", - PLUGIN_PROC_NAME, - PLUGIN_PROC_MODULE_NETWIRELESS_NAME, - NETDATA_CHART_PRIO_WIRELESS_IFACE + 3, - update_every, - RRDSET_TYPE_LINE); + wireless_dev->st_noise = rrdset_create_localhost( + "wireless", + wireless_dev->chart_id_net_noise, + NULL, + wireless_dev->name, + "wireless.noise_level", + "The noise level indicates the amount of background noise in your environment. The closer the value to 0, the greater the noise level.", + "dBm", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_NETWIRELESS_NAME, + NETDATA_CHART_PRIO_WIRELESS_IFACE + 3, + update_every, + RRDSET_TYPE_LINE); rrdset_flag_set(wireless_dev->st_noise, RRDSET_FLAG_DETAIL); - wireless_dev->rd_noise = rrddim_add(wireless_dev->st_noise, "noise_level", NULL, 1, 1, - RRD_ALGORITHM_ABSOLUTE); - } else { - rrdset_next(wireless_dev->st_noise); + wireless_dev->rd_noise = rrddim_add(wireless_dev->st_noise, "noise_level", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_wireless(wireless_dev, wireless_dev->st_noise); } + else + rrdset_next(wireless_dev->st_noise); - rrddim_set_by_pointer(wireless_dev->st_link, wireless_dev->rd_link, - (collected_number)wireless_dev->link); + rrddim_set_by_pointer(wireless_dev->st_link, wireless_dev->rd_link, (collected_number)wireless_dev->link); rrdset_done(wireless_dev->st_link); - rrddim_set_by_pointer(wireless_dev->st_level, wireless_dev->rd_level, - (collected_number)wireless_dev->level); + rrddim_set_by_pointer(wireless_dev->st_level, wireless_dev->rd_level, (collected_number)wireless_dev->level); rrdset_done(wireless_dev->st_level); - rrddim_set_by_pointer(wireless_dev->st_noise, wireless_dev->rd_noise, - (collected_number)wireless_dev->noise); + rrddim_set_by_pointer(wireless_dev->st_noise, wireless_dev->rd_noise, (collected_number)wireless_dev->noise); rrdset_done(wireless_dev->st_noise); } @@ -369,49 +369,38 @@ int do_proc_net_wireless(int update_every, usec_t dt) wireless_dev->misc = str2kernel_uint_t(procfile_lineword(ff, l, 9)); if (unlikely(!wireless_dev->st_discarded_packets)) { - wireless_dev->st_discarded_packets = rrdset_create_localhost("wireless", - wireless_dev->chart_id_net_discarded_packets, - NULL, - wireless_dev->name, - "wireless.discarded_packets", - "Packet discarded in the wireless adapter due to \"wireless\" specific problems.", - "packets/s", - PLUGIN_PROC_NAME, - PLUGIN_PROC_MODULE_NETWIRELESS_NAME, - NETDATA_CHART_PRIO_WIRELESS_IFACE + 4, - update_every, - RRDSET_TYPE_LINE); + wireless_dev->st_discarded_packets = rrdset_create_localhost( + "wireless", + wireless_dev->chart_id_net_discarded_packets, + NULL, + wireless_dev->name, + "wireless.discarded_packets", + "Packet discarded in the wireless adapter due to \"wireless\" specific problems.", + "packets/s", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_NETWIRELESS_NAME, + NETDATA_CHART_PRIO_WIRELESS_IFACE + 4, + update_every, + RRDSET_TYPE_LINE); rrdset_flag_set(wireless_dev->st_discarded_packets, RRDSET_FLAG_DETAIL); - wireless_dev->rd_nwid = rrddim_add(wireless_dev->st_discarded_packets, "nwid", NULL, 1, - 1, RRD_ALGORITHM_INCREMENTAL); - wireless_dev->rd_crypt = rrddim_add(wireless_dev->st_discarded_packets, "crypt", NULL, 1, - 1, RRD_ALGORITHM_INCREMENTAL); - wireless_dev->rd_frag = rrddim_add(wireless_dev->st_discarded_packets, "frag", NULL, 1, - 1, RRD_ALGORITHM_INCREMENTAL); - wireless_dev->rd_retry = rrddim_add(wireless_dev->st_discarded_packets, "retry", NULL, 1, - 1, RRD_ALGORITHM_INCREMENTAL); - wireless_dev->rd_misc = rrddim_add(wireless_dev->st_discarded_packets, "misc", NULL, 1, - 1, RRD_ALGORITHM_INCREMENTAL); - } else { - rrdset_next(wireless_dev->st_discarded_packets); - } - - rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_nwid, - (collected_number)wireless_dev->nwid); - - rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_crypt, - (collected_number)wireless_dev->crypt); + wireless_dev->rd_nwid = rrddim_add(wireless_dev->st_discarded_packets, "nwid", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + wireless_dev->rd_crypt = rrddim_add(wireless_dev->st_discarded_packets, "crypt", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + wireless_dev->rd_frag = rrddim_add(wireless_dev->st_discarded_packets, "frag", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + wireless_dev->rd_retry = rrddim_add(wireless_dev->st_discarded_packets, "retry", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + wireless_dev->rd_misc = rrddim_add(wireless_dev->st_discarded_packets, "misc", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_frag, - (collected_number)wireless_dev->frag); - - rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_retry, - (collected_number)wireless_dev->retry); + add_labels_to_wireless(wireless_dev, wireless_dev->st_discarded_packets); + } + else + rrdset_next(wireless_dev->st_discarded_packets); - rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_misc, - (collected_number)wireless_dev->misc); + rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_nwid, (collected_number)wireless_dev->nwid); + rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_crypt, (collected_number)wireless_dev->crypt); + rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_frag, (collected_number)wireless_dev->frag); + rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_retry, (collected_number)wireless_dev->retry); + rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_misc, (collected_number)wireless_dev->misc); rrdset_done(wireless_dev->st_discarded_packets); } @@ -420,28 +409,31 @@ int do_proc_net_wireless(int update_every, usec_t dt) wireless_dev->missed_beacon = str2kernel_uint_t(procfile_lineword(ff, l, 10)); if (unlikely(!wireless_dev->st_missed_beacon)) { - wireless_dev->st_missed_beacon = rrdset_create_localhost("wireless", - wireless_dev->chart_id_net_missed_beacon, - NULL, - wireless_dev->name, - "wireless.missed_beacons", - "Number of missed beacons.", - "frames/s", - PLUGIN_PROC_NAME, - PLUGIN_PROC_MODULE_NETWIRELESS_NAME, - NETDATA_CHART_PRIO_WIRELESS_IFACE + 5, - update_every, - RRDSET_TYPE_LINE); + wireless_dev->st_missed_beacon = rrdset_create_localhost( + "wireless", + wireless_dev->chart_id_net_missed_beacon, + NULL, + wireless_dev->name, + "wireless.missed_beacons", + "Number of missed beacons.", + "frames/s", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_NETWIRELESS_NAME, + NETDATA_CHART_PRIO_WIRELESS_IFACE + 5, + update_every, + RRDSET_TYPE_LINE); + rrdset_flag_set(wireless_dev->st_missed_beacon, RRDSET_FLAG_DETAIL); - wireless_dev->rd_missed_beacon = rrddim_add(wireless_dev->st_missed_beacon, "missed_beacons", - NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } else { - rrdset_next(wireless_dev->st_missed_beacon); + wireless_dev->rd_missed_beacon = rrddim_add(wireless_dev->st_missed_beacon, "missed_beacons", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + add_labels_to_wireless(wireless_dev, wireless_dev->st_missed_beacon); } + else + rrdset_next(wireless_dev->st_missed_beacon); + + rrddim_set_by_pointer(wireless_dev->st_missed_beacon, wireless_dev->rd_missed_beacon, (collected_number)wireless_dev->missed_beacon); - rrddim_set_by_pointer(wireless_dev->st_missed_beacon, wireless_dev->rd_missed_beacon, - (collected_number)wireless_dev->missed_beacon); rrdset_done(wireless_dev->st_missed_beacon); } diff --git a/collectors/proc.plugin/proc_pagetypeinfo.c b/collectors/proc.plugin/proc_pagetypeinfo.c index e1026cf51..017edc49a 100644 --- a/collectors/proc.plugin/proc_pagetypeinfo.c +++ b/collectors/proc.plugin/proc_pagetypeinfo.c @@ -242,15 +242,14 @@ int do_proc_pagetypeinfo(int update_every, usec_t dt) { // "Node" + NUMA-NodeID + ZoneName + TypeName char setname[4+1+MAX_ZONETYPE_NAME+1+MAX_PAGETYPE_NAME +1]; - snprintfz(setname, MAX_ZONETYPE_NAME + MAX_PAGETYPE_NAME, "Node %d %s %s", - pgl->node, pgl->zone, pgl->type); + snprintfz(setname, MAX_ZONETYPE_NAME + MAX_PAGETYPE_NAME, "Node %d %s %s", pgl->node, pgl->zone, pgl->type); st_nodezonetype[p] = rrdset_create_localhost( "mem" , setid , NULL , "pagetype" - , NULL + , "mem.pagetype" , setname , "B" , PLUGIN_PROC_NAME @@ -259,6 +258,13 @@ int do_proc_pagetypeinfo(int update_every, usec_t dt) { , update_every , RRDSET_TYPE_STACKED ); + + char node[50+1]; + snprintfz(node, 50, "node%d", pgl->node); + rrdlabels_add(st_nodezonetype[p]->state->chart_labels, "node_id", node, RRDLABEL_SRC_AUTO); + rrdlabels_add(st_nodezonetype[p]->state->chart_labels, "node_zone", pgl->zone, RRDLABEL_SRC_AUTO); + rrdlabels_add(st_nodezonetype[p]->state->chart_labels, "node_type", pgl->type, RRDLABEL_SRC_AUTO); + for (o = 0; o < pageorders_cnt; o++) { char dimid[3+1]; snprintfz(dimid, 3, "%lu", o); diff --git a/collectors/proc.plugin/proc_self_mountinfo.c b/collectors/proc.plugin/proc_self_mountinfo.c index ca00f8a89..4456d5978 100644 --- a/collectors/proc.plugin/proc_self_mountinfo.c +++ b/collectors/proc.plugin/proc_self_mountinfo.c @@ -182,6 +182,33 @@ static inline int is_read_only(const char *s) { return 0; } +// for the full list of protected mount points look at +// https://github.com/systemd/systemd/blob/1eb3ef78b4df28a9e9f464714208f2682f957e36/src/core/namespace.c#L142-L149 +// https://github.com/systemd/systemd/blob/1eb3ef78b4df28a9e9f464714208f2682f957e36/src/core/namespace.c#L180-L194 +static const char *systemd_protected_mount_points[] = { + "/home", + "/root", + "/usr", + "/boot", + "/efi", + "/etc", + "/run/user", + "/lib", + "/lib64", + "/bin", + "/sbin", + NULL +}; + +static inline int mount_point_is_protected(char *mount_point) +{ + for (size_t i = 0; systemd_protected_mount_points[i] != NULL; i++) + if (!strcmp(mount_point, systemd_protected_mount_points[i])) + return 1; + + return 0; +} + // read the whole mountinfo into a linked list struct mountinfo *mountinfo_read(int do_statvfs) { char filename[FILENAME_MAX + 1]; @@ -199,11 +226,21 @@ struct mountinfo *mountinfo_read(int do_statvfs) { struct mountinfo *root = NULL, *last = NULL, *mi = NULL; + // create a dictionary to track uniqueness + DICTIONARY *dict = dictionary_create(DICTIONARY_FLAG_SINGLE_THREADED|DICTIONARY_FLAG_DONT_OVERWRITE_VALUE|DICTIONARY_FLAG_NAME_LINK_DONT_CLONE); + unsigned long l, lines = procfile_lines(ff); for(l = 0; l < lines ;l++) { if(unlikely(procfile_linewords(ff, l) < 5)) continue; + // make sure we don't add the same item twice + char *v = (char *)dictionary_set(dict, procfile_lineword(ff, l, 4), "N", 2); + if(v) { + if(*v == 'O') continue; + *v = 'O'; + } + mi = mallocz(sizeof(struct mountinfo)); unsigned long w = 0; @@ -242,6 +279,9 @@ struct mountinfo *mountinfo_read(int do_statvfs) { if(unlikely(is_read_only(mi->mount_options))) mi->flags |= MOUNTINFO_READONLY; + if(unlikely(mount_point_is_protected(mi->mount_point))) + mi->flags |= MOUNTINFO_IS_IN_SYSD_PROTECTED_LIST; + // count the optional fields /* unsigned long wo = w; @@ -411,6 +451,7 @@ struct mountinfo *mountinfo_read(int do_statvfs) { } */ + dictionary_destroy(dict); procfile_close(ff); return root; } diff --git a/collectors/proc.plugin/proc_self_mountinfo.h b/collectors/proc.plugin/proc_self_mountinfo.h index 92918a73d..b915550a7 100644 --- a/collectors/proc.plugin/proc_self_mountinfo.h +++ b/collectors/proc.plugin/proc_self_mountinfo.h @@ -10,6 +10,7 @@ #define MOUNTINFO_NO_STAT 0x00000010 #define MOUNTINFO_NO_SIZE 0x00000020 #define MOUNTINFO_READONLY 0x00000040 +#define MOUNTINFO_IS_IN_SYSD_PROTECTED_LIST 0x00000080 struct mountinfo { long id; // mount ID: unique identifier of the mount (may be reused after umount(2)). diff --git a/collectors/proc.plugin/proc_softirqs.c b/collectors/proc.plugin/proc_softirqs.c index bb14c1596..7eff28c98 100644 --- a/collectors/proc.plugin/proc_softirqs.c +++ b/collectors/proc.plugin/proc_softirqs.c @@ -217,6 +217,10 @@ int do_proc_softirqs(int update_every, usec_t dt) { , update_every , RRDSET_TYPE_STACKED ); + + char core[50+1]; + snprintfz(core, 50, "cpu%d", c); + rrdlabels_add(core_st[c]->state->chart_labels, "cpu", core, RRDLABEL_SRC_AUTO); } else rrdset_next(core_st[c]); diff --git a/collectors/proc.plugin/proc_stat.c b/collectors/proc.plugin/proc_stat.c index c889f0736..6faba55a9 100644 --- a/collectors/proc.plugin/proc_stat.c +++ b/collectors/proc.plugin/proc_stat.c @@ -1039,6 +1039,10 @@ int do_proc_stat(int update_every, usec_t dt) { , RRDSET_TYPE_STACKED ); + char corebuf[50+1]; + snprintfz(corebuf, 50, "cpu%zu", core); + rrdlabels_add(cpuidle_charts[core].st->state->chart_labels, "cpu", corebuf, RRDLABEL_SRC_AUTO); + char cpuidle_dim_id[RRD_ID_LENGTH_MAX + 1]; cpuidle_charts[core].active_time_rd = rrddim_add(cpuidle_charts[core].st, "active", "C0 (active)", 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); for(state = 0; state < cpuidle_charts[core].cpuidle_state_len; state++) { diff --git a/collectors/proc.plugin/sys_block_zram.c b/collectors/proc.plugin/sys_block_zram.c index 3a39b3b66..ddd1e7ae0 100644 --- a/collectors/proc.plugin/sys_block_zram.c +++ b/collectors/proc.plugin/sys_block_zram.c @@ -75,6 +75,7 @@ static inline void init_rrd(const char *name, ZRAM_DEVICE *d, int update_every) , RRDSET_TYPE_AREA); d->rd_compr_data_size = rrddim_add(d->st_usage, "compressed", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); d->rd_metadata_size = rrddim_add(d->st_usage, "metadata", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrdlabels_add(d->st_usage->state->chart_labels, "device", name, RRDLABEL_SRC_AUTO); snprintfz(chart_name, RRD_ID_LENGTH_MAX, "zram_savings.%s", name); d->st_savings = rrdset_create_localhost( @@ -92,6 +93,7 @@ static inline void init_rrd(const char *name, ZRAM_DEVICE *d, int update_every) , RRDSET_TYPE_AREA); d->rd_savings_size = rrddim_add(d->st_savings, "savings", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); d->rd_original_size = rrddim_add(d->st_savings, "original", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrdlabels_add(d->st_savings->state->chart_labels, "device", name, RRDLABEL_SRC_AUTO); snprintfz(chart_name, RRD_ID_LENGTH_MAX, "zram_ratio.%s", name); d->st_comp_ratio = rrdset_create_localhost( @@ -108,6 +110,7 @@ static inline void init_rrd(const char *name, ZRAM_DEVICE *d, int update_every) , update_every , RRDSET_TYPE_LINE); d->rd_comp_ratio = rrddim_add(d->st_comp_ratio, "ratio", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + rrdlabels_add(d->st_comp_ratio->state->chart_labels, "device", name, RRDLABEL_SRC_AUTO); snprintfz(chart_name, RRD_ID_LENGTH_MAX, "zram_efficiency.%s", name); d->st_alloc_efficiency = rrdset_create_localhost( @@ -124,6 +127,7 @@ static inline void init_rrd(const char *name, ZRAM_DEVICE *d, int update_every) , update_every , RRDSET_TYPE_LINE); d->rd_alloc_efficiency = rrddim_add(d->st_alloc_efficiency, "percent", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + rrdlabels_add(d->st_alloc_efficiency->state->chart_labels, "device", name, RRDLABEL_SRC_AUTO); } static int init_devices(DICTIONARY *devices, unsigned int zram_id, int update_every) { diff --git a/collectors/proc.plugin/sys_class_power_supply.c b/collectors/proc.plugin/sys_class_power_supply.c index c558a384d..a80d46e93 100644 --- a/collectors/proc.plugin/sys_class_power_supply.c +++ b/collectors/proc.plugin/sys_class_power_supply.c @@ -112,6 +112,10 @@ void power_supply_free(struct power_supply *ps) { } } +static void add_labels_to_power_supply(struct power_supply *ps, RRDSET *st) { + rrdlabels_add(st->state->chart_labels, "device", ps->name, RRDLABEL_SRC_AUTO); +} + int do_sys_class_power_supply(int update_every, usec_t dt) { (void)dt; static int do_capacity = -1, do_property[3] = {-1}; @@ -358,6 +362,8 @@ int do_sys_class_power_supply(int update_every, usec_t dt) { , update_every , RRDSET_TYPE_LINE ); + + add_labels_to_power_supply(ps, ps->capacity->st); } else rrdset_next(ps->capacity->st); @@ -389,6 +395,8 @@ int do_sys_class_power_supply(int update_every, usec_t dt) { , update_every , RRDSET_TYPE_LINE ); + + add_labels_to_power_supply(ps, pr->st); } else rrdset_next(pr->st); diff --git a/collectors/proc.plugin/sys_devices_system_edac_mc.c b/collectors/proc.plugin/sys_devices_system_edac_mc.c index b11148375..290157903 100644 --- a/collectors/proc.plugin/sys_devices_system_edac_mc.c +++ b/collectors/proc.plugin/sys_devices_system_edac_mc.c @@ -74,7 +74,7 @@ int do_proc_sys_devices_system_edac_mc(int update_every, usec_t dt) { } static int do_ce = -1, do_ue = -1; - calculated_number ce_sum = 0, ue_sum = 0; + NETDATA_DOUBLE ce_sum = 0, ue_sum = 0; struct mc *m; if(unlikely(do_ce == -1)) { diff --git a/collectors/proc.plugin/sys_devices_system_node.c b/collectors/proc.plugin/sys_devices_system_node.c index ff408ed88..fd3394309 100644 --- a/collectors/proc.plugin/sys_devices_system_node.c +++ b/collectors/proc.plugin/sys_devices_system_node.c @@ -115,6 +115,8 @@ int do_proc_sys_devices_system_node(int update_every, usec_t dt) { , RRDSET_TYPE_LINE ); + rrdlabels_add(m->numastat_st->state->chart_labels, "numa_node", m->name, RRDLABEL_SRC_AUTO); + rrdset_flag_set(m->numastat_st, RRDSET_FLAG_DETAIL); rrddim_add(m->numastat_st, "numa_hit", "hit", 1, 1, RRD_ALGORITHM_INCREMENTAL); diff --git a/collectors/proc.plugin/sys_fs_btrfs.c b/collectors/proc.plugin/sys_fs_btrfs.c index e28b60a7a..158587a8f 100644 --- a/collectors/proc.plugin/sys_fs_btrfs.c +++ b/collectors/proc.plugin/sys_fs_btrfs.c @@ -448,6 +448,11 @@ static inline int find_all_btrfs_pools(const char *path) { return 0; } +static void add_labels_to_btrfs(BTRFS_NODE *n, RRDSET *st) { + rrdlabels_add(st->state->chart_labels, "device", n->id, RRDLABEL_SRC_AUTO); + rrdlabels_add(st->state->chart_labels, "device_label", n->label, RRDLABEL_SRC_AUTO); +} + int do_sys_fs_btrfs(int update_every, usec_t dt) { static int initialized = 0 , do_allocation_disks = CONFIG_BOOLEAN_AUTO @@ -579,6 +584,8 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) { node->rd_allocation_disks_metadata_used = rrddim_add(node->st_allocation_disks, "meta_used", "meta used", 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); node->rd_allocation_disks_system_free = rrddim_add(node->st_allocation_disks, "sys_free", "sys free", 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); node->rd_allocation_disks_system_used = rrddim_add(node->st_allocation_disks, "sys_used", "sys used", 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_btrfs(node, node->st_allocation_disks); } else rrdset_next(node->st_allocation_disks); @@ -632,6 +639,8 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) { node->rd_allocation_data_free = rrddim_add(node->st_allocation_data, "free", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); node->rd_allocation_data_used = rrddim_add(node->st_allocation_data, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_btrfs(node, node->st_allocation_data); } else rrdset_next(node->st_allocation_data); @@ -676,6 +685,8 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) { node->rd_allocation_metadata_free = rrddim_add(node->st_allocation_metadata, "free", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); node->rd_allocation_metadata_used = rrddim_add(node->st_allocation_metadata, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); node->rd_allocation_metadata_reserved = rrddim_add(node->st_allocation_metadata, "reserved", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_btrfs(node, node->st_allocation_metadata); } else rrdset_next(node->st_allocation_metadata); @@ -720,6 +731,8 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) { node->rd_allocation_system_free = rrddim_add(node->st_allocation_system, "free", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); node->rd_allocation_system_used = rrddim_add(node->st_allocation_system, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + add_labels_to_btrfs(node, node->st_allocation_system); } else rrdset_next(node->st_allocation_system); diff --git a/collectors/python.d.plugin/Makefile.am b/collectors/python.d.plugin/Makefile.am index 667f1627c..9377ebe8d 100644 --- a/collectors/python.d.plugin/Makefile.am +++ b/collectors/python.d.plugin/Makefile.am @@ -48,7 +48,6 @@ include bind_rndc/Makefile.inc include boinc/Makefile.inc include ceph/Makefile.inc include changefinder/Makefile.inc -include chrony/Makefile.inc include dockerd/Makefile.inc include dovecot/Makefile.inc include example/Makefile.inc @@ -71,7 +70,6 @@ include nginx_plus/Makefile.inc include nvidia_smi/Makefile.inc include nsd/Makefile.inc include ntpd/Makefile.inc -include ovpn_status_log/Makefile.inc include openldap/Makefile.inc include oracledb/Makefile.inc include postfix/Makefile.inc diff --git a/collectors/python.d.plugin/chrony/Makefile.inc b/collectors/python.d.plugin/chrony/Makefile.inc deleted file mode 100644 index 18a805b12..000000000 --- a/collectors/python.d.plugin/chrony/Makefile.inc +++ /dev/null @@ -1,13 +0,0 @@ -# SPDX-License-Identifier: GPL-3.0-or-later - -# THIS IS NOT A COMPLETE Makefile -# IT IS INCLUDED BY ITS PARENT'S Makefile.am -# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT - -# install these files -dist_python_DATA += chrony/chrony.chart.py -dist_pythonconfig_DATA += chrony/chrony.conf - -# do not install these files, but include them in the distribution -dist_noinst_DATA += chrony/README.md chrony/Makefile.inc - diff --git a/collectors/python.d.plugin/chrony/README.md b/collectors/python.d.plugin/chrony/README.md deleted file mode 100644 index 3093ec3f9..000000000 --- a/collectors/python.d.plugin/chrony/README.md +++ /dev/null @@ -1,61 +0,0 @@ -<!-- -title: "Chrony monitoring with Netdata" -custom_edit_url: https://github.com/netdata/netdata/edit/master/collectors/python.d.plugin/chrony/README.md -sidebar_label: "Chrony" ---> - -# Chrony monitoring with Netdata - -Monitors the precision and statistics of a local chronyd server, and produces: - -- frequency -- last offset -- RMS offset -- residual freq -- root delay -- root dispersion -- skew -- system time - -## Requirements - -Verify that user Netdata can execute `chronyc tracking`. If necessary, update `/etc/chrony.conf`, `cmdallow`. - -## Enable the collector - -The `chrony` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config -directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` file. - -```bash -cd /etc/netdata # Replace this path with your Netdata config directory, if different -sudo ./edit-config python.d.conf -``` - -Change the value of the `chrony` setting to `yes`. Save the file and restart the Netdata Agent with `sudo systemctl -restart netdata`, or the appropriate method for your system, to finish enabling the `chrony` collector. - -## Configuration - -Edit the `python.d/chrony.conf` configuration file using `edit-config` from the Netdata [config -directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. - -```bash -cd /etc/netdata # Replace this path with your Netdata config directory, if different -sudo ./edit-config python.d/chrony.conf -``` - -Sample: - -```yaml -# data collection frequency: -update_every: 1 - -# chrony query command: -local: - command: 'chronyc -n tracking' -``` - -Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate -method](/docs/configure/start-stop-restart.md) for your system, to finish configuring the `chrony` collector. - - diff --git a/collectors/python.d.plugin/chrony/chrony.chart.py b/collectors/python.d.plugin/chrony/chrony.chart.py deleted file mode 100644 index 91f725001..000000000 --- a/collectors/python.d.plugin/chrony/chrony.chart.py +++ /dev/null @@ -1,118 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: chrony netdata python.d module -# Author: Dominik Schloesser (domschl) -# SPDX-License-Identifier: GPL-3.0-or-later - -from bases.FrameworkServices.ExecutableService import ExecutableService - -# default module values (can be overridden per job in `config`) -update_every = 5 - -CHRONY_COMMAND = 'chronyc -n tracking' - -# charts order (can be overridden if you want less charts, or different order) -ORDER = [ - 'system', - 'offsets', - 'stratum', - 'root', - 'frequency', - 'residualfreq', - 'skew', -] - -CHARTS = { - 'system': { - 'options': [None, 'Chrony System Time Deltas', 'microseconds', 'system', 'chrony.system', 'area'], - 'lines': [ - ['timediff', 'system time', 'absolute', 1, 1000] - ] - }, - 'offsets': { - 'options': [None, 'Chrony System Time Offsets', 'microseconds', 'system', 'chrony.offsets', 'area'], - 'lines': [ - ['lastoffset', 'last offset', 'absolute', 1, 1000], - ['rmsoffset', 'RMS offset', 'absolute', 1, 1000] - ] - }, - 'stratum': { - 'options': [None, 'Chrony Stratum', 'stratum', 'root', 'chrony.stratum', 'line'], - 'lines': [ - ['stratum', None, 'absolute', 1, 1] - ] - }, - 'root': { - 'options': [None, 'Chrony Root Delays', 'milliseconds', 'root', 'chrony.root', 'line'], - 'lines': [ - ['rootdelay', 'delay', 'absolute', 1, 1000000], - ['rootdispersion', 'dispersion', 'absolute', 1, 1000000] - ] - }, - 'frequency': { - 'options': [None, 'Chrony Frequency', 'ppm', 'frequencies', 'chrony.frequency', 'area'], - 'lines': [ - ['frequency', None, 'absolute', 1, 1000] - ] - }, - 'residualfreq': { - 'options': [None, 'Chrony Residual frequency', 'ppm', 'frequencies', 'chrony.residualfreq', 'area'], - 'lines': [ - ['residualfreq', 'residual frequency', 'absolute', 1, 1000] - ] - }, - 'skew': { - 'options': [None, 'Chrony Skew, error bound on frequency', 'ppm', 'frequencies', 'chrony.skew', 'area'], - 'lines': [ - ['skew', None, 'absolute', 1, 1000] - ] - } -} - -CHRONY = [ - ('Frequency', 'frequency', 1e3), - ('Last offset', 'lastoffset', 1e9), - ('RMS offset', 'rmsoffset', 1e9), - ('Residual freq', 'residualfreq', 1e3), - ('Root delay', 'rootdelay', 1e9), - ('Root dispersion', 'rootdispersion', 1e9), - ('Skew', 'skew', 1e3), - ('Stratum', 'stratum', 1), - ('System time', 'timediff', 1e9) -] - - -class Service(ExecutableService): - def __init__(self, configuration=None, name=None): - ExecutableService.__init__( - self, configuration=configuration, name=name) - self.order = ORDER - self.definitions = CHARTS - self.command = CHRONY_COMMAND - - def _get_data(self): - """ - Format data received from shell command - :return: dict - """ - raw_data = self._get_raw_data() - if not raw_data: - return None - - raw_data = (line.split(':', 1) for line in raw_data) - parsed, data = dict(), dict() - - for line in raw_data: - try: - key, value = (l.strip() for l in line) - except ValueError: - continue - if value: - parsed[key] = value.split()[0] - - for key, dim_id, multiplier in CHRONY: - try: - data[dim_id] = int(float(parsed[key]) * multiplier) - except (KeyError, ValueError): - continue - - return data or None diff --git a/collectors/python.d.plugin/chrony/chrony.conf b/collectors/python.d.plugin/chrony/chrony.conf deleted file mode 100644 index fd95519b5..000000000 --- a/collectors/python.d.plugin/chrony/chrony.conf +++ /dev/null @@ -1,77 +0,0 @@ -# netdata python.d.plugin configuration for chrony -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -update_every: 5 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, chrony also supports the following: -# -# command: 'chrony tracking' # the command to run -# - -# ---------------------------------------------------------------------- -# REQUIRED chrony CONFIGURATION -# -# netdata will query chrony as user netdata. -# verify that user netdata is allowed to call 'chronyc tracking' -# Check cmdallow in chrony.conf -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS - -local: - command: 'chronyc -n tracking' diff --git a/collectors/python.d.plugin/haproxy/README.md b/collectors/python.d.plugin/haproxy/README.md index 4eb962e24..f16e7258e 100644 --- a/collectors/python.d.plugin/haproxy/README.md +++ b/collectors/python.d.plugin/haproxy/README.md @@ -9,30 +9,32 @@ sidebar_label: "HAProxy" Monitors frontend and backend metrics such as bytes in, bytes out, sessions current, sessions in queue current. And health metrics such as backend servers status (server check should be used). -Plugin can obtain data from url **OR** unix socket. +Plugin can obtain data from URL or Unix socket. -**Requirement:** -Socket MUST be readable AND writable by the `netdata` user. +Requirement: + +- Socket must be readable and writable by the `netdata` user. +- URL must have `stats uri <path>` present in the haproxy config, otherwise you will get HTTP 503 in the haproxy logs. It produces: -1. **Frontend** family charts +1. **Frontend** family charts - - Kilobytes in/s - - Kilobytes out/s - - Sessions current - - Sessions in queue current + - Kilobytes in/s + - Kilobytes out/s + - Sessions current + - Sessions in queue current -2. **Backend** family charts +2. **Backend** family charts - - Kilobytes in/s - - Kilobytes out/s - - Sessions current - - Sessions in queue current + - Kilobytes in/s + - Kilobytes out/s + - Sessions current + - Sessions in queue current -3. **Health** chart +3. **Health** chart - - number of failed servers for every backend (in DOWN state) + - number of failed servers for every backend (in DOWN state) ## Configuration @@ -48,20 +50,18 @@ Sample: ```yaml via_url: - user : 'username' # ONLY IF stats auth is used - pass : 'password' # # ONLY IF stats auth is used - url : 'http://ip.address:port/url;csv;norefresh' + user: 'username' # ONLY IF stats auth is used + pass: 'password' # # ONLY IF stats auth is used + url: 'http://ip.address:port/url;csv;norefresh' ``` OR ```yaml via_socket: - socket : 'path/to/haproxy/sock' + socket: 'path/to/haproxy/sock' ``` If no configuration is given, module will fail to run. --- - - diff --git a/collectors/python.d.plugin/mongodb/mongodb.chart.py b/collectors/python.d.plugin/mongodb/mongodb.chart.py index bec94d3ef..5e8fec834 100644 --- a/collectors/python.d.plugin/mongodb/mongodb.chart.py +++ b/collectors/python.d.plugin/mongodb/mongodb.chart.py @@ -10,7 +10,7 @@ from datetime import datetime from sys import exc_info try: - from pymongo import MongoClient, ASCENDING, DESCENDING + from pymongo import MongoClient, ASCENDING, DESCENDING, version_tuple from pymongo.errors import PyMongoError PYMONGO = True @@ -750,7 +750,7 @@ class Service(SimpleService): CONN_PARAM_HOST: conf.get(CONN_PARAM_HOST, DEFAULT_HOST), CONN_PARAM_PORT: conf.get(CONN_PARAM_PORT, DEFAULT_PORT), } - if hasattr(MongoClient, 'server_selection_timeout'): + if hasattr(MongoClient, 'server_selection_timeout') or version_tuple[0] >= 4: params[CONN_PARAM_SERVER_SELECTION_TIMEOUT_MS] = conf.get('timeout', DEFAULT_TIMEOUT) params.update(self.build_ssl_connection_params()) diff --git a/collectors/python.d.plugin/ovpn_status_log/Makefile.inc b/collectors/python.d.plugin/ovpn_status_log/Makefile.inc deleted file mode 100644 index 1fbc506d6..000000000 --- a/collectors/python.d.plugin/ovpn_status_log/Makefile.inc +++ /dev/null @@ -1,13 +0,0 @@ -# SPDX-License-Identifier: GPL-3.0-or-later - -# THIS IS NOT A COMPLETE Makefile -# IT IS INCLUDED BY ITS PARENT'S Makefile.am -# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT - -# install these files -dist_python_DATA += ovpn_status_log/ovpn_status_log.chart.py -dist_pythonconfig_DATA += ovpn_status_log/ovpn_status_log.conf - -# do not install these files, but include them in the distribution -dist_noinst_DATA += ovpn_status_log/README.md ovpn_status_log/Makefile.inc - diff --git a/collectors/python.d.plugin/ovpn_status_log/README.md b/collectors/python.d.plugin/ovpn_status_log/README.md deleted file mode 100644 index 374d1910e..000000000 --- a/collectors/python.d.plugin/ovpn_status_log/README.md +++ /dev/null @@ -1,50 +0,0 @@ -<!-- -title: "OpenVPN monitoring with Netdata" -custom_edit_url: https://github.com/netdata/netdata/edit/master/collectors/python.d.plugin/ovpn_status_log/README.md -sidebar_label: "OpenVPN" ---> - -# OpenVPN monitoring with Netdata - -Parses server log files and provides summary (client, traffic) metrics. - -## Requirements - -- If you are running multiple OpenVPN instances out of the same directory, MAKE SURE TO EDIT DIRECTIVES which create output files - so that multiple instances do not overwrite each other's output files. - -- Make sure NETDATA USER CAN READ openvpn-status.log - -- Update_every interval MUST MATCH interval on which OpenVPN writes operational status to log file. - -It produces: - -1. **Users** OpenVPN active users - - - users - -2. **Traffic** OpenVPN overall bandwidth usage in kilobit/s - - - in - - out - -## Configuration - -Edit the `python.d/ovpn_status_log.conf` configuration file using `edit-config` from the Netdata [config -directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. - -```bash -cd /etc/netdata # Replace this path with your Netdata config directory, if different -sudo ./edit-config python.d/ovpn_status_log.conf -``` - -Sample: - -```yaml -default - log_path : '/var/log/openvpn-status.log' -``` - ---- - - diff --git a/collectors/python.d.plugin/ovpn_status_log/ovpn_status_log.chart.py b/collectors/python.d.plugin/ovpn_status_log/ovpn_status_log.chart.py deleted file mode 100644 index cfc87be36..000000000 --- a/collectors/python.d.plugin/ovpn_status_log/ovpn_status_log.chart.py +++ /dev/null @@ -1,136 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: openvpn status log netdata python.d module -# Author: ilyam8 -# SPDX-License-Identifier: GPL-3.0-or-later - -import re - -from bases.FrameworkServices.SimpleService import SimpleService - -update_every = 10 - -ORDER = [ - 'users', - 'traffic', -] - -CHARTS = { - 'users': { - 'options': [None, 'OpenVPN Active Users', 'active users', 'users', 'openvpn_status.users', 'line'], - 'lines': [ - ['users', None, 'absolute'], - ] - }, - 'traffic': { - 'options': [None, 'OpenVPN Traffic', 'KiB/s', 'traffic', 'openvpn_status.traffic', 'area'], - 'lines': [ - ['bytes_in', 'in', 'incremental', 1, 1 << 10], - ['bytes_out', 'out', 'incremental', -1, 1 << 10] - ] - } -} - -TLS_REGEX = re.compile( - r'(?:[0-9a-f]+:[0-9a-f:]+|(?:\d{1,3}(?:\.\d{1,3}){3}(?::\d+)?)) (?P<bytes_in>\d+) (?P<bytes_out>\d+)' -) -STATIC_KEY_REGEX = re.compile( - r'TCP/[A-Z]+ (?P<direction>(?:read|write)) bytes,(?P<bytes>\d+)' -) - - -class Service(SimpleService): - def __init__(self, configuration=None, name=None): - SimpleService.__init__(self, configuration=configuration, name=name) - self.order = ORDER - self.definitions = CHARTS - self.log_path = self.configuration.get('log_path') - self.regex = { - 'tls': TLS_REGEX, - 'static_key': STATIC_KEY_REGEX - } - - def check(self): - if not (self.log_path and isinstance(self.log_path, str)): - self.error("'log_path' is not defined") - return False - - data = self._get_raw_data() - if not data: - self.error('Make sure that the openvpn status log file exists and netdata has permission to read it') - return None - - found = None - for row in data: - if 'ROUTING' in row: - self.get_data = self.get_data_tls - found = True - break - elif 'STATISTICS' in row: - self.get_data = self.get_data_static_key - found = True - break - if found: - return True - self.error('Failed to parse openvpn log file') - return False - - def _get_raw_data(self): - """ - Open log file - :return: str - """ - - try: - with open(self.log_path) as log: - raw_data = log.readlines() or None - except OSError: - return None - else: - return raw_data - - def get_data_static_key(self): - """ - Parse openvpn-status log file. - """ - - raw_data = self._get_raw_data() - if not raw_data: - return None - - data = dict(bytes_in=0, bytes_out=0) - - for row in raw_data: - match = self.regex['static_key'].search(row) - if match: - match = match.groupdict() - if match['direction'] == 'read': - data['bytes_in'] += int(match['bytes']) - else: - data['bytes_out'] += int(match['bytes']) - - return data or None - - def get_data_tls(self): - """ - Parse openvpn-status log file. - """ - - raw_data = self._get_raw_data() - if not raw_data: - return None - - data = dict(users=0, bytes_in=0, bytes_out=0) - for row in raw_data: - columns = row.split(',') if ',' in row else row.split() - if 'UNDEF' in columns: - # see https://openvpn.net/archive/openvpn-users/2004-08/msg00116.html - continue - - match = self.regex['tls'].search(' '.join(columns)) - if match: - match = match.groupdict() - data['users'] += 1 - data['bytes_in'] += int(match['bytes_in']) - data['bytes_out'] += int(match['bytes_out']) - - return data or None diff --git a/collectors/python.d.plugin/ovpn_status_log/ovpn_status_log.conf b/collectors/python.d.plugin/ovpn_status_log/ovpn_status_log.conf deleted file mode 100644 index 1d71f6b8e..000000000 --- a/collectors/python.d.plugin/ovpn_status_log/ovpn_status_log.conf +++ /dev/null @@ -1,97 +0,0 @@ -# netdata python.d.plugin configuration for openvpn status log -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, openvpn status log also supports the following: -# -# log_path: 'PATH' # the path to openvpn status log file -# -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS -# only one of them will run (they have the same name) -# -# IMPORTANT information -# -# 1. If you are running multiple OpenVPN instances out of the same directory, MAKE SURE TO EDIT DIRECTIVES which create output files -# so that multiple instances do not overwrite each other's output files. -# 2. Make sure NETDATA USER CAN READ openvpn-status.log -# -# * cd into directory with openvpn-status.log and run the following commands as root -# * #chown :netdata openvpn-status.log && chmod 640 openvpn-status.log -# * To check permission and group membership run -# * #ls -l openvpn-status.log -# -rw-r----- 1 root netdata 359 dec 21 21:22 openvpn-status.log -# -# 3. Update_every interval MUST MATCH interval on which OpenVPN writes operational status to log file. -# If its not true traffic chart WILL DISPLAY WRONG values -# -# Default OpenVPN update interval is 10 second on Debian 8 -# # ps -C openvpn -o command= -# /usr/sbin/openvpn --daemon ovpn-server --status /run/openvpn/server.status 10 --cd /etc/openvpn --config /etc/openvpn/server.conf -# -# 4. Confirm status is configured in your OpenVPN configuration. -# * Open OpenVPN config in an editor (e.g. sudo nano /etc/openvpn/default.conf) -# * Confirm status is enabled with below: -# status /var/log/openvpn-status.log -# -#default: -# log_path: '/var/log/openvpn-status.log' -# -# ---------------------------------------------------------------------- diff --git a/collectors/python.d.plugin/postgres/README.md b/collectors/python.d.plugin/postgres/README.md index 224b76ff5..7acb9a7a9 100644 --- a/collectors/python.d.plugin/postgres/README.md +++ b/collectors/python.d.plugin/postgres/README.md @@ -6,6 +6,9 @@ sidebar_label: "PostgreSQL" # PostgreSQL monitoring with Netdata +> **Warning**: This module is deprecated and will be deleted in v1.37.0. +> Use [go.d/postgres](https://learn.netdata.cloud/docs/agent/collectors/go.d.plugin/modules/postgres). + Collects database health and performance metrics. ## Requirements @@ -97,7 +100,8 @@ cd /etc/netdata # Replace this path with your Netdata config directory, if dif sudo ./edit-config python.d/postgres.conf ``` -When no configuration file is found, the module tries to connect to TCP/IP socket: `localhost:5432`. +When no configuration file is found, the module tries to connect to TCP/IP socket: `localhost:5432` with the +following collection jobs. ```yaml socket: @@ -113,6 +117,29 @@ tcp: port : 5432 ``` +**Note**: Every job collection must have a unique identifier. In cases that you monitor multiple DBs, every +job must have it's own name. Use a mnemonic of your preference (e.g us_east_db, us_east_tcp) + +## Troubleshooting + +To troubleshoot issues with the `postgres` collector, run the `python.d.plugin` with the debug option enabled. The output +should give you clues as to why the collector isn't working. + +First, navigate to your plugins directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on your +system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the plugin's directory, switch +to the `netdata` user. + +```bash +cd /usr/libexec/netdata/plugins.d/ +sudo su -s /bin/bash netdata +``` + +You can now run the `python.d.plugin` to debug the collector: + +```bash +./python.d.plugin postgres debug trace +``` + --- diff --git a/collectors/python.d.plugin/python.d.conf b/collectors/python.d.plugin/python.d.conf index af58b451c..72e20fcd3 100644 --- a/collectors/python.d.plugin/python.d.conf +++ b/collectors/python.d.plugin/python.d.conf @@ -25,73 +25,48 @@ gc_run: yes # Garbage collection interval in seconds. Default is 300. gc_interval: 300 -# apache: yes - -# apache_cache has been replaced by web_log # adaptec_raid: yes # alarms: yes # am2320: yes # anomalies: no -apache_cache: no # beanstalk: yes # bind_rndc: yes # boinc: yes # ceph: yes -chrony: no # changefinder: no -# couchdb: yes -# dns_query_time: yes -# dnsdist: yes # dockerd: yes # dovecot: yes -# elasticsearch: yes -# energid: yes # this is just an example example: no # exim: yes # fail2ban: yes -# freeradius: yes # gearman: yes go_expvar: no -# gunicorn_log has been replaced by web_log -gunicorn_log: no # haproxy: yes # hddtemp: yes -# httpcheck: yes hpssa: no # icecast: yes # ipfs: yes -# isc_dhcpd: yes # litespeed: yes logind: no # megacli: yes # memcached: yes # mongodb: yes # monit: yes -# mysql: yes -# nginx: yes # nginx_plus: yes # nvidia_smi: yes - -# nginx_log has been replaced by web_log -nginx_log: no # nsd: yes # ntpd: yes # openldap: yes # oracledb: yes -# ovpn_status_log: yes -# phpfpm: yes -# portcheck: yes # postfix: yes # postgres: yes -# powerdns: yes # proxysql: yes # puppet: yes # rabbitmq: yes -# redis: yes # rethinkdbs: yes # retroshare: yes # riakkv: yes @@ -107,5 +82,4 @@ nginx_log: no # uwsgi: yes # varnish: yes # w1sensor: yes -# web_log: yes # zscores: no diff --git a/collectors/python.d.plugin/python.d.plugin.in b/collectors/python.d.plugin/python.d.plugin.in index b943f3a20..c04cb3ff0 100644 --- a/collectors/python.d.plugin/python.d.plugin.in +++ b/collectors/python.d.plugin/python.d.plugin.in @@ -31,8 +31,8 @@ import os import pprint import re import sys -import time import threading +import time import types try: @@ -50,6 +50,7 @@ else: ENV_NETDATA_USER_CONFIG_DIR = 'NETDATA_USER_CONFIG_DIR' ENV_NETDATA_STOCK_CONFIG_DIR = 'NETDATA_STOCK_CONFIG_DIR' ENV_NETDATA_PLUGINS_DIR = 'NETDATA_PLUGINS_DIR' +ENV_NETDATA_USER_PLUGINS_DIRS = 'NETDATA_USER_PLUGINS_DIRS' ENV_NETDATA_LIB_DIR = 'NETDATA_LIB_DIR' ENV_NETDATA_UPDATE_EVERY = 'NETDATA_UPDATE_EVERY' ENV_NETDATA_LOCK_DIR = 'NETDATA_LOCK_DIR' @@ -99,6 +100,9 @@ def dirs(): modules_user_config = os.path.join(plugin_user_config, 'python.d') modules_stock_config = os.path.join(plugin_stock_config, 'python.d') modules = os.path.abspath(pluginsd + '/../python.d') + user_modules = [os.path.join(p, 'python.d') for p in + os.getenv(ENV_NETDATA_USER_PLUGINS_DIRS, "").split(" ") if + p] Dirs = collections.namedtuple( 'Dirs', @@ -108,6 +112,7 @@ def dirs(): 'modules_user_config', 'modules_stock_config', 'modules', + 'user_modules', 'var_lib', 'locks', ] @@ -118,6 +123,7 @@ def dirs(): modules_user_config, modules_stock_config, modules, + user_modules, var_lib, locks, ) @@ -130,6 +136,28 @@ IS_ATTY = sys.stdout.isatty() MODULE_SUFFIX = '.chart.py' +def find_available_modules(*directories): + AvailableModule = collections.namedtuple( + 'AvailableModule', + [ + 'filepath', + 'name', + ] + ) + available = list() + for d in directories: + try: + if not os.path.isdir(d): + continue + files = sorted(os.listdir(d)) + except OSError: + continue + modules = [m for m in files if m.endswith(MODULE_SUFFIX)] + available.extend([AvailableModule(os.path.join(d, m), m[:-len(MODULE_SUFFIX)]) for m in modules]) + + return available + + def available_modules(): obsolete = ( 'apache_cache', # replaced by web_log @@ -143,10 +171,17 @@ def available_modules(): 'unbound', # rewritten in Go ) - files = sorted(os.listdir(DIRS.modules)) - modules = [m[:-len(MODULE_SUFFIX)] for m in files if m.endswith(MODULE_SUFFIX)] - avail = [m for m in modules if m not in obsolete] - return tuple(avail) + stock = [m for m in find_available_modules(DIRS.modules) if m.name not in obsolete] + user = find_available_modules(*DIRS.user_modules) + + available, seen = list(), set() + for m in user + stock: + if m.name in seen: + continue + seen.add(m.name) + available.append(m) + + return available AVAILABLE_MODULES = available_modules() @@ -176,9 +211,8 @@ def multi_path_find(name, *paths): return str() -def load_module(name): - abs_path = os.path.join(DIRS.modules, '{0}{1}'.format(name, MODULE_SUFFIX)) - module = SourceFileLoader('pythond_' + name, abs_path) +def load_module(name, filepath): + module = SourceFileLoader('pythond_' + name, filepath) if isinstance(module, types.ModuleType): return module return module.load_module() @@ -331,12 +365,13 @@ class Job(threading.Thread): class ModuleSrc: - def __init__(self, name): - self.name = name + def __init__(self, m): + self.name = m.name + self.filepath = m.filepath self.src = None def load(self): - self.src = load_module(self.name) + self.src = load_module(self.name, self.filepath) def get(self, key): return getattr(self.src, key, None) @@ -537,7 +572,8 @@ class Plugin: try: statuses = JobsStatuses().from_file(abs_path) except Exception as error: - self.log.warning("error on loading '{0}' : {1}".format(abs_path, repr(error))) + self.log.error("[{0}] config file invalid YAML format: {1}".format( + module_name, ' '.join([v.strip() for v in str(error).split('\n')]))) return None self.log.debug("'{0}' is loaded".format(abs_path)) return statuses @@ -553,37 +589,38 @@ class Plugin: builder.min_update_every = self.min_update_every jobs = list() - for mod_name in self.modules_to_run: - if not self.config.is_module_enabled(mod_name): - self.log.info("[{0}] is disabled in the configuration file, skipping it".format(mod_name)) + for m in self.modules_to_run: + if not self.config.is_module_enabled(m.name): + self.log.info("[{0}] is disabled in the configuration file, skipping it".format(m.name)) continue - src = ModuleSrc(mod_name) + src = ModuleSrc(m) try: src.load() except Exception as error: - self.log.warning("[{0}] error on loading source : {1}, skipping it".format(mod_name, repr(error))) + self.log.warning("[{0}] error on loading source : {1}, skipping it".format(m.name, repr(error))) continue + self.log.debug("[{0}] loaded module source : '{1}'".format(m.name, m.filepath)) if not (src.service() and callable(src.service())): - self.log.warning("[{0}] has no callable Service object, skipping it".format(mod_name)) + self.log.warning("[{0}] has no callable Service object, skipping it".format(m.name)) continue - if src.is_disabled_by_default() and not self.config.is_module_explicitly_enabled(mod_name): - self.log.info("[{0}] is disabled by default, skipping it".format(mod_name)) + if src.is_disabled_by_default() and not self.config.is_module_explicitly_enabled(m.name): + self.log.info("[{0}] is disabled by default, skipping it".format(m.name)) continue builder.module_defaults = src.defaults() - configs = builder.build(mod_name) + configs = builder.build(m.name) if not configs: - self.log.info("[{0}] has no job configs, skipping it".format(mod_name)) + self.log.info("[{0}] has no job configs, skipping it".format(m.name)) continue for config in configs: config['job_name'] = re.sub(r'\s+', '_', config['job_name']) config['override_name'] = re.sub(r'\s+', '_', config.pop('name')) - job = Job(src.service(), mod_name, config) + job = Job(src.service(), m.name, config) was_previously_active = job_statuses and job_statuses.has(job.module_name, job.real_name) if was_previously_active and job.autodetection_retry == 0: @@ -811,6 +848,20 @@ def disable(): exit(0) +def get_modules_to_run(cmd): + if not cmd.modules_to_run: + return AVAILABLE_MODULES + + modules_to_run, seen = list(), set() + for m in AVAILABLE_MODULES: + if m.name not in cmd.modules_to_run or m.name in seen: + continue + seen.add(m.name) + modules_to_run.append(m) + + return modules_to_run + + def main(): cmd = parse_command_line() log = PythonDLogger() @@ -822,21 +873,22 @@ def main(): log.info('using python v{0}'.format(PY_VERSION[0])) - unknown = set(cmd.modules_to_run) - set(AVAILABLE_MODULES) + if DIRS.locks and not cmd.nolock: + registry = FileLockRegistry(DIRS.locks) + else: + registry = DummyRegistry() + + unique_avail_module_names = set([m.name for m in AVAILABLE_MODULES]) + unknown = set(cmd.modules_to_run) - unique_avail_module_names if unknown: log.error('unknown modules : {0}'.format(sorted(list(unknown)))) - guessed = guess_module(AVAILABLE_MODULES, *cmd.modules_to_run) + guessed = guess_module(unique_avail_module_names, *cmd.modules_to_run) if guessed: log.info('probably you meant : \n{0}'.format(pprint.pformat(guessed, width=1))) return - if DIRS.locks and not cmd.nolock: - registry = FileLockRegistry(DIRS.locks) - else: - registry = DummyRegistry() - p = Plugin( - cmd.modules_to_run or AVAILABLE_MODULES, + get_modules_to_run(cmd), cmd.update_every, registry, ) diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py index c304ccec2..ed1b2e669 100644 --- a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py +++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py @@ -95,8 +95,9 @@ class SimpleService(PythonDLimitedLogger, object): @property def name(self): - if self.job_name and self.job_name != self.module_name: - return '_'.join([self.module_name, self.override_name or self.job_name]) + name = self.override_name or self.job_name + if name and name != self.module_name: + return '_'.join([self.module_name, name]) return self.module_name def actual_name(self): diff --git a/collectors/python.d.plugin/python_modules/urllib3/_collections.py b/collectors/python.d.plugin/python_modules/urllib3/_collections.py index c1d2fad36..2a6b3ec70 100644 --- a/collectors/python.d.plugin/python_modules/urllib3/_collections.py +++ b/collectors/python.d.plugin/python_modules/urllib3/_collections.py @@ -1,6 +1,11 @@ # SPDX-License-Identifier: MIT from __future__ import absolute_import -from collections import Mapping, MutableMapping + +try: + from collections import Mapping, MutableMapping +except ImportError: + from collections.abc import Mapping, MutableMapping + try: from threading import RLock except ImportError: # Platform-specific: No threads available diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/selectors.py b/collectors/python.d.plugin/python_modules/urllib3/util/selectors.py index c0997b1a2..de5e49838 100644 --- a/collectors/python.d.plugin/python_modules/urllib3/util/selectors.py +++ b/collectors/python.d.plugin/python_modules/urllib3/util/selectors.py @@ -12,7 +12,13 @@ import select import socket import sys import time -from collections import namedtuple, Mapping + +from collections import namedtuple + +try: + from collections import Mapping +except ImportError: + from collections.abc import Mapping try: monotonic = time.monotonic diff --git a/collectors/python.d.plugin/smartd_log/smartd_log.chart.py b/collectors/python.d.plugin/smartd_log/smartd_log.chart.py index 75b8c8c40..dc4e95dec 100644 --- a/collectors/python.d.plugin/smartd_log/smartd_log.chart.py +++ b/collectors/python.d.plugin/smartd_log/smartd_log.chart.py @@ -630,6 +630,7 @@ class Service(SimpleService): self.exclude = configuration.get('exclude_disks', str()).split() self.disks = list() self.runs = 0 + self.do_force_rescan = False def check(self): return self.scan() > 0 @@ -637,9 +638,10 @@ class Service(SimpleService): def get_data(self): self.runs += 1 - if self.runs % DEF_RESCAN_INTERVAL == 0: + if self.do_force_rescan or self.runs % DEF_RESCAN_INTERVAL == 0: self.cleanup() self.scan() + self.do_force_rescan = False data = dict() @@ -654,10 +656,12 @@ class Service(SimpleService): if changed is None: disk.alive = False + self.do_force_rescan = True continue if changed and disk.populate_attrs() is None: disk.alive = False + self.do_force_rescan = True continue data.update(disk.data()) diff --git a/collectors/python.d.plugin/smartd_log/smartd_log.conf b/collectors/python.d.plugin/smartd_log/smartd_log.conf index 4f138d17a..6c01d953b 100644 --- a/collectors/python.d.plugin/smartd_log/smartd_log.conf +++ b/collectors/python.d.plugin/smartd_log/smartd_log.conf @@ -65,3 +65,11 @@ # exclude_disks: 'PATTERN1 PATTERN2' # space separated patterns. If the pattern is in the drive name, the module will not collect data for it. # # ---------------------------------------------------------------------- + +custom: + name: smartd_log + log_path: '/var/log/smartd/' + +debian: + name: smartd_log + log_path: '/var/lib/smartmontools/' diff --git a/collectors/python.d.plugin/zscores/README.md b/collectors/python.d.plugin/zscores/README.md index 7fb189f6a..4f84a6c1f 100644 --- a/collectors/python.d.plugin/zscores/README.md +++ b/collectors/python.d.plugin/zscores/README.md @@ -1,9 +1,7 @@ <!-- ---- title: "zscores" description: "Use statistical anomaly detection to narrow your focus and shorten root cause analysis." custom_edit_url: https://github.com/netdata/netdata/edit/master/collectors/python.d.plugin/zscores/README.md ---- --> # Z-Scores - basic anomaly detection for your key metrics and charts @@ -143,4 +141,4 @@ per_chart_agg: 'mean' # 'absmax' will take the max absolute value across all dim - About ~50mb of ram (`apps.mem`) being continually used by the `python.d.plugin`. - If you activate this collector on a fresh node, it might take a little while to build up enough data to calculate a proper zscore. So until you actually have `train_secs` of available data the mean and stddev calculated will be subject - to more noise.
\ No newline at end of file + to more noise. diff --git a/collectors/statsd.plugin/README.md b/collectors/statsd.plugin/README.md index 7dc5dbb72..b46ca28d9 100644 --- a/collectors/statsd.plugin/README.md +++ b/collectors/statsd.plugin/README.md @@ -77,7 +77,7 @@ Netdata fully supports the StatsD protocol and also extends it to support more a - **Sets** - The application sends `name:value|s`, where `value` is anything (**number or text**, leading and trailing spaces are removed), StatsD reports the number of unique values sent and the number of times it was updated (events). + The application sends `name:value|s`, where `value` is anything (**number or text**, leading and trailing spaces are removed), StatsD reports the number of unique values sent and the number of times it was updated (events). Sampling rate is **not** supported for Sets. `value` is always considered text (so `01` and `1` are considered different). diff --git a/collectors/statsd.plugin/statsd.c b/collectors/statsd.plugin/statsd.c index 63e3316cb..fef4206bc 100644 --- a/collectors/statsd.plugin/statsd.c +++ b/collectors/statsd.plugin/statsd.c @@ -35,7 +35,7 @@ // data specific to each metric type typedef struct statsd_metric_gauge { - LONG_DOUBLE value; + NETDATA_DOUBLE value; } STATSD_METRIC_GAUGE; typedef struct statsd_metric_counter { // counter and meter @@ -64,7 +64,7 @@ typedef struct statsd_histogram_extensions { size_t size; size_t used; - LONG_DOUBLE *values; // dynamic array of values collected + NETDATA_DOUBLE *values; // dynamic array of values collected } STATSD_METRIC_HISTOGRAM_EXTENSIONS; typedef struct statsd_metric_histogram { // histogram and timer @@ -271,9 +271,7 @@ static struct statsd { size_t tcp_idle_timeout; collected_number decimal_detail; size_t private_charts; - size_t max_private_charts; size_t max_private_charts_hard; - RRD_MEMORY_MODE private_charts_memory_mode; long private_charts_rrd_history_entries; unsigned int private_charts_hidden:1; @@ -290,7 +288,6 @@ static struct statsd { LISTEN_SOCKETS sockets; } statsd = { .enabled = 1, - .max_private_charts = 200, .max_private_charts_hard = 1000, .private_charts_hidden = 0, .recvmmsg_size = 10, @@ -390,7 +387,7 @@ static void dictionary_metric_insert_callback(const char *name, void *value, voi netdata_mutex_init(&m->histogram.ext->mutex); } - __atomic_fetch_add(&index->metrics, 1, __ATOMIC_SEQ_CST); + __atomic_fetch_add(&index->metrics, 1, __ATOMIC_RELAXED); } static void dictionary_metric_delete_callback(const char *name, void *value, void *data) { @@ -431,12 +428,12 @@ static inline STATSD_METRIC *statsd_find_or_add_metric(STATSD_INDEX *index, cons // -------------------------------------------------------------------------------------------------------------------- // statsd parsing numbers -static inline LONG_DOUBLE statsd_parse_float(const char *v, LONG_DOUBLE def) { - LONG_DOUBLE value; +static inline NETDATA_DOUBLE statsd_parse_float(const char *v, NETDATA_DOUBLE def) { + NETDATA_DOUBLE value; if(likely(v && *v)) { char *e = NULL; - value = str2ld(v, &e); + value = str2ndd(v, &e); if(unlikely(e && *e)) error("STATSD: excess data '%s' after value '%s'", e, v); } @@ -446,8 +443,8 @@ static inline LONG_DOUBLE statsd_parse_float(const char *v, LONG_DOUBLE def) { return value; } -static inline LONG_DOUBLE statsd_parse_sampling_rate(const char *v) { - LONG_DOUBLE sampling_rate = statsd_parse_float(v, 1.0); +static inline NETDATA_DOUBLE statsd_parse_sampling_rate(const char *v) { + NETDATA_DOUBLE sampling_rate = statsd_parse_float(v, 1.0); if(unlikely(isless(sampling_rate, 0.001))) sampling_rate = 0.001; if(unlikely(isgreater(sampling_rate, 1.0))) sampling_rate = 1.0; return sampling_rate; @@ -522,7 +519,7 @@ static inline void statsd_process_counter_or_meter(STATSD_METRIC *m, const char // magic loading of metric, without affecting anything } else { - m->counter.value += llrintl((LONG_DOUBLE) statsd_parse_int(value, 1) / statsd_parse_sampling_rate(sampling)); + m->counter.value += llrintndd((NETDATA_DOUBLE) statsd_parse_int(value, 1) / statsd_parse_sampling_rate(sampling)); m->events++; m->count++; @@ -549,18 +546,18 @@ static inline void statsd_process_histogram_or_timer(STATSD_METRIC *m, const cha // magic loading of metric, without affecting anything } else { - LONG_DOUBLE v = statsd_parse_float(value, 1.0); - LONG_DOUBLE sampling_rate = statsd_parse_sampling_rate(sampling); + NETDATA_DOUBLE v = statsd_parse_float(value, 1.0); + NETDATA_DOUBLE sampling_rate = statsd_parse_sampling_rate(sampling); if(unlikely(isless(sampling_rate, 0.01))) sampling_rate = 0.01; if(unlikely(isgreater(sampling_rate, 1.0))) sampling_rate = 1.0; - long long samples = llrintl(1.0 / sampling_rate); + long long samples = llrintndd(1.0 / sampling_rate); while(samples-- > 0) { if(unlikely(m->histogram.ext->used == m->histogram.ext->size)) { netdata_mutex_lock(&m->histogram.ext->mutex); m->histogram.ext->size += statsd.histogram_increase_step; - m->histogram.ext->values = reallocz(m->histogram.ext->values, sizeof(LONG_DOUBLE) * m->histogram.ext->size); + m->histogram.ext->values = reallocz(m->histogram.ext->values, sizeof(NETDATA_DOUBLE) * m->histogram.ext->size); netdata_mutex_unlock(&m->histogram.ext->mutex); } @@ -593,7 +590,6 @@ static inline void statsd_process_set(STATSD_METRIC *m, const char *value) { if(unlikely(m->reset)) { if(likely(m->set.dict)) { dictionary_destroy(m->set.dict); - dictionary_register_insert_callback(m->set.dict, dictionary_metric_set_value_insert_callback, m); m->set.dict = NULL; } statsd_reset_metric(m); @@ -601,6 +597,7 @@ static inline void statsd_process_set(STATSD_METRIC *m, const char *value) { if (unlikely(!m->set.dict)) { m->set.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS); + dictionary_register_insert_callback(m->set.dict, dictionary_metric_set_value_insert_callback, m); m->set.unique = 0; } @@ -1591,7 +1588,7 @@ static inline void statsd_get_metric_type_and_id(STATSD_METRIC *m, char *type, c } static inline RRDSET *statsd_private_rrdset_create( - STATSD_METRIC *m + STATSD_METRIC *m __maybe_unused , const char *type , const char *id , const char *name @@ -1603,16 +1600,6 @@ static inline RRDSET *statsd_private_rrdset_create( , int update_every , RRDSET_TYPE chart_type ) { - RRD_MEMORY_MODE memory_mode = statsd.private_charts_memory_mode; - long history = statsd.private_charts_rrd_history_entries; - - if(unlikely(statsd.private_charts >= statsd.max_private_charts)) { - debug(D_STATSD, "STATSD: metric '%s' will be charted with memory mode = none, because the maximum number of charts has been reached.", m->name); - info("STATSD: metric '%s' will be charted with memory mode = none, because the maximum number of charts (%zu) has been reached. Increase the number of charts by editing netdata.conf, [statsd] section.", m->name, statsd.max_private_charts); - memory_mode = RRD_MEMORY_MODE_NONE; - history = 5; - } - statsd.private_charts++; RRDSET *st = rrdset_create_custom( localhost // host @@ -1628,8 +1615,8 @@ static inline RRDSET *statsd_private_rrdset_create( , priority // priority , update_every // update every , chart_type // chart type - , memory_mode // memory mode - , history // history + , default_rrd_memory_mode // memory mode + , default_rrd_history_entries // history ); rrdset_flag_set(st, RRDSET_FLAG_STORE_FIRST); @@ -1945,21 +1932,21 @@ static inline void statsd_flush_timer_or_histogram(STATSD_METRIC *m, const char netdata_mutex_lock(&m->histogram.ext->mutex); size_t len = m->histogram.ext->used; - LONG_DOUBLE *series = m->histogram.ext->values; + NETDATA_DOUBLE *series = m->histogram.ext->values; sort_series(series, len); - m->histogram.ext->last_min = (collected_number)roundl(series[0] * statsd.decimal_detail); - m->histogram.ext->last_max = (collected_number)roundl(series[len - 1] * statsd.decimal_detail); - m->last = (collected_number)roundl(average(series, len) * statsd.decimal_detail); - m->histogram.ext->last_median = (collected_number)roundl(median_on_sorted_series(series, len) * statsd.decimal_detail); - m->histogram.ext->last_stddev = (collected_number)roundl(standard_deviation(series, len) * statsd.decimal_detail); - m->histogram.ext->last_sum = (collected_number)roundl(sum(series, len) * statsd.decimal_detail); + m->histogram.ext->last_min = (collected_number)roundndd(series[0] * statsd.decimal_detail); + m->histogram.ext->last_max = (collected_number)roundndd(series[len - 1] * statsd.decimal_detail); + m->last = (collected_number)roundndd(average(series, len) * statsd.decimal_detail); + m->histogram.ext->last_median = (collected_number)roundndd(median_on_sorted_series(series, len) * statsd.decimal_detail); + m->histogram.ext->last_stddev = (collected_number)roundndd(standard_deviation(series, len) * statsd.decimal_detail); + m->histogram.ext->last_sum = (collected_number)roundndd(sum(series, len) * statsd.decimal_detail); size_t pct_len = (size_t)floor((double)len * statsd.histogram_percentile / 100.0); if(pct_len < 1) m->histogram.ext->last_percentile = (collected_number)(series[0] * statsd.decimal_detail); else - m->histogram.ext->last_percentile = (collected_number)roundl(series[pct_len - 1] * statsd.decimal_detail); + m->histogram.ext->last_percentile = (collected_number)roundndd(series[pct_len - 1] * statsd.decimal_detail); netdata_mutex_unlock(&m->histogram.ext->mutex); @@ -2300,7 +2287,7 @@ static inline void statsd_flush_index_metrics(STATSD_INDEX *index, void (*flush_ if(unlikely(!(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_CHECKED))) { if(unlikely(statsd.private_charts >= statsd.max_private_charts_hard)) { debug(D_STATSD, "STATSD: metric '%s' will not be charted, because the hard limit of the maximum number of charts has been reached.", m->name); - info("STATSD: metric '%s' will not be charted, because the hard limit of the maximum number of charts (%zu) has been reached. Increase the number of charts by editing netdata.conf, [statsd] section.", m->name, statsd.max_private_charts); + info("STATSD: metric '%s' will not be charted, because the hard limit of the maximum number of charts (%zu) has been reached. Increase the number of charts by editing netdata.conf, [statsd] section.", m->name, statsd.max_private_charts_hard); m->options &= ~STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED; } else { @@ -2446,9 +2433,7 @@ void *statsd_main(void *ptr) { #endif statsd.charts_for = simple_pattern_create(config_get(CONFIG_SECTION_STATSD, "create private charts for metrics matching", "*"), NULL, SIMPLE_PATTERN_EXACT); - statsd.max_private_charts = (size_t)config_get_number(CONFIG_SECTION_STATSD, "max private charts allowed", (long long)statsd.max_private_charts); - statsd.max_private_charts_hard = (size_t)config_get_number(CONFIG_SECTION_STATSD, "max private charts hard limit", (long long)statsd.max_private_charts * 5); - statsd.private_charts_memory_mode = rrd_memory_mode_id(config_get(CONFIG_SECTION_STATSD, "private charts memory mode", rrd_memory_mode_name(default_rrd_memory_mode))); + statsd.max_private_charts_hard = (size_t)config_get_number(CONFIG_SECTION_STATSD, "max private charts hard limit", (long long)statsd.max_private_charts_hard); statsd.private_charts_rrd_history_entries = (int)config_get_number(CONFIG_SECTION_STATSD, "private charts history", default_rrd_history_entries); statsd.decimal_detail = (collected_number)config_get_number(CONFIG_SECTION_STATSD, "decimal detail", (long long int)statsd.decimal_detail); statsd.tcp_idle_timeout = (size_t) config_get_number(CONFIG_SECTION_STATSD, "disconnect idle tcp clients after seconds", (long long int)statsd.tcp_idle_timeout); |