author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-07-24 09:54:23 +0000
---|---|---
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-07-24 09:54:44 +0000
commit | 836b47cb7e99a977c5a23b059ca1d0b5065d310e (patch) |
tree | 1604da8f482d02effa033c94a84be42bc0c848c3 /collectors/cgroups.plugin |
parent | Releasing debian version 1.44.3-2. (diff) |
download | netdata-836b47cb7e99a977c5a23b059ca1d0b5065d310e.tar.xz, netdata-836b47cb7e99a977c5a23b059ca1d0b5065d310e.zip |
Merging upstream version 1.46.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'collectors/cgroups.plugin')
23 files changed, 0 insertions, 10366 deletions
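For readers following along in a local clone of the packaging repository this page is generated from, the diffstat above can be reproduced with plain git. A minimal sketch, assuming the commit 836b47cb7e99a977c5a23b059ca1d0b5065d310e is present in your clone:

```sh
# Show the commit with a per-file diffstat, limited to the
# collectors/cgroups.plugin tree (23 files changed, 10366 deletions).
git show --stat 836b47cb7e99a977c5a23b059ca1d0b5065d310e -- collectors/cgroups.plugin

# List just the affected paths, one per line; deleted files are prefixed with D.
git diff-tree --no-commit-id --name-status -r 836b47cb7e99a977c5a23b059ca1d0b5065d310e -- collectors/cgroups.plugin
```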
diff --git a/collectors/cgroups.plugin/Makefile.am b/collectors/cgroups.plugin/Makefile.am
deleted file mode 100644
index 0f6062420..000000000
--- a/collectors/cgroups.plugin/Makefile.am
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-AUTOMAKE_OPTIONS = subdir-objects
-MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
-
-CLEANFILES = \
-    cgroup-name.sh \
-    cgroup-network-helper.sh \
-    $(NULL)
-
-include $(top_srcdir)/build/subst.inc
-SUFFIXES = .in
-
-dist_plugins_SCRIPTS = \
-    cgroup-name.sh \
-    cgroup-network-helper.sh \
-    $(NULL)
-
-dist_noinst_DATA = \
-    cgroup-name.sh.in \
-    cgroup-network-helper.sh.in \
-    README.md \
-    $(NULL)
diff --git a/collectors/cgroups.plugin/README.md b/collectors/cgroups.plugin/README.md
deleted file mode 100644
index ba6a20e5e..000000000
--- a/collectors/cgroups.plugin/README.md
+++ /dev/null
@@ -1,302 +0,0 @@
-<!--
-title: "Monitor Cgroups (cgroups.plugin)"
-custom_edit_url: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.plugin/README.md"
-sidebar_label: "Monitor Cgroups"
-learn_status: "Published"
-learn_topic_type: "References"
-learn_rel_path: "Integrations/Monitor/Virtualized environments/Containers"
--->
-
-# Monitor Cgroups (cgroups.plugin)
-
-You can monitor containers and virtual machines using **cgroups**.
-
-cgroups (or control groups), are a Linux kernel feature that provides accounting and resource usage limiting for
-processes. When cgroups are bundled with namespaces (i.e. isolation), they form what we usually call **containers**.
-
-cgroups are hierarchical, meaning that cgroups can contain child cgroups, which can contain more cgroups, etc. All
-accounting is reported (and resource usage limits are applied) also in a hierarchical way.
-
-To visualize cgroup metrics Netdata provides configuration for cherry picking the cgroups of interest. By default (
-without any configuration) Netdata should pick **systemd services**, all kinds of **containers** (lxc, docker, etc)
-and **virtual machines** spawn by managers that register them with cgroups (qemu, libvirt, etc).
-
-## Configuring Netdata for cgroups
-
-In general, no additional settings are required. Netdata discovers all available cgroups on the host system and
-collects their metrics.
-
-### How Netdata finds the available cgroups
-
-Linux exposes resource usage reporting and provides dynamic configuration for cgroups, using virtual files (usually)
-under `/sys/fs/cgroup`. Netdata reads `/proc/self/mountinfo` to detect the exact mount point of cgroups. Netdata also
-allows manual configuration of this mount point, using these settings:
-
-```text
-[plugin:cgroups]
-    check for new cgroups every = 10
-    path to /sys/fs/cgroup/cpuacct = /sys/fs/cgroup/cpuacct
-    path to /sys/fs/cgroup/blkio = /sys/fs/cgroup/blkio
-    path to /sys/fs/cgroup/memory = /sys/fs/cgroup/memory
-    path to /sys/fs/cgroup/devices = /sys/fs/cgroup/devices
-```
-
-Netdata rescans these directories for added or removed cgroups every `check for new cgroups every` seconds.
-
-### Hierarchical search for cgroups
-
-Since cgroups are hierarchical, for each of the directories shown above, Netdata walks through the subdirectories
-recursively searching for cgroups (each subdirectory is another cgroup).
-
-To provide a sane default for this setting, Netdata uses the following pattern list (patterns starting with `!` give a
-negative match and their order is important: the first matching a path will be used):
-
-```text
-[plugin:cgroups]
-    search for cgroups in subpaths matching = !*/init.scope !*-qemu !/init.scope !/system !/systemd !/user !/user.slice *
-```
-
-So, we disable checking for **child cgroups** in systemd internal
-cgroups ([systemd services are monitored by Netdata](#monitoring-systemd-services)), user cgroups (normally used for
-desktop and remote user sessions), qemu virtual machines (child cgroups of virtual machines) and `init.scope`. All
-others are enabled.
-
-### Unified cgroups (cgroups v2) support
-
-Netdata automatically detects cgroups version. If detection fails Netdata assumes v1.
-To switch to v2 manually add:
-
-```text
-[plugin:cgroups]
-    use unified cgroups = yes
-    path to unified cgroups = /sys/fs/cgroup
-```
-
-Unified cgroups use same name pattern matching as v1 cgroups. `cgroup_enable_systemd_services_detailed_memory` is
-currently unsupported when using unified cgroups.
-
-### Enabled cgroups
-
-To provide a sane default, Netdata uses the
-following [pattern list](https://github.com/netdata/netdata/blob/master/libnetdata/simple_pattern/README.md):
-
-- Checks the pattern against the path of the cgroup
-
-  ```text
-  [plugin:cgroups]
-    enable by default cgroups matching = !*/init.scope *.scope !*/vcpu* !*/emulator !*.mount !*.partition !*.service !*.slice !*.swap !*.user !/ !/docker !/libvirt !/lxc !/lxc/*/ns !/lxc/*/ns/* !/machine !/qemu !/system !/systemd !/user *
-  ```
-
-- Checks the pattern against the name of the cgroup (as you see it on the dashboard)
-
-  ```text
-  [plugin:cgroups]
-    enable by default cgroups names matching = *
-  ```
-
-Renaming is configured with the following options:
-
-```text
-[plugin:cgroups]
-    run script to rename cgroups matching = *.scope *docker* *lxc* *qemu* !/ !*.mount !*.partition !*.service !*.slice !*.swap !*.user *
-    script to get cgroup names = /usr/libexec/netdata/plugins.d/cgroup-name.sh
-```
-
-The whole point for the additional pattern list, is to limit the number of times the script will be called. Without this
-pattern list, the script might be called thousands of times, depending on the number of cgroups available in the system.
-
-The above pattern list is matched against the path of the cgroup. For matched cgroups, Netdata calls the
-script [cgroup-name.sh](https://raw.githubusercontent.com/netdata/netdata/master/collectors/cgroups.plugin/cgroup-name.sh)
-to get its name. This script queries `docker`, `kubectl`, `podman`, or applies heuristics to find give a name for the
-cgroup.
-
-#### Note on Podman container names
-
-Podman's security model is a lot more restrictive than Docker's, so Netdata will not be able to detect container names
-out of the box unless they were started by the same user as Netdata itself.
-
-If Podman is used in "rootful" mode, it's also possible to use `podman system service` to grant Netdata access to
-container names. To do this, ensure `podman system service` is running and Netdata has access
-to `/run/podman/podman.sock` (the default permissions as specified by upstream are `0600`, with owner `root`, so you
-will have to adjust the configuration).
-
-[Docker Socket Proxy (HAProxy)](https://github.com/Tecnativa/docker-socket-proxy) or [CetusGuard](https://github.com/hectorm/cetusguard)
-can also be used to give Netdata restricted access to the socket. Note that `PODMAN_HOST` in Netdata's environment should
-be set to the proxy's URL in this case.
-
-### Charts with zero metrics
-
-By default, Netdata will enable monitoring metrics only when they are not zero. If they are constantly zero they are
-ignored. Metrics that will start having values, after Netdata is started, will be detected and charts will be
-automatically added to the dashboard (a refresh of the dashboard is needed for them to appear though). Set `yes` for a
-chart instead of `auto` to enable it permanently. For example:
-
-```text
-[plugin:cgroups]
-    enable memory (used mem including cache) = yes
-```
-
-You can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero
-metrics for all internal Netdata plugins.
-
-### Alerts
-
-CPU and memory limits are watched and used to rise alerts. Memory usage for every cgroup is checked against `ram`
-and `ram+swap` limits. CPU usage for every cgroup is checked against `cpuset.cpus` and `cpu.cfs_period_us` + `cpu.cfs_quota_us` pair assigned for the cgroup. Configuration for the alerts is available in `health.d/cgroups.conf`
-file.
-
-## Monitoring systemd services
-
-Netdata monitors **systemd services**. Example:
-
-![image](https://cloud.githubusercontent.com/assets/2662304/21964372/20cd7b84-db53-11e6-98a2-b9c986b082c0.png)
-
-Support per distribution:
-
-| system | charts shown | `/sys/fs/cgroup` tree | comments |
-|:----------------:|:------------:|:------------------------------------:|:--------------------------|
-| Arch Linux | YES | | |
-| Gentoo | NO | | can be enabled, see below |
-| Ubuntu 16.04 LTS | YES | | |
-| Ubuntu 16.10 | YES | [here](http://pastebin.com/PiWbQEXy) | |
-| Fedora 25 | YES | [here](http://pastebin.com/ax0373wF) | |
-| Debian 8 | NO | | can be enabled, see below |
-| AMI | NO | [here](http://pastebin.com/FrxmptjL) | not a systemd system |
-| CentOS 7.3.1611 | NO | [here](http://pastebin.com/SpzgezAg) | can be enabled, see below |
-
-### Monitored systemd service metrics
-
-- CPU utilization
-- Used memory
-- RSS memory
-- Mapped memory
-- Cache memory
-- Writeback memory
-- Memory minor page faults
-- Memory major page faults
-- Memory charging activity
-- Memory uncharging activity
-- Memory limit failures
-- Swap memory used
-- Disk read bandwidth
-- Disk write bandwidth
-- Disk read operations
-- Disk write operations
-- Throttle disk read bandwidth
-- Throttle disk write bandwidth
-- Throttle disk read operations
-- Throttle disk write operations
-- Queued disk read operations
-- Queued disk write operations
-- Merged disk read operations
-- Merged disk write operations
-
-### How to enable cgroup accounting on systemd systems that is by default disabled
-
-You can verify there is no accounting enabled, by running `systemd-cgtop`. The program will show only resources for
-cgroup `/`, but all services will show nothing.
-
-To enable cgroup accounting, execute this:
-
-```sh
-sed -e 's|^#Default\(.*\)Accounting=.*$|Default\1Accounting=yes|g' /etc/systemd/system.conf >/tmp/system.conf
-```
-
-To see the changes it made, run this:
-
-```sh
-# diff /etc/systemd/system.conf /tmp/system.conf
-40,44c40,44
-< #DefaultCPUAccounting=no
-< #DefaultIOAccounting=no
-< #DefaultBlockIOAccounting=no
-< #DefaultMemoryAccounting=no
-< #DefaultTasksAccounting=yes
----
-> DefaultCPUAccounting=yes
-> DefaultIOAccounting=yes
-> DefaultBlockIOAccounting=yes
-> DefaultMemoryAccounting=yes
-> DefaultTasksAccounting=yes
-```
-
-If you are happy with the changes, run:
-
-```sh
-# copy the file to the right location
-sudo cp /tmp/system.conf /etc/systemd/system.conf
-
-# restart systemd to take it into account
-sudo systemctl daemon-reexec
-```
-
-(`systemctl daemon-reload` does not reload the configuration of the server - so you have to
-execute `systemctl daemon-reexec`).
-
-Now, when you run `systemd-cgtop`, services will start reporting usage (if it does not, restart any service to wake it up). Refresh your Netdata dashboard, and you will have the charts too.
-
-In case memory accounting is missing, you will need to enable it at your kernel, by appending the following kernel boot
-options and rebooting:
-
-```sh
-cgroup_enable=memory swapaccount=1
-```
-
-You can add the above, directly at the `linux` line in your `/boot/grub/grub.cfg` or appending them to
-the `GRUB_CMDLINE_LINUX` in `/etc/default/grub` (in which case you will have to run `update-grub` before rebooting). On
-DigitalOcean debian images you may have to set it at `/etc/default/grub.d/50-cloudimg-settings.cfg`.
-
-Which systemd services are monitored by Netdata is determined by the following pattern list:
-
-```text
-[plugin:cgroups]
-    cgroups to match as systemd services = !/system.slice/*/*.service /system.slice/*.service
-```
-
-- - -
-
-## Monitoring ephemeral containers
-
-Netdata monitors containers automatically when it is installed at the host, or when it is installed in a container that
-has access to the `/proc` and `/sys` filesystems of the host.
-
-Network interfaces and cgroups (containers) are self-cleaned. When a network interface or container stops, Netdata might log
-a few errors in error.log complaining about files it cannot find, but immediately:
-
-1. It will detect this is a removed container or network interface
-2. It will freeze/pause all alerts for them
-3. It will mark their charts as obsolete
-4. Obsolete charts are not be offered on new dashboard sessions (so hit F5 and the charts are gone)
-5. Existing dashboard sessions will continue to see them, but of course they will not refresh
-6. Obsolete charts will be removed from memory, 1 hour after the last user viewed them (configurable
-   with `[global].cleanup obsolete charts after seconds = 3600` (at `netdata.conf`).
-7.
When obsolete charts are removed from memory they are also deleted from disk (configurable - with `[global].delete obsolete charts files = yes`) - -### Monitored container metrics - -- CPU usage -- CPU usage within the limits -- CPU usage per core -- Memory usage -- Writeback memory -- Memory activity -- Memory page faults -- Used memory -- Used RAM within the limits -- Memory utilization -- Memory limit failures -- I/O bandwidth (all disks) -- Serviced I/O operations (all disks) -- Throttle I/O bandwidth (all disks) -- Throttle serviced I/O operations (all disks) -- Queued I/O operations (all disks) -- Merged I/O operations (all disks) -- CPU pressure -- Memory pressure -- Memory full pressure -- I/O pressure -- I/O full pressure - -Network interfaces are monitored by means of -the [proc plugin](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md#monitored-network-interface-metrics). diff --git a/collectors/cgroups.plugin/cgroup-charts.c b/collectors/cgroups.plugin/cgroup-charts.c deleted file mode 100644 index a89e8ac45..000000000 --- a/collectors/cgroups.plugin/cgroup-charts.c +++ /dev/null @@ -1,1526 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "cgroup-internals.h" - -void update_cpu_utilization_chart(struct cgroup *cg) { - RRDSET *chart = cg->st_cpu; - - if (unlikely(!cg->st_cpu)) { - char *title; - char *context; - int prio; - - if (is_cgroup_systemd_service(cg)) { - title = "Systemd Services CPU utilization (100%% = 1 core)"; - context = "systemd.service.cpu.utilization"; - prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD; - } else { - title = k8s_is_kubepod(cg) ? "CPU Usage (100%% = 1000 mCPU)" : "CPU Usage (100%% = 1 core)"; - context = k8s_is_kubepod(cg) ? "k8s.cgroup.cpu" : "cgroup.cpu"; - prio = cgroup_containers_chart_priority; - } - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = cg->st_cpu = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "cpu", - NULL, - "cpu", - context, - title, - "percentage", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_STACKED); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - - if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { - cg->st_cpu_rd_user = rrddim_add(chart, "user", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL); - cg->st_cpu_rd_system = rrddim_add(chart, "system", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL); - } else { - cg->st_cpu_rd_user = rrddim_add(chart, "user", NULL, 100, 1000000, RRD_ALGORITHM_INCREMENTAL); - cg->st_cpu_rd_system = rrddim_add(chart, "system", NULL, 100, 1000000, RRD_ALGORITHM_INCREMENTAL); - } - } - - rrddim_set_by_pointer(chart, cg->st_cpu_rd_user, (collected_number)cg->cpuacct_stat.user); - rrddim_set_by_pointer(chart, cg->st_cpu_rd_system, (collected_number)cg->cpuacct_stat.system); - rrdset_done(chart); -} - -void update_cpu_utilization_limit_chart(struct cgroup *cg, NETDATA_DOUBLE cpu_limit) { - if (is_cgroup_systemd_service(cg)) - return; - - RRDSET *chart = cg->st_cpu_limit; - - if (unlikely(!cg->st_cpu_limit)) { - char *title = "CPU Usage within the limits"; - char *context = k8s_is_kubepod(cg) ? 
"k8s.cgroup.cpu_limit" : "cgroup.cpu_limit"; - int prio = cgroup_containers_chart_priority - 1; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = cg->st_cpu_limit = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "cpu_limit", - NULL, - "cpu", - context, - title, - "percentage", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - - if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) - rrddim_add(chart, "used", NULL, 1, system_hz, RRD_ALGORITHM_ABSOLUTE); - else - rrddim_add(chart, "used", NULL, 1, 1000000, RRD_ALGORITHM_ABSOLUTE); - cg->prev_cpu_usage = (NETDATA_DOUBLE)(cg->cpuacct_stat.user + cg->cpuacct_stat.system) * 100; - } - - NETDATA_DOUBLE cpu_usage = 0; - cpu_usage = (NETDATA_DOUBLE)(cg->cpuacct_stat.user + cg->cpuacct_stat.system) * 100; - NETDATA_DOUBLE cpu_used = 100 * (cpu_usage - cg->prev_cpu_usage) / (cpu_limit * cgroup_update_every); - - rrdset_isnot_obsolete___safe_from_collector_thread(chart); - - rrddim_set(chart, "used", (cpu_used > 0) ? (collected_number)cpu_used : 0); - - cg->prev_cpu_usage = cpu_usage; - - rrdsetvar_custom_chart_variable_set(cg->st_cpu, cg->chart_var_cpu_limit, cpu_limit); - rrdset_done(chart); -} - -void update_cpu_throttled_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - RRDSET *chart = cg->st_cpu_nr_throttled; - - if (unlikely(!cg->st_cpu_nr_throttled)) { - char *title = "CPU Throttled Runnable Periods"; - char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.throttled" : "cgroup.throttled"; - int prio = cgroup_containers_chart_priority + 10; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = cg->st_cpu_nr_throttled = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "throttled", - NULL, - "cpu", - context, - title, - "percentage", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - rrddim_add(chart, "throttled", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set(chart, "throttled", (collected_number)cg->cpuacct_cpu_throttling.nr_throttled_perc); - rrdset_done(chart); -} - -void update_cpu_throttled_duration_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - RRDSET *chart = cg->st_cpu_throttled_time; - - if (unlikely(!cg->st_cpu_throttled_time)) { - char *title = "CPU Throttled Time Duration"; - char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.throttled_duration" : "cgroup.throttled_duration"; - int prio = cgroup_containers_chart_priority + 15; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = cg->st_cpu_throttled_time = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "throttled_duration", - NULL, - "cpu", - context, - title, - "ms", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? 
PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - rrddim_add(chart, "duration", NULL, 1, 1000000, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set(chart, "duration", (collected_number)cg->cpuacct_cpu_throttling.throttled_time); - rrdset_done(chart); -} - -void update_cpu_shares_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - RRDSET *chart = cg->st_cpu_shares; - - if (unlikely(!cg->st_cpu_shares)) { - char *title = "CPU Time Relative Share"; - char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_shares" : "cgroup.cpu_shares"; - int prio = cgroup_containers_chart_priority + 20; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = cg->st_cpu_shares = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "cpu_shares", - NULL, - "cpu", - context, - title, - "shares", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - rrddim_add(chart, "shares", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set(chart, "shares", (collected_number)cg->cpuacct_cpu_shares.shares); - rrdset_done(chart); -} - -void update_cpu_per_core_usage_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - char id[RRD_ID_LENGTH_MAX + 1]; - unsigned int i; - - if (unlikely(!cg->st_cpu_per_core)) { - char *title = k8s_is_kubepod(cg) ? "CPU Usage (100%% = 1000 mCPU) Per Core" : "CPU Usage (100%% = 1 core) Per Core"; - char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_per_core" : "cgroup.cpu_per_core"; - int prio = cgroup_containers_chart_priority + 100; - - char buff[RRD_ID_LENGTH_MAX + 1]; - cg->st_cpu_per_core = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "cpu_per_core", - NULL, - "cpu", - context, - title, - "percentage", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_STACKED); - - rrdset_update_rrdlabels(cg->st_cpu_per_core, cg->chart_labels); - - for (i = 0; i < cg->cpuacct_usage.cpus; i++) { - snprintfz(id, RRD_ID_LENGTH_MAX, "cpu%u", i); - rrddim_add(cg->st_cpu_per_core, id, NULL, 100, 1000000000, RRD_ALGORITHM_INCREMENTAL); - } - } - - for (i = 0; i < cg->cpuacct_usage.cpus; i++) { - snprintfz(id, RRD_ID_LENGTH_MAX, "cpu%u", i); - rrddim_set(cg->st_cpu_per_core, id, (collected_number)cg->cpuacct_usage.cpu_percpu[i]); - } - rrdset_done(cg->st_cpu_per_core); -} - -void update_mem_usage_detailed_chart(struct cgroup *cg) { - RRDSET *chart = cg->st_mem; - - if (unlikely(!cg->st_mem)) { - char *title; - char *context; - int prio; - if (is_cgroup_systemd_service(cg)) { - title = "Systemd Services Memory"; - context = "systemd.service.memory.ram.usage"; - prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 15; - } else { - title = "Memory Usage"; - context = k8s_is_kubepod(cg) ? "k8s.cgroup.mem" : "cgroup.mem"; - prio = cgroup_containers_chart_priority + 220; - } - - char buff[RRD_ID_LENGTH_MAX + 1]; - - chart = cg->st_mem = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "mem", - NULL, - "mem", - context, - title, - "MiB", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? 
PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_STACKED); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - - if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { - rrddim_add(chart, "cache", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(chart, "rss", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - - if (cg->memory.detailed_has_swap) - rrddim_add(chart, "swap", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - - rrddim_add(chart, "rss_huge", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(chart, "mapped_file", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - } else { - rrddim_add(chart, "anon", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(chart, "kernel_stack", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(chart, "slab", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(chart, "sock", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(chart, "anon_thp", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(chart, "file", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - } - } - - if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { - rrddim_set(chart, "cache", (collected_number)cg->memory.total_cache); - collected_number rss = (collected_number)(cg->memory.total_rss - cg->memory.total_rss_huge); - if (rss < 0) - rss = 0; - rrddim_set(chart, "rss", rss); - if (cg->memory.detailed_has_swap) - rrddim_set(chart, "swap", (collected_number)cg->memory.total_swap); - rrddim_set(chart, "rss_huge", (collected_number)cg->memory.total_rss_huge); - rrddim_set(chart, "mapped_file", (collected_number)cg->memory.total_mapped_file); - } else { - rrddim_set(chart, "anon", (collected_number)cg->memory.anon); - rrddim_set(chart, "kernel_stack", (collected_number)cg->memory.kernel_stack); - rrddim_set(chart, "slab", (collected_number)cg->memory.slab); - rrddim_set(chart, "sock", (collected_number)cg->memory.sock); - rrddim_set(chart, "anon_thp", (collected_number)cg->memory.anon_thp); - rrddim_set(chart, "file", (collected_number)cg->memory.total_mapped_file); - } - rrdset_done(chart); -} - -void update_mem_writeback_chart(struct cgroup *cg) { - RRDSET *chart = cg->st_writeback; - - if (unlikely(!cg->st_writeback)) { - char *title; - char *context; - int prio; - if (is_cgroup_systemd_service(cg)) { - title = "Systemd Services Writeback Memory"; - context = "systemd.service.memory.writeback"; - prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 20; - } else { - title = "Writeback Memory"; - context = k8s_is_kubepod(cg) ? "k8s.cgroup.writeback" : "cgroup.writeback"; - prio = cgroup_containers_chart_priority + 300; - } - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = cg->st_writeback = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "writeback", - NULL, - "mem", - context, - title, - "MiB", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? 
PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_AREA); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - if (cg->memory.detailed_has_dirty) - rrddim_add(chart, "dirty", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(chart, "writeback", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - } - - if (cg->memory.detailed_has_dirty) - rrddim_set(chart, "dirty", (collected_number)cg->memory.total_dirty); - rrddim_set(chart, "writeback", (collected_number)cg->memory.total_writeback); - rrdset_done(chart); -} - -void update_mem_activity_chart(struct cgroup *cg) { - RRDSET *chart = cg->st_mem_activity; - - if (unlikely(!cg->st_mem_activity)) { - char *title; - char *context; - int prio; - if (is_cgroup_systemd_service(cg)) { - title = "Systemd Services Memory Paging IO"; - context = "systemd.service.memory.paging.io"; - prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 30; - } else { - title = "Memory Activity"; - context = k8s_is_kubepod(cg) ? "k8s.cgroup.mem_activity" : "cgroup.mem_activity"; - prio = cgroup_containers_chart_priority + 400; - } - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = cg->st_mem_activity = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "mem_activity", - NULL, - "mem", - context, - title, - "MiB/s", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - // FIXME: systemd just in, out - rrddim_add(chart, "pgpgin", "in", system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(chart, "pgpgout", "out", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set(chart, "pgpgin", (collected_number)cg->memory.total_pgpgin); - rrddim_set(chart, "pgpgout", (collected_number)cg->memory.total_pgpgout); - rrdset_done(chart); -} - -void update_mem_pgfaults_chart(struct cgroup *cg) { - RRDSET *chart = cg->st_pgfaults; - - if (unlikely(!cg->st_pgfaults)) { - char *title; - char *context; - int prio; - if (is_cgroup_systemd_service(cg)) { - title = "Systemd Services Memory Page Faults"; - context = "systemd.service.memory.paging.faults"; - prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 25; - } else { - title = "Memory Page Faults"; - context = k8s_is_kubepod(cg) ? "k8s.cgroup.pgfaults" : "cgroup.pgfaults"; - prio = cgroup_containers_chart_priority + 500; - } - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = cg->st_pgfaults = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "pgfaults", - NULL, - "mem", - context, - title, - "MiB/s", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? 
PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - rrddim_add(chart, "pgfault", NULL, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(chart, "pgmajfault", "swap", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set(chart, "pgfault", (collected_number)cg->memory.total_pgfault); - rrddim_set(chart, "pgmajfault", (collected_number)cg->memory.total_pgmajfault); - rrdset_done(chart); -} - -void update_mem_usage_limit_chart(struct cgroup *cg, unsigned long long memory_limit) { - if (is_cgroup_systemd_service(cg)) - return; - - RRDSET *chart = cg->st_mem_usage_limit; - - if (unlikely(!cg->st_mem_usage_limit)) { - char *title = "Used RAM within the limits"; - char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.mem_usage_limit" : "cgroup.mem_usage_limit"; - int prio = cgroup_containers_chart_priority + 200; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = cg->st_mem_usage_limit = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "mem_usage_limit", - NULL, - "mem", - context, - title, - "MiB", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_STACKED); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - - rrddim_add(chart, "available", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(chart, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - } - - rrdset_isnot_obsolete___safe_from_collector_thread(chart); - - rrddim_set(chart, "available", (collected_number)(memory_limit - cg->memory.usage_in_bytes)); - rrddim_set(chart, "used", (collected_number)cg->memory.usage_in_bytes); - rrdset_done(chart); -} - -void update_mem_utilization_chart(struct cgroup *cg, unsigned long long memory_limit) { - if (is_cgroup_systemd_service(cg)) - return; - - RRDSET *chart = cg->st_mem_utilization; - - if (unlikely(!cg->st_mem_utilization)) { - char *title = "Memory Utilization"; - char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.mem_utilization" : "cgroup.mem_utilization"; - int prio = cgroup_containers_chart_priority + 199; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = cg->st_mem_utilization = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "mem_utilization", - NULL, - "mem", - context, - title, - "percentage", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_AREA); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - - rrddim_add(chart, "utilization", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - } - - rrdset_isnot_obsolete___safe_from_collector_thread(chart); - collected_number util = (collected_number)(cg->memory.usage_in_bytes * 100 / memory_limit); - rrddim_set(chart, "utilization", util); - rrdset_done(chart); -} - -void update_mem_failcnt_chart(struct cgroup *cg) { - RRDSET *chart = cg->st_mem_failcnt; - - if (unlikely(!cg->st_mem_failcnt)) { - char *title; - char *context; - int prio; - if (is_cgroup_systemd_service(cg)) { - title = "Systemd Services Memory Limit Failures"; - context = "systemd.service.memory.failcnt"; - prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 10; - } else { - title = "Memory Limit Failures"; - context = k8s_is_kubepod(cg) ? 
"k8s.cgroup.mem_failcnt" : "cgroup.mem_failcnt"; - prio = cgroup_containers_chart_priority + 250; - } - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = cg->st_mem_failcnt = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "mem_failcnt", - NULL, - "mem", - context, - title, - "count", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - rrddim_add(chart, "failures", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set(chart, "failures", (collected_number)cg->memory.failcnt); - rrdset_done(chart); -} - -void update_mem_usage_chart(struct cgroup *cg) { - RRDSET *chart = cg->st_mem_usage; - - if (unlikely(!cg->st_mem_usage)) { - char *title; - char *context; - int prio; - if (is_cgroup_systemd_service(cg)) { - title = "Systemd Services Used Memory"; - context = "systemd.service.memory.usage"; - prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 5; - } else { - title = "Used Memory"; - context = k8s_is_kubepod(cg) ? "k8s.cgroup.mem_usage" : "cgroup.mem_usage"; - prio = cgroup_containers_chart_priority + 210; - } - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = cg->st_mem_usage = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "mem_usage", - NULL, - "mem", - context, - title, - "MiB", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_STACKED); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - - cg->st_mem_rd_ram = rrddim_add(chart, "ram", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - cg->st_mem_rd_swap = rrddim_add(chart, "swap", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set_by_pointer(chart, cg->st_mem_rd_ram, (collected_number)cg->memory.usage_in_bytes); - - if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { - rrddim_set_by_pointer( - chart, - cg->st_mem_rd_swap, - cg->memory.msw_usage_in_bytes > (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) ? - (collected_number)(cg->memory.msw_usage_in_bytes - - (cg->memory.usage_in_bytes + cg->memory.total_inactive_file)) : - 0); - } else { - rrddim_set_by_pointer(chart, cg->st_mem_rd_swap, (collected_number)cg->memory.msw_usage_in_bytes); - } - - rrdset_done(chart); -} - -void update_io_serviced_bytes_chart(struct cgroup *cg) { - RRDSET *chart = cg->st_io; - - if (unlikely(!cg->st_io)) { - char *title; - char *context; - int prio; - if (is_cgroup_systemd_service(cg)) { - title = "Systemd Services Disk Read/Write Bandwidth"; - context = "systemd.service.disk.io"; - prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 35; - } else { - title = "I/O Bandwidth (all disks)"; - context = k8s_is_kubepod(cg) ? "k8s.cgroup.io" : "cgroup.io"; - prio = cgroup_containers_chart_priority + 1200; - } - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = cg->st_io = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "io", - NULL, - "disk", - context, - title, - "KiB/s", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? 
PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_AREA); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - cg->st_io_rd_read = rrddim_add(chart, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); - cg->st_io_rd_written = rrddim_add(cg->st_io, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set_by_pointer(chart, cg->st_io_rd_read, (collected_number)cg->io_service_bytes.Read); - rrddim_set_by_pointer(chart, cg->st_io_rd_written, (collected_number)cg->io_service_bytes.Write); - rrdset_done(chart); -} - -void update_io_serviced_ops_chart(struct cgroup *cg) { - RRDSET *chart = cg->st_serviced_ops; - - if (unlikely(!cg->st_serviced_ops)) { - char *title; - char *context; - int prio; - if (is_cgroup_systemd_service(cg)) { - title = "Systemd Services Disk Read/Write Operations"; - context = "systemd.service.disk.iops"; - prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 40; - } else { - title = "Serviced I/O Operations (all disks)"; - context = k8s_is_kubepod(cg) ? "k8s.cgroup.serviced_ops" : "cgroup.serviced_ops"; - prio = cgroup_containers_chart_priority + 1200; - } - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = cg->st_serviced_ops = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "serviced_ops", - NULL, - "disk", - context, - title, - "operations/s", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - rrddim_add(chart, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(chart, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set(chart, "read", (collected_number)cg->io_serviced.Read); - rrddim_set(chart, "write", (collected_number)cg->io_serviced.Write); - rrdset_done(chart); -} - -void update_throttle_io_serviced_bytes_chart(struct cgroup *cg) { - RRDSET *chart = cg->st_throttle_io; - - if (unlikely(!cg->st_throttle_io)) { - char *title; - char *context; - int prio; - if (is_cgroup_systemd_service(cg)) { - title = "Systemd Services Throttle Disk Read/Write Bandwidth"; - context = "systemd.service.disk.throttle.io"; - prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 45; - } else { - title = "Throttle I/O Bandwidth (all disks)"; - context = k8s_is_kubepod(cg) ? "k8s.cgroup.throttle_io" : "cgroup.throttle_io"; - prio = cgroup_containers_chart_priority + 1200; - } - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = cg->st_throttle_io = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "throttle_io", - NULL, - "disk", - context, - title, - "KiB/s", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? 
PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_AREA); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - - cg->st_throttle_io_rd_read = rrddim_add(chart, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); - cg->st_throttle_io_rd_written = rrddim_add(chart, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set_by_pointer(chart, cg->st_throttle_io_rd_read, (collected_number)cg->throttle_io_service_bytes.Read); - rrddim_set_by_pointer(chart, cg->st_throttle_io_rd_written, (collected_number)cg->throttle_io_service_bytes.Write); - rrdset_done(chart); -} - -void update_throttle_io_serviced_ops_chart(struct cgroup *cg) { - RRDSET *chart = cg->st_throttle_serviced_ops; - - if (unlikely(!cg->st_throttle_serviced_ops)) { - char *title; - char *context; - int prio; - if (is_cgroup_systemd_service(cg)) { - title = "Systemd Services Throttle Disk Read/Write Operations"; - context = "systemd.service.disk.throttle.iops"; - prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 50; - } else { - title = "Throttle Serviced I/O Operations (all disks)"; - context = k8s_is_kubepod(cg) ? "k8s.cgroup.throttle_serviced_ops" : "cgroup.throttle_serviced_ops"; - prio = cgroup_containers_chart_priority + 1200; - } - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = cg->st_throttle_serviced_ops = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "throttle_serviced_ops", - NULL, - "disk", - context, - title, - "operations/s", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - rrddim_add(chart, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(chart, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set(chart, "read", (collected_number)cg->throttle_io_serviced.Read); - rrddim_set(chart, "write", (collected_number)cg->throttle_io_serviced.Write); - rrdset_done(chart); -} - -void update_io_queued_ops_chart(struct cgroup *cg) { - RRDSET *chart = cg->st_queued_ops; - - if (unlikely(!cg->st_queued_ops)) { - char *title; - char *context; - int prio; - if (is_cgroup_systemd_service(cg)) { - title = "Systemd Services Queued Disk Read/Write Operations"; - context = "systemd.service.disk.queued_iops"; - prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 55; - } else { - title = "Queued I/O Operations (all disks)"; - context = k8s_is_kubepod(cg) ? "k8s.cgroup.queued_ops" : "cgroup.queued_ops"; - prio = cgroup_containers_chart_priority + 2000; - } - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = cg->st_queued_ops = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "queued_ops", - NULL, - "disk", - context, - title, - "operations", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? 
PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - rrddim_add(chart, "read", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - rrddim_add(chart, "write", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set(chart, "read", (collected_number)cg->io_queued.Read); - rrddim_set(chart, "write", (collected_number)cg->io_queued.Write); - rrdset_done(chart); -} - -void update_io_merged_ops_chart(struct cgroup *cg) { - RRDSET *chart = cg->st_merged_ops; - - if (unlikely(!cg->st_merged_ops)) { - char *title; - char *context; - int prio; - if (is_cgroup_systemd_service(cg)) { - title = "Systemd Services Merged Disk Read/Write Operations"; - context = "systemd.service.disk.merged_iops"; - prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 60; - } else { - title = "Merged I/O Operations (all disks)"; - context = k8s_is_kubepod(cg) ? "k8s.cgroup.merged_ops" : "cgroup.merged_ops"; - prio = cgroup_containers_chart_priority + 2100; - } - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = cg->st_merged_ops = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "merged_ops", - NULL, - "disk", - context, - title, - "operations/s", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - rrddim_add(chart, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(chart, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set(chart, "read", (collected_number)cg->io_merged.Read); - rrddim_set(chart, "write", (collected_number)cg->io_merged.Write); - rrdset_done(chart); -} - -void update_cpu_some_pressure_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - struct pressure *res = &cg->cpu_pressure; - struct pressure_charts *pcs = &res->some; - RRDSET *chart = pcs->share_time.st; - - if (unlikely(!pcs->share_time.st)) { - char *title = "CPU some pressure"; - char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_some_pressure" : "cgroup.cpu_some_pressure"; - int prio = cgroup_containers_chart_priority + 2200; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = pcs->share_time.st = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "cpu_some_pressure", - NULL, - "cpu", - context, - title, - "percentage", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? 
PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set_by_pointer(chart, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100)); - rrddim_set_by_pointer(chart, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100)); - rrddim_set_by_pointer(chart, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100)); - rrdset_done(chart); -} - -void update_cpu_some_pressure_stall_time_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - struct pressure *res = &cg->cpu_pressure; - struct pressure_charts *pcs = &res->some; - RRDSET *chart = pcs->total_time.st; - - if (unlikely(!pcs->total_time.st)) { - char *title = "CPU some pressure stall time"; - char *context = - k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_some_pressure_stall_time" : "cgroup.cpu_some_pressure_stall_time"; - int prio = cgroup_containers_chart_priority + 2220; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = pcs->total_time.st = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "cpu_some_pressure_stall_time", - NULL, - "cpu", - context, - title, - "ms", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - rrdset_update_rrdlabels(chart, cg->chart_labels); - pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set_by_pointer(chart, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total)); - rrdset_done(chart); -} - -void update_cpu_full_pressure_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - struct pressure *res = &cg->cpu_pressure; - struct pressure_charts *pcs = &res->full; - RRDSET *chart = pcs->share_time.st; - - if (unlikely(!pcs->share_time.st)) { - char *title = "CPU full pressure"; - char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_full_pressure" : "cgroup.cpu_full_pressure"; - int prio = cgroup_containers_chart_priority + 2240; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = pcs->share_time.st = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "cpu_full_pressure", - NULL, - "cpu", - context, - title, - "percentage", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? 
PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set_by_pointer(chart, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100)); - rrddim_set_by_pointer(chart, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100)); - rrddim_set_by_pointer(chart, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100)); - rrdset_done(chart); -} - -void update_cpu_full_pressure_stall_time_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - struct pressure *res = &cg->cpu_pressure; - struct pressure_charts *pcs = &res->full; - RRDSET *chart = pcs->total_time.st; - - if (unlikely(!pcs->total_time.st)) { - char *title = "CPU full pressure stall time"; - char *context = - k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_full_pressure_stall_time" : "cgroup.cpu_full_pressure_stall_time"; - int prio = cgroup_containers_chart_priority + 2260; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = pcs->total_time.st = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "cpu_full_pressure_stall_time", - NULL, - "cpu", - context, - title, - "ms", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set_by_pointer(chart, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total)); - rrdset_done(chart); -} - -void update_mem_some_pressure_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - struct pressure *res = &cg->memory_pressure; - struct pressure_charts *pcs = &res->some; - RRDSET *chart = pcs->share_time.st; - - if (unlikely(!pcs->share_time.st)) { - char *title = "Memory some pressure"; - char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.memory_some_pressure" : "cgroup.memory_some_pressure"; - int prio = cgroup_containers_chart_priority + 2300; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = pcs->share_time.st = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "mem_some_pressure", - NULL, - "mem", - context, - title, - "percentage", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? 
PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set_by_pointer(chart, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100)); - rrddim_set_by_pointer(chart, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100)); - rrddim_set_by_pointer(chart, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100)); - rrdset_done(chart); -} - -void update_mem_some_pressure_stall_time_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - struct pressure *res = &cg->memory_pressure; - struct pressure_charts *pcs = &res->some; - RRDSET *chart = pcs->total_time.st; - - if (unlikely(!pcs->total_time.st)) { - char *title = "Memory some pressure stall time"; - char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.memory_some_pressure_stall_time" : - "cgroup.memory_some_pressure_stall_time"; - int prio = cgroup_containers_chart_priority + 2320; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = pcs->total_time.st = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "memory_some_pressure_stall_time", - NULL, - "mem", - context, - title, - "ms", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set_by_pointer(chart, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total)); - rrdset_done(chart); -} - -void update_mem_full_pressure_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - struct pressure *res = &cg->memory_pressure; - struct pressure_charts *pcs = &res->full; - RRDSET *chart = pcs->share_time.st; - - if (unlikely(!pcs->share_time.st)) { - char *title = "Memory full pressure"; - char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.memory_full_pressure" : "cgroup.memory_full_pressure"; - int prio = cgroup_containers_chart_priority + 2340; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = pcs->share_time.st = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "mem_full_pressure", - NULL, - "mem", - context, - title, - "percentage", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? 
PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set_by_pointer(chart, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100)); - rrddim_set_by_pointer(chart, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100)); - rrddim_set_by_pointer(chart, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100)); - rrdset_done(chart); -} - -void update_mem_full_pressure_stall_time_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - struct pressure *res = &cg->memory_pressure; - struct pressure_charts *pcs = &res->full; - RRDSET *chart = pcs->total_time.st; - - if (unlikely(!pcs->total_time.st)) { - char *title = "Memory full pressure stall time"; - char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.memory_full_pressure_stall_time" : - "cgroup.memory_full_pressure_stall_time"; - int prio = cgroup_containers_chart_priority + 2360; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = pcs->total_time.st = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "memory_full_pressure_stall_time", - NULL, - "mem", - context, - title, - "ms", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - rrdset_update_rrdlabels(chart, cg->chart_labels); - pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set_by_pointer(chart, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total)); - rrdset_done(chart); -} - -void update_irq_some_pressure_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - struct pressure *res = &cg->irq_pressure; - struct pressure_charts *pcs = &res->some; - RRDSET *chart = pcs->share_time.st; - - if (unlikely(!pcs->share_time.st)) { - char *title = "IRQ some pressure"; - char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.irq_some_pressure" : "cgroup.irq_some_pressure"; - int prio = cgroup_containers_chart_priority + 2310; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = pcs->share_time.st = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "irq_some_pressure", - NULL, - "interrupts", - context, - title, - "percentage", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? 
PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set_by_pointer(chart, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100)); - rrddim_set_by_pointer(chart, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100)); - rrddim_set_by_pointer(chart, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100)); - rrdset_done(chart); -} - -void update_irq_some_pressure_stall_time_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - struct pressure *res = &cg->irq_pressure; - struct pressure_charts *pcs = &res->some; - RRDSET *chart = pcs->total_time.st; - - if (unlikely(!pcs->total_time.st)) { - char *title = "IRQ some pressure stall time"; - char *context = - k8s_is_kubepod(cg) ? "k8s.cgroup.irq_some_pressure_stall_time" : "cgroup.irq_some_pressure_stall_time"; - int prio = cgroup_containers_chart_priority + 2330; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = pcs->total_time.st = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "irq_some_pressure_stall_time", - NULL, - "interrupts", - context, - title, - "ms", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set_by_pointer(chart, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total)); - rrdset_done(chart); -} - -void update_irq_full_pressure_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - struct pressure *res = &cg->irq_pressure; - struct pressure_charts *pcs = &res->full; - RRDSET *chart = pcs->share_time.st; - - if (unlikely(!pcs->share_time.st)) { - char *title = "IRQ full pressure"; - char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.irq_full_pressure" : "cgroup.irq_full_pressure"; - int prio = cgroup_containers_chart_priority + 2350; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = pcs->share_time.st = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "irq_full_pressure", - NULL, - "interrupts", - context, - title, - "percentage", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? 
PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set_by_pointer(chart, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100)); - rrddim_set_by_pointer(chart, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100)); - rrddim_set_by_pointer(chart, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100)); - rrdset_done(chart); -} - -void update_irq_full_pressure_stall_time_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - struct pressure *res = &cg->irq_pressure; - struct pressure_charts *pcs = &res->full; - RRDSET *chart = pcs->total_time.st; - - if (unlikely(!pcs->total_time.st)) { - char *title = "IRQ full pressure stall time"; - char *context = - k8s_is_kubepod(cg) ? "k8s.cgroup.irq_full_pressure_stall_time" : "cgroup.irq_full_pressure_stall_time"; - int prio = cgroup_containers_chart_priority + 2370; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = pcs->total_time.st = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "irq_full_pressure_stall_time", - NULL, - "interrupts", - context, - title, - "ms", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set_by_pointer(chart, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total)); - rrdset_done(chart); -} - -void update_io_some_pressure_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - struct pressure *res = &cg->io_pressure; - struct pressure_charts *pcs = &res->some; - RRDSET *chart = pcs->share_time.st; - - if (unlikely(!pcs->share_time.st)) { - char *title = "I/O some pressure"; - char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.io_some_pressure" : "cgroup.io_some_pressure"; - int prio = cgroup_containers_chart_priority + 2400; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = pcs->share_time.st = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "io_some_pressure", - NULL, - "disk", - context, - title, - "percentage", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? 
PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set_by_pointer(chart, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100)); - rrddim_set_by_pointer(chart, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100)); - rrddim_set_by_pointer(chart, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100)); - rrdset_done(chart); -} - -void update_io_some_pressure_stall_time_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - struct pressure *res = &cg->io_pressure; - struct pressure_charts *pcs = &res->some; - RRDSET *chart = pcs->total_time.st; - - if (unlikely(!pcs->total_time.st)) { - char *title = "I/O some pressure stall time"; - char *context = - k8s_is_kubepod(cg) ? "k8s.cgroup.io_some_pressure_stall_time" : "cgroup.io_some_pressure_stall_time"; - int prio = cgroup_containers_chart_priority + 2420; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = pcs->total_time.st = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "io_some_pressure_stall_time", - NULL, - "disk", - context, - title, - "ms", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - rrdset_update_rrdlabels(chart, cg->chart_labels); - pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set_by_pointer(chart, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total)); - rrdset_done(chart); -} - -void update_io_full_pressure_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - struct pressure *res = &cg->io_pressure; - struct pressure_charts *pcs = &res->full; - RRDSET *chart = pcs->share_time.st; - - if (unlikely(!pcs->share_time.st)) { - char *title = "I/O full pressure"; - char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.io_full_pressure" : "cgroup.io_full_pressure"; - int prio = cgroup_containers_chart_priority + 2440; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = pcs->share_time.st = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "io_full_pressure", - NULL, - "disk", - context, - title, - "percentage", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? 
PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set_by_pointer(chart, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100)); - rrddim_set_by_pointer(chart, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100)); - rrddim_set_by_pointer(chart, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100)); - rrdset_done(chart); -} - -void update_io_full_pressure_stall_time_chart(struct cgroup *cg) { - if (is_cgroup_systemd_service(cg)) - return; - - struct pressure *res = &cg->io_pressure; - struct pressure_charts *pcs = &res->full; - RRDSET *chart = pcs->total_time.st; - - if (unlikely(!pcs->total_time.st)) { - char *title = "I/O full pressure stall time"; - char *context = - k8s_is_kubepod(cg) ? "k8s.cgroup.io_full_pressure_stall_time" : "cgroup.io_full_pressure_stall_time"; - int prio = cgroup_containers_chart_priority + 2460; - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = pcs->total_time.st = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "io_full_pressure_stall_time", - NULL, - "disk", - context, - title, - "ms", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set_by_pointer(chart, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total)); - rrdset_done(chart); -} - -void update_pids_current_chart(struct cgroup *cg) { - RRDSET *chart = cg->st_pids; - - if (unlikely(!cg->st_pids)) { - char *title; - char *context; - int prio; - if (is_cgroup_systemd_service(cg)) { - title = "Systemd Services Number of Processes"; - context = "systemd.service.pids.current"; - prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 70; - } else { - title = "Number of processes"; - context = k8s_is_kubepod(cg) ? "k8s.cgroup.pids_current" : "cgroup.pids_current"; - prio = cgroup_containers_chart_priority + 2150; - } - - char buff[RRD_ID_LENGTH_MAX + 1]; - chart = cg->st_pids = rrdset_create_localhost( - cgroup_chart_type(buff, cg), - "pids_current", - NULL, - "pids", - context, - title, - "pids", - PLUGIN_CGROUPS_NAME, - is_cgroup_systemd_service(cg) ? 
PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, - prio, - cgroup_update_every, - RRDSET_TYPE_LINE); - - rrdset_update_rrdlabels(chart, cg->chart_labels); - cg->st_pids_rd_pids_current = rrddim_add(chart, "pids", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set_by_pointer(chart, cg->st_pids_rd_pids_current, (collected_number)cg->pids.pids_current); - rrdset_done(chart); -} diff --git a/collectors/cgroups.plugin/cgroup-discovery.c b/collectors/cgroups.plugin/cgroup-discovery.c deleted file mode 100644 index ede35ed8a..000000000 --- a/collectors/cgroups.plugin/cgroup-discovery.c +++ /dev/null @@ -1,1245 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "cgroup-internals.h" - -// discovery cgroup thread worker jobs -#define WORKER_DISCOVERY_INIT 0 -#define WORKER_DISCOVERY_FIND 1 -#define WORKER_DISCOVERY_PROCESS 2 -#define WORKER_DISCOVERY_PROCESS_RENAME 3 -#define WORKER_DISCOVERY_PROCESS_NETWORK 4 -#define WORKER_DISCOVERY_PROCESS_FIRST_TIME 5 -#define WORKER_DISCOVERY_UPDATE 6 -#define WORKER_DISCOVERY_CLEANUP 7 -#define WORKER_DISCOVERY_COPY 8 -#define WORKER_DISCOVERY_SHARE 9 -#define WORKER_DISCOVERY_LOCK 10 - -#if WORKER_UTILIZATION_MAX_JOB_TYPES < 11 -#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 11 -#endif - -struct cgroup *discovered_cgroup_root = NULL; - -char cgroup_chart_id_prefix[] = "cgroup_"; -char services_chart_id_prefix[] = "systemd_"; -char *cgroups_rename_script = NULL; - - -// ---------------------------------------------------------------------------- - -static inline void free_pressure(struct pressure *res) { - if (res->some.share_time.st) rrdset_is_obsolete___safe_from_collector_thread(res->some.share_time.st); - if (res->some.total_time.st) rrdset_is_obsolete___safe_from_collector_thread(res->some.total_time.st); - if (res->full.share_time.st) rrdset_is_obsolete___safe_from_collector_thread(res->full.share_time.st); - if (res->full.total_time.st) rrdset_is_obsolete___safe_from_collector_thread(res->full.total_time.st); - freez(res->filename); -} - -static inline void cgroup_free_network_interfaces(struct cgroup *cg) { - while(cg->interfaces) { - struct cgroup_network_interface *i = cg->interfaces; - cg->interfaces = i->next; - - // delete the registration of proc_net_dev rename - netdev_rename_device_del(i->host_device); - - freez((void *)i->host_device); - freez((void *)i->container_device); - freez((void *)i); - } -} - -static inline void cgroup_free(struct cgroup *cg) { - netdata_log_debug(D_CGROUP, "Removing cgroup '%s' with chart id '%s' (was %s and %s)", cg->id, cg->chart_id, (cg->enabled)?"enabled":"disabled", (cg->available)?"available":"not available"); - - cgroup_netdev_delete(cg); - - if(cg->st_cpu) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu); - if(cg->st_cpu_limit) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_limit); - if(cg->st_cpu_per_core) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_per_core); - if(cg->st_cpu_nr_throttled) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_nr_throttled); - if(cg->st_cpu_throttled_time) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_throttled_time); - if(cg->st_cpu_shares) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_shares); - if(cg->st_mem) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem); - if(cg->st_writeback) rrdset_is_obsolete___safe_from_collector_thread(cg->st_writeback); - if(cg->st_mem_activity) 
rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_activity); - if(cg->st_pgfaults) rrdset_is_obsolete___safe_from_collector_thread(cg->st_pgfaults); - if(cg->st_mem_usage) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_usage); - if(cg->st_mem_usage_limit) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_usage_limit); - if(cg->st_mem_utilization) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_utilization); - if(cg->st_mem_failcnt) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_failcnt); - if(cg->st_io) rrdset_is_obsolete___safe_from_collector_thread(cg->st_io); - if(cg->st_serviced_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_serviced_ops); - if(cg->st_throttle_io) rrdset_is_obsolete___safe_from_collector_thread(cg->st_throttle_io); - if(cg->st_throttle_serviced_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_throttle_serviced_ops); - if(cg->st_queued_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_queued_ops); - if(cg->st_merged_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_merged_ops); - if(cg->st_pids) rrdset_is_obsolete___safe_from_collector_thread(cg->st_pids); - - freez(cg->filename_cpuset_cpus); - freez(cg->filename_cpu_cfs_period); - freez(cg->filename_cpu_cfs_quota); - freez(cg->filename_memory_limit); - freez(cg->filename_memoryswap_limit); - - cgroup_free_network_interfaces(cg); - - freez(cg->cpuacct_usage.cpu_percpu); - - freez(cg->cpuacct_stat.filename); - freez(cg->cpuacct_usage.filename); - freez(cg->cpuacct_cpu_throttling.filename); - freez(cg->cpuacct_cpu_shares.filename); - - arl_free(cg->memory.arl_base); - freez(cg->memory.filename_detailed); - freez(cg->memory.filename_failcnt); - freez(cg->memory.filename_usage_in_bytes); - freez(cg->memory.filename_msw_usage_in_bytes); - - freez(cg->io_service_bytes.filename); - freez(cg->io_serviced.filename); - - freez(cg->throttle_io_service_bytes.filename); - freez(cg->throttle_io_serviced.filename); - - freez(cg->io_merged.filename); - freez(cg->io_queued.filename); - freez(cg->pids.pids_current_filename); - - free_pressure(&cg->cpu_pressure); - free_pressure(&cg->io_pressure); - free_pressure(&cg->memory_pressure); - free_pressure(&cg->irq_pressure); - - freez(cg->id); - freez(cg->intermediate_id); - freez(cg->chart_id); - freez(cg->name); - - rrdlabels_destroy(cg->chart_labels); - - freez(cg); - - cgroup_root_count--; -} - -// ---------------------------------------------------------------------------- -// add/remove/find cgroup objects - -#define CGROUP_CHARTID_LINE_MAX 1024 - -static inline char *cgroup_chart_id_strdupz(const char *s) { - if(!s || !*s) s = "/"; - - if(*s == '/' && s[1] != '\0') s++; - - char *r = strdupz(s); - netdata_fix_chart_id(r); - - return r; -} - -// TODO: move the code to cgroup_chart_id_strdupz() when the renaming script is fixed -static inline void substitute_dots_in_id(char *s) { - // dots are used to distinguish chart type and id in streaming, so we should replace them - for (char *d = s; *d; d++) { - if (*d == '.') - *d = '-'; - } -} - -// ---------------------------------------------------------------------------- -// parse k8s labels - -char *cgroup_parse_resolved_name_and_labels(RRDLABELS *labels, char *data) { - // the first word, up to the first space is the name - char *name = strsep_skip_consecutive_separators(&data, " "); - - // the rest are key=value pairs separated by comma - while(data) { - char *pair = strsep_skip_consecutive_separators(&data, ","); - rrdlabels_add_pair(labels, 
pair, RRDLABEL_SRC_AUTO | RRDLABEL_SRC_K8S); - } - - return name; -} - -static inline void discovery_rename_cgroup(struct cgroup *cg) { - if (!cg->pending_renames) { - return; - } - cg->pending_renames--; - - netdata_log_debug(D_CGROUP, "looking for the name of cgroup '%s' with chart id '%s'", cg->id, cg->chart_id); - netdata_log_debug(D_CGROUP, "executing command %s \"%s\" for cgroup '%s'", cgroups_rename_script, cg->intermediate_id, cg->chart_id); - pid_t cgroup_pid; - - FILE *fp_child_input, *fp_child_output; - (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_rename_script, cg->id, cg->intermediate_id); - if (!fp_child_output) { - collector_error("CGROUP: cannot popen(%s \"%s\", \"r\").", cgroups_rename_script, cg->intermediate_id); - cg->pending_renames = 0; - cg->processed = 1; - return; - } - - char buffer[CGROUP_CHARTID_LINE_MAX + 1]; - char *new_name = fgets(buffer, CGROUP_CHARTID_LINE_MAX, fp_child_output); - int exit_code = netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); - - switch (exit_code) { - case 0: - cg->pending_renames = 0; - break; - - case 3: - cg->pending_renames = 0; - cg->processed = 1; - break; - - default: - break; - } - - if (cg->pending_renames || cg->processed) - return; - if (!new_name || !*new_name || *new_name == '\n') - return; - if (!(new_name = trim(new_name))) - return; - - if (!cg->chart_labels) - cg->chart_labels = rrdlabels_create(); - // read the new labels and remove the obsolete ones - rrdlabels_unmark_all(cg->chart_labels); - char *name = cgroup_parse_resolved_name_and_labels(cg->chart_labels, new_name); - rrdlabels_remove_all_unmarked(cg->chart_labels); - - freez(cg->name); - cg->name = strdupz(name); - - freez(cg->chart_id); - cg->chart_id = cgroup_chart_id_strdupz(name); - - substitute_dots_in_id(cg->chart_id); - cg->hash_chart_id = simple_hash(cg->chart_id); -} - -static void is_cgroup_procs_exist(netdata_ebpf_cgroup_shm_body_t *out, char *id) { - struct stat buf; - - snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_cpuset_base, id); - if (likely(stat(out->path, &buf) == 0)) { - return; - } - - snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_blkio_base, id); - if (likely(stat(out->path, &buf) == 0)) { - return; - } - - snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_memory_base, id); - if (likely(stat(out->path, &buf) == 0)) { - return; - } - - snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_devices_base, id); - if (likely(stat(out->path, &buf) == 0)) { - return; - } - - out->path[0] = '\0'; - out->enabled = 0; -} - -static inline void convert_cgroup_to_systemd_service(struct cgroup *cg) { - char buffer[CGROUP_CHARTID_LINE_MAX + 1]; - cg->options |= CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE; - strncpyz(buffer, cg->id, CGROUP_CHARTID_LINE_MAX); - char *s = buffer; - - // skip to the last slash - size_t len = strlen(s); - while (len--) { - if (unlikely(s[len] == '/')) { - break; - } - } - if (len) { - s = &s[len + 1]; - } - - // remove extension - len = strlen(s); - while (len--) { - if (unlikely(s[len] == '.')) { - break; - } - } - if (len) { - s[len] = '\0'; - } - - freez(cg->name); - cg->name = strdupz(s); - - freez(cg->chart_id); - cg->chart_id = cgroup_chart_id_strdupz(s); - substitute_dots_in_id(cg->chart_id); - cg->hash_chart_id = simple_hash(cg->chart_id); -} - -static inline struct cgroup *discovery_cgroup_add(const char *id) { - netdata_log_debug(D_CGROUP, "adding to list, cgroup with id '%s'", id); - - struct 
cgroup *cg = callocz(1, sizeof(struct cgroup)); - - cg->id = strdupz(id); - cg->hash = simple_hash(cg->id); - - cg->name = strdupz(id); - - cg->intermediate_id = cgroup_chart_id_strdupz(id); - - cg->chart_id = cgroup_chart_id_strdupz(id); - substitute_dots_in_id(cg->chart_id); - cg->hash_chart_id = simple_hash(cg->chart_id); - - if (cgroup_use_unified_cgroups) { - cg->options |= CGROUP_OPTIONS_IS_UNIFIED; - } - - if (!discovered_cgroup_root) - discovered_cgroup_root = cg; - else { - struct cgroup *t; - for (t = discovered_cgroup_root; t->discovered_next; t = t->discovered_next) { - } - t->discovered_next = cg; - } - - return cg; -} - -static inline struct cgroup *discovery_cgroup_find(const char *id) { - netdata_log_debug(D_CGROUP, "searching for cgroup '%s'", id); - - uint32_t hash = simple_hash(id); - - struct cgroup *cg; - for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) { - if(hash == cg->hash && strcmp(id, cg->id) == 0) - break; - } - - netdata_log_debug(D_CGROUP, "cgroup '%s' %s in memory", id, (cg)?"found":"not found"); - return cg; -} - -static int calc_cgroup_depth(const char *id) { - int depth = 0; - const char *s; - for (s = id; *s; s++) { - depth += unlikely(*s == '/'); - } - return depth; -} - -static inline void discovery_find_cgroup_in_dir_callback(const char *dir) { - if (!dir || !*dir) { - dir = "/"; - } - - netdata_log_debug(D_CGROUP, "examining cgroup dir '%s'", dir); - - struct cgroup *cg = discovery_cgroup_find(dir); - if (cg) { - cg->available = 1; - return; - } - - if (cgroup_root_count >= cgroup_root_max) { - nd_log_limit_static_global_var(erl, 3600, 0); - nd_log_limit(&erl, NDLS_COLLECTORS, NDLP_WARNING, "CGROUP: maximum number of cgroups reached (%d). No more cgroups will be added.", cgroup_root_count); - return; - } - - if (cgroup_max_depth > 0) { - int depth = calc_cgroup_depth(dir); - if (depth > cgroup_max_depth) { - nd_log_collector(NDLP_DEBUG, "CGROUP: '%s' is too deep (%d, while max is %d)", dir, depth, cgroup_max_depth); - return; - } - } - - cg = discovery_cgroup_add(dir); - cg->available = 1; - cg->first_time_seen = 1; - cg->function_ready = false; - cgroup_root_count++; -} - -static inline int discovery_find_dir_in_subdirs(const char *base, const char *this, void (*callback)(const char *)) { - if(!this) this = base; - netdata_log_debug(D_CGROUP, "searching for directories in '%s' (base '%s')", this?this:"", base); - - size_t dirlen = strlen(this), baselen = strlen(base); - - int ret = -1; - int enabled = -1; - - const char *relative_path = &this[baselen]; - if(!*relative_path) relative_path = "/"; - - DIR *dir = opendir(this); - if(!dir) { - collector_error("CGROUP: cannot read directory '%s'", base); - return ret; - } - ret = 1; - - callback(relative_path); - - struct dirent *de = NULL; - while((de = readdir(dir))) { - if(de->d_type == DT_DIR - && ( - (de->d_name[0] == '.' && de->d_name[1] == '\0') - || (de->d_name[0] == '.' && de->d_name[1] == '.' 
&& de->d_name[2] == '\0') - )) - continue; - - if(de->d_type == DT_DIR) { - if(enabled == -1) { - const char *r = relative_path; - if(*r == '\0') r = "/"; - - // do not decent in directories we are not interested - enabled = matches_search_cgroup_paths(r); - } - - if(enabled) { - char *s = mallocz(dirlen + strlen(de->d_name) + 2); - strcpy(s, this); - strcat(s, "/"); - strcat(s, de->d_name); - int ret2 = discovery_find_dir_in_subdirs(base, s, callback); - if(ret2 > 0) ret += ret2; - freez(s); - } - } - } - - closedir(dir); - return ret; -} - -static inline void discovery_mark_as_unavailable_all_cgroups() { - for (struct cgroup *cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { - cg->available = 0; - } -} - -static inline void discovery_update_filenames_cgroup_v1(struct cgroup *cg) { - char filename[FILENAME_MAX + 1]; - struct stat buf; - - // CPU - if (unlikely(cgroup_enable_cpuacct_stat && !cg->cpuacct_stat.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/cpuacct.stat", cgroup_cpuacct_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->cpuacct_stat.filename = strdupz(filename); - cg->cpuacct_stat.enabled = cgroup_enable_cpuacct_stat; - snprintfz(filename, FILENAME_MAX, "%s%s/cpuset.cpus", cgroup_cpuset_base, cg->id); - cg->filename_cpuset_cpus = strdupz(filename); - snprintfz(filename, FILENAME_MAX, "%s%s/cpu.cfs_period_us", cgroup_cpuacct_base, cg->id); - cg->filename_cpu_cfs_period = strdupz(filename); - snprintfz(filename, FILENAME_MAX, "%s%s/cpu.cfs_quota_us", cgroup_cpuacct_base, cg->id); - cg->filename_cpu_cfs_quota = strdupz(filename); - } - } - // FIXME: remove usage_percpu - if (unlikely(cgroup_enable_cpuacct_usage && !cg->cpuacct_usage.filename && !is_cgroup_systemd_service(cg))) { - snprintfz(filename, FILENAME_MAX, "%s%s/cpuacct.usage_percpu", cgroup_cpuacct_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->cpuacct_usage.filename = strdupz(filename); - cg->cpuacct_usage.enabled = cgroup_enable_cpuacct_usage; - } - } - if (unlikely( - cgroup_enable_cpuacct_cpu_throttling && !cg->cpuacct_cpu_throttling.filename && - !is_cgroup_systemd_service(cg))) { - snprintfz(filename, FILENAME_MAX, "%s%s/cpu.stat", cgroup_cpuacct_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->cpuacct_cpu_throttling.filename = strdupz(filename); - cg->cpuacct_cpu_throttling.enabled = cgroup_enable_cpuacct_cpu_throttling; - } - } - if (unlikely( - cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename && !is_cgroup_systemd_service(cg))) { - snprintfz(filename, FILENAME_MAX, "%s%s/cpu.shares", cgroup_cpuacct_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->cpuacct_cpu_shares.filename = strdupz(filename); - cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares; - } - } - - // Memory - if (unlikely( - (cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed && - (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) { - snprintfz(filename, FILENAME_MAX, "%s%s/memory.stat", cgroup_memory_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->memory.filename_detailed = strdupz(filename); - cg->memory.enabled_detailed = - (cgroup_enable_detailed_memory == CONFIG_BOOLEAN_YES) ? 
CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_AUTO; - } - } - if (unlikely(cgroup_enable_memory && !cg->memory.filename_usage_in_bytes)) { - snprintfz(filename, FILENAME_MAX, "%s%s/memory.usage_in_bytes", cgroup_memory_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->memory.filename_usage_in_bytes = strdupz(filename); - cg->memory.enabled_usage_in_bytes = cgroup_enable_memory; - snprintfz(filename, FILENAME_MAX, "%s%s/memory.limit_in_bytes", cgroup_memory_base, cg->id); - cg->filename_memory_limit = strdupz(filename); - } - } - if (unlikely(cgroup_enable_swap && !cg->memory.filename_msw_usage_in_bytes)) { - snprintfz(filename, FILENAME_MAX, "%s%s/memory.memsw.usage_in_bytes", cgroup_memory_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->memory.filename_msw_usage_in_bytes = strdupz(filename); - cg->memory.enabled_msw_usage_in_bytes = cgroup_enable_swap; - snprintfz(filename, FILENAME_MAX, "%s%s/memory.memsw.limit_in_bytes", cgroup_memory_base, cg->id); - cg->filename_memoryswap_limit = strdupz(filename); - } - } - if (unlikely(cgroup_enable_memory_failcnt && !cg->memory.filename_failcnt)) { - snprintfz(filename, FILENAME_MAX, "%s%s/memory.failcnt", cgroup_memory_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->memory.filename_failcnt = strdupz(filename); - cg->memory.enabled_failcnt = cgroup_enable_memory_failcnt; - } - } - - // Blkio - if (unlikely(cgroup_enable_blkio_io && !cg->io_service_bytes.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_service_bytes_recursive", cgroup_blkio_base, cg->id); - if (unlikely(stat(filename, &buf) != -1)) { - cg->io_service_bytes.filename = strdupz(filename); - cg->io_service_bytes.enabled = cgroup_enable_blkio_io; - } else { - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_service_bytes", cgroup_blkio_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->io_service_bytes.filename = strdupz(filename); - cg->io_service_bytes.enabled = cgroup_enable_blkio_io; - } - } - } - if (unlikely(cgroup_enable_blkio_ops && !cg->io_serviced.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_serviced_recursive", cgroup_blkio_base, cg->id); - if (unlikely(stat(filename, &buf) != -1)) { - cg->io_serviced.filename = strdupz(filename); - cg->io_serviced.enabled = cgroup_enable_blkio_ops; - } else { - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_serviced", cgroup_blkio_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->io_serviced.filename = strdupz(filename); - cg->io_serviced.enabled = cgroup_enable_blkio_ops; - } - } - } - if (unlikely(cgroup_enable_blkio_throttle_io && !cg->throttle_io_service_bytes.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_service_bytes_recursive", cgroup_blkio_base, cg->id); - if (unlikely(stat(filename, &buf) != -1)) { - cg->throttle_io_service_bytes.filename = strdupz(filename); - cg->throttle_io_service_bytes.enabled = cgroup_enable_blkio_throttle_io; - } else { - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_service_bytes", cgroup_blkio_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->throttle_io_service_bytes.filename = strdupz(filename); - cg->throttle_io_service_bytes.enabled = cgroup_enable_blkio_throttle_io; - } - } - } - if (unlikely(cgroup_enable_blkio_throttle_ops && !cg->throttle_io_serviced.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_serviced_recursive", cgroup_blkio_base, cg->id); - if (unlikely(stat(filename, &buf) != -1)) { - 
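Throughout this blkio block the collector tries the `*_recursive` variant of each accounting file first and only falls back to the plain name if it is missing. A tiny stand-alone sketch of that "first existing file wins" idea (first_existing is a hypothetical helper written for illustration, not part of the plugin; the paths are examples only):

#include <stdio.h>
#include <sys/stat.h>

/* return whichever of the two candidate files exists, preferring the first */
static const char *first_existing(const char *preferred, const char *fallback) {
    struct stat st;
    if (stat(preferred, &st) == 0) return preferred;
    if (stat(fallback, &st) == 0) return fallback;
    return NULL;
}

int main(void) {
    const char *f = first_existing(
        "/sys/fs/cgroup/blkio/blkio.io_service_bytes_recursive",
        "/sys/fs/cgroup/blkio/blkio.io_service_bytes");
    printf("%s\n", f ? f : "neither file exists on this host");
    return 0;
}
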
cg->throttle_io_serviced.filename = strdupz(filename); - cg->throttle_io_serviced.enabled = cgroup_enable_blkio_throttle_ops; - } else { - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_serviced", cgroup_blkio_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->throttle_io_serviced.filename = strdupz(filename); - cg->throttle_io_serviced.enabled = cgroup_enable_blkio_throttle_ops; - } - } - } - if (unlikely(cgroup_enable_blkio_merged_ops && !cg->io_merged.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_merged_recursive", cgroup_blkio_base, cg->id); - if (unlikely(stat(filename, &buf) != -1)) { - cg->io_merged.filename = strdupz(filename); - cg->io_merged.enabled = cgroup_enable_blkio_merged_ops; - } else { - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_merged", cgroup_blkio_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->io_merged.filename = strdupz(filename); - cg->io_merged.enabled = cgroup_enable_blkio_merged_ops; - } - } - } - if (unlikely(cgroup_enable_blkio_queued_ops && !cg->io_queued.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_queued_recursive", cgroup_blkio_base, cg->id); - if (unlikely(stat(filename, &buf) != -1)) { - cg->io_queued.filename = strdupz(filename); - cg->io_queued.enabled = cgroup_enable_blkio_queued_ops; - } else { - snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_queued", cgroup_blkio_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->io_queued.filename = strdupz(filename); - cg->io_queued.enabled = cgroup_enable_blkio_queued_ops; - } - } - } - - // Pids - if (unlikely(!cg->pids.pids_current_filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/pids.current", cgroup_pids_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->pids.pids_current_filename = strdupz(filename); - } - } -} - -static inline void discovery_update_filenames_cgroup_v2(struct cgroup *cg) { - char filename[FILENAME_MAX + 1]; - struct stat buf; - - // CPU - if (unlikely((cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_cpu_throttling) && !cg->cpuacct_stat.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/cpu.stat", cgroup_unified_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->cpuacct_stat.filename = strdupz(filename); - cg->cpuacct_stat.enabled = cgroup_enable_cpuacct_stat; - cg->cpuacct_cpu_throttling.enabled = cgroup_enable_cpuacct_cpu_throttling; - cg->filename_cpuset_cpus = NULL; - cg->filename_cpu_cfs_period = NULL; - snprintfz(filename, FILENAME_MAX, "%s%s/cpu.max", cgroup_unified_base, cg->id); - cg->filename_cpu_cfs_quota = strdupz(filename); - } - } - if (unlikely(cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/cpu.weight", cgroup_unified_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->cpuacct_cpu_shares.filename = strdupz(filename); - cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares; - } - } - - // Memory - // FIXME: this if condition! - if (unlikely( - (cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed && - (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) { - snprintfz(filename, FILENAME_MAX, "%s%s/memory.stat", cgroup_unified_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->memory.filename_detailed = strdupz(filename); - cg->memory.enabled_detailed = - (cgroup_enable_detailed_memory == CONFIG_BOOLEAN_YES) ? 
CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_AUTO; - } - } - - if (unlikely(cgroup_enable_memory && !cg->memory.filename_usage_in_bytes)) { - snprintfz(filename, FILENAME_MAX, "%s%s/memory.current", cgroup_unified_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->memory.filename_usage_in_bytes = strdupz(filename); - cg->memory.enabled_usage_in_bytes = cgroup_enable_memory; - snprintfz(filename, FILENAME_MAX, "%s%s/memory.max", cgroup_unified_base, cg->id); - cg->filename_memory_limit = strdupz(filename); - } - } - - if (unlikely(cgroup_enable_swap && !cg->memory.filename_msw_usage_in_bytes)) { - snprintfz(filename, FILENAME_MAX, "%s%s/memory.swap.current", cgroup_unified_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->memory.filename_msw_usage_in_bytes = strdupz(filename); - cg->memory.enabled_msw_usage_in_bytes = cgroup_enable_swap; - snprintfz(filename, FILENAME_MAX, "%s%s/memory.swap.max", cgroup_unified_base, cg->id); - cg->filename_memoryswap_limit = strdupz(filename); - } - } - - // Blkio - if (unlikely(cgroup_enable_blkio_io && !cg->io_service_bytes.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/io.stat", cgroup_unified_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->io_service_bytes.filename = strdupz(filename); - cg->io_service_bytes.enabled = cgroup_enable_blkio_io; - } - } - - if (unlikely(cgroup_enable_blkio_ops && !cg->io_serviced.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/io.stat", cgroup_unified_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->io_serviced.filename = strdupz(filename); - cg->io_serviced.enabled = cgroup_enable_blkio_ops; - } - } - - // PSI - if (unlikely(cgroup_enable_pressure_cpu && !cg->cpu_pressure.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/cpu.pressure", cgroup_unified_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->cpu_pressure.filename = strdupz(filename); - cg->cpu_pressure.some.enabled = cgroup_enable_pressure_cpu; - cg->cpu_pressure.full.enabled = CONFIG_BOOLEAN_NO; - } - } - - if (unlikely((cgroup_enable_pressure_io_some || cgroup_enable_pressure_io_full) && !cg->io_pressure.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/io.pressure", cgroup_unified_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->io_pressure.filename = strdupz(filename); - cg->io_pressure.some.enabled = cgroup_enable_pressure_io_some; - cg->io_pressure.full.enabled = cgroup_enable_pressure_io_full; - } - } - - if (unlikely( - (cgroup_enable_pressure_memory_some || cgroup_enable_pressure_memory_full) && - !cg->memory_pressure.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/memory.pressure", cgroup_unified_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->memory_pressure.filename = strdupz(filename); - cg->memory_pressure.some.enabled = cgroup_enable_pressure_memory_some; - cg->memory_pressure.full.enabled = cgroup_enable_pressure_memory_full; - } - } - - if (unlikely((cgroup_enable_pressure_irq_some || cgroup_enable_pressure_irq_full) && !cg->irq_pressure.filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/irq.pressure", cgroup_unified_base, cg->id); - if (likely(stat(filename, &buf) != -1)) { - cg->irq_pressure.filename = strdupz(filename); - cg->irq_pressure.some.enabled = cgroup_enable_pressure_irq_some; - cg->irq_pressure.full.enabled = cgroup_enable_pressure_irq_full; - } - } - - // Pids - if (unlikely(!cg->pids.pids_current_filename)) { - snprintfz(filename, FILENAME_MAX, "%s%s/pids.current", cgroup_unified_base, cg->id); - 
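The pressure files registered just above (cpu.pressure, io.pressure, memory.pressure, irq.pressure) follow the kernel's documented PSI line format: a "some" line and, where supported, a "full" line, each carrying avg10/avg60/avg300 percentages and a cumulative total in microseconds. A minimal, self-contained sketch of reading one such line (struct psi_line and parse_psi_line are illustrative names for this example, not Netdata code):

#include <stdio.h>

struct psi_line {
    char kind[8];                 /* "some" or "full" */
    double avg10, avg60, avg300;  /* running averages, in percent */
    unsigned long long total;     /* cumulative stall time, in microseconds */
};

/* parse one PSI line, e.g. "some avg10=0.12 avg60=0.34 avg300=0.56 total=123456" */
static int parse_psi_line(const char *line, struct psi_line *out) {
    int n = sscanf(line, "%7s avg10=%lf avg60=%lf avg300=%lf total=%llu",
                   out->kind, &out->avg10, &out->avg60, &out->avg300, &out->total);
    return (n == 5) ? 0 : -1;
}

int main(void) {
    struct psi_line p;
    if (parse_psi_line("some avg10=0.12 avg60=0.34 avg300=0.56 total=123456", &p) == 0)
        printf("%s: avg10=%.2f%% total=%llu usec\n", p.kind, p.avg10, p.total);
    return 0;
}
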
if (likely(stat(filename, &buf) != -1)) { - cg->pids.pids_current_filename = strdupz(filename); - } - } -} - -static inline void discovery_update_filenames_all_cgroups() { - for (struct cgroup *cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { - if (unlikely(!cg->available || !cg->enabled || cg->pending_renames)) - continue; - - if (!cgroup_use_unified_cgroups) - discovery_update_filenames_cgroup_v1(cg); - else if (likely(cgroup_unified_exist)) - discovery_update_filenames_cgroup_v2(cg); - } -} - -static inline void discovery_cleanup_all_cgroups() { - struct cgroup *cg = discovered_cgroup_root, *last = NULL; - - for(; cg ;) { - if(!cg->available) { - // enable the first duplicate cgroup - { - struct cgroup *t; - for (t = discovered_cgroup_root; t; t = t->discovered_next) { - if (t != cg && t->available && !t->enabled && t->options & CGROUP_OPTIONS_DISABLED_DUPLICATE && - (is_cgroup_systemd_service(t) == is_cgroup_systemd_service(cg)) && - t->hash_chart_id == cg->hash_chart_id && !strcmp(t->chart_id, cg->chart_id)) { - netdata_log_debug(D_CGROUP, "Enabling duplicate of cgroup '%s' with id '%s', because the original with id '%s' stopped.", t->chart_id, t->id, cg->id); - t->enabled = 1; - t->options &= ~CGROUP_OPTIONS_DISABLED_DUPLICATE; - break; - } - } - } - - if(!last) - discovered_cgroup_root = cg->discovered_next; - else - last->discovered_next = cg->discovered_next; - - cgroup_free(cg); - - if(!last) - cg = discovered_cgroup_root; - else - cg = last->discovered_next; - } - else { - last = cg; - cg = cg->discovered_next; - } - } -} - -static inline void discovery_copy_discovered_cgroups_to_reader() { - netdata_log_debug(D_CGROUP, "copy discovered cgroups to the main group list"); - - struct cgroup *cg; - - for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { - cg->next = cg->discovered_next; - } - - cgroup_root = discovered_cgroup_root; -} - -static inline void discovery_share_cgroups_with_ebpf() { - struct cgroup *cg; - int count; - struct stat buf; - - if (shm_mutex_cgroup_ebpf == SEM_FAILED) { - return; - } - sem_wait(shm_mutex_cgroup_ebpf); - - for (cg = cgroup_root, count = 0; cg; cg = cg->next, count++) { - netdata_ebpf_cgroup_shm_body_t *ptr = &shm_cgroup_ebpf.body[count]; - char *prefix = (is_cgroup_systemd_service(cg)) ? 
services_chart_id_prefix : cgroup_chart_id_prefix; - snprintfz(ptr->name, CGROUP_EBPF_NAME_SHARED_LENGTH - 1, "%s%s", prefix, cg->chart_id); - ptr->hash = simple_hash(ptr->name); - ptr->options = cg->options; - ptr->enabled = cg->enabled; - if (cgroup_use_unified_cgroups) { - snprintfz(ptr->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_unified_base, cg->id); - if (likely(stat(ptr->path, &buf) == -1)) { - ptr->path[0] = '\0'; - ptr->enabled = 0; - } - } else { - is_cgroup_procs_exist(ptr, cg->id); - } - - netdata_log_debug(D_CGROUP, "cgroup shared: NAME=%s, ENABLED=%d", ptr->name, ptr->enabled); - } - - shm_cgroup_ebpf.header->cgroup_root_count = count; - sem_post(shm_mutex_cgroup_ebpf); -} - -static inline void discovery_find_all_cgroups_v1() { - if (cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_usage) { - if (discovery_find_dir_in_subdirs(cgroup_cpuacct_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { - cgroup_enable_cpuacct_stat = cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_NO; - collector_error("CGROUP: disabled cpu statistics."); - } - } - - if (cgroup_enable_blkio_io || cgroup_enable_blkio_ops || cgroup_enable_blkio_throttle_io || - cgroup_enable_blkio_throttle_ops || cgroup_enable_blkio_merged_ops || cgroup_enable_blkio_queued_ops) { - if (discovery_find_dir_in_subdirs(cgroup_blkio_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { - cgroup_enable_blkio_io = cgroup_enable_blkio_ops = cgroup_enable_blkio_throttle_io = - cgroup_enable_blkio_throttle_ops = cgroup_enable_blkio_merged_ops = cgroup_enable_blkio_queued_ops = - CONFIG_BOOLEAN_NO; - collector_error("CGROUP: disabled blkio statistics."); - } - } - - if (cgroup_enable_memory || cgroup_enable_detailed_memory || cgroup_enable_swap || cgroup_enable_memory_failcnt) { - if (discovery_find_dir_in_subdirs(cgroup_memory_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { - cgroup_enable_memory = cgroup_enable_detailed_memory = cgroup_enable_swap = cgroup_enable_memory_failcnt = - CONFIG_BOOLEAN_NO; - collector_error("CGROUP: disabled memory statistics."); - } - } - - if (cgroup_search_in_devices) { - if (discovery_find_dir_in_subdirs(cgroup_devices_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { - cgroup_search_in_devices = 0; - collector_error("CGROUP: disabled devices statistics."); - } - } -} - -static inline void discovery_find_all_cgroups_v2() { - if (discovery_find_dir_in_subdirs(cgroup_unified_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { - cgroup_unified_exist = CONFIG_BOOLEAN_NO; - collector_error("CGROUP: disabled unified cgroups statistics."); - } -} - -static int is_digits_only(const char *s) { - do { - if (!isdigit(*s++)) { - return 0; - } - } while (*s); - - return 1; -} - -static int is_cgroup_k8s_container(const char *id) { - // examples: - // https://github.com/netdata/netdata/blob/0fc101679dcd12f1cb8acdd07bb4c85d8e553e53/collectors/cgroups.plugin/cgroup-name.sh#L121-L147 - const char *p = id; - const char *pp = NULL; - int i = 0; - size_t l = 3; // pod - while ((p = strstr(p, "pod"))) { - i++; - p += l; - pp = p; - } - return !(i < 2 || !pp || !(pp = strchr(pp, '/')) || !pp++ || !*pp); -} - -#define TASK_COMM_LEN 16 - -static int k8s_get_container_first_proc_comm(const char *id, char *comm) { - if (!is_cgroup_k8s_container(id)) { - return 1; - } - - static procfile *ff = NULL; - - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/%s/cgroup.procs", cgroup_cpuacct_base, id); - - ff = procfile_reopen(ff, filename, NULL, 
CGROUP_PROCFILE_FLAG); - if (unlikely(!ff)) { - netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename); - return 1; - } - - ff = procfile_readall(ff); - if (unlikely(!ff)) { - netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename); - return 1; - } - - unsigned long lines = procfile_lines(ff); - if (likely(lines < 2)) { - return 1; - } - - char *pid = procfile_lineword(ff, 0, 0); - if (!pid || !*pid) { - return 1; - } - - snprintfz(filename, FILENAME_MAX, "%s/proc/%s/comm", netdata_configured_host_prefix, pid); - - ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_DEFAULT); - if (unlikely(!ff)) { - netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename); - return 1; - } - - ff = procfile_readall(ff); - if (unlikely(!ff)) { - netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename); - return 1; - } - - lines = procfile_lines(ff); - if (unlikely(lines != 2)) { - return 1; - } - - char *proc_comm = procfile_lineword(ff, 0, 0); - if (!proc_comm || !*proc_comm) { - return 1; - } - - strncpyz(comm, proc_comm, TASK_COMM_LEN); - return 0; -} - -static inline void discovery_process_first_time_seen_cgroup(struct cgroup *cg) { - if (!cg->first_time_seen) { - return; - } - cg->first_time_seen = 0; - - char comm[TASK_COMM_LEN + 1]; - - if (cg->container_orchestrator == CGROUPS_ORCHESTRATOR_UNSET) { - if (strstr(cg->id, "kubepods")) { - cg->container_orchestrator = CGROUPS_ORCHESTRATOR_K8S; - } else { - cg->container_orchestrator = CGROUPS_ORCHESTRATOR_UNKNOWN; - } - } - - if (is_inside_k8s && !k8s_get_container_first_proc_comm(cg->id, comm)) { - // container initialization may take some time when CPU % is high - // seen on GKE: comm is '6' before 'runc:[2:INIT]' (dunno if it could be another number) - if (is_digits_only(comm) || matches_entrypoint_parent_process_comm(comm)) { - cg->first_time_seen = 1; - return; - } - if (!strcmp(comm, "pause")) { - // a container that holds the network namespace for the pod - // we don't need to collect its metrics - cg->processed = 1; - return; - } - } - - if (cgroup_enable_systemd_services && matches_systemd_services_cgroups(cg->id)) { - netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'cgroups to match as systemd services'", cg->id, cg->chart_id); - convert_cgroup_to_systemd_service(cg); - return; - } - - if (matches_enabled_cgroup_renames(cg->id)) { - netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'run script to rename cgroups matching', will try to rename it", cg->id, cg->chart_id); - if (is_inside_k8s && is_cgroup_k8s_container(cg->id)) { - // it may take up to a minute for the K8s API to return data for the container - // tested on AWS K8s cluster with 100% CPU utilization - cg->pending_renames = 9; // 1.5 minute - } else { - cg->pending_renames = 2; - } - } -} - -static int discovery_is_cgroup_duplicate(struct cgroup *cg) { - // https://github.com/netdata/netdata/issues/797#issuecomment-241248884 - struct cgroup *c; - for (c = discovered_cgroup_root; c; c = c->discovered_next) { - if (c != cg && c->enabled && (is_cgroup_systemd_service(c) == is_cgroup_systemd_service(cg)) && - c->hash_chart_id == cg->hash_chart_id && !strcmp(c->chart_id, cg->chart_id)) { - collector_error( - "CGROUP: chart id '%s' already exists with id '%s' and is enabled and available. 
Disabling cgroup with id '%s'.", - cg->chart_id, - c->id, - cg->id); - return 1; - } - } - return 0; -} - -// ---------------------------------------------------------------------------- -// cgroup network interfaces - -#define CGROUP_NETWORK_INTERFACE_MAX_LINE 2048 - -static inline void read_cgroup_network_interfaces(struct cgroup *cg) { - netdata_log_debug(D_CGROUP, "looking for the network interfaces of cgroup '%s' with chart id '%s'", cg->id, cg->chart_id); - - pid_t cgroup_pid; - char cgroup_identifier[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; - - if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { - snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_cpuacct_base, cg->id); - } - else { - snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_unified_base, cg->id); - } - - netdata_log_debug(D_CGROUP, "executing cgroup_identifier %s --cgroup '%s' for cgroup '%s'", cgroups_network_interface_script, cgroup_identifier, cg->id); - FILE *fp_child_input, *fp_child_output; - (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_network_interface_script, "--cgroup", cgroup_identifier); - if(!fp_child_output) { - collector_error("CGROUP: cannot popen(%s --cgroup \"%s\", \"r\").", cgroups_network_interface_script, cgroup_identifier); - return; - } - - char *s; - char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; - while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp_child_output))) { - trim(s); - - if(*s && *s != '\n') { - char *t = s; - while(*t && *t != ' ') t++; - if(*t == ' ') { - *t = '\0'; - t++; - } - - if(!*s) { - collector_error("CGROUP: empty host interface returned by script"); - continue; - } - - if(!*t) { - collector_error("CGROUP: empty guest interface returned by script"); - continue; - } - - struct cgroup_network_interface *i = callocz(1, sizeof(struct cgroup_network_interface)); - i->host_device = strdupz(s); - i->container_device = strdupz(t); - i->next = cg->interfaces; - cg->interfaces = i; - - collector_info("CGROUP: cgroup '%s' has network interface '%s' as '%s'", cg->id, i->host_device, i->container_device); - - // register a device rename to proc_net_dev.c - netdev_rename_device_add(i->host_device, i->container_device, cg->chart_id, cg->chart_labels, - k8s_is_kubepod(cg) ? "k8s." 
: "", cgroup_netdev_get(cg)); - } - } - - netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); - // netdata_log_debug(D_CGROUP, "closed cgroup_identifier for cgroup '%s'", cg->id); -} - -static inline void discovery_process_cgroup(struct cgroup *cg) { - if (!cg->available || cg->processed) { - return; - } - - if (cg->first_time_seen) { - worker_is_busy(WORKER_DISCOVERY_PROCESS_FIRST_TIME); - discovery_process_first_time_seen_cgroup(cg); - if (unlikely(cg->first_time_seen || cg->processed)) { - return; - } - } - - if (cg->pending_renames) { - worker_is_busy(WORKER_DISCOVERY_PROCESS_RENAME); - discovery_rename_cgroup(cg); - if (unlikely(cg->pending_renames || cg->processed)) { - return; - } - } - - cg->processed = 1; - - if ((strlen(cg->chart_id) + strlen(cgroup_chart_id_prefix)) >= RRD_ID_LENGTH_MAX) { - collector_info("cgroup '%s' (chart id '%s') disabled because chart_id exceeds the limit (RRD_ID_LENGTH_MAX)", cg->id, cg->chart_id); - return; - } - - if (is_cgroup_systemd_service(cg)) { - if (discovery_is_cgroup_duplicate(cg)) { - cg->enabled = 0; - cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; - return; - } - if (!cg->chart_labels) - cg->chart_labels = rrdlabels_create(); - rrdlabels_add(cg->chart_labels, "service_name", cg->name, RRDLABEL_SRC_AUTO); - cg->enabled = 1; - return; - } - - if (!(cg->enabled = matches_enabled_cgroup_names(cg->name))) { - netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups names matching'", cg->id, cg->name); - return; - } - - if (!(cg->enabled = matches_enabled_cgroup_paths(cg->id))) { - netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups matching'", cg->id, cg->name); - return; - } - - if (discovery_is_cgroup_duplicate(cg)) { - cg->enabled = 0; - cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; - return; - } - - if (!cg->chart_labels) - cg->chart_labels = rrdlabels_create(); - - if (!k8s_is_kubepod(cg)) { - rrdlabels_add(cg->chart_labels, "cgroup_name", cg->name, RRDLABEL_SRC_AUTO); - if (!rrdlabels_exist(cg->chart_labels, "image")) - rrdlabels_add(cg->chart_labels, "image", "", RRDLABEL_SRC_AUTO); - } - - worker_is_busy(WORKER_DISCOVERY_PROCESS_NETWORK); - read_cgroup_network_interfaces(cg); -} - -static inline void discovery_find_all_cgroups() { - netdata_log_debug(D_CGROUP, "searching for cgroups"); - - worker_is_busy(WORKER_DISCOVERY_INIT); - discovery_mark_as_unavailable_all_cgroups(); - - worker_is_busy(WORKER_DISCOVERY_FIND); - if (!cgroup_use_unified_cgroups) { - discovery_find_all_cgroups_v1(); - } else { - discovery_find_all_cgroups_v2(); - } - - for (struct cgroup *cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { - worker_is_busy(WORKER_DISCOVERY_PROCESS); - discovery_process_cgroup(cg); - } - - worker_is_busy(WORKER_DISCOVERY_UPDATE); - discovery_update_filenames_all_cgroups(); - - worker_is_busy(WORKER_DISCOVERY_LOCK); - uv_mutex_lock(&cgroup_root_mutex); - - worker_is_busy(WORKER_DISCOVERY_CLEANUP); - discovery_cleanup_all_cgroups(); - - worker_is_busy(WORKER_DISCOVERY_COPY); - discovery_copy_discovered_cgroups_to_reader(); - - uv_mutex_unlock(&cgroup_root_mutex); - - worker_is_busy(WORKER_DISCOVERY_SHARE); - discovery_share_cgroups_with_ebpf(); - - netdata_log_debug(D_CGROUP, "done searching for cgroups"); -} - -void cgroup_discovery_worker(void *ptr) -{ - UNUSED(ptr); - - worker_register("CGROUPSDISC"); - worker_register_job_name(WORKER_DISCOVERY_INIT, "init"); - worker_register_job_name(WORKER_DISCOVERY_FIND, "find"); - 
worker_register_job_name(WORKER_DISCOVERY_PROCESS, "process"); - worker_register_job_name(WORKER_DISCOVERY_PROCESS_RENAME, "rename"); - worker_register_job_name(WORKER_DISCOVERY_PROCESS_NETWORK, "network"); - worker_register_job_name(WORKER_DISCOVERY_PROCESS_FIRST_TIME, "new"); - worker_register_job_name(WORKER_DISCOVERY_UPDATE, "update"); - worker_register_job_name(WORKER_DISCOVERY_CLEANUP, "cleanup"); - worker_register_job_name(WORKER_DISCOVERY_COPY, "copy"); - worker_register_job_name(WORKER_DISCOVERY_SHARE, "share"); - worker_register_job_name(WORKER_DISCOVERY_LOCK, "lock"); - - entrypoint_parent_process_comm = simple_pattern_create( - " runc:[* " // http://terenceli.github.io/%E6%8A%80%E6%9C%AF/2021/12/28/runc-internals-3) - " exe ", // https://github.com/falcosecurity/falco/blob/9d41b0a151b83693929d3a9c84f7c5c85d070d3a/rules/falco_rules.yaml#L1961 - NULL, - SIMPLE_PATTERN_EXACT, true); - - service_register(SERVICE_THREAD_TYPE_LIBUV, NULL, NULL, NULL, false); - - while (service_running(SERVICE_COLLECTORS)) { - worker_is_idle(); - - uv_mutex_lock(&discovery_thread.mutex); - uv_cond_wait(&discovery_thread.cond_var, &discovery_thread.mutex); - uv_mutex_unlock(&discovery_thread.mutex); - - if (unlikely(!service_running(SERVICE_COLLECTORS))) - break; - - discovery_find_all_cgroups(); - } - collector_info("discovery thread stopped"); - worker_unregister(); - service_exits(); - __atomic_store_n(&discovery_thread.exited,1,__ATOMIC_RELAXED); -} diff --git a/collectors/cgroups.plugin/cgroup-internals.h b/collectors/cgroups.plugin/cgroup-internals.h deleted file mode 100644 index a69802240..000000000 --- a/collectors/cgroups.plugin/cgroup-internals.h +++ /dev/null @@ -1,514 +0,0 @@ -#include "sys_fs_cgroup.h" - -#ifndef NETDATA_CGROUP_INTERNALS_H -#define NETDATA_CGROUP_INTERNALS_H 1 - -#ifdef NETDATA_INTERNAL_CHECKS -#define CGROUP_PROCFILE_FLAG PROCFILE_FLAG_DEFAULT -#else -#define CGROUP_PROCFILE_FLAG PROCFILE_FLAG_NO_ERROR_ON_FILE_IO -#endif - -struct blkio { - int updated; - int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO - int delay_counter; - - char *filename; - - unsigned long long Read; - unsigned long long Write; -/* - unsigned long long Sync; - unsigned long long Async; - unsigned long long Total; -*/ -}; - -struct pids { - char *pids_current_filename; - int pids_current_updated; - unsigned long long pids_current; -}; - -// https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt -struct memory { - ARL_BASE *arl_base; - ARL_ENTRY *arl_dirty; - ARL_ENTRY *arl_swap; - - int updated_detailed; - int updated_usage_in_bytes; - int updated_msw_usage_in_bytes; - int updated_failcnt; - - int enabled_detailed; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO - int enabled_usage_in_bytes; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO - int enabled_msw_usage_in_bytes; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO - int enabled_failcnt; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO - - int delay_counter_detailed; - int delay_counter_failcnt; - - char *filename_detailed; - char *filename_usage_in_bytes; - char *filename_msw_usage_in_bytes; - char *filename_failcnt; - - int detailed_has_dirty; - int detailed_has_swap; - - // detailed metrics -/* - unsigned long long cache; - unsigned long long rss; - unsigned long long rss_huge; - unsigned long long mapped_file; - unsigned long long writeback; - unsigned long long dirty; - unsigned long long swap; - unsigned long long pgpgin; - unsigned long long pgpgout; - unsigned long long pgfault; - unsigned long long pgmajfault; - unsigned long long 
inactive_anon; - unsigned long long active_anon; - unsigned long long inactive_file; - unsigned long long active_file; - unsigned long long unevictable; - unsigned long long hierarchical_memory_limit; -*/ - //unified cgroups metrics - unsigned long long anon; - unsigned long long kernel_stack; - unsigned long long slab; - unsigned long long sock; - // unsigned long long shmem; - unsigned long long anon_thp; - //unsigned long long file_writeback; - //unsigned long long file_dirty; - //unsigned long long file; - - unsigned long long total_cache; - unsigned long long total_rss; - unsigned long long total_rss_huge; - unsigned long long total_mapped_file; - unsigned long long total_writeback; - unsigned long long total_dirty; - unsigned long long total_swap; - unsigned long long total_pgpgin; - unsigned long long total_pgpgout; - unsigned long long total_pgfault; - unsigned long long total_pgmajfault; -/* - unsigned long long total_inactive_anon; - unsigned long long total_active_anon; -*/ - - unsigned long long total_inactive_file; - -/* - unsigned long long total_active_file; - unsigned long long total_unevictable; -*/ - - // single file metrics - unsigned long long usage_in_bytes; - unsigned long long msw_usage_in_bytes; - unsigned long long failcnt; -}; - -// https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt -struct cpuacct_stat { - int updated; - int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO - - char *filename; - - unsigned long long user; // v1, v2(user_usec) - unsigned long long system; // v1, v2(system_usec) -}; - -// https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt -struct cpuacct_usage { - int updated; - int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO - - char *filename; - - unsigned int cpus; - unsigned long long *cpu_percpu; -}; - -// represents cpuacct/cpu.stat, for v2 'cpuacct_stat' is used for 'user_usec', 'system_usec' -struct cpuacct_cpu_throttling { - int updated; - int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO - - char *filename; - - unsigned long long nr_periods; - unsigned long long nr_throttled; - unsigned long long throttled_time; - - unsigned long long nr_throttled_perc; -}; - -// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpu#sect-cfs -// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/managing_monitoring_and_updating_the_kernel/using-cgroups-v2-to-control-distribution-of-cpu-time-for-applications_managing-monitoring-and-updating-the-kernel#proc_controlling-distribution-of-cpu-time-for-applications-by-adjusting-cpu-weight_using-cgroups-v2-to-control-distribution-of-cpu-time-for-applications -struct cpuacct_cpu_shares { - int updated; - int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO - - char *filename; - - unsigned long long shares; -}; - -struct cgroup_network_interface { - const char *host_device; - const char *container_device; - struct cgroup_network_interface *next; -}; - -enum cgroups_container_orchestrator { - CGROUPS_ORCHESTRATOR_UNSET, - CGROUPS_ORCHESTRATOR_UNKNOWN, - CGROUPS_ORCHESTRATOR_K8S -}; - - -// *** WARNING *** The fields are not thread safe. Take care of safe usage. 
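That warning reflects the handoff between the discovery thread and the collector: each cgroup node sits on two singly linked lists, one built via discovered_next by discovery and one walked via next by the chart code, and publishing (as discovery_copy_discovered_cgroups_to_reader does above, under cgroup_root_mutex) is just copying one pointer into the other for every node. A small sketch of that idiom, with struct node, discover() and publish() as illustrative stand-ins for struct cgroup and the discovery functions:

#include <stdio.h>
#include <stdlib.h>

struct node {
    char id[64];
    struct node *next;             /* list seen by the reader/collector */
    struct node *discovered_next;  /* list owned by the discovery pass */
};

static struct node *discovered_root = NULL;
static struct node *reader_root = NULL;

/* add a node to the discovery list (prepended here for brevity) */
static struct node *discover(const char *id) {
    struct node *n = calloc(1, sizeof(*n));
    snprintf(n->id, sizeof(n->id), "%s", id);
    n->discovered_next = discovered_root;
    discovered_root = n;
    return n;
}

/* publish the discovered list to the reader by mirroring the link pointers */
static void publish(void) {
    for (struct node *n = discovered_root; n; n = n->discovered_next)
        n->next = n->discovered_next;
    reader_root = discovered_root;
}

int main(void) {
    discover("/system.slice/docker-abc.scope");
    discover("/kubepods/pod123/456");
    publish();

    for (struct node *n = reader_root; n; n = n->next)
        printf("reader sees: %s\n", n->id);

    for (struct node *n = reader_root; n; ) {  /* cleanup */
        struct node *next = n->next;
        free(n);
        n = next;
    }
    return 0;
}
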
-struct cgroup { - uint32_t options; - - int first_time_seen; // first time seen by the discoverer - int processed; // the discoverer is done processing a cgroup (resolved name, set 'enabled' option) - - char available; // found in the filesystem - char enabled; // enabled in the config - - bool function_ready; // true after the first iteration of chart creation/update - - char pending_renames; - - char *id; - uint32_t hash; - - char *intermediate_id; // TODO: remove it when the renaming script is fixed - - char *chart_id; - uint32_t hash_chart_id; - - // 'cgroup_name' label value. - // by default this is the *id (path), later changed to the resolved name (cgroup-name.sh) or systemd service name. - char *name; - - RRDLABELS *chart_labels; - - int container_orchestrator; - - struct cpuacct_stat cpuacct_stat; - struct cpuacct_usage cpuacct_usage; - struct cpuacct_cpu_throttling cpuacct_cpu_throttling; - struct cpuacct_cpu_shares cpuacct_cpu_shares; - - struct memory memory; - - struct blkio io_service_bytes; // bytes - struct blkio io_serviced; // operations - - struct blkio throttle_io_service_bytes; // bytes - struct blkio throttle_io_serviced; // operations - - struct blkio io_merged; // operations - struct blkio io_queued; // operations - - struct pids pids; - - struct cgroup_network_interface *interfaces; - - struct pressure cpu_pressure; - struct pressure io_pressure; - struct pressure memory_pressure; - struct pressure irq_pressure; - - // Cpu - RRDSET *st_cpu; - RRDDIM *st_cpu_rd_user; - RRDDIM *st_cpu_rd_system; - - RRDSET *st_cpu_limit; - RRDSET *st_cpu_per_core; - RRDSET *st_cpu_nr_throttled; - RRDSET *st_cpu_throttled_time; - RRDSET *st_cpu_shares; - - // Memory - RRDSET *st_mem; - RRDDIM *st_mem_rd_ram; - RRDDIM *st_mem_rd_swap; - - RRDSET *st_mem_utilization; - RRDSET *st_writeback; - RRDSET *st_mem_activity; - RRDSET *st_pgfaults; - RRDSET *st_mem_usage; - RRDSET *st_mem_usage_limit; - RRDSET *st_mem_failcnt; - - // Blkio - RRDSET *st_io; - RRDDIM *st_io_rd_read; - RRDDIM *st_io_rd_written; - - RRDSET *st_serviced_ops; - - RRDSET *st_throttle_io; - RRDDIM *st_throttle_io_rd_read; - RRDDIM *st_throttle_io_rd_written; - - RRDSET *st_throttle_serviced_ops; - - RRDSET *st_queued_ops; - RRDSET *st_merged_ops; - - // Pids - RRDSET *st_pids; - RRDDIM *st_pids_rd_pids_current; - - // per cgroup chart variables - char *filename_cpuset_cpus; - unsigned long long cpuset_cpus; - - char *filename_cpu_cfs_period; - unsigned long long cpu_cfs_period; - - char *filename_cpu_cfs_quota; - unsigned long long cpu_cfs_quota; - - const RRDSETVAR_ACQUIRED *chart_var_cpu_limit; - NETDATA_DOUBLE prev_cpu_usage; - - char *filename_memory_limit; - unsigned long long memory_limit; - const RRDSETVAR_ACQUIRED *chart_var_memory_limit; - - char *filename_memoryswap_limit; - unsigned long long memoryswap_limit; - const RRDSETVAR_ACQUIRED *chart_var_memoryswap_limit; - - const DICTIONARY_ITEM *cgroup_netdev_link; - - struct cgroup *next; - struct cgroup *discovered_next; - -}; - -struct discovery_thread { - uv_thread_t thread; - uv_mutex_t mutex; - uv_cond_t cond_var; - int exited; -}; - -extern struct discovery_thread discovery_thread; - -extern char *cgroups_rename_script; -extern char cgroup_chart_id_prefix[]; -extern char services_chart_id_prefix[]; -extern uv_mutex_t cgroup_root_mutex; - -void cgroup_discovery_worker(void *ptr); - -extern int is_inside_k8s; -extern long system_page_size; -extern int cgroup_enable_cpuacct_stat; -extern int cgroup_enable_cpuacct_usage; -extern int 
cgroup_enable_cpuacct_cpu_throttling; -extern int cgroup_enable_cpuacct_cpu_shares; -extern int cgroup_enable_memory; -extern int cgroup_enable_detailed_memory; -extern int cgroup_enable_memory_failcnt; -extern int cgroup_enable_swap; -extern int cgroup_enable_blkio_io; -extern int cgroup_enable_blkio_ops; -extern int cgroup_enable_blkio_throttle_io; -extern int cgroup_enable_blkio_throttle_ops; -extern int cgroup_enable_blkio_merged_ops; -extern int cgroup_enable_blkio_queued_ops; -extern int cgroup_enable_pressure_cpu; -extern int cgroup_enable_pressure_io_some; -extern int cgroup_enable_pressure_io_full; -extern int cgroup_enable_pressure_memory_some; -extern int cgroup_enable_pressure_memory_full; -extern int cgroup_enable_pressure_irq_some; -extern int cgroup_enable_pressure_irq_full; -extern int cgroup_enable_systemd_services; -extern int cgroup_enable_systemd_services_detailed_memory; -extern int cgroup_used_memory; -extern int cgroup_use_unified_cgroups; -extern int cgroup_unified_exist; -extern int cgroup_search_in_devices; -extern int cgroup_check_for_new_every; -extern int cgroup_update_every; -extern int cgroup_containers_chart_priority; -extern int cgroup_recheck_zero_blkio_every_iterations; -extern int cgroup_recheck_zero_mem_failcnt_every_iterations; -extern int cgroup_recheck_zero_mem_detailed_every_iterations; -extern char *cgroup_cpuacct_base; -extern char *cgroup_cpuset_base; -extern char *cgroup_blkio_base; -extern char *cgroup_memory_base; -extern char *cgroup_pids_base; -extern char *cgroup_devices_base; -extern char *cgroup_unified_base; -extern int cgroup_root_count; -extern int cgroup_root_max; -extern int cgroup_max_depth; -extern SIMPLE_PATTERN *enabled_cgroup_paths; -extern SIMPLE_PATTERN *enabled_cgroup_names; -extern SIMPLE_PATTERN *search_cgroup_paths; -extern SIMPLE_PATTERN *enabled_cgroup_renames; -extern SIMPLE_PATTERN *systemd_services_cgroups; -extern SIMPLE_PATTERN *entrypoint_parent_process_comm; -extern char *cgroups_network_interface_script; -extern int cgroups_check; -extern uint32_t Read_hash; -extern uint32_t Write_hash; -extern uint32_t user_hash; -extern uint32_t system_hash; -extern uint32_t user_usec_hash; -extern uint32_t system_usec_hash; -extern uint32_t nr_periods_hash; -extern uint32_t nr_throttled_hash; -extern uint32_t throttled_time_hash; -extern uint32_t throttled_usec_hash; -extern struct cgroup *cgroup_root; - -extern netdata_ebpf_cgroup_shm_t shm_cgroup_ebpf; -extern int shm_fd_cgroup_ebpf; -extern sem_t *shm_mutex_cgroup_ebpf; - -enum cgroups_type { CGROUPS_AUTODETECT_FAIL, CGROUPS_V1, CGROUPS_V2 }; - -enum cgroups_systemd_setting { - SYSTEMD_CGROUP_ERR, - SYSTEMD_CGROUP_LEGACY, - SYSTEMD_CGROUP_HYBRID, - SYSTEMD_CGROUP_UNIFIED -}; - -struct cgroups_systemd_config_setting { - char *name; - enum cgroups_systemd_setting setting; -}; - -extern struct cgroups_systemd_config_setting cgroups_systemd_options[]; - -static inline int matches_enabled_cgroup_paths(char *id) { - return simple_pattern_matches(enabled_cgroup_paths, id); -} - -static inline int matches_enabled_cgroup_names(char *name) { - return simple_pattern_matches(enabled_cgroup_names, name); -} - -static inline int matches_enabled_cgroup_renames(char *id) { - return simple_pattern_matches(enabled_cgroup_renames, id); -} - -static inline int matches_systemd_services_cgroups(char *id) { - return simple_pattern_matches(systemd_services_cgroups, id); -} - -static inline int matches_search_cgroup_paths(const char *dir) { - return simple_pattern_matches(search_cgroup_paths, dir); 
-} - -static inline int matches_entrypoint_parent_process_comm(const char *comm) { - return simple_pattern_matches(entrypoint_parent_process_comm, comm); -} - -static inline int is_cgroup_systemd_service(struct cgroup *cg) { - return (int)(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE); -} - -static inline int k8s_is_kubepod(struct cgroup *cg) { - return cg->container_orchestrator == CGROUPS_ORCHESTRATOR_K8S; -} - -static inline char *cgroup_chart_type(char *buffer, struct cgroup *cg) { - buffer[0] = '\0'; - - if (cg->chart_id[0] == '\0' || (cg->chart_id[0] == '/' && cg->chart_id[1] == '\0')) - strncpy(buffer, "cgroup_root", RRD_ID_LENGTH_MAX); - else if (is_cgroup_systemd_service(cg)) - snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s%s", services_chart_id_prefix, cg->chart_id); - else - snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s%s", cgroup_chart_id_prefix, cg->chart_id); - - return buffer; -} - -#define RRDFUNCTIONS_CGTOP_HELP "View running containers" - -int cgroup_function_cgroup_top(BUFFER *wb, int timeout, const char *function, void *collector_data, - rrd_function_result_callback_t result_cb, void *result_cb_data, - rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data, - rrd_function_register_canceller_cb_t register_canceller_cb, void *register_canceller_cb_data); -int cgroup_function_systemd_top(BUFFER *wb, int timeout, const char *function, void *collector_data, - rrd_function_result_callback_t result_cb, void *result_cb_data, - rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data, - rrd_function_register_canceller_cb_t register_canceller_cb, void *register_canceller_cb_data); - -void cgroup_netdev_link_init(void); -const DICTIONARY_ITEM *cgroup_netdev_get(struct cgroup *cg); -void cgroup_netdev_delete(struct cgroup *cg); - -void update_cpu_utilization_chart(struct cgroup *cg); -void update_cpu_utilization_limit_chart(struct cgroup *cg, NETDATA_DOUBLE cpu_limit); -void update_cpu_throttled_chart(struct cgroup *cg); -void update_cpu_throttled_duration_chart(struct cgroup *cg); -void update_cpu_shares_chart(struct cgroup *cg); -void update_cpu_per_core_usage_chart(struct cgroup *cg); - -void update_mem_usage_limit_chart(struct cgroup *cg, unsigned long long memory_limit); -void update_mem_utilization_chart(struct cgroup *cg, unsigned long long memory_limit); -void update_mem_usage_detailed_chart(struct cgroup *cg); -void update_mem_writeback_chart(struct cgroup *cg); -void update_mem_activity_chart(struct cgroup *cg); -void update_mem_pgfaults_chart(struct cgroup *cg); -void update_mem_failcnt_chart(struct cgroup *cg); -void update_mem_usage_chart(struct cgroup *cg); - -void update_io_serviced_bytes_chart(struct cgroup *cg); -void update_io_serviced_ops_chart(struct cgroup *cg); -void update_throttle_io_serviced_bytes_chart(struct cgroup *cg); -void update_throttle_io_serviced_ops_chart(struct cgroup *cg); -void update_io_queued_ops_chart(struct cgroup *cg); -void update_io_merged_ops_chart(struct cgroup *cg); - -void update_pids_current_chart(struct cgroup *cg); - -void update_cpu_some_pressure_chart(struct cgroup *cg); -void update_cpu_some_pressure_stall_time_chart(struct cgroup *cg); -void update_cpu_full_pressure_chart(struct cgroup *cg); -void update_cpu_full_pressure_stall_time_chart(struct cgroup *cg); - -void update_mem_some_pressure_chart(struct cgroup *cg); -void update_mem_some_pressure_stall_time_chart(struct cgroup *cg); -void update_mem_full_pressure_chart(struct cgroup *cg); -void update_mem_full_pressure_stall_time_chart(struct 
cgroup *cg); - -void update_irq_some_pressure_chart(struct cgroup *cg); -void update_irq_some_pressure_stall_time_chart(struct cgroup *cg); -void update_irq_full_pressure_chart(struct cgroup *cg); -void update_irq_full_pressure_stall_time_chart(struct cgroup *cg); - -void update_io_some_pressure_chart(struct cgroup *cg); -void update_io_some_pressure_stall_time_chart(struct cgroup *cg); -void update_io_full_pressure_chart(struct cgroup *cg); -void update_io_full_pressure_stall_time_chart(struct cgroup *cg); - -#endif // NETDATA_CGROUP_INTERNALS_H
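The inline cgroup_chart_type() helper declared above is what decides which chart family a cgroup's metrics land in. Below is a minimal, self-contained sketch of that branching only, outside the plugin; the prefix strings are assumptions made for the example, since the real values come from the extern cgroup_chart_id_prefix and services_chart_id_prefix definitions that are not part of this header.

```c
#include <stdio.h>

/* Sketch only: mirrors the branching of cgroup_chart_type() above.
 * The prefix values below are assumptions for illustration. */
static const char *assumed_cgroup_prefix   = "cgroup_";
static const char *assumed_services_prefix = "systemd_";

static void chart_type(char *out, size_t len, const char *chart_id, int is_systemd_service) {
    if (chart_id[0] == '\0' || (chart_id[0] == '/' && chart_id[1] == '\0'))
        snprintf(out, len, "cgroup_root");          /* the root cgroup gets a fixed type */
    else if (is_systemd_service)
        snprintf(out, len, "%s%s", assumed_services_prefix, chart_id);
    else
        snprintf(out, len, "%s%s", assumed_cgroup_prefix, chart_id);
}

int main(void) {
    char buf[256];
    chart_type(buf, sizeof(buf), "/", 0);                    printf("%s\n", buf); /* cgroup_root */
    chart_type(buf, sizeof(buf), "nginx.service", 1);        printf("%s\n", buf); /* systemd_nginx.service */
    chart_type(buf, sizeof(buf), "docker_a346da9bc0e3", 0);  printf("%s\n", buf); /* cgroup_docker_a346da9bc0e3 */
    return 0;
}
```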
\ No newline at end of file diff --git a/collectors/cgroups.plugin/cgroup-name.sh.in b/collectors/cgroups.plugin/cgroup-name.sh.in deleted file mode 100755 index 0f8b63256..000000000 --- a/collectors/cgroups.plugin/cgroup-name.sh.in +++ /dev/null @@ -1,706 +0,0 @@ -#!/usr/bin/env bash -#shellcheck disable=SC2001 - -# netdata -# real-time performance and health monitoring, done right! -# (C) 2023 Netdata Inc. -# SPDX-License-Identifier: GPL-3.0-or-later -# -# Script to find a better name for cgroups -# - -export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin:@sbindir_POST@" -export LC_ALL=C - -cmd_line="'${0}' $(printf "'%s' " "${@}")" - -# ----------------------------------------------------------------------------- -# logging - -PROGRAM_NAME="$(basename "${0}")" - -# these should be the same with syslog() priorities -NDLP_EMERG=0 # system is unusable -NDLP_ALERT=1 # action must be taken immediately -NDLP_CRIT=2 # critical conditions -NDLP_ERR=3 # error conditions -NDLP_WARN=4 # warning conditions -NDLP_NOTICE=5 # normal but significant condition -NDLP_INFO=6 # informational -NDLP_DEBUG=7 # debug-level messages - -# the max (numerically) log level we will log -LOG_LEVEL=$NDLP_INFO - -set_log_min_priority() { - case "${NETDATA_LOG_LEVEL,,}" in - "emerg" | "emergency") - LOG_LEVEL=$NDLP_EMERG - ;; - - "alert") - LOG_LEVEL=$NDLP_ALERT - ;; - - "crit" | "critical") - LOG_LEVEL=$NDLP_CRIT - ;; - - "err" | "error") - LOG_LEVEL=$NDLP_ERR - ;; - - "warn" | "warning") - LOG_LEVEL=$NDLP_WARN - ;; - - "notice") - LOG_LEVEL=$NDLP_NOTICE - ;; - - "info") - LOG_LEVEL=$NDLP_INFO - ;; - - "debug") - LOG_LEVEL=$NDLP_DEBUG - ;; - esac -} - -set_log_min_priority - -log() { - local level="${1}" - shift 1 - - [[ -n "$level" && -n "$LOG_LEVEL" && "$level" -gt "$LOG_LEVEL" ]] && return - - systemd-cat-native --log-as-netdata --newline="--NEWLINE--" <<EOFLOG -INVOCATION_ID=${NETDATA_INVOCATION_ID} -SYSLOG_IDENTIFIER=${PROGRAM_NAME} -PRIORITY=${level} -THREAD_TAG=cgroup-name -ND_LOG_SOURCE=collector -ND_REQUEST=${cmd_line} -MESSAGE=${*//\\n/--NEWLINE--} - -EOFLOG - # AN EMPTY LINE IS NEEDED ABOVE -} - -info() { - log "$NDLP_INFO" "${@}" -} - -warning() { - log "$NDLP_WARN" "${@}" -} - -error() { - log "$NDLP_ERR" "${@}" -} - -fatal() { - log "$NDLP_ALERT" "${@}" - exit 1 -} - -debug() { - log "$NDLP_DEBUG" "${@}" -} - -# ----------------------------------------------------------------------------- - -function parse_docker_like_inspect_output() { - local output="${1}" - eval "$(grep -E "^(NOMAD_NAMESPACE|NOMAD_JOB_NAME|NOMAD_TASK_NAME|NOMAD_SHORT_ALLOC_ID|CONT_NAME|IMAGE_NAME)=" <<<"$output")" - if [ -n "$NOMAD_NAMESPACE" ] && [ -n "$NOMAD_JOB_NAME" ] && [ -n "$NOMAD_TASK_NAME" ] && [ -n "$NOMAD_SHORT_ALLOC_ID" ]; then - NAME="${NOMAD_NAMESPACE}-${NOMAD_JOB_NAME}-${NOMAD_TASK_NAME}-${NOMAD_SHORT_ALLOC_ID}" - else - NAME=$(echo "${CONT_NAME}" | sed 's|^/||') - fi - if [ -n "${IMAGE_NAME}" ]; then - LABELS="image=\"${IMAGE_NAME}\"" - fi -} - -function docker_like_get_name_command() { - local command="${1}" - local id="${2}" - info "Running command: ${command} inspect --format='{{range .Config.Env}}{{println .}}{{end}}CONT_NAME={{ .Name}}' \"${id}\"" - if OUTPUT="$(${command} inspect --format='{{range .Config.Env}}{{println .}}{{end}}CONT_NAME={{ .Name}}{{println}}IMAGE_NAME={{ .Config.Image}}' "${id}")" && - [ -n "$OUTPUT" ]; then - parse_docker_like_inspect_output "$OUTPUT" - fi - return 0 -} - -function docker_like_get_name_api() { - local host_var="${1}" - local host="${!host_var}" - local 
path="/containers/${2}/json" - if [ -z "${host}" ]; then - warning "No ${host_var} is set" - return 1 - fi - if ! command -v jq >/dev/null 2>&1; then - warning "Can't find jq command line tool. jq is required for netdata to retrieve container name using ${host} API, falling back to docker ps" - return 1 - fi - if [ -S "${host}" ]; then - info "Running API command: curl --unix-socket \"${host}\" http://localhost${path}" - JSON=$(curl -sS --unix-socket "${host}" "http://localhost${path}") - else - info "Running API command: curl \"${host}${path}\"" - JSON=$(curl -sS "${host}${path}") - fi - if OUTPUT=$(echo "${JSON}" | jq -r '.Config.Env[],"CONT_NAME=\(.Name)","IMAGE_NAME=\(.Config.Image)"') && [ -n "$OUTPUT" ]; then - parse_docker_like_inspect_output "$OUTPUT" - fi - return 0 -} - -# get_lbl_val returns the value for the label with the given name. -# Returns "null" string if the label doesn't exist. -# Expected labels format: 'name="value",...'. -function get_lbl_val() { - local labels want_name - labels="${1}" - want_name="${2}" - - IFS=, read -ra labels <<< "$labels" - - local lname lval - for l in "${labels[@]}"; do - IFS="=" read -r lname lval <<< "$l" - if [ "$want_name" = "$lname" ] && [ -n "$lval" ]; then - echo "${lval:1:-1}" # trim " - return 0 - fi - done - - echo "null" - return 1 -} - -function add_lbl_prefix() { - local orig_labels prefix - orig_labels="${1}" - prefix="${2}" - - IFS=, read -ra labels <<< "$orig_labels" - - local new_labels - for l in "${labels[@]}"; do - new_labels+="${prefix}${l}," - done - - echo "${new_labels:0:-1}" # trim last ',' -} - -function remove_lbl() { - local orig_labels lbl_name - orig_labels="${1}" - lbl_name="${2}" - - IFS=, read -ra labels <<< "$orig_labels" - - local new_labels - for l in "${labels[@]}"; do - IFS="=" read -r lname lval <<< "$l" - [ "$lbl_name" != "$lname" ] && new_labels+="${l}," - done - - echo "${new_labels:0:-1}" # trim last ',' -} - -function k8s_is_pause_container() { - local cgroup_path="${1}" - - local file - if [ -d "${NETDATA_HOST_PREFIX}/sys/fs/cgroup/cpuacct" ]; then - file="${NETDATA_HOST_PREFIX}/sys/fs/cgroup/cpuacct/$cgroup_path/cgroup.procs" - else - file="${NETDATA_HOST_PREFIX}/sys/fs/cgroup/$cgroup_path/cgroup.procs" - fi - - [ ! -f "$file" ] && return 1 - - local procs - IFS= read -rd' ' procs 2>/dev/null <"$file" - #shellcheck disable=SC2206 - procs=($procs) - - [ "${#procs[@]}" -ne 1 ] && return 1 - - IFS= read -r comm 2>/dev/null <"/proc/${procs[0]}/comm" - - [ "$comm" == "pause" ] - return -} - -function k8s_gcp_get_cluster_name() { - local header url id loc name - header="Metadata-Flavor: Google" - url="http://metadata/computeMetadata/v1" - if id=$(curl --fail -s -m 3 --noproxy "*" -H "$header" "$url/project/project-id") && - loc=$(curl --fail -s -m 3 --noproxy "*" -H "$header" "$url/instance/attributes/cluster-location") && - name=$(curl --fail -s -m 3 --noproxy "*" -H "$header" "$url/instance/attributes/cluster-name") && - [ -n "$id" ] && [ -n "$loc" ] && [ -n "$name" ]; then - echo "gke_${id}_${loc}_${name}" - return 0 - fi - return 1 -} - -# k8s_get_kubepod_name resolves */kubepods/* cgroup name. 
-# pod level cgroup name format: 'pod_<namespace>_<pod_name>' -# container level cgroup name format: 'cntr_<namespace>_<pod_name>_<container_name>' -function k8s_get_kubepod_name() { - # GKE /sys/fs/cgroup/*/ (cri=docker, cgroups=v1): - # |-- kubepods - # | |-- burstable - # | | |-- pod98cee708-023b-11eb-933d-42010a800193 - # | | | |-- 922161c98e6ea450bf665226cdc64ca2aa3e889934c2cff0aec4325f8f78ac03 - # | `-- pode314bbac-d577-11ea-a171-42010a80013b - # | |-- 7d505356b04507de7b710016d540b2759483ed5f9136bb01a80872b08f771930 - # - # GKE /sys/fs/cgroup/*/ (cri=containerd, cgroups=v1): - # |-- kubepods.slice - # | |-- kubepods-besteffort.slice - # | | |-- kubepods-besteffort-pode1465238_4518_4c21_832f_fd9f87033dad.slice - # | | | |-- cri-containerd-66be9b2efdf4d85288c319b8c1a2f50d2439b5617e36f45d9d0d0be1381113be.scope - # | `-- kubepods-pod91f5b561_369f_4103_8015_66391059996a.slice - # | |-- cri-containerd-24c53b774a586f06abc058619b47f71d9d869ac50c92898adbd199106fd0aaeb.scope - # - # GKE /sys/fs/cgroup/*/ (cri=crio, cgroups=v1): - # |-- kubepods.slice - # | |-- kubepods-besteffort.slice - # | | |-- kubepods-besteffort-podad412dfe_3589_4056_965a_592356172968.slice - # | | | |-- crio-77b019312fd9825828b70214b2c94da69c30621af2a7ee06f8beace4bc9439e5.scope - # - # Minikube (v1.8.2) /sys/fs/cgroup/*/ (cri=docker, cgroups=v1): - # |-- kubepods.slice - # | |-- kubepods-besteffort.slice - # | | |-- kubepods-besteffort-pod10fb5647_c724_400c_b9cc_0e6eae3110e7.slice - # | | | |-- docker-36e5eb5056dfdf6dbb75c0c44a1ecf23217fe2c50d606209d8130fcbb19fb5a7.scope - # - # kind v0.14.0 - # |-- kubelet.slice - # | |-- kubelet-kubepods.slice - # | | |-- kubelet-kubepods-besteffort.slice - # | | | |-- kubelet-kubepods-besteffort-pod7881ed9e_c63e_4425_b5e0_ac55a08ae939.slice - # | | | | |-- cri-containerd-00c7939458bffc416bb03451526e9fde13301d6654cfeadf5b4964a7fb5be1a9.scope - # - # NOTE: cgroups plugin - # - uses '_' to join dir names (so it is <parent>_<child>_<child>_...) - # - replaces '.' with '-' - - local fn="${FUNCNAME[0]}" - local cgroup_path="${1}" - local id="${2}" - - if [[ ! $id =~ ^.*kubepods.* ]]; then - warning "${fn}: '${id}' is not kubepod cgroup." - return 1 - fi - - local clean_id="$id" - clean_id=${clean_id//.slice/} - clean_id=${clean_id//.scope/} - - local name pod_uid cntr_id - if [[ $clean_id == "kubepods" ]]; then - name="$clean_id" - elif [[ $clean_id =~ .+(besteffort|burstable|guaranteed)$ ]]; then - # kubepods_<QOS_CLASS> - # kubepods_kubepods-<QOS_CLASS> - name=${clean_id//-/_} - name=${name/#kubepods_kubepods/kubepods} - elif [[ $clean_id =~ .+pod[a-f0-9_-]+_(docker|crio|cri-containerd)-([a-f0-9]+)$ ]]; then - # ...pod<POD_UID>_(docker|crio|cri-containerd)-<CONTAINER_ID> (POD_UID w/ "_") - cntr_id=${BASH_REMATCH[2]} - elif [[ $clean_id =~ .+pod[a-f0-9-]+_([a-f0-9]+)$ ]]; then - # ...pod<POD_UID>_<CONTAINER_ID> - cntr_id=${BASH_REMATCH[1]} - elif [[ $clean_id =~ .+pod([a-f0-9_-]+)$ ]]; then - # ...pod<POD_UID> (POD_UID w/ and w/o "_") - pod_uid=${BASH_REMATCH[1]} - pod_uid=${pod_uid//_/-} - fi - - if [ -n "$name" ]; then - echo "$name" - return 0 - fi - - if [ -z "$pod_uid" ] && [ -z "$cntr_id" ]; then - warning "${fn}: can't extract pod_uid or container_id from the cgroup '$id'." - return 3 - fi - - [ -n "$pod_uid" ] && info "${fn}: cgroup '$id' is a pod(uid:$pod_uid)" - [ -n "$cntr_id" ] && info "${fn}: cgroup '$id' is a container(id:$cntr_id)" - - if [ -n "$cntr_id" ] && k8s_is_pause_container "$cgroup_path"; then - return 3 - fi - - if ! 
command -v jq > /dev/null 2>&1; then - warning "${fn}: 'jq' command not available." - return 1 - fi - - local tmp_kube_cluster_name="${TMPDIR:-"/tmp"}/netdata-cgroups-k8s-cluster-name" - local tmp_kube_system_ns_uid_file="${TMPDIR:-"/tmp"}/netdata-cgroups-kubesystem-uid" - local tmp_kube_containers_file="${TMPDIR:-"/tmp"}/netdata-cgroups-containers" - - local kube_cluster_name - local kube_system_uid - local labels - - if [ -n "$cntr_id" ] && - [ -f "$tmp_kube_cluster_name" ] && - [ -f "$tmp_kube_system_ns_uid_file" ] && - [ -f "$tmp_kube_containers_file" ] && - labels=$(grep "$cntr_id" "$tmp_kube_containers_file" 2>/dev/null); then - IFS= read -r kube_system_uid 2>/dev/null <"$tmp_kube_system_ns_uid_file" - IFS= read -r kube_cluster_name 2>/dev/null <"$tmp_kube_cluster_name" - else - IFS= read -r kube_system_uid 2>/dev/null <"$tmp_kube_system_ns_uid_file" - IFS= read -r kube_cluster_name 2>/dev/null <"$tmp_kube_cluster_name" - [ -z "$kube_cluster_name" ] && ! kube_cluster_name=$(k8s_gcp_get_cluster_name) && kube_cluster_name="unknown" - - local kube_system_ns - local pods - - if [ -n "${KUBERNETES_SERVICE_HOST}" ] && [ -n "${KUBERNETES_PORT_443_TCP_PORT}" ]; then - local token header host url - token="$(</var/run/secrets/kubernetes.io/serviceaccount/token)" - header="Authorization: Bearer $token" - host="$KUBERNETES_SERVICE_HOST:$KUBERNETES_PORT_443_TCP_PORT" - - if [ -z "$kube_system_uid" ]; then - url="https://$host/api/v1/namespaces/kube-system" - # FIX: check HTTP response code - if ! kube_system_ns=$(curl --fail -sSk -H "$header" "$url" 2>&1); then - warning "${fn}: error on curl '${url}': ${kube_system_ns}." - fi - fi - - local url - if [ -n "${USE_KUBELET_FOR_PODS_METADATA}" ]; then - url="${KUBELET_URL:-https://localhost:10250}/pods" - else - url="https://$host/api/v1/pods" - [ -n "$MY_NODE_NAME" ] && url+="?fieldSelector=spec.nodeName==$MY_NODE_NAME" - fi - - # FIX: check HTTP response code - if ! pods=$(curl --fail -sSk -H "$header" "$url" 2>&1); then - warning "${fn}: error on curl '${url}': ${pods}." - return 1 - fi - elif ps -C kubelet >/dev/null 2>&1 && command -v kubectl >/dev/null 2>&1; then - if [ -z "$kube_system_uid" ]; then - if ! kube_system_ns=$(kubectl --kubeconfig="$KUBE_CONFIG" get namespaces kube-system -o json 2>&1); then - warning "${fn}: error on 'kubectl': ${kube_system_ns}." - fi - fi - - [[ -z ${KUBE_CONFIG+x} ]] && KUBE_CONFIG="/etc/kubernetes/admin.conf" - if ! pods=$(kubectl --kubeconfig="$KUBE_CONFIG" get pods --all-namespaces -o json 2>&1); then - warning "${fn}: error on 'kubectl': ${pods}." - return 1 - fi - else - warning "${fn}: not inside the k8s cluster and 'kubectl' command not available." - return 1 - fi - - if [ -n "$kube_system_ns" ] && ! kube_system_uid=$(jq -r '.metadata.uid' <<<"$kube_system_ns" 2>&1); then - warning "${fn}: error on 'jq' parse kube_system_ns: ${kube_system_uid}." - fi - - local jq_filter - jq_filter+='.items[] | "' - jq_filter+='namespace=\"\(.metadata.namespace)\",' - jq_filter+='pod_name=\"\(.metadata.name)\",' - jq_filter+='pod_uid=\"\(.metadata.uid)\",' - #jq_filter+='\(.metadata.labels | to_entries | map("pod_label_"+.key+"=\""+.value+"\"") | join(",") | if length > 0 then .+"," else . end)' - jq_filter+='\((.metadata.ownerReferences[]? | select(.controller==true) | "controller_kind=\""+.kind+"\",controller_name=\""+.name+"\",") // "")' - jq_filter+='node_name=\"\(.spec.nodeName)\",' - jq_filter+='" + ' - jq_filter+='(.status.containerStatuses[]? 
| "' - jq_filter+='container_name=\"\(.name)\",' - jq_filter+='container_id=\"\(.containerID)\"' - jq_filter+='") | ' - jq_filter+='sub("(docker|cri-o|containerd)://";"")' # containerID: docker://a346da9bc0e3eaba6b295f64ac16e02f2190db2cef570835706a9e7a36e2c722 - - local containers - if ! containers=$(jq -r "${jq_filter}" <<<"$pods" 2>&1); then - warning "${fn}: error on 'jq' parse pods: ${containers}." - return 1 - fi - - [ -n "$kube_cluster_name" ] && echo "$kube_cluster_name" >"$tmp_kube_cluster_name" 2>/dev/null - [ -n "$kube_system_ns" ] && [ -n "$kube_system_uid" ] && echo "$kube_system_uid" >"$tmp_kube_system_ns_uid_file" 2>/dev/null - echo "$containers" >"$tmp_kube_containers_file" 2>/dev/null - fi - - local qos_class - if [[ $clean_id =~ .+(besteffort|burstable) ]]; then - qos_class="${BASH_REMATCH[1]}" - else - qos_class="guaranteed" - fi - - # available labels: - # namespace, pod_name, pod_uid, container_name, container_id, node_name - if [ -n "$cntr_id" ]; then - if [ -n "$labels" ] || labels=$(grep "$cntr_id" <<< "$containers" 2> /dev/null); then - labels+=',kind="container"' - labels+=",qos_class=\"$qos_class\"" - [ -n "$kube_system_uid" ] && [ "$kube_system_uid" != "null" ] && labels+=",cluster_id=\"$kube_system_uid\"" - [ -n "$kube_cluster_name" ] && [ "$kube_cluster_name" != "unknown" ] && labels+=",cluster_name=\"$kube_cluster_name\"" - name="cntr" - name+="_$(get_lbl_val "$labels" namespace)" - name+="_$(get_lbl_val "$labels" pod_name)" - name+="_$(get_lbl_val "$labels" container_name)" - labels=$(remove_lbl "$labels" "container_id") - labels=$(remove_lbl "$labels" "pod_uid") - labels=$(add_lbl_prefix "$labels" "k8s_") - name+=" $labels" - else - return 2 - fi - elif [ -n "$pod_uid" ]; then - if labels=$(grep "$pod_uid" -m 1 <<< "$containers" 2> /dev/null); then - labels="${labels%%,container_*}" - labels+=',kind="pod"' - labels+=",qos_class=\"$qos_class\"" - [ -n "$kube_system_uid" ] && [ "$kube_system_uid" != "null" ] && labels+=",cluster_id=\"$kube_system_uid\"" - [ -n "$kube_cluster_name" ] && [ "$kube_cluster_name" != "unknown" ] && labels+=",cluster_name=\"$kube_cluster_name\"" - name="pod" - name+="_$(get_lbl_val "$labels" namespace)" - name+="_$(get_lbl_val "$labels" pod_name)" - labels=$(remove_lbl "$labels" "pod_uid") - labels=$(add_lbl_prefix "$labels" "k8s_") - name+=" $labels" - else - return 2 - fi - fi - - # jq filter nonexistent field and nonexistent label value is 'null' - if [[ $name =~ _null(_|$) ]]; then - warning "${fn}: invalid name: $name (cgroup '$id')" - if [ -n "${USE_KUBELET_FOR_PODS_METADATA}" ]; then - # local data is cached and may not contain the correct id - return 2 - fi - return 1 - fi - - echo "$name" - [ -n "$name" ] - return -} - -function k8s_get_name() { - local fn="${FUNCNAME[0]}" - local cgroup_path="${1}" - local id="${2}" - local kubepod_name="" - - kubepod_name=$(k8s_get_kubepod_name "$cgroup_path" "$id") - - case "$?" in - 0) - kubepod_name="k8s_${kubepod_name}" - - local name labels - name=${kubepod_name%% *} - labels=${kubepod_name#* } - - if [ "$name" != "$labels" ]; then - info "${fn}: cgroup '${id}' has chart name '${name}', labels '${labels}" - NAME="$name" - LABELS="$labels" - else - info "${fn}: cgroup '${id}' has chart name '${NAME}'" - NAME="$name" - fi - EXIT_CODE=$EXIT_SUCCESS - ;; - 1) - NAME="k8s_${id}" - warning "${fn}: cannot find the name of cgroup with id '${id}'. Setting name to ${NAME} and enabling it." 
- EXIT_CODE=$EXIT_SUCCESS - ;; - 2) - NAME="k8s_${id}" - warning "${fn}: cannot find the name of cgroup with id '${id}'. Setting name to ${NAME} and asking for retry." - EXIT_CODE=$EXIT_RETRY - ;; - *) - NAME="k8s_${id}" - warning "${fn}: cannot find the name of cgroup with id '${id}'. Setting name to ${NAME} and disabling it." - EXIT_CODE=$EXIT_DISABLE - ;; - esac -} - -function docker_get_name() { - local id="${1}" - # See https://github.com/netdata/netdata/pull/13523 for details - if command -v snap >/dev/null 2>&1 && snap list docker >/dev/null 2>&1; then - docker_like_get_name_api DOCKER_HOST "${id}" - elif hash docker 2> /dev/null; then - docker_like_get_name_command docker "${id}" - else - docker_like_get_name_api DOCKER_HOST "${id}" || docker_like_get_name_command podman "${id}" - fi - if [ -z "${NAME}" ]; then - warning "cannot find the name of docker container '${id}'" - EXIT_CODE=$EXIT_RETRY - NAME="${id:0:12}" - else - info "docker container '${id}' is named '${NAME}'" - fi -} - -function docker_validate_id() { - local id="${1}" - if [ -n "${id}" ] && { [ ${#id} -eq 64 ] || [ ${#id} -eq 12 ]; }; then - docker_get_name "${id}" - else - error "a docker id cannot be extracted from docker cgroup '${CGROUP}'." - fi -} - -function podman_get_name() { - local id="${1}" - - # for Podman, prefer using the API if we can, as netdata will not normally have access - # to other users' containers, so they will not be visible when running `podman ps` - docker_like_get_name_api PODMAN_HOST "${id}" || docker_like_get_name_command podman "${id}" - - if [ -z "${NAME}" ]; then - warning "cannot find the name of podman container '${id}'" - EXIT_CODE=$EXIT_RETRY - NAME="${id:0:12}" - else - info "podman container '${id}' is named '${NAME}'" - fi -} - -function podman_validate_id() { - local id="${1}" - if [ -n "${id}" ] && [ ${#id} -eq 64 ]; then - podman_get_name "${id}" - else - error "a podman id cannot be extracted from docker cgroup '${CGROUP}'." - fi -} - -# ----------------------------------------------------------------------------- - -DOCKER_HOST="${DOCKER_HOST:=/var/run/docker.sock}" -PODMAN_HOST="${PODMAN_HOST:=/run/podman/podman.sock}" -CGROUP_PATH="${1}" # the path as it is (e.g. '/docker/efcf4c409') -CGROUP="${2}" # the modified path (e.g. 'docker_efcf4c409') -EXIT_SUCCESS=0 -EXIT_RETRY=2 -EXIT_DISABLE=3 -EXIT_CODE=$EXIT_SUCCESS -NAME= -LABELS= - -# ----------------------------------------------------------------------------- - -if [ -z "${CGROUP}" ]; then - fatal "called without a cgroup name. Nothing to do." 
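# Illustrative invocation (not part of the original script), using the example
# arguments documented above:
#   cgroup-name.sh '/docker/efcf4c409' 'docker_efcf4c409'
# The caller reads a single line from stdout -- either 'NAME' alone, or
# 'NAME key="value",...' when labels were resolved -- and acts on the exit
# status: 0 (EXIT_SUCCESS) keeps the name, 2 (EXIT_RETRY) retries later,
# 3 (EXIT_DISABLE) stops collecting this cgroup.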
-fi - -if [ -z "${NAME}" ]; then - if [[ ${CGROUP} =~ ^.*kubepods.* ]]; then - k8s_get_name "${CGROUP_PATH}" "${CGROUP}" - fi -fi - -if [ -z "${NAME}" ]; then - if [[ ${CGROUP} =~ ^.*docker[-_/\.][a-fA-F0-9]+[-_\.]?.*$ ]]; then - # docker containers - #shellcheck disable=SC1117 - DOCKERID="$(echo "${CGROUP}" | sed "s|^.*docker[-_/]\([a-fA-F0-9]\+\)[-_\.]\?.*$|\1|")" - docker_validate_id "${DOCKERID}" - elif [[ ${CGROUP} =~ ^.*ecs[-_/\.][a-fA-F0-9]+[-_\.]?.*$ ]]; then - # ECS - #shellcheck disable=SC1117 - DOCKERID="$(echo "${CGROUP}" | sed "s|^.*ecs[-_/].*[-_/]\([a-fA-F0-9]\+\)[-_\.]\?.*$|\1|")" - docker_validate_id "${DOCKERID}" - elif [[ ${CGROUP} =~ system.slice_containerd.service_cpuset_[a-fA-F0-9]+[-_\.]?.*$ ]]; then - # docker containers under containerd - #shellcheck disable=SC1117 - DOCKERID="$(echo "${CGROUP}" | sed "s|^.*ystem.slice_containerd.service_cpuset_\([a-fA-F0-9]\+\)[-_\.]\?.*$|\1|")" - docker_validate_id "${DOCKERID}" - elif [[ ${CGROUP} =~ ^.*libpod-[a-fA-F0-9]+.*$ ]]; then - # Podman - PODMANID="$(echo "${CGROUP}" | sed "s|^.*libpod-\([a-fA-F0-9]\+\).*$|\1|")" - podman_validate_id "${PODMANID}" - - elif [[ ${CGROUP} =~ machine.slice[_/].*\.service ]]; then - # systemd-nspawn - NAME="$(echo "${CGROUP}" | sed 's/.*machine.slice[_\/]\(.*\)\.service/\1/g')" - - elif [[ ${CGROUP} =~ machine.slice_machine.*-lxc ]]; then - # libvirtd / lxc containers - # machine.slice machine-lxc/x2d969/x2dhubud0xians01.scope => lxc/hubud0xians01 - # machine.slice_machine-lxc/x2d969/x2dhubud0xians01.scope/libvirt_init.scope => lxc/hubud0xians01/libvirt_init - NAME="lxc/$(echo "${CGROUP}" | sed 's/machine.slice_machine.*-lxc//; s/[\/_]x2d[[:digit:]]*//; s/[\/_]x2d//g; s/\.scope//g')" - elif [[ ${CGROUP} =~ machine.slice_machine.*-qemu ]]; then - # libvirtd / qemu virtual machines - # machine.slice_machine-qemu_x2d1_x2dopnsense.scope => qemu_opnsense - NAME="qemu_$(echo "${CGROUP}" | sed 's/machine.slice_machine.*-qemu//; s/[\/_]x2d[[:digit:]]*//; s/[\/_]x2d//g; s/\.scope//g')" - - elif [[ ${CGROUP} =~ machine_.*\.libvirt-qemu ]]; then - # libvirtd / qemu virtual machines - NAME="qemu_$(echo "${CGROUP}" | sed 's/^machine_//; s/\.libvirt-qemu$//; s/-/_/;')" - - elif [[ ${CGROUP} =~ qemu.slice_([0-9]+).scope && -d "${NETDATA_HOST_PREFIX}/etc/pve" ]]; then - # Proxmox VMs - FILENAME="${NETDATA_HOST_PREFIX}/etc/pve/qemu-server/${BASH_REMATCH[1]}.conf" - if [[ -f $FILENAME && -r $FILENAME ]]; then - NAME="qemu_$(grep -e '^name: ' "${FILENAME}" | head -1 | sed -rn 's|\s*name\s*:\s*(.*)?$|\1|p')" - else - error "proxmox config file missing ${FILENAME} or netdata does not have read access. Please ensure netdata is a member of www-data group." - fi - elif [[ ${CGROUP} =~ lxc_([0-9]+) && -d "${NETDATA_HOST_PREFIX}/etc/pve" ]]; then - # Proxmox Containers (LXC) - FILENAME="${NETDATA_HOST_PREFIX}/etc/pve/lxc/${BASH_REMATCH[1]}.conf" - if [[ -f ${FILENAME} && -r ${FILENAME} ]]; then - NAME=$(grep -e '^hostname: ' "${FILENAME}" | head -1 | sed -rn 's|\s*hostname\s*:\s*(.*)?$|\1|p') - else - error "proxmox config file missing ${FILENAME} or netdata does not have read access. Please ensure netdata is a member of www-data group." 
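As a standalone illustration of the Proxmox VM branch above, the same grep/sed extraction can be exercised against a throwaway file standing in for /etc/pve/qemu-server/<vmid>.conf (the file contents and VM name here are invented for the example):

```bash
#!/usr/bin/env bash
# stand-in for ${NETDATA_HOST_PREFIX}/etc/pve/qemu-server/<vmid>.conf (hypothetical content)
conf="$(mktemp)"
printf 'name: opnsense\ncores: 2\n' > "${conf}"

# same pipeline as the script above: first 'name:' line, value only, prefixed with 'qemu_'
NAME="qemu_$(grep -e '^name: ' "${conf}" | head -1 | sed -rn 's|\s*name\s*:\s*(.*)?$|\1|p')"
echo "${NAME}"   # prints: qemu_opnsense

rm -f "${conf}"
```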
- fi - elif [[ ${CGROUP} =~ lxc.payload.* ]]; then - # LXC 4.0 - NAME="$(echo "${CGROUP}" | sed 's/lxc\.payload\.\(.*\)/\1/g')" - fi - - [ -z "${NAME}" ] && NAME="${CGROUP}" - [ ${#NAME} -gt 100 ] && NAME="${NAME:0:100}" -fi - -NAME="${NAME// /_}" - -info "cgroup '${CGROUP}' is called '${NAME}', labels '${LABELS}'" -if [ -n "$LABELS" ]; then - echo "${NAME} ${LABELS}" -else - echo "${NAME}" -fi - -exit ${EXIT_CODE} diff --git a/collectors/cgroups.plugin/cgroup-network-helper.sh.in b/collectors/cgroups.plugin/cgroup-network-helper.sh.in deleted file mode 100755 index da9b9162a..000000000 --- a/collectors/cgroups.plugin/cgroup-network-helper.sh.in +++ /dev/null @@ -1,376 +0,0 @@ -#!/usr/bin/env bash -# shellcheck disable=SC1117 - -# cgroup-network-helper.sh -# detect container and virtual machine interfaces -# -# (C) 2023 Netdata Inc. -# SPDX-License-Identifier: GPL-3.0-or-later -# -# This script is called as root (by cgroup-network), with either a pid, or a cgroup path. -# It tries to find all the network interfaces that belong to the same cgroup. -# -# It supports several method for this detection: -# -# 1. cgroup-network (the binary father of this script) detects veth network interfaces, -# by examining iflink and ifindex IDs and switching namespaces -# (it also detects the interface name as it is used by the container). -# -# 2. this script, uses /proc/PID/fdinfo to find tun/tap network interfaces. -# -# 3. this script, calls virsh to find libvirt network interfaces. -# - -# ----------------------------------------------------------------------------- - -# the system path is cleared by cgroup-network -# shellcheck source=/dev/null -[ -f /etc/profile ] && source /etc/profile -export PATH="${PATH}:@sbindir_POST@" - -export LC_ALL=C - -cmd_line="'${0}' $(printf "'%s' " "${@}")" - -# ----------------------------------------------------------------------------- -# logging - -PROGRAM_NAME="$(basename "${0}")" - -# these should be the same with syslog() priorities -NDLP_EMERG=0 # system is unusable -NDLP_ALERT=1 # action must be taken immediately -NDLP_CRIT=2 # critical conditions -NDLP_ERR=3 # error conditions -NDLP_WARN=4 # warning conditions -NDLP_NOTICE=5 # normal but significant condition -NDLP_INFO=6 # informational -NDLP_DEBUG=7 # debug-level messages - -# the max (numerically) log level we will log -LOG_LEVEL=$NDLP_INFO - -set_log_min_priority() { - case "${NETDATA_LOG_LEVEL,,}" in - "emerg" | "emergency") - LOG_LEVEL=$NDLP_EMERG - ;; - - "alert") - LOG_LEVEL=$NDLP_ALERT - ;; - - "crit" | "critical") - LOG_LEVEL=$NDLP_CRIT - ;; - - "err" | "error") - LOG_LEVEL=$NDLP_ERR - ;; - - "warn" | "warning") - LOG_LEVEL=$NDLP_WARN - ;; - - "notice") - LOG_LEVEL=$NDLP_NOTICE - ;; - - "info") - LOG_LEVEL=$NDLP_INFO - ;; - - "debug") - LOG_LEVEL=$NDLP_DEBUG - ;; - esac -} - -set_log_min_priority - -log() { - local level="${1}" - shift 1 - - [[ -n "$level" && -n "$LOG_LEVEL" && "$level" -gt "$LOG_LEVEL" ]] && return - - systemd-cat-native --log-as-netdata --newline="--NEWLINE--" <<EOFLOG -INVOCATION_ID=${NETDATA_INVOCATION_ID} -SYSLOG_IDENTIFIER=${PROGRAM_NAME} -PRIORITY=${level} -THREAD_TAG=cgroup-network-helper -ND_LOG_SOURCE=collector -ND_REQUEST=${cmd_line} -MESSAGE=${*//\\n/--NEWLINE--} - -EOFLOG - # AN EMPTY LINE IS NEEDED ABOVE -} - -info() { - log "$NDLP_INFO" "${@}" -} - -warning() { - log "$NDLP_WARN" "${@}" -} - -error() { - log "$NDLP_ERR" "${@}" -} - -fatal() { - log "$NDLP_ALERT" "${@}" - exit 1 -} - -debug() { - log "$NDLP_DEBUG" "${@}" -} - -debug=0 -if [ 
"${NETDATA_CGROUP_NETWORK_HELPER_DEBUG-0}" = "1" ]; then - debug=1 - LOG_LEVEL=$NDLP_DEBUG -fi - -# ----------------------------------------------------------------------------- -# check for BASH v4+ (required for associative arrays) - -if [ ${BASH_VERSINFO[0]} -lt 4 ]; then - echo >&2 "BASH version 4 or later is required (this is ${BASH_VERSION})." - exit 1 -fi - -# ----------------------------------------------------------------------------- -# parse the arguments - -pid= -cgroup= -while [ -n "${1}" ] -do - case "${1}" in - --cgroup) cgroup="${2}"; shift 1;; - --pid|-p) pid="${2}"; shift 1;; - --debug|debug) - debug=1 - LOG_LEVEL=$NDLP_DEBUG - ;; - *) fatal "Cannot understand argument '${1}'";; - esac - - shift -done - -if [ -z "${pid}" ] && [ -z "${cgroup}" ] -then - fatal "Either --pid or --cgroup is required" -fi - -# ----------------------------------------------------------------------------- - -set_source() { - [ ${debug} -eq 1 ] && echo "SRC ${*}" -} - - -# ----------------------------------------------------------------------------- -# veth interfaces via cgroup - -# cgroup-network can detect veth interfaces by itself (written in C). -# If you seek for a shell version of what it does, check this: -# https://github.com/netdata/netdata/issues/474#issuecomment-317866709 - - -# ----------------------------------------------------------------------------- -# tun/tap interfaces via /proc/PID/fdinfo - -# find any tun/tap devices linked to a pid -proc_pid_fdinfo_iff() { - local p="${1}" # the pid - - debug "Searching for tun/tap interfaces for pid ${p}..." - set_source "fdinfo" - grep "^iff:.*" "${NETDATA_HOST_PREFIX}/proc/${p}/fdinfo"/* 2>/dev/null | cut -f 2 -} - -find_tun_tap_interfaces_for_cgroup() { - local c="${1}" # the cgroup path - [ -d "${c}/emulator" ] && c="${c}/emulator" # check for 'emulator' subdirectory - c="${c}/cgroup.procs" # make full path - - # for each pid of the cgroup - # find any tun/tap devices linked to the pid - if [ -f "${c}" ] - then - local p - for p in $(< "${c}" ) - do - proc_pid_fdinfo_iff "${p}" - done - else - debug "Cannot find file '${c}', not searching for tun/tap interfaces." - fi -} - - -# ----------------------------------------------------------------------------- -# virsh domain network interfaces - -virsh_cgroup_to_domain_name() { - local c="${1}" # the cgroup path - - debug "extracting a possible virsh domain from cgroup ${c}..." 
- - # extract for the cgroup path - sed -n -e "s|.*/machine-qemu\\\\x2d[0-9]\+\\\\x2d\(.*\)\.scope$|\1|p" \ - -e "s|.*/machine/qemu-[0-9]\+-\(.*\)\.libvirt-qemu$|\1|p" \ - -e "s|.*/machine/\(.*\)\.libvirt-qemu$|\1|p" \ - <<EOF -${c} -EOF -} - -virsh_find_all_interfaces_for_cgroup() { - local c="${1}" # the cgroup path - - # the virsh command - local virsh - # shellcheck disable=SC2230 - virsh="$(which virsh 2>/dev/null || command -v virsh 2>/dev/null)" - - if [ -n "${virsh}" ] - then - local d - d="$(virsh_cgroup_to_domain_name "${c}")" - # convert hex to character - # e.g.: vm01\x2dweb => vm01-web (https://github.com/netdata/netdata/issues/11088#issuecomment-832618149) - d="$(printf '%b' "${d}")" - - if [ -n "${d}" ] - then - debug "running: virsh domiflist ${d}; to find the network interfaces" - - # 'virsh -r domiflist <domain>' example output - # Interface Type Source Model MAC - #-------------------------------------------------------------- - # vnet3 bridge br0 virtio 52:54:00:xx:xx:xx - # vnet4 network default virtio 52:54:00:yy:yy:yy - - # match only 'network' interfaces from virsh output - set_source "virsh" - "${virsh}" -r domiflist "${d}" |\ - sed -n \ - -e "s|^[[:space:]]\?\([^[:space:]]\+\)[[:space:]]\+network[[:space:]]\+\([^[:space:]]\+\)[[:space:]]\+[^[:space:]]\+[[:space:]]\+[^[:space:]]\+$|\1 \1_\2|p" \ - -e "s|^[[:space:]]\?\([^[:space:]]\+\)[[:space:]]\+bridge[[:space:]]\+\([^[:space:]]\+\)[[:space:]]\+[^[:space:]]\+[[:space:]]\+[^[:space:]]\+$|\1 \1_\2|p" - else - debug "no virsh domain extracted from cgroup ${c}" - fi - else - debug "virsh command is not available" - fi -} - -# ----------------------------------------------------------------------------- -# netnsid detected interfaces - -netnsid_find_all_interfaces_for_pid() { - local pid="${1}" - [ -z "${pid}" ] && return 1 - - local nsid - nsid=$(lsns -t net -p "${pid}" -o NETNSID -nr 2>/dev/null) - if [ -z "${nsid}" ] || [ "${nsid}" = "unassigned" ]; then - return 1 - fi - - set_source "netnsid" - ip link show |\ - grep -B 1 -E " link-netnsid ${nsid}($| )" |\ - sed -n -e "s|^[[:space:]]*[0-9]\+:[[:space:]]\+\([A-Za-z0-9_]\+\)\(@[A-Za-z0-9_]\+\)*:[[:space:]].*$|\1|p" -} - -netnsid_find_all_interfaces_for_cgroup() { - local c="${1}" # the cgroup path - - if [ -f "${c}/cgroup.procs" ]; then - netnsid_find_all_interfaces_for_pid "$(head -n 1 "${c}/cgroup.procs" 2>/dev/null)" - else - debug "Cannot find file '${c}/cgroup.procs', not searching for netnsid interfaces." 
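# Illustrative 'ip link show' output (interface names and ids invented): with
# NETNSID 0 resolved for the cgroup's first pid, output such as
#   17: vethd4b6b14@if16: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 ...
#       link/ether 02:42:ac:11:00:02 brd ff:ff:ff:ff:ff:ff link-netnsid 0
# is what the pid variant above matches: grep -B 1 keeps the numbered header
# line that precedes the 'link-netnsid 0' line, and the sed reduces it to the
# bare interface name 'vethd4b6b14'.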
- fi -} - -# ----------------------------------------------------------------------------- - -find_all_interfaces_of_pid_or_cgroup() { - local p="${1}" c="${2}" # the pid and the cgroup path - - if [ -n "${pid}" ] - then - # we have been called with a pid - - proc_pid_fdinfo_iff "${p}" - netnsid_find_all_interfaces_for_pid "${p}" - - elif [ -n "${c}" ] - then - # we have been called with a cgroup - - info "searching for network interfaces of cgroup '${c}'" - - find_tun_tap_interfaces_for_cgroup "${c}" - virsh_find_all_interfaces_for_cgroup "${c}" - netnsid_find_all_interfaces_for_cgroup "${c}" - - else - - error "Either a pid or a cgroup path is needed" - return 1 - - fi - - return 0 -} - -# ----------------------------------------------------------------------------- - -# an associative array to store the interfaces -# the index is the interface name as seen by the host -# the value is the interface name as seen by the guest / container -declare -A devs=() - -# store all interfaces found in the associative array -# this will also give the unique devices, as seen by the host -last_src= -# shellcheck disable=SC2162 -while read host_device guest_device -do - [ -z "${host_device}" ] && continue - - [ "${host_device}" = "SRC" ] && last_src="${guest_device}" && continue - - # the default guest_device is the host_device - [ -z "${guest_device}" ] && guest_device="${host_device}" - - # when we run in debug, show the source - debug "Found host device '${host_device}', guest device '${guest_device}', detected via '${last_src}'" - - if [ -z "${devs[${host_device}]}" ] || [ "${devs[${host_device}]}" = "${host_device}" ]; then - devs[${host_device}]="${guest_device}" - fi - -done < <( find_all_interfaces_of_pid_or_cgroup "${pid}" "${cgroup}" ) - -# print the interfaces found, in the format netdata expects them -found=0 -for x in "${!devs[@]}" -do - found=$((found + 1)) - echo "${x} ${devs[${x}]}" -done - -debug "found ${found} network interfaces for pid '${pid}', cgroup '${cgroup}', run as ${USER}, ${UID}" - -# let netdata know if we found any -[ ${found} -eq 0 ] && exit 1 -exit 0 diff --git a/collectors/cgroups.plugin/cgroup-network.c b/collectors/cgroups.plugin/cgroup-network.c deleted file mode 100644 index 508ea07c6..000000000 --- a/collectors/cgroups.plugin/cgroup-network.c +++ /dev/null @@ -1,743 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "libnetdata/libnetdata.h" -#include "libnetdata/required_dummies.h" - -#ifdef HAVE_SETNS -#ifndef _GNU_SOURCE -#define _GNU_SOURCE /* See feature_test_macros(7) */ -#endif -#include <sched.h> -#endif - -char env_netdata_host_prefix[FILENAME_MAX + 50] = ""; -char env_netdata_log_method[FILENAME_MAX + 50] = ""; -char env_netdata_log_format[FILENAME_MAX + 50] = ""; -char env_netdata_log_level[FILENAME_MAX + 50] = ""; -char *environment[] = { - "PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin", - env_netdata_host_prefix, - env_netdata_log_method, - env_netdata_log_format, - env_netdata_log_level, - NULL -}; - -struct iface { - const char *device; - uint32_t hash; - - unsigned int ifindex; - unsigned int iflink; - - struct iface *next; -}; - -unsigned int calc_num_ifaces(struct iface *root) { - unsigned int num = 0; - for (struct iface *h = root; h; h = h->next) { - num++; - } - return num; -} - -unsigned int read_iface_iflink(const char *prefix, const char *iface) { - if(!prefix) prefix = ""; - - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/sys/class/net/%s/iflink", prefix, iface); - - unsigned 
long long iflink = 0; - int ret = read_single_number_file(filename, &iflink); - if(ret) collector_error("Cannot read '%s'.", filename); - - return (unsigned int)iflink; -} - -unsigned int read_iface_ifindex(const char *prefix, const char *iface) { - if(!prefix) prefix = ""; - - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/sys/class/net/%s/ifindex", prefix, iface); - - unsigned long long ifindex = 0; - int ret = read_single_number_file(filename, &ifindex); - if(ret) collector_error("Cannot read '%s'.", filename); - - return (unsigned int)ifindex; -} - -struct iface *read_proc_net_dev(const char *scope __maybe_unused, const char *prefix) { - if(!prefix) prefix = ""; - - procfile *ff = NULL; - char filename[FILENAME_MAX + 1]; - - snprintfz(filename, FILENAME_MAX, "%s%s", prefix, (*prefix)?"/proc/1/net/dev":"/proc/net/dev"); - -#ifdef NETDATA_INTERNAL_CHECKS - collector_info("parsing '%s'", filename); -#endif - - ff = procfile_open(filename, " \t,:|", PROCFILE_FLAG_DEFAULT); - if(unlikely(!ff)) { - collector_error("Cannot open file '%s'", filename); - return NULL; - } - - ff = procfile_readall(ff); - if(unlikely(!ff)) { - collector_error("Cannot read file '%s'", filename); - return NULL; - } - - size_t lines = procfile_lines(ff), l; - struct iface *root = NULL; - for(l = 2; l < lines ;l++) { - if (unlikely(procfile_linewords(ff, l) < 1)) continue; - - struct iface *t = callocz(1, sizeof(struct iface)); - t->device = strdupz(procfile_lineword(ff, l, 0)); - t->hash = simple_hash(t->device); - t->ifindex = read_iface_ifindex(prefix, t->device); - t->iflink = read_iface_iflink(prefix, t->device); - t->next = root; - root = t; - -#ifdef NETDATA_INTERNAL_CHECKS - collector_info("added %s interface '%s', ifindex %u, iflink %u", scope, t->device, t->ifindex, t->iflink); -#endif - } - - procfile_close(ff); - - return root; -} - -void free_iface(struct iface *iface) { - freez((void *)iface->device); - freez(iface); -} - -void free_host_ifaces(struct iface *iface) { - while(iface) { - struct iface *t = iface->next; - free_iface(iface); - iface = t; - } -} - -int iface_is_eligible(struct iface *iface) { - if(iface->iflink != iface->ifindex) - return 1; - - return 0; -} - -int eligible_ifaces(struct iface *root) { - int eligible = 0; - - struct iface *t; - for(t = root; t ; t = t->next) - if(iface_is_eligible(t)) - eligible++; - - return eligible; -} - -static void continue_as_child(void) { - pid_t child = fork(); - int status; - pid_t ret; - - if (child < 0) - collector_error("fork() failed"); - - /* Only the child returns */ - if (child == 0) - return; - - for (;;) { - ret = waitpid(child, &status, WUNTRACED); - if ((ret == child) && (WIFSTOPPED(status))) { - /* The child suspended so suspend us as well */ - kill(getpid(), SIGSTOP); - kill(child, SIGCONT); - } else { - break; - } - } - - /* Return the child's exit code if possible */ - if (WIFEXITED(status)) { - exit(WEXITSTATUS(status)); - } else if (WIFSIGNALED(status)) { - kill(getpid(), WTERMSIG(status)); - } - - exit(EXIT_FAILURE); -} - -int proc_pid_fd(const char *prefix, const char *ns, pid_t pid) { - if(!prefix) prefix = ""; - - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/%d/%s", prefix, (int)pid, ns); - int fd = open(filename, O_RDONLY); - - if(fd == -1) - collector_error("Cannot open proc_pid_fd() file '%s'", filename); - - return fd; -} - -static struct ns { - int nstype; - int fd; - int status; - const char *name; - const char *path; -} all_ns[] = { - // { .nstype = CLONE_NEWUSER, 
.fd = -1, .status = -1, .name = "user", .path = "ns/user" }, - // { .nstype = CLONE_NEWCGROUP, .fd = -1, .status = -1, .name = "cgroup", .path = "ns/cgroup" }, - // { .nstype = CLONE_NEWIPC, .fd = -1, .status = -1, .name = "ipc", .path = "ns/ipc" }, - // { .nstype = CLONE_NEWUTS, .fd = -1, .status = -1, .name = "uts", .path = "ns/uts" }, - { .nstype = CLONE_NEWNET, .fd = -1, .status = -1, .name = "network", .path = "ns/net" }, - { .nstype = CLONE_NEWPID, .fd = -1, .status = -1, .name = "pid", .path = "ns/pid" }, - { .nstype = CLONE_NEWNS, .fd = -1, .status = -1, .name = "mount", .path = "ns/mnt" }, - - // terminator - { .nstype = 0, .fd = -1, .status = -1, .name = NULL, .path = NULL } -}; - -int switch_namespace(const char *prefix, pid_t pid) { - -#ifdef HAVE_SETNS - - int i; - for(i = 0; all_ns[i].name ; i++) - all_ns[i].fd = proc_pid_fd(prefix, all_ns[i].path, pid); - - int root_fd = proc_pid_fd(prefix, "root", pid); - int cwd_fd = proc_pid_fd(prefix, "cwd", pid); - - setgroups(0, NULL); - - // 2 passes - found it at nsenter source code - // this is related CLONE_NEWUSER functionality - - // This code cannot switch user namespace (it can all the other namespaces) - // Fortunately, we don't need to switch user namespaces. - - int pass; - for(pass = 0; pass < 2 ;pass++) { - for(i = 0; all_ns[i].name ; i++) { - if (all_ns[i].fd != -1 && all_ns[i].status == -1) { - if(setns(all_ns[i].fd, all_ns[i].nstype) == -1) { - if(pass == 1) { - all_ns[i].status = 0; - collector_error("Cannot switch to %s namespace of pid %d", all_ns[i].name, (int) pid); - } - } - else - all_ns[i].status = 1; - } - } - } - - setgroups(0, NULL); - - if(root_fd != -1) { - if(fchdir(root_fd) < 0) - collector_error("Cannot fchdir() to pid %d root directory", (int)pid); - - if(chroot(".") < 0) - collector_error("Cannot chroot() to pid %d root directory", (int)pid); - - close(root_fd); - } - - if(cwd_fd != -1) { - if(fchdir(cwd_fd) < 0) - collector_error("Cannot fchdir() to pid %d current working directory", (int)pid); - - close(cwd_fd); - } - - int do_fork = 0; - for(i = 0; all_ns[i].name ; i++) - if(all_ns[i].fd != -1) { - - // CLONE_NEWPID requires a fork() to become effective - if(all_ns[i].nstype == CLONE_NEWPID && all_ns[i].status) - do_fork = 1; - - close(all_ns[i].fd); - } - - if(do_fork) - continue_as_child(); - - return 0; - -#else - - errno = ENOSYS; - collector_error("setns() is missing on this system."); - return 1; - -#endif -} - -pid_t read_pid_from_cgroup_file(const char *filename) { - int fd = open(filename, procfile_open_flags); - if(fd == -1) { - if (errno != ENOENT) - collector_error("Cannot open pid_from_cgroup() file '%s'.", filename); - return 0; - } - - FILE *fp = fdopen(fd, "r"); - if(!fp) { - collector_error("Cannot upgrade fd to fp for file '%s'.", filename); - return 0; - } - - char buffer[100 + 1]; - pid_t pid = 0; - char *s; - while((s = fgets(buffer, 100, fp))) { - buffer[100] = '\0'; - pid = atoi(s); - if(pid > 0) break; - } - - fclose(fp); - -#ifdef NETDATA_INTERNAL_CHECKS - if(pid > 0) collector_info("found pid %d on file '%s'", pid, filename); -#endif - - return pid; -} - -pid_t read_pid_from_cgroup_files(const char *path) { - char filename[FILENAME_MAX + 1]; - - snprintfz(filename, FILENAME_MAX, "%s/cgroup.procs", path); - pid_t pid = read_pid_from_cgroup_file(filename); - if(pid > 0) return pid; - - snprintfz(filename, FILENAME_MAX, "%s/tasks", path); - return read_pid_from_cgroup_file(filename); -} - -pid_t read_pid_from_cgroup(const char *path) { - pid_t pid = 
read_pid_from_cgroup_files(path); - if (pid > 0) return pid; - - DIR *dir = opendir(path); - if (!dir) { - collector_error("cannot read directory '%s'", path); - return 0; - } - - struct dirent *de = NULL; - while ((de = readdir(dir))) { - if (de->d_type == DT_DIR - && ( - (de->d_name[0] == '.' && de->d_name[1] == '\0') - || (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0') - )) - continue; - - if (de->d_type == DT_DIR) { - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/%s", path, de->d_name); - pid = read_pid_from_cgroup(filename); - if(pid > 0) break; - } - } - closedir(dir); - return pid; -} - -// ---------------------------------------------------------------------------- -// send the result to netdata - -struct found_device { - const char *host_device; - const char *guest_device; - - uint32_t host_device_hash; - - struct found_device *next; -} *detected_devices = NULL; - -void add_device(const char *host, const char *guest) { -#ifdef NETDATA_INTERNAL_CHECKS - collector_info("adding device with host '%s', guest '%s'", host, guest); -#endif - - uint32_t hash = simple_hash(host); - - if(guest && (!*guest || strcmp(host, guest) == 0)) - guest = NULL; - - struct found_device *f; - for(f = detected_devices; f ; f = f->next) { - if(f->host_device_hash == hash && !strcmp(host, f->host_device)) { - - if(guest && (!f->guest_device || !strcmp(f->host_device, f->guest_device))) { - if(f->guest_device) freez((void *)f->guest_device); - f->guest_device = strdupz(guest); - } - - return; - } - } - - f = mallocz(sizeof(struct found_device)); - f->host_device = strdupz(host); - f->host_device_hash = hash; - f->guest_device = (guest)?strdupz(guest):NULL; - f->next = detected_devices; - detected_devices = f; -} - -int send_devices(void) { - int found = 0; - - struct found_device *f; - for(f = detected_devices; f ; f = f->next) { - found++; - printf("%s %s\n", f->host_device, (f->guest_device)?f->guest_device:f->host_device); - } - - return found; -} - -// ---------------------------------------------------------------------------- -// this function should be called only **ONCE** -// also it has to be the **LAST** to be called -// since it switches namespaces, so after this call, everything is different! - -void detect_veth_interfaces(pid_t pid) { - struct iface *cgroup = NULL; - struct iface *host, *h, *c; - - host = read_proc_net_dev("host", netdata_configured_host_prefix); - if(!host) { - errno = 0; - collector_error("cannot read host interface list."); - goto cleanup; - } - - if(!eligible_ifaces(host)) { - errno = 0; - collector_info("there are no double-linked host interfaces available."); - goto cleanup; - } - - if(switch_namespace(netdata_configured_host_prefix, pid)) { - errno = 0; - collector_error("cannot switch to the namespace of pid %u", (unsigned int) pid); - goto cleanup; - } - -#ifdef NETDATA_INTERNAL_CHECKS - collector_info("switched to namespaces of pid %d", pid); -#endif - - cgroup = read_proc_net_dev("cgroup", NULL); - if(!cgroup) { - errno = 0; - collector_error("cannot read cgroup interface list."); - goto cleanup; - } - - if(!eligible_ifaces(cgroup)) { - errno = 0; - collector_error("there are not double-linked cgroup interfaces available."); - goto cleanup; - } - - unsigned int host_dev_num = calc_num_ifaces(host); - unsigned int cgroup_dev_num = calc_num_ifaces(cgroup); - // host ifaces == guest ifaces => we are still in the host namespace - // and we can't really identify which ifaces belong to the cgroup (e.g. Proxmox VM). 
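// Worked example (interface names and index numbers invented): a host-side
// veth such as 'vethd4b6b14' with ifindex=17, iflink=16 pairs with a container
// interface 'eth0' that has ifindex=16, iflink=17 -- each end's iflink points
// at the other end's ifindex. The matching loop below relies on exactly that
// (h->ifindex == c->iflink && h->iflink == c->ifindex), so this pair would be
// reported as "vethd4b6b14 eth0".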
- if (host_dev_num == cgroup_dev_num) { - unsigned int m = 0; - for (h = host; h; h = h->next) { - for (c = cgroup; c; c = c->next) { - if (h->ifindex == c->ifindex && h->iflink == c->iflink) { - m++; - break; - } - } - } - if (host_dev_num == m) { - goto cleanup; - } - } - - for(h = host; h ; h = h->next) { - if(iface_is_eligible(h)) { - for (c = cgroup; c; c = c->next) { - if(iface_is_eligible(c) && h->ifindex == c->iflink && h->iflink == c->ifindex) { - add_device(h->device, c->device); - } - } - } - } - -cleanup: - free_host_ifaces(cgroup); - free_host_ifaces(host); -} - -// ---------------------------------------------------------------------------- -// call the external helper - -#define CGROUP_NETWORK_INTERFACE_MAX_LINE 2048 -void call_the_helper(pid_t pid, const char *cgroup) { - if(setresuid(0, 0, 0) == -1) - collector_error("setresuid(0, 0, 0) failed."); - - char command[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; - if(cgroup) - snprintfz(command, CGROUP_NETWORK_INTERFACE_MAX_LINE, "exec " PLUGINS_DIR "/cgroup-network-helper.sh --cgroup '%s'", cgroup); - else - snprintfz(command, CGROUP_NETWORK_INTERFACE_MAX_LINE, "exec " PLUGINS_DIR "/cgroup-network-helper.sh --pid %d", pid); - - collector_info("running: %s", command); - - pid_t cgroup_pid; - FILE *fp_child_input, *fp_child_output; - - if(cgroup) { - (void)netdata_popen_raw_default_flags(&cgroup_pid, environment, &fp_child_input, &fp_child_output, PLUGINS_DIR "/cgroup-network-helper.sh", "--cgroup", cgroup); - } - else { - char buffer[100]; - snprintfz(buffer, sizeof(buffer) - 1, "%d", pid); - (void)netdata_popen_raw_default_flags(&cgroup_pid, environment, &fp_child_input, &fp_child_output, PLUGINS_DIR "/cgroup-network-helper.sh", "--pid", buffer); - } - - if(fp_child_output) { - char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; - char *s; - while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp_child_output))) { - trim(s); - - if(*s && *s != '\n') { - char *t = s; - while(*t && *t != ' ') t++; - if(*t == ' ') { - *t = '\0'; - t++; - } - - if(!*s || !*t) continue; - add_device(s, t); - } - } - - netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); - } - else - collector_error("cannot execute cgroup-network helper script: %s", command); -} - -int is_valid_path_symbol(char c) { - switch(c) { - case '/': // path separators - case '\\': // needed for virsh domains \x2d1\x2dname - case ' ': // space - case '-': // hyphen - case '_': // underscore - case '.': // dot - case ',': // comma - return 1; - - default: - return 0; - } -} - -// we will pass this path a shell script running as root -// so, we need to make sure the path will be valid -// and will not include anything that could allow -// the caller use shell expansion for gaining escalated -// privileges. 
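// Illustrative inputs (paths invented for the example): something like
// "/sys/fs/cgroup/system.slice/docker-a346da9bc0e3.scope" contains only
// characters accepted below (alphanumerics plus the symbols whitelisted in
// is_valid_path_symbol()) and passes, provided it exists and is a directory.
// By contrast, "/sys/fs/cgroup/$(reboot)" trips the character check ('$' and
// '(' are not whitelisted), "/sys/fs/cgroup/../etc" trips the "/../" check,
// and a relative path such as "system.slice/foo" is rejected because only
// absolute paths are accepted.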
-int verify_path(const char *path) { - struct stat sb; - - char c; - const char *s = path; - while((c = *s++)) { - if(!( isalnum(c) || is_valid_path_symbol(c) )) { - collector_error("invalid character in path '%s'", path); - return -1; - } - } - - if(strstr(path, "\\") && !strstr(path, "\\x")) { - collector_error("invalid escape sequence in path '%s'", path); - return 1; - } - - if(strstr(path, "/../")) { - collector_error("invalid parent path sequence detected in '%s'", path); - return 1; - } - - if(path[0] != '/') { - collector_error("only absolute path names are supported - invalid path '%s'", path); - return -1; - } - - if (stat(path, &sb) == -1) { - collector_error("cannot stat() path '%s'", path); - return -1; - } - - if((sb.st_mode & S_IFMT) != S_IFDIR) { - collector_error("path '%s' is not a directory", path); - return -1; - } - - return 0; -} - -/* -char *fix_path_variable(void) { - const char *path = getenv("PATH"); - if(!path || !*path) return 0; - - char *p = strdupz(path); - char *safe_path = callocz(1, strlen(p) + strlen("PATH=") + 1); - strcpy(safe_path, "PATH="); - - int added = 0; - char *ptr = p; - while(ptr && *ptr) { - char *s = strsep(&ptr, ":"); - if(s && *s) { - if(verify_path(s) == -1) { - collector_error("the PATH variable includes an invalid path '%s' - removed it.", s); - } - else { - collector_info("the PATH variable includes a valid path '%s'.", s); - if(added) strcat(safe_path, ":"); - strcat(safe_path, s); - added++; - } - } - } - - collector_info("unsafe PATH: '%s'.", path); - collector_info(" safe PATH: '%s'.", safe_path); - - freez(p); - return safe_path; -} -*/ - -// ---------------------------------------------------------------------------- -// main - -void usage(void) { - fprintf(stderr, "%s [ -p PID | --pid PID | --cgroup /path/to/cgroup ]\n", program_name); - exit(1); -} - -int main(int argc, char **argv) { - pid_t pid = 0; - - program_version = VERSION; - clocks_init(); - nd_log_initialize_for_external_plugins("cgroup-network"); - - // since cgroup-network runs as root, prevent it from opening symbolic links - procfile_open_flags = O_RDONLY|O_NOFOLLOW; - - // ------------------------------------------------------------------------ - // make sure NETDATA_HOST_PREFIX is safe - - netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX"); - if(verify_netdata_host_prefix(false) == -1) exit(1); - - if(netdata_configured_host_prefix[0] != '\0' && verify_path(netdata_configured_host_prefix) == -1) - fatal("invalid NETDATA_HOST_PREFIX '%s'", netdata_configured_host_prefix); - - // ------------------------------------------------------------------------ - // build a safe environment for our script - - // the first environment variable is a fixed PATH= - snprintfz(env_netdata_host_prefix, sizeof(env_netdata_host_prefix) - 1, "NETDATA_HOST_PREFIX=%s", netdata_configured_host_prefix); - - char *s; - - s = getenv("NETDATA_LOG_METHOD"); - snprintfz(env_netdata_log_method, sizeof(env_netdata_log_method) - 1, "NETDATA_LOG_METHOD=%s", nd_log_method_for_external_plugins(s)); - - s = getenv("NETDATA_LOG_FORMAT"); - if (s) - snprintfz(env_netdata_log_format, sizeof(env_netdata_log_format) - 1, "NETDATA_LOG_FORMAT=%s", s); - - s = getenv("NETDATA_LOG_LEVEL"); - if (s) - snprintfz(env_netdata_log_level, sizeof(env_netdata_log_level) - 1, "NETDATA_LOG_LEVEL=%s", s); - - // ------------------------------------------------------------------------ - - if(argc == 2 && (!strcmp(argv[1], "version") || !strcmp(argv[1], "-version") || !strcmp(argv[1], "--version") || 
!strcmp(argv[1], "-v") || !strcmp(argv[1], "-V"))) { - fprintf(stderr, "cgroup-network %s\n", VERSION); - exit(0); - } - - if(argc != 3) - usage(); - - int arg = 1; - int helper = 1; - if (getenv("KUBERNETES_SERVICE_HOST") != NULL && getenv("KUBERNETES_SERVICE_PORT") != NULL) - helper = 0; - - if(!strcmp(argv[arg], "-p") || !strcmp(argv[arg], "--pid")) { - pid = atoi(argv[arg+1]); - - if(pid <= 0) { - errno = 0; - collector_error("Invalid pid %d given", (int) pid); - return 2; - } - - if(helper) call_the_helper(pid, NULL); - } - else if(!strcmp(argv[arg], "--cgroup")) { - char *cgroup = argv[arg+1]; - if(verify_path(cgroup) == -1) { - collector_error("cgroup '%s' does not exist or is not valid.", cgroup); - return 1; - } - - pid = read_pid_from_cgroup(cgroup); - if(helper) call_the_helper(pid, cgroup); - - if(pid <= 0 && !detected_devices) { - errno = 0; - collector_error("Cannot find a cgroup PID from cgroup '%s'", cgroup); - } - } - else - usage(); - - if(pid > 0) - detect_veth_interfaces(pid); - - int found = send_devices(); - if(found <= 0) return 1; - return 0; -} diff --git a/collectors/cgroups.plugin/cgroup-top.c b/collectors/cgroups.plugin/cgroup-top.c deleted file mode 100644 index 8d44d3b56..000000000 --- a/collectors/cgroups.plugin/cgroup-top.c +++ /dev/null @@ -1,520 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "cgroup-internals.h" - -struct cgroup_netdev_link { - size_t read_slot; - NETDATA_DOUBLE received[2]; - NETDATA_DOUBLE sent[2]; -}; - -static DICTIONARY *cgroup_netdev_link_dict = NULL; - -void cgroup_netdev_link_init(void) { - cgroup_netdev_link_dict = dictionary_create_advanced(DICT_OPTION_FIXED_SIZE|DICT_OPTION_DONT_OVERWRITE_VALUE, NULL, sizeof(struct cgroup_netdev_link)); -} - -const DICTIONARY_ITEM *cgroup_netdev_get(struct cgroup *cg) { - if(!cg->cgroup_netdev_link) { - struct cgroup_netdev_link t = { - .read_slot = 0, - .received = {NAN, NAN}, - .sent = {NAN, NAN}, - }; - - cg->cgroup_netdev_link = - dictionary_set_and_acquire_item(cgroup_netdev_link_dict, cg->id, &t, sizeof(struct cgroup_netdev_link)); - } - - return dictionary_acquired_item_dup(cgroup_netdev_link_dict, cg->cgroup_netdev_link); -} - -void cgroup_netdev_delete(struct cgroup *cg) { - if(cg->cgroup_netdev_link) { - dictionary_acquired_item_release(cgroup_netdev_link_dict, cg->cgroup_netdev_link); - dictionary_del(cgroup_netdev_link_dict, cg->id); - dictionary_garbage_collect(cgroup_netdev_link_dict); - cg->cgroup_netdev_link = NULL; - } -} - -void cgroup_netdev_release(const DICTIONARY_ITEM *link) { - if(link) - dictionary_acquired_item_release(cgroup_netdev_link_dict, link); -} - -const void *cgroup_netdev_dup(const DICTIONARY_ITEM *link) { - return dictionary_acquired_item_dup(cgroup_netdev_link_dict, link); -} - -void cgroup_netdev_reset_all(void) { - struct cgroup_netdev_link *t; - dfe_start_read(cgroup_netdev_link_dict, t) { - if(t->read_slot >= 1) { - t->read_slot = 0; - t->received[1] = NAN; - t->sent[1] = NAN; - } - else { - t->read_slot = 1; - t->received[0] = NAN; - t->sent[0] = NAN; - } - } - dfe_done(t); -} - -void cgroup_netdev_add_bandwidth(const DICTIONARY_ITEM *link, NETDATA_DOUBLE received, NETDATA_DOUBLE sent) { - if(!link) - return; - - struct cgroup_netdev_link *t = dictionary_acquired_item_value(link); - - size_t slot = (t->read_slot) ? 
0 : 1; - - if(isnan(t->received[slot])) - t->received[slot] = received; - else - t->received[slot] += received; - - if(isnan(t->sent[slot])) - t->sent[slot] = sent; - else - t->sent[slot] += sent; -} - -void cgroup_netdev_get_bandwidth(struct cgroup *cg, NETDATA_DOUBLE *received, NETDATA_DOUBLE *sent) { - if(!cg->cgroup_netdev_link) { - *received = NAN; - *sent = NAN; - return; - } - - struct cgroup_netdev_link *t = dictionary_acquired_item_value(cg->cgroup_netdev_link); - - size_t slot = (t->read_slot) ? 1 : 0; - - *received = t->received[slot]; - *sent = t->sent[slot]; -} - -int cgroup_function_cgroup_top(BUFFER *wb, int timeout __maybe_unused, const char *function __maybe_unused, - void *collector_data __maybe_unused, - rrd_function_result_callback_t result_cb, void *result_cb_data, - rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data, - rrd_function_register_canceller_cb_t register_canceller_cb __maybe_unused, - void *register_canceller_cb_data __maybe_unused) { - - buffer_flush(wb); - wb->content_type = CT_APPLICATION_JSON; - buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT); - - buffer_json_member_add_string(wb, "hostname", rrdhost_hostname(localhost)); - buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); - buffer_json_member_add_string(wb, "type", "table"); - buffer_json_member_add_time_t(wb, "update_every", 1); - buffer_json_member_add_string(wb, "help", RRDFUNCTIONS_CGTOP_HELP); - buffer_json_member_add_array(wb, "data"); - - double max_pids = 0.0; - double max_cpu = 0.0; - double max_ram = 0.0; - double max_disk_io_read = 0.0; - double max_disk_io_written = 0.0; - double max_net_received = 0.0; - double max_net_sent = 0.0; - - RRDDIM *rd = NULL; - - uv_mutex_lock(&cgroup_root_mutex); - - for(struct cgroup *cg = cgroup_root; cg ; cg = cg->next) { - if(unlikely(!cg->enabled || cg->pending_renames || !cg->function_ready || is_cgroup_systemd_service(cg))) - continue; - - buffer_json_add_array_item_array(wb); - - buffer_json_add_array_item_string(wb, cg->name); // Name - - if(k8s_is_kubepod(cg)) - buffer_json_add_array_item_string(wb, "k8s"); // Kind - else - buffer_json_add_array_item_string(wb, "cgroup"); // Kind - - double pids_current = rrddim_get_last_stored_value(cg->st_pids_rd_pids_current, &max_pids, 1.0); - - double cpu = NAN; - if (cg->st_cpu_rd_user && cg->st_cpu_rd_system) { - cpu = cg->st_cpu_rd_user->collector.last_stored_value + cg->st_cpu_rd_system->collector.last_stored_value; - max_cpu = MAX(max_cpu, cpu); - } - - double ram = rrddim_get_last_stored_value(cg->st_mem_rd_ram, &max_ram, 1.0); - - rd = cg->st_throttle_io_rd_read ? cg->st_throttle_io_rd_read : cg->st_io_rd_read; - double disk_io_read = rrddim_get_last_stored_value(rd, &max_disk_io_read, 1024.0); - rd = cg->st_throttle_io_rd_written ? 
cg->st_throttle_io_rd_written : cg->st_io_rd_written; - double disk_io_written = rrddim_get_last_stored_value(rd, &max_disk_io_written, 1024.0); - - NETDATA_DOUBLE received, sent; - cgroup_netdev_get_bandwidth(cg, &received, &sent); - if (!isnan(received) && !isnan(sent)) { - received /= 1000.0; - sent /= 1000.0; - max_net_received = MAX(max_net_received, received); - max_net_sent = MAX(max_net_sent, sent); - } - - buffer_json_add_array_item_double(wb, pids_current); - buffer_json_add_array_item_double(wb, cpu); - buffer_json_add_array_item_double(wb, ram); - buffer_json_add_array_item_double(wb, disk_io_read); - buffer_json_add_array_item_double(wb, disk_io_written); - buffer_json_add_array_item_double(wb, received); - buffer_json_add_array_item_double(wb, sent); - - buffer_json_array_close(wb); - } - - uv_mutex_unlock(&cgroup_root_mutex); - - buffer_json_array_close(wb); // data - buffer_json_member_add_object(wb, "columns"); - { - size_t field_id = 0; - - // Node - buffer_rrdf_table_add_field(wb, field_id++, "Name", "CGROUP Name", - RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, - 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, - RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, - RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_STICKY | RRDF_FIELD_OPTS_FULL_WIDTH, - NULL); - - // Kind - buffer_rrdf_table_add_field(wb, field_id++, "Kind", "CGROUP Kind", - RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, - 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, - RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, - RRDF_FIELD_OPTS_VISIBLE, - NULL); - - // PIDs - buffer_rrdf_table_add_field(wb, field_id++, "PIDs", "Number of Processes Currently in the CGROUP", - RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, - 0, "pids", max_pids, RRDF_FIELD_SORT_DESCENDING, NULL, - RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, - RRDF_FIELD_OPTS_VISIBLE, - NULL); - - // CPU - buffer_rrdf_table_add_field(wb, field_id++, "CPU", "CPU Usage", - RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, - 2, "%", max_cpu, RRDF_FIELD_SORT_DESCENDING, NULL, - RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, - RRDF_FIELD_OPTS_VISIBLE, - NULL); - - // RAM - buffer_rrdf_table_add_field(wb, field_id++, "RAM", "RAM Usage", - RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, - 2, "MiB", max_ram, RRDF_FIELD_SORT_DESCENDING, NULL, - RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, - RRDF_FIELD_OPTS_VISIBLE, - NULL); - - // Disk IO Reads - buffer_rrdf_table_add_field(wb, field_id++, "Reads", "Disk Read Data", - RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, - 2, "MiB", max_disk_io_read, RRDF_FIELD_SORT_DESCENDING, NULL, - RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, - RRDF_FIELD_OPTS_VISIBLE, - NULL); - - // Disk IO Writes - buffer_rrdf_table_add_field(wb, field_id++, "Writes", "Disk Written Data", - RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, - 2, "MiB", max_disk_io_written, RRDF_FIELD_SORT_DESCENDING, NULL, - RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, - RRDF_FIELD_OPTS_VISIBLE, - NULL); - - // Network Received - buffer_rrdf_table_add_field(wb, field_id++, "Received", "Network Traffic Received", - RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, - 2, "Mbps", max_net_received, RRDF_FIELD_SORT_DESCENDING, NULL, - 
RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, - RRDF_FIELD_OPTS_VISIBLE, - NULL); - - // Network Sent - buffer_rrdf_table_add_field(wb, field_id++, "Sent", "Network Traffic Sent ", - RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, - 2, "Mbps", max_net_sent, RRDF_FIELD_SORT_DESCENDING, NULL, - RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, - RRDF_FIELD_OPTS_VISIBLE, - NULL); - } - buffer_json_object_close(wb); // columns - buffer_json_member_add_string(wb, "default_sort_column", "CPU"); - - buffer_json_member_add_object(wb, "charts"); - { - buffer_json_member_add_object(wb, "CPU"); - { - buffer_json_member_add_string(wb, "name", "CPU"); - buffer_json_member_add_string(wb, "type", "stacked-bar"); - buffer_json_member_add_array(wb, "columns"); - { - buffer_json_add_array_item_string(wb, "CPU"); - } - buffer_json_array_close(wb); - } - buffer_json_object_close(wb); - - buffer_json_member_add_object(wb, "Memory"); - { - buffer_json_member_add_string(wb, "name", "Memory"); - buffer_json_member_add_string(wb, "type", "stacked-bar"); - buffer_json_member_add_array(wb, "columns"); - { - buffer_json_add_array_item_string(wb, "RAM"); - } - buffer_json_array_close(wb); - } - buffer_json_object_close(wb); - - buffer_json_member_add_object(wb, "Traffic"); - { - buffer_json_member_add_string(wb, "name", "Traffic"); - buffer_json_member_add_string(wb, "type", "stacked-bar"); - buffer_json_member_add_array(wb, "columns"); - { - buffer_json_add_array_item_string(wb, "Received"); - buffer_json_add_array_item_string(wb, "Sent"); - } - buffer_json_array_close(wb); - } - buffer_json_object_close(wb); - } - buffer_json_object_close(wb); // charts - - buffer_json_member_add_array(wb, "default_charts"); - { - buffer_json_add_array_item_array(wb); - buffer_json_add_array_item_string(wb, "CPU"); - buffer_json_add_array_item_string(wb, "Name"); - buffer_json_array_close(wb); - - buffer_json_add_array_item_array(wb); - buffer_json_add_array_item_string(wb, "Memory"); - buffer_json_add_array_item_string(wb, "Name"); - buffer_json_array_close(wb); - } - buffer_json_array_close(wb); - - buffer_json_member_add_object(wb, "group_by"); - { - buffer_json_member_add_object(wb, "Kind"); - { - buffer_json_member_add_string(wb, "name", "Kind"); - buffer_json_member_add_array(wb, "columns"); - { - buffer_json_add_array_item_string(wb, "Kind"); - } - buffer_json_array_close(wb); - } - buffer_json_object_close(wb); - } - buffer_json_object_close(wb); // group_by - - buffer_json_member_add_time_t(wb, "expires", now_realtime_sec() + 1); - buffer_json_finalize(wb); - - int response = HTTP_RESP_OK; - if(is_cancelled_cb && is_cancelled_cb(is_cancelled_cb_data)) { - buffer_flush(wb); - response = HTTP_RESP_CLIENT_CLOSED_REQUEST; - } - - if(result_cb) - result_cb(wb, response, result_cb_data); - - return response; -} - -int cgroup_function_systemd_top(BUFFER *wb, int timeout __maybe_unused, const char *function __maybe_unused, - void *collector_data __maybe_unused, - rrd_function_result_callback_t result_cb, void *result_cb_data, - rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data, - rrd_function_register_canceller_cb_t register_canceller_cb __maybe_unused, - void *register_canceller_cb_data __maybe_unused) { - - buffer_flush(wb); - wb->content_type = CT_APPLICATION_JSON; - buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT); - - buffer_json_member_add_string(wb, "hostname", rrdhost_hostname(localhost)); - buffer_json_member_add_uint64(wb, "status", 
HTTP_RESP_OK); - buffer_json_member_add_string(wb, "type", "table"); - buffer_json_member_add_time_t(wb, "update_every", 1); - buffer_json_member_add_string(wb, "help", RRDFUNCTIONS_CGTOP_HELP); - buffer_json_member_add_array(wb, "data"); - - double max_pids = 0.0; - double max_cpu = 0.0; - double max_ram = 0.0; - double max_disk_io_read = 0.0; - double max_disk_io_written = 0.0; - - RRDDIM *rd = NULL; - - uv_mutex_lock(&cgroup_root_mutex); - - for(struct cgroup *cg = cgroup_root; cg ; cg = cg->next) { - if(unlikely(!cg->enabled || cg->pending_renames || !cg->function_ready || !is_cgroup_systemd_service(cg))) - continue; - - buffer_json_add_array_item_array(wb); - - buffer_json_add_array_item_string(wb, cg->name); - - double pids_current = rrddim_get_last_stored_value(cg->st_pids_rd_pids_current, &max_pids, 1.0); - - double cpu = NAN; - if (cg->st_cpu_rd_user && cg->st_cpu_rd_system) { - cpu = cg->st_cpu_rd_user->collector.last_stored_value + cg->st_cpu_rd_system->collector.last_stored_value; - max_cpu = MAX(max_cpu, cpu); - } - - double ram = rrddim_get_last_stored_value(cg->st_mem_rd_ram, &max_ram, 1.0); - - rd = cg->st_throttle_io_rd_read ? cg->st_throttle_io_rd_read : cg->st_io_rd_read; - double disk_io_read = rrddim_get_last_stored_value(rd, &max_disk_io_read, 1024.0); - rd = cg->st_throttle_io_rd_written ? cg->st_throttle_io_rd_written : cg->st_io_rd_written; - double disk_io_written = rrddim_get_last_stored_value(rd, &max_disk_io_written, 1024.0); - - buffer_json_add_array_item_double(wb, pids_current); - buffer_json_add_array_item_double(wb, cpu); - buffer_json_add_array_item_double(wb, ram); - buffer_json_add_array_item_double(wb, disk_io_read); - buffer_json_add_array_item_double(wb, disk_io_written); - - buffer_json_array_close(wb); - } - - uv_mutex_unlock(&cgroup_root_mutex); - - buffer_json_array_close(wb); // data - buffer_json_member_add_object(wb, "columns"); - { - size_t field_id = 0; - - // Node - buffer_rrdf_table_add_field(wb, field_id++, "Name", "Systemd Service Name", - RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, - 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, - RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, - RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_STICKY | RRDF_FIELD_OPTS_FULL_WIDTH, - NULL); - - // PIDs - buffer_rrdf_table_add_field(wb, field_id++, "PIDs", "Number of Processes Currently in the CGROUP", - RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, - 0, "pids", max_pids, RRDF_FIELD_SORT_DESCENDING, NULL, - RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, - RRDF_FIELD_OPTS_VISIBLE, - NULL); - - // CPU - buffer_rrdf_table_add_field(wb, field_id++, "CPU", "CPU Usage", - RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, - 2, "%", max_cpu, RRDF_FIELD_SORT_DESCENDING, NULL, - RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, - RRDF_FIELD_OPTS_VISIBLE, - NULL); - - // RAM - buffer_rrdf_table_add_field(wb, field_id++, "RAM", "RAM Usage", - RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, - 2, "MiB", max_ram, RRDF_FIELD_SORT_DESCENDING, NULL, - RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, - RRDF_FIELD_OPTS_VISIBLE, - NULL); - - // Disk IO Reads - buffer_rrdf_table_add_field(wb, field_id++, "Reads", "Disk Read Data", - RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, - 2, "MiB", max_disk_io_read, RRDF_FIELD_SORT_DESCENDING, NULL, - RRDF_FIELD_SUMMARY_SUM, 
RRDF_FIELD_FILTER_NONE, - RRDF_FIELD_OPTS_VISIBLE, - NULL); - - // Disk IO Writes - buffer_rrdf_table_add_field(wb, field_id++, "Writes", "Disk Written Data", - RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, - 2, "MiB", max_disk_io_written, RRDF_FIELD_SORT_DESCENDING, NULL, - RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, - RRDF_FIELD_OPTS_VISIBLE, - NULL); - } - - buffer_json_object_close(wb); // columns - buffer_json_member_add_string(wb, "default_sort_column", "CPU"); - - buffer_json_member_add_object(wb, "charts"); - { - buffer_json_member_add_object(wb, "CPU"); - { - buffer_json_member_add_string(wb, "name", "CPU"); - buffer_json_member_add_string(wb, "type", "stacked-bar"); - buffer_json_member_add_array(wb, "columns"); - { - buffer_json_add_array_item_string(wb, "CPU"); - } - buffer_json_array_close(wb); - } - buffer_json_object_close(wb); - - buffer_json_member_add_object(wb, "Memory"); - { - buffer_json_member_add_string(wb, "name", "Memory"); - buffer_json_member_add_string(wb, "type", "stacked-bar"); - buffer_json_member_add_array(wb, "columns"); - { - buffer_json_add_array_item_string(wb, "RAM"); - } - buffer_json_array_close(wb); - } - buffer_json_object_close(wb); - } - buffer_json_object_close(wb); // charts - - buffer_json_member_add_array(wb, "default_charts"); - { - buffer_json_add_array_item_array(wb); - buffer_json_add_array_item_string(wb, "CPU"); - buffer_json_add_array_item_string(wb, "Name"); - buffer_json_array_close(wb); - - buffer_json_add_array_item_array(wb); - buffer_json_add_array_item_string(wb, "Memory"); - buffer_json_add_array_item_string(wb, "Name"); - buffer_json_array_close(wb); - } - buffer_json_array_close(wb); - - buffer_json_member_add_time_t(wb, "expires", now_realtime_sec() + 1); - buffer_json_finalize(wb); - - int response = HTTP_RESP_OK; - if(is_cancelled_cb && is_cancelled_cb(is_cancelled_cb_data)) { - buffer_flush(wb); - response = HTTP_RESP_CLIENT_CLOSED_REQUEST; - } - - if(result_cb) - result_cb(wb, response, result_cb_data); - - return response; -} diff --git a/collectors/cgroups.plugin/integrations/containers.md b/collectors/cgroups.plugin/integrations/containers.md deleted file mode 100644 index 6273d1e91..000000000 --- a/collectors/cgroups.plugin/integrations/containers.md +++ /dev/null @@ -1,169 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.plugin/integrations/containers.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.plugin/metadata.yaml" -sidebar_label: "Containers" -learn_status: "Published" -learn_rel_path: "Data Collection/Containers and VMs" -most_popular: True -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Containers - - -<img src="https://netdata.cloud/img/container.svg" width="150"/> - - -Plugin: cgroups.plugin -Module: /sys/fs/cgroup - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -Monitor Containers for performance, resource usage, and health status. - - - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -This integration doesn't support auto-detection. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. 
- -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per cgroup - - - -Labels: - -| Label | Description | -|:-----------|:----------------| -| container_name | The container name or group path if name resolution fails. | -| image | Docker/Podman container image name. | - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| cgroup.cpu_limit | used | percentage | -| cgroup.cpu | user, system | percentage | -| cgroup.cpu_per_core | a dimension per core | percentage | -| cgroup.throttled | throttled | percentage | -| cgroup.throttled_duration | duration | ms | -| cgroup.cpu_shares | shares | shares | -| cgroup.mem | cache, rss, swap, rss_huge, mapped_file | MiB | -| cgroup.writeback | dirty, writeback | MiB | -| cgroup.mem_activity | in, out | MiB/s | -| cgroup.pgfaults | pgfault, swap | MiB/s | -| cgroup.mem_usage | ram, swap | MiB | -| cgroup.mem_usage_limit | available, used | MiB | -| cgroup.mem_utilization | utilization | percentage | -| cgroup.mem_failcnt | failures | count | -| cgroup.io | read, write | KiB/s | -| cgroup.serviced_ops | read, write | operations/s | -| cgroup.throttle_io | read, write | KiB/s | -| cgroup.throttle_serviced_ops | read, write | operations/s | -| cgroup.queued_ops | read, write | operations | -| cgroup.merged_ops | read, write | operations/s | -| cgroup.cpu_some_pressure | some10, some60, some300 | percentage | -| cgroup.cpu_some_pressure_stall_time | time | ms | -| cgroup.cpu_full_pressure | some10, some60, some300 | percentage | -| cgroup.cpu_full_pressure_stall_time | time | ms | -| cgroup.memory_some_pressure | some10, some60, some300 | percentage | -| cgroup.memory_some_pressure_stall_time | time | ms | -| cgroup.memory_full_pressure | some10, some60, some300 | percentage | -| cgroup.memory_full_pressure_stall_time | time | ms | -| cgroup.io_some_pressure | some10, some60, some300 | percentage | -| cgroup.io_some_pressure_stall_time | time | ms | -| cgroup.io_full_pressure | some10, some60, some300 | percentage | -| cgroup.io_full_pressure_stall_time | time | ms | -| cgroup.pids_current | pids | pids | - -### Per cgroup network device - - - -Labels: - -| Label | Description | -|:-----------|:----------------| -| container_name | The container name or group path if name resolution fails. | -| image | Docker/Podman container image name. | -| device | The name of the host network interface linked to the container's network interface. | -| container_device | Container network interface name. | -| interface_type | Network interface type. Always "virtual" for the containers. 
| - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| cgroup.net_net | received, sent | kilobits/s | -| cgroup.net_packets | received, sent, multicast | pps | -| cgroup.net_errors | inbound, outbound | errors/s | -| cgroup.net_drops | inbound, outbound | errors/s | -| cgroup.net_fifo | receive, transmit | errors/s | -| cgroup.net_compressed | receive, sent | pps | -| cgroup.net_events | frames, collisions, carrier | events/s | -| cgroup.net_operstate | up, down, notpresent, lowerlayerdown, testing, dormant, unknown | state | -| cgroup.net_carrier | up, down | state | -| cgroup.net_mtu | mtu | octets | - - - -## Alerts - - -The following alerts are available: - -| Alert name | On metric | Description | -|:------------|:----------|:------------| -| [ cgroup_10min_cpu_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.cpu_limit | average cgroup CPU utilization over the last 10 minutes | -| [ cgroup_ram_in_use ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.mem_usage | cgroup memory utilization | -| [ cgroup_1m_received_packets_rate ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | average number of packets received by the network interface ${label:device} over the last minute | -| [ cgroup_10s_received_packets_storm ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute | - - -## Setup - -### Prerequisites - -No action required. - -### Configuration - -#### File - -There is no configuration file. -#### Options - - - -There are no configuration options. - -#### Examples -There are no configuration examples. - - diff --git a/collectors/cgroups.plugin/integrations/kubernetes_containers.md b/collectors/cgroups.plugin/integrations/kubernetes_containers.md deleted file mode 100644 index 9be32a12a..000000000 --- a/collectors/cgroups.plugin/integrations/kubernetes_containers.md +++ /dev/null @@ -1,183 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.plugin/integrations/kubernetes_containers.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.plugin/metadata.yaml" -sidebar_label: "Kubernetes Containers" -learn_status: "Published" -learn_rel_path: "Data Collection/Kubernetes" -most_popular: True -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Kubernetes Containers - - -<img src="https://netdata.cloud/img/kubernetes.svg" width="150"/> - - -Plugin: cgroups.plugin -Module: /sys/fs/cgroup - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -Monitor Containers for performance, resource usage, and health status. - - - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -This integration doesn't support auto-detection. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. 
- - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per k8s cgroup - -These metrics refer to the Pod container. - -Labels: - -| Label | Description | -|:-----------|:----------------| -| k8s_node_name | Node name. The value of _pod.spec.nodeName_. | -| k8s_namespace | Namespace name. The value of _pod.metadata.namespace_. | -| k8s_controller_kind | Controller kind (ReplicaSet, DaemonSet, StatefulSet, Job, etc.). The value of _pod.OwnerReferences.Controller.Kind_. | -| k8s_controller_name | Controller name.The value of _pod.OwnerReferences.Controller.Name_. | -| k8s_pod_name | Pod name. The value of _pod.metadata.name_. | -| k8s_container_name | Container name. The value of _pod.spec.containers.name_. | -| k8s_kind | Instance kind: "pod" or "container". | -| k8s_qos_class | QoS class (guaranteed, burstable, besteffort). | -| k8s_cluster_id | Cluster ID. The value of kube-system namespace _namespace.metadata.uid_. | - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| k8s.cgroup.cpu_limit | used | percentage | -| k8s.cgroup.cpu | user, system | percentage | -| k8s.cgroup.cpu_per_core | a dimension per core | percentage | -| k8s.cgroup.throttled | throttled | percentage | -| k8s.cgroup.throttled_duration | duration | ms | -| k8s.cgroup.cpu_shares | shares | shares | -| k8s.cgroup.mem | cache, rss, swap, rss_huge, mapped_file | MiB | -| k8s.cgroup.writeback | dirty, writeback | MiB | -| k8s.cgroup.mem_activity | in, out | MiB/s | -| k8s.cgroup.pgfaults | pgfault, swap | MiB/s | -| k8s.cgroup.mem_usage | ram, swap | MiB | -| k8s.cgroup.mem_usage_limit | available, used | MiB | -| k8s.cgroup.mem_utilization | utilization | percentage | -| k8s.cgroup.mem_failcnt | failures | count | -| k8s.cgroup.io | read, write | KiB/s | -| k8s.cgroup.serviced_ops | read, write | operations/s | -| k8s.cgroup.throttle_io | read, write | KiB/s | -| k8s.cgroup.throttle_serviced_ops | read, write | operations/s | -| k8s.cgroup.queued_ops | read, write | operations | -| k8s.cgroup.merged_ops | read, write | operations/s | -| k8s.cgroup.cpu_some_pressure | some10, some60, some300 | percentage | -| k8s.cgroup.cpu_some_pressure_stall_time | time | ms | -| k8s.cgroup.cpu_full_pressure | some10, some60, some300 | percentage | -| k8s.cgroup.cpu_full_pressure_stall_time | time | ms | -| k8s.cgroup.memory_some_pressure | some10, some60, some300 | percentage | -| k8s.cgroup.memory_some_pressure_stall_time | time | ms | -| k8s.cgroup.memory_full_pressure | some10, some60, some300 | percentage | -| k8s.cgroup.memory_full_pressure_stall_time | time | ms | -| k8s.cgroup.io_some_pressure | some10, some60, some300 | percentage | -| k8s.cgroup.io_some_pressure_stall_time | time | ms | -| k8s.cgroup.io_full_pressure | some10, some60, some300 | percentage | -| k8s.cgroup.io_full_pressure_stall_time | time | ms | -| k8s.cgroup.pids_current | pids | pids | - -### Per k8s cgroup network device - -These metrics refer to the Pod container network interface. - -Labels: - -| Label | Description | -|:-----------|:----------------| -| device | The name of the host network interface linked to the container's network interface. | -| container_device | Container network interface name. | -| interface_type | Network interface type. Always "virtual" for the containers. | -| k8s_node_name | Node name. The value of _pod.spec.nodeName_. | -| k8s_namespace | Namespace name. 
The value of _pod.metadata.namespace_. | -| k8s_controller_kind | Controller kind (ReplicaSet, DaemonSet, StatefulSet, Job, etc.). The value of _pod.OwnerReferences.Controller.Kind_. | -| k8s_controller_name | Controller name.The value of _pod.OwnerReferences.Controller.Name_. | -| k8s_pod_name | Pod name. The value of _pod.metadata.name_. | -| k8s_container_name | Container name. The value of _pod.spec.containers.name_. | -| k8s_kind | Instance kind: "pod" or "container". | -| k8s_qos_class | QoS class (guaranteed, burstable, besteffort). | -| k8s_cluster_id | Cluster ID. The value of kube-system namespace _namespace.metadata.uid_. | - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| k8s.cgroup.net_net | received, sent | kilobits/s | -| k8s.cgroup.net_packets | received, sent, multicast | pps | -| k8s.cgroup.net_errors | inbound, outbound | errors/s | -| k8s.cgroup.net_drops | inbound, outbound | errors/s | -| k8s.cgroup.net_fifo | receive, transmit | errors/s | -| k8s.cgroup.net_compressed | receive, sent | pps | -| k8s.cgroup.net_events | frames, collisions, carrier | events/s | -| k8s.cgroup.net_operstate | up, down, notpresent, lowerlayerdown, testing, dormant, unknown | state | -| k8s.cgroup.net_carrier | up, down | state | -| k8s.cgroup.net_mtu | mtu | octets | - - - -## Alerts - - -The following alerts are available: - -| Alert name | On metric | Description | -|:------------|:----------|:------------| -| [ k8s_cgroup_10min_cpu_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | k8s.cgroup.cpu_limit | average cgroup CPU utilization over the last 10 minutes | -| [ k8s_cgroup_ram_in_use ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | k8s.cgroup.mem_usage | cgroup memory utilization | -| [ k8s_cgroup_1m_received_packets_rate ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | k8s.cgroup.net_packets | average number of packets received by the network interface ${label:device} over the last minute | -| [ k8s_cgroup_10s_received_packets_storm ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | k8s.cgroup.net_packets | ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute | - - -## Setup - -### Prerequisites - -No action required. - -### Configuration - -#### File - -There is no configuration file. -#### Options - - - -There are no configuration options. - -#### Examples -There are no configuration examples. 
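
To sanity-check that the metrics and labels documented above are actually being collected on a running node, the agent's REST API can be queried directly. The sketch below is a minimal example only: it assumes a locally running agent on the default port 19999, the `/api/v1/allmetrics?format=json` endpoint, and response entries that carry `context`, `units` and `dimensions` fields; the exact layout may differ between agent versions.

```python
# Minimal sketch: list cgroup / k8s.cgroup charts exposed by a local Netdata agent.
# Assumptions: agent reachable at 127.0.0.1:19999 and an allmetrics JSON response
# keyed by chart id, with "context", "units" and "dimensions" per chart.
import json
from urllib.request import urlopen

URL = "http://127.0.0.1:19999/api/v1/allmetrics?format=json"

with urlopen(URL, timeout=5) as resp:
    charts = json.load(resp)

for chart_id, chart in charts.items():
    context = chart.get("context", "")
    if not (context.startswith("cgroup.") or context.startswith("k8s.cgroup.")):
        continue  # keep only the contexts documented for this integration
    dims = ", ".join(
        f"{d.get('name')}={d.get('value')}"
        for d in chart.get("dimensions", {}).values()
    )
    print(f"{chart_id} [{context}] ({chart.get('units')}): {dims}")
```

For the history of a single chart rather than only the latest values, `/api/v1/data?chart=<chart_id>&after=-60` returns the last minute of samples.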
- - diff --git a/collectors/cgroups.plugin/integrations/libvirt_containers.md b/collectors/cgroups.plugin/integrations/libvirt_containers.md deleted file mode 100644 index fed454698..000000000 --- a/collectors/cgroups.plugin/integrations/libvirt_containers.md +++ /dev/null @@ -1,169 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.plugin/integrations/libvirt_containers.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.plugin/metadata.yaml" -sidebar_label: "Libvirt Containers" -learn_status: "Published" -learn_rel_path: "Data Collection/Containers and VMs" -most_popular: True -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Libvirt Containers - - -<img src="https://netdata.cloud/img/libvirt.png" width="150"/> - - -Plugin: cgroups.plugin -Module: /sys/fs/cgroup - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -Monitor Libvirt for performance, resource usage, and health status. - - - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -This integration doesn't support auto-detection. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per cgroup - - - -Labels: - -| Label | Description | -|:-----------|:----------------| -| container_name | The container name or group path if name resolution fails. | -| image | Docker/Podman container image name. 
| - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| cgroup.cpu_limit | used | percentage | -| cgroup.cpu | user, system | percentage | -| cgroup.cpu_per_core | a dimension per core | percentage | -| cgroup.throttled | throttled | percentage | -| cgroup.throttled_duration | duration | ms | -| cgroup.cpu_shares | shares | shares | -| cgroup.mem | cache, rss, swap, rss_huge, mapped_file | MiB | -| cgroup.writeback | dirty, writeback | MiB | -| cgroup.mem_activity | in, out | MiB/s | -| cgroup.pgfaults | pgfault, swap | MiB/s | -| cgroup.mem_usage | ram, swap | MiB | -| cgroup.mem_usage_limit | available, used | MiB | -| cgroup.mem_utilization | utilization | percentage | -| cgroup.mem_failcnt | failures | count | -| cgroup.io | read, write | KiB/s | -| cgroup.serviced_ops | read, write | operations/s | -| cgroup.throttle_io | read, write | KiB/s | -| cgroup.throttle_serviced_ops | read, write | operations/s | -| cgroup.queued_ops | read, write | operations | -| cgroup.merged_ops | read, write | operations/s | -| cgroup.cpu_some_pressure | some10, some60, some300 | percentage | -| cgroup.cpu_some_pressure_stall_time | time | ms | -| cgroup.cpu_full_pressure | some10, some60, some300 | percentage | -| cgroup.cpu_full_pressure_stall_time | time | ms | -| cgroup.memory_some_pressure | some10, some60, some300 | percentage | -| cgroup.memory_some_pressure_stall_time | time | ms | -| cgroup.memory_full_pressure | some10, some60, some300 | percentage | -| cgroup.memory_full_pressure_stall_time | time | ms | -| cgroup.io_some_pressure | some10, some60, some300 | percentage | -| cgroup.io_some_pressure_stall_time | time | ms | -| cgroup.io_full_pressure | some10, some60, some300 | percentage | -| cgroup.io_full_pressure_stall_time | time | ms | -| cgroup.pids_current | pids | pids | - -### Per cgroup network device - - - -Labels: - -| Label | Description | -|:-----------|:----------------| -| container_name | The container name or group path if name resolution fails. | -| image | Docker/Podman container image name. | -| device | The name of the host network interface linked to the container's network interface. | -| container_device | Container network interface name. | -| interface_type | Network interface type. Always "virtual" for the containers. 
| - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| cgroup.net_net | received, sent | kilobits/s | -| cgroup.net_packets | received, sent, multicast | pps | -| cgroup.net_errors | inbound, outbound | errors/s | -| cgroup.net_drops | inbound, outbound | errors/s | -| cgroup.net_fifo | receive, transmit | errors/s | -| cgroup.net_compressed | receive, sent | pps | -| cgroup.net_events | frames, collisions, carrier | events/s | -| cgroup.net_operstate | up, down, notpresent, lowerlayerdown, testing, dormant, unknown | state | -| cgroup.net_carrier | up, down | state | -| cgroup.net_mtu | mtu | octets | - - - -## Alerts - - -The following alerts are available: - -| Alert name | On metric | Description | -|:------------|:----------|:------------| -| [ cgroup_10min_cpu_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.cpu_limit | average cgroup CPU utilization over the last 10 minutes | -| [ cgroup_ram_in_use ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.mem_usage | cgroup memory utilization | -| [ cgroup_1m_received_packets_rate ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | average number of packets received by the network interface ${label:device} over the last minute | -| [ cgroup_10s_received_packets_storm ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute | - - -## Setup - -### Prerequisites - -No action required. - -### Configuration - -#### File - -There is no configuration file. -#### Options - - - -There are no configuration options. - -#### Examples -There are no configuration examples. - - diff --git a/collectors/cgroups.plugin/integrations/lxc_containers.md b/collectors/cgroups.plugin/integrations/lxc_containers.md deleted file mode 100644 index 3f05ffd5f..000000000 --- a/collectors/cgroups.plugin/integrations/lxc_containers.md +++ /dev/null @@ -1,169 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.plugin/integrations/lxc_containers.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.plugin/metadata.yaml" -sidebar_label: "LXC Containers" -learn_status: "Published" -learn_rel_path: "Data Collection/Containers and VMs" -most_popular: True -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# LXC Containers - - -<img src="https://netdata.cloud/img/lxc.png" width="150"/> - - -Plugin: cgroups.plugin -Module: /sys/fs/cgroup - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -Monitor LXC Containers for performance, resource usage, and health status. - - - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -This integration doesn't support auto-detection. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. 
- -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per cgroup - - - -Labels: - -| Label | Description | -|:-----------|:----------------| -| container_name | The container name or group path if name resolution fails. | -| image | Docker/Podman container image name. | - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| cgroup.cpu_limit | used | percentage | -| cgroup.cpu | user, system | percentage | -| cgroup.cpu_per_core | a dimension per core | percentage | -| cgroup.throttled | throttled | percentage | -| cgroup.throttled_duration | duration | ms | -| cgroup.cpu_shares | shares | shares | -| cgroup.mem | cache, rss, swap, rss_huge, mapped_file | MiB | -| cgroup.writeback | dirty, writeback | MiB | -| cgroup.mem_activity | in, out | MiB/s | -| cgroup.pgfaults | pgfault, swap | MiB/s | -| cgroup.mem_usage | ram, swap | MiB | -| cgroup.mem_usage_limit | available, used | MiB | -| cgroup.mem_utilization | utilization | percentage | -| cgroup.mem_failcnt | failures | count | -| cgroup.io | read, write | KiB/s | -| cgroup.serviced_ops | read, write | operations/s | -| cgroup.throttle_io | read, write | KiB/s | -| cgroup.throttle_serviced_ops | read, write | operations/s | -| cgroup.queued_ops | read, write | operations | -| cgroup.merged_ops | read, write | operations/s | -| cgroup.cpu_some_pressure | some10, some60, some300 | percentage | -| cgroup.cpu_some_pressure_stall_time | time | ms | -| cgroup.cpu_full_pressure | some10, some60, some300 | percentage | -| cgroup.cpu_full_pressure_stall_time | time | ms | -| cgroup.memory_some_pressure | some10, some60, some300 | percentage | -| cgroup.memory_some_pressure_stall_time | time | ms | -| cgroup.memory_full_pressure | some10, some60, some300 | percentage | -| cgroup.memory_full_pressure_stall_time | time | ms | -| cgroup.io_some_pressure | some10, some60, some300 | percentage | -| cgroup.io_some_pressure_stall_time | time | ms | -| cgroup.io_full_pressure | some10, some60, some300 | percentage | -| cgroup.io_full_pressure_stall_time | time | ms | -| cgroup.pids_current | pids | pids | - -### Per cgroup network device - - - -Labels: - -| Label | Description | -|:-----------|:----------------| -| container_name | The container name or group path if name resolution fails. | -| image | Docker/Podman container image name. | -| device | The name of the host network interface linked to the container's network interface. | -| container_device | Container network interface name. | -| interface_type | Network interface type. Always "virtual" for the containers. 
| - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| cgroup.net_net | received, sent | kilobits/s | -| cgroup.net_packets | received, sent, multicast | pps | -| cgroup.net_errors | inbound, outbound | errors/s | -| cgroup.net_drops | inbound, outbound | errors/s | -| cgroup.net_fifo | receive, transmit | errors/s | -| cgroup.net_compressed | receive, sent | pps | -| cgroup.net_events | frames, collisions, carrier | events/s | -| cgroup.net_operstate | up, down, notpresent, lowerlayerdown, testing, dormant, unknown | state | -| cgroup.net_carrier | up, down | state | -| cgroup.net_mtu | mtu | octets | - - - -## Alerts - - -The following alerts are available: - -| Alert name | On metric | Description | -|:------------|:----------|:------------| -| [ cgroup_10min_cpu_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.cpu_limit | average cgroup CPU utilization over the last 10 minutes | -| [ cgroup_ram_in_use ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.mem_usage | cgroup memory utilization | -| [ cgroup_1m_received_packets_rate ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | average number of packets received by the network interface ${label:device} over the last minute | -| [ cgroup_10s_received_packets_storm ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute | - - -## Setup - -### Prerequisites - -No action required. - -### Configuration - -#### File - -There is no configuration file. -#### Options - - - -There are no configuration options. - -#### Examples -There are no configuration examples. - - diff --git a/collectors/cgroups.plugin/integrations/ovirt_containers.md b/collectors/cgroups.plugin/integrations/ovirt_containers.md deleted file mode 100644 index 5771aeea1..000000000 --- a/collectors/cgroups.plugin/integrations/ovirt_containers.md +++ /dev/null @@ -1,169 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.plugin/integrations/ovirt_containers.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.plugin/metadata.yaml" -sidebar_label: "oVirt Containers" -learn_status: "Published" -learn_rel_path: "Data Collection/Containers and VMs" -most_popular: True -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# oVirt Containers - - -<img src="https://netdata.cloud/img/ovirt.svg" width="150"/> - - -Plugin: cgroups.plugin -Module: /sys/fs/cgroup - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -Monitor oVirt for performance, resource usage, and health status. - - - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -This integration doesn't support auto-detection. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. 
- -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per cgroup - - - -Labels: - -| Label | Description | -|:-----------|:----------------| -| container_name | The container name or group path if name resolution fails. | -| image | Docker/Podman container image name. | - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| cgroup.cpu_limit | used | percentage | -| cgroup.cpu | user, system | percentage | -| cgroup.cpu_per_core | a dimension per core | percentage | -| cgroup.throttled | throttled | percentage | -| cgroup.throttled_duration | duration | ms | -| cgroup.cpu_shares | shares | shares | -| cgroup.mem | cache, rss, swap, rss_huge, mapped_file | MiB | -| cgroup.writeback | dirty, writeback | MiB | -| cgroup.mem_activity | in, out | MiB/s | -| cgroup.pgfaults | pgfault, swap | MiB/s | -| cgroup.mem_usage | ram, swap | MiB | -| cgroup.mem_usage_limit | available, used | MiB | -| cgroup.mem_utilization | utilization | percentage | -| cgroup.mem_failcnt | failures | count | -| cgroup.io | read, write | KiB/s | -| cgroup.serviced_ops | read, write | operations/s | -| cgroup.throttle_io | read, write | KiB/s | -| cgroup.throttle_serviced_ops | read, write | operations/s | -| cgroup.queued_ops | read, write | operations | -| cgroup.merged_ops | read, write | operations/s | -| cgroup.cpu_some_pressure | some10, some60, some300 | percentage | -| cgroup.cpu_some_pressure_stall_time | time | ms | -| cgroup.cpu_full_pressure | some10, some60, some300 | percentage | -| cgroup.cpu_full_pressure_stall_time | time | ms | -| cgroup.memory_some_pressure | some10, some60, some300 | percentage | -| cgroup.memory_some_pressure_stall_time | time | ms | -| cgroup.memory_full_pressure | some10, some60, some300 | percentage | -| cgroup.memory_full_pressure_stall_time | time | ms | -| cgroup.io_some_pressure | some10, some60, some300 | percentage | -| cgroup.io_some_pressure_stall_time | time | ms | -| cgroup.io_full_pressure | some10, some60, some300 | percentage | -| cgroup.io_full_pressure_stall_time | time | ms | -| cgroup.pids_current | pids | pids | - -### Per cgroup network device - - - -Labels: - -| Label | Description | -|:-----------|:----------------| -| container_name | The container name or group path if name resolution fails. | -| image | Docker/Podman container image name. | -| device | The name of the host network interface linked to the container's network interface. | -| container_device | Container network interface name. | -| interface_type | Network interface type. Always "virtual" for the containers. 
| - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| cgroup.net_net | received, sent | kilobits/s | -| cgroup.net_packets | received, sent, multicast | pps | -| cgroup.net_errors | inbound, outbound | errors/s | -| cgroup.net_drops | inbound, outbound | errors/s | -| cgroup.net_fifo | receive, transmit | errors/s | -| cgroup.net_compressed | receive, sent | pps | -| cgroup.net_events | frames, collisions, carrier | events/s | -| cgroup.net_operstate | up, down, notpresent, lowerlayerdown, testing, dormant, unknown | state | -| cgroup.net_carrier | up, down | state | -| cgroup.net_mtu | mtu | octets | - - - -## Alerts - - -The following alerts are available: - -| Alert name | On metric | Description | -|:------------|:----------|:------------| -| [ cgroup_10min_cpu_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.cpu_limit | average cgroup CPU utilization over the last 10 minutes | -| [ cgroup_ram_in_use ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.mem_usage | cgroup memory utilization | -| [ cgroup_1m_received_packets_rate ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | average number of packets received by the network interface ${label:device} over the last minute | -| [ cgroup_10s_received_packets_storm ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute | - - -## Setup - -### Prerequisites - -No action required. - -### Configuration - -#### File - -There is no configuration file. -#### Options - - - -There are no configuration options. - -#### Examples -There are no configuration examples. - - diff --git a/collectors/cgroups.plugin/integrations/proxmox_containers.md b/collectors/cgroups.plugin/integrations/proxmox_containers.md deleted file mode 100644 index 1804a40ca..000000000 --- a/collectors/cgroups.plugin/integrations/proxmox_containers.md +++ /dev/null @@ -1,169 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.plugin/integrations/proxmox_containers.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.plugin/metadata.yaml" -sidebar_label: "Proxmox Containers" -learn_status: "Published" -learn_rel_path: "Data Collection/Containers and VMs" -most_popular: True -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Proxmox Containers - - -<img src="https://netdata.cloud/img/proxmox.png" width="150"/> - - -Plugin: cgroups.plugin -Module: /sys/fs/cgroup - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -Monitor Proxmox for performance, resource usage, and health status. - - - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -This integration doesn't support auto-detection. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. 
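
The alert definitions referenced throughout these integration pages (for example `cgroup_10min_cpu_usage` and `cgroup_ram_in_use` from `health/health.d/cgroups.conf`) can also be inspected on a live agent. The following sketch is illustrative only: it assumes the `/api/v1/alarms?all` endpoint of a local agent and a response whose `alarms` object carries `chart`, `status` and `value` fields per entry; field names and chart id prefixes may vary by agent version.

```python
# Minimal sketch: list cgroup-related health alerts known to a local Netdata agent.
# Assumptions: agent at 127.0.0.1:19999, /api/v1/alarms?all returning an "alarms"
# object whose entries include "chart", "status" and "value".
import json
from urllib.request import urlopen

URL = "http://127.0.0.1:19999/api/v1/alarms?all"

with urlopen(URL, timeout=5) as resp:
    payload = json.load(resp)

for name, alarm in payload.get("alarms", {}).items():
    chart = alarm.get("chart", "")
    if not chart.startswith("cgroup"):  # cgroup chart ids typically look like cgroup_<name>.<metric>
        continue
    print(f"{name}: chart={chart} status={alarm.get('status')} value={alarm.get('value')}")
```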
- - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per cgroup - - - -Labels: - -| Label | Description | -|:-----------|:----------------| -| container_name | The container name or group path if name resolution fails. | -| image | Docker/Podman container image name. | - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| cgroup.cpu_limit | used | percentage | -| cgroup.cpu | user, system | percentage | -| cgroup.cpu_per_core | a dimension per core | percentage | -| cgroup.throttled | throttled | percentage | -| cgroup.throttled_duration | duration | ms | -| cgroup.cpu_shares | shares | shares | -| cgroup.mem | cache, rss, swap, rss_huge, mapped_file | MiB | -| cgroup.writeback | dirty, writeback | MiB | -| cgroup.mem_activity | in, out | MiB/s | -| cgroup.pgfaults | pgfault, swap | MiB/s | -| cgroup.mem_usage | ram, swap | MiB | -| cgroup.mem_usage_limit | available, used | MiB | -| cgroup.mem_utilization | utilization | percentage | -| cgroup.mem_failcnt | failures | count | -| cgroup.io | read, write | KiB/s | -| cgroup.serviced_ops | read, write | operations/s | -| cgroup.throttle_io | read, write | KiB/s | -| cgroup.throttle_serviced_ops | read, write | operations/s | -| cgroup.queued_ops | read, write | operations | -| cgroup.merged_ops | read, write | operations/s | -| cgroup.cpu_some_pressure | some10, some60, some300 | percentage | -| cgroup.cpu_some_pressure_stall_time | time | ms | -| cgroup.cpu_full_pressure | some10, some60, some300 | percentage | -| cgroup.cpu_full_pressure_stall_time | time | ms | -| cgroup.memory_some_pressure | some10, some60, some300 | percentage | -| cgroup.memory_some_pressure_stall_time | time | ms | -| cgroup.memory_full_pressure | some10, some60, some300 | percentage | -| cgroup.memory_full_pressure_stall_time | time | ms | -| cgroup.io_some_pressure | some10, some60, some300 | percentage | -| cgroup.io_some_pressure_stall_time | time | ms | -| cgroup.io_full_pressure | some10, some60, some300 | percentage | -| cgroup.io_full_pressure_stall_time | time | ms | -| cgroup.pids_current | pids | pids | - -### Per cgroup network device - - - -Labels: - -| Label | Description | -|:-----------|:----------------| -| container_name | The container name or group path if name resolution fails. | -| image | Docker/Podman container image name. | -| device | The name of the host network interface linked to the container's network interface. | -| container_device | Container network interface name. | -| interface_type | Network interface type. Always "virtual" for the containers. 
| - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| cgroup.net_net | received, sent | kilobits/s | -| cgroup.net_packets | received, sent, multicast | pps | -| cgroup.net_errors | inbound, outbound | errors/s | -| cgroup.net_drops | inbound, outbound | errors/s | -| cgroup.net_fifo | receive, transmit | errors/s | -| cgroup.net_compressed | receive, sent | pps | -| cgroup.net_events | frames, collisions, carrier | events/s | -| cgroup.net_operstate | up, down, notpresent, lowerlayerdown, testing, dormant, unknown | state | -| cgroup.net_carrier | up, down | state | -| cgroup.net_mtu | mtu | octets | - - - -## Alerts - - -The following alerts are available: - -| Alert name | On metric | Description | -|:------------|:----------|:------------| -| [ cgroup_10min_cpu_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.cpu_limit | average cgroup CPU utilization over the last 10 minutes | -| [ cgroup_ram_in_use ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.mem_usage | cgroup memory utilization | -| [ cgroup_1m_received_packets_rate ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | average number of packets received by the network interface ${label:device} over the last minute | -| [ cgroup_10s_received_packets_storm ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute | - - -## Setup - -### Prerequisites - -No action required. - -### Configuration - -#### File - -There is no configuration file. -#### Options - - - -There are no configuration options. - -#### Examples -There are no configuration examples. - - diff --git a/collectors/cgroups.plugin/integrations/systemd_services.md b/collectors/cgroups.plugin/integrations/systemd_services.md deleted file mode 100644 index 0ce906366..000000000 --- a/collectors/cgroups.plugin/integrations/systemd_services.md +++ /dev/null @@ -1,112 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.plugin/integrations/systemd_services.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.plugin/metadata.yaml" -sidebar_label: "Systemd Services" -learn_status: "Published" -learn_rel_path: "Data Collection/Systemd" -most_popular: True -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Systemd Services - - -<img src="https://netdata.cloud/img/systemd.svg" width="150"/> - - -Plugin: cgroups.plugin -Module: /sys/fs/cgroup - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -Monitor Containers for performance, resource usage, and health status. - - - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -This integration doesn't support auto-detection. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. 
- -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per systemd service - - - -Labels: - -| Label | Description | -|:-----------|:----------------| -| service_name | Service name | - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| systemd.service.cpu.utilization | user, system | percentage | -| systemd.service.memory.usage | ram, swap | MiB | -| systemd.service.memory.failcnt | fail | failures/s | -| systemd.service.memory.ram.usage | rss, cache, mapped_file, rss_huge | MiB | -| systemd.service.memory.writeback | writeback, dirty | MiB | -| systemd.service.memory.paging.faults | minor, major | MiB/s | -| systemd.service.memory.paging.io | in, out | MiB/s | -| systemd.service.disk.io | read, write | KiB/s | -| systemd.service.disk.iops | read, write | operations/s | -| systemd.service.disk.throttle.io | read, write | KiB/s | -| systemd.service.disk.throttle.iops | read, write | operations/s | -| systemd.service.disk.queued_iops | read, write | operations/s | -| systemd.service.disk.merged_iops | read, write | operations/s | -| systemd.service.pids.current | pids | pids | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -No action required. - -### Configuration - -#### File - -There is no configuration file. -#### Options - - - -There are no configuration options. - -#### Examples -There are no configuration examples. - - diff --git a/collectors/cgroups.plugin/integrations/virtual_machines.md b/collectors/cgroups.plugin/integrations/virtual_machines.md deleted file mode 100644 index 6a64923c4..000000000 --- a/collectors/cgroups.plugin/integrations/virtual_machines.md +++ /dev/null @@ -1,169 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.plugin/integrations/virtual_machines.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/collectors/cgroups.plugin/metadata.yaml" -sidebar_label: "Virtual Machines" -learn_status: "Published" -learn_rel_path: "Data Collection/Containers and VMs" -most_popular: True -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Virtual Machines - - -<img src="https://netdata.cloud/img/container.svg" width="150"/> - - -Plugin: cgroups.plugin -Module: /sys/fs/cgroup - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -Monitor Virtual Machines for performance, resource usage, and health status. - - - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -This integration doesn't support auto-detection. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per cgroup - - - -Labels: - -| Label | Description | -|:-----------|:----------------| -| container_name | The container name or group path if name resolution fails. | -| image | Docker/Podman container image name. 
| - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| cgroup.cpu_limit | used | percentage | -| cgroup.cpu | user, system | percentage | -| cgroup.cpu_per_core | a dimension per core | percentage | -| cgroup.throttled | throttled | percentage | -| cgroup.throttled_duration | duration | ms | -| cgroup.cpu_shares | shares | shares | -| cgroup.mem | cache, rss, swap, rss_huge, mapped_file | MiB | -| cgroup.writeback | dirty, writeback | MiB | -| cgroup.mem_activity | in, out | MiB/s | -| cgroup.pgfaults | pgfault, swap | MiB/s | -| cgroup.mem_usage | ram, swap | MiB | -| cgroup.mem_usage_limit | available, used | MiB | -| cgroup.mem_utilization | utilization | percentage | -| cgroup.mem_failcnt | failures | count | -| cgroup.io | read, write | KiB/s | -| cgroup.serviced_ops | read, write | operations/s | -| cgroup.throttle_io | read, write | KiB/s | -| cgroup.throttle_serviced_ops | read, write | operations/s | -| cgroup.queued_ops | read, write | operations | -| cgroup.merged_ops | read, write | operations/s | -| cgroup.cpu_some_pressure | some10, some60, some300 | percentage | -| cgroup.cpu_some_pressure_stall_time | time | ms | -| cgroup.cpu_full_pressure | some10, some60, some300 | percentage | -| cgroup.cpu_full_pressure_stall_time | time | ms | -| cgroup.memory_some_pressure | some10, some60, some300 | percentage | -| cgroup.memory_some_pressure_stall_time | time | ms | -| cgroup.memory_full_pressure | some10, some60, some300 | percentage | -| cgroup.memory_full_pressure_stall_time | time | ms | -| cgroup.io_some_pressure | some10, some60, some300 | percentage | -| cgroup.io_some_pressure_stall_time | time | ms | -| cgroup.io_full_pressure | some10, some60, some300 | percentage | -| cgroup.io_full_pressure_stall_time | time | ms | -| cgroup.pids_current | pids | pids | - -### Per cgroup network device - - - -Labels: - -| Label | Description | -|:-----------|:----------------| -| container_name | The container name or group path if name resolution fails. | -| image | Docker/Podman container image name. | -| device | The name of the host network interface linked to the container's network interface. | -| container_device | Container network interface name. | -| interface_type | Network interface type. Always "virtual" for the containers. 
| - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| cgroup.net_net | received, sent | kilobits/s | -| cgroup.net_packets | received, sent, multicast | pps | -| cgroup.net_errors | inbound, outbound | errors/s | -| cgroup.net_drops | inbound, outbound | errors/s | -| cgroup.net_fifo | receive, transmit | errors/s | -| cgroup.net_compressed | receive, sent | pps | -| cgroup.net_events | frames, collisions, carrier | events/s | -| cgroup.net_operstate | up, down, notpresent, lowerlayerdown, testing, dormant, unknown | state | -| cgroup.net_carrier | up, down | state | -| cgroup.net_mtu | mtu | octets | - - - -## Alerts - - -The following alerts are available: - -| Alert name | On metric | Description | -|:------------|:----------|:------------| -| [ cgroup_10min_cpu_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.cpu_limit | average cgroup CPU utilization over the last 10 minutes | -| [ cgroup_ram_in_use ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.mem_usage | cgroup memory utilization | -| [ cgroup_1m_received_packets_rate ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | average number of packets received by the network interface ${label:device} over the last minute | -| [ cgroup_10s_received_packets_storm ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute | - - -## Setup - -### Prerequisites - -No action required. - -### Configuration - -#### File - -There is no configuration file. -#### Options - - - -There are no configuration options. - -#### Examples -There are no configuration examples. - - diff --git a/collectors/cgroups.plugin/metadata.yaml b/collectors/cgroups.plugin/metadata.yaml deleted file mode 100644 index a1abbb5a9..000000000 --- a/collectors/cgroups.plugin/metadata.yaml +++ /dev/null @@ -1,1022 +0,0 @@ -plugin_name: cgroups.plugin -modules: - - &module - meta: &meta - plugin_name: cgroups.plugin - module_name: /sys/fs/cgroup - monitored_instance: - name: Containers - link: "" - categories: - - data-collection.containers-and-vms - icon_filename: container.svg - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - containers - most_popular: true - overview: &overview - data_collection: &data_collection - metrics_description: "Monitor Containers for performance, resource usage, and health status." 
- method_description: "" - supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: "" - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: [] - configuration: - file: - name: "" - description: "" - options: - description: "" - folding: - title: "" - enabled: true - list: [] - examples: - folding: - enabled: true - title: "" - list: [] - troubleshooting: - problems: - list: [] - alerts: - - name: cgroup_10min_cpu_usage - link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf - metric: cgroup.cpu_limit - info: average cgroup CPU utilization over the last 10 minutes - - name: cgroup_ram_in_use - link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf - metric: cgroup.mem_usage - info: cgroup memory utilization - - name: cgroup_1m_received_packets_rate - link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf - metric: cgroup.net_packets - info: average number of packets received by the network interface ${label:device} over the last minute - - name: cgroup_10s_received_packets_storm - link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf - metric: cgroup.net_packets - info: - ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over - the last minute - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: cgroup - description: "" - labels: - - name: container_name - description: The container name or group path if name resolution fails. - - name: image - description: Docker/Podman container image name. 
- metrics: - - name: cgroup.cpu_limit - description: CPU Usage within the limits - unit: "percentage" - chart_type: line - dimensions: - - name: used - - name: cgroup.cpu - description: CPU Usage (100% = 1 core) - unit: "percentage" - chart_type: stacked - dimensions: - - name: user - - name: system - - name: cgroup.cpu_per_core - description: CPU Usage (100% = 1 core) Per Core - unit: "percentage" - chart_type: line - dimensions: - - name: a dimension per core - - name: cgroup.throttled - description: CPU Throttled Runnable Periods - unit: "percentage" - chart_type: line - dimensions: - - name: throttled - - name: cgroup.throttled_duration - description: CPU Throttled Time Duration - unit: "ms" - chart_type: line - dimensions: - - name: duration - - name: cgroup.cpu_shares - description: CPU Time Relative Share - unit: "shares" - chart_type: line - dimensions: - - name: shares - - name: cgroup.mem - description: Memory Usage - unit: "MiB" - chart_type: stacked - dimensions: - - name: cache - - name: rss - - name: swap - - name: rss_huge - - name: mapped_file - - name: cgroup.writeback - description: Writeback Memory - unit: "MiB" - chart_type: area - dimensions: - - name: dirty - - name: writeback - - name: cgroup.mem_activity - description: Memory Activity - unit: "MiB/s" - chart_type: line - dimensions: - - name: in - - name: out - - name: cgroup.pgfaults - description: Memory Page Faults - unit: "MiB/s" - chart_type: line - dimensions: - - name: pgfault - - name: swap - - name: cgroup.mem_usage - description: Used Memory - unit: "MiB" - chart_type: stacked - dimensions: - - name: ram - - name: swap - - name: cgroup.mem_usage_limit - description: Used RAM within the limits - unit: "MiB" - chart_type: stacked - dimensions: - - name: available - - name: used - - name: cgroup.mem_utilization - description: Memory Utilization - unit: "percentage" - chart_type: line - dimensions: - - name: utilization - - name: cgroup.mem_failcnt - description: Memory Limit Failures - unit: "count" - chart_type: line - dimensions: - - name: failures - - name: cgroup.io - description: I/O Bandwidth (all disks) - unit: "KiB/s" - chart_type: area - dimensions: - - name: read - - name: write - - name: cgroup.serviced_ops - description: Serviced I/O Operations (all disks) - unit: "operations/s" - chart_type: line - dimensions: - - name: read - - name: write - - name: cgroup.throttle_io - description: Throttle I/O Bandwidth (all disks) - unit: "KiB/s" - chart_type: area - dimensions: - - name: read - - name: write - - name: cgroup.throttle_serviced_ops - description: Throttle Serviced I/O Operations (all disks) - unit: "operations/s" - chart_type: line - dimensions: - - name: read - - name: write - - name: cgroup.queued_ops - description: Queued I/O Operations (all disks) - unit: "operations" - chart_type: line - dimensions: - - name: read - - name: write - - name: cgroup.merged_ops - description: Merged I/O Operations (all disks) - unit: "operations/s" - chart_type: line - dimensions: - - name: read - - name: write - - name: cgroup.cpu_some_pressure - description: CPU some pressure - unit: "percentage" - chart_type: line - dimensions: - - name: some10 - - name: some60 - - name: some300 - - name: cgroup.cpu_some_pressure_stall_time - description: CPU some pressure stall time - unit: "ms" - chart_type: line - dimensions: - - name: time - - name: cgroup.cpu_full_pressure - description: CPU full pressure - unit: "percentage" - chart_type: line - dimensions: - - name: some10 - - name: some60 - - name: some300 - - 
name: cgroup.cpu_full_pressure_stall_time - description: CPU full pressure stall time - unit: "ms" - chart_type: line - dimensions: - - name: time - - name: cgroup.memory_some_pressure - description: Memory some pressure - unit: "percentage" - chart_type: line - dimensions: - - name: some10 - - name: some60 - - name: some300 - - name: cgroup.memory_some_pressure_stall_time - description: Memory some pressure stall time - unit: "ms" - chart_type: line - dimensions: - - name: time - - name: cgroup.memory_full_pressure - description: Memory full pressure - unit: "percentage" - chart_type: line - dimensions: - - name: some10 - - name: some60 - - name: some300 - - name: cgroup.memory_full_pressure_stall_time - description: Memory full pressure stall time - unit: "ms" - chart_type: line - dimensions: - - name: time - - name: cgroup.io_some_pressure - description: I/O some pressure - unit: "percentage" - chart_type: line - dimensions: - - name: some10 - - name: some60 - - name: some300 - - name: cgroup.io_some_pressure_stall_time - description: I/O some pressure stall time - unit: "ms" - chart_type: line - dimensions: - - name: time - - name: cgroup.io_full_pressure - description: I/O some pressure - unit: "percentage" - chart_type: line - dimensions: - - name: some10 - - name: some60 - - name: some300 - - name: cgroup.io_full_pressure_stall_time - description: I/O some pressure stall time - unit: "ms" - chart_type: line - dimensions: - - name: time - - name: cgroup.pids_current - description: Number of processes - unit: "pids" - chart_type: line - dimensions: - - name: pids - - name: cgroup network device - description: "" - labels: - - name: container_name - description: The container name or group path if name resolution fails. - - name: image - description: Docker/Podman container image name. - - name: device - description: "The name of the host network interface linked to the container's network interface." - - name: container_device - description: Container network interface name. - - name: interface_type - description: 'Network interface type. Always "virtual" for the containers.' 
- metrics: - - name: cgroup.net_net - description: Bandwidth - unit: "kilobits/s" - chart_type: area - dimensions: - - name: received - - name: sent - - name: cgroup.net_packets - description: Packets - unit: "pps" - chart_type: line - dimensions: - - name: received - - name: sent - - name: multicast - - name: cgroup.net_errors - description: Interface Errors - unit: "errors/s" - chart_type: line - dimensions: - - name: inbound - - name: outbound - - name: cgroup.net_drops - description: Interface Drops - unit: "errors/s" - chart_type: line - dimensions: - - name: inbound - - name: outbound - - name: cgroup.net_fifo - description: Interface FIFO Buffer Errors - unit: "errors/s" - chart_type: line - dimensions: - - name: receive - - name: transmit - - name: cgroup.net_compressed - description: Interface FIFO Buffer Errors - unit: "pps" - chart_type: line - dimensions: - - name: receive - - name: sent - - name: cgroup.net_events - description: Network Interface Events - unit: "events/s" - chart_type: line - dimensions: - - name: frames - - name: collisions - - name: carrier - - name: cgroup.net_operstate - description: Interface Operational State - unit: "state" - chart_type: line - dimensions: - - name: up - - name: down - - name: notpresent - - name: lowerlayerdown - - name: testing - - name: dormant - - name: unknown - - name: cgroup.net_carrier - description: Interface Physical Link State - unit: "state" - chart_type: line - dimensions: - - name: up - - name: down - - name: cgroup.net_mtu - description: Interface MTU - unit: "octets" - chart_type: line - dimensions: - - name: mtu - - <<: *module - meta: - <<: *meta - monitored_instance: - name: Kubernetes Containers - link: https://kubernetes.io/ - icon_filename: kubernetes.svg - categories: - #- data-collection.containers-and-vms - - data-collection.kubernetes - keywords: - - k8s - - kubernetes - - pods - - containers - overview: - <<: *overview - data-collection: - <<: *data_collection - metrics_description: Monitor Kubernetes Clusters for performance, resource usage, and health status. - alerts: - - name: k8s_cgroup_10min_cpu_usage - link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf - metric: k8s.cgroup.cpu_limit - info: average cgroup CPU utilization over the last 10 minutes - - name: k8s_cgroup_ram_in_use - link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf - metric: k8s.cgroup.mem_usage - info: cgroup memory utilization - - name: k8s_cgroup_1m_received_packets_rate - link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf - metric: k8s.cgroup.net_packets - info: average number of packets received by the network interface ${label:device} over the last minute - - name: k8s_cgroup_10s_received_packets_storm - link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf - metric: k8s.cgroup.net_packets - info: - ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over - the last minute - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: k8s cgroup - description: These metrics refer to the Pod container. - labels: - - name: k8s_node_name - description: 'Node name. The value of _pod.spec.nodeName_.' - - name: k8s_namespace - description: 'Namespace name. The value of _pod.metadata.namespace_.' 
- - name: k8s_controller_kind - description: 'Controller kind (ReplicaSet, DaemonSet, StatefulSet, Job, etc.). The value of _pod.OwnerReferences.Controller.Kind_.' - - name: k8s_controller_name - description: 'Controller name.The value of _pod.OwnerReferences.Controller.Name_.' - - name: k8s_pod_name - description: 'Pod name. The value of _pod.metadata.name_.' - - name: k8s_container_name - description: 'Container name. The value of _pod.spec.containers.name_.' - - name: k8s_kind - description: 'Instance kind: "pod" or "container".' - - name: k8s_qos_class - description: 'QoS class (guaranteed, burstable, besteffort).' - - name: k8s_cluster_id - description: 'Cluster ID. The value of kube-system namespace _namespace.metadata.uid_.' - metrics: - - name: k8s.cgroup.cpu_limit - description: CPU Usage within the limits - unit: "percentage" - chart_type: line - dimensions: - - name: used - - name: k8s.cgroup.cpu - description: CPU Usage (100% = 1000 mCPU) - unit: "percentage" - chart_type: stacked - dimensions: - - name: user - - name: system - - name: k8s.cgroup.cpu_per_core - description: CPU Usage (100% = 1000 mCPU) Per Core - unit: "percentage" - chart_type: line - dimensions: - - name: a dimension per core - - name: k8s.cgroup.throttled - description: CPU Throttled Runnable Periods - unit: "percentage" - chart_type: line - dimensions: - - name: throttled - - name: k8s.cgroup.throttled_duration - description: CPU Throttled Time Duration - unit: "ms" - chart_type: line - dimensions: - - name: duration - - name: k8s.cgroup.cpu_shares - description: CPU Time Relative Share - unit: "shares" - chart_type: line - dimensions: - - name: shares - - name: k8s.cgroup.mem - description: Memory Usage - unit: "MiB" - chart_type: stacked - dimensions: - - name: cache - - name: rss - - name: swap - - name: rss_huge - - name: mapped_file - - name: k8s.cgroup.writeback - description: Writeback Memory - unit: "MiB" - chart_type: area - dimensions: - - name: dirty - - name: writeback - - name: k8s.cgroup.mem_activity - description: Memory Activity - unit: "MiB/s" - chart_type: line - dimensions: - - name: in - - name: out - - name: k8s.cgroup.pgfaults - description: Memory Page Faults - unit: "MiB/s" - chart_type: line - dimensions: - - name: pgfault - - name: swap - - name: k8s.cgroup.mem_usage - description: Used Memory - unit: "MiB" - chart_type: stacked - dimensions: - - name: ram - - name: swap - - name: k8s.cgroup.mem_usage_limit - description: Used RAM within the limits - unit: "MiB" - chart_type: stacked - dimensions: - - name: available - - name: used - - name: k8s.cgroup.mem_utilization - description: Memory Utilization - unit: "percentage" - chart_type: line - dimensions: - - name: utilization - - name: k8s.cgroup.mem_failcnt - description: Memory Limit Failures - unit: "count" - chart_type: line - dimensions: - - name: failures - - name: k8s.cgroup.io - description: I/O Bandwidth (all disks) - unit: "KiB/s" - chart_type: area - dimensions: - - name: read - - name: write - - name: k8s.cgroup.serviced_ops - description: Serviced I/O Operations (all disks) - unit: "operations/s" - chart_type: line - dimensions: - - name: read - - name: write - - name: k8s.cgroup.throttle_io - description: Throttle I/O Bandwidth (all disks) - unit: "KiB/s" - chart_type: area - dimensions: - - name: read - - name: write - - name: k8s.cgroup.throttle_serviced_ops - description: Throttle Serviced I/O Operations (all disks) - unit: "operations/s" - chart_type: line - dimensions: - - name: read - - name: write - - name: 
k8s.cgroup.queued_ops - description: Queued I/O Operations (all disks) - unit: "operations" - chart_type: line - dimensions: - - name: read - - name: write - - name: k8s.cgroup.merged_ops - description: Merged I/O Operations (all disks) - unit: "operations/s" - chart_type: line - dimensions: - - name: read - - name: write - - name: k8s.cgroup.cpu_some_pressure - description: CPU some pressure - unit: "percentage" - chart_type: line - dimensions: - - name: some10 - - name: some60 - - name: some300 - - name: k8s.cgroup.cpu_some_pressure_stall_time - description: CPU some pressure stall time - unit: "ms" - chart_type: line - dimensions: - - name: time - - name: k8s.cgroup.cpu_full_pressure - description: CPU full pressure - unit: "percentage" - chart_type: line - dimensions: - - name: some10 - - name: some60 - - name: some300 - - name: k8s.cgroup.cpu_full_pressure_stall_time - description: CPU full pressure stall time - unit: "ms" - chart_type: line - dimensions: - - name: time - - name: k8s.cgroup.memory_some_pressure - description: Memory some pressure - unit: "percentage" - chart_type: line - dimensions: - - name: some10 - - name: some60 - - name: some300 - - name: k8s.cgroup.memory_some_pressure_stall_time - description: Memory some pressure stall time - unit: "ms" - chart_type: line - dimensions: - - name: time - - name: k8s.cgroup.memory_full_pressure - description: Memory full pressure - unit: "percentage" - chart_type: line - dimensions: - - name: some10 - - name: some60 - - name: some300 - - name: k8s.cgroup.memory_full_pressure_stall_time - description: Memory full pressure stall time - unit: "ms" - chart_type: line - dimensions: - - name: time - - name: k8s.cgroup.io_some_pressure - description: I/O some pressure - unit: "percentage" - chart_type: line - dimensions: - - name: some10 - - name: some60 - - name: some300 - - name: k8s.cgroup.io_some_pressure_stall_time - description: I/O some pressure stall time - unit: "ms" - chart_type: line - dimensions: - - name: time - - name: k8s.cgroup.io_full_pressure - description: I/O some pressure - unit: "percentage" - chart_type: line - dimensions: - - name: some10 - - name: some60 - - name: some300 - - name: k8s.cgroup.io_full_pressure_stall_time - description: I/O some pressure stall time - unit: "ms" - chart_type: line - dimensions: - - name: time - - name: k8s.cgroup.pids_current - description: Number of processes - unit: "pids" - chart_type: line - dimensions: - - name: pids - - name: k8s cgroup network device - description: These metrics refer to the Pod container network interface. - labels: - - name: device - description: "The name of the host network interface linked to the container's network interface." - - name: container_device - description: Container network interface name. - - name: interface_type - description: 'Network interface type. Always "virtual" for the containers.' - - name: k8s_node_name - description: 'Node name. The value of _pod.spec.nodeName_.' - - name: k8s_namespace - description: 'Namespace name. The value of _pod.metadata.namespace_.' - - name: k8s_controller_kind - description: 'Controller kind (ReplicaSet, DaemonSet, StatefulSet, Job, etc.). The value of _pod.OwnerReferences.Controller.Kind_.' - - name: k8s_controller_name - description: 'Controller name.The value of _pod.OwnerReferences.Controller.Name_.' - - name: k8s_pod_name - description: 'Pod name. The value of _pod.metadata.name_.' - - name: k8s_container_name - description: 'Container name. The value of _pod.spec.containers.name_.' 
- - name: k8s_kind - description: 'Instance kind: "pod" or "container".' - - name: k8s_qos_class - description: 'QoS class (guaranteed, burstable, besteffort).' - - name: k8s_cluster_id - description: 'Cluster ID. The value of kube-system namespace _namespace.metadata.uid_.' - metrics: - - name: k8s.cgroup.net_net - description: Bandwidth - unit: "kilobits/s" - chart_type: area - dimensions: - - name: received - - name: sent - - name: k8s.cgroup.net_packets - description: Packets - unit: "pps" - chart_type: line - dimensions: - - name: received - - name: sent - - name: multicast - - name: k8s.cgroup.net_errors - description: Interface Errors - unit: "errors/s" - chart_type: line - dimensions: - - name: inbound - - name: outbound - - name: k8s.cgroup.net_drops - description: Interface Drops - unit: "errors/s" - chart_type: line - dimensions: - - name: inbound - - name: outbound - - name: k8s.cgroup.net_fifo - description: Interface FIFO Buffer Errors - unit: "errors/s" - chart_type: line - dimensions: - - name: receive - - name: transmit - - name: k8s.cgroup.net_compressed - description: Interface FIFO Buffer Errors - unit: "pps" - chart_type: line - dimensions: - - name: receive - - name: sent - - name: k8s.cgroup.net_events - description: Network Interface Events - unit: "events/s" - chart_type: line - dimensions: - - name: frames - - name: collisions - - name: carrier - - name: k8s.cgroup.net_operstate - description: Interface Operational State - unit: "state" - chart_type: line - dimensions: - - name: up - - name: down - - name: notpresent - - name: lowerlayerdown - - name: testing - - name: dormant - - name: unknown - - name: k8s.cgroup.net_carrier - description: Interface Physical Link State - unit: "state" - chart_type: line - dimensions: - - name: up - - name: down - - name: k8s.cgroup.net_mtu - description: Interface MTU - unit: "octets" - chart_type: line - dimensions: - - name: mtu - - <<: *module - meta: - <<: *meta - monitored_instance: - name: Systemd Services - link: "" - icon_filename: systemd.svg - categories: - - data-collection.systemd - keywords: - - systemd - - services - overview: - <<: *overview - data-collection: - <<: *data_collection - metrics_desctiption: "Monitor Systemd Services for performance, resource usage, and health status." 
- alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: systemd service - description: "" - labels: - - name: service_name - description: Service name - metrics: - - name: systemd.service.cpu.utilization - description: Systemd Services CPU utilization (100% = 1 core) - unit: percentage - chart_type: stacked - dimensions: - - name: user - - name: system - - name: systemd.service.memory.usage - description: Systemd Services Used Memory - unit: MiB - chart_type: stacked - dimensions: - - name: ram - - name: swap - - name: systemd.service.memory.failcnt - description: Systemd Services Memory Limit Failures - unit: failures/s - chart_type: line - dimensions: - - name: fail - - name: systemd.service.memory.ram.usage - description: Systemd Services Memory - unit: MiB - chart_type: stacked - dimensions: - - name: rss - - name: cache - - name: mapped_file - - name: rss_huge - - name: systemd.service.memory.writeback - description: Systemd Services Writeback Memory - unit: MiB - chart_type: stacked - dimensions: - - name: writeback - - name: dirty - - name: systemd.service.memory.paging.faults - description: Systemd Services Memory Minor and Major Page Faults - unit: MiB/s - chart_type: area - dimensions: - - name: minor - - name: major - - name: systemd.service.memory.paging.io - description: Systemd Services Memory Paging IO - unit: MiB/s - chart_type: area - dimensions: - - name: in - - name: out - - name: systemd.service.disk.io - description: Systemd Services Disk Read/Write Bandwidth - unit: KiB/s - chart_type: area - dimensions: - - name: read - - name: write - - name: systemd.service.disk.iops - description: Systemd Services Disk Read/Write Operations - unit: operations/s - chart_type: line - dimensions: - - name: read - - name: write - - name: systemd.service.disk.throttle.io - description: Systemd Services Throttle Disk Read/Write Bandwidth - unit: KiB/s - chart_type: area - dimensions: - - name: read - - name: write - - name: systemd.service.disk.throttle.iops - description: Systemd Services Throttle Disk Read/Write Operations - unit: operations/s - chart_type: line - dimensions: - - name: read - - name: write - - name: systemd.service.disk.queued_iops - description: Systemd Services Queued Disk Read/Write Operations - unit: operations/s - chart_type: line - dimensions: - - name: read - - name: write - - name: systemd.service.disk.merged_iops - description: Systemd Services Merged Disk Read/Write Operations - unit: operations/s - chart_type: line - dimensions: - - name: read - - name: write - - name: systemd.service.pids.current - description: Systemd Services Number of Processes - unit: pids - chart_type: line - dimensions: - - name: pids - - <<: *module - meta: - <<: *meta - monitored_instance: - name: Virtual Machines - link: "" - icon_filename: container.svg - categories: - - data-collection.containers-and-vms - keywords: - - vms - - virtualization - - container - overview: - <<: *overview - data_collection: - <<: *data_collection - metrics_description: "Monitor Virtual Machines for performance, resource usage, and health status." - - <<: *module - meta: - <<: *meta - monitored_instance: - name: LXC Containers - link: "" - icon_filename: lxc.png - categories: - - data-collection.containers-and-vms - keywords: - - lxc - - lxd - - container - overview: - <<: *overview - data_collection: - <<: *data_collection - metrics_description: "Monitor LXC Containers for performance, resource usage, and health status." 
- - <<: *module - meta: - <<: *meta - monitored_instance: - name: Libvirt Containers - link: "" - icon_filename: libvirt.png - categories: - - data-collection.containers-and-vms - keywords: - - libvirt - - container - overview: - <<: *overview - data_collection: - <<: *data_collection - metrics_description: "Monitor Libvirt for performance, resource usage, and health status." - - <<: *module - meta: - <<: *meta - monitored_instance: - name: oVirt Containers - link: "" - icon_filename: ovirt.svg - categories: - - data-collection.containers-and-vms - keywords: - - ovirt - - container - overview: - <<: *overview - data_collection: - <<: *data_collection - metrics_description: "Monitor oVirt for performance, resource usage, and health status." - - <<: *module - meta: - <<: *meta - monitored_instance: - name: Proxmox Containers - link: "" - icon_filename: proxmox.png - categories: - - data-collection.containers-and-vms - keywords: - - proxmox - - container - overview: - <<: *overview - data_collection: - <<: *data_collection - metrics_description: "Monitor Proxmox for performance, resource usage, and health status." diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c deleted file mode 100644 index 705edf6f7..000000000 --- a/collectors/cgroups.plugin/sys_fs_cgroup.c +++ /dev/null @@ -1,1729 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "cgroup-internals.h" - -// main cgroups thread worker jobs -#define WORKER_CGROUPS_LOCK 0 -#define WORKER_CGROUPS_READ 1 -#define WORKER_CGROUPS_CHART 2 - -// ---------------------------------------------------------------------------- -// cgroup globals -unsigned long long host_ram_total = 0; -int is_inside_k8s = 0; -long system_page_size = 4096; // system will be queried via sysconf() in configuration() -int cgroup_enable_cpuacct_stat = CONFIG_BOOLEAN_AUTO; -int cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_NO; -int cgroup_enable_cpuacct_cpu_throttling = CONFIG_BOOLEAN_YES; -int cgroup_enable_cpuacct_cpu_shares = CONFIG_BOOLEAN_NO; -int cgroup_enable_memory = CONFIG_BOOLEAN_AUTO; -int cgroup_enable_detailed_memory = CONFIG_BOOLEAN_AUTO; -int cgroup_enable_memory_failcnt = CONFIG_BOOLEAN_AUTO; -int cgroup_enable_swap = CONFIG_BOOLEAN_AUTO; -int cgroup_enable_blkio_io = CONFIG_BOOLEAN_AUTO; -int cgroup_enable_blkio_ops = CONFIG_BOOLEAN_AUTO; -int cgroup_enable_blkio_throttle_io = CONFIG_BOOLEAN_AUTO; -int cgroup_enable_blkio_throttle_ops = CONFIG_BOOLEAN_AUTO; -int cgroup_enable_blkio_merged_ops = CONFIG_BOOLEAN_AUTO; -int cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_AUTO; -int cgroup_enable_pressure_cpu = CONFIG_BOOLEAN_AUTO; -int cgroup_enable_pressure_io_some = CONFIG_BOOLEAN_AUTO; -int cgroup_enable_pressure_io_full = CONFIG_BOOLEAN_AUTO; -int cgroup_enable_pressure_memory_some = CONFIG_BOOLEAN_AUTO; -int cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_AUTO; -int cgroup_enable_pressure_irq_some = CONFIG_BOOLEAN_NO; -int cgroup_enable_pressure_irq_full = CONFIG_BOOLEAN_AUTO; -int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES; -int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO; -int cgroup_used_memory = CONFIG_BOOLEAN_YES; -int cgroup_use_unified_cgroups = CONFIG_BOOLEAN_NO; -int cgroup_unified_exist = CONFIG_BOOLEAN_AUTO; -int cgroup_search_in_devices = 1; -int cgroup_check_for_new_every = 10; -int cgroup_update_every = 1; -int cgroup_containers_chart_priority = NETDATA_CHART_PRIO_CGROUPS_CONTAINERS; -int cgroup_recheck_zero_blkio_every_iterations = 10; -int 
cgroup_recheck_zero_mem_failcnt_every_iterations = 10; -int cgroup_recheck_zero_mem_detailed_every_iterations = 10; -char *cgroup_cpuacct_base = NULL; -char *cgroup_cpuset_base = NULL; -char *cgroup_blkio_base = NULL; -char *cgroup_memory_base = NULL; -char *cgroup_devices_base = NULL; -char *cgroup_pids_base = NULL; -char *cgroup_unified_base = NULL; -int cgroup_root_count = 0; -int cgroup_root_max = 1000; -int cgroup_max_depth = 0; -SIMPLE_PATTERN *enabled_cgroup_paths = NULL; -SIMPLE_PATTERN *enabled_cgroup_names = NULL; -SIMPLE_PATTERN *search_cgroup_paths = NULL; -SIMPLE_PATTERN *enabled_cgroup_renames = NULL; -SIMPLE_PATTERN *systemd_services_cgroups = NULL; -SIMPLE_PATTERN *entrypoint_parent_process_comm = NULL; -char *cgroups_network_interface_script = NULL; -int cgroups_check = 0; -uint32_t Read_hash = 0; -uint32_t Write_hash = 0; -uint32_t user_hash = 0; -uint32_t system_hash = 0; -uint32_t user_usec_hash = 0; -uint32_t system_usec_hash = 0; -uint32_t nr_periods_hash = 0; -uint32_t nr_throttled_hash = 0; -uint32_t throttled_time_hash = 0; -uint32_t throttled_usec_hash = 0; - -// *** WARNING *** The fields are not thread safe. Take care of safe usage. -struct cgroup *cgroup_root = NULL; -uv_mutex_t cgroup_root_mutex; - -struct cgroups_systemd_config_setting cgroups_systemd_options[] = { - { .name = "legacy", .setting = SYSTEMD_CGROUP_LEGACY }, - { .name = "hybrid", .setting = SYSTEMD_CGROUP_HYBRID }, - { .name = "unified", .setting = SYSTEMD_CGROUP_UNIFIED }, - { .name = NULL, .setting = SYSTEMD_CGROUP_ERR }, -}; - -// Shared memory with information from detected cgroups -netdata_ebpf_cgroup_shm_t shm_cgroup_ebpf = {NULL, NULL}; -int shm_fd_cgroup_ebpf = -1; -sem_t *shm_mutex_cgroup_ebpf = SEM_FAILED; - -struct discovery_thread discovery_thread; - - -/* on Fed systemd is not in PATH for some reason */ -#define SYSTEMD_CMD_RHEL "/usr/lib/systemd/systemd --version" -#define SYSTEMD_HIERARCHY_STRING "default-hierarchy=" - -#define MAXSIZE_PROC_CMDLINE 4096 -static enum cgroups_systemd_setting cgroups_detect_systemd(const char *exec) -{ - pid_t command_pid; - enum cgroups_systemd_setting retval = SYSTEMD_CGROUP_ERR; - char buf[MAXSIZE_PROC_CMDLINE]; - char *begin, *end; - - FILE *fp_child_input; - FILE *fp_child_output = netdata_popen(exec, &command_pid, &fp_child_input); - - if (!fp_child_output) - return retval; - - fd_set rfds; - struct timeval timeout; - int fd = fileno(fp_child_output); - int ret = -1; - - FD_ZERO(&rfds); - FD_SET(fd, &rfds); - timeout.tv_sec = 3; - timeout.tv_usec = 0; - - if (fd != -1) { - ret = select(fd + 1, &rfds, NULL, NULL, &timeout); - } - - if (ret == -1) { - collector_error("Failed to get the output of \"%s\"", exec); - } else if (ret == 0) { - collector_info("Cannot get the output of \"%s\" within %"PRId64" seconds", exec, (int64_t)timeout.tv_sec); - } else { - while (fgets(buf, MAXSIZE_PROC_CMDLINE, fp_child_output) != NULL) { - if ((begin = strstr(buf, SYSTEMD_HIERARCHY_STRING))) { - end = begin = begin + strlen(SYSTEMD_HIERARCHY_STRING); - if (!*begin) - break; - while (isalpha(*end)) - end++; - *end = 0; - for (int i = 0; cgroups_systemd_options[i].name; i++) { - if (!strcmp(begin, cgroups_systemd_options[i].name)) { - retval = cgroups_systemd_options[i].setting; - break; - } - } - break; - } - } - } - - if (netdata_pclose(fp_child_input, fp_child_output, command_pid)) - return SYSTEMD_CGROUP_ERR; - - return retval; -} - -static enum cgroups_type cgroups_try_detect_version() -{ - pid_t command_pid; - char buf[MAXSIZE_PROC_CMDLINE]; - enum 
cgroups_systemd_setting systemd_setting; - int cgroups2_available = 0; - - // 1. check if cgroups2 available on system at all - FILE *fp_child_input; - FILE *fp_child_output = netdata_popen("grep cgroup /proc/filesystems", &command_pid, &fp_child_input); - if (!fp_child_output) { - collector_error("popen failed"); - return CGROUPS_AUTODETECT_FAIL; - } - while (fgets(buf, MAXSIZE_PROC_CMDLINE, fp_child_output) != NULL) { - if (strstr(buf, "cgroup2")) { - cgroups2_available = 1; - break; - } - } - if(netdata_pclose(fp_child_input, fp_child_output, command_pid)) - return CGROUPS_AUTODETECT_FAIL; - - if(!cgroups2_available) - return CGROUPS_V1; - -#if defined CGROUP2_SUPER_MAGIC - // 2. check filesystem type for the default mountpoint - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/fs/cgroup"); - struct statfs fsinfo; - if (!statfs(filename, &fsinfo)) { - if (fsinfo.f_type == CGROUP2_SUPER_MAGIC) - return CGROUPS_V2; - } -#endif - - // 3. check systemd compiletime setting - if ((systemd_setting = cgroups_detect_systemd("systemd --version")) == SYSTEMD_CGROUP_ERR) - systemd_setting = cgroups_detect_systemd(SYSTEMD_CMD_RHEL); - - if(systemd_setting == SYSTEMD_CGROUP_ERR) - return CGROUPS_AUTODETECT_FAIL; - - if(systemd_setting == SYSTEMD_CGROUP_LEGACY || systemd_setting == SYSTEMD_CGROUP_HYBRID) { - // currently we prefer V1 if HYBRID is set as it seems to be more feature complete - // in the future we might want to continue here if SYSTEMD_CGROUP_HYBRID - // and go ahead with V2 - return CGROUPS_V1; - } - - // 4. if we are unified as on Fedora (default cgroups2 only mode) - // check kernel command line flag that can override that setting - FILE *fp = fopen("/proc/cmdline", "r"); - if (!fp) { - collector_error("Error reading kernel boot commandline parameters"); - return CGROUPS_AUTODETECT_FAIL; - } - - if (!fgets(buf, MAXSIZE_PROC_CMDLINE, fp)) { - collector_error("couldn't read all cmdline params into buffer"); - fclose(fp); - return CGROUPS_AUTODETECT_FAIL; - } - - fclose(fp); - - if (strstr(buf, "systemd.unified_cgroup_hierarchy=0")) { - collector_info("cgroups v2 (unified cgroups) is available but are disabled on this system."); - return CGROUPS_V1; - } - return CGROUPS_V2; -} - -void set_cgroup_base_path(char *filename, char *path) { - if (strncmp(netdata_configured_host_prefix, path, strlen(netdata_configured_host_prefix)) == 0) { - snprintfz(filename, FILENAME_MAX, "%s", path); - } else { - snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, path); - } -} - -void read_cgroup_plugin_configuration() { - system_page_size = sysconf(_SC_PAGESIZE); - - Read_hash = simple_hash("Read"); - Write_hash = simple_hash("Write"); - user_hash = simple_hash("user"); - system_hash = simple_hash("system"); - user_usec_hash = simple_hash("user_usec"); - system_usec_hash = simple_hash("system_usec"); - nr_periods_hash = simple_hash("nr_periods"); - nr_throttled_hash = simple_hash("nr_throttled"); - throttled_time_hash = simple_hash("throttled_time"); - throttled_usec_hash = simple_hash("throttled_usec"); - - cgroup_update_every = (int)config_get_number("plugin:cgroups", "update every", localhost->rrd_update_every); - if(cgroup_update_every < localhost->rrd_update_every) - cgroup_update_every = localhost->rrd_update_every; - - cgroup_check_for_new_every = (int)config_get_number("plugin:cgroups", "check for new cgroups every", (long long)cgroup_check_for_new_every * (long long)cgroup_update_every); - 
if(cgroup_check_for_new_every < cgroup_update_every) - cgroup_check_for_new_every = cgroup_update_every; - - cgroup_use_unified_cgroups = config_get_boolean_ondemand("plugin:cgroups", "use unified cgroups", CONFIG_BOOLEAN_AUTO); - if(cgroup_use_unified_cgroups == CONFIG_BOOLEAN_AUTO) - cgroup_use_unified_cgroups = (cgroups_try_detect_version() == CGROUPS_V2); - - collector_info("use unified cgroups %s", cgroup_use_unified_cgroups ? "true" : "false"); - - cgroup_containers_chart_priority = (int)config_get_number("plugin:cgroups", "containers priority", cgroup_containers_chart_priority); - if(cgroup_containers_chart_priority < 1) - cgroup_containers_chart_priority = NETDATA_CHART_PRIO_CGROUPS_CONTAINERS; - - cgroup_enable_cpuacct_stat = config_get_boolean_ondemand("plugin:cgroups", "enable cpuacct stat (total CPU)", cgroup_enable_cpuacct_stat); - cgroup_enable_cpuacct_usage = config_get_boolean_ondemand("plugin:cgroups", "enable cpuacct usage (per core CPU)", cgroup_enable_cpuacct_usage); - cgroup_enable_cpuacct_cpu_throttling = config_get_boolean_ondemand("plugin:cgroups", "enable cpuacct cpu throttling", cgroup_enable_cpuacct_cpu_throttling); - cgroup_enable_cpuacct_cpu_shares = config_get_boolean_ondemand("plugin:cgroups", "enable cpuacct cpu shares", cgroup_enable_cpuacct_cpu_shares); - - cgroup_enable_memory = config_get_boolean_ondemand("plugin:cgroups", "enable memory", cgroup_enable_memory); - cgroup_enable_detailed_memory = config_get_boolean_ondemand("plugin:cgroups", "enable detailed memory", cgroup_enable_detailed_memory); - cgroup_enable_memory_failcnt = config_get_boolean_ondemand("plugin:cgroups", "enable memory limits fail count", cgroup_enable_memory_failcnt); - cgroup_enable_swap = config_get_boolean_ondemand("plugin:cgroups", "enable swap memory", cgroup_enable_swap); - - cgroup_enable_blkio_io = config_get_boolean_ondemand("plugin:cgroups", "enable blkio bandwidth", cgroup_enable_blkio_io); - cgroup_enable_blkio_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio operations", cgroup_enable_blkio_ops); - cgroup_enable_blkio_throttle_io = config_get_boolean_ondemand("plugin:cgroups", "enable blkio throttle bandwidth", cgroup_enable_blkio_throttle_io); - cgroup_enable_blkio_throttle_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio throttle operations", cgroup_enable_blkio_throttle_ops); - cgroup_enable_blkio_queued_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio queued operations", cgroup_enable_blkio_queued_ops); - cgroup_enable_blkio_merged_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio merged operations", cgroup_enable_blkio_merged_ops); - - cgroup_enable_pressure_cpu = config_get_boolean_ondemand("plugin:cgroups", "enable cpu pressure", cgroup_enable_pressure_cpu); - cgroup_enable_pressure_io_some = config_get_boolean_ondemand("plugin:cgroups", "enable io some pressure", cgroup_enable_pressure_io_some); - cgroup_enable_pressure_io_full = config_get_boolean_ondemand("plugin:cgroups", "enable io full pressure", cgroup_enable_pressure_io_full); - cgroup_enable_pressure_memory_some = config_get_boolean_ondemand("plugin:cgroups", "enable memory some pressure", cgroup_enable_pressure_memory_some); - cgroup_enable_pressure_memory_full = config_get_boolean_ondemand("plugin:cgroups", "enable memory full pressure", cgroup_enable_pressure_memory_full); - - cgroup_recheck_zero_blkio_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero blkio every iterations", 
cgroup_recheck_zero_blkio_every_iterations); - cgroup_recheck_zero_mem_failcnt_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero memory failcnt every iterations", cgroup_recheck_zero_mem_failcnt_every_iterations); - cgroup_recheck_zero_mem_detailed_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero detailed memory every iterations", cgroup_recheck_zero_mem_detailed_every_iterations); - - cgroup_enable_systemd_services = config_get_boolean("plugin:cgroups", "enable systemd services", cgroup_enable_systemd_services); - cgroup_enable_systemd_services_detailed_memory = config_get_boolean("plugin:cgroups", "enable systemd services detailed memory", cgroup_enable_systemd_services_detailed_memory); - cgroup_used_memory = config_get_boolean("plugin:cgroups", "report used memory", cgroup_used_memory); - - char filename[FILENAME_MAX + 1], *s; - struct mountinfo *mi, *root = mountinfo_read(0); - if(!cgroup_use_unified_cgroups) { - // cgroup v1 does not have pressure metrics - cgroup_enable_pressure_cpu = - cgroup_enable_pressure_io_some = - cgroup_enable_pressure_io_full = - cgroup_enable_pressure_memory_some = - cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_NO; - - mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuacct"); - if (!mi) - mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuacct"); - if (!mi) { - collector_error("CGROUP: cannot find cpuacct mountinfo. Assuming default: /sys/fs/cgroup/cpuacct"); - s = "/sys/fs/cgroup/cpuacct"; - } else - s = mi->mount_point; - set_cgroup_base_path(filename, s); - cgroup_cpuacct_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/cpuacct", filename); - - mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuset"); - if (!mi) - mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuset"); - if (!mi) { - collector_error("CGROUP: cannot find cpuset mountinfo. Assuming default: /sys/fs/cgroup/cpuset"); - s = "/sys/fs/cgroup/cpuset"; - } else - s = mi->mount_point; - set_cgroup_base_path(filename, s); - cgroup_cpuset_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/cpuset", filename); - - mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "blkio"); - if (!mi) - mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "blkio"); - if (!mi) { - collector_error("CGROUP: cannot find blkio mountinfo. Assuming default: /sys/fs/cgroup/blkio"); - s = "/sys/fs/cgroup/blkio"; - } else - s = mi->mount_point; - set_cgroup_base_path(filename, s); - cgroup_blkio_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/blkio", filename); - - mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "memory"); - if (!mi) - mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "memory"); - if (!mi) { - collector_error("CGROUP: cannot find memory mountinfo. Assuming default: /sys/fs/cgroup/memory"); - s = "/sys/fs/cgroup/memory"; - } else - s = mi->mount_point; - set_cgroup_base_path(filename, s); - cgroup_memory_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/memory", filename); - - mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "devices"); - if (!mi) - mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "devices"); - if (!mi) { - collector_error("CGROUP: cannot find devices mountinfo. 
Assuming default: /sys/fs/cgroup/devices"); - s = "/sys/fs/cgroup/devices"; - } else - s = mi->mount_point; - set_cgroup_base_path(filename, s); - cgroup_devices_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/devices", filename); - - mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "pids"); - if (!mi) - mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "pids"); - if (!mi) { - collector_error("CGROUP: cannot find pids mountinfo. Assuming default: /sys/fs/cgroup/pids"); - s = "/sys/fs/cgroup/pids"; - } else - s = mi->mount_point; - set_cgroup_base_path(filename, s); - cgroup_pids_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/pids", filename); - } - else { - //cgroup_enable_cpuacct_stat = - cgroup_enable_cpuacct_usage = - //cgroup_enable_memory = - //cgroup_enable_detailed_memory = - cgroup_enable_memory_failcnt = - //cgroup_enable_swap = - //cgroup_enable_blkio_io = - //cgroup_enable_blkio_ops = - cgroup_enable_blkio_throttle_io = - cgroup_enable_blkio_throttle_ops = - cgroup_enable_blkio_merged_ops = - cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_NO; - cgroup_search_in_devices = 0; - cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO; - cgroup_used_memory = CONFIG_BOOLEAN_NO; //unified cgroups use different values - - //TODO: can there be more than 1 cgroup2 mount point? - //there is no cgroup2 specific super option - for now use 'rw' option - mi = mountinfo_find_by_filesystem_super_option(root, "cgroup2", "rw"); - if (!mi) { - mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup2", "cgroup"); - } - if (!mi) { - collector_error("CGROUP: cannot find cgroup2 mountinfo. Assuming default: /sys/fs/cgroup"); - s = "/sys/fs/cgroup"; - } else - s = mi->mount_point; - - set_cgroup_base_path(filename, s); - cgroup_unified_base = config_get("plugin:cgroups", "path to unified cgroups", filename); - } - - cgroup_root_max = (int)config_get_number("plugin:cgroups", "max cgroups to allow", cgroup_root_max); - cgroup_max_depth = (int)config_get_number("plugin:cgroups", "max cgroups depth to monitor", cgroup_max_depth); - - enabled_cgroup_paths = simple_pattern_create( - config_get("plugin:cgroups", "enable by default cgroups matching", - // ---------------------------------------------------------------- - - " !*/init.scope " // ignore init.scope - " !/system.slice/run-*.scope " // ignore system.slice/run-XXXX.scope - " *.scope " // we need all other *.scope for sure - - // ---------------------------------------------------------------- - - " /machine.slice/*.service " // #3367 systemd-nspawn - - // ---------------------------------------------------------------- - - " */kubepods/pod*/* " // k8s containers - " */kubepods/*/pod*/* " // k8s containers - " */*-kubepods-pod*/* " // k8s containers - " */*-kubepods-*-pod*/* " // k8s containers - " !*kubepods* !*kubelet* " // all other k8s cgroups - - // ---------------------------------------------------------------- - - " !*/vcpu* " // libvirtd adds these sub-cgroups - " !*/emulator " // libvirtd adds these sub-cgroups - " !*.mount " - " !*.partition " - " !*.service " - " !*.service/udev " - " !*.socket " - " !*.slice " - " !*.swap " - " !*.user " - " !/ " - " !/docker " - " !*/libvirt " - " !/lxc " - " !/lxc/*/* " // #1397 #2649 - " !/lxc.monitor* " - " !/lxc.pivot " - " !/lxc.payload " - " !/machine " - " !/qemu " - " !/system " - " !/systemd " - " !/user " - " * " // enable anything else - ), NULL, SIMPLE_PATTERN_EXACT, true); - - enabled_cgroup_names = simple_pattern_create( - 
config_get("plugin:cgroups", "enable by default cgroups names matching", - " * " - ), NULL, SIMPLE_PATTERN_EXACT, true); - - search_cgroup_paths = simple_pattern_create( - config_get("plugin:cgroups", "search for cgroups in subpaths matching", - " !*/init.scope " // ignore init.scope - " !*-qemu " // #345 - " !*.libvirt-qemu " // #3010 - " !/init.scope " - " !/system " - " !/systemd " - " !/user " - " !/user.slice " - " !/lxc/*/* " // #2161 #2649 - " !/lxc.monitor " - " !/lxc.payload/*/* " - " !/lxc.payload.* " - " * " - ), NULL, SIMPLE_PATTERN_EXACT, true); - - snprintfz(filename, FILENAME_MAX, "%s/cgroup-name.sh", netdata_configured_primary_plugins_dir); - cgroups_rename_script = config_get("plugin:cgroups", "script to get cgroup names", filename); - - snprintfz(filename, FILENAME_MAX, "%s/cgroup-network", netdata_configured_primary_plugins_dir); - cgroups_network_interface_script = config_get("plugin:cgroups", "script to get cgroup network interfaces", filename); - - enabled_cgroup_renames = simple_pattern_create( - config_get("plugin:cgroups", "run script to rename cgroups matching", - " !/ " - " !*.mount " - " !*.socket " - " !*.partition " - " /machine.slice/*.service " // #3367 systemd-nspawn - " !*.service " - " !*.slice " - " !*.swap " - " !*.user " - " !init.scope " - " !*.scope/vcpu* " // libvirtd adds these sub-cgroups - " !*.scope/emulator " // libvirtd adds these sub-cgroups - " *.scope " - " *docker* " - " *lxc* " - " *qemu* " - " */kubepods/pod*/* " // k8s containers - " */kubepods/*/pod*/* " // k8s containers - " */*-kubepods-pod*/* " // k8s containers - " */*-kubepods-*-pod*/* " // k8s containers - " !*kubepods* !*kubelet* " // all other k8s cgroups - " *.libvirt-qemu " // #3010 - " * " - ), NULL, SIMPLE_PATTERN_EXACT, true); - - if(cgroup_enable_systemd_services) { - systemd_services_cgroups = simple_pattern_create( - config_get("plugin:cgroups", "cgroups to match as systemd services", - " !/system.slice/*/*.service " - " /system.slice/*.service " - ), NULL, SIMPLE_PATTERN_EXACT, true); - } - - mountinfo_free_all(root); -} - -void netdata_cgroup_ebpf_set_values(size_t length) -{ - sem_wait(shm_mutex_cgroup_ebpf); - - shm_cgroup_ebpf.header->cgroup_max = cgroup_root_max; - shm_cgroup_ebpf.header->systemd_enabled = cgroup_enable_systemd_services | - cgroup_enable_systemd_services_detailed_memory | - cgroup_used_memory; - shm_cgroup_ebpf.header->body_length = length; - - sem_post(shm_mutex_cgroup_ebpf); -} - -void netdata_cgroup_ebpf_initialize_shm() -{ - shm_fd_cgroup_ebpf = shm_open(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME, O_CREAT | O_RDWR, 0660); - if (shm_fd_cgroup_ebpf < 0) { - collector_error("Cannot initialize shared memory used by cgroup and eBPF, integration won't happen."); - return; - } - - size_t length = sizeof(netdata_ebpf_cgroup_shm_header_t) + cgroup_root_max * sizeof(netdata_ebpf_cgroup_shm_body_t); - if (ftruncate(shm_fd_cgroup_ebpf, length)) { - collector_error("Cannot set size for shared memory."); - goto end_init_shm; - } - - shm_cgroup_ebpf.header = (netdata_ebpf_cgroup_shm_header_t *) mmap(NULL, length, - PROT_READ | PROT_WRITE, MAP_SHARED, - shm_fd_cgroup_ebpf, 0); - - if (unlikely(MAP_FAILED == shm_cgroup_ebpf.header)) { - shm_cgroup_ebpf.header = NULL; - collector_error("Cannot map shared memory used between cgroup and eBPF, integration won't happen"); - goto end_init_shm; - } - shm_cgroup_ebpf.body = (netdata_ebpf_cgroup_shm_body_t *) ((char *)shm_cgroup_ebpf.header + - sizeof(netdata_ebpf_cgroup_shm_header_t)); - - shm_mutex_cgroup_ebpf = 
sem_open(NETDATA_NAMED_SEMAPHORE_EBPF_CGROUP_NAME, O_CREAT, - S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH, 1); - - if (shm_mutex_cgroup_ebpf != SEM_FAILED) { - netdata_cgroup_ebpf_set_values(length); - return; - } - - collector_error("Cannot create semaphore, integration between eBPF and cgroup won't happen"); - munmap(shm_cgroup_ebpf.header, length); - shm_cgroup_ebpf.header = NULL; - -end_init_shm: - close(shm_fd_cgroup_ebpf); - shm_fd_cgroup_ebpf = -1; - shm_unlink(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME); -} - -// --------------------------------------------------------------------------------------------- - -static unsigned long long calc_delta(unsigned long long curr, unsigned long long prev) { - if (prev > curr) { - return 0; - } - return curr - prev; -} - -static unsigned long long calc_percentage(unsigned long long value, unsigned long long total) { - if (total == 0) { - return 0; - } - return (unsigned long long)((NETDATA_DOUBLE)value / (NETDATA_DOUBLE)total * 100); -} - -// ---------------------------------------------------------------------------- -// read values from /sys - -static inline void cgroup_read_cpuacct_stat(struct cpuacct_stat *cp) { - static procfile *ff = NULL; - - if(likely(cp->filename)) { - ff = procfile_reopen(ff, cp->filename, NULL, CGROUP_PROCFILE_FLAG); - if(unlikely(!ff)) { - cp->updated = 0; - cgroups_check = 1; - return; - } - - ff = procfile_readall(ff); - if(unlikely(!ff)) { - cp->updated = 0; - cgroups_check = 1; - return; - } - - unsigned long i, lines = procfile_lines(ff); - - if(unlikely(lines < 1)) { - collector_error("CGROUP: file '%s' should have 1+ lines.", cp->filename); - cp->updated = 0; - return; - } - - for(i = 0; i < lines ; i++) { - char *s = procfile_lineword(ff, i, 0); - uint32_t hash = simple_hash(s); - - if(unlikely(hash == user_hash && !strcmp(s, "user"))) - cp->user = str2ull(procfile_lineword(ff, i, 1), NULL); - - else if(unlikely(hash == system_hash && !strcmp(s, "system"))) - cp->system = str2ull(procfile_lineword(ff, i, 1), NULL); - } - - cp->updated = 1; - - if(unlikely(cp->enabled == CONFIG_BOOLEAN_AUTO && - (cp->user || cp->system || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) - cp->enabled = CONFIG_BOOLEAN_YES; - } -} - -static inline void cgroup_read_cpuacct_cpu_stat(struct cpuacct_cpu_throttling *cp) { - if (unlikely(!cp->filename)) { - return; - } - - static procfile *ff = NULL; - ff = procfile_reopen(ff, cp->filename, NULL, CGROUP_PROCFILE_FLAG); - if (unlikely(!ff)) { - cp->updated = 0; - cgroups_check = 1; - return; - } - - ff = procfile_readall(ff); - if (unlikely(!ff)) { - cp->updated = 0; - cgroups_check = 1; - return; - } - - unsigned long lines = procfile_lines(ff); - if (unlikely(lines < 3)) { - collector_error("CGROUP: file '%s' should have 3 lines.", cp->filename); - cp->updated = 0; - return; - } - - unsigned long long nr_periods_last = cp->nr_periods; - unsigned long long nr_throttled_last = cp->nr_throttled; - - for (unsigned long i = 0; i < lines; i++) { - char *s = procfile_lineword(ff, i, 0); - uint32_t hash = simple_hash(s); - - if (unlikely(hash == nr_periods_hash && !strcmp(s, "nr_periods"))) { - cp->nr_periods = str2ull(procfile_lineword(ff, i, 1), NULL); - } else if (unlikely(hash == nr_throttled_hash && !strcmp(s, "nr_throttled"))) { - cp->nr_throttled = str2ull(procfile_lineword(ff, i, 1), NULL); - } else if (unlikely(hash == throttled_time_hash && !strcmp(s, "throttled_time"))) { - cp->throttled_time = str2ull(procfile_lineword(ff, i, 1), NULL); - } - } - 
cp->nr_throttled_perc = - calc_percentage(calc_delta(cp->nr_throttled, nr_throttled_last), calc_delta(cp->nr_periods, nr_periods_last)); - - cp->updated = 1; - - if (unlikely(cp->enabled == CONFIG_BOOLEAN_AUTO)) { - if (likely( - cp->nr_periods || cp->nr_throttled || cp->throttled_time || - netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) { - cp->enabled = CONFIG_BOOLEAN_YES; - } - } -} - -static inline void cgroup2_read_cpuacct_cpu_stat(struct cpuacct_stat *cp, struct cpuacct_cpu_throttling *cpt) { - static procfile *ff = NULL; - if (unlikely(!cp->filename)) { - return; - } - - ff = procfile_reopen(ff, cp->filename, NULL, CGROUP_PROCFILE_FLAG); - if (unlikely(!ff)) { - cp->updated = 0; - cgroups_check = 1; - return; - } - - ff = procfile_readall(ff); - if (unlikely(!ff)) { - cp->updated = 0; - cgroups_check = 1; - return; - } - - unsigned long lines = procfile_lines(ff); - - if (unlikely(lines < 3)) { - collector_error("CGROUP: file '%s' should have at least 3 lines.", cp->filename); - cp->updated = 0; - return; - } - - unsigned long long nr_periods_last = cpt->nr_periods; - unsigned long long nr_throttled_last = cpt->nr_throttled; - - for (unsigned long i = 0; i < lines; i++) { - char *s = procfile_lineword(ff, i, 0); - uint32_t hash = simple_hash(s); - - if (unlikely(hash == user_usec_hash && !strcmp(s, "user_usec"))) { - cp->user = str2ull(procfile_lineword(ff, i, 1), NULL); - } else if (unlikely(hash == system_usec_hash && !strcmp(s, "system_usec"))) { - cp->system = str2ull(procfile_lineword(ff, i, 1), NULL); - } else if (unlikely(hash == nr_periods_hash && !strcmp(s, "nr_periods"))) { - cpt->nr_periods = str2ull(procfile_lineword(ff, i, 1), NULL); - } else if (unlikely(hash == nr_throttled_hash && !strcmp(s, "nr_throttled"))) { - cpt->nr_throttled = str2ull(procfile_lineword(ff, i, 1), NULL); - } else if (unlikely(hash == throttled_usec_hash && !strcmp(s, "throttled_usec"))) { - cpt->throttled_time = str2ull(procfile_lineword(ff, i, 1), NULL) * 1000; // usec -> ns - } - } - cpt->nr_throttled_perc = - calc_percentage(calc_delta(cpt->nr_throttled, nr_throttled_last), calc_delta(cpt->nr_periods, nr_periods_last)); - - cp->updated = 1; - cpt->updated = 1; - - if (unlikely(cp->enabled == CONFIG_BOOLEAN_AUTO)) { - if (likely(cp->user || cp->system || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) { - cp->enabled = CONFIG_BOOLEAN_YES; - } - } - if (unlikely(cpt->enabled == CONFIG_BOOLEAN_AUTO)) { - if (likely( - cpt->nr_periods || cpt->nr_throttled || cpt->throttled_time || - netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) { - cpt->enabled = CONFIG_BOOLEAN_YES; - } - } -} - -static inline void cgroup_read_cpuacct_cpu_shares(struct cpuacct_cpu_shares *cp) { - if (unlikely(!cp->filename)) { - return; - } - - if (unlikely(read_single_number_file(cp->filename, &cp->shares))) { - cp->updated = 0; - cgroups_check = 1; - return; - } - - cp->updated = 1; - if (unlikely((cp->enabled == CONFIG_BOOLEAN_AUTO)) && - (cp->shares || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) { - cp->enabled = CONFIG_BOOLEAN_YES; - } -} - -static inline void cgroup_read_cpuacct_usage(struct cpuacct_usage *ca) { - static procfile *ff = NULL; - - if(likely(ca->filename)) { - ff = procfile_reopen(ff, ca->filename, NULL, CGROUP_PROCFILE_FLAG); - if(unlikely(!ff)) { - ca->updated = 0; - cgroups_check = 1; - return; - } - - ff = procfile_readall(ff); - if(unlikely(!ff)) { - ca->updated = 0; - cgroups_check = 1; - return; - } - - if(unlikely(procfile_lines(ff) < 1)) { - collector_error("CGROUP: file '%s' 
should have 1+ lines but has %zu.", ca->filename, procfile_lines(ff)); - ca->updated = 0; - return; - } - - unsigned long i = procfile_linewords(ff, 0); - if(unlikely(i == 0)) { - ca->updated = 0; - return; - } - - // we may have 1 more CPU reported - while(i > 0) { - char *s = procfile_lineword(ff, 0, i - 1); - if(!*s) i--; - else break; - } - - if(unlikely(i != ca->cpus)) { - freez(ca->cpu_percpu); - ca->cpu_percpu = mallocz(sizeof(unsigned long long) * i); - ca->cpus = (unsigned int)i; - } - - unsigned long long total = 0; - for(i = 0; i < ca->cpus ;i++) { - unsigned long long n = str2ull(procfile_lineword(ff, 0, i), NULL); - ca->cpu_percpu[i] = n; - total += n; - } - - ca->updated = 1; - - if(unlikely(ca->enabled == CONFIG_BOOLEAN_AUTO && - (total || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) - ca->enabled = CONFIG_BOOLEAN_YES; - } -} - -static inline void cgroup_read_blkio(struct blkio *io) { - if(unlikely(io->enabled == CONFIG_BOOLEAN_AUTO && io->delay_counter > 0)) { - io->delay_counter--; - return; - } - - if(likely(io->filename)) { - static procfile *ff = NULL; - - ff = procfile_reopen(ff, io->filename, NULL, CGROUP_PROCFILE_FLAG); - if(unlikely(!ff)) { - io->updated = 0; - cgroups_check = 1; - return; - } - - ff = procfile_readall(ff); - if(unlikely(!ff)) { - io->updated = 0; - cgroups_check = 1; - return; - } - - unsigned long i, lines = procfile_lines(ff); - - if(unlikely(lines < 1)) { - collector_error("CGROUP: file '%s' should have 1+ lines.", io->filename); - io->updated = 0; - return; - } - - io->Read = 0; - io->Write = 0; -/* - io->Sync = 0; - io->Async = 0; - io->Total = 0; -*/ - - for(i = 0; i < lines ; i++) { - char *s = procfile_lineword(ff, i, 1); - uint32_t hash = simple_hash(s); - - if(unlikely(hash == Read_hash && !strcmp(s, "Read"))) - io->Read += str2ull(procfile_lineword(ff, i, 2), NULL); - - else if(unlikely(hash == Write_hash && !strcmp(s, "Write"))) - io->Write += str2ull(procfile_lineword(ff, i, 2), NULL); - -/* - else if(unlikely(hash == Sync_hash && !strcmp(s, "Sync"))) - io->Sync += str2ull(procfile_lineword(ff, i, 2)); - - else if(unlikely(hash == Async_hash && !strcmp(s, "Async"))) - io->Async += str2ull(procfile_lineword(ff, i, 2)); - - else if(unlikely(hash == Total_hash && !strcmp(s, "Total"))) - io->Total += str2ull(procfile_lineword(ff, i, 2)); -*/ - } - - io->updated = 1; - - if(unlikely(io->enabled == CONFIG_BOOLEAN_AUTO)) { - if(unlikely(io->Read || io->Write || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) - io->enabled = CONFIG_BOOLEAN_YES; - else - io->delay_counter = cgroup_recheck_zero_blkio_every_iterations; - } - } -} - -static inline void cgroup2_read_blkio(struct blkio *io, unsigned int word_offset) { - if(unlikely(io->enabled == CONFIG_BOOLEAN_AUTO && io->delay_counter > 0)) { - io->delay_counter--; - return; - } - - if(likely(io->filename)) { - static procfile *ff = NULL; - - ff = procfile_reopen(ff, io->filename, NULL, CGROUP_PROCFILE_FLAG); - if(unlikely(!ff)) { - io->updated = 0; - cgroups_check = 1; - return; - } - - ff = procfile_readall(ff); - if(unlikely(!ff)) { - io->updated = 0; - cgroups_check = 1; - return; - } - - unsigned long i, lines = procfile_lines(ff); - - if (unlikely(lines < 1)) { - collector_error("CGROUP: file '%s' should have 1+ lines.", io->filename); - io->updated = 0; - return; - } - - io->Read = 0; - io->Write = 0; - - for (i = 0; i < lines; i++) { - io->Read += str2ull(procfile_lineword(ff, i, 2 + word_offset), NULL); - io->Write += str2ull(procfile_lineword(ff, i, 4 + word_offset), NULL); 
- } - - io->updated = 1; - - if(unlikely(io->enabled == CONFIG_BOOLEAN_AUTO)) { - if(unlikely(io->Read || io->Write || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) - io->enabled = CONFIG_BOOLEAN_YES; - else - io->delay_counter = cgroup_recheck_zero_blkio_every_iterations; - } - } -} - -static inline void cgroup2_read_pressure(struct pressure *res) { - static procfile *ff = NULL; - - if (likely(res->filename)) { - ff = procfile_reopen(ff, res->filename, " =", CGROUP_PROCFILE_FLAG); - if (unlikely(!ff)) { - res->updated = 0; - cgroups_check = 1; - return; - } - - ff = procfile_readall(ff); - if (unlikely(!ff)) { - res->updated = 0; - cgroups_check = 1; - return; - } - - size_t lines = procfile_lines(ff); - if (lines < 1) { - collector_error("CGROUP: file '%s' should have 1+ lines.", res->filename); - res->updated = 0; - return; - } - - bool did_some = false, did_full = false; - - for(size_t l = 0; l < lines ;l++) { - const char *key = procfile_lineword(ff, l, 0); - if(strcmp(key, "some") == 0) { - res->some.share_time.value10 = strtod(procfile_lineword(ff, l, 2), NULL); - res->some.share_time.value60 = strtod(procfile_lineword(ff, l, 4), NULL); - res->some.share_time.value300 = strtod(procfile_lineword(ff, l, 6), NULL); - res->some.total_time.value_total = str2ull(procfile_lineword(ff, l, 8), NULL) / 1000; // us->ms - did_some = true; - } - else if(strcmp(key, "full") == 0) { - res->full.share_time.value10 = strtod(procfile_lineword(ff, l, 2), NULL); - res->full.share_time.value60 = strtod(procfile_lineword(ff, l, 4), NULL); - res->full.share_time.value300 = strtod(procfile_lineword(ff, l, 6), NULL); - res->full.total_time.value_total = str2ull(procfile_lineword(ff, l, 8), NULL) / 1000; // us->ms - did_full = true; - } - } - - res->updated = (did_full || did_some) ? 1 : 0; - - if(unlikely(res->some.enabled == CONFIG_BOOLEAN_AUTO)) - res->some.enabled = (did_some) ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO; - - if(unlikely(res->full.enabled == CONFIG_BOOLEAN_AUTO)) - res->full.enabled = (did_full) ? 
CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO; - } -} - -static inline void cgroup_read_memory(struct memory *mem, char parent_cg_is_unified) { - static procfile *ff = NULL; - - // read detailed ram usage - if(likely(mem->filename_detailed)) { - if(unlikely(mem->enabled_detailed == CONFIG_BOOLEAN_AUTO && mem->delay_counter_detailed > 0)) { - mem->delay_counter_detailed--; - goto memory_next; - } - - ff = procfile_reopen(ff, mem->filename_detailed, NULL, CGROUP_PROCFILE_FLAG); - if(unlikely(!ff)) { - mem->updated_detailed = 0; - cgroups_check = 1; - goto memory_next; - } - - ff = procfile_readall(ff); - if(unlikely(!ff)) { - mem->updated_detailed = 0; - cgroups_check = 1; - goto memory_next; - } - - unsigned long i, lines = procfile_lines(ff); - - if(unlikely(lines < 1)) { - collector_error("CGROUP: file '%s' should have 1+ lines.", mem->filename_detailed); - mem->updated_detailed = 0; - goto memory_next; - } - - - if(unlikely(!mem->arl_base)) { - if(parent_cg_is_unified == 0){ - mem->arl_base = arl_create("cgroup/memory", NULL, 60); - - arl_expect(mem->arl_base, "total_cache", &mem->total_cache); - arl_expect(mem->arl_base, "total_rss", &mem->total_rss); - arl_expect(mem->arl_base, "total_rss_huge", &mem->total_rss_huge); - arl_expect(mem->arl_base, "total_mapped_file", &mem->total_mapped_file); - arl_expect(mem->arl_base, "total_writeback", &mem->total_writeback); - mem->arl_dirty = arl_expect(mem->arl_base, "total_dirty", &mem->total_dirty); - mem->arl_swap = arl_expect(mem->arl_base, "total_swap", &mem->total_swap); - arl_expect(mem->arl_base, "total_pgpgin", &mem->total_pgpgin); - arl_expect(mem->arl_base, "total_pgpgout", &mem->total_pgpgout); - arl_expect(mem->arl_base, "total_pgfault", &mem->total_pgfault); - arl_expect(mem->arl_base, "total_pgmajfault", &mem->total_pgmajfault); - arl_expect(mem->arl_base, "total_inactive_file", &mem->total_inactive_file); - } else { - mem->arl_base = arl_create("cgroup/memory", NULL, 60); - - arl_expect(mem->arl_base, "anon", &mem->anon); - arl_expect(mem->arl_base, "kernel_stack", &mem->kernel_stack); - arl_expect(mem->arl_base, "slab", &mem->slab); - arl_expect(mem->arl_base, "sock", &mem->sock); - arl_expect(mem->arl_base, "anon_thp", &mem->anon_thp); - arl_expect(mem->arl_base, "file", &mem->total_mapped_file); - arl_expect(mem->arl_base, "file_writeback", &mem->total_writeback); - mem->arl_dirty = arl_expect(mem->arl_base, "file_dirty", &mem->total_dirty); - arl_expect(mem->arl_base, "pgfault", &mem->total_pgfault); - arl_expect(mem->arl_base, "pgmajfault", &mem->total_pgmajfault); - arl_expect(mem->arl_base, "inactive_file", &mem->total_inactive_file); - } - } - - arl_begin(mem->arl_base); - - for(i = 0; i < lines ; i++) { - if(arl_check(mem->arl_base, - procfile_lineword(ff, i, 0), - procfile_lineword(ff, i, 1))) break; - } - - if(unlikely(mem->arl_dirty->flags & ARL_ENTRY_FLAG_FOUND)) - mem->detailed_has_dirty = 1; - - if(unlikely(parent_cg_is_unified == 0 && mem->arl_swap->flags & ARL_ENTRY_FLAG_FOUND)) - mem->detailed_has_swap = 1; - - // fprintf(stderr, "READ: '%s', cache: %llu, rss: %llu, rss_huge: %llu, mapped_file: %llu, writeback: %llu, dirty: %llu, swap: %llu, pgpgin: %llu, pgpgout: %llu, pgfault: %llu, pgmajfault: %llu, inactive_anon: %llu, active_anon: %llu, inactive_file: %llu, active_file: %llu, unevictable: %llu, hierarchical_memory_limit: %llu, total_cache: %llu, total_rss: %llu, total_rss_huge: %llu, total_mapped_file: %llu, total_writeback: %llu, total_dirty: %llu, total_swap: %llu, total_pgpgin: %llu, total_pgpgout: %llu, 
total_pgfault: %llu, total_pgmajfault: %llu, total_inactive_anon: %llu, total_active_anon: %llu, total_inactive_file: %llu, total_active_file: %llu, total_unevictable: %llu\n", mem->filename, mem->cache, mem->rss, mem->rss_huge, mem->mapped_file, mem->writeback, mem->dirty, mem->swap, mem->pgpgin, mem->pgpgout, mem->pgfault, mem->pgmajfault, mem->inactive_anon, mem->active_anon, mem->inactive_file, mem->active_file, mem->unevictable, mem->hierarchical_memory_limit, mem->total_cache, mem->total_rss, mem->total_rss_huge, mem->total_mapped_file, mem->total_writeback, mem->total_dirty, mem->total_swap, mem->total_pgpgin, mem->total_pgpgout, mem->total_pgfault, mem->total_pgmajfault, mem->total_inactive_anon, mem->total_active_anon, mem->total_inactive_file, mem->total_active_file, mem->total_unevictable); - - mem->updated_detailed = 1; - - if(unlikely(mem->enabled_detailed == CONFIG_BOOLEAN_AUTO)) { - if(( (!parent_cg_is_unified) && ( mem->total_cache || mem->total_dirty || mem->total_rss || mem->total_rss_huge || mem->total_mapped_file || mem->total_writeback - || mem->total_swap || mem->total_pgpgin || mem->total_pgpgout || mem->total_pgfault || mem->total_pgmajfault || mem->total_inactive_file)) - || (parent_cg_is_unified && ( mem->anon || mem->total_dirty || mem->kernel_stack || mem->slab || mem->sock || mem->total_writeback - || mem->anon_thp || mem->total_pgfault || mem->total_pgmajfault || mem->total_inactive_file)) - || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES) - mem->enabled_detailed = CONFIG_BOOLEAN_YES; - else - mem->delay_counter_detailed = cgroup_recheck_zero_mem_detailed_every_iterations; - } - } - -memory_next: - - // read usage_in_bytes - if(likely(mem->filename_usage_in_bytes)) { - mem->updated_usage_in_bytes = !read_single_number_file(mem->filename_usage_in_bytes, &mem->usage_in_bytes); - if(unlikely(mem->updated_usage_in_bytes && mem->enabled_usage_in_bytes == CONFIG_BOOLEAN_AUTO && - (mem->usage_in_bytes || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) - mem->enabled_usage_in_bytes = CONFIG_BOOLEAN_YES; - } - - if (likely(mem->updated_usage_in_bytes && mem->updated_detailed)) { - mem->usage_in_bytes = - (mem->usage_in_bytes > mem->total_inactive_file) ? 
(mem->usage_in_bytes - mem->total_inactive_file) : 0; - } - - // read msw_usage_in_bytes - if(likely(mem->filename_msw_usage_in_bytes)) { - mem->updated_msw_usage_in_bytes = !read_single_number_file(mem->filename_msw_usage_in_bytes, &mem->msw_usage_in_bytes); - if(unlikely(mem->updated_msw_usage_in_bytes && mem->enabled_msw_usage_in_bytes == CONFIG_BOOLEAN_AUTO && - (mem->msw_usage_in_bytes || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) - mem->enabled_msw_usage_in_bytes = CONFIG_BOOLEAN_YES; - } - - // read failcnt - if(likely(mem->filename_failcnt)) { - if(unlikely(mem->enabled_failcnt == CONFIG_BOOLEAN_AUTO && mem->delay_counter_failcnt > 0)) { - mem->updated_failcnt = 0; - mem->delay_counter_failcnt--; - } - else { - mem->updated_failcnt = !read_single_number_file(mem->filename_failcnt, &mem->failcnt); - if(unlikely(mem->updated_failcnt && mem->enabled_failcnt == CONFIG_BOOLEAN_AUTO)) { - if(unlikely(mem->failcnt || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) - mem->enabled_failcnt = CONFIG_BOOLEAN_YES; - else - mem->delay_counter_failcnt = cgroup_recheck_zero_mem_failcnt_every_iterations; - } - } - } -} - -static void cgroup_read_pids_current(struct pids *pids) { - pids->pids_current_updated = 0; - - if (unlikely(!pids->pids_current_filename)) - return; - - pids->pids_current_updated = !read_single_number_file(pids->pids_current_filename, &pids->pids_current); -} - -static inline void read_cgroup(struct cgroup *cg) { - netdata_log_debug(D_CGROUP, "reading metrics for cgroups '%s'", cg->id); - if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { - cgroup_read_cpuacct_stat(&cg->cpuacct_stat); - cgroup_read_cpuacct_usage(&cg->cpuacct_usage); - cgroup_read_cpuacct_cpu_stat(&cg->cpuacct_cpu_throttling); - cgroup_read_cpuacct_cpu_shares(&cg->cpuacct_cpu_shares); - cgroup_read_memory(&cg->memory, 0); - cgroup_read_blkio(&cg->io_service_bytes); - cgroup_read_blkio(&cg->io_serviced); - cgroup_read_blkio(&cg->throttle_io_service_bytes); - cgroup_read_blkio(&cg->throttle_io_serviced); - cgroup_read_blkio(&cg->io_merged); - cgroup_read_blkio(&cg->io_queued); - cgroup_read_pids_current(&cg->pids); - } - else { - //TODO: io_service_bytes and io_serviced use same file merge into 1 function - cgroup2_read_blkio(&cg->io_service_bytes, 0); - cgroup2_read_blkio(&cg->io_serviced, 4); - cgroup2_read_cpuacct_cpu_stat(&cg->cpuacct_stat, &cg->cpuacct_cpu_throttling); - cgroup_read_cpuacct_cpu_shares(&cg->cpuacct_cpu_shares); - cgroup2_read_pressure(&cg->cpu_pressure); - cgroup2_read_pressure(&cg->io_pressure); - cgroup2_read_pressure(&cg->memory_pressure); - cgroup2_read_pressure(&cg->irq_pressure); - cgroup_read_memory(&cg->memory, 1); - cgroup_read_pids_current(&cg->pids); - } -} - -static inline void read_all_discovered_cgroups(struct cgroup *root) { - netdata_log_debug(D_CGROUP, "reading metrics for all cgroups"); - - struct cgroup *cg; - for (cg = root; cg; cg = cg->next) { - if (cg->enabled && !cg->pending_renames) { - read_cgroup(cg); - } - } -} - -// update CPU and memory limits - -static inline void update_cpu_limits(char **filename, unsigned long long *value, struct cgroup *cg) { - if(*filename) { - int ret = -1; - - if(value == &cg->cpuset_cpus) { - unsigned long ncpus = read_cpuset_cpus(*filename, get_system_cpus()); - if(ncpus) { - *value = ncpus; - ret = 0; - } - } - else if(value == &cg->cpu_cfs_period || value == &cg->cpu_cfs_quota) { - ret = read_single_number_file(*filename, value); - } - else ret = -1; - - if(ret) { - collector_error("Cannot refresh cgroup %s cpu limit by 
reading '%s'. Will not update its limit anymore.", cg->id, *filename); - freez(*filename); - *filename = NULL; - } - } -} - -static inline void update_cpu_limits2(struct cgroup *cg) { - if(cg->filename_cpu_cfs_quota){ - static procfile *ff = NULL; - - ff = procfile_reopen(ff, cg->filename_cpu_cfs_quota, NULL, CGROUP_PROCFILE_FLAG); - if(unlikely(!ff)) { - goto cpu_limits2_err; - } - - ff = procfile_readall(ff); - if(unlikely(!ff)) { - goto cpu_limits2_err; - } - - unsigned long lines = procfile_lines(ff); - - if (unlikely(lines < 1)) { - collector_error("CGROUP: file '%s' should have 1 lines.", cg->filename_cpu_cfs_quota); - return; - } - - cg->cpu_cfs_period = str2ull(procfile_lineword(ff, 0, 1), NULL); - cg->cpuset_cpus = get_system_cpus(); - - char *s = "max\n\0"; - if(strcmp(s, procfile_lineword(ff, 0, 0)) == 0){ - cg->cpu_cfs_quota = cg->cpu_cfs_period * cg->cpuset_cpus; - } else { - cg->cpu_cfs_quota = str2ull(procfile_lineword(ff, 0, 0), NULL); - } - netdata_log_debug(D_CGROUP, "CPU limits values: %llu %llu %llu", cg->cpu_cfs_period, cg->cpuset_cpus, cg->cpu_cfs_quota); - return; - -cpu_limits2_err: - collector_error("Cannot refresh cgroup %s cpu limit by reading '%s'. Will not update its limit anymore.", cg->id, cg->filename_cpu_cfs_quota); - freez(cg->filename_cpu_cfs_quota); - cg->filename_cpu_cfs_quota = NULL; - - } -} - -static inline int update_memory_limits(struct cgroup *cg) { - char **filename = &cg->filename_memory_limit; - const RRDSETVAR_ACQUIRED **chart_var = &cg->chart_var_memory_limit; - unsigned long long *value = &cg->memory_limit; - - if(*filename) { - if(unlikely(!*chart_var)) { - *chart_var = rrdsetvar_custom_chart_variable_add_and_acquire(cg->st_mem_usage, "memory_limit"); - if(!*chart_var) { - collector_error("Cannot create cgroup %s chart variable '%s'. Will not update its limit anymore.", cg->id, "memory_limit"); - freez(*filename); - *filename = NULL; - } - } - - if(*filename && *chart_var) { - if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { - if(read_single_number_file(*filename, value)) { - collector_error("Cannot refresh cgroup %s memory limit by reading '%s'. Will not update its limit anymore.", cg->id, *filename); - freez(*filename); - *filename = NULL; - } - else { - rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value) / (1024.0 * 1024.0)); - return 1; - } - } else { - char buffer[30 + 1]; - int ret = read_file(*filename, buffer, 30); - if(ret) { - collector_error("Cannot refresh cgroup %s memory limit by reading '%s'. 
Will not update its limit anymore.", cg->id, *filename); - freez(*filename); - *filename = NULL; - return 0; - } - char *s = "max\n\0"; - if(strcmp(s, buffer) == 0){ - *value = UINT64_MAX; - rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value) / (1024.0 * 1024.0)); - return 1; - } - *value = str2ull(buffer, NULL); - rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value) / (1024.0 * 1024.0)); - return 1; - } - } - } - return 0; -} - -// ---------------------------------------------------------------------------- -// generate charts - -void update_cgroup_systemd_services_charts() { - for (struct cgroup *cg = cgroup_root; cg; cg = cg->next) { - if (unlikely(!cg->enabled || cg->pending_renames || !is_cgroup_systemd_service(cg))) - continue; - - if (likely(cg->cpuacct_stat.updated)) { - update_cpu_utilization_chart(cg); - } - if (likely(cg->memory.updated_msw_usage_in_bytes)) { - update_mem_usage_chart(cg); - } - if (likely(cg->memory.updated_failcnt)) { - update_mem_failcnt_chart(cg); - } - if (likely(cg->memory.updated_detailed)) { - update_mem_usage_detailed_chart(cg); - update_mem_writeback_chart(cg); - update_mem_pgfaults_chart(cg); - if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { - update_mem_activity_chart(cg); - } - } - if (likely(cg->io_service_bytes.updated)) { - update_io_serviced_bytes_chart(cg); - } - if (likely(cg->io_serviced.updated)) { - update_io_serviced_ops_chart(cg); - } - if (likely(cg->throttle_io_service_bytes.updated)) { - update_throttle_io_serviced_bytes_chart(cg); - } - if (likely(cg->throttle_io_serviced.updated)) { - update_throttle_io_serviced_ops_chart(cg); - } - if (likely(cg->io_queued.updated)) { - update_io_queued_ops_chart(cg); - } - if (likely(cg->io_merged.updated)) { - update_io_merged_ops_chart(cg); - } - - if (likely(cg->pids.pids_current_updated)) { - update_pids_current_chart(cg); - } - - cg->function_ready = true; - } -} - -void update_cgroup_charts() { - for (struct cgroup *cg = cgroup_root; cg; cg = cg->next) { - if(unlikely(!cg->enabled || cg->pending_renames || is_cgroup_systemd_service(cg))) - continue; - - if (likely(cg->cpuacct_stat.updated && cg->cpuacct_stat.enabled == CONFIG_BOOLEAN_YES)) { - update_cpu_utilization_chart(cg); - - if(likely(cg->filename_cpuset_cpus || cg->filename_cpu_cfs_period || cg->filename_cpu_cfs_quota)) { - if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { - update_cpu_limits(&cg->filename_cpuset_cpus, &cg->cpuset_cpus, cg); - update_cpu_limits(&cg->filename_cpu_cfs_period, &cg->cpu_cfs_period, cg); - update_cpu_limits(&cg->filename_cpu_cfs_quota, &cg->cpu_cfs_quota, cg); - } else { - update_cpu_limits2(cg); - } - - if(unlikely(!cg->chart_var_cpu_limit)) { - cg->chart_var_cpu_limit = rrdsetvar_custom_chart_variable_add_and_acquire(cg->st_cpu, "cpu_limit"); - if(!cg->chart_var_cpu_limit) { - collector_error("Cannot create cgroup %s chart variable 'cpu_limit'. 
Will not update its limit anymore.", cg->id); - if(cg->filename_cpuset_cpus) freez(cg->filename_cpuset_cpus); - cg->filename_cpuset_cpus = NULL; - if(cg->filename_cpu_cfs_period) freez(cg->filename_cpu_cfs_period); - cg->filename_cpu_cfs_period = NULL; - if(cg->filename_cpu_cfs_quota) freez(cg->filename_cpu_cfs_quota); - cg->filename_cpu_cfs_quota = NULL; - } - } else { - NETDATA_DOUBLE value = 0, quota = 0; - - if(likely( ((!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) && (cg->filename_cpuset_cpus || (cg->filename_cpu_cfs_period && cg->filename_cpu_cfs_quota))) - || ((cg->options & CGROUP_OPTIONS_IS_UNIFIED) && cg->filename_cpu_cfs_quota))) { - if(unlikely(cg->cpu_cfs_quota > 0)) - quota = (NETDATA_DOUBLE)cg->cpu_cfs_quota / (NETDATA_DOUBLE)cg->cpu_cfs_period; - - if(unlikely(quota > 0 && quota < cg->cpuset_cpus)) - value = quota * 100; - else - value = (NETDATA_DOUBLE)cg->cpuset_cpus * 100; - } - if(likely(value)) { - update_cpu_utilization_limit_chart(cg, value); - } else { - if (unlikely(cg->st_cpu_limit)) { - rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_limit); - cg->st_cpu_limit = NULL; - } - rrdsetvar_custom_chart_variable_set(cg->st_cpu, cg->chart_var_cpu_limit, NAN); - } - } - } - } - - if (likely(cg->cpuacct_cpu_throttling.updated && cg->cpuacct_cpu_throttling.enabled == CONFIG_BOOLEAN_YES)) { - update_cpu_throttled_chart(cg); - update_cpu_throttled_duration_chart(cg); - } - - if (likely(cg->cpuacct_cpu_shares.updated && cg->cpuacct_cpu_shares.enabled == CONFIG_BOOLEAN_YES)) { - update_cpu_shares_chart(cg); - } - - if (likely(cg->cpuacct_usage.updated && cg->cpuacct_usage.enabled == CONFIG_BOOLEAN_YES)) { - update_cpu_per_core_usage_chart(cg); - } - - if (likely(cg->memory.updated_detailed && cg->memory.enabled_detailed == CONFIG_BOOLEAN_YES)) { - update_mem_usage_detailed_chart(cg); - update_mem_writeback_chart(cg); - - if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { - update_mem_activity_chart(cg); - } - - update_mem_pgfaults_chart(cg); - } - - if (likely(cg->memory.updated_usage_in_bytes && cg->memory.enabled_usage_in_bytes == CONFIG_BOOLEAN_YES)) { - update_mem_usage_chart(cg); - - // FIXME: this if should be only for unlimited charts - if(likely(host_ram_total)) { - // FIXME: do we need to update mem limits on every data collection? 
- if (likely(update_memory_limits(cg))) { - - unsigned long long memory_limit = host_ram_total; - if (unlikely(cg->memory_limit < host_ram_total)) - memory_limit = cg->memory_limit; - - update_mem_usage_limit_chart(cg, memory_limit); - update_mem_utilization_chart(cg, memory_limit); - } else { - if (unlikely(cg->st_mem_usage_limit)) { - rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_usage_limit); - cg->st_mem_usage_limit = NULL; - } - - if (unlikely(cg->st_mem_utilization)) { - rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_utilization); - cg->st_mem_utilization = NULL; - } - } - } - } - - if (likely(cg->memory.updated_failcnt && cg->memory.enabled_failcnt == CONFIG_BOOLEAN_YES)) { - update_mem_failcnt_chart(cg); - } - - if (likely(cg->io_service_bytes.updated && cg->io_service_bytes.enabled == CONFIG_BOOLEAN_YES)) { - update_io_serviced_bytes_chart(cg); - } - - if (likely(cg->io_serviced.updated && cg->io_serviced.enabled == CONFIG_BOOLEAN_YES)) { - update_io_serviced_ops_chart(cg); - } - - if (likely(cg->throttle_io_service_bytes.updated && cg->throttle_io_service_bytes.enabled == CONFIG_BOOLEAN_YES)) { - update_throttle_io_serviced_bytes_chart(cg); - } - - if (likely(cg->throttle_io_serviced.updated && cg->throttle_io_serviced.enabled == CONFIG_BOOLEAN_YES)) { - update_throttle_io_serviced_ops_chart(cg); - } - - if (likely(cg->io_queued.updated && cg->io_queued.enabled == CONFIG_BOOLEAN_YES)) { - update_io_queued_ops_chart(cg); - } - - if (likely(cg->io_merged.updated && cg->io_merged.enabled == CONFIG_BOOLEAN_YES)) { - update_io_merged_ops_chart(cg); - } - - if (likely(cg->pids.pids_current_updated)) { - update_pids_current_chart(cg); - } - - if (cg->options & CGROUP_OPTIONS_IS_UNIFIED) { - if (likely(cg->cpu_pressure.updated)) { - if (cg->cpu_pressure.some.enabled) { - update_cpu_some_pressure_chart(cg); - update_cpu_some_pressure_stall_time_chart(cg); - } - if (cg->cpu_pressure.full.enabled) { - update_cpu_full_pressure_chart(cg); - update_cpu_full_pressure_stall_time_chart(cg); - } - } - - if (likely(cg->memory_pressure.updated)) { - if (cg->memory_pressure.some.enabled) { - update_mem_some_pressure_chart(cg); - update_mem_some_pressure_stall_time_chart(cg); - } - if (cg->memory_pressure.full.enabled) { - update_mem_full_pressure_chart(cg); - update_mem_full_pressure_stall_time_chart(cg); - } - } - - if (likely(cg->irq_pressure.updated)) { - if (cg->irq_pressure.some.enabled) { - update_irq_some_pressure_chart(cg); - update_irq_some_pressure_stall_time_chart(cg); - } - if (cg->irq_pressure.full.enabled) { - update_irq_full_pressure_chart(cg); - update_irq_full_pressure_stall_time_chart(cg); - } - } - - if (likely(cg->io_pressure.updated)) { - if (cg->io_pressure.some.enabled) { - update_io_some_pressure_chart(cg); - update_io_some_pressure_stall_time_chart(cg); - } - if (cg->io_pressure.full.enabled) { - update_io_full_pressure_chart(cg); - update_io_full_pressure_stall_time_chart(cg); - } - } - } - - cg->function_ready = true; - } -} - -// ---------------------------------------------------------------------------- -// cgroups main - -static void cgroup_main_cleanup(void *ptr) { - worker_unregister(); - - struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; - static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; - - collector_info("cleaning up..."); - - usec_t max = 2 * USEC_PER_SEC, step = 50000; - - if (!__atomic_load_n(&discovery_thread.exited, __ATOMIC_RELAXED)) { - collector_info("waiting for discovery thread to 
finish..."); - while (!__atomic_load_n(&discovery_thread.exited, __ATOMIC_RELAXED) && max > 0) { - uv_mutex_lock(&discovery_thread.mutex); - uv_cond_signal(&discovery_thread.cond_var); - uv_mutex_unlock(&discovery_thread.mutex); - max -= step; - sleep_usec(step); - } - } - - if (shm_mutex_cgroup_ebpf != SEM_FAILED) { - sem_close(shm_mutex_cgroup_ebpf); - } - - if (shm_cgroup_ebpf.header) { - shm_cgroup_ebpf.header->cgroup_root_count = 0; - munmap(shm_cgroup_ebpf.header, shm_cgroup_ebpf.header->body_length); - } - - if (shm_fd_cgroup_ebpf > 0) { - close(shm_fd_cgroup_ebpf); - } - - static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; -} - -void cgroup_read_host_total_ram() { - procfile *ff = NULL; - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/meminfo"); - - ff = procfile_open( - config_get("plugin:cgroups", "meminfo filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); - - if (likely((ff = procfile_readall(ff)) && procfile_lines(ff) && !strncmp(procfile_word(ff, 0), "MemTotal", 8))) - host_ram_total = str2ull(procfile_word(ff, 1), NULL) * 1024; - else - collector_error("Cannot read file %s. Will not create RAM limit charts.", filename); - - procfile_close(ff); -} - -void *cgroups_main(void *ptr) { - worker_register("CGROUPS"); - worker_register_job_name(WORKER_CGROUPS_LOCK, "lock"); - worker_register_job_name(WORKER_CGROUPS_READ, "read"); - worker_register_job_name(WORKER_CGROUPS_CHART, "chart"); - - netdata_thread_cleanup_push(cgroup_main_cleanup, ptr); - - if (getenv("KUBERNETES_SERVICE_HOST") != NULL && getenv("KUBERNETES_SERVICE_PORT") != NULL) { - is_inside_k8s = 1; - cgroup_enable_cpuacct_cpu_shares = CONFIG_BOOLEAN_YES; - } - - read_cgroup_plugin_configuration(); - - cgroup_read_host_total_ram(); - - netdata_cgroup_ebpf_initialize_shm(); - - if (uv_mutex_init(&cgroup_root_mutex)) { - collector_error("CGROUP: cannot initialize mutex for the main cgroup list"); - goto exit; - } - - discovery_thread.exited = 0; - - if (uv_mutex_init(&discovery_thread.mutex)) { - collector_error("CGROUP: cannot initialize mutex for discovery thread"); - goto exit; - } - if (uv_cond_init(&discovery_thread.cond_var)) { - collector_error("CGROUP: cannot initialize conditional variable for discovery thread"); - goto exit; - } - - int error = uv_thread_create(&discovery_thread.thread, cgroup_discovery_worker, NULL); - if (error) { - collector_error("CGROUP: cannot create thread worker. 
uv_thread_create(): %s", uv_strerror(error)); - goto exit; - } - - uv_thread_set_name_np(discovery_thread.thread, "P[cgroups]"); - - // we register this only on localhost - // for the other nodes, the origin server should register it - rrd_collector_started(); // this creates a collector that runs for as long as netdata runs - cgroup_netdev_link_init(); - rrd_function_add(localhost, NULL, "containers-vms", 10, RRDFUNCTIONS_CGTOP_HELP, true, cgroup_function_cgroup_top, NULL); - rrd_function_add(localhost, NULL, "systemd-services", 10, RRDFUNCTIONS_CGTOP_HELP, true, cgroup_function_systemd_top, NULL); - - heartbeat_t hb; - heartbeat_init(&hb); - usec_t step = cgroup_update_every * USEC_PER_SEC; - usec_t find_every = cgroup_check_for_new_every * USEC_PER_SEC, find_dt = 0; - - netdata_thread_disable_cancelability(); - - while(service_running(SERVICE_COLLECTORS)) { - worker_is_idle(); - - usec_t hb_dt = heartbeat_next(&hb, step); - if (unlikely(!service_running(SERVICE_COLLECTORS))) - break; - - find_dt += hb_dt; - if (unlikely(find_dt >= find_every || (!is_inside_k8s && cgroups_check))) { - uv_mutex_lock(&discovery_thread.mutex); - uv_cond_signal(&discovery_thread.cond_var); - uv_mutex_unlock(&discovery_thread.mutex); - find_dt = 0; - cgroups_check = 0; - } - - worker_is_busy(WORKER_CGROUPS_LOCK); - uv_mutex_lock(&cgroup_root_mutex); - - worker_is_busy(WORKER_CGROUPS_READ); - read_all_discovered_cgroups(cgroup_root); - - if (unlikely(!service_running(SERVICE_COLLECTORS))) { - uv_mutex_unlock(&cgroup_root_mutex); - break; - } - - worker_is_busy(WORKER_CGROUPS_CHART); - - update_cgroup_charts(); - if (cgroup_enable_systemd_services) - update_cgroup_systemd_services_charts(); - - if (unlikely(!service_running(SERVICE_COLLECTORS))) { - uv_mutex_unlock(&cgroup_root_mutex); - break; - } - - worker_is_idle(); - uv_mutex_unlock(&cgroup_root_mutex); - } - -exit: - netdata_thread_cleanup_pop(1); - return NULL; -} diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.h b/collectors/cgroups.plugin/sys_fs_cgroup.h deleted file mode 100644 index e8cfcf5f6..000000000 --- a/collectors/cgroups.plugin/sys_fs_cgroup.h +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef NETDATA_SYS_FS_CGROUP_H -#define NETDATA_SYS_FS_CGROUP_H 1 - -#include "daemon/common.h" - -#define PLUGIN_CGROUPS_NAME "cgroups.plugin" -#define PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME "systemd" -#define PLUGIN_CGROUPS_MODULE_CGROUPS_NAME "/sys/fs/cgroup" - -#define CGROUP_OPTIONS_DISABLED_DUPLICATE 0x00000001 -#define CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE 0x00000002 -#define CGROUP_OPTIONS_IS_UNIFIED 0x00000004 - -typedef struct netdata_ebpf_cgroup_shm_header { - int cgroup_root_count; - int cgroup_max; - int systemd_enabled; - int __pad; - size_t body_length; -} netdata_ebpf_cgroup_shm_header_t; - -#define CGROUP_EBPF_NAME_SHARED_LENGTH 256 - -typedef struct netdata_ebpf_cgroup_shm_body { - // Considering what is exposed in this link https://en.wikipedia.org/wiki/Comparison_of_file_systems#Limits - // this length is enough to store what we want. 
- char name[CGROUP_EBPF_NAME_SHARED_LENGTH]; - uint32_t hash; - uint32_t options; - int enabled; - char path[FILENAME_MAX + 1]; -} netdata_ebpf_cgroup_shm_body_t; - -typedef struct netdata_ebpf_cgroup_shm { - netdata_ebpf_cgroup_shm_header_t *header; - netdata_ebpf_cgroup_shm_body_t *body; -} netdata_ebpf_cgroup_shm_t; - -#define NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME "netdata_shm_cgroup_ebpf" -#define NETDATA_NAMED_SEMAPHORE_EBPF_CGROUP_NAME "/netdata_sem_cgroup_ebpf" - -#include "../proc.plugin/plugin_proc.h" - -char *cgroup_parse_resolved_name_and_labels(RRDLABELS *labels, char *data); - -#endif //NETDATA_SYS_FS_CGROUP_H diff --git a/collectors/cgroups.plugin/tests/test_cgroups_plugin.c b/collectors/cgroups.plugin/tests/test_cgroups_plugin.c deleted file mode 100644 index bb1fb3988..000000000 --- a/collectors/cgroups.plugin/tests/test_cgroups_plugin.c +++ /dev/null @@ -1,130 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "test_cgroups_plugin.h" -#include "libnetdata/required_dummies.h" - -RRDHOST *localhost; -int netdata_zero_metrics_enabled = 1; -struct config netdata_config; -char *netdata_configured_primary_plugins_dir = NULL; - -struct k8s_test_data { - char *data; - char *name; - char *key[3]; - char *value[3]; - - const char *result_key[3]; - const char *result_value[3]; - int result_ls[3]; - int i; -}; - -static int read_label_callback(const char *name, const char *value, void *data) -{ - struct k8s_test_data *test_data = (struct k8s_test_data *)data; - - test_data->result_key[test_data->i] = name; - test_data->result_value[test_data->i] = value; - - test_data->i++; - - return 1; -} - -static void test_cgroup_parse_resolved_name(void **state) -{ - UNUSED(state); - - RRDLABELS *labels = rrdlabels_create(); - - struct k8s_test_data test_data[] = { - // One label - { .data = "name label1=\"value1\"", - .name = "name", - .key[0] = "label1", .value[0] = "value1" }, - - // Three labels - { .data = "name label1=\"value1\",label2=\"value2\",label3=\"value3\"", - .name = "name", - .key[0] = "label1", .value[0] = "value1", - .key[1] = "label2", .value[1] = "value2", - .key[2] = "label3", .value[2] = "value3" }, - - // Comma at the end of the data string - { .data = "name label1=\"value1\",", - .name = "name", - .key[0] = "label1", .value[0] = "value1" }, - - // Equals sign in the value - // { .data = "name label1=\"value=1\"", - // .name = "name", - // .key[0] = "label1", .value[0] = "value=1" }, - - // Double quotation mark in the value - // { .data = "name label1=\"value\"1\"", - // .name = "name", - // .key[0] = "label1", .value[0] = "value" }, - - // Escaped double quotation mark in the value - // { .data = "name label1=\"value\\\"1\"", - // .name = "name", - // .key[0] = "label1", .value[0] = "value\\\"1" }, - - // Equals sign in the key - // { .data = "name label=1=\"value1\"", - // .name = "name", - // .key[0] = "label", .value[0] = "1=\"value1\"" }, - - // Skipped value - // { .data = "name label1=,label2=\"value2\"", - // .name = "name", - // .key[0] = "label2", .value[0] = "value2" }, - - // A pair of equals signs - { .data = "name= =", - .name = "name=" }, - - // A pair of commas - { .data = "name, ,", - .name = "name," }, - - { .data = NULL } - }; - - for (int i = 0; test_data[i].data != NULL; i++) { - char *data = strdup(test_data[i].data); - - char *name = cgroup_parse_resolved_name_and_labels(labels, data); - - assert_string_equal(name, test_data[i].name); - - rrdlabels_walkthrough_read(labels, read_label_callback, &test_data[i]); - - for (int l = 0; l < 3 
&& test_data[i].key[l] != NULL; l++) { - char *key = test_data[i].key[l]; - char *value = test_data[i].value[l]; - - const char *result_key = test_data[i].result_key[l]; - const char *result_value = test_data[i].result_value[l]; - int ls = test_data[i].result_ls[l]; - - assert_string_equal(key, result_key); - assert_string_equal(value, result_value); - assert_int_equal(RRDLABEL_SRC_AUTO | RRDLABEL_SRC_K8S, ls); - } - - free(data); - } -} - -int main(void) -{ - const struct CMUnitTest tests[] = { - cmocka_unit_test(test_cgroup_parse_resolved_name), - }; - - int test_res = cmocka_run_group_tests_name("test_cgroup_parse_resolved_name", tests, NULL, NULL); - - return test_res; -} diff --git a/collectors/cgroups.plugin/tests/test_cgroups_plugin.h b/collectors/cgroups.plugin/tests/test_cgroups_plugin.h deleted file mode 100644 index 3d68e9230..000000000 --- a/collectors/cgroups.plugin/tests/test_cgroups_plugin.h +++ /dev/null @@ -1,16 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef TEST_CGROUPS_PLUGIN_H -#define TEST_CGROUPS_PLUGIN_H 1 - -#include "libnetdata/libnetdata.h" - -#include "../sys_fs_cgroup.h" - -#include <stdarg.h> -#include <stddef.h> -#include <setjmp.h> -#include <stdint.h> -#include <cmocka.h> - -#endif /* TEST_CGROUPS_PLUGIN_H */ diff --git a/collectors/cgroups.plugin/tests/test_doubles.c b/collectors/cgroups.plugin/tests/test_doubles.c deleted file mode 100644 index b13d4b19c..000000000 --- a/collectors/cgroups.plugin/tests/test_doubles.c +++ /dev/null @@ -1,157 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "test_cgroups_plugin.h" - -void rrdset_is_obsolete___safe_from_collector_thread(RRDSET *st) -{ - UNUSED(st); -} - -void rrdset_isnot_obsolete___safe_from_collector_thread(RRDSET *st) -{ - UNUSED(st); -} - -struct mountinfo *mountinfo_read(int do_statvfs) -{ - UNUSED(do_statvfs); - - return NULL; -} - -struct mountinfo * -mountinfo_find_by_filesystem_mount_source(struct mountinfo *root, const char *filesystem, const char *mount_source) -{ - UNUSED(root); - UNUSED(filesystem); - UNUSED(mount_source); - - return NULL; -} - -struct mountinfo * -mountinfo_find_by_filesystem_super_option(struct mountinfo *root, const char *filesystem, const char *super_options) -{ - UNUSED(root); - UNUSED(filesystem); - UNUSED(super_options); - - return NULL; -} - -void mountinfo_free_all(struct mountinfo *mi) -{ - UNUSED(mi); -} - -RRDSET *rrdset_create_custom( - RRDHOST *host, const char *type, const char *id, const char *name, const char *family, const char *context, - const char *title, const char *units, const char *plugin, const char *module, long priority, int update_every, - RRDSET_TYPE chart_type, RRD_MEMORY_MODE memory_mode, long history_entries) -{ - UNUSED(host); - UNUSED(type); - UNUSED(id); - UNUSED(name); - UNUSED(family); - UNUSED(context); - UNUSED(title); - UNUSED(units); - UNUSED(plugin); - UNUSED(module); - UNUSED(priority); - UNUSED(update_every); - UNUSED(chart_type); - UNUSED(memory_mode); - UNUSED(history_entries); - - return NULL; -} - -RRDDIM *rrddim_add_custom( - RRDSET *st, const char *id, const char *name, collected_number multiplier, collected_number divisor, - RRD_ALGORITHM algorithm, RRD_MEMORY_MODE memory_mode) -{ - UNUSED(st); - UNUSED(id); - UNUSED(name); - UNUSED(multiplier); - UNUSED(divisor); - UNUSED(algorithm); - UNUSED(memory_mode); - - return NULL; -} - -collected_number rrddim_set(RRDSET *st, const char *id, collected_number value) -{ - UNUSED(st); - UNUSED(id); - UNUSED(value); - - return 0; -} - -collected_number 
rrddim_set_by_pointer(RRDSET *st, RRDDIM *rd, collected_number value) -{ - UNUSED(st); - UNUSED(rd); - UNUSED(value); - - return 0; -} - -const RRDSETVAR_ACQUIRED *rrdsetvar_custom_chart_variable_add_and_acquire(RRDSET *st, const char *name) -{ - UNUSED(st); - UNUSED(name); - - return NULL; -} - -void rrdsetvar_custom_chart_variable_set(RRDSET *st, const RRDSETVAR_ACQUIRED *rsa, NETDATA_DOUBLE value) -{ - UNUSED(st); - UNUSED(rsa); - UNUSED(value); -} - -void rrdset_next_usec(RRDSET *st, usec_t microseconds) -{ - UNUSED(st); - UNUSED(microseconds); -} - -void rrdset_done(RRDSET *st) -{ - UNUSED(st); -} - -void update_pressure_charts(struct pressure_charts *charts) -{ - UNUSED(charts); -} - -void netdev_rename_device_add( - const char *host_device, const char *container_device, const char *container_name, DICTIONARY *labels, const char *ctx_prefix) -{ - UNUSED(host_device); - UNUSED(container_device); - UNUSED(container_name); - UNUSED(labels); - UNUSED(ctx_prefix); -} - -void netdev_rename_device_del(const char *host_device) -{ - UNUSED(host_device); -} - -void rrdcalc_update_rrdlabels(RRDSET *st) { - (void)st; -} - -void db_execute(const char *cmd) -{ - UNUSED(cmd); -} |
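
Note: the removed sys_fs_cgroup.c above turns the kernel's lifetime counters into per-interval charts; for CPU throttling it keeps the previous nr_periods/nr_throttled values and uses calc_delta() and calc_percentage() to report the share of enforcement periods that were throttled between two reads of cpu.stat. The snippet below is a minimal standalone sketch of that arithmetic against a cgroup v2 cpu.stat file, not code from the plugin: the cgroup path, the helper names (delta, read_cpu_stat) and the one-interval pause are illustrative assumptions.

```c
/*
 * Standalone sketch (assumptions noted above): sample a cgroup v2 cpu.stat
 * twice and report the percentage of CPU periods that were throttled
 * in between, mirroring the delta/percentage bookkeeping of the removed
 * collector.
 */
#include <stdio.h>
#include <string.h>

struct cpu_throttling {
    unsigned long long nr_periods;
    unsigned long long nr_throttled;
};

/* Counters are monotonically increasing; guard against resets. */
static unsigned long long delta(unsigned long long curr, unsigned long long prev) {
    return (prev > curr) ? 0 : curr - prev;
}

static int read_cpu_stat(const char *path, struct cpu_throttling *t) {
    FILE *fp = fopen(path, "r");
    if (!fp)
        return -1;

    char key[64];
    unsigned long long value;
    while (fscanf(fp, "%63s %llu", key, &value) == 2) {
        if (!strcmp(key, "nr_periods"))
            t->nr_periods = value;
        else if (!strcmp(key, "nr_throttled"))
            t->nr_throttled = value;
    }
    fclose(fp);
    return 0;
}

int main(void) {
    const char *path = "/sys/fs/cgroup/mygroup/cpu.stat"; /* hypothetical cgroup */
    struct cpu_throttling prev = {0}, curr = {0};

    if (read_cpu_stat(path, &prev))
        return 1;
    /* ... wait one collection interval here (e.g. one second) ... */
    if (read_cpu_stat(path, &curr))
        return 1;

    unsigned long long periods   = delta(curr.nr_periods, prev.nr_periods);
    unsigned long long throttled = delta(curr.nr_throttled, prev.nr_throttled);
    double pct = periods ? 100.0 * (double)throttled / (double)periods : 0.0;

    printf("throttled in %.1f%% of CPU periods\n", pct);
    return 0;
}
```

Working from deltas of consecutive samples is what lets the collector present throttling as a per-interval rate even though the kernel only exposes cumulative counters.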