diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2022-06-09 04:52:47 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2022-06-09 04:52:57 +0000 |
commit | 00151562145df50cc65e9902d52d5fa77f89fe50 (patch) | |
tree | 2737716802f6725a5074d606ec8fe5422c58a83c /collectors/cgroups.plugin | |
parent | Releasing debian version 1.34.1-1. (diff) | |
download | netdata-00151562145df50cc65e9902d52d5fa77f89fe50.tar.xz netdata-00151562145df50cc65e9902d52d5fa77f89fe50.zip |
Merging upstream version 1.35.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'collectors/cgroups.plugin')
-rw-r--r-- | collectors/cgroups.plugin/Makefile.am | 8 | ||||
-rw-r--r-- | collectors/cgroups.plugin/README.md | 287 | ||||
-rwxr-xr-x | collectors/cgroups.plugin/cgroup-name.sh (renamed from collectors/cgroups.plugin/cgroup-name.sh.in) | 265 | ||||
-rwxr-xr-x | collectors/cgroups.plugin/cgroup-network-helper.sh | 11 | ||||
-rw-r--r-- | collectors/cgroups.plugin/cgroup-network.c | 56 | ||||
-rw-r--r-- | collectors/cgroups.plugin/sys_fs_cgroup.c | 1385 | ||||
-rw-r--r-- | collectors/cgroups.plugin/sys_fs_cgroup.h | 2 | ||||
-rw-r--r-- | collectors/cgroups.plugin/tests/test_cgroups_plugin.c | 8 | ||||
-rw-r--r-- | collectors/cgroups.plugin/tests/test_doubles.c | 4 |
9 files changed, 1299 insertions, 727 deletions
diff --git a/collectors/cgroups.plugin/Makefile.am b/collectors/cgroups.plugin/Makefile.am index 9e924aba1..354b9fbdc 100644 --- a/collectors/cgroups.plugin/Makefile.am +++ b/collectors/cgroups.plugin/Makefile.am @@ -3,19 +3,11 @@ AUTOMAKE_OPTIONS = subdir-objects MAINTAINERCLEANFILES = $(srcdir)/Makefile.in -CLEANFILES = \ - cgroup-name.sh \ - $(NULL) - -include $(top_srcdir)/build/subst.inc -SUFFIXES = .in - dist_plugins_SCRIPTS = \ cgroup-name.sh \ cgroup-network-helper.sh \ $(NULL) dist_noinst_DATA = \ - cgroup-name.sh.in \ README.md \ $(NULL) diff --git a/collectors/cgroups.plugin/README.md b/collectors/cgroups.plugin/README.md index d74ef000e..d0f822e66 100644 --- a/collectors/cgroups.plugin/README.md +++ b/collectors/cgroups.plugin/README.md @@ -7,30 +7,28 @@ custom_edit_url: https://github.com/netdata/netdata/edit/master/collectors/cgrou You can monitor containers and virtual machines using **cgroups**. -cgroups (or control groups), are a Linux kernel feature that provides accounting and resource usage limiting for processes. When cgroups are bundled with namespaces (i.e. isolation), they form what we usually call **containers**. +cgroups (or control groups), are a Linux kernel feature that provides accounting and resource usage limiting for +processes. When cgroups are bundled with namespaces (i.e. isolation), they form what we usually call **containers**. -cgroups are hierarchical, meaning that cgroups can contain child cgroups, which can contain more cgroups, etc. All accounting is reported (and resource usage limits are applied) also in a hierarchical way. +cgroups are hierarchical, meaning that cgroups can contain child cgroups, which can contain more cgroups, etc. All +accounting is reported (and resource usage limits are applied) also in a hierarchical way. -To visualize cgroup metrics Netdata provides configuration for cherry picking the cgroups of interest. By default (without any configuration) Netdata should pick **systemd services**, all kinds of **containers** (lxc, docker, etc) and **virtual machines** spawn by managers that register them with cgroups (qemu, libvirt, etc). +To visualize cgroup metrics Netdata provides configuration for cherry picking the cgroups of interest. By default ( +without any configuration) Netdata should pick **systemd services**, all kinds of **containers** (lxc, docker, etc) +and **virtual machines** spawn by managers that register them with cgroups (qemu, libvirt, etc). -## configuring Netdata for cgroups +## Configuring Netdata for cgroups -For each cgroup available in the system, Netdata provides this configuration: - -``` -[plugin:cgroups] - enable cgroup XXX = yes | no -``` - -But it also provides a few patterns to provide a sane default (`yes` or `no`). - -Below we see, how this works. +In general, no additional settings are required. Netdata discovers all available cgroups on the host system and +collects their metrics. ### how Netdata finds the available cgroups -Linux exposes resource usage reporting and provides dynamic configuration for cgroups, using virtual files (usually) under `/sys/fs/cgroup`. Netdata reads `/proc/self/mountinfo` to detect the exact mount point of cgroups. Netdata also allows manual configuration of this mount point, using these settings: +Linux exposes resource usage reporting and provides dynamic configuration for cgroups, using virtual files (usually) +under `/sys/fs/cgroup`. Netdata reads `/proc/self/mountinfo` to detect the exact mount point of cgroups. Netdata also +allows manual configuration of this mount point, using these settings: -``` +```text [plugin:cgroups] check for new cgroups every = 10 path to /sys/fs/cgroup/cpuacct = /sys/fs/cgroup/cpuacct @@ -43,90 +41,104 @@ Netdata rescans these directories for added or removed cgroups every `check for ### hierarchical search for cgroups -Since cgroups are hierarchical, for each of the directories shown above, Netdata walks through the subdirectories recursively searching for cgroups (each subdirectory is another cgroup). +Since cgroups are hierarchical, for each of the directories shown above, Netdata walks through the subdirectories +recursively searching for cgroups (each subdirectory is another cgroup). -For each of the directories found, Netdata provides a configuration variable: +To provide a sane default for this setting, Netdata uses the following pattern list (patterns starting with `!` give a +negative match and their order is important: the first matching a path will be used): -``` -[plugin:cgroups] - search for cgroups under PATH = yes | no -``` - -To provide a sane default for this setting, Netdata uses the following pattern list (patterns starting with `!` give a negative match and their order is important: the first matching a path will be used): - -``` +```text [plugin:cgroups] search for cgroups in subpaths matching = !*/init.scope !*-qemu !/init.scope !/system !/systemd !/user !/user.slice * ``` -So, we disable checking for **child cgroups** in systemd internal cgroups ([systemd services are monitored by Netdata](#monitoring-systemd-services)), user cgroups (normally used for desktop and remote user sessions), qemu virtual machines (child cgroups of virtual machines) and `init.scope`. All others are enabled. +So, we disable checking for **child cgroups** in systemd internal +cgroups ([systemd services are monitored by Netdata](#monitoring-systemd-services)), user cgroups (normally used for +desktop and remote user sessions), qemu virtual machines (child cgroups of virtual machines) and `init.scope`. All +others are enabled. ### unified cgroups (cgroups v2) support -Basic unified cgroups metrics are supported. To use them instead of v1 cgroups add: +Netdata automatically detects cgroups version. If detection fails Netdata assumes v1. +To switch to v2 manually add: -``` +```text [plugin:cgroups] use unified cgroups = yes path to unified cgroups = /sys/fs/cgroup ``` -Unified cgroups use same name pattern matching as v1 cgroups. `cgroup_enable_systemd_services_detailed_memory` is currently unsupported when using unified cgroups. +Unified cgroups use same name pattern matching as v1 cgroups. `cgroup_enable_systemd_services_detailed_memory` is +currently unsupported when using unified cgroups. ### enabled cgroups -To check if the cgroup is enabled, Netdata uses this setting: - -``` -[plugin:cgroups] - enable cgroup NAME = yes | no -``` +To provide a sane default, Netdata uses the +following [pattern list](https://learn.netdata.cloud/docs/agent/libnetdata/simple_pattern): -To provide a sane default, Netdata uses the following pattern list (it checks the pattern against the path of the cgroup): +- checks the pattern against the path of the cgroup -``` -[plugin:cgroups] - enable by default cgroups matching = !*/init.scope *.scope !*/vcpu* !*/emulator !*.mount !*.partition !*.service !*.slice !*.swap !*.user !/ !/docker !/libvirt !/lxc !/lxc/*/ns !/lxc/*/ns/* !/machine !/qemu !/system !/systemd !/user * -``` + ```text + [plugin:cgroups] + enable by default cgroups matching = !*/init.scope *.scope !*/vcpu* !*/emulator !*.mount !*.partition !*.service !*.slice !*.swap !*.user !/ !/docker !/libvirt !/lxc !/lxc/*/ns !/lxc/*/ns/* !/machine !/qemu !/system !/systemd !/user * + ``` -The above provides the default `yes` or `no` setting for the cgroup. However, there is an additional step. In many cases the cgroups found in the `/sys/fs/cgroup` hierarchy are just random numbers and in many cases these numbers are ephemeral: they change across reboots or sessions. +- checks the pattern against the name of the cgroup (as you see it on the dashboard) -So, we need to somehow map the paths of the cgroups to names, to provide consistent Netdata configuration (i.e. there is no point to say `enable cgroup 1234 = yes | no`, if `1234` is a random number that changes over time - we need a name for the cgroup first, so that `enable cgroup NAME = yes | no` will be consistent). + ```text + [plugin:cgroups] + enable by default cgroups names matching = * + ``` -For this mapping Netdata provides 2 configuration options: +Renaming is configured with the following options: -``` +```text [plugin:cgroups] run script to rename cgroups matching = *.scope *docker* *lxc* *qemu* !/ !*.mount !*.partition !*.service !*.slice !*.swap !*.user * script to get cgroup names = /usr/libexec/netdata/plugins.d/cgroup-name.sh ``` -The whole point for the additional pattern list, is to limit the number of times the script will be called. Without this pattern list, the script might be called thousands of times, depending on the number of cgroups available in the system. +The whole point for the additional pattern list, is to limit the number of times the script will be called. Without this +pattern list, the script might be called thousands of times, depending on the number of cgroups available in the system. -The above pattern list is matched against the path of the cgroup. For matched cgroups, Netdata calls the script [cgroup-name.sh](https://raw.githubusercontent.com/netdata/netdata/master/collectors/cgroups.plugin/cgroup-name.sh.in) to get its name. This script queries `docker`, `kubectl`, `podman`, or applies heuristics to find give a name for the cgroup. +The above pattern list is matched against the path of the cgroup. For matched cgroups, Netdata calls the +script [cgroup-name.sh](https://raw.githubusercontent.com/netdata/netdata/master/collectors/cgroups.plugin/cgroup-name.sh) +to get its name. This script queries `docker`, `kubectl`, `podman`, or applies heuristics to find give a name for the +cgroup. #### Note on Podman container names -Podman's security model is a lot more restrictive than Docker's, so Netdata will not be able to detect container names out of the box unless they were started by the same user as Netdata itself. +Podman's security model is a lot more restrictive than Docker's, so Netdata will not be able to detect container names +out of the box unless they were started by the same user as Netdata itself. -If Podman is used in "rootful" mode, it's also possible to use `podman system service` to grant Netdata access to container names. To do this, ensure `podman system service` is running and Netdata has access to `/run/podman/podman.sock` (the default permissions as specified by upstream are `0600`, with owner `root`, so you will have to adjust the configuration). +If Podman is used in "rootful" mode, it's also possible to use `podman system service` to grant Netdata access to +container names. To do this, ensure `podman system service` is running and Netdata has access +to `/run/podman/podman.sock` (the default permissions as specified by upstream are `0600`, with owner `root`, so you +will have to adjust the configuration). -[docker-socket-proxy](https://github.com/Tecnativa/docker-socket-proxy) can also be used to give Netdata restricted access to the socket. Note that `PODMAN_HOST` in Netdata's environment should be set to the proxy's URL in this case. +[docker-socket-proxy](https://github.com/Tecnativa/docker-socket-proxy) can also be used to give Netdata restricted +access to the socket. Note that `PODMAN_HOST` in Netdata's environment should be set to the proxy's URL in this case. ### charts with zero metrics -By default, Netdata will enable monitoring metrics only when they are not zero. If they are constantly zero they are ignored. Metrics that will start having values, after Netdata is started, will be detected and charts will be automatically added to the dashboard (a refresh of the dashboard is needed for them to appear though). Set `yes` for a chart instead of `auto` to enable it permanently. For example: +By default, Netdata will enable monitoring metrics only when they are not zero. If they are constantly zero they are +ignored. Metrics that will start having values, after Netdata is started, will be detected and charts will be +automatically added to the dashboard (a refresh of the dashboard is needed for them to appear though). Set `yes` for a +chart instead of `auto` to enable it permanently. For example: -``` +```text [plugin:cgroups] enable memory (used mem including cache) = yes ``` -You can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero metrics for all internal Netdata plugins. +You can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero +metrics for all internal Netdata plugins. ### alarms -CPU and memory limits are watched and used to rise alarms. Memory usage for every cgroup is checked against `ram` and `ram+swap` limits. CPU usage for every cgroup is checked against `cpuset.cpus` and `cpu.cfs_period_us` + `cpu.cfs_quota_us` pair assigned for the cgroup. Configuration for the alarms is available in `health.d/cgroups.conf` file. +CPU and memory limits are watched and used to rise alarms. Memory usage for every cgroup is checked against `ram` +and `ram+swap` limits. CPU usage for every cgroup is checked against `cpuset.cpus` and `cpu.cfs_period_us` + `cpu.cfs_quota_us` pair assigned for the cgroup. Configuration for the alarms is available in `health.d/cgroups.conf` +file. ## Monitoring systemd services @@ -136,47 +148,48 @@ Netdata monitors **systemd services**. Example: Support per distribution: -|system|systemd services<br/>charts shown|`tree`<br/>`/sys/fs/cgroup`|comments| -|:----:|:-------------------------------:|:-------------------------:|:-------| -|Arch Linux|YES||| -|Gentoo|NO||can be enabled, see below| -|Ubuntu 16.04 LTS|YES||| -|Ubuntu 16.10|YES|[here](http://pastebin.com/PiWbQEXy)|| -|Fedora 25|YES|[here](http://pastebin.com/ax0373wF)|| -|Debian 8|NO||can be enabled, see below| -|AMI|NO|[here](http://pastebin.com/FrxmptjL)|not a systemd system| -|CentOS 7.3.1611|NO|[here](http://pastebin.com/SpzgezAg)|can be enabled, see below| +| system | charts shown | `/sys/fs/cgroup` tree | comments | +|:----------------:|:------------:|:------------------------------------:|:--------------------------| +| Arch Linux | YES | | | +| Gentoo | NO | | can be enabled, see below | +| Ubuntu 16.04 LTS | YES | | | +| Ubuntu 16.10 | YES | [here](http://pastebin.com/PiWbQEXy) | | +| Fedora 25 | YES | [here](http://pastebin.com/ax0373wF) | | +| Debian 8 | NO | | can be enabled, see below | +| AMI | NO | [here](http://pastebin.com/FrxmptjL) | not a systemd system | +| CentOS 7.3.1611 | NO | [here](http://pastebin.com/SpzgezAg) | can be enabled, see below | ### Monitored systemd service metrics -- CPU utilization -- Used memory -- RSS memory -- Mapped memory -- Cache memory -- Writeback memory -- Memory minor page faults -- Memory major page faults -- Memory charging activity -- Memory uncharging activity -- Memory limit failures -- Swap memory used -- Disk read bandwidth -- Disk write bandwidth -- Disk read operations -- Disk write operations -- Throttle disk read bandwidth -- Throttle disk write bandwidth -- Throttle disk read operations -- Throttle disk write operations -- Queued disk read operations -- Queued disk write operations -- Merged disk read operations -- Merged disk write operations +- CPU utilization +- Used memory +- RSS memory +- Mapped memory +- Cache memory +- Writeback memory +- Memory minor page faults +- Memory major page faults +- Memory charging activity +- Memory uncharging activity +- Memory limit failures +- Swap memory used +- Disk read bandwidth +- Disk write bandwidth +- Disk read operations +- Disk write operations +- Throttle disk read bandwidth +- Throttle disk write bandwidth +- Throttle disk read operations +- Throttle disk write operations +- Queued disk read operations +- Queued disk write operations +- Merged disk read operations +- Merged disk write operations ### how to enable cgroup accounting on systemd systems that is by default disabled -You can verify there is no accounting enabled, by running `systemd-cgtop`. The program will show only resources for cgroup `/`, but all services will show nothing. +You can verify there is no accounting enabled, by running `systemd-cgtop`. The program will show only resources for +cgroup `/`, but all services will show nothing. To enable cgroup accounting, execute this: @@ -186,7 +199,7 @@ sed -e 's|^#Default\(.*\)Accounting=.*$|Default\1Accounting=yes|g' /etc/systemd/ To see the changes it made, run this: -``` +```sh # diff /etc/systemd/system.conf /tmp/system.conf 40,44c40,44 < #DefaultCPUAccounting=no @@ -212,21 +225,25 @@ sudo cp /tmp/system.conf /etc/systemd/system.conf sudo systemctl daemon-reexec ``` -(`systemctl daemon-reload` does not reload the configuration of the server - so you have to execute `systemctl daemon-reexec`). +(`systemctl daemon-reload` does not reload the configuration of the server - so you have to +execute `systemctl daemon-reexec`). -Now, when you run `systemd-cgtop`, services will start reporting usage (if it does not, restart a service - any service - to wake it up). Refresh your Netdata dashboard, and you will have the charts too. +Now, when you run `systemd-cgtop`, services will start reporting usage (if it does not, restart any service to wake it up). Refresh your Netdata dashboard, and you will have the charts too. -In case memory accounting is missing, you will need to enable it at your kernel, by appending the following kernel boot options and rebooting: +In case memory accounting is missing, you will need to enable it at your kernel, by appending the following kernel boot +options and rebooting: -``` +```sh cgroup_enable=memory swapaccount=1 ``` -You can add the above, directly at the `linux` line in your `/boot/grub/grub.cfg` or appending them to the `GRUB_CMDLINE_LINUX` in `/etc/default/grub` (in which case you will have to run `update-grub` before rebooting). On DigitalOcean debian images you may have to set it at `/etc/default/grub.d/50-cloudimg-settings.cfg`. +You can add the above, directly at the `linux` line in your `/boot/grub/grub.cfg` or appending them to +the `GRUB_CMDLINE_LINUX` in `/etc/default/grub` (in which case you will have to run `update-grub` before rebooting). On +DigitalOcean debian images you may have to set it at `/etc/default/grub.d/50-cloudimg-settings.cfg`. Which systemd services are monitored by Netdata is determined by the following pattern list: -``` +```text [plugin:cgroups] cgroups to match as systemd services = !/system.slice/*/*.service /system.slice/*.service ``` @@ -235,53 +252,57 @@ Which systemd services are monitored by Netdata is determined by the following p ## Monitoring ephemeral containers -Netdata monitors containers automatically when it is installed at the host, or when it is installed in a container that has access to the `/proc` and `/sys` filesystems of the host. +Netdata monitors containers automatically when it is installed at the host, or when it is installed in a container that +has access to the `/proc` and `/sys` filesystems of the host. Netdata prior to v1.6 had 2 issues when such containers were monitored: -1. network interface alarms where triggering when containers were stopped +1. network interface alarms where triggering when containers were stopped -2. charts were never cleaned up, so after some time dozens of containers were showing up on the dashboard, and they were occupying memory. +2. charts were never cleaned up, so after some time dozens of containers were showing up on the dashboard, and they were + occupying memory. ### the current Netdata network interfaces and cgroups (containers) are now self-cleaned. -So, when a network interface or container stops, Netdata might log a few errors in error.log complaining about files it cannot find, but immediately: +So, when a network interface or container stops, Netdata might log a few errors in error.log complaining about files it +cannot find, but immediately: -1. it will detect this is a removed container or network interface -2. it will freeze/pause all alarms for them -3. it will mark their charts as obsolete -4. obsolete charts are not be offered on new dashboard sessions (so hit F5 and the charts are gone) -5. existing dashboard sessions will continue to see them, but of course they will not refresh -6. obsolete charts will be removed from memory, 1 hour after the last user viewed them (configurable with `[global].cleanup obsolete charts after seconds = 3600` (at `netdata.conf`). -7. when obsolete charts are removed from memory they are also deleted from disk (configurable with `[global].delete obsolete charts files = yes`) +1. it will detect this is a removed container or network interface +2. it will freeze/pause all alarms for them +3. it will mark their charts as obsolete +4. obsolete charts are not be offered on new dashboard sessions (so hit F5 and the charts are gone) +5. existing dashboard sessions will continue to see them, but of course they will not refresh +6. obsolete charts will be removed from memory, 1 hour after the last user viewed them (configurable + with `[global].cleanup obsolete charts after seconds = 3600` (at `netdata.conf`). +7. when obsolete charts are removed from memory they are also deleted from disk (configurable + with `[global].delete obsolete charts files = yes`) ### Monitored container metrics -- CPU usage -- CPU usage within the limits -- CPU usage per core -- Memory usage -- Writeback memory -- Memory activity -- Memory page faults -- Used memory -- Used RAM within the limits -- Memory utilization -- Memory limit failures -- I/O bandwidth (all disks) -- Serviced I/O operations (all disks) -- Throttle I/O bandwidth (all disks) -- Throttle serviced I/O operations (all disks) -- Queued I/O operations (all disks) -- Merged I/O operations (all disks) -- CPU pressure -- Memory pressure -- Memory full pressure -- I/O pressure -- I/O full pressure - -Network interfaces are monitored by means of the [proc plugin](/collectors/proc.plugin/README.md#monitored-network-interface-metrics). - - +- CPU usage +- CPU usage within the limits +- CPU usage per core +- Memory usage +- Writeback memory +- Memory activity +- Memory page faults +- Used memory +- Used RAM within the limits +- Memory utilization +- Memory limit failures +- I/O bandwidth (all disks) +- Serviced I/O operations (all disks) +- Throttle I/O bandwidth (all disks) +- Throttle serviced I/O operations (all disks) +- Queued I/O operations (all disks) +- Merged I/O operations (all disks) +- CPU pressure +- Memory pressure +- Memory full pressure +- I/O pressure +- I/O full pressure + +Network interfaces are monitored by means of +the [proc plugin](/collectors/proc.plugin/README.md#monitored-network-interface-metrics). diff --git a/collectors/cgroups.plugin/cgroup-name.sh.in b/collectors/cgroups.plugin/cgroup-name.sh index 1f31c49a7..00d7e614c 100755 --- a/collectors/cgroups.plugin/cgroup-name.sh.in +++ b/collectors/cgroups.plugin/cgroup-name.sh @@ -114,6 +114,45 @@ function add_lbl_prefix() { echo "${new_labels:0:-1}" # trim last ',' } +function k8s_is_pause_container() { + local cgroup_path="${1}" + + local file + if [ -d "${NETDATA_HOST_PREFIX}/sys/fs/cgroup/cpuacct" ]; then + file="${NETDATA_HOST_PREFIX}/sys/fs/cgroup/cpuacct/$cgroup_path/cgroup.procs" + else + file="${NETDATA_HOST_PREFIX}/sys/fs/cgroup/$cgroup_path/cgroup.procs" + fi + + [ ! -f "$file" ] && return 1 + + local procs + IFS= read -rd' ' procs 2>/dev/null <"$file" + #shellcheck disable=SC2206 + procs=($procs) + + [ "${#procs[@]}" -ne 1 ] && return 1 + + IFS= read -r comm 2>/dev/null <"/proc/${procs[0]}/comm" + + [ "$comm" == "pause" ] + return +} + +function k8s_gcp_get_cluster_name() { + local header url id loc name + header="Metadata-Flavor: Google" + url="http://metadata/computeMetadata/v1" + if id=$(curl --fail -s -m 3 --noproxy "*" -H "$header" "$url/project/project-id") && + loc=$(curl --fail -s -m 3 --noproxy "*" -H "$header" "$url/instance/attributes/cluster-location") && + name=$(curl --fail -s -m 3 --noproxy "*" -H "$header" "$url/instance/attributes/cluster-name") && + [ -n "$id" ] && [ -n "$loc" ] && [ -n "$name" ]; then + echo "gke_${id}_${loc}_${name}" + return 0 + fi + return 1 +} + # k8s_get_kubepod_name resolves */kubepods/* cgroup name. # pod level cgroup name format: 'pod_<namespace>_<pod_name>' # container level cgroup name format: 'cntr_<namespace>_<pod_name>_<container_name>' @@ -151,7 +190,8 @@ function k8s_get_kubepod_name() { # - replaces '.' with '-' local fn="${FUNCNAME[0]}" - local id="${1}" + local cgroup_path="${1}" + local id="${2}" if [[ ! $id =~ ^kubepods ]]; then warning "${fn}: '${id}' is not kubepod cgroup." @@ -189,121 +229,157 @@ function k8s_get_kubepod_name() { if [ -z "$pod_uid" ] && [ -z "$cntr_id" ]; then warning "${fn}: can't extract pod_uid or container_id from the cgroup '$id'." - return 1 + return 3 fi [ -n "$pod_uid" ] && info "${fn}: cgroup '$id' is a pod(uid:$pod_uid)" [ -n "$cntr_id" ] && info "${fn}: cgroup '$id' is a container(id:$cntr_id)" + if [ -n "$cntr_id" ] && k8s_is_pause_container "$cgroup_path"; then + return 3 + fi + if ! command -v jq > /dev/null 2>&1; then warning "${fn}: 'jq' command not available." return 1 fi - local kube_system_ns - local tmp_kube_system_ns_file="${TMPDIR:-"/tmp/"}netdata-cgroups-kube-system-ns" - [ -f "$tmp_kube_system_ns_file" ] && kube_system_ns=$(cat "$tmp_kube_system_ns_file" 2> /dev/null) + local tmp_kube_cluster_name="${TMPDIR:-"/tmp"}/netdata-cgroups-k8s-cluster-name" + local tmp_kube_system_ns_uid_file="${TMPDIR:-"/tmp"}/netdata-cgroups-kubesystem-uid" + local tmp_kube_containers_file="${TMPDIR:-"/tmp"}/netdata-cgroups-containers" + + local kube_cluster_name + local kube_system_uid + local labels - local pods - if [ -n "${KUBERNETES_SERVICE_HOST}" ] && [ -n "${KUBERNETES_PORT_443_TCP_PORT}" ]; then - local token header host url - token="$(< /var/run/secrets/kubernetes.io/serviceaccount/token)" - header="Authorization: Bearer $token" - host="$KUBERNETES_SERVICE_HOST:$KUBERNETES_PORT_443_TCP_PORT" + if [ -n "$cntr_id" ] && + [ -f "$tmp_kube_cluster_name" ] && + [ -f "$tmp_kube_system_ns_uid_file" ] && + [ -f "$tmp_kube_containers_file" ] && + labels=$(grep "$cntr_id" "$tmp_kube_containers_file" 2>/dev/null); then + IFS= read -r kube_system_uid 2>/dev/null <"$tmp_kube_system_ns_uid_file" + IFS= read -r kube_cluster_name 2>/dev/null <"$tmp_kube_cluster_name" + else + IFS= read -r kube_system_uid 2>/dev/null <"$tmp_kube_system_ns_uid_file" + IFS= read -r kube_cluster_name 2>/dev/null <"$tmp_kube_cluster_name" + [ -z "$kube_cluster_name" ] && ! kube_cluster_name=$(k8s_gcp_get_cluster_name) && kube_cluster_name="unknown" + + local kube_system_ns + local pods + + if [ -n "${KUBERNETES_SERVICE_HOST}" ] && [ -n "${KUBERNETES_PORT_443_TCP_PORT}" ]; then + local token header host url + token="$(</var/run/secrets/kubernetes.io/serviceaccount/token)" + header="Authorization: Bearer $token" + host="$KUBERNETES_SERVICE_HOST:$KUBERNETES_PORT_443_TCP_PORT" + + if [ -z "$kube_system_uid" ]; then + url="https://$host/api/v1/namespaces/kube-system" + # FIX: check HTTP response code + if ! kube_system_ns=$(curl --fail -sSk -H "$header" "$url" 2>&1); then + warning "${fn}: error on curl '${url}': ${kube_system_ns}." + fi + fi - if [ -z "$kube_system_ns" ]; then - url="https://$host/api/v1/namespaces/kube-system" + url="https://$host/api/v1/pods" + [ -n "$MY_NODE_NAME" ] && url+="?fieldSelector=spec.nodeName==$MY_NODE_NAME" # FIX: check HTTP response code - if ! kube_system_ns=$(curl -sSk -H "$header" "$url" 2>&1); then - warning "${fn}: error on curl '${url}': ${kube_system_ns}." - else - echo "$kube_system_ns" > "$tmp_kube_system_ns_file" 2> /dev/null + if ! pods=$(curl --fail -sSk -H "$header" "$url" 2>&1); then + warning "${fn}: error on curl '${url}': ${pods}." + return 1 + fi + elif ps -C kubelet >/dev/null 2>&1 && command -v kubectl >/dev/null 2>&1; then + if [ -z "$kube_system_uid" ]; then + if ! kube_system_ns=$(kubectl --kubeconfig="$KUBE_CONFIG" get namespaces kube-system -o json 2>&1); then + warning "${fn}: error on 'kubectl': ${kube_system_ns}." + fi fi - fi - url="https://$host/api/v1/pods" - [ -n "$MY_NODE_NAME" ] && url+="?fieldSelector=spec.nodeName==$MY_NODE_NAME" - # FIX: check HTTP response code - if ! pods=$(curl -sSk -H "$header" "$url" 2>&1); then - warning "${fn}: error on curl '${url}': ${pods}." + [[ -z ${KUBE_CONFIG+x} ]] && KUBE_CONFIG="/etc/kubernetes/admin.conf" + if ! pods=$(kubectl --kubeconfig="$KUBE_CONFIG" get pods --all-namespaces -o json 2>&1); then + warning "${fn}: error on 'kubectl': ${pods}." + return 1 + fi + else + warning "${fn}: not inside the k8s cluster and 'kubectl' command not available." return 1 fi - elif ps -C kubelet > /dev/null 2>&1 && command -v kubectl > /dev/null 2>&1; then - if [ -z "$kube_system_ns" ]; then - if ! kube_system_ns=$(kubectl get namespaces kube-system -o json 2>&1); then - warning "${fn}: error on 'kubectl': ${kube_system_ns}." - else - echo "$kube_system_ns" > "$tmp_kube_system_ns_file" 2> /dev/null - fi + + if [ -n "$kube_system_ns" ] && ! kube_system_uid=$(jq -r '.metadata.uid' <<<"$kube_system_ns" 2>&1); then + warning "${fn}: error on 'jq' parse kube_system_ns: ${kube_system_uid}." fi - [[ -z ${KUBE_CONFIG+x} ]] && KUBE_CONFIG="/etc/kubernetes/admin.conf" - if ! pods=$(kubectl --kubeconfig="$KUBE_CONFIG" get pods --all-namespaces -o json 2>&1); then - warning "${fn}: error on 'kubectl': ${pods}." + local jq_filter + jq_filter+='.items[] | "' + jq_filter+='namespace=\"\(.metadata.namespace)\",' + jq_filter+='pod_name=\"\(.metadata.name)\",' + jq_filter+='pod_uid=\"\(.metadata.uid)\",' + #jq_filter+='\(.metadata.labels | to_entries | map("pod_label_"+.key+"=\""+.value+"\"") | join(",") | if length > 0 then .+"," else . end)' + jq_filter+='\((.metadata.ownerReferences[]? | select(.controller==true) | "controller_kind=\""+.kind+"\",controller_name=\""+.name+"\",") // "")' + jq_filter+='node_name=\"\(.spec.nodeName)\",' + jq_filter+='" + ' + jq_filter+='(.status.containerStatuses[]? | "' + jq_filter+='container_name=\"\(.name)\",' + jq_filter+='container_id=\"\(.containerID)\"' + jq_filter+='") | ' + jq_filter+='sub("(docker|cri-o|containerd)://";"")' # containerID: docker://a346da9bc0e3eaba6b295f64ac16e02f2190db2cef570835706a9e7a36e2c722 + + local containers + if ! containers=$(jq -r "${jq_filter}" <<<"$pods" 2>&1); then + warning "${fn}: error on 'jq' parse pods: ${containers}." return 1 fi - else - warning "${fn}: not inside the k8s cluster and 'kubectl' command not available." - return 1 - fi - local kube_system_uid - if [ -n "$kube_system_ns" ] && ! kube_system_uid=$(jq -r '.metadata.uid' <<< "$kube_system_ns" 2>&1); then - warning "${fn}: error on 'jq' parse kube_system_ns: ${kube_system_uid}." + [ -n "$kube_cluster_name" ] && echo "$kube_cluster_name" >"$tmp_kube_cluster_name" 2>/dev/null + [ -n "$kube_system_ns" ] && [ -n "$kube_system_uid" ] && echo "$kube_system_uid" >"$tmp_kube_system_ns_uid_file" 2>/dev/null + echo "$containers" >"$tmp_kube_containers_file" 2>/dev/null fi - local jq_filter - jq_filter+='.items[] | "' - jq_filter+='namespace=\"\(.metadata.namespace)\",' - jq_filter+='pod_name=\"\(.metadata.name)\",' - jq_filter+='pod_uid=\"\(.metadata.uid)\",' - #jq_filter+='\(.metadata.labels | to_entries | map("pod_label_"+.key+"=\""+.value+"\"") | join(",") | if length > 0 then .+"," else . end)' - jq_filter+='\((.metadata.ownerReferences[]? | select(.controller==true) | "controller_kind=\""+.kind+"\",controller_name=\""+.name+"\",") // "")' - jq_filter+='node_name=\"\(.spec.nodeName)\",' - jq_filter+='" + ' - jq_filter+='(.status.containerStatuses[]? | "' - jq_filter+='container_name=\"\(.name)\",' - jq_filter+='container_id=\"\(.containerID)\"' - jq_filter+='") | ' - jq_filter+='sub("(docker|cri-o|containerd)://";"")' # containerID: docker://a346da9bc0e3eaba6b295f64ac16e02f2190db2cef570835706a9e7a36e2c722 - - local containers - if ! containers=$(jq -r "${jq_filter}" <<< "$pods" 2>&1); then - warning "${fn}: error on 'jq' parse pods: ${containers}." - return 1 + local qos_class + if [[ $clean_id =~ .+(besteffort|burstable) ]]; then + qos_class="${BASH_REMATCH[1]}" + else + qos_class="guaranteed" fi # available labels: # namespace, pod_name, pod_uid, container_name, container_id, node_name - local labels if [ -n "$cntr_id" ]; then - if labels=$(grep "$cntr_id" <<< "$containers" 2> /dev/null); then + if [ -n "$labels" ] || labels=$(grep "$cntr_id" <<< "$containers" 2> /dev/null); then labels+=',kind="container"' + labels+=",qos_class=\"$qos_class\"" [ -n "$kube_system_uid" ] && [ "$kube_system_uid" != "null" ] && labels+=",cluster_id=\"$kube_system_uid\"" + [ -n "$kube_cluster_name" ] && [ "$kube_cluster_name" != "unknown" ] && labels+=",cluster_name=\"$kube_cluster_name\"" name="cntr" name+="_$(get_lbl_val "$labels" namespace)" name+="_$(get_lbl_val "$labels" pod_name)" name+="_$(get_lbl_val "$labels" container_name)" labels=$(add_lbl_prefix "$labels" "k8s_") name+=" $labels" + else + return 2 fi elif [ -n "$pod_uid" ]; then if labels=$(grep "$pod_uid" -m 1 <<< "$containers" 2> /dev/null); then labels="${labels%%,container_*}" labels+=',kind="pod"' + labels+=",qos_class=\"$qos_class\"" [ -n "$kube_system_uid" ] && [ "$kube_system_uid" != "null" ] && labels+=",cluster_id=\"$kube_system_uid\"" + [ -n "$kube_cluster_name" ] && [ "$kube_cluster_name" != "unknown" ] && labels+=",cluster_name=\"$kube_cluster_name\"" name="pod" name+="_$(get_lbl_val "$labels" namespace)" name+="_$(get_lbl_val "$labels" pod_name)" labels=$(add_lbl_prefix "$labels" "k8s_") name+=" $labels" + else + return 2 fi fi # jq filter nonexistent field and nonexistent label value is 'null' if [[ $name =~ _null(_|$) ]]; then warning "${fn}: invalid name: $name (cgroup '$id')" - name="" + return 1 fi echo "$name" @@ -313,15 +389,13 @@ function k8s_get_kubepod_name() { function k8s_get_name() { local fn="${FUNCNAME[0]}" - local id="${1}" - - NAME=$(k8s_get_kubepod_name "$id") + local cgroup_path="${1}" + local id="${2}" - if [ -z "${NAME}" ]; then - warning "${fn}: cannot find the name of cgroup with id '${id}'. Setting name to ${id} and disabling it." - NAME="${id}" - NAME_NOT_FOUND=3 - else + NAME=$(k8s_get_kubepod_name "$cgroup_path" "$id") + + case "$?" in + 0) NAME="k8s_${NAME}" local name labels @@ -332,7 +406,24 @@ function k8s_get_name() { else info "${fn}: cgroup '${id}' has chart name '${NAME}'" fi - fi + EXIT_CODE=$EXIT_SUCCESS + ;; + 1) + NAME="k8s_${id}" + warning "${fn}: cannot find the name of cgroup with id '${id}'. Setting name to ${NAME} and enabling it." + EXIT_CODE=$EXIT_SUCCESS + ;; + 2) + NAME="k8s_${id}" + warning "${fn}: cannot find the name of cgroup with id '${id}'. Setting name to ${NAME} and asking for retry." + EXIT_CODE=$EXIT_RETRY + ;; + *) + NAME="k8s_${id}" + warning "${fn}: cannot find the name of cgroup with id '${id}'. Setting name to ${NAME} and disabling it." + EXIT_CODE=$EXIT_DISABLE + ;; + esac } function docker_get_name() { @@ -344,7 +435,7 @@ function docker_get_name() { fi if [ -z "${NAME}" ]; then warning "cannot find the name of docker container '${id}'" - NAME_NOT_FOUND=2 + EXIT_CODE=$EXIT_RETRY NAME="${id:0:12}" else info "docker container '${id}' is named '${NAME}'" @@ -369,7 +460,7 @@ function podman_get_name() { if [ -z "${NAME}" ]; then warning "cannot find the name of podman container '${id}'" - NAME_NOT_FOUND=2 + EXIT_CODE=$EXIT_RETRY NAME="${id:0:12}" else info "podman container '${id}' is named '${NAME}'" @@ -387,13 +478,14 @@ function podman_validate_id() { # ----------------------------------------------------------------------------- -[ -z "${NETDATA_USER_CONFIG_DIR}" ] && NETDATA_USER_CONFIG_DIR="@configdir_POST@" -[ -z "${NETDATA_STOCK_CONFIG_DIR}" ] && NETDATA_STOCK_CONFIG_DIR="@libconfigdir_POST@" - DOCKER_HOST="${DOCKER_HOST:=/var/run/docker.sock}" PODMAN_HOST="${PODMAN_HOST:=/run/podman/podman.sock}" -CGROUP="${1}" -NAME_NOT_FOUND=0 +CGROUP_PATH="${1}" # the path as it is (e.g. '/docker/efcf4c409') +CGROUP="${2}" # the modified path (e.g. 'docker_efcf4c409') +EXIT_SUCCESS=0 +EXIT_RETRY=2 +EXIT_DISABLE=3 +EXIT_CODE=$EXIT_SUCCESS NAME= # ----------------------------------------------------------------------------- @@ -402,22 +494,9 @@ if [ -z "${CGROUP}" ]; then fatal "called without a cgroup name. Nothing to do." fi -for CONFIG in "${NETDATA_USER_CONFIG_DIR}/cgroups-names.conf" "${NETDATA_STOCK_CONFIG_DIR}/cgroups-names.conf"; do - if [ -f "${CONFIG}" ]; then - NAME="$(grep "^${CGROUP} " "${CONFIG}" | sed 's/[[:space:]]\+/ /g' | cut -d ' ' -f 2)" - if [ -z "${NAME}" ]; then - info "cannot find cgroup '${CGROUP}' in '${CONFIG}'." - else - break - fi - #else - # info "configuration file '${CONFIG}' is not available." - fi -done - if [ -z "${NAME}" ]; then if [[ ${CGROUP} =~ ^.*kubepods.* ]]; then - k8s_get_name "${CGROUP}" + k8s_get_name "${CGROUP_PATH}" "${CGROUP}" fi fi @@ -488,4 +567,4 @@ fi info "cgroup '${CGROUP}' is called '${NAME}'" echo "${NAME}" -exit ${NAME_NOT_FOUND} +exit ${EXIT_CODE} diff --git a/collectors/cgroups.plugin/cgroup-network-helper.sh b/collectors/cgroups.plugin/cgroup-network-helper.sh index f355480b8..07318d774 100755 --- a/collectors/cgroups.plugin/cgroup-network-helper.sh +++ b/collectors/cgroups.plugin/cgroup-network-helper.sh @@ -218,15 +218,8 @@ netnsid_find_all_interfaces_for_pid() { netnsid_find_all_interfaces_for_cgroup() { local c="${1}" # the cgroup path - # for each pid of the cgroup - # find any tun/tap devices linked to the pid - if [ -f "${c}/cgroup.procs" ] - then - local p - for p in $(< "${c}/cgroup.procs" ) - do - netnsid_find_all_interfaces_for_pid "${p}" - done + if [ -f "${c}/cgroup.procs" ]; then + netnsid_find_all_interfaces_for_pid "$(head -n 1 "${c}/cgroup.procs" 2>/dev/null)" else debug "Cannot find file '${c}/cgroup.procs', not searching for netnsid interfaces." fi diff --git a/collectors/cgroups.plugin/cgroup-network.c b/collectors/cgroups.plugin/cgroup-network.c index 6465c91e7..ec3d814c6 100644 --- a/collectors/cgroups.plugin/cgroup-network.c +++ b/collectors/cgroups.plugin/cgroup-network.c @@ -27,6 +27,14 @@ struct iface { struct iface *next; }; +unsigned int calc_num_ifaces(struct iface *root) { + unsigned int num = 0; + for (struct iface *h = root; h; h = h->next) { + num++; + } + return num; +} + unsigned int read_iface_iflink(const char *prefix, const char *iface) { if(!prefix) prefix = ""; @@ -447,6 +455,25 @@ void detect_veth_interfaces(pid_t pid) { goto cleanup; } + unsigned int host_dev_num = calc_num_ifaces(host); + unsigned int cgroup_dev_num = calc_num_ifaces(cgroup); + // host ifaces == guest ifaces => we are still in the host namespace + // and we can't really identify which ifaces belong to the cgroup (e.g. Proxmox VM). + if (host_dev_num == cgroup_dev_num) { + unsigned int m = 0; + for (h = host; h; h = h->next) { + for (c = cgroup; c; c = c->next) { + if (h->ifindex == c->ifindex && h->iflink == c->iflink) { + m++; + break; + } + } + } + if (host_dev_num == m) { + goto cleanup; + } + } + for(h = host; h ; h = h->next) { if(iface_is_eligible(h)) { for (c = cgroup; c; c = c->next) { @@ -479,7 +506,17 @@ void call_the_helper(pid_t pid, const char *cgroup) { info("running: %s", command); pid_t cgroup_pid; - FILE *fp = mypopene(command, &cgroup_pid, environment); + FILE *fp; + + if(cgroup) { + (void)mypopen_raw_default_flags(&cgroup_pid, environment, &fp, PLUGINS_DIR "/cgroup-network-helper.sh", "--cgroup", cgroup); + } + else { + char buffer[100]; + snprintfz(buffer, sizeof(buffer) - 1, "%d", pid); + (void)mypopen_raw_default_flags(&cgroup_pid, environment, &fp, PLUGINS_DIR "/cgroup-network-helper.sh", "--pid", buffer); + } + if(fp) { char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; char *s; @@ -643,8 +680,13 @@ int main(int argc, char **argv) { if(argc != 3) usage(); - if(!strcmp(argv[1], "-p") || !strcmp(argv[1], "--pid")) { - pid = atoi(argv[2]); + int arg = 1; + int helper = 1; + if (getenv("KUBERNETES_SERVICE_HOST") != NULL && getenv("KUBERNETES_SERVICE_PORT") != NULL) + helper = 0; + + if(!strcmp(argv[arg], "-p") || !strcmp(argv[arg], "--pid")) { + pid = atoi(argv[arg+1]); if(pid <= 0) { errno = 0; @@ -652,17 +694,17 @@ int main(int argc, char **argv) { return 2; } - call_the_helper(pid, NULL); + if(helper) call_the_helper(pid, NULL); } - else if(!strcmp(argv[1], "--cgroup")) { - char *cgroup = argv[2]; + else if(!strcmp(argv[arg], "--cgroup")) { + char *cgroup = argv[arg+1]; if(verify_path(cgroup) == -1) { error("cgroup '%s' does not exist or is not valid.", cgroup); return 1; } pid = read_pid_from_cgroup(cgroup); - call_the_helper(pid, cgroup); + if(helper) call_the_helper(pid, cgroup); if(pid <= 0 && !detected_devices) { errno = 0; diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c index 8efb68cf0..5676ef8ca 100644 --- a/collectors/cgroups.plugin/sys_fs_cgroup.c +++ b/collectors/cgroups.plugin/sys_fs_cgroup.c @@ -6,14 +6,39 @@ #define PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME "systemd" #define PLUGIN_CGROUPS_MODULE_CGROUPS_NAME "/sys/fs/cgroup" +// main cgroups thread worker jobs +#define WORKER_CGROUPS_LOCK 0 +#define WORKER_CGROUPS_READ 1 +#define WORKER_CGROUPS_CHART 2 + +// discovery cgroup thread worker jobs +#define WORKER_DISCOVERY_INIT 0 +#define WORKER_DISCOVERY_FIND 1 +#define WORKER_DISCOVERY_PROCESS 2 +#define WORKER_DISCOVERY_PROCESS_RENAME 3 +#define WORKER_DISCOVERY_PROCESS_NETWORK 4 +#define WORKER_DISCOVERY_PROCESS_FIRST_TIME 5 +#define WORKER_DISCOVERY_UPDATE 6 +#define WORKER_DISCOVERY_CLEANUP 7 +#define WORKER_DISCOVERY_COPY 8 +#define WORKER_DISCOVERY_SHARE 9 +#define WORKER_DISCOVERY_LOCK 10 + +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 11 +#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 11 +#endif + // ---------------------------------------------------------------------------- // cgroup globals +static int is_inside_k8s = 0; + static long system_page_size = 4096; // system will be queried via sysconf() in configuration() static int cgroup_enable_cpuacct_stat = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_cpuacct_cpu_throttling = CONFIG_BOOLEAN_YES; +static int cgroup_enable_cpuacct_cpu_shares = CONFIG_BOOLEAN_NO; static int cgroup_enable_memory = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_detailed_memory = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_memory_failcnt = CONFIG_BOOLEAN_AUTO; @@ -39,7 +64,6 @@ static int cgroup_unified_exist = CONFIG_BOOLEAN_AUTO; static int cgroup_search_in_devices = 1; -static int cgroup_enable_new_cgroups_detected_at_runtime = 1; static int cgroup_check_for_new_every = 10; static int cgroup_update_every = 1; static int cgroup_containers_chart_priority = NETDATA_CHART_PRIO_CGROUPS_CONTAINERS; @@ -59,11 +83,14 @@ static int cgroup_root_count = 0; static int cgroup_root_max = 1000; static int cgroup_max_depth = 0; -static SIMPLE_PATTERN *enabled_cgroup_patterns = NULL; static SIMPLE_PATTERN *enabled_cgroup_paths = NULL; +static SIMPLE_PATTERN *enabled_cgroup_names = NULL; +static SIMPLE_PATTERN *search_cgroup_paths = NULL; static SIMPLE_PATTERN *enabled_cgroup_renames = NULL; static SIMPLE_PATTERN *systemd_services_cgroups = NULL; +static SIMPLE_PATTERN *entrypoint_parent_process_comm = NULL; + static char *cgroups_rename_script = NULL; static char *cgroups_network_interface_script = NULL; @@ -283,6 +310,7 @@ void read_cgroup_plugin_configuration() { cgroup_enable_cpuacct_stat = config_get_boolean_ondemand("plugin:cgroups", "enable cpuacct stat (total CPU)", cgroup_enable_cpuacct_stat); cgroup_enable_cpuacct_usage = config_get_boolean_ondemand("plugin:cgroups", "enable cpuacct usage (per core CPU)", cgroup_enable_cpuacct_usage); cgroup_enable_cpuacct_cpu_throttling = config_get_boolean_ondemand("plugin:cgroups", "enable cpuacct cpu throttling", cgroup_enable_cpuacct_cpu_throttling); + cgroup_enable_cpuacct_cpu_shares = config_get_boolean_ondemand("plugin:cgroups", "enable cpuacct cpu shares", cgroup_enable_cpuacct_cpu_shares); cgroup_enable_memory = config_get_boolean_ondemand("plugin:cgroups", "enable memory", cgroup_enable_memory); cgroup_enable_detailed_memory = config_get_boolean_ondemand("plugin:cgroups", "enable detailed memory", cgroup_enable_detailed_memory); @@ -407,9 +435,7 @@ void read_cgroup_plugin_configuration() { cgroup_root_max = (int)config_get_number("plugin:cgroups", "max cgroups to allow", cgroup_root_max); cgroup_max_depth = (int)config_get_number("plugin:cgroups", "max cgroups depth to monitor", cgroup_max_depth); - cgroup_enable_new_cgroups_detected_at_runtime = config_get_boolean("plugin:cgroups", "enable new cgroups detected at run time", cgroup_enable_new_cgroups_detected_at_runtime); - - enabled_cgroup_patterns = simple_pattern_create( + enabled_cgroup_paths = simple_pattern_create( config_get("plugin:cgroups", "enable by default cgroups matching", // ---------------------------------------------------------------- @@ -451,7 +477,12 @@ void read_cgroup_plugin_configuration() { " * " // enable anything else ), NULL, SIMPLE_PATTERN_EXACT); - enabled_cgroup_paths = simple_pattern_create( + enabled_cgroup_names = simple_pattern_create( + config_get("plugin:cgroups", "enable by default cgroups names matching", + " * " + ), NULL, SIMPLE_PATTERN_EXACT); + + search_cgroup_paths = simple_pattern_create( config_get("plugin:cgroups", "search for cgroups in subpaths matching", " !*/init.scope " // ignore init.scope " !*-qemu " // #345 @@ -492,7 +523,9 @@ void read_cgroup_plugin_configuration() { " *docker* " " *lxc* " " *qemu* " - " *kubepods* " // #3396 kubernetes + " /kubepods/pod*/* " // k8s containers + " /kubepods/*/pod*/* " // k8s containers + " !/kubepods* " // all other k8s cgroups " *.libvirt-qemu " // #3010 " * " ), NULL, SIMPLE_PATTERN_EXACT); @@ -705,6 +738,17 @@ struct cpuacct_cpu_throttling { unsigned long long nr_throttled_perc; }; +// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpu#sect-cfs +// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/managing_monitoring_and_updating_the_kernel/using-cgroups-v2-to-control-distribution-of-cpu-time-for-applications_managing-monitoring-and-updating-the-kernel#proc_controlling-distribution-of-cpu-time-for-applications-by-adjusting-cpu-weight_using-cgroups-v2-to-control-distribution-of-cpu-time-for-applications +struct cpuacct_cpu_shares { + int updated; + int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + + char *filename; + + unsigned long long shares; +}; + struct cgroup_network_interface { const char *host_device; const char *container_device; @@ -715,6 +759,9 @@ struct cgroup_network_interface { struct cgroup { uint32_t options; + int first_time_seen; // first time seen by the discoverer + int processed; // the discoverer is done processing a cgroup (resolved name, set 'enabled' option) + char available; // found in the filesystem char enabled; // enabled in the config @@ -734,6 +781,7 @@ struct cgroup { struct cpuacct_stat cpuacct_stat; struct cpuacct_usage cpuacct_usage; struct cpuacct_cpu_throttling cpuacct_cpu_throttling; + struct cpuacct_cpu_shares cpuacct_cpu_shares; struct memory memory; @@ -758,6 +806,7 @@ struct cgroup { RRDSET *st_cpu_per_core; RRDSET *st_cpu_nr_throttled; RRDSET *st_cpu_throttled_time; + RRDSET *st_cpu_shares; RRDSET *st_mem; RRDSET *st_mem_utilization; @@ -842,7 +891,115 @@ struct discovery_thread { int exited; } discovery_thread; -// ---------------------------------------------------------------------------- +// --------------------------------------------------------------------------------------------- + +static inline int matches_enabled_cgroup_paths(char *id) { + return simple_pattern_matches(enabled_cgroup_paths, id); +} + +static inline int matches_enabled_cgroup_names(char *name) { + return simple_pattern_matches(enabled_cgroup_names, name); +} + +static inline int matches_enabled_cgroup_renames(char *id) { + return simple_pattern_matches(enabled_cgroup_renames, id); +} + +static inline int matches_systemd_services_cgroups(char *id) { + return simple_pattern_matches(systemd_services_cgroups, id); +} + +static inline int matches_search_cgroup_paths(const char *dir) { + return simple_pattern_matches(search_cgroup_paths, dir); +} + +static inline int matches_entrypoint_parent_process_comm(const char *comm) { + return simple_pattern_matches(entrypoint_parent_process_comm, comm); +} + +static inline int is_cgroup_systemd_service(struct cgroup *cg) { + return (cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE); +} + +// --------------------------------------------------------------------------------------------- +static int k8s_is_container(const char *id) { + // examples: + // https://github.com/netdata/netdata/blob/0fc101679dcd12f1cb8acdd07bb4c85d8e553e53/collectors/cgroups.plugin/cgroup-name.sh#L121-L147 + const char *p = id; + const char *pp = NULL; + int i = 0; + size_t l = 3; // pod + while ((p = strstr(p, "pod"))) { + i++; + p += l; + pp = p; + } + return !(i < 2 || !pp || !(pp = strchr(pp, '/')) || !pp++ || !*pp); +} + +#define TASK_COMM_LEN 16 + +static int k8s_get_container_first_proc_comm(const char *id, char *comm) { + if (!k8s_is_container(id)) { + return 1; + } + + static procfile *ff = NULL; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/%s/cgroup.procs", cgroup_cpuacct_base, id); + + ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_DEFAULT); + if (unlikely(!ff)) { + debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename); + return 1; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) { + debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename); + return 1; + } + + unsigned long lines = procfile_lines(ff); + if (likely(lines < 2)) { + return 1; + } + + char *pid = procfile_lineword(ff, 0, 0); + if (!pid || !*pid) { + return 1; + } + + snprintfz(filename, FILENAME_MAX, "%s/proc/%s/comm", netdata_configured_host_prefix, pid); + + ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_DEFAULT); + if (unlikely(!ff)) { + debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename); + return 1; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) { + debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename); + return 1; + } + + lines = procfile_lines(ff); + if (unlikely(lines != 2)) { + return 1; + } + + char *proc_comm = procfile_lineword(ff, 0, 0); + if (!proc_comm || !*proc_comm) { + return 1; + } + + strncpyz(comm, proc_comm, TASK_COMM_LEN); + return 0; +} + +// --------------------------------------------------------------------------------------------- static unsigned long long calc_delta(unsigned long long curr, unsigned long long prev) { if (prev > curr) { @@ -858,6 +1015,15 @@ static unsigned long long calc_percentage(unsigned long long value, unsigned lon return (calculated_number)value / (calculated_number)total * 100; } +static int calc_cgroup_depth(const char *id) { + int depth = 0; + const char *s; + for (s = id; *s; s++) { + depth += unlikely(*s == '/'); + } + return depth; +} + // ---------------------------------------------------------------------------- // read values from /sys @@ -1029,6 +1195,24 @@ static inline void cgroup2_read_cpuacct_cpu_stat(struct cpuacct_stat *cp, struct } } +static inline void cgroup_read_cpuacct_cpu_shares(struct cpuacct_cpu_shares *cp) { + if (unlikely(!cp->filename)) { + return; + } + + if (unlikely(read_single_number_file(cp->filename, &cp->shares))) { + cp->updated = 0; + cgroups_check = 1; + return; + } + + cp->updated = 1; + if (unlikely((cp->enabled == CONFIG_BOOLEAN_AUTO)) && + (cp->shares || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) { + cp->enabled = CONFIG_BOOLEAN_YES; + } +} + static inline void cgroup_read_cpuacct_usage(struct cpuacct_usage *ca) { static procfile *ff = NULL; @@ -1234,14 +1418,17 @@ static inline void cgroup2_read_pressure(struct pressure *res) { return; } - res->some.value10 = strtod(procfile_lineword(ff, 0, 2), NULL); - res->some.value60 = strtod(procfile_lineword(ff, 0, 4), NULL); - res->some.value300 = strtod(procfile_lineword(ff, 0, 6), NULL); + + res->some.share_time.value10 = strtod(procfile_lineword(ff, 0, 2), NULL); + res->some.share_time.value60 = strtod(procfile_lineword(ff, 0, 4), NULL); + res->some.share_time.value300 = strtod(procfile_lineword(ff, 0, 6), NULL); + res->some.total_time.value_total = str2ull(procfile_lineword(ff, 0, 8)) / 1000; // us->ms if (lines > 2) { - res->full.value10 = strtod(procfile_lineword(ff, 1, 2), NULL); - res->full.value60 = strtod(procfile_lineword(ff, 1, 4), NULL); - res->full.value300 = strtod(procfile_lineword(ff, 1, 6), NULL); + res->full.share_time.value10 = strtod(procfile_lineword(ff, 1, 2), NULL); + res->full.share_time.value60 = strtod(procfile_lineword(ff, 1, 4), NULL); + res->full.share_time.value300 = strtod(procfile_lineword(ff, 1, 6), NULL); + res->full.total_time.value_total = str2ull(procfile_lineword(ff, 0, 8)) / 1000; // us->ms } res->updated = 1; @@ -1394,12 +1581,13 @@ memory_next: } } -static inline void cgroup_read(struct cgroup *cg) { +static inline void read_cgroup(struct cgroup *cg) { debug(D_CGROUP, "reading metrics for cgroups '%s'", cg->id); if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { cgroup_read_cpuacct_stat(&cg->cpuacct_stat); cgroup_read_cpuacct_usage(&cg->cpuacct_usage); cgroup_read_cpuacct_cpu_stat(&cg->cpuacct_cpu_throttling); + cgroup_read_cpuacct_cpu_shares(&cg->cpuacct_cpu_shares); cgroup_read_memory(&cg->memory, 0); cgroup_read_blkio(&cg->io_service_bytes); cgroup_read_blkio(&cg->io_serviced); @@ -1413,6 +1601,7 @@ static inline void cgroup_read(struct cgroup *cg) { cgroup2_read_blkio(&cg->io_service_bytes, 0); cgroup2_read_blkio(&cg->io_serviced, 4); cgroup2_read_cpuacct_cpu_stat(&cg->cpuacct_stat, &cg->cpuacct_cpu_throttling); + cgroup_read_cpuacct_cpu_shares(&cg->cpuacct_cpu_shares); cgroup2_read_pressure(&cg->cpu_pressure); cgroup2_read_pressure(&cg->io_pressure); cgroup2_read_pressure(&cg->memory_pressure); @@ -1420,14 +1609,15 @@ static inline void cgroup_read(struct cgroup *cg) { } } -static inline void read_all_cgroups(struct cgroup *root) { +static inline void read_all_discovered_cgroups(struct cgroup *root) { debug(D_CGROUP, "reading metrics for all cgroups"); struct cgroup *cg; - - for(cg = root; cg ; cg = cg->next) - if(cg->enabled && !cg->pending_renames) - cgroup_read(cg); + for (cg = root; cg; cg = cg->next) { + if (cg->enabled && !cg->pending_renames) { + read_cgroup(cg); + } + } } // ---------------------------------------------------------------------------- @@ -1438,19 +1628,20 @@ static inline void read_cgroup_network_interfaces(struct cgroup *cg) { debug(D_CGROUP, "looking for the network interfaces of cgroup '%s' with chart id '%s' and title '%s'", cg->id, cg->chart_id, cg->chart_title); pid_t cgroup_pid; - char command[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; + char cgroup_identifier[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { - snprintfz(command, CGROUP_NETWORK_INTERFACE_MAX_LINE, "exec %s --cgroup '%s%s'", cgroups_network_interface_script, cgroup_cpuacct_base, cg->id); + snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_cpuacct_base, cg->id); } else { - snprintfz(command, CGROUP_NETWORK_INTERFACE_MAX_LINE, "exec %s --cgroup '%s%s'", cgroups_network_interface_script, cgroup_unified_base, cg->id); + snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_unified_base, cg->id); } - debug(D_CGROUP, "executing command '%s' for cgroup '%s'", command, cg->id); - FILE *fp = mypopen(command, &cgroup_pid); + debug(D_CGROUP, "executing cgroup_identifier %s --cgroup '%s' for cgroup '%s'", cgroups_network_interface_script, cgroup_identifier, cg->id); + FILE *fp; + (void)mypopen_raw_default_flags_and_environment(&cgroup_pid, &fp, cgroups_network_interface_script, "--cgroup", cgroup_identifier); if(!fp) { - error("CGROUP: cannot popen(\"%s\", \"r\").", command); + error("CGROUP: cannot popen(%s --cgroup \"%s\", \"r\").", cgroups_network_interface_script, cgroup_identifier); return; } @@ -1491,7 +1682,7 @@ static inline void read_cgroup_network_interfaces(struct cgroup *cg) { } mypclose(fp, cgroup_pid); - // debug(D_CGROUP, "closed command for cgroup '%s'", cg->id); + // debug(D_CGROUP, "closed cgroup_identifier for cgroup '%s'", cg->id); } static inline void free_cgroup_network_interfaces(struct cgroup *cg) { @@ -1544,8 +1735,7 @@ static inline void substitute_dots_in_id(char *s) { } } -char *parse_k8s_data(struct label **labels, char *data) -{ +char *k8s_parse_resolved_name(struct label **labels, char *data) { char *name = mystrsep(&data, " "); if (!data) { @@ -1573,187 +1763,11 @@ char *parse_k8s_data(struct label **labels, char *data) return name; } -static inline void cgroup_get_chart_name(struct cgroup *cg) { - debug(D_CGROUP, "looking for the name of cgroup '%s' with chart id '%s' and title '%s'", cg->id, cg->chart_id, cg->chart_title); - - pid_t cgroup_pid; - char command[CGROUP_CHARTID_LINE_MAX + 1]; - - // TODO: use cg->id when the renaming script is fixed - snprintfz(command, CGROUP_CHARTID_LINE_MAX, "exec %s '%s'", cgroups_rename_script, cg->intermediate_id); - - debug(D_CGROUP, "executing command \"%s\" for cgroup '%s'", command, cg->chart_id); - FILE *fp = mypopen(command, &cgroup_pid); - if(fp) { - // debug(D_CGROUP, "reading from command '%s' for cgroup '%s'", command, cg->id); - char buffer[CGROUP_CHARTID_LINE_MAX + 1]; - char *s = fgets(buffer, CGROUP_CHARTID_LINE_MAX, fp); - // debug(D_CGROUP, "closing command for cgroup '%s'", cg->id); - int name_error = mypclose(fp, cgroup_pid); - // debug(D_CGROUP, "closed command for cgroup '%s'", cg->id); - - if(s && *s && *s != '\n') { - debug(D_CGROUP, "cgroup '%s' should be renamed to '%s'", cg->chart_id, s); - - s = trim(s); - if (s) { - if(likely(name_error==0)) - cg->pending_renames = 0; - else if (unlikely(name_error==3)) { - debug(D_CGROUP, "cgroup '%s' disabled based due to rename command output", cg->chart_id); - cg->enabled = 0; - } - - if (likely(cg->pending_renames < 2)) { - char *name = s; - - if (!strncmp(s, "k8s_", 4)) { - free_label_list(cg->chart_labels); - name = parse_k8s_data(&cg->chart_labels, s); - } - - freez(cg->chart_title); - cg->chart_title = cgroup_title_strdupz(name); - - freez(cg->chart_id); - cg->chart_id = cgroup_chart_id_strdupz(name); - substitute_dots_in_id(cg->chart_id); - cg->hash_chart = simple_hash(cg->chart_id); - } - } - } - } - else - error("CGROUP: cannot popen(\"%s\", \"r\").", command); -} - -static inline struct cgroup *cgroup_add(const char *id) { - if(!id || !*id) id = "/"; - debug(D_CGROUP, "adding to list, cgroup with id '%s'", id); - - if(cgroup_root_count >= cgroup_root_max) { - info("CGROUP: maximum number of cgroups reached (%d). Not adding cgroup '%s'", cgroup_root_count, id); - return NULL; - } - - int def = simple_pattern_matches(enabled_cgroup_patterns, id)?cgroup_enable_new_cgroups_detected_at_runtime:0; - struct cgroup *cg = callocz(1, sizeof(struct cgroup)); - - cg->id = strdupz(id); - cg->hash = simple_hash(cg->id); - - cg->chart_title = cgroup_title_strdupz(id); - - cg->intermediate_id = cgroup_chart_id_strdupz(id); - - cg->chart_id = cgroup_chart_id_strdupz(id); - substitute_dots_in_id(cg->chart_id); - cg->hash_chart = simple_hash(cg->chart_id); - - if(cgroup_use_unified_cgroups) cg->options |= CGROUP_OPTIONS_IS_UNIFIED; - - if(!discovered_cgroup_root) - discovered_cgroup_root = cg; - else { - // append it - struct cgroup *e; - for(e = discovered_cgroup_root; e->discovered_next ;e = e->discovered_next) ; - e->discovered_next = cg; - } - - cgroup_root_count++; - - // fix the chart_id and title by calling the external script - if(simple_pattern_matches(enabled_cgroup_renames, cg->id)) { - - cg->pending_renames = 2; - cgroup_get_chart_name(cg); - - debug(D_CGROUP, "cgroup '%s' renamed to '%s' (title: '%s')", cg->id, cg->chart_id, cg->chart_title); - } - else - debug(D_CGROUP, "cgroup '%s' will not be renamed - it matches the list of disabled cgroup renames (will be shown as '%s')", cg->id, cg->chart_id); - - int user_configurable = 1; - - // check if this cgroup should be a systemd service - if(cgroup_enable_systemd_services) { - if(simple_pattern_matches(systemd_services_cgroups, cg->id) || - simple_pattern_matches(systemd_services_cgroups, cg->chart_id)) { - debug(D_CGROUP, "cgroup '%s' with chart id '%s' (title: '%s') matches systemd services cgroups", cg->id, cg->chart_id, cg->chart_title); - - char buffer[CGROUP_CHARTID_LINE_MAX + 1]; - cg->options |= CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE; - - strncpy(buffer, cg->id, CGROUP_CHARTID_LINE_MAX); - char *s = buffer; - - // skip to the last slash - size_t len = strlen(s); - while(len--) if(unlikely(s[len] == '/')) break; - if(len) s = &s[len + 1]; - - // remove extension - len = strlen(s); - while(len--) if(unlikely(s[len] == '.')) break; - if(len) s[len] = '\0'; - - freez(cg->chart_title); - cg->chart_title = cgroup_title_strdupz(s); - - cg->enabled = 1; - user_configurable = 0; - - debug(D_CGROUP, "cgroup '%s' renamed to '%s' (title: '%s')", cg->id, cg->chart_id, cg->chart_title); - } - else - debug(D_CGROUP, "cgroup '%s' with chart id '%s' (title: '%s') does not match systemd services groups", cg->id, cg->chart_id, cg->chart_title); - } - - if(user_configurable) { - // allow the user to enable/disable this individually - char option[FILENAME_MAX + 1]; - snprintfz(option, FILENAME_MAX, "enable cgroup %s", cg->chart_title); - cg->enabled = (char) config_get_boolean("plugin:cgroups", option, def); - } - - // detect duplicate cgroups - if(cg->enabled) { - struct cgroup *t; - for (t = discovered_cgroup_root; t; t = t->discovered_next) { - if (t != cg && t->enabled && t->hash_chart == cg->hash_chart && !strcmp(t->chart_id, cg->chart_id)) { - // TODO: use it after refactoring if system.slice might be scanned before init.scope/system.slice - // - // if (!strncmp(t->id, "/system.slice/", 14) && !strncmp(cg->id, "/init.scope/system.slice/", 25)) { - // error("CGROUP: chart id '%s' already exists with id '%s' and is enabled. Swapping them by enabling cgroup with id '%s' and disabling cgroup with id '%s'.", - // cg->chart_id, t->id, cg->id, t->id); - // t->enabled = 0; - // t->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; - // } - // else {} - // - // https://github.com/netdata/netdata/issues/797#issuecomment-241248884 - error("CGROUP: chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.", - cg->chart_id, t->id, cg->id); - cg->enabled = 0; - cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; - - break; - } - } - } - - if(cg->enabled && !cg->pending_renames && !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE)) - read_cgroup_network_interfaces(cg); - - debug(D_CGROUP, "ADDED CGROUP: '%s' with chart id '%s' and title '%s' as %s (default was %s)", cg->id, cg->chart_id, cg->chart_title, (cg->enabled)?"enabled":"disabled", (def)?"enabled":"disabled"); - - return cg; -} - static inline void free_pressure(struct pressure *res) { - if (res->some.st) rrdset_is_obsolete(res->some.st); - if (res->full.st) rrdset_is_obsolete(res->full.st); + if (res->some.share_time.st) rrdset_is_obsolete(res->some.share_time.st); + if (res->some.total_time.st) rrdset_is_obsolete(res->some.total_time.st); + if (res->full.share_time.st) rrdset_is_obsolete(res->full.share_time.st); + if (res->full.total_time.st) rrdset_is_obsolete(res->full.total_time.st); freez(res->filename); } @@ -1765,6 +1779,7 @@ static inline void cgroup_free(struct cgroup *cg) { if(cg->st_cpu_per_core) rrdset_is_obsolete(cg->st_cpu_per_core); if(cg->st_cpu_nr_throttled) rrdset_is_obsolete(cg->st_cpu_nr_throttled); if(cg->st_cpu_throttled_time) rrdset_is_obsolete(cg->st_cpu_throttled_time); + if(cg->st_cpu_shares) rrdset_is_obsolete(cg->st_cpu_shares); if(cg->st_mem) rrdset_is_obsolete(cg->st_mem); if(cg->st_writeback) rrdset_is_obsolete(cg->st_writeback); if(cg->st_mem_activity) rrdset_is_obsolete(cg->st_mem_activity); @@ -1793,6 +1808,7 @@ static inline void cgroup_free(struct cgroup *cg) { freez(cg->cpuacct_stat.filename); freez(cg->cpuacct_usage.filename); freez(cg->cpuacct_cpu_throttling.filename); + freez(cg->cpuacct_cpu_shares.filename); arl_free(cg->memory.arl_base); freez(cg->memory.filename_detailed); @@ -1825,71 +1841,197 @@ static inline void cgroup_free(struct cgroup *cg) { cgroup_root_count--; } -// find if a given cgroup exists -static inline struct cgroup *cgroup_find(const char *id) { - debug(D_CGROUP, "searching for cgroup '%s'", id); +// ---------------------------------------------------------------------------- - uint32_t hash = simple_hash(id); +static inline void discovery_rename_cgroup(struct cgroup *cg) { + if (!cg->pending_renames) { + return; + } + cg->pending_renames--; - struct cgroup *cg; - for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) { - if(hash == cg->hash && strcmp(id, cg->id) == 0) + debug(D_CGROUP, "looking for the name of cgroup '%s' with chart id '%s' and title '%s'", cg->id, cg->chart_id, cg->chart_title); + debug(D_CGROUP, "executing command %s \"%s\" for cgroup '%s'", cgroups_rename_script, cg->intermediate_id, cg->chart_id); + pid_t cgroup_pid; + + FILE *fp; + (void)mypopen_raw_default_flags_and_environment(&cgroup_pid, &fp, cgroups_rename_script, cg->id, cg->intermediate_id); + if (!fp) { + error("CGROUP: cannot popen(%s \"%s\", \"r\").", cgroups_rename_script, cg->intermediate_id); + cg->pending_renames = 0; + cg->processed = 1; + return; + } + + char buffer[CGROUP_CHARTID_LINE_MAX + 1]; + char *new_name = fgets(buffer, CGROUP_CHARTID_LINE_MAX, fp); + int exit_code = mypclose(fp, cgroup_pid); + + switch (exit_code) { + case 0: + cg->pending_renames = 0; + break; + case 3: + cg->pending_renames = 0; + cg->processed = 1; break; } - debug(D_CGROUP, "cgroup '%s' %s in memory", id, (cg)?"found":"not found"); - return cg; + if (cg->pending_renames || cg->processed) { + return; + } + if (!(new_name && *new_name && *new_name != '\n')) { + return; + } + new_name = trim(new_name); + if (!(new_name)) { + return; + } + char *name = new_name; + if (!strncmp(new_name, "k8s_", 4)) { + free_label_list(cg->chart_labels); + name = k8s_parse_resolved_name(&cg->chart_labels, new_name); + } + freez(cg->chart_title); + cg->chart_title = cgroup_title_strdupz(name); + freez(cg->chart_id); + cg->chart_id = cgroup_chart_id_strdupz(name); + substitute_dots_in_id(cg->chart_id); + cg->hash_chart = simple_hash(cg->chart_id); } -// ---------------------------------------------------------------------------- -// detect running cgroups +static void is_cgroup_procs_exist(netdata_ebpf_cgroup_shm_body_t *out, char *id) { + struct stat buf; -// callback for find_file_in_subdirs() -static inline void found_subdir_in_dir(const char *dir) { - debug(D_CGROUP, "examining cgroup dir '%s'", dir); + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_cpuset_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } + + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_blkio_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } - struct cgroup *cg = cgroup_find(dir); - if(!cg) { - if(*dir && cgroup_max_depth > 0) { - int depth = 0; - const char *s; + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_memory_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } - for(s = dir; *s ;s++) - if(unlikely(*s == '/')) - depth++; + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_devices_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } - if(depth > cgroup_max_depth) { - info("CGROUP: '%s' is too deep (%d, while max is %d)", dir, depth, cgroup_max_depth); - return; - } + out->path[0] = '\0'; + out->enabled = 0; +} + +static inline void convert_cgroup_to_systemd_service(struct cgroup *cg) { + char buffer[CGROUP_CHARTID_LINE_MAX]; + cg->options |= CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE; + strncpyz(buffer, cg->id, CGROUP_CHARTID_LINE_MAX); + char *s = buffer; + + // skip to the last slash + size_t len = strlen(s); + while (len--) { + if (unlikely(s[len] == '/')) { + break; } - // debug(D_CGROUP, "will add dir '%s' as cgroup", dir); - cg = cgroup_add(dir); + } + if (len) { + s = &s[len + 1]; } - if(cg) { - // delay renaming of the cgroup and looking for network interfaces to deal with the docker lag when starting the container - if(unlikely(cg->pending_renames == 1)) { - // fix the chart_id and title by calling the external script - if(simple_pattern_matches(enabled_cgroup_renames, cg->id)) { + // remove extension + len = strlen(s); + while (len--) { + if (unlikely(s[len] == '.')) { + break; + } + } + if (len) { + s[len] = '\0'; + } - cgroup_get_chart_name(cg); - cg->pending_renames = 0; + freez(cg->chart_title); + cg->chart_title = cgroup_title_strdupz(s); +} - if(cg->enabled && !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE)) - read_cgroup_network_interfaces(cg); +static inline struct cgroup *discovery_cgroup_add(const char *id) { + debug(D_CGROUP, "adding to list, cgroup with id '%s'", id); - debug(D_CGROUP, "cgroup '%s' renamed to '%s' (title: '%s')", cg->id, cg->chart_id, cg->chart_title); - } - else - debug(D_CGROUP, "cgroup '%s' will not be renamed - it matches the list of disabled cgroup renames (will be shown as '%s')", cg->id, cg->chart_id); + struct cgroup *cg = callocz(1, sizeof(struct cgroup)); + cg->id = strdupz(id); + cg->hash = simple_hash(cg->id); + cg->chart_title = cgroup_title_strdupz(id); + cg->intermediate_id = cgroup_chart_id_strdupz(id); + cg->chart_id = cgroup_chart_id_strdupz(id); + substitute_dots_in_id(cg->chart_id); + cg->hash_chart = simple_hash(cg->chart_id); + if (cgroup_use_unified_cgroups) { + cg->options |= CGROUP_OPTIONS_IS_UNIFIED; + } + + if (!discovered_cgroup_root) + discovered_cgroup_root = cg; + else { + struct cgroup *t; + for (t = discovered_cgroup_root; t->discovered_next; t = t->discovered_next) { } + t->discovered_next = cg; + } + + return cg; +} + +static inline struct cgroup *discovery_cgroup_find(const char *id) { + debug(D_CGROUP, "searching for cgroup '%s'", id); + + uint32_t hash = simple_hash(id); + + struct cgroup *cg; + for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) { + if(hash == cg->hash && strcmp(id, cg->id) == 0) + break; + } + + debug(D_CGROUP, "cgroup '%s' %s in memory", id, (cg)?"found":"not found"); + return cg; +} +static inline void discovery_find_cgroup_in_dir_callback(const char *dir) { + if (!dir || !*dir) { + dir = "/"; + } + debug(D_CGROUP, "examining cgroup dir '%s'", dir); + + struct cgroup *cg = discovery_cgroup_find(dir); + if (cg) { cg->available = 1; + return; } + + if (cgroup_root_count >= cgroup_root_max) { + info("CGROUP: maximum number of cgroups reached (%d). Not adding cgroup '%s'", cgroup_root_count, dir); + return; + } + + if (cgroup_max_depth > 0) { + int depth = calc_cgroup_depth(dir); + if (depth > cgroup_max_depth) { + info("CGROUP: '%s' is too deep (%d, while max is %d)", dir, depth, cgroup_max_depth); + return; + } + } + + cg = discovery_cgroup_add(dir); + cg->available = 1; + cg->first_time_seen = 1; + cgroup_root_count++; } -static inline int find_dir_in_subdirs(const char *base, const char *this, void (*callback)(const char *)) { +static inline int discovery_find_dir_in_subdirs(const char *base, const char *this, void (*callback)(const char *)) { if(!this) this = base; debug(D_CGROUP, "searching for directories in '%s' (base '%s')", this?this:"", base); @@ -1925,15 +2067,7 @@ static inline int find_dir_in_subdirs(const char *base, const char *this, void ( if(*r == '\0') r = "/"; // do not decent in directories we are not interested - int def = simple_pattern_matches(enabled_cgroup_paths, r); - - // we check for this option here - // so that the config will not have settings - // for leaf directories - char option[FILENAME_MAX + 1]; - snprintfz(option, FILENAME_MAX, "search for cgroups under %s", r); - option[FILENAME_MAX] = '\0'; - enabled = config_get_boolean("plugin:cgroups", option, def); + enabled = matches_search_cgroup_paths(r); } if(enabled) { @@ -1941,7 +2075,7 @@ static inline int find_dir_in_subdirs(const char *base, const char *this, void ( strcpy(s, this); strcat(s, "/"); strcat(s, de->d_name); - int ret2 = find_dir_in_subdirs(base, s, callback); + int ret2 = discovery_find_dir_in_subdirs(base, s, callback); if(ret2 > 0) ret += ret2; freez(s); } @@ -1952,28 +2086,19 @@ static inline int find_dir_in_subdirs(const char *base, const char *this, void ( return ret; } -static inline void mark_all_cgroups_as_not_available() { +static inline void discovery_mark_all_cgroups_as_unavailable() { debug(D_CGROUP, "marking all cgroups as not available"); - struct cgroup *cg; - - // mark all as not available - for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) { + for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { cg->available = 0; } } -static inline void update_filenames() -{ +static inline void discovery_update_filenames() { struct cgroup *cg; struct stat buf; for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) { - // fprintf(stderr, " >>> CGROUP '%s' (%u - %s) with name '%s'\n", cg->id, cg->hash, cg->available?"available":"stopped", cg->name); - - if(unlikely(cg->pending_renames)) - cg->pending_renames--; - - if(unlikely(!cg->available || cg->pending_renames)) + if(unlikely(!cg->available || !cg->enabled || cg->pending_renames)) continue; debug(D_CGROUP, "checking paths for cgroup '%s'", cg->id); @@ -1999,7 +2124,7 @@ static inline void update_filenames() debug(D_CGROUP, "cpuacct.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); } - if(unlikely(cgroup_enable_cpuacct_usage && !cg->cpuacct_usage.filename && !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE))) { + if(unlikely(cgroup_enable_cpuacct_usage && !cg->cpuacct_usage.filename && !is_cgroup_systemd_service(cg))) { snprintfz(filename, FILENAME_MAX, "%s%s/cpuacct.usage_percpu", cgroup_cpuacct_base, cg->id); if(likely(stat(filename, &buf) != -1)) { cg->cpuacct_usage.filename = strdupz(filename); @@ -2009,7 +2134,7 @@ static inline void update_filenames() else debug(D_CGROUP, "cpuacct.usage_percpu file for cgroup '%s': '%s' does not exist.", cg->id, filename); } - if(unlikely(cgroup_enable_cpuacct_cpu_throttling && !cg->cpuacct_cpu_throttling.filename && !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE))) { + if(unlikely(cgroup_enable_cpuacct_cpu_throttling && !cg->cpuacct_cpu_throttling.filename && !is_cgroup_systemd_service(cg))) { snprintfz(filename, FILENAME_MAX, "%s%s/cpu.stat", cgroup_cpuacct_base, cg->id); if(likely(stat(filename, &buf) != -1)) { cg->cpuacct_cpu_throttling.filename = strdupz(filename); @@ -2019,8 +2144,20 @@ static inline void update_filenames() else debug(D_CGROUP, "cpu.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); } + if (unlikely( + cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename && + !is_cgroup_systemd_service(cg))) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.shares", cgroup_cpuacct_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->cpuacct_cpu_shares.filename = strdupz(filename); + cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares; + debug( + D_CGROUP, "cpu.shares filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_cpu_shares.filename); + } else + debug(D_CGROUP, "cpu.shares file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } - if(unlikely((cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed && (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE)))) { + if(unlikely((cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed && (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) { snprintfz(filename, FILENAME_MAX, "%s%s/memory.stat", cgroup_memory_base, cg->id); if(likely(stat(filename, &buf) != -1)) { cg->memory.filename_detailed = strdupz(filename); @@ -2219,7 +2356,17 @@ static inline void update_filenames() else debug(D_CGROUP, "cpu.stat file for unified cgroup '%s': '%s' does not exist.", cg->id, filename); } - if(unlikely((cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed && (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE)))) { + if (unlikely(cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.weight", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->cpuacct_cpu_shares.filename = strdupz(filename); + cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares; + debug(D_CGROUP, "cpu.weight filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_cpu_shares.filename); + } else + debug(D_CGROUP, "cpu.weight file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + if(unlikely((cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed && (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) { snprintfz(filename, FILENAME_MAX, "%s%s/memory.stat", cgroup_unified_base, cg->id); if(likely(stat(filename, &buf) != -1)) { cg->memory.filename_detailed = strdupz(filename); @@ -2295,7 +2442,7 @@ static inline void update_filenames() } } -static inline void cleanup_all_cgroups() { +static inline void discovery_cleanup_all_cgroups() { struct cgroup *cg = discovered_cgroup_root, *last = NULL; for(; cg ;) { @@ -2332,49 +2479,19 @@ static inline void cleanup_all_cgroups() { } } -static inline void copy_discovered_cgroups() -{ +static inline void discovery_copy_discovered_cgroups_to_reader() { debug(D_CGROUP, "copy discovered cgroups to the main group list"); struct cgroup *cg; - for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) { + for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { cg->next = cg->discovered_next; } cgroup_root = discovered_cgroup_root; } -static void is_there_cgroup_procs(netdata_ebpf_cgroup_shm_body_t *out, char *id) -{ - struct stat buf; - - snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_cpuset_base, id); - if (likely(stat(out->path, &buf) == 0)) { - return; - } - - snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_blkio_base, id); - if (likely(stat(out->path, &buf) == 0)) { - return; - } - - snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_memory_base, id); - if (likely(stat(out->path, &buf) == 0)) { - return; - } - - snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_devices_base, id); - if (likely(stat(out->path, &buf) == 0)) { - return; - } - - out->path[0] = '\0'; - out->enabled = 0; -} - -static inline void share_cgroups() -{ +static inline void discovery_share_cgroups_with_ebpf() { struct cgroup *cg; int count; struct stat buf; @@ -2384,9 +2501,9 @@ static inline void share_cgroups() } sem_wait(shm_mutex_cgroup_ebpf); - for (cg = cgroup_root, count = 0; cg ; cg = cg->next, count++) { + for (cg = cgroup_root, count = 0; cg; cg = cg->next, count++) { netdata_ebpf_cgroup_shm_body_t *ptr = &shm_cgroup_ebpf.body[count]; - char *prefix = (cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE) ? "" : "cgroup_"; + char *prefix = (is_cgroup_systemd_service(cg)) ? "" : "cgroup_"; snprintfz(ptr->name, CGROUP_EBPF_NAME_SHARED_LENGTH - 1, "%s%s", prefix, cg->chart_title); ptr->hash = simple_hash(ptr->name); ptr->options = cg->options; @@ -2398,7 +2515,7 @@ static inline void share_cgroups() ptr->enabled = 0; } } else { - is_there_cgroup_procs(ptr, cg->id); + is_cgroup_procs_exist(ptr, cg->id); } debug(D_CGROUP, "cgroup shared: NAME=%s, ENABLED=%d", ptr->name, ptr->enabled); @@ -2408,63 +2525,197 @@ static inline void share_cgroups() sem_post(shm_mutex_cgroup_ebpf); } -static inline void find_all_cgroups() { - debug(D_CGROUP, "searching for cgroups"); +static inline void discovery_find_all_cgroups_v1() { + if (cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_usage) { + if (discovery_find_dir_in_subdirs(cgroup_cpuacct_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_enable_cpuacct_stat = cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_NO; + error("CGROUP: disabled cpu statistics."); + } + } - mark_all_cgroups_as_not_available(); - if(!cgroup_use_unified_cgroups) { - if(cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_usage) { - if(find_dir_in_subdirs(cgroup_cpuacct_base, NULL, found_subdir_in_dir) == -1) { - cgroup_enable_cpuacct_stat = - cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_NO; - error("CGROUP: disabled cpu statistics."); - } + if (cgroup_enable_blkio_io || cgroup_enable_blkio_ops || cgroup_enable_blkio_throttle_io || + cgroup_enable_blkio_throttle_ops || cgroup_enable_blkio_merged_ops || cgroup_enable_blkio_queued_ops) { + if (discovery_find_dir_in_subdirs(cgroup_blkio_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_enable_blkio_io = cgroup_enable_blkio_ops = cgroup_enable_blkio_throttle_io = + cgroup_enable_blkio_throttle_ops = cgroup_enable_blkio_merged_ops = cgroup_enable_blkio_queued_ops = + CONFIG_BOOLEAN_NO; + error("CGROUP: disabled blkio statistics."); } + } - if(cgroup_enable_blkio_io || cgroup_enable_blkio_ops || cgroup_enable_blkio_throttle_io || cgroup_enable_blkio_throttle_ops || cgroup_enable_blkio_merged_ops || cgroup_enable_blkio_queued_ops) { - if(find_dir_in_subdirs(cgroup_blkio_base, NULL, found_subdir_in_dir) == -1) { - cgroup_enable_blkio_io = - cgroup_enable_blkio_ops = - cgroup_enable_blkio_throttle_io = - cgroup_enable_blkio_throttle_ops = - cgroup_enable_blkio_merged_ops = - cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_NO; - error("CGROUP: disabled blkio statistics."); - } + if (cgroup_enable_memory || cgroup_enable_detailed_memory || cgroup_enable_swap || cgroup_enable_memory_failcnt) { + if (discovery_find_dir_in_subdirs(cgroup_memory_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_enable_memory = cgroup_enable_detailed_memory = cgroup_enable_swap = cgroup_enable_memory_failcnt = + CONFIG_BOOLEAN_NO; + error("CGROUP: disabled memory statistics."); } + } - if(cgroup_enable_memory || cgroup_enable_detailed_memory || cgroup_enable_swap || cgroup_enable_memory_failcnt) { - if(find_dir_in_subdirs(cgroup_memory_base, NULL, found_subdir_in_dir) == -1) { - cgroup_enable_memory = - cgroup_enable_detailed_memory = - cgroup_enable_swap = - cgroup_enable_memory_failcnt = CONFIG_BOOLEAN_NO; - error("CGROUP: disabled memory statistics."); - } + if (cgroup_search_in_devices) { + if (discovery_find_dir_in_subdirs(cgroup_devices_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_search_in_devices = 0; + error("CGROUP: disabled devices statistics."); } + } +} - if(cgroup_search_in_devices) { - if(find_dir_in_subdirs(cgroup_devices_base, NULL, found_subdir_in_dir) == -1) { - cgroup_search_in_devices = 0; - error("CGROUP: disabled devices statistics."); - } +static inline void discovery_find_all_cgroups_v2() { + if (discovery_find_dir_in_subdirs(cgroup_unified_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_unified_exist = CONFIG_BOOLEAN_NO; + error("CGROUP: disabled unified cgroups statistics."); + } +} + +static int is_digits_only(const char *s) { + do { + if (!isdigit(*s++)) { + return 0; + } + } while (*s); + + return 1; +} + +static inline void discovery_process_first_time_seen_cgroup(struct cgroup *cg) { + if (!cg->first_time_seen) { + return; + } + cg->first_time_seen = 0; + + char comm[TASK_COMM_LEN]; + + if (is_inside_k8s && !k8s_get_container_first_proc_comm(cg->id, comm)) { + // container initialization may take some time when CPU % is high + // seen on GKE: comm is '6' before 'runc:[2:INIT]' (dunno if it could be another number) + if (is_digits_only(comm) || matches_entrypoint_parent_process_comm(comm)) { + cg->first_time_seen = 1; + return; + } + if (!strcmp(comm, "pause")) { + // a container that holds the network namespace for the pod + // we don't need to collect its metrics + cg->processed = 1; + return; } } - else { - if (find_dir_in_subdirs(cgroup_unified_base, NULL, found_subdir_in_dir) == -1) { - cgroup_unified_exist = CONFIG_BOOLEAN_NO; - error("CGROUP: disabled unified cgroups statistics."); + + if (cgroup_enable_systemd_services && matches_systemd_services_cgroups(cg->id)) { + debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'cgroups to match as systemd services'", cg->id, cg->chart_title); + convert_cgroup_to_systemd_service(cg); + return; + } + + if (matches_enabled_cgroup_renames(cg->id)) { + debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'run script to rename cgroups matching', will try to rename it", cg->id, cg->chart_title); + if (is_inside_k8s && k8s_is_container(cg->id)) { + // it may take up to a minute for the K8s API to return data for the container + // tested on AWS K8s cluster with 100% CPU utilization + cg->pending_renames = 9; // 1.5 minute + } else { + cg->pending_renames = 2; } } +} - update_filenames(); +static int discovery_is_cgroup_duplicate(struct cgroup *cg) { + // https://github.com/netdata/netdata/issues/797#issuecomment-241248884 + struct cgroup *c; + for (c = discovered_cgroup_root; c; c = c->discovered_next) { + if (c != cg && c->enabled && c->hash_chart == cg->hash_chart && !strcmp(c->chart_id, cg->chart_id)) { + error("CGROUP: chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.", cg->chart_id, c->id, cg->id); + return 1; + } + } + return 0; +} +static inline void discovery_process_cgroup(struct cgroup *cg) { + if (!cg) { + debug(D_CGROUP, "discovery_process_cgroup() received NULL"); + return; + } + if (!cg->available || cg->processed) { + return; + } + + if (cg->first_time_seen) { + worker_is_busy(WORKER_DISCOVERY_PROCESS_FIRST_TIME); + discovery_process_first_time_seen_cgroup(cg); + if (unlikely(cg->first_time_seen || cg->processed)) { + return; + } + } + + if (cg->pending_renames) { + worker_is_busy(WORKER_DISCOVERY_PROCESS_RENAME); + discovery_rename_cgroup(cg); + if (unlikely(cg->pending_renames || cg->processed)) { + return; + } + } + + cg->processed = 1; + + if (is_cgroup_systemd_service(cg)) { + cg->enabled = 1; + return; + } + + if (!(cg->enabled = matches_enabled_cgroup_names(cg->chart_title))) { + debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups names matching'", cg->id, cg->chart_title); + return; + } + + if (!(cg->enabled = matches_enabled_cgroup_paths(cg->id))) { + debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups matching'", cg->id, cg->chart_title); + return; + } + + if (discovery_is_cgroup_duplicate(cg)) { + cg->enabled = 0; + cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; + return; + } + + worker_is_busy(WORKER_DISCOVERY_PROCESS_NETWORK); + read_cgroup_network_interfaces(cg); +} + +static inline void discovery_find_all_cgroups() { + debug(D_CGROUP, "searching for cgroups"); + + worker_is_busy(WORKER_DISCOVERY_INIT); + discovery_mark_all_cgroups_as_unavailable(); + + worker_is_busy(WORKER_DISCOVERY_FIND); + if (!cgroup_use_unified_cgroups) { + discovery_find_all_cgroups_v1(); + } else { + discovery_find_all_cgroups_v2(); + } + + struct cgroup *cg; + for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { + worker_is_busy(WORKER_DISCOVERY_PROCESS); + discovery_process_cgroup(cg); + } + + worker_is_busy(WORKER_DISCOVERY_UPDATE); + discovery_update_filenames(); + + worker_is_busy(WORKER_DISCOVERY_LOCK); uv_mutex_lock(&cgroup_root_mutex); - cleanup_all_cgroups(); - copy_discovered_cgroups(); + + worker_is_busy(WORKER_DISCOVERY_CLEANUP); + discovery_cleanup_all_cgroups(); + + worker_is_busy(WORKER_DISCOVERY_COPY); + discovery_copy_discovered_cgroups_to_reader(); + uv_mutex_unlock(&cgroup_root_mutex); - share_cgroups(); + worker_is_busy(WORKER_DISCOVERY_SHARE); + discovery_share_cgroups_with_ebpf(); debug(D_CGROUP, "done searching for cgroups"); } @@ -2473,7 +2724,28 @@ void cgroup_discovery_worker(void *ptr) { UNUSED(ptr); + worker_register("CGROUPSDISC"); + worker_register_job_name(WORKER_DISCOVERY_INIT, "init"); + worker_register_job_name(WORKER_DISCOVERY_FIND, "find"); + worker_register_job_name(WORKER_DISCOVERY_PROCESS, "process"); + worker_register_job_name(WORKER_DISCOVERY_PROCESS_RENAME, "rename"); + worker_register_job_name(WORKER_DISCOVERY_PROCESS_NETWORK, "network"); + worker_register_job_name(WORKER_DISCOVERY_PROCESS_FIRST_TIME, "new"); + worker_register_job_name(WORKER_DISCOVERY_UPDATE, "update"); + worker_register_job_name(WORKER_DISCOVERY_CLEANUP, "cleanup"); + worker_register_job_name(WORKER_DISCOVERY_COPY, "copy"); + worker_register_job_name(WORKER_DISCOVERY_SHARE, "share"); + worker_register_job_name(WORKER_DISCOVERY_LOCK, "lock"); + + entrypoint_parent_process_comm = simple_pattern_create( + " runc:[* " // http://terenceli.github.io/%E6%8A%80%E6%9C%AF/2021/12/28/runc-internals-3) + " exe ", // https://github.com/falcosecurity/falco/blob/9d41b0a151b83693929d3a9c84f7c5c85d070d3a/rules/falco_rules.yaml#L1961 + NULL, + SIMPLE_PATTERN_EXACT); + while (!netdata_exit) { + worker_is_idle(); + uv_mutex_lock(&discovery_thread.mutex); while (!discovery_thread.start_discovery) uv_cond_wait(&discovery_thread.cond_var, &discovery_thread.mutex); @@ -2483,10 +2755,11 @@ void cgroup_discovery_worker(void *ptr) if (unlikely(netdata_exit)) break; - find_all_cgroups(); + discovery_find_all_cgroups(); } discovery_thread.exited = 1; + worker_unregister(); } // ---------------------------------------------------------------------------- @@ -3069,7 +3342,7 @@ void update_systemd_services_charts( // update the values struct cgroup *cg; for(cg = cgroup_root; cg ; cg = cg->next) { - if(unlikely(!cg->enabled || cg->pending_renames || !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE))) + if(unlikely(!cg->enabled || cg->pending_renames || !is_cgroup_systemd_service(cg))) continue; if(likely(do_cpu && cg->cpuacct_stat.updated)) { @@ -3386,7 +3659,7 @@ static inline void update_cpu_limits2(struct cgroup *cg) { cg->cpuset_cpus = get_system_cpus(); char *s = "max\n\0"; - if(strsame(s, procfile_lineword(ff, 0, 0)) == 0){ + if(strcmp(s, procfile_lineword(ff, 0, 0)) == 0){ cg->cpu_cfs_quota = cg->cpu_cfs_period * cg->cpuset_cpus; } else { cg->cpu_cfs_quota = str2ull(procfile_lineword(ff, 0, 0)); @@ -3434,7 +3707,7 @@ static inline int update_memory_limits(char **filename, RRDSETVAR **chart_var, u return 0; } char *s = "max\n\0"; - if(strsame(s, buffer) == 0){ + if(strcmp(s, buffer) == 0){ *value = UINT64_MAX; rrdsetvar_custom_chart_variable_set(*chart_var, (calculated_number)(*value / (1024 * 1024))); return 1; @@ -3471,7 +3744,7 @@ void update_cgroup_charts(int update_every) { if(unlikely(!cg->enabled || cg->pending_renames)) continue; - if(likely(cgroup_enable_systemd_services && cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE)) { + if(likely(cgroup_enable_systemd_services && is_cgroup_systemd_service(cg))) { if(cg->cpuacct_stat.updated && cg->cpuacct_stat.enabled == CONFIG_BOOLEAN_YES) services_do_cpu++; if(cgroup_enable_systemd_services_detailed_memory && cg->memory.updated_detailed && cg->memory.enabled_detailed) services_do_mem_detailed++; @@ -3670,6 +3943,34 @@ void update_cgroup_charts(int update_every) { } } + if (likely(cg->cpuacct_cpu_shares.updated && cg->cpuacct_cpu_shares.enabled == CONFIG_BOOLEAN_YES)) { + if (unlikely(!cg->st_cpu_shares)) { + snprintfz(title, CHART_TITLE_MAX, "CPU Time Relative Share"); + + cg->st_cpu_shares = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_shares" + , NULL + , "cpu" + , "cgroup.cpu_shares" + , title + , "shares" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 20 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_labels(cg->st_cpu_shares, cg->chart_labels); + rrddim_add(cg->st_cpu_shares, "shares", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(cg->st_cpu_shares); + rrddim_set(cg->st_cpu_shares, "shares", cg->cpuacct_cpu_shares.shares); + rrdset_done(cg->st_cpu_shares); + } + } + if(likely(cg->cpuacct_usage.updated && cg->cpuacct_usage.enabled == CONFIG_BOOLEAN_YES)) { char id[RRD_ID_LENGTH_MAX + 1]; unsigned int i; @@ -4239,17 +4540,20 @@ void update_cgroup_charts(int update_every) { if (cg->options & CGROUP_OPTIONS_IS_UNIFIED) { struct pressure *res = &cg->cpu_pressure; + if (likely(res->updated && res->some.enabled)) { - if (unlikely(!res->some.st)) { - RRDSET *chart; - snprintfz(title, CHART_TITLE_MAX, "CPU pressure"); + struct pressure_charts *pcs; + pcs = &res->some; - chart = res->some.st = rrdset_create_localhost( + if (unlikely(!pcs->share_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "CPU some pressure"); + chart = pcs->share_time.st = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) - , "cpu_pressure" + , "cpu_some_pressure" , NULL , "cpu" - , "cgroup.cpu_pressure" + , "cgroup.cpu_some_pressure" , title , "percentage" , PLUGIN_CGROUPS_NAME @@ -4258,31 +4562,105 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); - - rrdset_update_labels(chart = res->some.st, cg->chart_labels); - - res->some.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - res->some.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - res->some.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + rrdset_update_labels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(pcs->share_time.st); + } + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "CPU some pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_some_pressure_stall_time" + , NULL + , "cpu" + , "cgroup.cpu_some_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2220 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_labels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } else { - rrdset_next(res->some.st); + rrdset_next(pcs->total_time.st); } + update_pressure_charts(pcs); + } + if (likely(res->updated && res->full.enabled)) { + struct pressure_charts *pcs; + pcs = &res->full; - update_pressure_chart(&res->some); + if (unlikely(!pcs->share_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "CPU full pressure"); + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_full_pressure" + , NULL + , "cpu" + , "cgroup.cpu_full_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2240 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_labels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(pcs->share_time.st); + } + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "CPU full pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_full_pressure_stall_time" + , NULL + , "cpu" + , "cgroup.cpu_full_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2260 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_labels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } else { + rrdset_next(pcs->total_time.st); + } + update_pressure_charts(pcs); } res = &cg->memory_pressure; + if (likely(res->updated && res->some.enabled)) { - if (unlikely(!res->some.st)) { - RRDSET *chart; - snprintfz(title, CHART_TITLE_MAX, "Memory pressure"); + struct pressure_charts *pcs; + pcs = &res->some; - chart = res->some.st = rrdset_create_localhost( + if (unlikely(!pcs->share_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "Memory some pressure"); + chart = pcs->share_time.st = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) - , "mem_pressure" + , "mem_some_pressure" , NULL , "mem" - , "cgroup.memory_pressure" + , "cgroup.memory_some_pressure" , title , "percentage" , PLUGIN_CGROUPS_NAME @@ -4290,26 +4668,48 @@ void update_cgroup_charts(int update_every) { , cgroup_containers_chart_priority + 2300 , update_every , RRDSET_TYPE_LINE - ); - - rrdset_update_labels(chart = res->some.st, cg->chart_labels); - - res->some.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - res->some.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - res->some.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + ); + rrdset_update_labels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); } else { - rrdset_next(res->some.st); + rrdset_next(pcs->share_time.st); } - - update_pressure_chart(&res->some); + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "Memory some pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "memory_some_pressure_stall_time" + , NULL + , "mem" + , "cgroup.memory_some_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2320 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_labels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } else { + rrdset_next(pcs->total_time.st); + } + update_pressure_charts(pcs); } if (likely(res->updated && res->full.enabled)) { - if (unlikely(!res->full.st)) { + struct pressure_charts *pcs; + pcs = &res->full; + + if (unlikely(!pcs->share_time.st)) { RRDSET *chart; snprintfz(title, CHART_TITLE_MAX, "Memory full pressure"); - chart = res->full.st = rrdset_create_localhost( + chart = pcs->share_time.st = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) , "mem_full_pressure" , NULL @@ -4319,35 +4719,58 @@ void update_cgroup_charts(int update_every) { , "percentage" , PLUGIN_CGROUPS_NAME , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2350 + , cgroup_containers_chart_priority + 2340 , update_every , RRDSET_TYPE_LINE ); - rrdset_update_labels(chart = res->full.st, cg->chart_labels); - - res->full.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - res->full.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - res->full.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + rrdset_update_labels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); } else { - rrdset_next(res->full.st); + rrdset_next(pcs->share_time.st); } - - update_pressure_chart(&res->full); + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "Memory full pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "memory_full_pressure_stall_time" + , NULL + , "mem" + , "cgroup.memory_full_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2360 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_labels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } else { + rrdset_next(pcs->total_time.st); + } + update_pressure_charts(pcs); } res = &cg->io_pressure; + if (likely(res->updated && res->some.enabled)) { - if (unlikely(!res->some.st)) { - RRDSET *chart; - snprintfz(title, CHART_TITLE_MAX, "I/O pressure"); + struct pressure_charts *pcs; + pcs = &res->some; - chart = res->some.st = rrdset_create_localhost( + if (unlikely(!pcs->share_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "I/O some pressure"); + chart = pcs->share_time.st = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) - , "io_pressure" + , "io_some_pressure" , NULL , "disk" - , "cgroup.io_pressure" + , "cgroup.io_some_pressure" , title , "percentage" , PLUGIN_CGROUPS_NAME @@ -4356,25 +4779,46 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); - - rrdset_update_labels(chart = res->some.st, cg->chart_labels); - - res->some.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - res->some.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - res->some.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + rrdset_update_labels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); } else { - rrdset_next(res->some.st); + rrdset_next(pcs->share_time.st); } - - update_pressure_chart(&res->some); + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "I/O some pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "io_some_pressure_stall_time" + , NULL + , "disk" + , "cgroup.io_some_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2420 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_labels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } else { + rrdset_next(pcs->total_time.st); + } + update_pressure_charts(pcs); } if (likely(res->updated && res->full.enabled)) { - if (unlikely(!res->full.st)) { + struct pressure_charts *pcs; + pcs = &res->full; + + if (unlikely(!pcs->share_time.st)) { RRDSET *chart; snprintfz(title, CHART_TITLE_MAX, "I/O full pressure"); - - chart = res->full.st = rrdset_create_localhost( + chart = pcs->share_time.st = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) , "io_full_pressure" , NULL @@ -4384,21 +4828,40 @@ void update_cgroup_charts(int update_every) { , "percentage" , PLUGIN_CGROUPS_NAME , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME - , cgroup_containers_chart_priority + 2450 + , cgroup_containers_chart_priority + 2440 , update_every , RRDSET_TYPE_LINE ); - - rrdset_update_labels(chart = res->full.st, cg->chart_labels); - - res->full.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - res->full.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - res->full.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + rrdset_update_labels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); } else { - rrdset_next(res->full.st); + rrdset_next(pcs->share_time.st); } - - update_pressure_chart(&res->full); + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "I/O full pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "io_full_pressure_stall_time" + , NULL + , "disk" + , "cgroup.io_full_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2460 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_labels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } else { + rrdset_next(pcs->total_time.st); + } + update_pressure_charts(pcs); } } } @@ -4417,6 +4880,8 @@ void update_cgroup_charts(int update_every) { // cgroups main static void cgroup_main_cleanup(void *ptr) { + worker_unregister(); + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; @@ -4455,18 +4920,21 @@ static void cgroup_main_cleanup(void *ptr) { } void *cgroups_main(void *ptr) { - netdata_thread_cleanup_push(cgroup_main_cleanup, ptr); + worker_register("CGROUPS"); + worker_register_job_name(WORKER_CGROUPS_LOCK, "lock"); + worker_register_job_name(WORKER_CGROUPS_READ, "read"); + worker_register_job_name(WORKER_CGROUPS_CHART, "chart"); - struct rusage thread; + netdata_thread_cleanup_push(cgroup_main_cleanup, ptr); - // when ZERO, attempt to do it - int vdo_cpu_netdata = config_get_boolean("plugin:cgroups", "cgroups plugin resource charts", 1); + if (getenv("KUBERNETES_SERVICE_HOST") != NULL && getenv("KUBERNETES_SERVICE_PORT") != NULL) { + is_inside_k8s = 1; + cgroup_enable_cpuacct_cpu_shares = CONFIG_BOOLEAN_YES; + } read_cgroup_plugin_configuration(); netdata_cgroup_ebpf_initialize_shm(); - RRDSET *stcpu_thread = NULL; - if (uv_mutex_init(&cgroup_root_mutex)) { error("CGROUP: cannot initialize mutex for the main cgroup list"); goto exit; @@ -4498,57 +4966,34 @@ void *cgroups_main(void *ptr) { usec_t find_every = cgroup_check_for_new_every * USEC_PER_SEC, find_dt = 0; while(!netdata_exit) { + worker_is_idle(); + usec_t hb_dt = heartbeat_next(&hb, step); if(unlikely(netdata_exit)) break; find_dt += hb_dt; - if(unlikely(find_dt >= find_every || cgroups_check)) { + if (unlikely(find_dt >= find_every || (!is_inside_k8s && cgroups_check))) { uv_cond_signal(&discovery_thread.cond_var); discovery_thread.start_discovery = 1; find_dt = 0; cgroups_check = 0; } + worker_is_busy(WORKER_CGROUPS_LOCK); uv_mutex_lock(&cgroup_root_mutex); - read_all_cgroups(cgroup_root); - update_cgroup_charts(cgroup_update_every); - uv_mutex_unlock(&cgroup_root_mutex); - - // -------------------------------------------------------------------- - if(vdo_cpu_netdata) { - getrusage(RUSAGE_THREAD, &thread); + worker_is_busy(WORKER_CGROUPS_READ); + read_all_discovered_cgroups(cgroup_root); - if(unlikely(!stcpu_thread)) { - - stcpu_thread = rrdset_create_localhost( - "netdata" - , "plugin_cgroups_cpu" - , NULL - , "cgroups" - , NULL - , "Netdata CGroups Plugin CPU usage" - , "milliseconds/s" - , PLUGIN_CGROUPS_NAME - , "stats" - , 132000 - , cgroup_update_every - , RRDSET_TYPE_STACKED - ); - - rrddim_add(stcpu_thread, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); - rrddim_add(stcpu_thread, "system", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); - } - else - rrdset_next(stcpu_thread); + worker_is_busy(WORKER_CGROUPS_CHART); + update_cgroup_charts(cgroup_update_every); - rrddim_set(stcpu_thread, "user" , thread.ru_utime.tv_sec * 1000000ULL + thread.ru_utime.tv_usec); - rrddim_set(stcpu_thread, "system", thread.ru_stime.tv_sec * 1000000ULL + thread.ru_stime.tv_usec); - rrdset_done(stcpu_thread); - } + worker_is_idle(); + uv_mutex_unlock(&cgroup_root_mutex); } exit: + worker_unregister(); netdata_thread_cleanup_pop(1); return NULL; } diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.h b/collectors/cgroups.plugin/sys_fs_cgroup.h index 85968a4da..8301ec26a 100644 --- a/collectors/cgroups.plugin/sys_fs_cgroup.h +++ b/collectors/cgroups.plugin/sys_fs_cgroup.h @@ -39,6 +39,6 @@ typedef struct netdata_ebpf_cgroup_shm { #include "../proc.plugin/plugin_proc.h" -extern char *parse_k8s_data(struct label **labels, char *data); +extern char *k8s_parse_resolved_name(struct label **labels, char *data); #endif //NETDATA_SYS_FS_CGROUP_H diff --git a/collectors/cgroups.plugin/tests/test_cgroups_plugin.c b/collectors/cgroups.plugin/tests/test_cgroups_plugin.c index be8ea2c48..057ac9280 100644 --- a/collectors/cgroups.plugin/tests/test_cgroups_plugin.c +++ b/collectors/cgroups.plugin/tests/test_cgroups_plugin.c @@ -8,7 +8,7 @@ int netdata_zero_metrics_enabled = 1; struct config netdata_config; char *netdata_configured_primary_plugins_dir = NULL; -static void test_parse_k8s_data(void **state) +static void test_k8s_parse_resolved_name(void **state) { UNUSED(state); @@ -89,7 +89,7 @@ static void test_parse_k8s_data(void **state) expect_value(__wrap_add_label_to_list, label_source, LABEL_SOURCE_KUBERNETES); } - char *name = parse_k8s_data(&labels, data); + char *name = k8s_parse_resolved_name(&labels, data); assert_string_equal(name, test_data[i].name); assert_ptr_equal(labels, 0xff); @@ -101,10 +101,10 @@ static void test_parse_k8s_data(void **state) int main(void) { const struct CMUnitTest tests[] = { - cmocka_unit_test(test_parse_k8s_data), + cmocka_unit_test(test_k8s_parse_resolved_name), }; - int test_res = cmocka_run_group_tests_name("test_parse_k8s_data", tests, NULL, NULL); + int test_res = cmocka_run_group_tests_name("test_k8s_parse_resolved_name", tests, NULL, NULL); return test_res; } diff --git a/collectors/cgroups.plugin/tests/test_doubles.c b/collectors/cgroups.plugin/tests/test_doubles.c index 5572fb2f5..9cefa6c98 100644 --- a/collectors/cgroups.plugin/tests/test_doubles.c +++ b/collectors/cgroups.plugin/tests/test_doubles.c @@ -142,9 +142,9 @@ void rrdset_done(RRDSET *st) UNUSED(st); } -void update_pressure_chart(struct pressure_chart *chart) +void update_pressure_charts(struct pressure_charts *charts) { - UNUSED(chart); + UNUSED(charts); } void netdev_rename_device_add( |