diff options
Diffstat (limited to '')
-rw-r--r-- | collectors/cgroups.plugin/Makefile.am | 13 | ||||
-rw-r--r-- | collectors/cgroups.plugin/README.md | 308 | ||||
-rwxr-xr-x | collectors/cgroups.plugin/cgroup-name.sh | 597 | ||||
-rwxr-xr-x | collectors/cgroups.plugin/cgroup-network-helper.sh | 302 | ||||
-rw-r--r-- | collectors/cgroups.plugin/cgroup-network.c | 723 | ||||
-rw-r--r-- | collectors/cgroups.plugin/sys_fs_cgroup.c | 4887 | ||||
-rw-r--r-- | collectors/cgroups.plugin/sys_fs_cgroup.h | 44 | ||||
-rw-r--r-- | collectors/cgroups.plugin/tests/test_cgroups_plugin.c | 131 | ||||
-rw-r--r-- | collectors/cgroups.plugin/tests/test_cgroups_plugin.h | 16 | ||||
-rw-r--r-- | collectors/cgroups.plugin/tests/test_doubles.c | 157 |
10 files changed, 7178 insertions, 0 deletions
diff --git a/collectors/cgroups.plugin/Makefile.am b/collectors/cgroups.plugin/Makefile.am new file mode 100644 index 0000000..354b9fb --- /dev/null +++ b/collectors/cgroups.plugin/Makefile.am @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_plugins_SCRIPTS = \ + cgroup-name.sh \ + cgroup-network-helper.sh \ + $(NULL) + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/collectors/cgroups.plugin/README.md b/collectors/cgroups.plugin/README.md new file mode 100644 index 0000000..d0f822e --- /dev/null +++ b/collectors/cgroups.plugin/README.md @@ -0,0 +1,308 @@ +<!-- +title: "cgroups.plugin" +custom_edit_url: https://github.com/netdata/netdata/edit/master/collectors/cgroups.plugin/README.md +--> + +# cgroups.plugin + +You can monitor containers and virtual machines using **cgroups**. + +cgroups (or control groups) are a Linux kernel feature that provides accounting and resource usage limiting for +processes. When cgroups are bundled with namespaces (i.e. isolation), they form what we usually call **containers**. + +cgroups are hierarchical, meaning that cgroups can contain child cgroups, which can contain more cgroups, etc. All +accounting is reported (and resource usage limits are applied) also in a hierarchical way. + +To visualize cgroup metrics Netdata provides configuration for cherry picking the cgroups of interest. By default ( +without any configuration) Netdata should pick **systemd services**, all kinds of **containers** (lxc, docker, etc) +and **virtual machines** spawned by managers that register them with cgroups (qemu, libvirt, etc). + +## Configuring Netdata for cgroups + +In general, no additional settings are required. Netdata discovers all available cgroups on the host system and +collects their metrics. 
+ +### how Netdata finds the available cgroups + +Linux exposes resource usage reporting and provides dynamic configuration for cgroups, using virtual files (usually) +under `/sys/fs/cgroup`. Netdata reads `/proc/self/mountinfo` to detect the exact mount point of cgroups. Netdata also +allows manual configuration of this mount point, using these settings: + +```text +[plugin:cgroups] + check for new cgroups every = 10 + path to /sys/fs/cgroup/cpuacct = /sys/fs/cgroup/cpuacct + path to /sys/fs/cgroup/blkio = /sys/fs/cgroup/blkio + path to /sys/fs/cgroup/memory = /sys/fs/cgroup/memory + path to /sys/fs/cgroup/devices = /sys/fs/cgroup/devices +``` + +Netdata rescans these directories for added or removed cgroups every `check for new cgroups every` seconds. + +### hierarchical search for cgroups + +Since cgroups are hierarchical, for each of the directories shown above, Netdata walks through the subdirectories +recursively searching for cgroups (each subdirectory is another cgroup). + +To provide a sane default for this setting, Netdata uses the following pattern list (patterns starting with `!` give a +negative match and their order is important: the first matching a path will be used): + +```text +[plugin:cgroups] + search for cgroups in subpaths matching = !*/init.scope !*-qemu !/init.scope !/system !/systemd !/user !/user.slice * +``` + +So, we disable checking for **child cgroups** in systemd internal +cgroups ([systemd services are monitored by Netdata](#monitoring-systemd-services)), user cgroups (normally used for +desktop and remote user sessions), qemu virtual machines (child cgroups of virtual machines) and `init.scope`. All +others are enabled. + +### unified cgroups (cgroups v2) support + +Netdata automatically detects cgroups version. If detection fails Netdata assumes v1. 
+To switch to v2 manually add: + +```text +[plugin:cgroups] + use unified cgroups = yes + path to unified cgroups = /sys/fs/cgroup +``` + +Unified cgroups use the same name pattern matching as v1 cgroups. `cgroup_enable_systemd_services_detailed_memory` is +currently unsupported when using unified cgroups. + +### enabled cgroups + +To provide a sane default, Netdata uses the +following [pattern list](https://learn.netdata.cloud/docs/agent/libnetdata/simple_pattern): + +- checks the pattern against the path of the cgroup + + ```text + [plugin:cgroups] + enable by default cgroups matching = !*/init.scope *.scope !*/vcpu* !*/emulator !*.mount !*.partition !*.service !*.slice !*.swap !*.user !/ !/docker !/libvirt !/lxc !/lxc/*/ns !/lxc/*/ns/* !/machine !/qemu !/system !/systemd !/user * + ``` + +- checks the pattern against the name of the cgroup (as you see it on the dashboard) + + ```text + [plugin:cgroups] + enable by default cgroups names matching = * + ``` + +Renaming is configured with the following options: + +```text +[plugin:cgroups] + run script to rename cgroups matching = *.scope *docker* *lxc* *qemu* !/ !*.mount !*.partition !*.service !*.slice !*.swap !*.user * + script to get cgroup names = /usr/libexec/netdata/plugins.d/cgroup-name.sh +``` + +The whole point of the additional pattern list is to limit the number of times the script will be called. Without this +pattern list, the script might be called thousands of times, depending on the number of cgroups available in the system. + +The above pattern list is matched against the path of the cgroup. For matched cgroups, Netdata calls the +script [cgroup-name.sh](https://raw.githubusercontent.com/netdata/netdata/master/collectors/cgroups.plugin/cgroup-name.sh) +to get its name. This script queries `docker`, `kubectl`, `podman`, or applies heuristics to find a name for the +cgroup. 
+ +#### Note on Podman container names + +Podman's security model is a lot more restrictive than Docker's, so Netdata will not be able to detect container names +out of the box unless they were started by the same user as Netdata itself. + +If Podman is used in "rootful" mode, it's also possible to use `podman system service` to grant Netdata access to +container names. To do this, ensure `podman system service` is running and Netdata has access +to `/run/podman/podman.sock` (the default permissions as specified by upstream are `0600`, with owner `root`, so you +will have to adjust the configuration). + +[docker-socket-proxy](https://github.com/Tecnativa/docker-socket-proxy) can also be used to give Netdata restricted +access to the socket. Note that `PODMAN_HOST` in Netdata's environment should be set to the proxy's URL in this case. + +### charts with zero metrics + +By default, Netdata will enable monitoring metrics only when they are not zero. If they are constantly zero they are +ignored. Metrics that will start having values, after Netdata is started, will be detected and charts will be +automatically added to the dashboard (a refresh of the dashboard is needed for them to appear though). Set `yes` for a +chart instead of `auto` to enable it permanently. For example: + +```text +[plugin:cgroups] + enable memory (used mem including cache) = yes +``` + +You can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero +metrics for all internal Netdata plugins. + +### alarms + +CPU and memory limits are watched and used to raise alarms. Memory usage for every cgroup is checked against `ram` +and `ram+swap` limits. CPU usage for every cgroup is checked against `cpuset.cpus` and `cpu.cfs_period_us` + `cpu.cfs_quota_us` pair assigned for the cgroup. Configuration for the alarms is available in the `health.d/cgroups.conf` +file. + +## Monitoring systemd services + +Netdata monitors **systemd services**. 
Example: + +![image](https://cloud.githubusercontent.com/assets/2662304/21964372/20cd7b84-db53-11e6-98a2-b9c986b082c0.png) + +Support per distribution: + +| system | charts shown | `/sys/fs/cgroup` tree | comments | +|:----------------:|:------------:|:------------------------------------:|:--------------------------| +| Arch Linux | YES | | | +| Gentoo | NO | | can be enabled, see below | +| Ubuntu 16.04 LTS | YES | | | +| Ubuntu 16.10 | YES | [here](http://pastebin.com/PiWbQEXy) | | +| Fedora 25 | YES | [here](http://pastebin.com/ax0373wF) | | +| Debian 8 | NO | | can be enabled, see below | +| AMI | NO | [here](http://pastebin.com/FrxmptjL) | not a systemd system | +| CentOS 7.3.1611 | NO | [here](http://pastebin.com/SpzgezAg) | can be enabled, see below | + +### Monitored systemd service metrics + +- CPU utilization +- Used memory +- RSS memory +- Mapped memory +- Cache memory +- Writeback memory +- Memory minor page faults +- Memory major page faults +- Memory charging activity +- Memory uncharging activity +- Memory limit failures +- Swap memory used +- Disk read bandwidth +- Disk write bandwidth +- Disk read operations +- Disk write operations +- Throttle disk read bandwidth +- Throttle disk write bandwidth +- Throttle disk read operations +- Throttle disk write operations +- Queued disk read operations +- Queued disk write operations +- Merged disk read operations +- Merged disk write operations + +### how to enable cgroup accounting on systemd systems that is by default disabled + +You can verify there is no accounting enabled, by running `systemd-cgtop`. The program will show only resources for +cgroup `/`, but all services will show nothing. 
+ +To enable cgroup accounting, execute this: + +```sh +sed -e 's|^#Default\(.*\)Accounting=.*$|Default\1Accounting=yes|g' /etc/systemd/system.conf >/tmp/system.conf +``` + +To see the changes it made, run this: + +```sh +# diff /etc/systemd/system.conf /tmp/system.conf +40,44c40,44 +< #DefaultCPUAccounting=no +< #DefaultIOAccounting=no +< #DefaultBlockIOAccounting=no +< #DefaultMemoryAccounting=no +< #DefaultTasksAccounting=yes +--- +> DefaultCPUAccounting=yes +> DefaultIOAccounting=yes +> DefaultBlockIOAccounting=yes +> DefaultMemoryAccounting=yes +> DefaultTasksAccounting=yes +``` + +If you are happy with the changes, run: + +```sh +# copy the file to the right location +sudo cp /tmp/system.conf /etc/systemd/system.conf + +# restart systemd to take it into account +sudo systemctl daemon-reexec +``` + +(`systemctl daemon-reload` does not reload the configuration of the server - so you have to +execute `systemctl daemon-reexec`). + +Now, when you run `systemd-cgtop`, services will start reporting usage (if it does not, restart any service to wake it up). Refresh your Netdata dashboard, and you will have the charts too. + +In case memory accounting is missing, you will need to enable it at your kernel, by appending the following kernel boot +options and rebooting: + +```sh +cgroup_enable=memory swapaccount=1 +``` + +You can add the above, directly at the `linux` line in your `/boot/grub/grub.cfg` or appending them to +the `GRUB_CMDLINE_LINUX` in `/etc/default/grub` (in which case you will have to run `update-grub` before rebooting). On +DigitalOcean debian images you may have to set it at `/etc/default/grub.d/50-cloudimg-settings.cfg`. 
+ +Which systemd services are monitored by Netdata is determined by the following pattern list: + +```text +[plugin:cgroups] + cgroups to match as systemd services = !/system.slice/*/*.service /system.slice/*.service +``` + +- - - + +## Monitoring ephemeral containers + +Netdata monitors containers automatically when it is installed at the host, or when it is installed in a container that +has access to the `/proc` and `/sys` filesystems of the host. + +Netdata prior to v1.6 had 2 issues when such containers were monitored: + +1. network interface alarms were triggering when containers were stopped + +2. charts were never cleaned up, so after some time dozens of containers were showing up on the dashboard, and they were + occupying memory. + +### the current Netdata + +network interfaces and cgroups (containers) are now self-cleaned. + +So, when a network interface or container stops, Netdata might log a few errors in error.log complaining about files it +cannot find, but immediately: + +1. it will detect this is a removed container or network interface +2. it will freeze/pause all alarms for them +3. it will mark their charts as obsolete +4. obsolete charts are not offered on new dashboard sessions (so hit F5 and the charts are gone) +5. existing dashboard sessions will continue to see them, but of course they will not refresh +6. obsolete charts will be removed from memory, 1 hour after the last user viewed them (configurable + with `[global].cleanup obsolete charts after seconds = 3600` at `netdata.conf`). +7. 
when obsolete charts are removed from memory they are also deleted from disk (configurable + with `[global].delete obsolete charts files = yes`) + +### Monitored container metrics + +- CPU usage +- CPU usage within the limits +- CPU usage per core +- Memory usage +- Writeback memory +- Memory activity +- Memory page faults +- Used memory +- Used RAM within the limits +- Memory utilization +- Memory limit failures +- I/O bandwidth (all disks) +- Serviced I/O operations (all disks) +- Throttle I/O bandwidth (all disks) +- Throttle serviced I/O operations (all disks) +- Queued I/O operations (all disks) +- Merged I/O operations (all disks) +- CPU pressure +- Memory pressure +- Memory full pressure +- I/O pressure +- I/O full pressure + +Network interfaces are monitored by means of +the [proc plugin](/collectors/proc.plugin/README.md#monitored-network-interface-metrics). diff --git a/collectors/cgroups.plugin/cgroup-name.sh b/collectors/cgroups.plugin/cgroup-name.sh new file mode 100755 index 0000000..55b02ac --- /dev/null +++ b/collectors/cgroups.plugin/cgroup-name.sh @@ -0,0 +1,597 @@ +#!/usr/bin/env bash +#shellcheck disable=SC2001 + +# netdata +# real-time performance and health monitoring, done right! 
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# SPDX-License-Identifier: GPL-3.0-or-later +# +# Script to find a better name for cgroups +# + +export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin" +export LC_ALL=C + +# ----------------------------------------------------------------------------- + +PROGRAM_NAME="$(basename "${0}")" + +logdate() { + date "+%Y-%m-%d %H:%M:%S" +} + +log() { + local status="${1}" + shift + + echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}" + +} + +warning() { + log WARNING "${@}" +} + +error() { + log ERROR "${@}" +} + +info() { + log INFO "${@}" +} + +fatal() { + log FATAL "${@}" + exit 1 +} + +function parse_docker_like_inspect_output() { + local output="${1}" + eval "$(grep -E "^(NOMAD_NAMESPACE|NOMAD_JOB_NAME|NOMAD_TASK_NAME|NOMAD_SHORT_ALLOC_ID|CONT_NAME)=" <<<"$output")" + if [ -n "$NOMAD_NAMESPACE" ] && [ -n "$NOMAD_JOB_NAME" ] && [ -n "$NOMAD_TASK_NAME" ] && [ -n "$NOMAD_SHORT_ALLOC_ID" ]; then + echo "${NOMAD_NAMESPACE}-${NOMAD_JOB_NAME}-${NOMAD_TASK_NAME}-${NOMAD_SHORT_ALLOC_ID}" + else + echo "${CONT_NAME}" | sed 's|^/||' + fi +} + +function docker_like_get_name_command() { + local command="${1}" + local id="${2}" + info "Running command: ${command} inspect --format='{{range .Config.Env}}{{println .}}{{end}}CONT_NAME={{ .Name}}' \"${id}\"" + if OUTPUT="$(${command} inspect --format='{{range .Config.Env}}{{println .}}{{end}}CONT_NAME={{ .Name}}' "${id}")" && + [ -n "$OUTPUT" ]; then + NAME="$(parse_docker_like_inspect_output "$OUTPUT")" + fi + return 0 +} + +function docker_like_get_name_api() { + local host_var="${1}" + local host="${!host_var}" + local path="/containers/${2}/json" + if [ -z "${host}" ]; then + warning "No ${host_var} is set" + return 1 + fi + if ! command -v jq >/dev/null 2>&1; then + warning "Can't find jq command line tool. 
jq is required for netdata to retrieve container name using ${host} API, falling back to docker ps" + return 1 + fi + if [ -S "${host}" ]; then + info "Running API command: curl --unix-socket \"${host}\" http://localhost${path}" + JSON=$(curl -sS --unix-socket "${host}" "http://localhost${path}") + else + info "Running API command: curl \"${host}${path}\"" + JSON=$(curl -sS "${host}${path}") + fi + if OUTPUT=$(echo "${JSON}" | jq -r '.Config.Env[],"CONT_NAME=\(.Name)"') && [ -n "$OUTPUT" ]; then + NAME="$(parse_docker_like_inspect_output "$OUTPUT")" + fi + return 0 +} + +# get_lbl_val returns the value for the label with the given name. +# Returns "null" string if the label doesn't exist. +# Expected labels format: 'name="value",...'. +function get_lbl_val() { + local labels want_name + labels="${1}" + want_name="${2}" + + IFS=, read -ra labels <<< "$labels" + + local lname lval + for l in "${labels[@]}"; do + IFS="=" read -r lname lval <<< "$l" + if [ "$want_name" = "$lname" ] && [ -n "$lval" ]; then + echo "${lval:1:-1}" # trim " + return 0 + fi + done + + echo "null" + return 1 +} + +function add_lbl_prefix() { + local orig_labels prefix + orig_labels="${1}" + prefix="${2}" + + IFS=, read -ra labels <<< "$orig_labels" + + local new_labels + for l in "${labels[@]}"; do + new_labels+="${prefix}${l}," + done + + echo "${new_labels:0:-1}" # trim last ',' +} + +function k8s_is_pause_container() { + local cgroup_path="${1}" + + local file + if [ -d "${NETDATA_HOST_PREFIX}/sys/fs/cgroup/cpuacct" ]; then + file="${NETDATA_HOST_PREFIX}/sys/fs/cgroup/cpuacct/$cgroup_path/cgroup.procs" + else + file="${NETDATA_HOST_PREFIX}/sys/fs/cgroup/$cgroup_path/cgroup.procs" + fi + + [ ! 
-f "$file" ] && return 1 + + local procs + IFS= read -rd' ' procs 2>/dev/null <"$file" + #shellcheck disable=SC2206 + procs=($procs) + + [ "${#procs[@]}" -ne 1 ] && return 1 + + IFS= read -r comm 2>/dev/null <"/proc/${procs[0]}/comm" + + [ "$comm" == "pause" ] + return +} + +function k8s_gcp_get_cluster_name() { + local header url id loc name + header="Metadata-Flavor: Google" + url="http://metadata/computeMetadata/v1" + if id=$(curl --fail -s -m 3 --noproxy "*" -H "$header" "$url/project/project-id") && + loc=$(curl --fail -s -m 3 --noproxy "*" -H "$header" "$url/instance/attributes/cluster-location") && + name=$(curl --fail -s -m 3 --noproxy "*" -H "$header" "$url/instance/attributes/cluster-name") && + [ -n "$id" ] && [ -n "$loc" ] && [ -n "$name" ]; then + echo "gke_${id}_${loc}_${name}" + return 0 + fi + return 1 +} + +# k8s_get_kubepod_name resolves */kubepods/* cgroup name. +# pod level cgroup name format: 'pod_<namespace>_<pod_name>' +# container level cgroup name format: 'cntr_<namespace>_<pod_name>_<container_name>' +function k8s_get_kubepod_name() { + # GKE /sys/fs/cgroup/*/ (cri=docker, cgroups=v1): + # |-- kubepods + # | |-- burstable + # | | |-- pod98cee708-023b-11eb-933d-42010a800193 + # | | | |-- 922161c98e6ea450bf665226cdc64ca2aa3e889934c2cff0aec4325f8f78ac03 + # | `-- pode314bbac-d577-11ea-a171-42010a80013b + # | |-- 7d505356b04507de7b710016d540b2759483ed5f9136bb01a80872b08f771930 + # + # GKE /sys/fs/cgroup/*/ (cri=containerd, cgroups=v1): + # |-- kubepods.slice + # | |-- kubepods-besteffort.slice + # | | |-- kubepods-besteffort-pode1465238_4518_4c21_832f_fd9f87033dad.slice + # | | | |-- cri-containerd-66be9b2efdf4d85288c319b8c1a2f50d2439b5617e36f45d9d0d0be1381113be.scope + # | `-- kubepods-pod91f5b561_369f_4103_8015_66391059996a.slice + # | |-- cri-containerd-24c53b774a586f06abc058619b47f71d9d869ac50c92898adbd199106fd0aaeb.scope + # + # GKE /sys/fs/cgroup/*/ (cri=crio, cgroups=v1): + # |-- kubepods.slice + # | |-- kubepods-besteffort.slice + # | 
| |-- kubepods-besteffort-podad412dfe_3589_4056_965a_592356172968.slice + # | | | |-- crio-77b019312fd9825828b70214b2c94da69c30621af2a7ee06f8beace4bc9439e5.scope + # + # Minikube (v1.8.2) /sys/fs/cgroup/*/ (cri=docker, cgroups=v1): + # |-- kubepods.slice + # | |-- kubepods-besteffort.slice + # | | |-- kubepods-besteffort-pod10fb5647_c724_400c_b9cc_0e6eae3110e7.slice + # | | | |-- docker-36e5eb5056dfdf6dbb75c0c44a1ecf23217fe2c50d606209d8130fcbb19fb5a7.scope + # + # kind v0.14.0 + # |-- kubelet.slice + # | |-- kubelet-kubepods.slice + # | | |-- kubelet-kubepods-besteffort.slice + # | | | |-- kubelet-kubepods-besteffort-pod7881ed9e_c63e_4425_b5e0_ac55a08ae939.slice + # | | | | |-- cri-containerd-00c7939458bffc416bb03451526e9fde13301d6654cfeadf5b4964a7fb5be1a9.scope + # + # NOTE: cgroups plugin + # - uses '_' to join dir names (so it is <parent>_<child>_<child>_...) + # - replaces '.' with '-' + + local fn="${FUNCNAME[0]}" + local cgroup_path="${1}" + local id="${2}" + + if [[ ! $id =~ ^.*kubepods.* ]]; then + warning "${fn}: '${id}' is not kubepod cgroup." 
+ return 1 + fi + + local clean_id="$id" + clean_id=${clean_id//.slice/} + clean_id=${clean_id//.scope/} + + local name pod_uid cntr_id + if [[ $clean_id == "kubepods" ]]; then + name="$clean_id" + elif [[ $clean_id =~ .+(besteffort|burstable|guaranteed)$ ]]; then + # kubepods_<QOS_CLASS> + # kubepods_kubepods-<QOS_CLASS> + name=${clean_id//-/_} + name=${name/#kubepods_kubepods/kubepods} + elif [[ $clean_id =~ .+pod[a-f0-9_-]+_(docker|crio|cri-containerd)-([a-f0-9]+)$ ]]; then + # ...pod<POD_UID>_(docker|crio|cri-containerd)-<CONTAINER_ID> (POD_UID w/ "_") + cntr_id=${BASH_REMATCH[2]} + elif [[ $clean_id =~ .+pod[a-f0-9-]+_([a-f0-9]+)$ ]]; then + # ...pod<POD_UID>_<CONTAINER_ID> + cntr_id=${BASH_REMATCH[1]} + elif [[ $clean_id =~ .+pod([a-f0-9_-]+)$ ]]; then + # ...pod<POD_UID> (POD_UID w/ and w/o "_") + pod_uid=${BASH_REMATCH[1]} + pod_uid=${pod_uid//_/-} + fi + + if [ -n "$name" ]; then + echo "$name" + return 0 + fi + + if [ -z "$pod_uid" ] && [ -z "$cntr_id" ]; then + warning "${fn}: can't extract pod_uid or container_id from the cgroup '$id'." + return 3 + fi + + [ -n "$pod_uid" ] && info "${fn}: cgroup '$id' is a pod(uid:$pod_uid)" + [ -n "$cntr_id" ] && info "${fn}: cgroup '$id' is a container(id:$cntr_id)" + + if [ -n "$cntr_id" ] && k8s_is_pause_container "$cgroup_path"; then + return 3 + fi + + if ! command -v jq > /dev/null 2>&1; then + warning "${fn}: 'jq' command not available." 
+ return 1 + fi + + local tmp_kube_cluster_name="${TMPDIR:-"/tmp"}/netdata-cgroups-k8s-cluster-name" + local tmp_kube_system_ns_uid_file="${TMPDIR:-"/tmp"}/netdata-cgroups-kubesystem-uid" + local tmp_kube_containers_file="${TMPDIR:-"/tmp"}/netdata-cgroups-containers" + + local kube_cluster_name + local kube_system_uid + local labels + + if [ -n "$cntr_id" ] && + [ -f "$tmp_kube_cluster_name" ] && + [ -f "$tmp_kube_system_ns_uid_file" ] && + [ -f "$tmp_kube_containers_file" ] && + labels=$(grep "$cntr_id" "$tmp_kube_containers_file" 2>/dev/null); then + IFS= read -r kube_system_uid 2>/dev/null <"$tmp_kube_system_ns_uid_file" + IFS= read -r kube_cluster_name 2>/dev/null <"$tmp_kube_cluster_name" + else + IFS= read -r kube_system_uid 2>/dev/null <"$tmp_kube_system_ns_uid_file" + IFS= read -r kube_cluster_name 2>/dev/null <"$tmp_kube_cluster_name" + [ -z "$kube_cluster_name" ] && ! kube_cluster_name=$(k8s_gcp_get_cluster_name) && kube_cluster_name="unknown" + + local kube_system_ns + local pods + + if [ -n "${KUBERNETES_SERVICE_HOST}" ] && [ -n "${KUBERNETES_PORT_443_TCP_PORT}" ]; then + local token header host url + token="$(</var/run/secrets/kubernetes.io/serviceaccount/token)" + header="Authorization: Bearer $token" + host="$KUBERNETES_SERVICE_HOST:$KUBERNETES_PORT_443_TCP_PORT" + + if [ -z "$kube_system_uid" ]; then + url="https://$host/api/v1/namespaces/kube-system" + # FIX: check HTTP response code + if ! kube_system_ns=$(curl --fail -sSk -H "$header" "$url" 2>&1); then + warning "${fn}: error on curl '${url}': ${kube_system_ns}." + fi + fi + + url="https://$host/api/v1/pods" + [ -n "$MY_NODE_NAME" ] && url+="?fieldSelector=spec.nodeName==$MY_NODE_NAME" + # FIX: check HTTP response code + if ! pods=$(curl --fail -sSk -H "$header" "$url" 2>&1); then + warning "${fn}: error on curl '${url}': ${pods}." + return 1 + fi + elif ps -C kubelet >/dev/null 2>&1 && command -v kubectl >/dev/null 2>&1; then + if [ -z "$kube_system_uid" ]; then + if ! 
kube_system_ns=$(kubectl --kubeconfig="$KUBE_CONFIG" get namespaces kube-system -o json 2>&1); then + warning "${fn}: error on 'kubectl': ${kube_system_ns}." + fi + fi + + [[ -z ${KUBE_CONFIG+x} ]] && KUBE_CONFIG="/etc/kubernetes/admin.conf" + if ! pods=$(kubectl --kubeconfig="$KUBE_CONFIG" get pods --all-namespaces -o json 2>&1); then + warning "${fn}: error on 'kubectl': ${pods}." + return 1 + fi + else + warning "${fn}: not inside the k8s cluster and 'kubectl' command not available." + return 1 + fi + + if [ -n "$kube_system_ns" ] && ! kube_system_uid=$(jq -r '.metadata.uid' <<<"$kube_system_ns" 2>&1); then + warning "${fn}: error on 'jq' parse kube_system_ns: ${kube_system_uid}." + fi + + local jq_filter + jq_filter+='.items[] | "' + jq_filter+='namespace=\"\(.metadata.namespace)\",' + jq_filter+='pod_name=\"\(.metadata.name)\",' + jq_filter+='pod_uid=\"\(.metadata.uid)\",' + #jq_filter+='\(.metadata.labels | to_entries | map("pod_label_"+.key+"=\""+.value+"\"") | join(",") | if length > 0 then .+"," else . end)' + jq_filter+='\((.metadata.ownerReferences[]? | select(.controller==true) | "controller_kind=\""+.kind+"\",controller_name=\""+.name+"\",") // "")' + jq_filter+='node_name=\"\(.spec.nodeName)\",' + jq_filter+='" + ' + jq_filter+='(.status.containerStatuses[]? | "' + jq_filter+='container_name=\"\(.name)\",' + jq_filter+='container_id=\"\(.containerID)\"' + jq_filter+='") | ' + jq_filter+='sub("(docker|cri-o|containerd)://";"")' # containerID: docker://a346da9bc0e3eaba6b295f64ac16e02f2190db2cef570835706a9e7a36e2c722 + + local containers + if ! containers=$(jq -r "${jq_filter}" <<<"$pods" 2>&1); then + warning "${fn}: error on 'jq' parse pods: ${containers}." 
+ return 1 + fi + + [ -n "$kube_cluster_name" ] && echo "$kube_cluster_name" >"$tmp_kube_cluster_name" 2>/dev/null + [ -n "$kube_system_ns" ] && [ -n "$kube_system_uid" ] && echo "$kube_system_uid" >"$tmp_kube_system_ns_uid_file" 2>/dev/null + echo "$containers" >"$tmp_kube_containers_file" 2>/dev/null + fi + + local qos_class + if [[ $clean_id =~ .+(besteffort|burstable) ]]; then + qos_class="${BASH_REMATCH[1]}" + else + qos_class="guaranteed" + fi + + # available labels: + # namespace, pod_name, pod_uid, container_name, container_id, node_name + if [ -n "$cntr_id" ]; then + if [ -n "$labels" ] || labels=$(grep "$cntr_id" <<< "$containers" 2> /dev/null); then + labels+=',kind="container"' + labels+=",qos_class=\"$qos_class\"" + [ -n "$kube_system_uid" ] && [ "$kube_system_uid" != "null" ] && labels+=",cluster_id=\"$kube_system_uid\"" + [ -n "$kube_cluster_name" ] && [ "$kube_cluster_name" != "unknown" ] && labels+=",cluster_name=\"$kube_cluster_name\"" + name="cntr" + name+="_$(get_lbl_val "$labels" namespace)" + name+="_$(get_lbl_val "$labels" pod_name)" + name+="_$(get_lbl_val "$labels" container_name)" + labels=$(add_lbl_prefix "$labels" "k8s_") + name+=" $labels" + else + return 2 + fi + elif [ -n "$pod_uid" ]; then + if labels=$(grep "$pod_uid" -m 1 <<< "$containers" 2> /dev/null); then + labels="${labels%%,container_*}" + labels+=',kind="pod"' + labels+=",qos_class=\"$qos_class\"" + [ -n "$kube_system_uid" ] && [ "$kube_system_uid" != "null" ] && labels+=",cluster_id=\"$kube_system_uid\"" + [ -n "$kube_cluster_name" ] && [ "$kube_cluster_name" != "unknown" ] && labels+=",cluster_name=\"$kube_cluster_name\"" + name="pod" + name+="_$(get_lbl_val "$labels" namespace)" + name+="_$(get_lbl_val "$labels" pod_name)" + labels=$(add_lbl_prefix "$labels" "k8s_") + name+=" $labels" + else + return 2 + fi + fi + + # jq filter nonexistent field and nonexistent label value is 'null' + if [[ $name =~ _null(_|$) ]]; then + warning "${fn}: invalid name: $name (cgroup '$id')" 
+ return 1 + fi + + echo "$name" + [ -n "$name" ] + return +} + +function k8s_get_name() { + local fn="${FUNCNAME[0]}" + local cgroup_path="${1}" + local id="${2}" + + NAME=$(k8s_get_kubepod_name "$cgroup_path" "$id") + + case "$?" in + 0) + NAME="k8s_${NAME}" + + local name labels + name=${NAME%% *} + labels=${NAME#* } + if [ "$name" != "$labels" ]; then + info "${fn}: cgroup '${id}' has chart name '${name}', labels '${labels}" + else + info "${fn}: cgroup '${id}' has chart name '${NAME}'" + fi + EXIT_CODE=$EXIT_SUCCESS + ;; + 1) + NAME="k8s_${id}" + warning "${fn}: cannot find the name of cgroup with id '${id}'. Setting name to ${NAME} and enabling it." + EXIT_CODE=$EXIT_SUCCESS + ;; + 2) + NAME="k8s_${id}" + warning "${fn}: cannot find the name of cgroup with id '${id}'. Setting name to ${NAME} and asking for retry." + EXIT_CODE=$EXIT_RETRY + ;; + *) + NAME="k8s_${id}" + warning "${fn}: cannot find the name of cgroup with id '${id}'. Setting name to ${NAME} and disabling it." + EXIT_CODE=$EXIT_DISABLE + ;; + esac +} + +function docker_get_name() { + local id="${1}" + # See https://github.com/netdata/netdata/pull/13523 for details + if command -v snap >/dev/null 2>&1 && snap list docker >/dev/null 2>&1; then + docker_like_get_name_api DOCKER_HOST "${id}" + elif hash docker 2> /dev/null; then + docker_like_get_name_command docker "${id}" + else + docker_like_get_name_api DOCKER_HOST "${id}" || docker_like_get_name_command podman "${id}" + fi + if [ -z "${NAME}" ]; then + warning "cannot find the name of docker container '${id}'" + EXIT_CODE=$EXIT_RETRY + NAME="${id:0:12}" + else + info "docker container '${id}' is named '${NAME}'" + fi +} + +function docker_validate_id() { + local id="${1}" + if [ -n "${id}" ] && { [ ${#id} -eq 64 ] || [ ${#id} -eq 12 ]; }; then + docker_get_name "${id}" + else + error "a docker id cannot be extracted from docker cgroup '${CGROUP}'." 
+ fi +} + +function podman_get_name() { + local id="${1}" + + # for Podman, prefer using the API if we can, as netdata will not normally have access + # to other users' containers, so they will not be visible when running `podman ps` + docker_like_get_name_api PODMAN_HOST "${id}" || docker_like_get_name_command podman "${id}" + + if [ -z "${NAME}" ]; then + warning "cannot find the name of podman container '${id}'" + EXIT_CODE=$EXIT_RETRY + NAME="${id:0:12}" + else + info "podman container '${id}' is named '${NAME}'" + fi +} + +function podman_validate_id() { + local id="${1}" + if [ -n "${id}" ] && [ ${#id} -eq 64 ]; then + podman_get_name "${id}" + else + error "a podman id cannot be extracted from docker cgroup '${CGROUP}'." + fi +} + +# ----------------------------------------------------------------------------- + +DOCKER_HOST="${DOCKER_HOST:=/var/run/docker.sock}" +PODMAN_HOST="${PODMAN_HOST:=/run/podman/podman.sock}" +CGROUP_PATH="${1}" # the path as it is (e.g. '/docker/efcf4c409') +CGROUP="${2}" # the modified path (e.g. 'docker_efcf4c409') +EXIT_SUCCESS=0 +EXIT_RETRY=2 +EXIT_DISABLE=3 +EXIT_CODE=$EXIT_SUCCESS +NAME= + +# ----------------------------------------------------------------------------- + +if [ -z "${CGROUP}" ]; then + fatal "called without a cgroup name. Nothing to do." 
+fi + +if [ -z "${NAME}" ]; then + if [[ ${CGROUP} =~ ^.*kubepods.* ]]; then + k8s_get_name "${CGROUP_PATH}" "${CGROUP}" + fi +fi + +if [ -z "${NAME}" ]; then + if [[ ${CGROUP} =~ ^.*docker[-_/\.][a-fA-F0-9]+[-_\.]?.*$ ]]; then + # docker containers + #shellcheck disable=SC1117 + DOCKERID="$(echo "${CGROUP}" | sed "s|^.*docker[-_/]\([a-fA-F0-9]\+\)[-_\.]\?.*$|\1|")" + docker_validate_id "${DOCKERID}" + elif [[ ${CGROUP} =~ ^.*ecs[-_/\.][a-fA-F0-9]+[-_\.]?.*$ ]]; then + # ECS + #shellcheck disable=SC1117 + DOCKERID="$(echo "${CGROUP}" | sed "s|^.*ecs[-_/].*[-_/]\([a-fA-F0-9]\+\)[-_\.]\?.*$|\1|")" + docker_validate_id "${DOCKERID}" + elif [[ ${CGROUP} =~ system.slice_containerd.service_cpuset_[a-fA-F0-9]+[-_\.]?.*$ ]]; then + # docker containers under containerd + #shellcheck disable=SC1117 + DOCKERID="$(echo "${CGROUP}" | sed "s|^.*ystem.slice_containerd.service_cpuset_\([a-fA-F0-9]\+\)[-_\.]\?.*$|\1|")" + docker_validate_id "${DOCKERID}" + elif [[ ${CGROUP} =~ ^.*libpod-[a-fA-F0-9]+.*$ ]]; then + # Podman + PODMANID="$(echo "${CGROUP}" | sed "s|^.*libpod-\([a-fA-F0-9]\+\).*$|\1|")" + podman_validate_id "${PODMANID}" + + elif [[ ${CGROUP} =~ machine.slice[_/].*\.service ]]; then + # systemd-nspawn + NAME="$(echo "${CGROUP}" | sed 's/.*machine.slice[_\/]\(.*\)\.service/\1/g')" + + elif [[ ${CGROUP} =~ machine.slice_machine.*-lxc ]]; then + # libvirtd / lxc containers + # machine.slice machine-lxc/x2d969/x2dhubud0xians01.scope => lxc/hubud0xians01 + # machine.slice_machine-lxc/x2d969/x2dhubud0xians01.scope/libvirt_init.scope => lxc/hubud0xians01/libvirt_init + NAME="lxc/$(echo "${CGROUP}" | sed 's/machine.slice_machine.*-lxc//; s/[\/_]x2d[[:digit:]]*//; s/[\/_]x2d//g; s/\.scope//g')" + elif [[ ${CGROUP} =~ machine.slice_machine.*-qemu ]]; then + # libvirtd / qemu virtual machines + # machine.slice_machine-qemu_x2d1_x2dopnsense.scope => qemu_opnsense + NAME="qemu_$(echo "${CGROUP}" | sed 's/machine.slice_machine.*-qemu//; s/[\/_]x2d[[:digit:]]*//; s/[\/_]x2d//g; 
s/\.scope//g')" + + elif [[ ${CGROUP} =~ machine_.*\.libvirt-qemu ]]; then + # libvirtd / qemu virtual machines + NAME="qemu_$(echo "${CGROUP}" | sed 's/^machine_//; s/\.libvirt-qemu$//; s/-/_/;')" + + elif [[ ${CGROUP} =~ qemu.slice_([0-9]+).scope && -d /etc/pve ]]; then + # Proxmox VMs + + FILENAME="/etc/pve/qemu-server/${BASH_REMATCH[1]}.conf" + if [[ -f $FILENAME && -r $FILENAME ]]; then + NAME="qemu_$(grep -e '^name: ' "/etc/pve/qemu-server/${BASH_REMATCH[1]}.conf" | head -1 | sed -rn 's|\s*name\s*:\s*(.*)?$|\1|p')" + else + error "proxmox config file missing ${FILENAME} or netdata does not have read access. Please ensure netdata is a member of www-data group." + fi + elif [[ ${CGROUP} =~ lxc_([0-9]+) && -d /etc/pve ]]; then + # Proxmox Containers (LXC) + + FILENAME="/etc/pve/lxc/${BASH_REMATCH[1]}.conf" + if [[ -f ${FILENAME} && -r ${FILENAME} ]]; then + NAME=$(grep -e '^hostname: ' "/etc/pve/lxc/${BASH_REMATCH[1]}.conf" | head -1 | sed -rn 's|\s*hostname\s*:\s*(.*)?$|\1|p') + else + error "proxmox config file missing ${FILENAME} or netdata does not have read access. Please ensure netdata is a member of www-data group." 
+ fi + elif [[ ${CGROUP} =~ lxc.payload.* ]]; then + # LXC 4.0 + NAME="$(echo "${CGROUP}" | sed 's/lxc\.payload\.\(.*\)/\1/g')" + fi + + [ -z "${NAME}" ] && NAME="${CGROUP}" + [ ${#NAME} -gt 100 ] && NAME="${NAME:0:100}" +fi + +info "cgroup '${CGROUP}' is called '${NAME}'" +echo "${NAME}" + +exit ${EXIT_CODE} diff --git a/collectors/cgroups.plugin/cgroup-network-helper.sh b/collectors/cgroups.plugin/cgroup-network-helper.sh new file mode 100755 index 0000000..783332f --- /dev/null +++ b/collectors/cgroups.plugin/cgroup-network-helper.sh @@ -0,0 +1,302 @@ +#!/usr/bin/env bash +# shellcheck disable=SC1117 + +# cgroup-network-helper.sh +# detect container and virtual machine interfaces +# +# (C) 2017 Costa Tsaousis +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This script is called as root (by cgroup-network), with either a pid, or a cgroup path. +# It tries to find all the network interfaces that belong to the same cgroup. +# +# It supports several method for this detection: +# +# 1. cgroup-network (the binary father of this script) detects veth network interfaces, +# by examining iflink and ifindex IDs and switching namespaces +# (it also detects the interface name as it is used by the container). +# +# 2. this script, uses /proc/PID/fdinfo to find tun/tap network interfaces. +# +# 3. this script, calls virsh to find libvirt network interfaces. 
+# + +# ----------------------------------------------------------------------------- + +# the system path is cleared by cgroup-network +# shellcheck source=/dev/null +[ -f /etc/profile ] && source /etc/profile + +export LC_ALL=C + +PROGRAM_NAME="$(basename "${0}")" + +logdate() { + date "+%Y-%m-%d %H:%M:%S" +} + +log() { + local status="${1}" + shift + + echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}" + +} + +warning() { + log WARNING "${@}" +} + +error() { + log ERROR "${@}" +} + +info() { + log INFO "${@}" +} + +fatal() { + log FATAL "${@}" + exit 1 +} + +debug=${NETDATA_CGROUP_NETWORK_HELPER_DEBUG=0} +debug() { + [ "${debug}" = "1" ] && log DEBUG "${@}" +} + +# ----------------------------------------------------------------------------- +# check for BASH v4+ (required for associative arrays) + +[ $(( BASH_VERSINFO[0] )) -lt 4 ] && \ + fatal "BASH version 4 or later is required (this is ${BASH_VERSION})." + +# ----------------------------------------------------------------------------- +# parse the arguments + +pid= +cgroup= +while [ -n "${1}" ] +do + case "${1}" in + --cgroup) cgroup="${2}"; shift 1;; + --pid|-p) pid="${2}"; shift 1;; + --debug|debug) debug=1;; + *) fatal "Cannot understand argument '${1}'";; + esac + + shift +done + +if [ -z "${pid}" ] && [ -z "${cgroup}" ] +then + fatal "Either --pid or --cgroup is required" +fi + +# ----------------------------------------------------------------------------- + +set_source() { + [ ${debug} -eq 1 ] && echo "SRC ${*}" +} + + +# ----------------------------------------------------------------------------- +# veth interfaces via cgroup + +# cgroup-network can detect veth interfaces by itself (written in C). 
+# If you seek for a shell version of what it does, check this: +# https://github.com/netdata/netdata/issues/474#issuecomment-317866709 + + +# ----------------------------------------------------------------------------- +# tun/tap interfaces via /proc/PID/fdinfo + +# find any tun/tap devices linked to a pid +proc_pid_fdinfo_iff() { + local p="${1}" # the pid + + debug "Searching for tun/tap interfaces for pid ${p}..." + set_source "fdinfo" + grep "^iff:.*" "${NETDATA_HOST_PREFIX}/proc/${p}/fdinfo"/* 2>/dev/null | cut -f 2 +} + +find_tun_tap_interfaces_for_cgroup() { + local c="${1}" # the cgroup path + [ -d "${c}/emulator" ] && c="${c}/emulator" # check for 'emulator' subdirectory + c="${c}/cgroup.procs" # make full path + + # for each pid of the cgroup + # find any tun/tap devices linked to the pid + if [ -f "${c}" ] + then + local p + for p in $(< "${c}" ) + do + proc_pid_fdinfo_iff "${p}" + done + else + debug "Cannot find file '${c}', not searching for tun/tap interfaces." + fi +} + + +# ----------------------------------------------------------------------------- +# virsh domain network interfaces + +virsh_cgroup_to_domain_name() { + local c="${1}" # the cgroup path + + debug "extracting a possible virsh domain from cgroup ${c}..." 
+ + # extract for the cgroup path + sed -n -e "s|.*/machine-qemu\\\\x2d[0-9]\+\\\\x2d\(.*\)\.scope$|\1|p" \ + -e "s|.*/machine/qemu-[0-9]\+-\(.*\)\.libvirt-qemu$|\1|p" \ + -e "s|.*/machine/\(.*\)\.libvirt-qemu$|\1|p" \ + <<EOF +${c} +EOF +} + +virsh_find_all_interfaces_for_cgroup() { + local c="${1}" # the cgroup path + + # the virsh command + local virsh + # shellcheck disable=SC2230 + virsh="$(which virsh 2>/dev/null || command -v virsh 2>/dev/null)" + + if [ -n "${virsh}" ] + then + local d + d="$(virsh_cgroup_to_domain_name "${c}")" + # convert hex to character + # e.g.: vm01\x2dweb => vm01-web (https://github.com/netdata/netdata/issues/11088#issuecomment-832618149) + d="$(printf '%b' "${d}")" + + if [ -n "${d}" ] + then + debug "running: virsh domiflist ${d}; to find the network interfaces" + + # 'virsh -r domiflist <domain>' example output + # Interface Type Source Model MAC + #-------------------------------------------------------------- + # vnet3 bridge br0 virtio 52:54:00:xx:xx:xx + # vnet4 network default virtio 52:54:00:yy:yy:yy + + # match only 'network' interfaces from virsh output + set_source "virsh" + "${virsh}" -r domiflist "${d}" |\ + sed -n \ + -e "s|^[[:space:]]\?\([^[:space:]]\+\)[[:space:]]\+network[[:space:]]\+\([^[:space:]]\+\)[[:space:]]\+[^[:space:]]\+[[:space:]]\+[^[:space:]]\+$|\1 \1_\2|p" \ + -e "s|^[[:space:]]\?\([^[:space:]]\+\)[[:space:]]\+bridge[[:space:]]\+\([^[:space:]]\+\)[[:space:]]\+[^[:space:]]\+[[:space:]]\+[^[:space:]]\+$|\1 \1_\2|p" + else + debug "no virsh domain extracted from cgroup ${c}" + fi + else + debug "virsh command is not available" + fi +} + +# ----------------------------------------------------------------------------- +# netnsid detected interfaces + +netnsid_find_all_interfaces_for_pid() { + local pid="${1}" + [ -z "${pid}" ] && return 1 + + local nsid + nsid=$(lsns -t net -p "${pid}" -o NETNSID -nr 2>/dev/null) + if [ -z "${nsid}" ] || [ "${nsid}" = "unassigned" ]; then + return 1 + fi + + set_source 
"netnsid" + ip link show |\ + grep -B 1 -E " link-netnsid ${nsid}($| )" |\ + sed -n -e "s|^[[:space:]]*[0-9]\+:[[:space:]]\+\([A-Za-z0-9_]\+\)\(@[A-Za-z0-9_]\+\)*:[[:space:]].*$|\1|p" +} + +netnsid_find_all_interfaces_for_cgroup() { + local c="${1}" # the cgroup path + + if [ -f "${c}/cgroup.procs" ]; then + netnsid_find_all_interfaces_for_pid "$(head -n 1 "${c}/cgroup.procs" 2>/dev/null)" + else + debug "Cannot find file '${c}/cgroup.procs', not searching for netnsid interfaces." + fi +} + +# ----------------------------------------------------------------------------- + +find_all_interfaces_of_pid_or_cgroup() { + local p="${1}" c="${2}" # the pid and the cgroup path + + if [ -n "${pid}" ] + then + # we have been called with a pid + + proc_pid_fdinfo_iff "${p}" + netnsid_find_all_interfaces_for_pid "${p}" + + elif [ -n "${c}" ] + then + # we have been called with a cgroup + + info "searching for network interfaces of cgroup '${c}'" + + find_tun_tap_interfaces_for_cgroup "${c}" + virsh_find_all_interfaces_for_cgroup "${c}" + netnsid_find_all_interfaces_for_cgroup "${c}" + + else + + error "Either a pid or a cgroup path is needed" + return 1 + + fi + + return 0 +} + +# ----------------------------------------------------------------------------- + +# an associative array to store the interfaces +# the index is the interface name as seen by the host +# the value is the interface name as seen by the guest / container +declare -A devs=() + +# store all interfaces found in the associative array +# this will also give the unique devices, as seen by the host +last_src= +# shellcheck disable=SC2162 +while read host_device guest_device +do + [ -z "${host_device}" ] && continue + + [ "${host_device}" = "SRC" ] && last_src="${guest_device}" && continue + + # the default guest_device is the host_device + [ -z "${guest_device}" ] && guest_device="${host_device}" + + # when we run in debug, show the source + debug "Found host device '${host_device}', guest device 
'${guest_device}', detected via '${last_src}'" + + if [ -z "${devs[${host_device}]}" ] || [ "${devs[${host_device}]}" = "${host_device}" ]; then + devs[${host_device}]="${guest_device}" + fi + +done < <( find_all_interfaces_of_pid_or_cgroup "${pid}" "${cgroup}" ) + +# print the interfaces found, in the format netdata expects them +found=0 +for x in "${!devs[@]}" +do + found=$((found + 1)) + echo "${x} ${devs[${x}]}" +done + +debug "found ${found} network interfaces for pid '${pid}', cgroup '${cgroup}', run as ${USER}, ${UID}" + +# let netdata know if we found any +[ ${found} -eq 0 ] && exit 1 +exit 0 diff --git a/collectors/cgroups.plugin/cgroup-network.c b/collectors/cgroups.plugin/cgroup-network.c new file mode 100644 index 0000000..0b66ea4 --- /dev/null +++ b/collectors/cgroups.plugin/cgroup-network.c @@ -0,0 +1,723 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "libnetdata/libnetdata.h" +#include "libnetdata/required_dummies.h" + +#ifdef HAVE_SETNS +#ifndef _GNU_SOURCE +#define _GNU_SOURCE /* See feature_test_macros(7) */ +#endif +#include <sched.h> +#endif + +char environment_variable2[FILENAME_MAX + 50] = ""; +char *environment[] = { + "PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin", + environment_variable2, + NULL +}; + +struct iface { + const char *device; + uint32_t hash; + + unsigned int ifindex; + unsigned int iflink; + + struct iface *next; +}; + +unsigned int calc_num_ifaces(struct iface *root) { + unsigned int num = 0; + for (struct iface *h = root; h; h = h->next) { + num++; + } + return num; +} + +unsigned int read_iface_iflink(const char *prefix, const char *iface) { + if(!prefix) prefix = ""; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/sys/class/net/%s/iflink", prefix, iface); + + unsigned long long iflink = 0; + int ret = read_single_number_file(filename, &iflink); + if(ret) error("Cannot read '%s'.", filename); + + return (unsigned int)iflink; +} + +unsigned int 
read_iface_ifindex(const char *prefix, const char *iface) { + if(!prefix) prefix = ""; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/sys/class/net/%s/ifindex", prefix, iface); + + unsigned long long ifindex = 0; + int ret = read_single_number_file(filename, &ifindex); + if(ret) error("Cannot read '%s'.", filename); + + return (unsigned int)ifindex; +} + +struct iface *read_proc_net_dev(const char *scope __maybe_unused, const char *prefix) { + if(!prefix) prefix = ""; + + procfile *ff = NULL; + char filename[FILENAME_MAX + 1]; + + snprintfz(filename, FILENAME_MAX, "%s%s", prefix, (*prefix)?"/proc/1/net/dev":"/proc/net/dev"); + +#ifdef NETDATA_INTERNAL_CHECKS + info("parsing '%s'", filename); +#endif + + ff = procfile_open(filename, " \t,:|", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) { + error("Cannot open file '%s'", filename); + return NULL; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) { + error("Cannot read file '%s'", filename); + return NULL; + } + + size_t lines = procfile_lines(ff), l; + struct iface *root = NULL; + for(l = 2; l < lines ;l++) { + if (unlikely(procfile_linewords(ff, l) < 1)) continue; + + struct iface *t = callocz(1, sizeof(struct iface)); + t->device = strdupz(procfile_lineword(ff, l, 0)); + t->hash = simple_hash(t->device); + t->ifindex = read_iface_ifindex(prefix, t->device); + t->iflink = read_iface_iflink(prefix, t->device); + t->next = root; + root = t; + +#ifdef NETDATA_INTERNAL_CHECKS + info("added %s interface '%s', ifindex %u, iflink %u", scope, t->device, t->ifindex, t->iflink); +#endif + } + + procfile_close(ff); + + return root; +} + +void free_iface(struct iface *iface) { + freez((void *)iface->device); + freez(iface); +} + +void free_host_ifaces(struct iface *iface) { + while(iface) { + struct iface *t = iface->next; + free_iface(iface); + iface = t; + } +} + +int iface_is_eligible(struct iface *iface) { + if(iface->iflink != iface->ifindex) + return 1; + + return 0; +} + +int 
eligible_ifaces(struct iface *root) { + int eligible = 0; + + struct iface *t; + for(t = root; t ; t = t->next) + if(iface_is_eligible(t)) + eligible++; + + return eligible; +} + +static void continue_as_child(void) { + pid_t child = fork(); + int status; + pid_t ret; + + if (child < 0) + error("fork() failed"); + + /* Only the child returns */ + if (child == 0) + return; + + for (;;) { + ret = waitpid(child, &status, WUNTRACED); + if ((ret == child) && (WIFSTOPPED(status))) { + /* The child suspended so suspend us as well */ + kill(getpid(), SIGSTOP); + kill(child, SIGCONT); + } else { + break; + } + } + + /* Return the child's exit code if possible */ + if (WIFEXITED(status)) { + exit(WEXITSTATUS(status)); + } else if (WIFSIGNALED(status)) { + kill(getpid(), WTERMSIG(status)); + } + + exit(EXIT_FAILURE); +} + +int proc_pid_fd(const char *prefix, const char *ns, pid_t pid) { + if(!prefix) prefix = ""; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/%s", prefix, (int)pid, ns); + int fd = open(filename, O_RDONLY); + + if(fd == -1) + error("Cannot open proc_pid_fd() file '%s'", filename); + + return fd; +} + +static struct ns { + int nstype; + int fd; + int status; + const char *name; + const char *path; +} all_ns[] = { + // { .nstype = CLONE_NEWUSER, .fd = -1, .status = -1, .name = "user", .path = "ns/user" }, + // { .nstype = CLONE_NEWCGROUP, .fd = -1, .status = -1, .name = "cgroup", .path = "ns/cgroup" }, + // { .nstype = CLONE_NEWIPC, .fd = -1, .status = -1, .name = "ipc", .path = "ns/ipc" }, + // { .nstype = CLONE_NEWUTS, .fd = -1, .status = -1, .name = "uts", .path = "ns/uts" }, + { .nstype = CLONE_NEWNET, .fd = -1, .status = -1, .name = "network", .path = "ns/net" }, + { .nstype = CLONE_NEWPID, .fd = -1, .status = -1, .name = "pid", .path = "ns/pid" }, + { .nstype = CLONE_NEWNS, .fd = -1, .status = -1, .name = "mount", .path = "ns/mnt" }, + + // terminator + { .nstype = 0, .fd = -1, .status = -1, .name = NULL, .path = NULL 
} +}; + +int switch_namespace(const char *prefix, pid_t pid) { + +#ifdef HAVE_SETNS + + int i; + for(i = 0; all_ns[i].name ; i++) + all_ns[i].fd = proc_pid_fd(prefix, all_ns[i].path, pid); + + int root_fd = proc_pid_fd(prefix, "root", pid); + int cwd_fd = proc_pid_fd(prefix, "cwd", pid); + + setgroups(0, NULL); + + // 2 passes - found it at nsenter source code + // this is related CLONE_NEWUSER functionality + + // This code cannot switch user namespace (it can all the other namespaces) + // Fortunately, we don't need to switch user namespaces. + + int pass; + for(pass = 0; pass < 2 ;pass++) { + for(i = 0; all_ns[i].name ; i++) { + if (all_ns[i].fd != -1 && all_ns[i].status == -1) { + if(setns(all_ns[i].fd, all_ns[i].nstype) == -1) { + if(pass == 1) { + all_ns[i].status = 0; + error("Cannot switch to %s namespace of pid %d", all_ns[i].name, (int) pid); + } + } + else + all_ns[i].status = 1; + } + } + } + + setgroups(0, NULL); + + if(root_fd != -1) { + if(fchdir(root_fd) < 0) + error("Cannot fchdir() to pid %d root directory", (int)pid); + + if(chroot(".") < 0) + error("Cannot chroot() to pid %d root directory", (int)pid); + + close(root_fd); + } + + if(cwd_fd != -1) { + if(fchdir(cwd_fd) < 0) + error("Cannot fchdir() to pid %d current working directory", (int)pid); + + close(cwd_fd); + } + + int do_fork = 0; + for(i = 0; all_ns[i].name ; i++) + if(all_ns[i].fd != -1) { + + // CLONE_NEWPID requires a fork() to become effective + if(all_ns[i].nstype == CLONE_NEWPID && all_ns[i].status) + do_fork = 1; + + close(all_ns[i].fd); + } + + if(do_fork) + continue_as_child(); + + return 0; + +#else + + errno = ENOSYS; + error("setns() is missing on this system."); + return 1; + +#endif +} + +pid_t read_pid_from_cgroup_file(const char *filename) { + int fd = open(filename, procfile_open_flags); + if(fd == -1) { + error("Cannot open pid_from_cgroup() file '%s'.", filename); + return 0; + } + + FILE *fp = fdopen(fd, "r"); + if(!fp) { + error("Cannot upgrade fd to fp for file 
'%s'.", filename); + return 0; + } + + char buffer[100 + 1]; + pid_t pid = 0; + char *s; + while((s = fgets(buffer, 100, fp))) { + buffer[100] = '\0'; + pid = atoi(s); + if(pid > 0) break; + } + + fclose(fp); + +#ifdef NETDATA_INTERNAL_CHECKS + if(pid > 0) info("found pid %d on file '%s'", pid, filename); +#endif + + return pid; +} + +pid_t read_pid_from_cgroup_files(const char *path) { + char filename[FILENAME_MAX + 1]; + + snprintfz(filename, FILENAME_MAX, "%s/cgroup.procs", path); + pid_t pid = read_pid_from_cgroup_file(filename); + if(pid > 0) return pid; + + snprintfz(filename, FILENAME_MAX, "%s/tasks", path); + return read_pid_from_cgroup_file(filename); +} + +pid_t read_pid_from_cgroup(const char *path) { + pid_t pid = read_pid_from_cgroup_files(path); + if (pid > 0) return pid; + + DIR *dir = opendir(path); + if (!dir) { + error("cannot read directory '%s'", path); + return 0; + } + + struct dirent *de = NULL; + while ((de = readdir(dir))) { + if (de->d_type == DT_DIR + && ( + (de->d_name[0] == '.' && de->d_name[1] == '\0') + || (de->d_name[0] == '.' && de->d_name[1] == '.' 
&& de->d_name[2] == '\0') + )) + continue; + + if (de->d_type == DT_DIR) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/%s", path, de->d_name); + pid = read_pid_from_cgroup(filename); + if(pid > 0) break; + } + } + closedir(dir); + return pid; +} + +// ---------------------------------------------------------------------------- +// send the result to netdata + +struct found_device { + const char *host_device; + const char *guest_device; + + uint32_t host_device_hash; + + struct found_device *next; +} *detected_devices = NULL; + +void add_device(const char *host, const char *guest) { +#ifdef NETDATA_INTERNAL_CHECKS + info("adding device with host '%s', guest '%s'", host, guest); +#endif + + uint32_t hash = simple_hash(host); + + if(guest && (!*guest || strcmp(host, guest) == 0)) + guest = NULL; + + struct found_device *f; + for(f = detected_devices; f ; f = f->next) { + if(f->host_device_hash == hash && !strcmp(host, f->host_device)) { + + if(guest && (!f->guest_device || !strcmp(f->host_device, f->guest_device))) { + if(f->guest_device) freez((void *)f->guest_device); + f->guest_device = strdupz(guest); + } + + return; + } + } + + f = mallocz(sizeof(struct found_device)); + f->host_device = strdupz(host); + f->host_device_hash = hash; + f->guest_device = (guest)?strdupz(guest):NULL; + f->next = detected_devices; + detected_devices = f; +} + +int send_devices(void) { + int found = 0; + + struct found_device *f; + for(f = detected_devices; f ; f = f->next) { + found++; + printf("%s %s\n", f->host_device, (f->guest_device)?f->guest_device:f->host_device); + } + + return found; +} + +// ---------------------------------------------------------------------------- +// this function should be called only **ONCE** +// also it has to be the **LAST** to be called +// since it switches namespaces, so after this call, everything is different! 
+ +void detect_veth_interfaces(pid_t pid) { + struct iface *cgroup = NULL; + struct iface *host, *h, *c; + + host = read_proc_net_dev("host", netdata_configured_host_prefix); + if(!host) { + errno = 0; + error("cannot read host interface list."); + goto cleanup; + } + + if(!eligible_ifaces(host)) { + errno = 0; + info("there are no double-linked host interfaces available."); + goto cleanup; + } + + if(switch_namespace(netdata_configured_host_prefix, pid)) { + errno = 0; + error("cannot switch to the namespace of pid %u", (unsigned int) pid); + goto cleanup; + } + +#ifdef NETDATA_INTERNAL_CHECKS + info("switched to namespaces of pid %d", pid); +#endif + + cgroup = read_proc_net_dev("cgroup", NULL); + if(!cgroup) { + errno = 0; + error("cannot read cgroup interface list."); + goto cleanup; + } + + if(!eligible_ifaces(cgroup)) { + errno = 0; + error("there are not double-linked cgroup interfaces available."); + goto cleanup; + } + + unsigned int host_dev_num = calc_num_ifaces(host); + unsigned int cgroup_dev_num = calc_num_ifaces(cgroup); + // host ifaces == guest ifaces => we are still in the host namespace + // and we can't really identify which ifaces belong to the cgroup (e.g. Proxmox VM). 
+ if (host_dev_num == cgroup_dev_num) { + unsigned int m = 0; + for (h = host; h; h = h->next) { + for (c = cgroup; c; c = c->next) { + if (h->ifindex == c->ifindex && h->iflink == c->iflink) { + m++; + break; + } + } + } + if (host_dev_num == m) { + goto cleanup; + } + } + + for(h = host; h ; h = h->next) { + if(iface_is_eligible(h)) { + for (c = cgroup; c; c = c->next) { + if(iface_is_eligible(c) && h->ifindex == c->iflink && h->iflink == c->ifindex) { + add_device(h->device, c->device); + } + } + } + } + +cleanup: + free_host_ifaces(cgroup); + free_host_ifaces(host); +} + +// ---------------------------------------------------------------------------- +// call the external helper + +#define CGROUP_NETWORK_INTERFACE_MAX_LINE 2048 +void call_the_helper(pid_t pid, const char *cgroup) { + if(setresuid(0, 0, 0) == -1) + error("setresuid(0, 0, 0) failed."); + + char command[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; + if(cgroup) + snprintfz(command, CGROUP_NETWORK_INTERFACE_MAX_LINE, "exec " PLUGINS_DIR "/cgroup-network-helper.sh --cgroup '%s'", cgroup); + else + snprintfz(command, CGROUP_NETWORK_INTERFACE_MAX_LINE, "exec " PLUGINS_DIR "/cgroup-network-helper.sh --pid %d", pid); + + info("running: %s", command); + + pid_t cgroup_pid; + FILE *fp_child_input, *fp_child_output; + + if(cgroup) { + (void)netdata_popen_raw_default_flags(&cgroup_pid, environment, &fp_child_input, &fp_child_output, PLUGINS_DIR "/cgroup-network-helper.sh", "--cgroup", cgroup); + } + else { + char buffer[100]; + snprintfz(buffer, sizeof(buffer) - 1, "%d", pid); + (void)netdata_popen_raw_default_flags(&cgroup_pid, environment, &fp_child_input, &fp_child_output, PLUGINS_DIR "/cgroup-network-helper.sh", "--pid", buffer); + } + + if(fp_child_output) { + char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; + char *s; + while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp_child_output))) { + trim(s); + + if(*s && *s != '\n') { + char *t = s; + while(*t && *t != ' ') t++; + if(*t == ' ') { + *t 
= '\0'; + t++; + } + + if(!*s || !*t) continue; + add_device(s, t); + } + } + + netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); + } + else + error("cannot execute cgroup-network helper script: %s", command); +} + +int is_valid_path_symbol(char c) { + switch(c) { + case '/': // path separators + case '\\': // needed for virsh domains \x2d1\x2dname + case ' ': // space + case '-': // hyphen + case '_': // underscore + case '.': // dot + case ',': // comma + return 1; + + default: + return 0; + } +} + +// we will pass this path a shell script running as root +// so, we need to make sure the path will be valid +// and will not include anything that could allow +// the caller use shell expansion for gaining escalated +// privileges. +int verify_path(const char *path) { + struct stat sb; + + char c; + const char *s = path; + while((c = *s++)) { + if(!( isalnum(c) || is_valid_path_symbol(c) )) { + error("invalid character in path '%s'", path); + return -1; + } + } + + if(strstr(path, "\\") && !strstr(path, "\\x")) { + error("invalid escape sequence in path '%s'", path); + return 1; + } + + if(strstr(path, "/../")) { + error("invalid parent path sequence detected in '%s'", path); + return 1; + } + + if(path[0] != '/') { + error("only absolute path names are supported - invalid path '%s'", path); + return -1; + } + + if (stat(path, &sb) == -1) { + error("cannot stat() path '%s'", path); + return -1; + } + + if((sb.st_mode & S_IFMT) != S_IFDIR) { + error("path '%s' is not a directory", path); + return -1; + } + + return 0; +} + +/* +char *fix_path_variable(void) { + const char *path = getenv("PATH"); + if(!path || !*path) return 0; + + char *p = strdupz(path); + char *safe_path = callocz(1, strlen(p) + strlen("PATH=") + 1); + strcpy(safe_path, "PATH="); + + int added = 0; + char *ptr = p; + while(ptr && *ptr) { + char *s = strsep(&ptr, ":"); + if(s && *s) { + if(verify_path(s) == -1) { + error("the PATH variable includes an invalid path '%s' - removed it.", s); 
+ } + else { + info("the PATH variable includes a valid path '%s'.", s); + if(added) strcat(safe_path, ":"); + strcat(safe_path, s); + added++; + } + } + } + + info("unsafe PATH: '%s'.", path); + info(" safe PATH: '%s'.", safe_path); + + freez(p); + return safe_path; +} +*/ + +// ---------------------------------------------------------------------------- +// main + +void usage(void) { + fprintf(stderr, "%s [ -p PID | --pid PID | --cgroup /path/to/cgroup ]\n", program_name); + exit(1); +} + +int main(int argc, char **argv) { + pid_t pid = 0; + + program_name = argv[0]; + program_version = VERSION; + error_log_syslog = 0; + + // since cgroup-network runs as root, prevent it from opening symbolic links + procfile_open_flags = O_RDONLY|O_NOFOLLOW; + + // ------------------------------------------------------------------------ + // make sure NETDATA_HOST_PREFIX is safe + + netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX"); + if(verify_netdata_host_prefix() == -1) exit(1); + + if(netdata_configured_host_prefix[0] != '\0' && verify_path(netdata_configured_host_prefix) == -1) + fatal("invalid NETDATA_HOST_PREFIX '%s'", netdata_configured_host_prefix); + + // ------------------------------------------------------------------------ + // build a safe environment for our script + + // the first environment variable is a fixed PATH= + snprintfz(environment_variable2, sizeof(environment_variable2) - 1, "NETDATA_HOST_PREFIX=%s", netdata_configured_host_prefix); + + // ------------------------------------------------------------------------ + + if(argc == 2 && (!strcmp(argv[1], "version") || !strcmp(argv[1], "-version") || !strcmp(argv[1], "--version") || !strcmp(argv[1], "-v") || !strcmp(argv[1], "-V"))) { + fprintf(stderr, "cgroup-network %s\n", VERSION); + exit(0); + } + + if(argc != 3) + usage(); + + int arg = 1; + int helper = 1; + if (getenv("KUBERNETES_SERVICE_HOST") != NULL && getenv("KUBERNETES_SERVICE_PORT") != NULL) + helper = 0; + + if(!strcmp(argv[arg], 
"-p") || !strcmp(argv[arg], "--pid")) { + pid = atoi(argv[arg+1]); + + if(pid <= 0) { + errno = 0; + error("Invalid pid %d given", (int) pid); + return 2; + } + + if(helper) call_the_helper(pid, NULL); + } + else if(!strcmp(argv[arg], "--cgroup")) { + char *cgroup = argv[arg+1]; + if(verify_path(cgroup) == -1) { + error("cgroup '%s' does not exist or is not valid.", cgroup); + return 1; + } + + pid = read_pid_from_cgroup(cgroup); + if(helper) call_the_helper(pid, cgroup); + + if(pid <= 0 && !detected_devices) { + errno = 0; + error("Cannot find a cgroup PID from cgroup '%s'", cgroup); + } + } + else + usage(); + + if(pid > 0) + detect_veth_interfaces(pid); + + int found = send_devices(); + if(found <= 0) return 1; + return 0; +} diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c new file mode 100644 index 0000000..8f75482 --- /dev/null +++ b/collectors/cgroups.plugin/sys_fs_cgroup.c @@ -0,0 +1,4887 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "sys_fs_cgroup.h" + +#define PLUGIN_CGROUPS_NAME "cgroups.plugin" +#define PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME "systemd" +#define PLUGIN_CGROUPS_MODULE_CGROUPS_NAME "/sys/fs/cgroup" + +#ifdef NETDATA_INTERNAL_CHECKS +#define CGROUP_PROCFILE_FLAG PROCFILE_FLAG_DEFAULT +#else +#define CGROUP_PROCFILE_FLAG PROCFILE_FLAG_NO_ERROR_ON_FILE_IO +#endif + +// main cgroups thread worker jobs +#define WORKER_CGROUPS_LOCK 0 +#define WORKER_CGROUPS_READ 1 +#define WORKER_CGROUPS_CHART 2 + +// discovery cgroup thread worker jobs +#define WORKER_DISCOVERY_INIT 0 +#define WORKER_DISCOVERY_FIND 1 +#define WORKER_DISCOVERY_PROCESS 2 +#define WORKER_DISCOVERY_PROCESS_RENAME 3 +#define WORKER_DISCOVERY_PROCESS_NETWORK 4 +#define WORKER_DISCOVERY_PROCESS_FIRST_TIME 5 +#define WORKER_DISCOVERY_UPDATE 6 +#define WORKER_DISCOVERY_CLEANUP 7 +#define WORKER_DISCOVERY_COPY 8 +#define WORKER_DISCOVERY_SHARE 9 +#define WORKER_DISCOVERY_LOCK 10 + +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 11 
+#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 11 +#endif + +// ---------------------------------------------------------------------------- +// cgroup globals + +static char cgroup_chart_id_prefix[] = "cgroup_"; + +static int is_inside_k8s = 0; + +static long system_page_size = 4096; // system will be queried via sysconf() in configuration() + +static int cgroup_enable_cpuacct_stat = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_cpuacct_cpu_throttling = CONFIG_BOOLEAN_YES; +static int cgroup_enable_cpuacct_cpu_shares = CONFIG_BOOLEAN_NO; +static int cgroup_enable_memory = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_detailed_memory = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_memory_failcnt = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_swap = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_blkio_io = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_blkio_ops = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_blkio_throttle_io = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_blkio_throttle_ops = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_blkio_merged_ops = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_cpu = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_io_some = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_io_full = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_memory_some = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_AUTO; + +static int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES; +static int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO; +static int cgroup_used_memory = CONFIG_BOOLEAN_YES; + +static int cgroup_use_unified_cgroups = CONFIG_BOOLEAN_NO; +static int cgroup_unified_exist = CONFIG_BOOLEAN_AUTO; + +static int cgroup_search_in_devices = 1; + +static int cgroup_check_for_new_every = 10; +static int 
cgroup_update_every = 1; +static int cgroup_containers_chart_priority = NETDATA_CHART_PRIO_CGROUPS_CONTAINERS; + +static int cgroup_recheck_zero_blkio_every_iterations = 10; +static int cgroup_recheck_zero_mem_failcnt_every_iterations = 10; +static int cgroup_recheck_zero_mem_detailed_every_iterations = 10; + +static char *cgroup_cpuacct_base = NULL; +static char *cgroup_cpuset_base = NULL; +static char *cgroup_blkio_base = NULL; +static char *cgroup_memory_base = NULL; +static char *cgroup_devices_base = NULL; +static char *cgroup_unified_base = NULL; + +static int cgroup_root_count = 0; +static int cgroup_root_max = 1000; +static int cgroup_max_depth = 0; + +static SIMPLE_PATTERN *enabled_cgroup_paths = NULL; +static SIMPLE_PATTERN *enabled_cgroup_names = NULL; +static SIMPLE_PATTERN *search_cgroup_paths = NULL; +static SIMPLE_PATTERN *enabled_cgroup_renames = NULL; +static SIMPLE_PATTERN *systemd_services_cgroups = NULL; + +static SIMPLE_PATTERN *entrypoint_parent_process_comm = NULL; + +static char *cgroups_rename_script = NULL; +static char *cgroups_network_interface_script = NULL; + +static int cgroups_check = 0; + +static uint32_t Read_hash = 0; +static uint32_t Write_hash = 0; +static uint32_t user_hash = 0; +static uint32_t system_hash = 0; +static uint32_t user_usec_hash = 0; +static uint32_t system_usec_hash = 0; +static uint32_t nr_periods_hash = 0; +static uint32_t nr_throttled_hash = 0; +static uint32_t throttled_time_hash = 0; +static uint32_t throttled_usec_hash = 0; + +enum cgroups_type { CGROUPS_AUTODETECT_FAIL, CGROUPS_V1, CGROUPS_V2 }; + +enum cgroups_systemd_setting { + SYSTEMD_CGROUP_ERR, + SYSTEMD_CGROUP_LEGACY, + SYSTEMD_CGROUP_HYBRID, + SYSTEMD_CGROUP_UNIFIED +}; + +struct cgroups_systemd_config_setting { + char *name; + enum cgroups_systemd_setting setting; +}; + +static struct cgroups_systemd_config_setting cgroups_systemd_options[] = { + { .name = "legacy", .setting = SYSTEMD_CGROUP_LEGACY }, + { .name = "hybrid", .setting = 
SYSTEMD_CGROUP_HYBRID }, + { .name = "unified", .setting = SYSTEMD_CGROUP_UNIFIED }, + { .name = NULL, .setting = SYSTEMD_CGROUP_ERR }, +}; + +// Shared memory with information from detected cgroups +netdata_ebpf_cgroup_shm_t shm_cgroup_ebpf = {NULL, NULL}; +static int shm_fd_cgroup_ebpf = -1; +sem_t *shm_mutex_cgroup_ebpf = SEM_FAILED; + +/* on Fed systemd is not in PATH for some reason */ +#define SYSTEMD_CMD_RHEL "/usr/lib/systemd/systemd --version" +#define SYSTEMD_HIERARCHY_STRING "default-hierarchy=" + +#define MAXSIZE_PROC_CMDLINE 4096 +static enum cgroups_systemd_setting cgroups_detect_systemd(const char *exec) +{ + pid_t command_pid; + enum cgroups_systemd_setting retval = SYSTEMD_CGROUP_ERR; + char buf[MAXSIZE_PROC_CMDLINE]; + char *begin, *end; + + FILE *fp_child_input; + FILE *fp_child_output = netdata_popen(exec, &command_pid, &fp_child_input); + + if (!fp_child_output) + return retval; + + fd_set rfds; + struct timeval timeout; + int fd = fileno(fp_child_output); + int ret = -1; + + FD_ZERO(&rfds); + FD_SET(fd, &rfds); + timeout.tv_sec = 3; + timeout.tv_usec = 0; + + if (fd != -1) { + ret = select(fd + 1, &rfds, NULL, NULL, &timeout); + } + + if (ret == -1) { + error("Failed to get the output of \"%s\"", exec); + } else if (ret == 0) { + info("Cannot get the output of \"%s\" within %"PRId64" seconds", exec, (int64_t)timeout.tv_sec); + } else { + while (fgets(buf, MAXSIZE_PROC_CMDLINE, fp_child_output) != NULL) { + if ((begin = strstr(buf, SYSTEMD_HIERARCHY_STRING))) { + end = begin = begin + strlen(SYSTEMD_HIERARCHY_STRING); + if (!*begin) + break; + while (isalpha(*end)) + end++; + *end = 0; + for (int i = 0; cgroups_systemd_options[i].name; i++) { + if (!strcmp(begin, cgroups_systemd_options[i].name)) { + retval = cgroups_systemd_options[i].setting; + break; + } + } + break; + } + } + } + + if (netdata_pclose(fp_child_input, fp_child_output, command_pid)) + return SYSTEMD_CGROUP_ERR; + + return retval; +} + +static enum cgroups_type 
cgroups_try_detect_version() +{ + pid_t command_pid; + char buf[MAXSIZE_PROC_CMDLINE]; + enum cgroups_systemd_setting systemd_setting; + int cgroups2_available = 0; + + // 1. check if cgroups2 available on system at all + FILE *fp_child_input; + FILE *fp_child_output = netdata_popen("grep cgroup /proc/filesystems", &command_pid, &fp_child_input); + if (!fp_child_output) { + error("popen failed"); + return CGROUPS_AUTODETECT_FAIL; + } + while (fgets(buf, MAXSIZE_PROC_CMDLINE, fp_child_output) != NULL) { + if (strstr(buf, "cgroup2")) { + cgroups2_available = 1; + break; + } + } + if(netdata_pclose(fp_child_input, fp_child_output, command_pid)) + return CGROUPS_AUTODETECT_FAIL; + + if(!cgroups2_available) + return CGROUPS_V1; + +#if defined CGROUP2_SUPER_MAGIC + // 2. check filesystem type for the default mountpoint + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/fs/cgroup"); + struct statfs fsinfo; + if (!statfs(filename, &fsinfo)) { + if (fsinfo.f_type == CGROUP2_SUPER_MAGIC) + return CGROUPS_V2; + } +#endif + + // 3. check systemd compiletime setting + if ((systemd_setting = cgroups_detect_systemd("systemd --version")) == SYSTEMD_CGROUP_ERR) + systemd_setting = cgroups_detect_systemd(SYSTEMD_CMD_RHEL); + + if(systemd_setting == SYSTEMD_CGROUP_ERR) + return CGROUPS_AUTODETECT_FAIL; + + if(systemd_setting == SYSTEMD_CGROUP_LEGACY || systemd_setting == SYSTEMD_CGROUP_HYBRID) { + // currently we prefer V1 if HYBRID is set as it seems to be more feature complete + // in the future we might want to continue here if SYSTEMD_CGROUP_HYBRID + // and go ahead with V2 + return CGROUPS_V1; + } + + // 4. 
if we are unified as on Fedora (default cgroups2 only mode) + // check kernel command line flag that can override that setting + FILE *fp = fopen("/proc/cmdline", "r"); + if (!fp) { + error("Error reading kernel boot commandline parameters"); + return CGROUPS_AUTODETECT_FAIL; + } + + if (!fgets(buf, MAXSIZE_PROC_CMDLINE, fp)) { + error("couldn't read all cmdline params into buffer"); + fclose(fp); + return CGROUPS_AUTODETECT_FAIL; + } + + fclose(fp); + + if (strstr(buf, "systemd.unified_cgroup_hierarchy=0")) { + info("cgroups v2 (unified cgroups) is available but are disabled on this system."); + return CGROUPS_V1; + } + return CGROUPS_V2; +} + +void set_cgroup_base_path(char *filename, char *path) { + if (strncmp(netdata_configured_host_prefix, path, strlen(netdata_configured_host_prefix)) == 0) { + snprintfz(filename, FILENAME_MAX, "%s", path); + } else { + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, path); + } +} + +void read_cgroup_plugin_configuration() { + system_page_size = sysconf(_SC_PAGESIZE); + + Read_hash = simple_hash("Read"); + Write_hash = simple_hash("Write"); + user_hash = simple_hash("user"); + system_hash = simple_hash("system"); + user_usec_hash = simple_hash("user_usec"); + system_usec_hash = simple_hash("system_usec"); + nr_periods_hash = simple_hash("nr_periods"); + nr_throttled_hash = simple_hash("nr_throttled"); + throttled_time_hash = simple_hash("throttled_time"); + throttled_usec_hash = simple_hash("throttled_usec"); + + cgroup_update_every = (int)config_get_number("plugin:cgroups", "update every", localhost->rrd_update_every); + if(cgroup_update_every < localhost->rrd_update_every) + cgroup_update_every = localhost->rrd_update_every; + + cgroup_check_for_new_every = (int)config_get_number("plugin:cgroups", "check for new cgroups every", (long long)cgroup_check_for_new_every * (long long)cgroup_update_every); + if(cgroup_check_for_new_every < cgroup_update_every) + cgroup_check_for_new_every = 
cgroup_update_every; + + cgroup_use_unified_cgroups = config_get_boolean_ondemand("plugin:cgroups", "use unified cgroups", CONFIG_BOOLEAN_AUTO); + if(cgroup_use_unified_cgroups == CONFIG_BOOLEAN_AUTO) + cgroup_use_unified_cgroups = (cgroups_try_detect_version() == CGROUPS_V2); + + info("use unified cgroups %s", cgroup_use_unified_cgroups ? "true" : "false"); + + cgroup_containers_chart_priority = (int)config_get_number("plugin:cgroups", "containers priority", cgroup_containers_chart_priority); + if(cgroup_containers_chart_priority < 1) + cgroup_containers_chart_priority = NETDATA_CHART_PRIO_CGROUPS_CONTAINERS; + + cgroup_enable_cpuacct_stat = config_get_boolean_ondemand("plugin:cgroups", "enable cpuacct stat (total CPU)", cgroup_enable_cpuacct_stat); + cgroup_enable_cpuacct_usage = config_get_boolean_ondemand("plugin:cgroups", "enable cpuacct usage (per core CPU)", cgroup_enable_cpuacct_usage); + cgroup_enable_cpuacct_cpu_throttling = config_get_boolean_ondemand("plugin:cgroups", "enable cpuacct cpu throttling", cgroup_enable_cpuacct_cpu_throttling); + cgroup_enable_cpuacct_cpu_shares = config_get_boolean_ondemand("plugin:cgroups", "enable cpuacct cpu shares", cgroup_enable_cpuacct_cpu_shares); + + cgroup_enable_memory = config_get_boolean_ondemand("plugin:cgroups", "enable memory", cgroup_enable_memory); + cgroup_enable_detailed_memory = config_get_boolean_ondemand("plugin:cgroups", "enable detailed memory", cgroup_enable_detailed_memory); + cgroup_enable_memory_failcnt = config_get_boolean_ondemand("plugin:cgroups", "enable memory limits fail count", cgroup_enable_memory_failcnt); + cgroup_enable_swap = config_get_boolean_ondemand("plugin:cgroups", "enable swap memory", cgroup_enable_swap); + + cgroup_enable_blkio_io = config_get_boolean_ondemand("plugin:cgroups", "enable blkio bandwidth", cgroup_enable_blkio_io); + cgroup_enable_blkio_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio operations", cgroup_enable_blkio_ops); + 
cgroup_enable_blkio_throttle_io = config_get_boolean_ondemand("plugin:cgroups", "enable blkio throttle bandwidth", cgroup_enable_blkio_throttle_io); + cgroup_enable_blkio_throttle_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio throttle operations", cgroup_enable_blkio_throttle_ops); + cgroup_enable_blkio_queued_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio queued operations", cgroup_enable_blkio_queued_ops); + cgroup_enable_blkio_merged_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio merged operations", cgroup_enable_blkio_merged_ops); + + cgroup_enable_pressure_cpu = config_get_boolean_ondemand("plugin:cgroups", "enable cpu pressure", cgroup_enable_pressure_cpu); + cgroup_enable_pressure_io_some = config_get_boolean_ondemand("plugin:cgroups", "enable io some pressure", cgroup_enable_pressure_io_some); + cgroup_enable_pressure_io_full = config_get_boolean_ondemand("plugin:cgroups", "enable io full pressure", cgroup_enable_pressure_io_full); + cgroup_enable_pressure_memory_some = config_get_boolean_ondemand("plugin:cgroups", "enable memory some pressure", cgroup_enable_pressure_memory_some); + cgroup_enable_pressure_memory_full = config_get_boolean_ondemand("plugin:cgroups", "enable memory full pressure", cgroup_enable_pressure_memory_full); + + cgroup_recheck_zero_blkio_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero blkio every iterations", cgroup_recheck_zero_blkio_every_iterations); + cgroup_recheck_zero_mem_failcnt_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero memory failcnt every iterations", cgroup_recheck_zero_mem_failcnt_every_iterations); + cgroup_recheck_zero_mem_detailed_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero detailed memory every iterations", cgroup_recheck_zero_mem_detailed_every_iterations); + + cgroup_enable_systemd_services = config_get_boolean("plugin:cgroups", "enable systemd services", 
cgroup_enable_systemd_services); + cgroup_enable_systemd_services_detailed_memory = config_get_boolean("plugin:cgroups", "enable systemd services detailed memory", cgroup_enable_systemd_services_detailed_memory); + cgroup_used_memory = config_get_boolean("plugin:cgroups", "report used memory", cgroup_used_memory); + + char filename[FILENAME_MAX + 1], *s; + struct mountinfo *mi, *root = mountinfo_read(0); + if(!cgroup_use_unified_cgroups) { + // cgroup v1 does not have pressure metrics + cgroup_enable_pressure_cpu = + cgroup_enable_pressure_io_some = + cgroup_enable_pressure_io_full = + cgroup_enable_pressure_memory_some = + cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_NO; + + mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuacct"); + if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuacct"); + if(!mi) { + error("CGROUP: cannot find cpuacct mountinfo. Assuming default: /sys/fs/cgroup/cpuacct"); + s = "/sys/fs/cgroup/cpuacct"; + } + else s = mi->mount_point; + set_cgroup_base_path(filename, s); + cgroup_cpuacct_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/cpuacct", filename); + + mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuset"); + if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuset"); + if(!mi) { + error("CGROUP: cannot find cpuset mountinfo. Assuming default: /sys/fs/cgroup/cpuset"); + s = "/sys/fs/cgroup/cpuset"; + } + else s = mi->mount_point; + set_cgroup_base_path(filename, s); + cgroup_cpuset_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/cpuset", filename); + + mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "blkio"); + if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "blkio"); + if(!mi) { + error("CGROUP: cannot find blkio mountinfo. 
Assuming default: /sys/fs/cgroup/blkio"); + s = "/sys/fs/cgroup/blkio"; + } + else s = mi->mount_point; + set_cgroup_base_path(filename, s); + cgroup_blkio_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/blkio", filename); + + mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "memory"); + if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "memory"); + if(!mi) { + error("CGROUP: cannot find memory mountinfo. Assuming default: /sys/fs/cgroup/memory"); + s = "/sys/fs/cgroup/memory"; + } + else s = mi->mount_point; + set_cgroup_base_path(filename, s); + cgroup_memory_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/memory", filename); + + mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "devices"); + if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "devices"); + if(!mi) { + error("CGROUP: cannot find devices mountinfo. Assuming default: /sys/fs/cgroup/devices"); + s = "/sys/fs/cgroup/devices"; + } + else s = mi->mount_point; + set_cgroup_base_path(filename, s); + cgroup_devices_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/devices", filename); + } + else { + //cgroup_enable_cpuacct_stat = + cgroup_enable_cpuacct_usage = + //cgroup_enable_memory = + //cgroup_enable_detailed_memory = + cgroup_enable_memory_failcnt = + //cgroup_enable_swap = + //cgroup_enable_blkio_io = + //cgroup_enable_blkio_ops = + cgroup_enable_blkio_throttle_io = + cgroup_enable_blkio_throttle_ops = + cgroup_enable_blkio_merged_ops = + cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_NO; + cgroup_search_in_devices = 0; + cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO; + cgroup_used_memory = CONFIG_BOOLEAN_NO; //unified cgroups use different values + + //TODO: can there be more than 1 cgroup2 mount point? 
+ mi = mountinfo_find_by_filesystem_super_option(root, "cgroup2", "rw"); //there is no cgroup2 specific super option - for now use 'rw' option + if(mi) debug(D_CGROUP, "found unified cgroup root using super options, with path: '%s'", mi->mount_point); + if(!mi) { + mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup2", "cgroup"); + if(mi) debug(D_CGROUP, "found unified cgroup root using mountsource info, with path: '%s'", mi->mount_point); + } + if(!mi) { + error("CGROUP: cannot find cgroup2 mountinfo. Assuming default: /sys/fs/cgroup"); + s = "/sys/fs/cgroup"; + } + else s = mi->mount_point; + set_cgroup_base_path(filename, s); + cgroup_unified_base = config_get("plugin:cgroups", "path to unified cgroups", filename); + debug(D_CGROUP, "using cgroup root: '%s'", cgroup_unified_base); + } + + cgroup_root_max = (int)config_get_number("plugin:cgroups", "max cgroups to allow", cgroup_root_max); + cgroup_max_depth = (int)config_get_number("plugin:cgroups", "max cgroups depth to monitor", cgroup_max_depth); + + enabled_cgroup_paths = simple_pattern_create( + config_get("plugin:cgroups", "enable by default cgroups matching", + // ---------------------------------------------------------------- + + " !*/init.scope " // ignore init.scope + " !/system.slice/run-*.scope " // ignore system.slice/run-XXXX.scope + " *.scope " // we need all other *.scope for sure + + // ---------------------------------------------------------------- + + " /machine.slice/*.service " // #3367 systemd-nspawn + + // ---------------------------------------------------------------- + + " */kubepods/pod*/* " // k8s containers + " */kubepods/*/pod*/* " // k8s containers + " */*-kubepods-pod*/* " // k8s containers + " */*-kubepods-*-pod*/* " // k8s containers + " !*kubepods* !*kubelet* " // all other k8s cgroups + + // ---------------------------------------------------------------- + + " !*/vcpu* " // libvirtd adds these sub-cgroups + " !*/emulator " // libvirtd adds these sub-cgroups + " 
!*.mount " + " !*.partition " + " !*.service " + " !*.socket " + " !*.slice " + " !*.swap " + " !*.user " + " !/ " + " !/docker " + " !*/libvirt " + " !/lxc " + " !/lxc/*/* " // #1397 #2649 + " !/lxc.monitor* " + " !/lxc.pivot " + " !/lxc.payload " + " !/machine " + " !/qemu " + " !/system " + " !/systemd " + " !/user " + " * " // enable anything else + ), NULL, SIMPLE_PATTERN_EXACT); + + enabled_cgroup_names = simple_pattern_create( + config_get("plugin:cgroups", "enable by default cgroups names matching", + " * " + ), NULL, SIMPLE_PATTERN_EXACT); + + search_cgroup_paths = simple_pattern_create( + config_get("plugin:cgroups", "search for cgroups in subpaths matching", + " !*/init.scope " // ignore init.scope + " !*-qemu " // #345 + " !*.libvirt-qemu " // #3010 + " !/init.scope " + " !/system " + " !/systemd " + " !/user " + " !/user.slice " + " !/lxc/*/* " // #2161 #2649 + " !/lxc.monitor " + " !/lxc.payload/*/* " + " !/lxc.payload.* " + " * " + ), NULL, SIMPLE_PATTERN_EXACT); + + snprintfz(filename, FILENAME_MAX, "%s/cgroup-name.sh", netdata_configured_primary_plugins_dir); + cgroups_rename_script = config_get("plugin:cgroups", "script to get cgroup names", filename); + + snprintfz(filename, FILENAME_MAX, "%s/cgroup-network", netdata_configured_primary_plugins_dir); + cgroups_network_interface_script = config_get("plugin:cgroups", "script to get cgroup network interfaces", filename); + + enabled_cgroup_renames = simple_pattern_create( + config_get("plugin:cgroups", "run script to rename cgroups matching", + " !/ " + " !*.mount " + " !*.socket " + " !*.partition " + " /machine.slice/*.service " // #3367 systemd-nspawn + " !*.service " + " !*.slice " + " !*.swap " + " !*.user " + " !init.scope " + " !*.scope/vcpu* " // libvirtd adds these sub-cgroups + " !*.scope/emulator " // libvirtd adds these sub-cgroups + " *.scope " + " *docker* " + " *lxc* " + " *qemu* " + " */kubepods/pod*/* " // k8s containers + " */kubepods/*/pod*/* " // k8s containers + " 
*/*-kubepods-pod*/* " // k8s containers + " */*-kubepods-*-pod*/* " // k8s containers + " !*kubepods* !*kubelet* " // all other k8s cgroups + " *.libvirt-qemu " // #3010 + " * " + ), NULL, SIMPLE_PATTERN_EXACT); + + if(cgroup_enable_systemd_services) { + systemd_services_cgroups = simple_pattern_create( + config_get("plugin:cgroups", "cgroups to match as systemd services", + " !/system.slice/*/*.service " + " /system.slice/*.service " + ), NULL, SIMPLE_PATTERN_EXACT); + } + + mountinfo_free_all(root); +} + +void netdata_cgroup_ebpf_set_values(size_t length) +{ + sem_wait(shm_mutex_cgroup_ebpf); + + shm_cgroup_ebpf.header->cgroup_max = cgroup_root_max; + shm_cgroup_ebpf.header->systemd_enabled = cgroup_enable_systemd_services | + cgroup_enable_systemd_services_detailed_memory | + cgroup_used_memory; + shm_cgroup_ebpf.header->body_length = length; + + sem_post(shm_mutex_cgroup_ebpf); +} + +void netdata_cgroup_ebpf_initialize_shm() +{ + shm_fd_cgroup_ebpf = shm_open(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME, O_CREAT | O_RDWR, 0660); + if (shm_fd_cgroup_ebpf < 0) { + error("Cannot initialize shared memory used by cgroup and eBPF, integration won't happen."); + return; + } + + size_t length = sizeof(netdata_ebpf_cgroup_shm_header_t) + cgroup_root_max * sizeof(netdata_ebpf_cgroup_shm_body_t); + if (ftruncate(shm_fd_cgroup_ebpf, length)) { + error("Cannot set size for shared memory."); + goto end_init_shm; + } + + shm_cgroup_ebpf.header = (netdata_ebpf_cgroup_shm_header_t *) mmap(NULL, length, + PROT_READ | PROT_WRITE, MAP_SHARED, + shm_fd_cgroup_ebpf, 0); + + if (!shm_cgroup_ebpf.header) { + error("Cannot map shared memory used between cgroup and eBPF, integration won't happen"); + goto end_init_shm; + } + shm_cgroup_ebpf.body = (netdata_ebpf_cgroup_shm_body_t *) ((char *)shm_cgroup_ebpf.header + + sizeof(netdata_ebpf_cgroup_shm_header_t)); + + shm_mutex_cgroup_ebpf = sem_open(NETDATA_NAMED_SEMAPHORE_EBPF_CGROUP_NAME, O_CREAT, + S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | 
S_IROTH | S_IWOTH, 1); + + if (shm_mutex_cgroup_ebpf != SEM_FAILED) { + netdata_cgroup_ebpf_set_values(length); + return; + } + + error("Cannot create semaphore, integration between eBPF and cgroup won't happen"); + munmap(shm_cgroup_ebpf.header, length); + +end_init_shm: + close(shm_fd_cgroup_ebpf); + shm_fd_cgroup_ebpf = -1; + shm_unlink(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME); +} + +// ---------------------------------------------------------------------------- +// cgroup objects + +struct blkio { + int updated; + int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + int delay_counter; + + char *filename; + + unsigned long long Read; + unsigned long long Write; +/* + unsigned long long Sync; + unsigned long long Async; + unsigned long long Total; +*/ +}; + +// https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt +struct memory { + ARL_BASE *arl_base; + ARL_ENTRY *arl_dirty; + ARL_ENTRY *arl_swap; + + int updated_detailed; + int updated_usage_in_bytes; + int updated_msw_usage_in_bytes; + int updated_failcnt; + + int enabled_detailed; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + int enabled_usage_in_bytes; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + int enabled_msw_usage_in_bytes; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + int enabled_failcnt; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + + int delay_counter_detailed; + int delay_counter_failcnt; + + char *filename_detailed; + char *filename_usage_in_bytes; + char *filename_msw_usage_in_bytes; + char *filename_failcnt; + + int detailed_has_dirty; + int detailed_has_swap; + + // detailed metrics +/* + unsigned long long cache; + unsigned long long rss; + unsigned long long rss_huge; + unsigned long long mapped_file; + unsigned long long writeback; + unsigned long long dirty; + unsigned long long swap; + unsigned long long pgpgin; + unsigned long long pgpgout; + unsigned long long pgfault; + unsigned long long pgmajfault; + unsigned long long inactive_anon; + unsigned long long active_anon; 
+ unsigned long long inactive_file; + unsigned long long active_file; + unsigned long long unevictable; + unsigned long long hierarchical_memory_limit; +*/ + //unified cgroups metrics + unsigned long long anon; + unsigned long long kernel_stack; + unsigned long long slab; + unsigned long long sock; + unsigned long long shmem; + unsigned long long anon_thp; + //unsigned long long file_writeback; + //unsigned long long file_dirty; + //unsigned long long file; + + unsigned long long total_cache; + unsigned long long total_rss; + unsigned long long total_rss_huge; + unsigned long long total_mapped_file; + unsigned long long total_writeback; + unsigned long long total_dirty; + unsigned long long total_swap; + unsigned long long total_pgpgin; + unsigned long long total_pgpgout; + unsigned long long total_pgfault; + unsigned long long total_pgmajfault; +/* + unsigned long long total_inactive_anon; + unsigned long long total_active_anon; +*/ + + unsigned long long total_inactive_file; + +/* + unsigned long long total_active_file; + unsigned long long total_unevictable; +*/ + + // single file metrics + unsigned long long usage_in_bytes; + unsigned long long msw_usage_in_bytes; + unsigned long long failcnt; +}; + +// https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt +struct cpuacct_stat { + int updated; + int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + + char *filename; + + unsigned long long user; // v1, v2(user_usec) + unsigned long long system; // v1, v2(system_usec) +}; + +// https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt +struct cpuacct_usage { + int updated; + int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + + char *filename; + + unsigned int cpus; + unsigned long long *cpu_percpu; +}; + +// represents cpuacct/cpu.stat, for v2 'cpuacct_stat' is used for 'user_usec', 'system_usec' +struct cpuacct_cpu_throttling { + int updated; + int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + + char *filename; + + unsigned 
long long nr_periods; + unsigned long long nr_throttled; + unsigned long long throttled_time; + + unsigned long long nr_throttled_perc; +}; + +// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpu#sect-cfs +// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/managing_monitoring_and_updating_the_kernel/using-cgroups-v2-to-control-distribution-of-cpu-time-for-applications_managing-monitoring-and-updating-the-kernel#proc_controlling-distribution-of-cpu-time-for-applications-by-adjusting-cpu-weight_using-cgroups-v2-to-control-distribution-of-cpu-time-for-applications +struct cpuacct_cpu_shares { + int updated; + int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + + char *filename; + + unsigned long long shares; +}; + +struct cgroup_network_interface { + const char *host_device; + const char *container_device; + struct cgroup_network_interface *next; +}; + +enum cgroups_container_orchestrator { + CGROUPS_ORCHESTRATOR_UNSET, + CGROUPS_ORCHESTRATOR_UNKNOWN, + CGROUPS_ORCHESTRATOR_K8S +}; + +// *** WARNING *** The fields are not thread safe. Take care of safe usage. 
+struct cgroup { + uint32_t options; + + int first_time_seen; // first time seen by the discoverer + int processed; // the discoverer is done processing a cgroup (resolved name, set 'enabled' option) + + char available; // found in the filesystem + char enabled; // enabled in the config + + char pending_renames; + char *intermediate_id; // TODO: remove it when the renaming script is fixed + + char *id; + uint32_t hash; + + char *chart_id; + uint32_t hash_chart; + + char *chart_title; + + DICTIONARY *chart_labels; + + int container_orchestrator; + + struct cpuacct_stat cpuacct_stat; + struct cpuacct_usage cpuacct_usage; + struct cpuacct_cpu_throttling cpuacct_cpu_throttling; + struct cpuacct_cpu_shares cpuacct_cpu_shares; + + struct memory memory; + + struct blkio io_service_bytes; // bytes + struct blkio io_serviced; // operations + + struct blkio throttle_io_service_bytes; // bytes + struct blkio throttle_io_serviced; // operations + + struct blkio io_merged; // operations + struct blkio io_queued; // operations + + struct cgroup_network_interface *interfaces; + + struct pressure cpu_pressure; + struct pressure io_pressure; + struct pressure memory_pressure; + + // per cgroup charts + RRDSET *st_cpu; + RRDSET *st_cpu_limit; + RRDSET *st_cpu_per_core; + RRDSET *st_cpu_nr_throttled; + RRDSET *st_cpu_throttled_time; + RRDSET *st_cpu_shares; + + RRDSET *st_mem; + RRDSET *st_mem_utilization; + RRDSET *st_writeback; + RRDSET *st_mem_activity; + RRDSET *st_pgfaults; + RRDSET *st_mem_usage; + RRDSET *st_mem_usage_limit; + RRDSET *st_mem_failcnt; + + RRDSET *st_io; + RRDSET *st_serviced_ops; + RRDSET *st_throttle_io; + RRDSET *st_throttle_serviced_ops; + RRDSET *st_queued_ops; + RRDSET *st_merged_ops; + + // per cgroup chart variables + char *filename_cpuset_cpus; + unsigned long long cpuset_cpus; + + char *filename_cpu_cfs_period; + unsigned long long cpu_cfs_period; + + char *filename_cpu_cfs_quota; + unsigned long long cpu_cfs_quota; + + const RRDSETVAR_ACQUIRED 
*chart_var_cpu_limit; + NETDATA_DOUBLE prev_cpu_usage; + + char *filename_memory_limit; + unsigned long long memory_limit; + const RRDSETVAR_ACQUIRED *chart_var_memory_limit; + + char *filename_memoryswap_limit; + unsigned long long memoryswap_limit; + const RRDSETVAR_ACQUIRED *chart_var_memoryswap_limit; + + // services + RRDDIM *rd_cpu; + RRDDIM *rd_mem_usage; + RRDDIM *rd_mem_failcnt; + RRDDIM *rd_swap_usage; + + RRDDIM *rd_mem_detailed_cache; + RRDDIM *rd_mem_detailed_rss; + RRDDIM *rd_mem_detailed_mapped; + RRDDIM *rd_mem_detailed_writeback; + RRDDIM *rd_mem_detailed_pgpgin; + RRDDIM *rd_mem_detailed_pgpgout; + RRDDIM *rd_mem_detailed_pgfault; + RRDDIM *rd_mem_detailed_pgmajfault; + + RRDDIM *rd_io_service_bytes_read; + RRDDIM *rd_io_serviced_read; + RRDDIM *rd_throttle_io_read; + RRDDIM *rd_throttle_io_serviced_read; + RRDDIM *rd_io_queued_read; + RRDDIM *rd_io_merged_read; + + RRDDIM *rd_io_service_bytes_write; + RRDDIM *rd_io_serviced_write; + RRDDIM *rd_throttle_io_write; + RRDDIM *rd_throttle_io_serviced_write; + RRDDIM *rd_io_queued_write; + RRDDIM *rd_io_merged_write; + + struct cgroup *next; + struct cgroup *discovered_next; + +} *cgroup_root = NULL; + +uv_mutex_t cgroup_root_mutex; + +struct cgroup *discovered_cgroup_root = NULL; + +struct discovery_thread { + uv_thread_t thread; + uv_mutex_t mutex; + uv_cond_t cond_var; + int start_discovery; + int exited; +} discovery_thread; + +// --------------------------------------------------------------------------------------------- + +static inline int matches_enabled_cgroup_paths(char *id) { + return simple_pattern_matches(enabled_cgroup_paths, id); +} + +static inline int matches_enabled_cgroup_names(char *name) { + return simple_pattern_matches(enabled_cgroup_names, name); +} + +static inline int matches_enabled_cgroup_renames(char *id) { + return simple_pattern_matches(enabled_cgroup_renames, id); +} + +static inline int matches_systemd_services_cgroups(char *id) { + return 
simple_pattern_matches(systemd_services_cgroups, id); +} + +static inline int matches_search_cgroup_paths(const char *dir) { + return simple_pattern_matches(search_cgroup_paths, dir); +} + +static inline int matches_entrypoint_parent_process_comm(const char *comm) { + return simple_pattern_matches(entrypoint_parent_process_comm, comm); +} + +static inline int is_cgroup_systemd_service(struct cgroup *cg) { + return (cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE); +} + +// --------------------------------------------------------------------------------------------- +static int k8s_is_kubepod(struct cgroup *cg) { + return cg->container_orchestrator == CGROUPS_ORCHESTRATOR_K8S; +} + +static int k8s_is_container(const char *id) { + // examples: + // https://github.com/netdata/netdata/blob/0fc101679dcd12f1cb8acdd07bb4c85d8e553e53/collectors/cgroups.plugin/cgroup-name.sh#L121-L147 + const char *p = id; + const char *pp = NULL; + int i = 0; + size_t l = 3; // pod + while ((p = strstr(p, "pod"))) { + i++; + p += l; + pp = p; + } + return !(i < 2 || !pp || !(pp = strchr(pp, '/')) || !pp++ || !*pp); +} + +#define TASK_COMM_LEN 16 + +static int k8s_get_container_first_proc_comm(const char *id, char *comm) { + if (!k8s_is_container(id)) { + return 1; + } + + static procfile *ff = NULL; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/%s/cgroup.procs", cgroup_cpuacct_base, id); + + ff = procfile_reopen(ff, filename, NULL, CGROUP_PROCFILE_FLAG); + if (unlikely(!ff)) { + debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename); + return 1; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) { + debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename); + return 1; + } + + unsigned long lines = procfile_lines(ff); + if (likely(lines < 2)) { + return 1; + } + + char *pid = procfile_lineword(ff, 0, 0); + if (!pid || !*pid) { + return 1; + } + + snprintfz(filename, FILENAME_MAX, 
"%s/proc/%s/comm", netdata_configured_host_prefix, pid); + + ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_DEFAULT); + if (unlikely(!ff)) { + debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename); + return 1; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) { + debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename); + return 1; + } + + lines = procfile_lines(ff); + if (unlikely(lines != 2)) { + return 1; + } + + char *proc_comm = procfile_lineword(ff, 0, 0); + if (!proc_comm || !*proc_comm) { + return 1; + } + + strncpyz(comm, proc_comm, TASK_COMM_LEN); + return 0; +} + +// --------------------------------------------------------------------------------------------- + +static unsigned long long calc_delta(unsigned long long curr, unsigned long long prev) { + if (prev > curr) { + return 0; + } + return curr - prev; +} + +static unsigned long long calc_percentage(unsigned long long value, unsigned long long total) { + if (total == 0) { + return 0; + } + return (NETDATA_DOUBLE)value / (NETDATA_DOUBLE)total * 100; +} + +static int calc_cgroup_depth(const char *id) { + int depth = 0; + const char *s; + for (s = id; *s; s++) { + depth += unlikely(*s == '/'); + } + return depth; +} + +// ---------------------------------------------------------------------------- +// read values from /sys + +static inline void cgroup_read_cpuacct_stat(struct cpuacct_stat *cp) { + static procfile *ff = NULL; + + if(likely(cp->filename)) { + ff = procfile_reopen(ff, cp->filename, NULL, CGROUP_PROCFILE_FLAG); + if(unlikely(!ff)) { + cp->updated = 0; + cgroups_check = 1; + return; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) { + cp->updated = 0; + cgroups_check = 1; + return; + } + + unsigned long i, lines = procfile_lines(ff); + + if(unlikely(lines < 1)) { + error("CGROUP: file '%s' should have 1+ lines.", cp->filename); + cp->updated = 0; + return; + } + + for(i = 0; i < lines ; i++) { + char *s = 
procfile_lineword(ff, i, 0); + uint32_t hash = simple_hash(s); + + if(unlikely(hash == user_hash && !strcmp(s, "user"))) + cp->user = str2ull(procfile_lineword(ff, i, 1)); + + else if(unlikely(hash == system_hash && !strcmp(s, "system"))) + cp->system = str2ull(procfile_lineword(ff, i, 1)); + } + + cp->updated = 1; + + if(unlikely(cp->enabled == CONFIG_BOOLEAN_AUTO && + (cp->user || cp->system || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) + cp->enabled = CONFIG_BOOLEAN_YES; + } +} + +static inline void cgroup_read_cpuacct_cpu_stat(struct cpuacct_cpu_throttling *cp) { + if (unlikely(!cp->filename)) { + return; + } + + static procfile *ff = NULL; + ff = procfile_reopen(ff, cp->filename, NULL, CGROUP_PROCFILE_FLAG); + if (unlikely(!ff)) { + cp->updated = 0; + cgroups_check = 1; + return; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) { + cp->updated = 0; + cgroups_check = 1; + return; + } + + unsigned long lines = procfile_lines(ff); + if (unlikely(lines < 3)) { + error("CGROUP: file '%s' should have 3 lines.", cp->filename); + cp->updated = 0; + return; + } + + unsigned long long nr_periods_last = cp->nr_periods; + unsigned long long nr_throttled_last = cp->nr_throttled; + + for (unsigned long i = 0; i < lines; i++) { + char *s = procfile_lineword(ff, i, 0); + uint32_t hash = simple_hash(s); + + if (unlikely(hash == nr_periods_hash && !strcmp(s, "nr_periods"))) { + cp->nr_periods = str2ull(procfile_lineword(ff, i, 1)); + } else if (unlikely(hash == nr_throttled_hash && !strcmp(s, "nr_throttled"))) { + cp->nr_throttled = str2ull(procfile_lineword(ff, i, 1)); + } else if (unlikely(hash == throttled_time_hash && !strcmp(s, "throttled_time"))) { + cp->throttled_time = str2ull(procfile_lineword(ff, i, 1)); + } + } + cp->nr_throttled_perc = + calc_percentage(calc_delta(cp->nr_throttled, nr_throttled_last), calc_delta(cp->nr_periods, nr_periods_last)); + + cp->updated = 1; + + if (unlikely(cp->enabled == CONFIG_BOOLEAN_AUTO)) { + if (likely( + 
cp->nr_periods || cp->nr_throttled || cp->throttled_time || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) { + cp->enabled = CONFIG_BOOLEAN_YES; + } + } +} + +static inline void cgroup2_read_cpuacct_cpu_stat(struct cpuacct_stat *cp, struct cpuacct_cpu_throttling *cpt) { + static procfile *ff = NULL; + if (unlikely(!cp->filename)) { + return; + } + + ff = procfile_reopen(ff, cp->filename, NULL, CGROUP_PROCFILE_FLAG); + if (unlikely(!ff)) { + cp->updated = 0; + cgroups_check = 1; + return; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) { + cp->updated = 0; + cgroups_check = 1; + return; + } + + unsigned long lines = procfile_lines(ff); + + if (unlikely(lines < 3)) { + error("CGROUP: file '%s' should have at least 3 lines.", cp->filename); + cp->updated = 0; + return; + } + + unsigned long long nr_periods_last = cpt->nr_periods; + unsigned long long nr_throttled_last = cpt->nr_throttled; + + for (unsigned long i = 0; i < lines; i++) { + char *s = procfile_lineword(ff, i, 0); + uint32_t hash = simple_hash(s); + + if (unlikely(hash == user_usec_hash && !strcmp(s, "user_usec"))) { + cp->user = str2ull(procfile_lineword(ff, i, 1)); + } else if (unlikely(hash == system_usec_hash && !strcmp(s, "system_usec"))) { + cp->system = str2ull(procfile_lineword(ff, i, 1)); + } else if (unlikely(hash == nr_periods_hash && !strcmp(s, "nr_periods"))) { + cpt->nr_periods = str2ull(procfile_lineword(ff, i, 1)); + } else if (unlikely(hash == nr_throttled_hash && !strcmp(s, "nr_throttled"))) { + cpt->nr_throttled = str2ull(procfile_lineword(ff, i, 1)); + } else if (unlikely(hash == throttled_usec_hash && !strcmp(s, "throttled_usec"))) { + cpt->throttled_time = str2ull(procfile_lineword(ff, i, 1)) * 1000; // usec -> ns + } + } + cpt->nr_throttled_perc = + calc_percentage(calc_delta(cpt->nr_throttled, nr_throttled_last), calc_delta(cpt->nr_periods, nr_periods_last)); + + cp->updated = 1; + cpt->updated = 1; + + if (unlikely(cp->enabled == CONFIG_BOOLEAN_AUTO)) { + if 
(likely(cp->user || cp->system || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) { + cp->enabled = CONFIG_BOOLEAN_YES; + } + } + if (unlikely(cpt->enabled == CONFIG_BOOLEAN_AUTO)) { + if (likely( + cpt->nr_periods || cpt->nr_throttled || cpt->throttled_time || + netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) { + cpt->enabled = CONFIG_BOOLEAN_YES; + } + } +} + +static inline void cgroup_read_cpuacct_cpu_shares(struct cpuacct_cpu_shares *cp) { + if (unlikely(!cp->filename)) { + return; + } + + if (unlikely(read_single_number_file(cp->filename, &cp->shares))) { + cp->updated = 0; + cgroups_check = 1; + return; + } + + cp->updated = 1; + if (unlikely((cp->enabled == CONFIG_BOOLEAN_AUTO)) && + (cp->shares || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) { + cp->enabled = CONFIG_BOOLEAN_YES; + } +} + +static inline void cgroup_read_cpuacct_usage(struct cpuacct_usage *ca) { + static procfile *ff = NULL; + + if(likely(ca->filename)) { + ff = procfile_reopen(ff, ca->filename, NULL, CGROUP_PROCFILE_FLAG); + if(unlikely(!ff)) { + ca->updated = 0; + cgroups_check = 1; + return; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) { + ca->updated = 0; + cgroups_check = 1; + return; + } + + if(unlikely(procfile_lines(ff) < 1)) { + error("CGROUP: file '%s' should have 1+ lines but has %zu.", ca->filename, procfile_lines(ff)); + ca->updated = 0; + return; + } + + unsigned long i = procfile_linewords(ff, 0); + if(unlikely(i == 0)) { + ca->updated = 0; + return; + } + + // we may have 1 more CPU reported + while(i > 0) { + char *s = procfile_lineword(ff, 0, i - 1); + if(!*s) i--; + else break; + } + + if(unlikely(i != ca->cpus)) { + freez(ca->cpu_percpu); + ca->cpu_percpu = mallocz(sizeof(unsigned long long) * i); + ca->cpus = (unsigned int)i; + } + + unsigned long long total = 0; + for(i = 0; i < ca->cpus ;i++) { + unsigned long long n = str2ull(procfile_lineword(ff, 0, i)); + ca->cpu_percpu[i] = n; + total += n; + } + + ca->updated = 1; + + 
if(unlikely(ca->enabled == CONFIG_BOOLEAN_AUTO && + (total || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) + ca->enabled = CONFIG_BOOLEAN_YES; + } +} + +static inline void cgroup_read_blkio(struct blkio *io) { + if(unlikely(io->enabled == CONFIG_BOOLEAN_AUTO && io->delay_counter > 0)) { + io->delay_counter--; + return; + } + + if(likely(io->filename)) { + static procfile *ff = NULL; + + ff = procfile_reopen(ff, io->filename, NULL, CGROUP_PROCFILE_FLAG); + if(unlikely(!ff)) { + io->updated = 0; + cgroups_check = 1; + return; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) { + io->updated = 0; + cgroups_check = 1; + return; + } + + unsigned long i, lines = procfile_lines(ff); + + if(unlikely(lines < 1)) { + error("CGROUP: file '%s' should have 1+ lines.", io->filename); + io->updated = 0; + return; + } + + io->Read = 0; + io->Write = 0; +/* + io->Sync = 0; + io->Async = 0; + io->Total = 0; +*/ + + for(i = 0; i < lines ; i++) { + char *s = procfile_lineword(ff, i, 1); + uint32_t hash = simple_hash(s); + + if(unlikely(hash == Read_hash && !strcmp(s, "Read"))) + io->Read += str2ull(procfile_lineword(ff, i, 2)); + + else if(unlikely(hash == Write_hash && !strcmp(s, "Write"))) + io->Write += str2ull(procfile_lineword(ff, i, 2)); + +/* + else if(unlikely(hash == Sync_hash && !strcmp(s, "Sync"))) + io->Sync += str2ull(procfile_lineword(ff, i, 2)); + + else if(unlikely(hash == Async_hash && !strcmp(s, "Async"))) + io->Async += str2ull(procfile_lineword(ff, i, 2)); + + else if(unlikely(hash == Total_hash && !strcmp(s, "Total"))) + io->Total += str2ull(procfile_lineword(ff, i, 2)); +*/ + } + + io->updated = 1; + + if(unlikely(io->enabled == CONFIG_BOOLEAN_AUTO)) { + if(unlikely(io->Read || io->Write || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) + io->enabled = CONFIG_BOOLEAN_YES; + else + io->delay_counter = cgroup_recheck_zero_blkio_every_iterations; + } + } +} + +static inline void cgroup2_read_blkio(struct blkio *io, unsigned int word_offset) { + 
if(unlikely(io->enabled == CONFIG_BOOLEAN_AUTO && io->delay_counter > 0)) { + io->delay_counter--; + return; + } + + if(likely(io->filename)) { + static procfile *ff = NULL; + + ff = procfile_reopen(ff, io->filename, NULL, CGROUP_PROCFILE_FLAG); + if(unlikely(!ff)) { + io->updated = 0; + cgroups_check = 1; + return; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) { + io->updated = 0; + cgroups_check = 1; + return; + } + + unsigned long i, lines = procfile_lines(ff); + + if (unlikely(lines < 1)) { + error("CGROUP: file '%s' should have 1+ lines.", io->filename); + io->updated = 0; + return; + } + + io->Read = 0; + io->Write = 0; + + for (i = 0; i < lines; i++) { + io->Read += str2ull(procfile_lineword(ff, i, 2 + word_offset)); + io->Write += str2ull(procfile_lineword(ff, i, 4 + word_offset)); + } + + io->updated = 1; + + if(unlikely(io->enabled == CONFIG_BOOLEAN_AUTO)) { + if(unlikely(io->Read || io->Write || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) + io->enabled = CONFIG_BOOLEAN_YES; + else + io->delay_counter = cgroup_recheck_zero_blkio_every_iterations; + } + } +} + +static inline void cgroup2_read_pressure(struct pressure *res) { + static procfile *ff = NULL; + + if (likely(res->filename)) { + ff = procfile_reopen(ff, res->filename, " =", CGROUP_PROCFILE_FLAG); + if (unlikely(!ff)) { + res->updated = 0; + cgroups_check = 1; + return; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) { + res->updated = 0; + cgroups_check = 1; + return; + } + + size_t lines = procfile_lines(ff); + if (lines < 1) { + error("CGROUP: file '%s' should have 1+ lines.", res->filename); + res->updated = 0; + return; + } + + res->some.share_time.value10 = strtod(procfile_lineword(ff, 0, 2), NULL); + res->some.share_time.value60 = strtod(procfile_lineword(ff, 0, 4), NULL); + res->some.share_time.value300 = strtod(procfile_lineword(ff, 0, 6), NULL); + res->some.total_time.value_total = str2ull(procfile_lineword(ff, 0, 8)) / 1000; // us->ms + + if (lines > 2) { + 
res->full.share_time.value10 = strtod(procfile_lineword(ff, 1, 2), NULL); + res->full.share_time.value60 = strtod(procfile_lineword(ff, 1, 4), NULL); + res->full.share_time.value300 = strtod(procfile_lineword(ff, 1, 6), NULL); + res->full.total_time.value_total = str2ull(procfile_lineword(ff, 0, 8)) / 1000; // us->ms + } + + res->updated = 1; + + if (unlikely(res->some.enabled == CONFIG_BOOLEAN_AUTO)) { + res->some.enabled = CONFIG_BOOLEAN_YES; + if (lines > 2) { + res->full.enabled = CONFIG_BOOLEAN_YES; + } else { + res->full.enabled = CONFIG_BOOLEAN_NO; + } + } + } +} + +static inline void cgroup_read_memory(struct memory *mem, char parent_cg_is_unified) { + static procfile *ff = NULL; + + // read detailed ram usage + if(likely(mem->filename_detailed)) { + if(unlikely(mem->enabled_detailed == CONFIG_BOOLEAN_AUTO && mem->delay_counter_detailed > 0)) { + mem->delay_counter_detailed--; + goto memory_next; + } + + ff = procfile_reopen(ff, mem->filename_detailed, NULL, CGROUP_PROCFILE_FLAG); + if(unlikely(!ff)) { + mem->updated_detailed = 0; + cgroups_check = 1; + goto memory_next; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) { + mem->updated_detailed = 0; + cgroups_check = 1; + goto memory_next; + } + + unsigned long i, lines = procfile_lines(ff); + + if(unlikely(lines < 1)) { + error("CGROUP: file '%s' should have 1+ lines.", mem->filename_detailed); + mem->updated_detailed = 0; + goto memory_next; + } + + + if(unlikely(!mem->arl_base)) { + if(parent_cg_is_unified == 0){ + mem->arl_base = arl_create("cgroup/memory", NULL, 60); + + arl_expect(mem->arl_base, "total_cache", &mem->total_cache); + arl_expect(mem->arl_base, "total_rss", &mem->total_rss); + arl_expect(mem->arl_base, "total_rss_huge", &mem->total_rss_huge); + arl_expect(mem->arl_base, "total_mapped_file", &mem->total_mapped_file); + arl_expect(mem->arl_base, "total_writeback", &mem->total_writeback); + mem->arl_dirty = arl_expect(mem->arl_base, "total_dirty", &mem->total_dirty); + mem->arl_swap = 
arl_expect(mem->arl_base, "total_swap", &mem->total_swap); + arl_expect(mem->arl_base, "total_pgpgin", &mem->total_pgpgin); + arl_expect(mem->arl_base, "total_pgpgout", &mem->total_pgpgout); + arl_expect(mem->arl_base, "total_pgfault", &mem->total_pgfault); + arl_expect(mem->arl_base, "total_pgmajfault", &mem->total_pgmajfault); + arl_expect(mem->arl_base, "total_inactive_file", &mem->total_inactive_file); + } else { + mem->arl_base = arl_create("cgroup/memory", NULL, 60); + + arl_expect(mem->arl_base, "anon", &mem->anon); + arl_expect(mem->arl_base, "kernel_stack", &mem->kernel_stack); + arl_expect(mem->arl_base, "slab", &mem->slab); + arl_expect(mem->arl_base, "sock", &mem->sock); + arl_expect(mem->arl_base, "anon_thp", &mem->anon_thp); + arl_expect(mem->arl_base, "file", &mem->total_mapped_file); + arl_expect(mem->arl_base, "file_writeback", &mem->total_writeback); + mem->arl_dirty = arl_expect(mem->arl_base, "file_dirty", &mem->total_dirty); + arl_expect(mem->arl_base, "pgfault", &mem->total_pgfault); + arl_expect(mem->arl_base, "pgmajfault", &mem->total_pgmajfault); + arl_expect(mem->arl_base, "inactive_file", &mem->total_inactive_file); + } + } + + arl_begin(mem->arl_base); + + for(i = 0; i < lines ; i++) { + if(arl_check(mem->arl_base, + procfile_lineword(ff, i, 0), + procfile_lineword(ff, i, 1))) break; + } + + if(unlikely(mem->arl_dirty->flags & ARL_ENTRY_FLAG_FOUND)) + mem->detailed_has_dirty = 1; + + if(unlikely(parent_cg_is_unified == 0 && mem->arl_swap->flags & ARL_ENTRY_FLAG_FOUND)) + mem->detailed_has_swap = 1; + + // fprintf(stderr, "READ: '%s', cache: %llu, rss: %llu, rss_huge: %llu, mapped_file: %llu, writeback: %llu, dirty: %llu, swap: %llu, pgpgin: %llu, pgpgout: %llu, pgfault: %llu, pgmajfault: %llu, inactive_anon: %llu, active_anon: %llu, inactive_file: %llu, active_file: %llu, unevictable: %llu, hierarchical_memory_limit: %llu, total_cache: %llu, total_rss: %llu, total_rss_huge: %llu, total_mapped_file: %llu, total_writeback: %llu, 
total_dirty: %llu, total_swap: %llu, total_pgpgin: %llu, total_pgpgout: %llu, total_pgfault: %llu, total_pgmajfault: %llu, total_inactive_anon: %llu, total_active_anon: %llu, total_inactive_file: %llu, total_active_file: %llu, total_unevictable: %llu\n", mem->filename, mem->cache, mem->rss, mem->rss_huge, mem->mapped_file, mem->writeback, mem->dirty, mem->swap, mem->pgpgin, mem->pgpgout, mem->pgfault, mem->pgmajfault, mem->inactive_anon, mem->active_anon, mem->inactive_file, mem->active_file, mem->unevictable, mem->hierarchical_memory_limit, mem->total_cache, mem->total_rss, mem->total_rss_huge, mem->total_mapped_file, mem->total_writeback, mem->total_dirty, mem->total_swap, mem->total_pgpgin, mem->total_pgpgout, mem->total_pgfault, mem->total_pgmajfault, mem->total_inactive_anon, mem->total_active_anon, mem->total_inactive_file, mem->total_active_file, mem->total_unevictable); + + mem->updated_detailed = 1; + + if(unlikely(mem->enabled_detailed == CONFIG_BOOLEAN_AUTO)) { + if(( (!parent_cg_is_unified) && ( mem->total_cache || mem->total_dirty || mem->total_rss || mem->total_rss_huge || mem->total_mapped_file || mem->total_writeback + || mem->total_swap || mem->total_pgpgin || mem->total_pgpgout || mem->total_pgfault || mem->total_pgmajfault || mem->total_inactive_file)) + || (parent_cg_is_unified && ( mem->anon || mem->total_dirty || mem->kernel_stack || mem->slab || mem->sock || mem->total_writeback + || mem->anon_thp || mem->total_pgfault || mem->total_pgmajfault || mem->total_inactive_file)) + || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES) + mem->enabled_detailed = CONFIG_BOOLEAN_YES; + else + mem->delay_counter_detailed = cgroup_recheck_zero_mem_detailed_every_iterations; + } + } + +memory_next: + + // read usage_in_bytes + if(likely(mem->filename_usage_in_bytes)) { + mem->updated_usage_in_bytes = !read_single_number_file(mem->filename_usage_in_bytes, &mem->usage_in_bytes); + if(unlikely(mem->updated_usage_in_bytes && mem->enabled_usage_in_bytes == 
CONFIG_BOOLEAN_AUTO && + (mem->usage_in_bytes || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) + mem->enabled_usage_in_bytes = CONFIG_BOOLEAN_YES; + } + + if (likely(mem->updated_usage_in_bytes && mem->updated_detailed)) { + mem->usage_in_bytes = + (mem->usage_in_bytes > mem->total_inactive_file) ? (mem->usage_in_bytes - mem->total_inactive_file) : 0; + } + + // read msw_usage_in_bytes + if(likely(mem->filename_msw_usage_in_bytes)) { + mem->updated_msw_usage_in_bytes = !read_single_number_file(mem->filename_msw_usage_in_bytes, &mem->msw_usage_in_bytes); + if(unlikely(mem->updated_msw_usage_in_bytes && mem->enabled_msw_usage_in_bytes == CONFIG_BOOLEAN_AUTO && + (mem->msw_usage_in_bytes || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) + mem->enabled_msw_usage_in_bytes = CONFIG_BOOLEAN_YES; + } + + // read failcnt + if(likely(mem->filename_failcnt)) { + if(unlikely(mem->enabled_failcnt == CONFIG_BOOLEAN_AUTO && mem->delay_counter_failcnt > 0)) { + mem->updated_failcnt = 0; + mem->delay_counter_failcnt--; + } + else { + mem->updated_failcnt = !read_single_number_file(mem->filename_failcnt, &mem->failcnt); + if(unlikely(mem->updated_failcnt && mem->enabled_failcnt == CONFIG_BOOLEAN_AUTO)) { + if(unlikely(mem->failcnt || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)) + mem->enabled_failcnt = CONFIG_BOOLEAN_YES; + else + mem->delay_counter_failcnt = cgroup_recheck_zero_mem_failcnt_every_iterations; + } + } + } +} + +static inline void read_cgroup(struct cgroup *cg) { + debug(D_CGROUP, "reading metrics for cgroups '%s'", cg->id); + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + cgroup_read_cpuacct_stat(&cg->cpuacct_stat); + cgroup_read_cpuacct_usage(&cg->cpuacct_usage); + cgroup_read_cpuacct_cpu_stat(&cg->cpuacct_cpu_throttling); + cgroup_read_cpuacct_cpu_shares(&cg->cpuacct_cpu_shares); + cgroup_read_memory(&cg->memory, 0); + cgroup_read_blkio(&cg->io_service_bytes); + cgroup_read_blkio(&cg->io_serviced); + 
cgroup_read_blkio(&cg->throttle_io_service_bytes); + cgroup_read_blkio(&cg->throttle_io_serviced); + cgroup_read_blkio(&cg->io_merged); + cgroup_read_blkio(&cg->io_queued); + } + else { + //TODO: io_service_bytes and io_serviced use same file merge into 1 function + cgroup2_read_blkio(&cg->io_service_bytes, 0); + cgroup2_read_blkio(&cg->io_serviced, 4); + cgroup2_read_cpuacct_cpu_stat(&cg->cpuacct_stat, &cg->cpuacct_cpu_throttling); + cgroup_read_cpuacct_cpu_shares(&cg->cpuacct_cpu_shares); + cgroup2_read_pressure(&cg->cpu_pressure); + cgroup2_read_pressure(&cg->io_pressure); + cgroup2_read_pressure(&cg->memory_pressure); + cgroup_read_memory(&cg->memory, 1); + } +} + +static inline void read_all_discovered_cgroups(struct cgroup *root) { + debug(D_CGROUP, "reading metrics for all cgroups"); + + struct cgroup *cg; + for (cg = root; cg; cg = cg->next) { + if (cg->enabled && !cg->pending_renames) { + read_cgroup(cg); + } + } +} + +// ---------------------------------------------------------------------------- +// cgroup network interfaces + +#define CGROUP_NETWORK_INTERFACE_MAX_LINE 2048 +static inline void read_cgroup_network_interfaces(struct cgroup *cg) { + debug(D_CGROUP, "looking for the network interfaces of cgroup '%s' with chart id '%s' and title '%s'", cg->id, cg->chart_id, cg->chart_title); + + pid_t cgroup_pid; + char cgroup_identifier[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; + + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_cpuacct_base, cg->id); + } + else { + snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_unified_base, cg->id); + } + + debug(D_CGROUP, "executing cgroup_identifier %s --cgroup '%s' for cgroup '%s'", cgroups_network_interface_script, cgroup_identifier, cg->id); + FILE *fp_child_input, *fp_child_output; + (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, 
cgroups_network_interface_script, "--cgroup", cgroup_identifier); + if(!fp_child_output) { + error("CGROUP: cannot popen(%s --cgroup \"%s\", \"r\").", cgroups_network_interface_script, cgroup_identifier); + return; + } + + char *s; + char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; + while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp_child_output))) { + trim(s); + + if(*s && *s != '\n') { + char *t = s; + while(*t && *t != ' ') t++; + if(*t == ' ') { + *t = '\0'; + t++; + } + + if(!*s) { + error("CGROUP: empty host interface returned by script"); + continue; + } + + if(!*t) { + error("CGROUP: empty guest interface returned by script"); + continue; + } + + struct cgroup_network_interface *i = callocz(1, sizeof(struct cgroup_network_interface)); + i->host_device = strdupz(s); + i->container_device = strdupz(t); + i->next = cg->interfaces; + cg->interfaces = i; + + info("CGROUP: cgroup '%s' has network interface '%s' as '%s'", cg->id, i->host_device, i->container_device); + + // register a device rename to proc_net_dev.c + netdev_rename_device_add( + i->host_device, i->container_device, cg->chart_id, cg->chart_labels, k8s_is_kubepod(cg) ? "k8s." 
: ""); + } + } + + netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); + // debug(D_CGROUP, "closed cgroup_identifier for cgroup '%s'", cg->id); +} + +static inline void free_cgroup_network_interfaces(struct cgroup *cg) { + while(cg->interfaces) { + struct cgroup_network_interface *i = cg->interfaces; + cg->interfaces = i->next; + + // delete the registration of proc_net_dev rename + netdev_rename_device_del(i->host_device); + + freez((void *)i->host_device); + freez((void *)i->container_device); + freez((void *)i); + } +} + +// ---------------------------------------------------------------------------- +// add/remove/find cgroup objects + +#define CGROUP_CHARTID_LINE_MAX 1024 + +static inline char *cgroup_title_strdupz(const char *s) { + if(!s || !*s) s = "/"; + + if(*s == '/' && s[1] != '\0') s++; + + char *r = strdupz(s); + netdata_fix_chart_name(r); + + return r; +} + +static inline char *cgroup_chart_id_strdupz(const char *s) { + if(!s || !*s) s = "/"; + + if(*s == '/' && s[1] != '\0') s++; + + char *r = strdupz(s); + netdata_fix_chart_id(r); + + return r; +} + +// TODO: move the code to cgroup_chart_id_strdupz() when the renaming script is fixed +static inline void substitute_dots_in_id(char *s) { + // dots are used to distinguish chart type and id in streaming, so we should replace them + for (char *d = s; *d; d++) { + if (*d == '.') + *d = '-'; + } +} + +// ---------------------------------------------------------------------------- +// parse k8s labels + +char *k8s_parse_resolved_name_and_labels(DICTIONARY *labels, char *data) { + // the first word, up to the first space is the name + char *name = mystrsep(&data, " "); + + // the rest are key=value pairs separated by comma + while(data) { + char *pair = mystrsep(&data, ","); + rrdlabels_add_pair(labels, pair, RRDLABEL_SRC_AUTO| RRDLABEL_SRC_K8S); + } + + return name; +} + +// ---------------------------------------------------------------------------- + +static inline void free_pressure(struct 
pressure *res) { + if (res->some.share_time.st) rrdset_is_obsolete(res->some.share_time.st); + if (res->some.total_time.st) rrdset_is_obsolete(res->some.total_time.st); + if (res->full.share_time.st) rrdset_is_obsolete(res->full.share_time.st); + if (res->full.total_time.st) rrdset_is_obsolete(res->full.total_time.st); + freez(res->filename); +} + +static inline void cgroup_free(struct cgroup *cg) { + debug(D_CGROUP, "Removing cgroup '%s' with chart id '%s' (was %s and %s)", cg->id, cg->chart_id, (cg->enabled)?"enabled":"disabled", (cg->available)?"available":"not available"); + + if(cg->st_cpu) rrdset_is_obsolete(cg->st_cpu); + if(cg->st_cpu_limit) rrdset_is_obsolete(cg->st_cpu_limit); + if(cg->st_cpu_per_core) rrdset_is_obsolete(cg->st_cpu_per_core); + if(cg->st_cpu_nr_throttled) rrdset_is_obsolete(cg->st_cpu_nr_throttled); + if(cg->st_cpu_throttled_time) rrdset_is_obsolete(cg->st_cpu_throttled_time); + if(cg->st_cpu_shares) rrdset_is_obsolete(cg->st_cpu_shares); + if(cg->st_mem) rrdset_is_obsolete(cg->st_mem); + if(cg->st_writeback) rrdset_is_obsolete(cg->st_writeback); + if(cg->st_mem_activity) rrdset_is_obsolete(cg->st_mem_activity); + if(cg->st_pgfaults) rrdset_is_obsolete(cg->st_pgfaults); + if(cg->st_mem_usage) rrdset_is_obsolete(cg->st_mem_usage); + if(cg->st_mem_usage_limit) rrdset_is_obsolete(cg->st_mem_usage_limit); + if(cg->st_mem_utilization) rrdset_is_obsolete(cg->st_mem_utilization); + if(cg->st_mem_failcnt) rrdset_is_obsolete(cg->st_mem_failcnt); + if(cg->st_io) rrdset_is_obsolete(cg->st_io); + if(cg->st_serviced_ops) rrdset_is_obsolete(cg->st_serviced_ops); + if(cg->st_throttle_io) rrdset_is_obsolete(cg->st_throttle_io); + if(cg->st_throttle_serviced_ops) rrdset_is_obsolete(cg->st_throttle_serviced_ops); + if(cg->st_queued_ops) rrdset_is_obsolete(cg->st_queued_ops); + if(cg->st_merged_ops) rrdset_is_obsolete(cg->st_merged_ops); + + freez(cg->filename_cpuset_cpus); + freez(cg->filename_cpu_cfs_period); + freez(cg->filename_cpu_cfs_quota); + 
freez(cg->filename_memory_limit); + freez(cg->filename_memoryswap_limit); + + free_cgroup_network_interfaces(cg); + + freez(cg->cpuacct_usage.cpu_percpu); + + freez(cg->cpuacct_stat.filename); + freez(cg->cpuacct_usage.filename); + freez(cg->cpuacct_cpu_throttling.filename); + freez(cg->cpuacct_cpu_shares.filename); + + arl_free(cg->memory.arl_base); + freez(cg->memory.filename_detailed); + freez(cg->memory.filename_failcnt); + freez(cg->memory.filename_usage_in_bytes); + freez(cg->memory.filename_msw_usage_in_bytes); + + freez(cg->io_service_bytes.filename); + freez(cg->io_serviced.filename); + + freez(cg->throttle_io_service_bytes.filename); + freez(cg->throttle_io_serviced.filename); + + freez(cg->io_merged.filename); + freez(cg->io_queued.filename); + + free_pressure(&cg->cpu_pressure); + free_pressure(&cg->io_pressure); + free_pressure(&cg->memory_pressure); + + freez(cg->id); + freez(cg->intermediate_id); + freez(cg->chart_id); + freez(cg->chart_title); + + rrdlabels_destroy(cg->chart_labels); + + freez(cg); + + cgroup_root_count--; +} + +// ---------------------------------------------------------------------------- + +static inline void discovery_rename_cgroup(struct cgroup *cg) { + if (!cg->pending_renames) { + return; + } + cg->pending_renames--; + + debug(D_CGROUP, "looking for the name of cgroup '%s' with chart id '%s' and title '%s'", cg->id, cg->chart_id, cg->chart_title); + debug(D_CGROUP, "executing command %s \"%s\" for cgroup '%s'", cgroups_rename_script, cg->intermediate_id, cg->chart_id); + pid_t cgroup_pid; + + FILE *fp_child_input, *fp_child_output; + (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_rename_script, cg->id, cg->intermediate_id); + if (!fp_child_output) { + error("CGROUP: cannot popen(%s \"%s\", \"r\").", cgroups_rename_script, cg->intermediate_id); + cg->pending_renames = 0; + cg->processed = 1; + return; + } + + char buffer[CGROUP_CHARTID_LINE_MAX + 1]; + char 
*new_name = fgets(buffer, CGROUP_CHARTID_LINE_MAX, fp_child_output); + int exit_code = netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); + + switch (exit_code) { + case 0: + cg->pending_renames = 0; + break; + + case 3: + cg->pending_renames = 0; + cg->processed = 1; + break; + } + + if(cg->pending_renames || cg->processed) return; + if(!new_name || !*new_name || *new_name == '\n') return; + if(!(new_name = trim(new_name))) return; + + char *name = new_name; + if (!strncmp(new_name, "k8s_", 4)) { + if(!cg->chart_labels) cg->chart_labels = rrdlabels_create(); + + // read the new labels and remove the obsolete ones + rrdlabels_unmark_all(cg->chart_labels); + name = k8s_parse_resolved_name_and_labels(cg->chart_labels, new_name); + rrdlabels_remove_all_unmarked(cg->chart_labels); + } + + freez(cg->chart_title); + cg->chart_title = cgroup_title_strdupz(name); + + freez(cg->chart_id); + cg->chart_id = cgroup_chart_id_strdupz(name); + + substitute_dots_in_id(cg->chart_id); + cg->hash_chart = simple_hash(cg->chart_id); +} + +static void is_cgroup_procs_exist(netdata_ebpf_cgroup_shm_body_t *out, char *id) { + struct stat buf; + + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_cpuset_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } + + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_blkio_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } + + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_memory_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } + + snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_devices_base, id); + if (likely(stat(out->path, &buf) == 0)) { + return; + } + + out->path[0] = '\0'; + out->enabled = 0; +} + +static inline void convert_cgroup_to_systemd_service(struct cgroup *cg) { + char buffer[CGROUP_CHARTID_LINE_MAX]; + cg->options |= CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE; + strncpyz(buffer, cg->id, CGROUP_CHARTID_LINE_MAX); + char *s 
= buffer; + + // skip to the last slash + size_t len = strlen(s); + while (len--) { + if (unlikely(s[len] == '/')) { + break; + } + } + if (len) { + s = &s[len + 1]; + } + + // remove extension + len = strlen(s); + while (len--) { + if (unlikely(s[len] == '.')) { + break; + } + } + if (len) { + s[len] = '\0'; + } + + freez(cg->chart_title); + cg->chart_title = cgroup_title_strdupz(s); +} + +static inline struct cgroup *discovery_cgroup_add(const char *id) { + debug(D_CGROUP, "adding to list, cgroup with id '%s'", id); + + struct cgroup *cg = callocz(1, sizeof(struct cgroup)); + cg->id = strdupz(id); + cg->hash = simple_hash(cg->id); + cg->chart_title = cgroup_title_strdupz(id); + cg->intermediate_id = cgroup_chart_id_strdupz(id); + cg->chart_id = cgroup_chart_id_strdupz(id); + substitute_dots_in_id(cg->chart_id); + cg->hash_chart = simple_hash(cg->chart_id); + if (cgroup_use_unified_cgroups) { + cg->options |= CGROUP_OPTIONS_IS_UNIFIED; + } + + if (!discovered_cgroup_root) + discovered_cgroup_root = cg; + else { + struct cgroup *t; + for (t = discovered_cgroup_root; t->discovered_next; t = t->discovered_next) { + } + t->discovered_next = cg; + } + + return cg; +} + +static inline struct cgroup *discovery_cgroup_find(const char *id) { + debug(D_CGROUP, "searching for cgroup '%s'", id); + + uint32_t hash = simple_hash(id); + + struct cgroup *cg; + for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) { + if(hash == cg->hash && strcmp(id, cg->id) == 0) + break; + } + + debug(D_CGROUP, "cgroup '%s' %s in memory", id, (cg)?"found":"not found"); + return cg; +} + +static inline void discovery_find_cgroup_in_dir_callback(const char *dir) { + if (!dir || !*dir) { + dir = "/"; + } + debug(D_CGROUP, "examining cgroup dir '%s'", dir); + + struct cgroup *cg = discovery_cgroup_find(dir); + if (cg) { + cg->available = 1; + return; + } + + if (cgroup_root_count >= cgroup_root_max) { + info("CGROUP: maximum number of cgroups reached (%d). 
Not adding cgroup '%s'", cgroup_root_count, dir); + return; + } + + if (cgroup_max_depth > 0) { + int depth = calc_cgroup_depth(dir); + if (depth > cgroup_max_depth) { + info("CGROUP: '%s' is too deep (%d, while max is %d)", dir, depth, cgroup_max_depth); + return; + } + } + + cg = discovery_cgroup_add(dir); + cg->available = 1; + cg->first_time_seen = 1; + cgroup_root_count++; +} + +static inline int discovery_find_dir_in_subdirs(const char *base, const char *this, void (*callback)(const char *)) { + if(!this) this = base; + debug(D_CGROUP, "searching for directories in '%s' (base '%s')", this?this:"", base); + + size_t dirlen = strlen(this), baselen = strlen(base); + + int ret = -1; + int enabled = -1; + + const char *relative_path = &this[baselen]; + if(!*relative_path) relative_path = "/"; + + DIR *dir = opendir(this); + if(!dir) { + error("CGROUP: cannot read directory '%s'", base); + return ret; + } + ret = 1; + + callback(relative_path); + + struct dirent *de = NULL; + while((de = readdir(dir))) { + if(de->d_type == DT_DIR + && ( + (de->d_name[0] == '.' && de->d_name[1] == '\0') + || (de->d_name[0] == '.' && de->d_name[1] == '.' 
&& de->d_name[2] == '\0') + )) + continue; + + if(de->d_type == DT_DIR) { + if(enabled == -1) { + const char *r = relative_path; + if(*r == '\0') r = "/"; + + // do not decent in directories we are not interested + enabled = matches_search_cgroup_paths(r); + } + + if(enabled) { + char *s = mallocz(dirlen + strlen(de->d_name) + 2); + strcpy(s, this); + strcat(s, "/"); + strcat(s, de->d_name); + int ret2 = discovery_find_dir_in_subdirs(base, s, callback); + if(ret2 > 0) ret += ret2; + freez(s); + } + } + } + + closedir(dir); + return ret; +} + +static inline void discovery_mark_all_cgroups_as_unavailable() { + debug(D_CGROUP, "marking all cgroups as not available"); + struct cgroup *cg; + for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { + cg->available = 0; + } +} + +static inline void discovery_update_filenames() { + struct cgroup *cg; + struct stat buf; + for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) { + if(unlikely(!cg->available || !cg->enabled || cg->pending_renames)) + continue; + + debug(D_CGROUP, "checking paths for cgroup '%s'", cg->id); + + // check for newly added cgroups + // and update the filenames they read + char filename[FILENAME_MAX + 1]; + if(!cgroup_use_unified_cgroups) { + if(unlikely(cgroup_enable_cpuacct_stat && !cg->cpuacct_stat.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpuacct.stat", cgroup_cpuacct_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->cpuacct_stat.filename = strdupz(filename); + cg->cpuacct_stat.enabled = cgroup_enable_cpuacct_stat; + snprintfz(filename, FILENAME_MAX, "%s%s/cpuset.cpus", cgroup_cpuset_base, cg->id); + cg->filename_cpuset_cpus = strdupz(filename); + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.cfs_period_us", cgroup_cpuacct_base, cg->id); + cg->filename_cpu_cfs_period = strdupz(filename); + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.cfs_quota_us", cgroup_cpuacct_base, cg->id); + cg->filename_cpu_cfs_quota = strdupz(filename); + debug(D_CGROUP, 
"cpuacct.stat filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_stat.filename); + } + else + debug(D_CGROUP, "cpuacct.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + if(unlikely(cgroup_enable_cpuacct_usage && !cg->cpuacct_usage.filename && !is_cgroup_systemd_service(cg))) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpuacct.usage_percpu", cgroup_cpuacct_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->cpuacct_usage.filename = strdupz(filename); + cg->cpuacct_usage.enabled = cgroup_enable_cpuacct_usage; + debug(D_CGROUP, "cpuacct.usage_percpu filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_usage.filename); + } + else + debug(D_CGROUP, "cpuacct.usage_percpu file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + if(unlikely(cgroup_enable_cpuacct_cpu_throttling && !cg->cpuacct_cpu_throttling.filename && !is_cgroup_systemd_service(cg))) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.stat", cgroup_cpuacct_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->cpuacct_cpu_throttling.filename = strdupz(filename); + cg->cpuacct_cpu_throttling.enabled = cgroup_enable_cpuacct_cpu_throttling; + debug(D_CGROUP, "cpu.stat filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_cpu_throttling.filename); + } + else + debug(D_CGROUP, "cpu.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + if (unlikely( + cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename && + !is_cgroup_systemd_service(cg))) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.shares", cgroup_cpuacct_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->cpuacct_cpu_shares.filename = strdupz(filename); + cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares; + debug( + D_CGROUP, "cpu.shares filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_cpu_shares.filename); + } else + debug(D_CGROUP, "cpu.shares file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + 
if(unlikely((cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed && (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.stat", cgroup_memory_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->memory.filename_detailed = strdupz(filename); + cg->memory.enabled_detailed = (cgroup_enable_detailed_memory == CONFIG_BOOLEAN_YES)?CONFIG_BOOLEAN_YES:CONFIG_BOOLEAN_AUTO; + debug(D_CGROUP, "memory.stat filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_detailed); + } + else + debug(D_CGROUP, "memory.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + if(unlikely(cgroup_enable_memory && !cg->memory.filename_usage_in_bytes)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.usage_in_bytes", cgroup_memory_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->memory.filename_usage_in_bytes = strdupz(filename); + cg->memory.enabled_usage_in_bytes = cgroup_enable_memory; + debug(D_CGROUP, "memory.usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_usage_in_bytes); + snprintfz(filename, FILENAME_MAX, "%s%s/memory.limit_in_bytes", cgroup_memory_base, cg->id); + cg->filename_memory_limit = strdupz(filename); + } + else + debug(D_CGROUP, "memory.usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + if(unlikely(cgroup_enable_swap && !cg->memory.filename_msw_usage_in_bytes)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.memsw.usage_in_bytes", cgroup_memory_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->memory.filename_msw_usage_in_bytes = strdupz(filename); + cg->memory.enabled_msw_usage_in_bytes = cgroup_enable_swap; + snprintfz(filename, FILENAME_MAX, "%s%s/memory.memsw.limit_in_bytes", cgroup_memory_base, cg->id); + cg->filename_memoryswap_limit = strdupz(filename); + debug(D_CGROUP, "memory.msw_usage_in_bytes 
filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_msw_usage_in_bytes); + } + else + debug(D_CGROUP, "memory.msw_usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + if(unlikely(cgroup_enable_memory_failcnt && !cg->memory.filename_failcnt)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.failcnt", cgroup_memory_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->memory.filename_failcnt = strdupz(filename); + cg->memory.enabled_failcnt = cgroup_enable_memory_failcnt; + debug(D_CGROUP, "memory.failcnt filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_failcnt); + } + else + debug(D_CGROUP, "memory.failcnt file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + if(unlikely(cgroup_enable_blkio_io && !cg->io_service_bytes.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_service_bytes_recursive", cgroup_blkio_base, cg->id); + if (unlikely(stat(filename, &buf) != -1)) { + cg->io_service_bytes.filename = strdupz(filename); + cg->io_service_bytes.enabled = cgroup_enable_blkio_io; + debug(D_CGROUP, "blkio.io_service_bytes_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename); + } else { + debug(D_CGROUP, "blkio.io_service_bytes_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_service_bytes", cgroup_blkio_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_service_bytes.filename = strdupz(filename); + cg->io_service_bytes.enabled = cgroup_enable_blkio_io; + debug(D_CGROUP, "blkio.io_service_bytes filename for cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename); + } else { + debug(D_CGROUP, "blkio.io_service_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + } + } + + if (unlikely(cgroup_enable_blkio_ops && !cg->io_serviced.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_serviced_recursive", cgroup_blkio_base, 
cg->id); + if (unlikely(stat(filename, &buf) != -1)) { + cg->io_serviced.filename = strdupz(filename); + cg->io_serviced.enabled = cgroup_enable_blkio_ops; + debug(D_CGROUP, "blkio.io_serviced_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_serviced.filename); + } else { + debug(D_CGROUP, "blkio.io_serviced_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_serviced", cgroup_blkio_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_serviced.filename = strdupz(filename); + cg->io_serviced.enabled = cgroup_enable_blkio_ops; + debug(D_CGROUP, "blkio.io_serviced filename for cgroup '%s': '%s'", cg->id, cg->io_serviced.filename); + } else { + debug(D_CGROUP, "blkio.io_serviced file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + } + } + + if (unlikely(cgroup_enable_blkio_throttle_io && !cg->throttle_io_service_bytes.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_service_bytes_recursive", cgroup_blkio_base, cg->id); + if (unlikely(stat(filename, &buf) != -1)) { + cg->throttle_io_service_bytes.filename = strdupz(filename); + cg->throttle_io_service_bytes.enabled = cgroup_enable_blkio_throttle_io; + debug(D_CGROUP,"blkio.throttle.io_service_bytes_recursive filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_service_bytes.filename); + } else { + debug(D_CGROUP, "blkio.throttle.io_service_bytes_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); + snprintfz( + filename, FILENAME_MAX, "%s%s/blkio.throttle.io_service_bytes", cgroup_blkio_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->throttle_io_service_bytes.filename = strdupz(filename); + cg->throttle_io_service_bytes.enabled = cgroup_enable_blkio_throttle_io; + debug(D_CGROUP, "blkio.throttle.io_service_bytes filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_service_bytes.filename); + } else { + debug(D_CGROUP, 
"blkio.throttle.io_service_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + } + } + + if (unlikely(cgroup_enable_blkio_throttle_ops && !cg->throttle_io_serviced.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_serviced_recursive", cgroup_blkio_base, cg->id); + if (unlikely(stat(filename, &buf) != -1)) { + cg->throttle_io_serviced.filename = strdupz(filename); + cg->throttle_io_serviced.enabled = cgroup_enable_blkio_throttle_ops; + debug(D_CGROUP, "blkio.throttle.io_serviced_recursive filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_serviced.filename); + } else { + debug(D_CGROUP, "blkio.throttle.io_serviced_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_serviced", cgroup_blkio_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->throttle_io_serviced.filename = strdupz(filename); + cg->throttle_io_serviced.enabled = cgroup_enable_blkio_throttle_ops; + debug(D_CGROUP, "blkio.throttle.io_serviced filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_serviced.filename); + } else { + debug(D_CGROUP, "blkio.throttle.io_serviced file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + } + } + + if (unlikely(cgroup_enable_blkio_merged_ops && !cg->io_merged.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_merged_recursive", cgroup_blkio_base, cg->id); + if (unlikely(stat(filename, &buf) != -1)) { + cg->io_merged.filename = strdupz(filename); + cg->io_merged.enabled = cgroup_enable_blkio_merged_ops; + debug(D_CGROUP, "blkio.io_merged_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_merged.filename); + } else { + debug(D_CGROUP, "blkio.io_merged_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_merged", cgroup_blkio_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_merged.filename = 
strdupz(filename); + cg->io_merged.enabled = cgroup_enable_blkio_merged_ops; + debug(D_CGROUP, "blkio.io_merged filename for cgroup '%s': '%s'", cg->id, cg->io_merged.filename); + } else { + debug(D_CGROUP, "blkio.io_merged file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + } + } + + if (unlikely(cgroup_enable_blkio_queued_ops && !cg->io_queued.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_queued_recursive", cgroup_blkio_base, cg->id); + if (unlikely(stat(filename, &buf) != -1)) { + cg->io_queued.filename = strdupz(filename); + cg->io_queued.enabled = cgroup_enable_blkio_queued_ops; + debug(D_CGROUP, "blkio.io_queued_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_queued.filename); + } else { + debug(D_CGROUP, "blkio.io_queued_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); + snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_queued", cgroup_blkio_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_queued.filename = strdupz(filename); + cg->io_queued.enabled = cgroup_enable_blkio_queued_ops; + debug(D_CGROUP, "blkio.io_queued filename for cgroup '%s': '%s'", cg->id, cg->io_queued.filename); + } else { + debug(D_CGROUP, "blkio.io_queued file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + } + } + } + else if(likely(cgroup_unified_exist)) { + if(unlikely(cgroup_enable_blkio_io && !cg->io_service_bytes.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/io.stat", cgroup_unified_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->io_service_bytes.filename = strdupz(filename); + cg->io_service_bytes.enabled = cgroup_enable_blkio_io; + debug(D_CGROUP, "io.stat filename for unified cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename); + } else + debug(D_CGROUP, "io.stat file for unified cgroup '%s': '%s' does not exist.", cg->id, filename); + } + if (unlikely(cgroup_enable_blkio_ops && !cg->io_serviced.filename)) { + snprintfz(filename, 
FILENAME_MAX, "%s%s/io.stat", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_serviced.filename = strdupz(filename); + cg->io_serviced.enabled = cgroup_enable_blkio_ops; + /* log the filename that was just assigned for the ops counters */ + debug(D_CGROUP, "io.stat filename for unified cgroup '%s': '%s'", cg->id, cg->io_serviced.filename); + } else + debug(D_CGROUP, "io.stat file for unified cgroup '%s': '%s' does not exist.", cg->id, filename); + } + if (unlikely( + (cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_cpu_throttling) && + !cg->cpuacct_stat.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.stat", cgroup_unified_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->cpuacct_stat.filename = strdupz(filename); + cg->cpuacct_stat.enabled = cgroup_enable_cpuacct_stat; + cg->cpuacct_cpu_throttling.enabled = cgroup_enable_cpuacct_cpu_throttling; + cg->filename_cpuset_cpus = NULL; + cg->filename_cpu_cfs_period = NULL; + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.max", cgroup_unified_base, cg->id); + cg->filename_cpu_cfs_quota = strdupz(filename); + debug(D_CGROUP, "cpu.stat filename for unified cgroup '%s': '%s'", cg->id, cg->cpuacct_stat.filename); + } + else + debug(D_CGROUP, "cpu.stat file for unified cgroup '%s': '%s' does not exist.", cg->id, filename); + } + if (unlikely(cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.weight", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->cpuacct_cpu_shares.filename = strdupz(filename); + cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares; + debug(D_CGROUP, "cpu.weight filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_cpu_shares.filename); + } else + debug(D_CGROUP, "cpu.weight file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + if(unlikely((cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed && (cgroup_used_memory || 
cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.stat", cgroup_unified_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->memory.filename_detailed = strdupz(filename); + cg->memory.enabled_detailed = (cgroup_enable_detailed_memory == CONFIG_BOOLEAN_YES)?CONFIG_BOOLEAN_YES:CONFIG_BOOLEAN_AUTO; + debug(D_CGROUP, "memory.stat filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_detailed); + } + else + debug(D_CGROUP, "memory.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + if(unlikely(cgroup_enable_memory && !cg->memory.filename_usage_in_bytes)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.current", cgroup_unified_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->memory.filename_usage_in_bytes = strdupz(filename); + cg->memory.enabled_usage_in_bytes = cgroup_enable_memory; + debug(D_CGROUP, "memory.current filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_usage_in_bytes); + snprintfz(filename, FILENAME_MAX, "%s%s/memory.max", cgroup_unified_base, cg->id); + cg->filename_memory_limit = strdupz(filename); + } + else + debug(D_CGROUP, "memory.current file for cgroup '%s': '%s' does not exist.", cg->id, filename); + } + + if(unlikely(cgroup_enable_swap && !cg->memory.filename_msw_usage_in_bytes)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.swap.current", cgroup_unified_base, cg->id); + if(likely(stat(filename, &buf) != -1)) { + cg->memory.filename_msw_usage_in_bytes = strdupz(filename); + cg->memory.enabled_msw_usage_in_bytes = cgroup_enable_swap; + snprintfz(filename, FILENAME_MAX, "%s%s/memory.swap.max", cgroup_unified_base, cg->id); + cg->filename_memoryswap_limit = strdupz(filename); + debug(D_CGROUP, "memory.swap.current filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_msw_usage_in_bytes); + } + else + debug(D_CGROUP, "memory.swap file for cgroup '%s': '%s' does not exist.", cg->id, 
filename); + } + + if (unlikely(cgroup_enable_pressure_cpu && !cg->cpu_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->cpu_pressure.filename = strdupz(filename); + cg->cpu_pressure.some.enabled = cgroup_enable_pressure_cpu; + cg->cpu_pressure.full.enabled = CONFIG_BOOLEAN_NO; + debug(D_CGROUP, "cpu.pressure filename for cgroup '%s': '%s'", cg->id, cg->cpu_pressure.filename); + } else { + debug(D_CGROUP, "cpu.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); + } + } + + if (unlikely((cgroup_enable_pressure_io_some || cgroup_enable_pressure_io_full) && !cg->io_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/io.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_pressure.filename = strdupz(filename); + cg->io_pressure.some.enabled = cgroup_enable_pressure_io_some; + cg->io_pressure.full.enabled = cgroup_enable_pressure_io_full; + debug(D_CGROUP, "io.pressure filename for cgroup '%s': '%s'", cg->id, cg->io_pressure.filename); + } else { + debug(D_CGROUP, "io.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); + } + } + + if (unlikely((cgroup_enable_pressure_memory_some || cgroup_enable_pressure_memory_full) && !cg->memory_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->memory_pressure.filename = strdupz(filename); + cg->memory_pressure.some.enabled = cgroup_enable_pressure_memory_some; + cg->memory_pressure.full.enabled = cgroup_enable_pressure_memory_full; + debug(D_CGROUP, "memory.pressure filename for cgroup '%s': '%s'", cg->id, cg->memory_pressure.filename); + } else { + debug(D_CGROUP, "memory.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); + } + } + } + } +} + +static inline void discovery_cleanup_all_cgroups() { + 
struct cgroup *cg = discovered_cgroup_root, *last = NULL; + + for(; cg ;) { + if(!cg->available) { + // enable the first duplicate cgroup + { + struct cgroup *t; + for(t = discovered_cgroup_root; t ; t = t->discovered_next) { + if(t != cg && t->available && !t->enabled && t->options & CGROUP_OPTIONS_DISABLED_DUPLICATE && t->hash_chart == cg->hash_chart && !strcmp(t->chart_id, cg->chart_id)) { + debug(D_CGROUP, "Enabling duplicate of cgroup '%s' with id '%s', because the original with id '%s' stopped.", t->chart_id, t->id, cg->id); + t->enabled = 1; + t->options &= ~CGROUP_OPTIONS_DISABLED_DUPLICATE; + break; + } + } + } + + if(!last) + discovered_cgroup_root = cg->discovered_next; + else + last->discovered_next = cg->discovered_next; + + cgroup_free(cg); + + if(!last) + cg = discovered_cgroup_root; + else + cg = last->discovered_next; + } + else { + last = cg; + cg = cg->discovered_next; + } + } +} + +static inline void discovery_copy_discovered_cgroups_to_reader() { + debug(D_CGROUP, "copy discovered cgroups to the main group list"); + + struct cgroup *cg; + + for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { + cg->next = cg->discovered_next; + } + + cgroup_root = discovered_cgroup_root; +} + +static inline void discovery_share_cgroups_with_ebpf() { + struct cgroup *cg; + int count; + struct stat buf; + + if (shm_mutex_cgroup_ebpf == SEM_FAILED) { + return; + } + sem_wait(shm_mutex_cgroup_ebpf); + + for (cg = cgroup_root, count = 0; cg; cg = cg->next, count++) { + netdata_ebpf_cgroup_shm_body_t *ptr = &shm_cgroup_ebpf.body[count]; + char *prefix = (is_cgroup_systemd_service(cg)) ? 
"" : "cgroup_"; + snprintfz(ptr->name, CGROUP_EBPF_NAME_SHARED_LENGTH - 1, "%s%s", prefix, cg->chart_title); + ptr->hash = simple_hash(ptr->name); + ptr->options = cg->options; + ptr->enabled = cg->enabled; + if (cgroup_use_unified_cgroups) { + snprintfz(ptr->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_unified_base, cg->id); + if (likely(stat(ptr->path, &buf) == -1)) { + ptr->path[0] = '\0'; + ptr->enabled = 0; + } + } else { + is_cgroup_procs_exist(ptr, cg->id); + } + + debug(D_CGROUP, "cgroup shared: NAME=%s, ENABLED=%d", ptr->name, ptr->enabled); + } + + shm_cgroup_ebpf.header->cgroup_root_count = count; + sem_post(shm_mutex_cgroup_ebpf); +} + +static inline void discovery_find_all_cgroups_v1() { + if (cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_usage) { + if (discovery_find_dir_in_subdirs(cgroup_cpuacct_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_enable_cpuacct_stat = cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_NO; + error("CGROUP: disabled cpu statistics."); + } + } + + if (cgroup_enable_blkio_io || cgroup_enable_blkio_ops || cgroup_enable_blkio_throttle_io || + cgroup_enable_blkio_throttle_ops || cgroup_enable_blkio_merged_ops || cgroup_enable_blkio_queued_ops) { + if (discovery_find_dir_in_subdirs(cgroup_blkio_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_enable_blkio_io = cgroup_enable_blkio_ops = cgroup_enable_blkio_throttle_io = + cgroup_enable_blkio_throttle_ops = cgroup_enable_blkio_merged_ops = cgroup_enable_blkio_queued_ops = + CONFIG_BOOLEAN_NO; + error("CGROUP: disabled blkio statistics."); + } + } + + if (cgroup_enable_memory || cgroup_enable_detailed_memory || cgroup_enable_swap || cgroup_enable_memory_failcnt) { + if (discovery_find_dir_in_subdirs(cgroup_memory_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_enable_memory = cgroup_enable_detailed_memory = cgroup_enable_swap = cgroup_enable_memory_failcnt = + CONFIG_BOOLEAN_NO; + error("CGROUP: disabled memory 
statistics."); + } + } + + if (cgroup_search_in_devices) { + if (discovery_find_dir_in_subdirs(cgroup_devices_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_search_in_devices = 0; + error("CGROUP: disabled devices statistics."); + } + } +} + +static inline void discovery_find_all_cgroups_v2() { + if (discovery_find_dir_in_subdirs(cgroup_unified_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) { + cgroup_unified_exist = CONFIG_BOOLEAN_NO; + error("CGROUP: disabled unified cgroups statistics."); + } +} + +static int is_digits_only(const char *s) { + do { + if (!isdigit(*s++)) { + return 0; + } + } while (*s); + + return 1; +} + +static inline void discovery_process_first_time_seen_cgroup(struct cgroup *cg) { + if (!cg->first_time_seen) { + return; + } + cg->first_time_seen = 0; + + char comm[TASK_COMM_LEN]; + + if (cg->container_orchestrator == CGROUPS_ORCHESTRATOR_UNSET) { + if (strstr(cg->id, "kubepods")) { + cg->container_orchestrator = CGROUPS_ORCHESTRATOR_K8S; + } else { + cg->container_orchestrator = CGROUPS_ORCHESTRATOR_UNKNOWN; + } + } + + if (is_inside_k8s && !k8s_get_container_first_proc_comm(cg->id, comm)) { + // container initialization may take some time when CPU % is high + // seen on GKE: comm is '6' before 'runc:[2:INIT]' (dunno if it could be another number) + if (is_digits_only(comm) || matches_entrypoint_parent_process_comm(comm)) { + cg->first_time_seen = 1; + return; + } + if (!strcmp(comm, "pause")) { + // a container that holds the network namespace for the pod + // we don't need to collect its metrics + cg->processed = 1; + return; + } + } + + if (cgroup_enable_systemd_services && matches_systemd_services_cgroups(cg->id)) { + debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'cgroups to match as systemd services'", cg->id, cg->chart_title); + convert_cgroup_to_systemd_service(cg); + return; + } + + if (matches_enabled_cgroup_renames(cg->id)) { + debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'run script to 
rename cgroups matching', will try to rename it", cg->id, cg->chart_title); + if (is_inside_k8s && k8s_is_container(cg->id)) { + // it may take up to a minute for the K8s API to return data for the container + // tested on AWS K8s cluster with 100% CPU utilization + cg->pending_renames = 9; // 1.5 minute + } else { + cg->pending_renames = 2; + } + } +} + +static int discovery_is_cgroup_duplicate(struct cgroup *cg) { + // https://github.com/netdata/netdata/issues/797#issuecomment-241248884 + struct cgroup *c; + for (c = discovered_cgroup_root; c; c = c->discovered_next) { + if (c != cg && c->enabled && c->hash_chart == cg->hash_chart && !strcmp(c->chart_id, cg->chart_id)) { + error("CGROUP: chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.", cg->chart_id, c->id, cg->id); + return 1; + } + } + return 0; +} + +static inline void discovery_process_cgroup(struct cgroup *cg) { + if (!cg) { + debug(D_CGROUP, "discovery_process_cgroup() received NULL"); + return; + } + if (!cg->available || cg->processed) { + return; + } + + if (cg->first_time_seen) { + worker_is_busy(WORKER_DISCOVERY_PROCESS_FIRST_TIME); + discovery_process_first_time_seen_cgroup(cg); + if (unlikely(cg->first_time_seen || cg->processed)) { + return; + } + } + + if (cg->pending_renames) { + worker_is_busy(WORKER_DISCOVERY_PROCESS_RENAME); + discovery_rename_cgroup(cg); + if (unlikely(cg->pending_renames || cg->processed)) { + return; + } + } + + cg->processed = 1; + + if ((strlen(cg->chart_id) + strlen(cgroup_chart_id_prefix)) >= RRD_ID_LENGTH_MAX) { + info("cgroup '%s' (chart id '%s') disabled because chart_id exceeds the limit (RRD_ID_LENGTH_MAX)", cg->id, cg->chart_id); + return; + } + + if (is_cgroup_systemd_service(cg)) { + cg->enabled = 1; + return; + } + + if (!(cg->enabled = matches_enabled_cgroup_names(cg->chart_title))) { + debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups names matching'", cg->id, 
cg->chart_title); + return; + } + + if (!(cg->enabled = matches_enabled_cgroup_paths(cg->id))) { + debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups matching'", cg->id, cg->chart_title); + return; + } + + if (discovery_is_cgroup_duplicate(cg)) { + cg->enabled = 0; + cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; + return; + } + + worker_is_busy(WORKER_DISCOVERY_PROCESS_NETWORK); + read_cgroup_network_interfaces(cg); +} + +static inline void discovery_find_all_cgroups() { + debug(D_CGROUP, "searching for cgroups"); + + worker_is_busy(WORKER_DISCOVERY_INIT); + discovery_mark_all_cgroups_as_unavailable(); + + worker_is_busy(WORKER_DISCOVERY_FIND); + if (!cgroup_use_unified_cgroups) { + discovery_find_all_cgroups_v1(); + } else { + discovery_find_all_cgroups_v2(); + } + + struct cgroup *cg; + for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { + worker_is_busy(WORKER_DISCOVERY_PROCESS); + discovery_process_cgroup(cg); + } + + worker_is_busy(WORKER_DISCOVERY_UPDATE); + discovery_update_filenames(); + + worker_is_busy(WORKER_DISCOVERY_LOCK); + uv_mutex_lock(&cgroup_root_mutex); + + worker_is_busy(WORKER_DISCOVERY_CLEANUP); + discovery_cleanup_all_cgroups(); + + worker_is_busy(WORKER_DISCOVERY_COPY); + discovery_copy_discovered_cgroups_to_reader(); + + uv_mutex_unlock(&cgroup_root_mutex); + + worker_is_busy(WORKER_DISCOVERY_SHARE); + discovery_share_cgroups_with_ebpf(); + + debug(D_CGROUP, "done searching for cgroups"); +} + +void cgroup_discovery_worker(void *ptr) +{ + UNUSED(ptr); + + worker_register("CGROUPSDISC"); + worker_register_job_name(WORKER_DISCOVERY_INIT, "init"); + worker_register_job_name(WORKER_DISCOVERY_FIND, "find"); + worker_register_job_name(WORKER_DISCOVERY_PROCESS, "process"); + worker_register_job_name(WORKER_DISCOVERY_PROCESS_RENAME, "rename"); + worker_register_job_name(WORKER_DISCOVERY_PROCESS_NETWORK, "network"); + worker_register_job_name(WORKER_DISCOVERY_PROCESS_FIRST_TIME, "new"); + 
worker_register_job_name(WORKER_DISCOVERY_UPDATE, "update"); + worker_register_job_name(WORKER_DISCOVERY_CLEANUP, "cleanup"); + worker_register_job_name(WORKER_DISCOVERY_COPY, "copy"); + worker_register_job_name(WORKER_DISCOVERY_SHARE, "share"); + worker_register_job_name(WORKER_DISCOVERY_LOCK, "lock"); + + entrypoint_parent_process_comm = simple_pattern_create( + " runc:[* " // http://terenceli.github.io/%E6%8A%80%E6%9C%AF/2021/12/28/runc-internals-3) + " exe ", // https://github.com/falcosecurity/falco/blob/9d41b0a151b83693929d3a9c84f7c5c85d070d3a/rules/falco_rules.yaml#L1961 + NULL, + SIMPLE_PATTERN_EXACT); + + while (!netdata_exit) { + worker_is_idle(); + + uv_mutex_lock(&discovery_thread.mutex); + while (!discovery_thread.start_discovery) + uv_cond_wait(&discovery_thread.cond_var, &discovery_thread.mutex); + discovery_thread.start_discovery = 0; + uv_mutex_unlock(&discovery_thread.mutex); + + if (unlikely(netdata_exit)) + break; + + discovery_find_all_cgroups(); + } + + discovery_thread.exited = 1; + worker_unregister(); +} + +// ---------------------------------------------------------------------------- +// generate charts + +#define CHART_TITLE_MAX 300 + +void update_systemd_services_charts( + int update_every + , int do_cpu + , int do_mem_usage + , int do_mem_detailed + , int do_mem_failcnt + , int do_swap_usage + , int do_io + , int do_io_ops + , int do_throttle_io + , int do_throttle_ops + , int do_queued_ops + , int do_merged_ops +) { + static RRDSET + *st_cpu = NULL, + *st_mem_usage = NULL, + *st_mem_failcnt = NULL, + *st_swap_usage = NULL, + + *st_mem_detailed_cache = NULL, + *st_mem_detailed_rss = NULL, + *st_mem_detailed_mapped = NULL, + *st_mem_detailed_writeback = NULL, + *st_mem_detailed_pgfault = NULL, + *st_mem_detailed_pgmajfault = NULL, + *st_mem_detailed_pgpgin = NULL, + *st_mem_detailed_pgpgout = NULL, + + *st_io_read = NULL, + *st_io_serviced_read = NULL, + *st_throttle_io_read = NULL, + *st_throttle_ops_read = NULL, + *st_queued_ops_read 
= NULL, + *st_merged_ops_read = NULL, + + *st_io_write = NULL, + *st_io_serviced_write = NULL, + *st_throttle_io_write = NULL, + *st_throttle_ops_write = NULL, + *st_queued_ops_write = NULL, + *st_merged_ops_write = NULL; + + // create the charts + + if (unlikely(do_cpu && !st_cpu)) { + char title[CHART_TITLE_MAX + 1]; + snprintfz(title, CHART_TITLE_MAX, "Systemd Services CPU utilization (100%% = 1 core)"); + + st_cpu = rrdset_create_localhost( + "services" + , "cpu" + , NULL + , "cpu" + , "services.cpu" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if (unlikely(do_mem_usage && !st_mem_usage)) { + st_mem_usage = rrdset_create_localhost( + "services" + , "mem_usage" + , NULL + , "mem" + , "services.mem_usage" + , "Systemd Services Used Memory" + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 10 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(likely(do_mem_detailed)) { + if(unlikely(!st_mem_detailed_rss)) { + st_mem_detailed_rss = rrdset_create_localhost( + "services" + , "mem_rss" + , NULL + , "mem" + , "services.mem_rss" + , "Systemd Services RSS Memory" + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 20 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_mem_detailed_mapped)) { + st_mem_detailed_mapped = rrdset_create_localhost( + "services" + , "mem_mapped" + , NULL + , "mem" + , "services.mem_mapped" + , "Systemd Services Mapped Memory" + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 30 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_mem_detailed_cache)) { + st_mem_detailed_cache = rrdset_create_localhost( + "services" + , "mem_cache" + , NULL + , "mem" + , "services.mem_cache" + , 
"Systemd Services Cache Memory" + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 40 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_mem_detailed_writeback)) { + st_mem_detailed_writeback = rrdset_create_localhost( + "services" + , "mem_writeback" + , NULL + , "mem" + , "services.mem_writeback" + , "Systemd Services Writeback Memory" + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 50 + , update_every + , RRDSET_TYPE_STACKED + ); + + } + + if(unlikely(!st_mem_detailed_pgfault)) { + st_mem_detailed_pgfault = rrdset_create_localhost( + "services" + , "mem_pgfault" + , NULL + , "mem" + , "services.mem_pgfault" + , "Systemd Services Memory Minor Page Faults" + , "MiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 60 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_mem_detailed_pgmajfault)) { + st_mem_detailed_pgmajfault = rrdset_create_localhost( + "services" + , "mem_pgmajfault" + , NULL + , "mem" + , "services.mem_pgmajfault" + , "Systemd Services Memory Major Page Faults" + , "MiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 70 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_mem_detailed_pgpgin)) { + st_mem_detailed_pgpgin = rrdset_create_localhost( + "services" + , "mem_pgpgin" + , NULL + , "mem" + , "services.mem_pgpgin" + , "Systemd Services Memory Charging Activity" + , "MiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 80 + , update_every + , RRDSET_TYPE_STACKED + ); + + } + + if(unlikely(!st_mem_detailed_pgpgout)) { + st_mem_detailed_pgpgout = rrdset_create_localhost( + "services" + , "mem_pgpgout" + , NULL + , "mem" + , "services.mem_pgpgout" + , "Systemd Services Memory Uncharging 
Activity" + , "MiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 90 + , update_every + , RRDSET_TYPE_STACKED + ); + } + } + + if(unlikely(do_mem_failcnt && !st_mem_failcnt)) { + st_mem_failcnt = rrdset_create_localhost( + "services" + , "mem_failcnt" + , NULL + , "mem" + , "services.mem_failcnt" + , "Systemd Services Memory Limit Failures" + , "failures" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 110 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if (do_swap_usage && !st_swap_usage) { + st_swap_usage = rrdset_create_localhost( + "services" + , "swap_usage" + , NULL + , "swap" + , "services.swap_usage" + , "Systemd Services Swap Memory Used" + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 100 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(likely(do_io)) { + if(unlikely(!st_io_read)) { + st_io_read = rrdset_create_localhost( + "services" + , "io_read" + , NULL + , "disk" + , "services.io_read" + , "Systemd Services Disk Read Bandwidth" + , "KiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 120 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_io_write)) { + st_io_write = rrdset_create_localhost( + "services" + , "io_write" + , NULL + , "disk" + , "services.io_write" + , "Systemd Services Disk Write Bandwidth" + , "KiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 130 + , update_every + , RRDSET_TYPE_STACKED + ); + } + } + + if(likely(do_io_ops)) { + if(unlikely(!st_io_serviced_read)) { + st_io_serviced_read = rrdset_create_localhost( + "services" + , "io_ops_read" + , NULL + , "disk" + , "services.io_ops_read" + , "Systemd Services Disk Read Operations" + , "operations/s" + , PLUGIN_CGROUPS_NAME + , 
PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 140 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_io_serviced_write)) { + st_io_serviced_write = rrdset_create_localhost( + "services" + , "io_ops_write" + , NULL + , "disk" + , "services.io_ops_write" + , "Systemd Services Disk Write Operations" + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 150 + , update_every + , RRDSET_TYPE_STACKED + ); + } + } + + if(likely(do_throttle_io)) { + if(unlikely(!st_throttle_io_read)) { + + st_throttle_io_read = rrdset_create_localhost( + "services" + , "throttle_io_read" + , NULL + , "disk" + , "services.throttle_io_read" + , "Systemd Services Throttle Disk Read Bandwidth" + , "KiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 160 + , update_every + , RRDSET_TYPE_STACKED + ); + + } + + if(unlikely(!st_throttle_io_write)) { + st_throttle_io_write = rrdset_create_localhost( + "services" + , "throttle_io_write" + , NULL + , "disk" + , "services.throttle_io_write" + , "Systemd Services Throttle Disk Write Bandwidth" + , "KiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 170 + , update_every + , RRDSET_TYPE_STACKED + ); + } + } + + if(likely(do_throttle_ops)) { + if(unlikely(!st_throttle_ops_read)) { + st_throttle_ops_read = rrdset_create_localhost( + "services" + , "throttle_io_ops_read" + , NULL + , "disk" + , "services.throttle_io_ops_read" + , "Systemd Services Throttle Disk Read Operations" + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 180 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_throttle_ops_write)) { + st_throttle_ops_write = rrdset_create_localhost( + "services" + , "throttle_io_ops_write" + , NULL + , "disk" + , 
"services.throttle_io_ops_write" + , "Systemd Services Throttle Disk Write Operations" + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 190 + , update_every + , RRDSET_TYPE_STACKED + ); + } + } + + if(likely(do_queued_ops)) { + if(unlikely(!st_queued_ops_read)) { + st_queued_ops_read = rrdset_create_localhost( + "services" + , "queued_io_ops_read" + , NULL + , "disk" + , "services.queued_io_ops_read" + , "Systemd Services Queued Disk Read Operations" + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 200 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_queued_ops_write)) { + + st_queued_ops_write = rrdset_create_localhost( + "services" + , "queued_io_ops_write" + , NULL + , "disk" + , "services.queued_io_ops_write" + , "Systemd Services Queued Disk Write Operations" + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 210 + , update_every + , RRDSET_TYPE_STACKED + ); + } + } + + if(likely(do_merged_ops)) { + if(unlikely(!st_merged_ops_read)) { + st_merged_ops_read = rrdset_create_localhost( + "services" + , "merged_io_ops_read" + , NULL + , "disk" + , "services.merged_io_ops_read" + , "Systemd Services Merged Disk Read Operations" + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 220 + , update_every + , RRDSET_TYPE_STACKED + ); + } + + if(unlikely(!st_merged_ops_write)) { + st_merged_ops_write = rrdset_create_localhost( + "services" + , "merged_io_ops_write" + , NULL + , "disk" + , "services.merged_io_ops_write" + , "Systemd Services Merged Disk Write Operations" + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME + , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 230 + , update_every + , RRDSET_TYPE_STACKED + ); + } + } + + // update the 
values + struct cgroup *cg; + for(cg = cgroup_root; cg ; cg = cg->next) { + if(unlikely(!cg->enabled || cg->pending_renames || !is_cgroup_systemd_service(cg))) + continue; + + if(likely(do_cpu && cg->cpuacct_stat.updated)) { + if(unlikely(!cg->rd_cpu)){ + + + if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + cg->rd_cpu = rrddim_add(st_cpu, cg->chart_id, cg->chart_title, 100, system_hz, RRD_ALGORITHM_INCREMENTAL); + } else { + cg->rd_cpu = rrddim_add(st_cpu, cg->chart_id, cg->chart_title, 100, 1000000, RRD_ALGORITHM_INCREMENTAL); + } + } + + rrddim_set_by_pointer(st_cpu, cg->rd_cpu, cg->cpuacct_stat.user + cg->cpuacct_stat.system); + } + + if(likely(do_mem_usage && cg->memory.updated_usage_in_bytes)) { + if(unlikely(!cg->rd_mem_usage)) + cg->rd_mem_usage = rrddim_add(st_mem_usage, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(st_mem_usage, cg->rd_mem_usage, cg->memory.usage_in_bytes); + } + + if(likely(do_mem_detailed && cg->memory.updated_detailed)) { + if(unlikely(!cg->rd_mem_detailed_rss)) + cg->rd_mem_detailed_rss = rrddim_add(st_mem_detailed_rss, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(st_mem_detailed_rss, cg->rd_mem_detailed_rss, cg->memory.total_rss); + + if(unlikely(!cg->rd_mem_detailed_mapped)) + cg->rd_mem_detailed_mapped = rrddim_add(st_mem_detailed_mapped, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(st_mem_detailed_mapped, cg->rd_mem_detailed_mapped, cg->memory.total_mapped_file); + + if(unlikely(!cg->rd_mem_detailed_cache)) + cg->rd_mem_detailed_cache = rrddim_add(st_mem_detailed_cache, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(st_mem_detailed_cache, cg->rd_mem_detailed_cache, cg->memory.total_cache); + + if(unlikely(!cg->rd_mem_detailed_writeback)) + cg->rd_mem_detailed_writeback = rrddim_add(st_mem_detailed_writeback, cg->chart_id, 
cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(st_mem_detailed_writeback, cg->rd_mem_detailed_writeback, cg->memory.total_writeback); + + if(unlikely(!cg->rd_mem_detailed_pgfault)) + cg->rd_mem_detailed_pgfault = rrddim_add(st_mem_detailed_pgfault, cg->chart_id, cg->chart_title, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_mem_detailed_pgfault, cg->rd_mem_detailed_pgfault, cg->memory.total_pgfault); + + if(unlikely(!cg->rd_mem_detailed_pgmajfault)) + cg->rd_mem_detailed_pgmajfault = rrddim_add(st_mem_detailed_pgmajfault, cg->chart_id, cg->chart_title, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_mem_detailed_pgmajfault, cg->rd_mem_detailed_pgmajfault, cg->memory.total_pgmajfault); + + if(unlikely(!cg->rd_mem_detailed_pgpgin)) + cg->rd_mem_detailed_pgpgin = rrddim_add(st_mem_detailed_pgpgin, cg->chart_id, cg->chart_title, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_mem_detailed_pgpgin, cg->rd_mem_detailed_pgpgin, cg->memory.total_pgpgin); + + if(unlikely(!cg->rd_mem_detailed_pgpgout)) + cg->rd_mem_detailed_pgpgout = rrddim_add(st_mem_detailed_pgpgout, cg->chart_id, cg->chart_title, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_mem_detailed_pgpgout, cg->rd_mem_detailed_pgpgout, cg->memory.total_pgpgout); + } + + if(likely(do_mem_failcnt && cg->memory.updated_failcnt)) { + if(unlikely(!cg->rd_mem_failcnt)) + cg->rd_mem_failcnt = rrddim_add(st_mem_failcnt, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_mem_failcnt, cg->rd_mem_failcnt, cg->memory.failcnt); + } + + if(likely(do_swap_usage && cg->memory.updated_msw_usage_in_bytes)) { + if(unlikely(!cg->rd_swap_usage)) + cg->rd_swap_usage = rrddim_add(st_swap_usage, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + if(!(cg->options & 
CGROUP_OPTIONS_IS_UNIFIED)) { + rrddim_set_by_pointer( + st_swap_usage, + cg->rd_swap_usage, + cg->memory.msw_usage_in_bytes > (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) ? + cg->memory.msw_usage_in_bytes - (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) : 0); + } else { + rrddim_set_by_pointer(st_swap_usage, cg->rd_swap_usage, cg->memory.msw_usage_in_bytes); + } + } + + if(likely(do_io && cg->io_service_bytes.updated)) { + if(unlikely(!cg->rd_io_service_bytes_read)) + cg->rd_io_service_bytes_read = rrddim_add(st_io_read, cg->chart_id, cg->chart_title, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_io_read, cg->rd_io_service_bytes_read, cg->io_service_bytes.Read); + + if(unlikely(!cg->rd_io_service_bytes_write)) + cg->rd_io_service_bytes_write = rrddim_add(st_io_write, cg->chart_id, cg->chart_title, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_io_write, cg->rd_io_service_bytes_write, cg->io_service_bytes.Write); + } + + if(likely(do_io_ops && cg->io_serviced.updated)) { + if(unlikely(!cg->rd_io_serviced_read)) + cg->rd_io_serviced_read = rrddim_add(st_io_serviced_read, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_io_serviced_read, cg->rd_io_serviced_read, cg->io_serviced.Read); + + if(unlikely(!cg->rd_io_serviced_write)) + cg->rd_io_serviced_write = rrddim_add(st_io_serviced_write, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_io_serviced_write, cg->rd_io_serviced_write, cg->io_serviced.Write); + } + + if(likely(do_throttle_io && cg->throttle_io_service_bytes.updated)) { + if(unlikely(!cg->rd_throttle_io_read)) + cg->rd_throttle_io_read = rrddim_add(st_throttle_io_read, cg->chart_id, cg->chart_title, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_throttle_io_read, cg->rd_throttle_io_read, cg->throttle_io_service_bytes.Read); + + if(unlikely(!cg->rd_throttle_io_write)) + 
cg->rd_throttle_io_write = rrddim_add(st_throttle_io_write, cg->chart_id, cg->chart_title, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_throttle_io_write, cg->rd_throttle_io_write, cg->throttle_io_service_bytes.Write); + } + + if(likely(do_throttle_ops && cg->throttle_io_serviced.updated)) { + if(unlikely(!cg->rd_throttle_io_serviced_read)) + cg->rd_throttle_io_serviced_read = rrddim_add(st_throttle_ops_read, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_throttle_ops_read, cg->rd_throttle_io_serviced_read, cg->throttle_io_serviced.Read); + + if(unlikely(!cg->rd_throttle_io_serviced_write)) + cg->rd_throttle_io_serviced_write = rrddim_add(st_throttle_ops_write, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_throttle_ops_write, cg->rd_throttle_io_serviced_write, cg->throttle_io_serviced.Write); + } + + if(likely(do_queued_ops && cg->io_queued.updated)) { + if(unlikely(!cg->rd_io_queued_read)) + cg->rd_io_queued_read = rrddim_add(st_queued_ops_read, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_queued_ops_read, cg->rd_io_queued_read, cg->io_queued.Read); + + if(unlikely(!cg->rd_io_queued_write)) + cg->rd_io_queued_write = rrddim_add(st_queued_ops_write, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_queued_ops_write, cg->rd_io_queued_write, cg->io_queued.Write); + } + + if(likely(do_merged_ops && cg->io_merged.updated)) { + if(unlikely(!cg->rd_io_merged_read)) + cg->rd_io_merged_read = rrddim_add(st_merged_ops_read, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(st_merged_ops_read, cg->rd_io_merged_read, cg->io_merged.Read); + + if(unlikely(!cg->rd_io_merged_write)) + cg->rd_io_merged_write = rrddim_add(st_merged_ops_write, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + 
rrddim_set_by_pointer(st_merged_ops_write, cg->rd_io_merged_write, cg->io_merged.Write); + } + } + + // complete the iteration + if(likely(do_cpu)) + rrdset_done(st_cpu); + + if(likely(do_mem_usage)) + rrdset_done(st_mem_usage); + + if(unlikely(do_mem_detailed)) { + rrdset_done(st_mem_detailed_cache); + rrdset_done(st_mem_detailed_rss); + rrdset_done(st_mem_detailed_mapped); + rrdset_done(st_mem_detailed_writeback); + rrdset_done(st_mem_detailed_pgfault); + rrdset_done(st_mem_detailed_pgmajfault); + rrdset_done(st_mem_detailed_pgpgin); + rrdset_done(st_mem_detailed_pgpgout); + } + + if(likely(do_mem_failcnt)) + rrdset_done(st_mem_failcnt); + + if(likely(do_swap_usage)) + rrdset_done(st_swap_usage); + + if(likely(do_io)) { + rrdset_done(st_io_read); + rrdset_done(st_io_write); + } + + if(likely(do_io_ops)) { + rrdset_done(st_io_serviced_read); + rrdset_done(st_io_serviced_write); + } + + if(likely(do_throttle_io)) { + rrdset_done(st_throttle_io_read); + rrdset_done(st_throttle_io_write); + } + + if(likely(do_throttle_ops)) { + rrdset_done(st_throttle_ops_read); + rrdset_done(st_throttle_ops_write); + } + + if(likely(do_queued_ops)) { + rrdset_done(st_queued_ops_read); + rrdset_done(st_queued_ops_write); + } + + if(likely(do_merged_ops)) { + rrdset_done(st_merged_ops_read); + rrdset_done(st_merged_ops_write); + } +} + +static inline char *cgroup_chart_type(char *buffer, const char *id, size_t len) { + if(buffer[0]) return buffer; + + if(id[0] == '\0' || (id[0] == '/' && id[1] == '\0')) + strncpy(buffer, "cgroup_root", len); + else + snprintfz(buffer, len, "%s%s", cgroup_chart_id_prefix, id); + + netdata_fix_chart_id(buffer); + return buffer; +} + +static inline unsigned long long cpuset_str2ull(char **s) { + unsigned long long n = 0; + char c; + for(c = **s; c >= '0' && c <= '9' ; c = *(++*s)) { + n *= 10; + n += c - '0'; + } + return n; +} + +static inline void update_cpu_limits(char **filename, unsigned long long *value, struct cgroup *cg) { + if(*filename) { + int 
ret = -1; + + if(value == &cg->cpuset_cpus) { + static char *buf = NULL; + static size_t buf_size = 0; + + if(!buf) { + buf_size = 100U + 6 * get_system_cpus(); // taken from kernel/cgroup/cpuset.c + buf = mallocz(buf_size + 1); + } + + ret = read_file(*filename, buf, buf_size); + + if(!ret) { + char *s = buf; + unsigned long long ncpus = 0; + + // parse the cpuset string and calculate the number of cpus the cgroup is allowed to use + while(*s) { + unsigned long long n = cpuset_str2ull(&s); + ncpus++; + if(*s == ',') { + s++; + continue; + } + if(*s == '-') { + s++; + unsigned long long m = cpuset_str2ull(&s); + ncpus += m - n; // calculate the number of cpus in the region + } + s++; + } + + if(likely(ncpus)) *value = ncpus; + } + } + else if(value == &cg->cpu_cfs_period) { + ret = read_single_number_file(*filename, value); + } + else if(value == &cg->cpu_cfs_quota) { + ret = read_single_number_file(*filename, value); + } + else ret = -1; + + if(ret) { + error("Cannot refresh cgroup %s cpu limit by reading '%s'. 
Will not update its limit anymore.", cg->id, *filename); + freez(*filename); + *filename = NULL; + } + } +} + +static inline void update_cpu_limits2(struct cgroup *cg) { + if(cg->filename_cpu_cfs_quota){ + static procfile *ff = NULL; + + ff = procfile_reopen(ff, cg->filename_cpu_cfs_quota, NULL, CGROUP_PROCFILE_FLAG); + if(unlikely(!ff)) { + goto cpu_limits2_err; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) { + goto cpu_limits2_err; + } + + unsigned long lines = procfile_lines(ff); + + if (unlikely(lines < 1)) { + error("CGROUP: file '%s' should have 1 lines.", cg->filename_cpu_cfs_quota); + return; + } + + cg->cpu_cfs_period = str2ull(procfile_lineword(ff, 0, 1)); + cg->cpuset_cpus = get_system_cpus(); + + char *s = "max\n\0"; + if(strcmp(s, procfile_lineword(ff, 0, 0)) == 0){ + cg->cpu_cfs_quota = cg->cpu_cfs_period * cg->cpuset_cpus; + } else { + cg->cpu_cfs_quota = str2ull(procfile_lineword(ff, 0, 0)); + } + debug(D_CGROUP, "CPU limits values: %llu %llu %llu", cg->cpu_cfs_period, cg->cpuset_cpus, cg->cpu_cfs_quota); + return; + +cpu_limits2_err: + error("Cannot refresh cgroup %s cpu limit by reading '%s'. Will not update its limit anymore.", cg->id, cg->filename_cpu_cfs_quota); + freez(cg->filename_cpu_cfs_quota); + cg->filename_cpu_cfs_quota = NULL; + + } +} + +static inline int update_memory_limits(char **filename, const RRDSETVAR_ACQUIRED **chart_var, unsigned long long *value, const char *chart_var_name, struct cgroup *cg) { + if(*filename) { + if(unlikely(!*chart_var)) { + *chart_var = rrdsetvar_custom_chart_variable_add_and_acquire(cg->st_mem_usage, chart_var_name); + if(!*chart_var) { + error("Cannot create cgroup %s chart variable '%s'. Will not update its limit anymore.", cg->id, chart_var_name); + freez(*filename); + *filename = NULL; + } + } + + if(*filename && *chart_var) { + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + if(read_single_number_file(*filename, value)) { + error("Cannot refresh cgroup %s memory limit by reading '%s'. 
Will not update its limit anymore.", cg->id, *filename); + freez(*filename); + *filename = NULL; + } + else { + rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value / (1024 * 1024))); + return 1; + } + } else { + char buffer[30 + 1]; + int ret = read_file(*filename, buffer, 30); + if(ret) { + error("Cannot refresh cgroup %s memory limit by reading '%s'. Will not update its limit anymore.", cg->id, *filename); + freez(*filename); + *filename = NULL; + return 0; + } + char *s = "max\n\0"; + if(strcmp(s, buffer) == 0){ + *value = UINT64_MAX; + rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value / (1024 * 1024))); + return 1; + } + *value = str2ull(buffer); + rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value / (1024 * 1024))); + return 1; + } + } + } + return 0; +} + +void update_cgroup_charts(int update_every) { + debug(D_CGROUP, "updating cgroups charts"); + + char type[RRD_ID_LENGTH_MAX + 1]; + char title[CHART_TITLE_MAX + 1]; + + int services_do_cpu = 0, + services_do_mem_usage = 0, + services_do_mem_detailed = 0, + services_do_mem_failcnt = 0, + services_do_swap_usage = 0, + services_do_io = 0, + services_do_io_ops = 0, + services_do_throttle_io = 0, + services_do_throttle_ops = 0, + services_do_queued_ops = 0, + services_do_merged_ops = 0; + + struct cgroup *cg; + for(cg = cgroup_root; cg ; cg = cg->next) { + if(unlikely(!cg->enabled || cg->pending_renames)) + continue; + + if(likely(cgroup_enable_systemd_services && is_cgroup_systemd_service(cg))) { + if(cg->cpuacct_stat.updated && cg->cpuacct_stat.enabled == CONFIG_BOOLEAN_YES) services_do_cpu++; + + if(cgroup_enable_systemd_services_detailed_memory && cg->memory.updated_detailed && cg->memory.enabled_detailed) services_do_mem_detailed++; + if(cg->memory.updated_usage_in_bytes && cg->memory.enabled_usage_in_bytes == CONFIG_BOOLEAN_YES) services_do_mem_usage++; + if(cg->memory.updated_failcnt && 
cg->memory.enabled_failcnt == CONFIG_BOOLEAN_YES) services_do_mem_failcnt++; + if(cg->memory.updated_msw_usage_in_bytes && cg->memory.enabled_msw_usage_in_bytes == CONFIG_BOOLEAN_YES) services_do_swap_usage++; + + if(cg->io_service_bytes.updated && cg->io_service_bytes.enabled == CONFIG_BOOLEAN_YES) services_do_io++; + if(cg->io_serviced.updated && cg->io_serviced.enabled == CONFIG_BOOLEAN_YES) services_do_io_ops++; + if(cg->throttle_io_service_bytes.updated && cg->throttle_io_service_bytes.enabled == CONFIG_BOOLEAN_YES) services_do_throttle_io++; + if(cg->throttle_io_serviced.updated && cg->throttle_io_serviced.enabled == CONFIG_BOOLEAN_YES) services_do_throttle_ops++; + if(cg->io_queued.updated && cg->io_queued.enabled == CONFIG_BOOLEAN_YES) services_do_queued_ops++; + if(cg->io_merged.updated && cg->io_merged.enabled == CONFIG_BOOLEAN_YES) services_do_merged_ops++; + continue; + } + + type[0] = '\0'; + + if(likely(cg->cpuacct_stat.updated && cg->cpuacct_stat.enabled == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_cpu)) { + snprintfz(title, CHART_TITLE_MAX, "CPU Usage (100%% = 1 core)"); + + cg->st_cpu = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? 
"k8s.cgroup.cpu" : "cgroup.cpu" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdset_update_rrdlabels(cg->st_cpu, cg->chart_labels); + + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + rrddim_add(cg->st_cpu, "user", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(cg->st_cpu, "system", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL); + } + else { + rrddim_add(cg->st_cpu, "user", NULL, 100, 1000000, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(cg->st_cpu, "system", NULL, 100, 1000000, RRD_ALGORITHM_INCREMENTAL); + } + } + + rrddim_set(cg->st_cpu, "user", cg->cpuacct_stat.user); + rrddim_set(cg->st_cpu, "system", cg->cpuacct_stat.system); + rrdset_done(cg->st_cpu); + + if(likely(cg->filename_cpuset_cpus || cg->filename_cpu_cfs_period || cg->filename_cpu_cfs_quota)) { + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + update_cpu_limits(&cg->filename_cpuset_cpus, &cg->cpuset_cpus, cg); + update_cpu_limits(&cg->filename_cpu_cfs_period, &cg->cpu_cfs_period, cg); + update_cpu_limits(&cg->filename_cpu_cfs_quota, &cg->cpu_cfs_quota, cg); + } else { + update_cpu_limits2(cg); + } + + if(unlikely(!cg->chart_var_cpu_limit)) { + cg->chart_var_cpu_limit = rrdsetvar_custom_chart_variable_add_and_acquire(cg->st_cpu, "cpu_limit"); + if(!cg->chart_var_cpu_limit) { + error("Cannot create cgroup %s chart variable 'cpu_limit'. 
Will not update its limit anymore.", cg->id); + if(cg->filename_cpuset_cpus) freez(cg->filename_cpuset_cpus); + cg->filename_cpuset_cpus = NULL; + if(cg->filename_cpu_cfs_period) freez(cg->filename_cpu_cfs_period); + cg->filename_cpu_cfs_period = NULL; + if(cg->filename_cpu_cfs_quota) freez(cg->filename_cpu_cfs_quota); + cg->filename_cpu_cfs_quota = NULL; + } + } + else { + NETDATA_DOUBLE value = 0, quota = 0; + + if(likely( ((!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) && (cg->filename_cpuset_cpus || (cg->filename_cpu_cfs_period && cg->filename_cpu_cfs_quota))) + || ((cg->options & CGROUP_OPTIONS_IS_UNIFIED) && cg->filename_cpu_cfs_quota))) { + if(unlikely(cg->cpu_cfs_quota > 0)) + quota = (NETDATA_DOUBLE)cg->cpu_cfs_quota / (NETDATA_DOUBLE)cg->cpu_cfs_period; + + if(unlikely(quota > 0 && quota < cg->cpuset_cpus)) + value = quota * 100; + else + value = (NETDATA_DOUBLE)cg->cpuset_cpus * 100; + } + if(likely(value)) { + if(unlikely(!cg->st_cpu_limit)) { + snprintfz(title, CHART_TITLE_MAX, "CPU Usage within the limits"); + + cg->st_cpu_limit = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_limit" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? 
"k8s.cgroup.cpu_limit" : "cgroup.cpu_limit" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority - 1 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_cpu_limit, cg->chart_labels); + + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) + rrddim_add(cg->st_cpu_limit, "used", NULL, 1, system_hz, RRD_ALGORITHM_ABSOLUTE); + else + rrddim_add(cg->st_cpu_limit, "used", NULL, 1, 1000000, RRD_ALGORITHM_ABSOLUTE); + cg->prev_cpu_usage = (NETDATA_DOUBLE)(cg->cpuacct_stat.user + cg->cpuacct_stat.system) * 100; + } + + NETDATA_DOUBLE cpu_usage = 0; + cpu_usage = (NETDATA_DOUBLE)(cg->cpuacct_stat.user + cg->cpuacct_stat.system) * 100; + NETDATA_DOUBLE cpu_used = 100 * (cpu_usage - cg->prev_cpu_usage) / (value * update_every); + + rrdset_isnot_obsolete(cg->st_cpu_limit); + + rrddim_set(cg->st_cpu_limit, "used", (cpu_used > 0)?cpu_used:0); + + cg->prev_cpu_usage = cpu_usage; + + rrdsetvar_custom_chart_variable_set(cg->st_cpu, cg->chart_var_cpu_limit, value); + rrdset_done(cg->st_cpu_limit); + } + else { + if(unlikely(cg->st_cpu_limit)) { + rrdset_is_obsolete(cg->st_cpu_limit); + cg->st_cpu_limit = NULL; + } + rrdsetvar_custom_chart_variable_set(cg->st_cpu, cg->chart_var_cpu_limit, NAN); + } + } + } + } + + if (likely(cg->cpuacct_cpu_throttling.updated && cg->cpuacct_cpu_throttling.enabled == CONFIG_BOOLEAN_YES)) { + if (unlikely(!cg->st_cpu_nr_throttled)) { + snprintfz(title, CHART_TITLE_MAX, "CPU Throttled Runnable Periods"); + + cg->st_cpu_nr_throttled = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "throttled" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? 
"k8s.cgroup.throttled" : "cgroup.throttled" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 10 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_cpu_nr_throttled, cg->chart_labels); + rrddim_add(cg->st_cpu_nr_throttled, "throttled", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } else { + rrddim_set(cg->st_cpu_nr_throttled, "throttled", cg->cpuacct_cpu_throttling.nr_throttled_perc); + rrdset_done(cg->st_cpu_nr_throttled); + } + + if (unlikely(!cg->st_cpu_throttled_time)) { + snprintfz(title, CHART_TITLE_MAX, "CPU Throttled Time Duration"); + + cg->st_cpu_throttled_time = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "throttled_duration" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? "k8s.cgroup.throttled_duration" : "cgroup.throttled_duration" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 15 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_cpu_throttled_time, cg->chart_labels); + rrddim_add(cg->st_cpu_throttled_time, "duration", NULL, 1, 1000000, RRD_ALGORITHM_INCREMENTAL); + } else { + rrddim_set(cg->st_cpu_throttled_time, "duration", cg->cpuacct_cpu_throttling.throttled_time); + rrdset_done(cg->st_cpu_throttled_time); + } + } + + if (likely(cg->cpuacct_cpu_shares.updated && cg->cpuacct_cpu_shares.enabled == CONFIG_BOOLEAN_YES)) { + if (unlikely(!cg->st_cpu_shares)) { + snprintfz(title, CHART_TITLE_MAX, "CPU Time Relative Share"); + + cg->st_cpu_shares = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_shares" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? 
"k8s.cgroup.cpu_shares" : "cgroup.cpu_shares" + , title + , "shares" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 20 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_cpu_shares, cg->chart_labels); + rrddim_add(cg->st_cpu_shares, "shares", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } else { + rrddim_set(cg->st_cpu_shares, "shares", cg->cpuacct_cpu_shares.shares); + rrdset_done(cg->st_cpu_shares); + } + } + + if(likely(cg->cpuacct_usage.updated && cg->cpuacct_usage.enabled == CONFIG_BOOLEAN_YES)) { + char id[RRD_ID_LENGTH_MAX + 1]; + unsigned int i; + + if(unlikely(!cg->st_cpu_per_core)) { + snprintfz(title, CHART_TITLE_MAX, "CPU Usage (100%% = 1 core) Per Core"); + + cg->st_cpu_per_core = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_per_core" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_per_core" : "cgroup.cpu_per_core" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 100 + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdset_update_rrdlabels(cg->st_cpu_per_core, cg->chart_labels); + + for(i = 0; i < cg->cpuacct_usage.cpus; i++) { + snprintfz(id, RRD_ID_LENGTH_MAX, "cpu%u", i); + rrddim_add(cg->st_cpu_per_core, id, NULL, 100, 1000000000, RRD_ALGORITHM_INCREMENTAL); + } + } + + for(i = 0; i < cg->cpuacct_usage.cpus ;i++) { + snprintfz(id, RRD_ID_LENGTH_MAX, "cpu%u", i); + rrddim_set(cg->st_cpu_per_core, id, cg->cpuacct_usage.cpu_percpu[i]); + } + rrdset_done(cg->st_cpu_per_core); + } + + if(likely(cg->memory.updated_detailed && cg->memory.enabled_detailed == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_mem)) { + snprintfz(title, CHART_TITLE_MAX, "Memory Usage"); + + cg->st_mem = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? 
"k8s.cgroup.mem" : "cgroup.mem" + , title + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 220 + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdset_update_rrdlabels(cg->st_mem, cg->chart_labels); + + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + rrddim_add(cg->st_mem, "cache", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_mem, "rss", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + if(cg->memory.detailed_has_swap) + rrddim_add(cg->st_mem, "swap", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + rrddim_add(cg->st_mem, "rss_huge", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_mem, "mapped_file", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } else { + rrddim_add(cg->st_mem, "anon", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_mem, "kernel_stack", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_mem, "slab", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_mem, "sock", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_mem, "anon_thp", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_mem, "file", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + } + + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + rrddim_set(cg->st_mem, "cache", cg->memory.total_cache); + rrddim_set(cg->st_mem, "rss", (cg->memory.total_rss > cg->memory.total_rss_huge)?(cg->memory.total_rss - cg->memory.total_rss_huge):0); + + if(cg->memory.detailed_has_swap) + rrddim_set(cg->st_mem, "swap", cg->memory.total_swap); + + rrddim_set(cg->st_mem, "rss_huge", cg->memory.total_rss_huge); + rrddim_set(cg->st_mem, "mapped_file", cg->memory.total_mapped_file); + } else { + rrddim_set(cg->st_mem, "anon", cg->memory.anon); + rrddim_set(cg->st_mem, "kernel_stack", cg->memory.kernel_stack); + rrddim_set(cg->st_mem, "slab", cg->memory.slab); + rrddim_set(cg->st_mem, "sock", cg->memory.sock); + 
rrddim_set(cg->st_mem, "anon_thp", cg->memory.anon_thp); + rrddim_set(cg->st_mem, "file", cg->memory.total_mapped_file); + } + rrdset_done(cg->st_mem); + + if(unlikely(!cg->st_writeback)) { + snprintfz(title, CHART_TITLE_MAX, "Writeback Memory"); + + cg->st_writeback = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "writeback" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? "k8s.cgroup.writeback" : "cgroup.writeback" + , title + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 300 + , update_every + , RRDSET_TYPE_AREA + ); + + rrdset_update_rrdlabels(cg->st_writeback, cg->chart_labels); + + if(cg->memory.detailed_has_dirty) + rrddim_add(cg->st_writeback, "dirty", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + + rrddim_add(cg->st_writeback, "writeback", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + + if(cg->memory.detailed_has_dirty) + rrddim_set(cg->st_writeback, "dirty", cg->memory.total_dirty); + + rrddim_set(cg->st_writeback, "writeback", cg->memory.total_writeback); + rrdset_done(cg->st_writeback); + + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + if(unlikely(!cg->st_mem_activity)) { + snprintfz(title, CHART_TITLE_MAX, "Memory Activity"); + + cg->st_mem_activity = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_activity" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? 
"k8s.cgroup.mem_activity" : "cgroup.mem_activity" + , title + , "MiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 400 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_mem_activity, cg->chart_labels); + + rrddim_add(cg->st_mem_activity, "pgpgin", "in", system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(cg->st_mem_activity, "pgpgout", "out", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(cg->st_mem_activity, "pgpgin", cg->memory.total_pgpgin); + rrddim_set(cg->st_mem_activity, "pgpgout", cg->memory.total_pgpgout); + rrdset_done(cg->st_mem_activity); + } + + if(unlikely(!cg->st_pgfaults)) { + snprintfz(title, CHART_TITLE_MAX, "Memory Page Faults"); + + cg->st_pgfaults = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "pgfaults" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? "k8s.cgroup.pgfaults" : "cgroup.pgfaults" + , title + , "MiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 500 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_pgfaults, cg->chart_labels); + + rrddim_add(cg->st_pgfaults, "pgfault", NULL, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(cg->st_pgfaults, "pgmajfault", "swap", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(cg->st_pgfaults, "pgfault", cg->memory.total_pgfault); + rrddim_set(cg->st_pgfaults, "pgmajfault", cg->memory.total_pgmajfault); + rrdset_done(cg->st_pgfaults); + } + + if(likely(cg->memory.updated_usage_in_bytes && cg->memory.enabled_usage_in_bytes == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_mem_usage)) { + snprintfz(title, CHART_TITLE_MAX, "Used Memory"); + + cg->st_mem_usage = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_usage" + , NULL + , "mem" + , 
k8s_is_kubepod(cg) ? "k8s.cgroup.mem_usage" : "cgroup.mem_usage" + , title + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 210 + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdset_update_rrdlabels(cg->st_mem_usage, cg->chart_labels); + + rrddim_add(cg->st_mem_usage, "ram", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_mem_usage, "swap", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set(cg->st_mem_usage, "ram", cg->memory.usage_in_bytes); + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { + rrddim_set( + cg->st_mem_usage, + "swap", + cg->memory.msw_usage_in_bytes > (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) ? + cg->memory.msw_usage_in_bytes - (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) : 0); + } else { + rrddim_set(cg->st_mem_usage, "swap", cg->memory.msw_usage_in_bytes); + } + rrdset_done(cg->st_mem_usage); + + if (likely(update_memory_limits(&cg->filename_memory_limit, &cg->chart_var_memory_limit, &cg->memory_limit, "memory_limit", cg))) { + static unsigned long long ram_total = 0; + + if(unlikely(!ram_total)) { + procfile *ff = NULL; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/meminfo"); + ff = procfile_open(config_get("plugin:cgroups", "meminfo filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); + + if(likely(ff)) + ff = procfile_readall(ff); + if(likely(ff && procfile_lines(ff) && !strncmp(procfile_word(ff, 0), "MemTotal", 8))) + ram_total = str2ull(procfile_word(ff, 1)) * 1024; + else { + error("Cannot read file %s. 
Will not update cgroup %s RAM limit anymore.", filename, cg->id); + freez(cg->filename_memory_limit); + cg->filename_memory_limit = NULL; + } + + procfile_close(ff); + } + + if(likely(ram_total)) { + unsigned long long memory_limit = ram_total; + + if(unlikely(cg->memory_limit < ram_total)) + memory_limit = cg->memory_limit; + + if(unlikely(!cg->st_mem_usage_limit)) { + snprintfz(title, CHART_TITLE_MAX, "Used RAM within the limits"); + + cg->st_mem_usage_limit = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_usage_limit" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_usage_limit": "cgroup.mem_usage_limit" + , title + , "MiB" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 200 + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdset_update_rrdlabels(cg->st_mem_usage_limit, cg->chart_labels); + + rrddim_add(cg->st_mem_usage_limit, "available", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_mem_usage_limit, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); + } + + rrdset_isnot_obsolete(cg->st_mem_usage_limit); + + rrddim_set(cg->st_mem_usage_limit, "available", memory_limit - cg->memory.usage_in_bytes); + rrddim_set(cg->st_mem_usage_limit, "used", cg->memory.usage_in_bytes); + rrdset_done(cg->st_mem_usage_limit); + + if (unlikely(!cg->st_mem_utilization)) { + snprintfz(title, CHART_TITLE_MAX, "Memory Utilization"); + + cg->st_mem_utilization = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_utilization" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? 
"k8s.cgroup.mem_utilization" : "cgroup.mem_utilization" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 199 + , update_every + , RRDSET_TYPE_AREA + ); + + rrdset_update_rrdlabels(cg->st_mem_utilization, cg->chart_labels); + + rrddim_add(cg->st_mem_utilization, "utilization", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + if (memory_limit) { + rrdset_isnot_obsolete(cg->st_mem_utilization); + + rrddim_set( + cg->st_mem_utilization, "utilization", cg->memory.usage_in_bytes * 100 / memory_limit); + rrdset_done(cg->st_mem_utilization); + } + } + } + else { + if(unlikely(cg->st_mem_usage_limit)) { + rrdset_is_obsolete(cg->st_mem_usage_limit); + cg->st_mem_usage_limit = NULL; + } + + if(unlikely(cg->st_mem_utilization)) { + rrdset_is_obsolete(cg->st_mem_utilization); + cg->st_mem_utilization = NULL; + } + } + + update_memory_limits(&cg->filename_memoryswap_limit, &cg->chart_var_memoryswap_limit, &cg->memoryswap_limit, "memory_and_swap_limit", cg); + } + + if(likely(cg->memory.updated_failcnt && cg->memory.enabled_failcnt == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_mem_failcnt)) { + snprintfz(title, CHART_TITLE_MAX, "Memory Limit Failures"); + + cg->st_mem_failcnt = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_failcnt" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? 
"k8s.cgroup.mem_failcnt" : "cgroup.mem_failcnt" + , title + , "count" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 250 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_mem_failcnt, cg->chart_labels); + + rrddim_add(cg->st_mem_failcnt, "failures", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(cg->st_mem_failcnt, "failures", cg->memory.failcnt); + rrdset_done(cg->st_mem_failcnt); + } + + if(likely(cg->io_service_bytes.updated && cg->io_service_bytes.enabled == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_io)) { + snprintfz(title, CHART_TITLE_MAX, "I/O Bandwidth (all disks)"); + + cg->st_io = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "io" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? "k8s.cgroup.io" : "cgroup.io" + , title + , "KiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 1200 + , update_every + , RRDSET_TYPE_AREA + ); + + rrdset_update_rrdlabels(cg->st_io, cg->chart_labels); + + rrddim_add(cg->st_io, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(cg->st_io, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(cg->st_io, "read", cg->io_service_bytes.Read); + rrddim_set(cg->st_io, "write", cg->io_service_bytes.Write); + rrdset_done(cg->st_io); + } + + if(likely(cg->io_serviced.updated && cg->io_serviced.enabled == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_serviced_ops)) { + snprintfz(title, CHART_TITLE_MAX, "Serviced I/O Operations (all disks)"); + + cg->st_serviced_ops = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "serviced_ops" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? 
"k8s.cgroup.serviced_ops" : "cgroup.serviced_ops" + , title + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 1200 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_serviced_ops, cg->chart_labels); + + rrddim_add(cg->st_serviced_ops, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(cg->st_serviced_ops, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(cg->st_serviced_ops, "read", cg->io_serviced.Read); + rrddim_set(cg->st_serviced_ops, "write", cg->io_serviced.Write); + rrdset_done(cg->st_serviced_ops); + } + + if(likely(cg->throttle_io_service_bytes.updated && cg->throttle_io_service_bytes.enabled == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_throttle_io)) { + snprintfz(title, CHART_TITLE_MAX, "Throttle I/O Bandwidth (all disks)"); + + cg->st_throttle_io = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "throttle_io" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? 
"k8s.cgroup.throttle_io" : "cgroup.throttle_io" + , title + , "KiB/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 1200 + , update_every + , RRDSET_TYPE_AREA + ); + + rrdset_update_rrdlabels(cg->st_throttle_io, cg->chart_labels); + + rrddim_add(cg->st_throttle_io, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(cg->st_throttle_io, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(cg->st_throttle_io, "read", cg->throttle_io_service_bytes.Read); + rrddim_set(cg->st_throttle_io, "write", cg->throttle_io_service_bytes.Write); + rrdset_done(cg->st_throttle_io); + } + + if(likely(cg->throttle_io_serviced.updated && cg->throttle_io_serviced.enabled == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_throttle_serviced_ops)) { + snprintfz(title, CHART_TITLE_MAX, "Throttle Serviced I/O Operations (all disks)"); + + cg->st_throttle_serviced_ops = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "throttle_serviced_ops" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? 
"k8s.cgroup.throttle_serviced_ops" : "cgroup.throttle_serviced_ops" + , title + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 1200 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_throttle_serviced_ops, cg->chart_labels); + + rrddim_add(cg->st_throttle_serviced_ops, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(cg->st_throttle_serviced_ops, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(cg->st_throttle_serviced_ops, "read", cg->throttle_io_serviced.Read); + rrddim_set(cg->st_throttle_serviced_ops, "write", cg->throttle_io_serviced.Write); + rrdset_done(cg->st_throttle_serviced_ops); + } + + if(likely(cg->io_queued.updated && cg->io_queued.enabled == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_queued_ops)) { + snprintfz(title, CHART_TITLE_MAX, "Queued I/O Operations (all disks)"); + + cg->st_queued_ops = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "queued_ops" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? 
"k8s.cgroup.queued_ops" : "cgroup.queued_ops" + , title + , "operations" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2000 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_queued_ops, cg->chart_labels); + + rrddim_add(cg->st_queued_ops, "read", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(cg->st_queued_ops, "write", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set(cg->st_queued_ops, "read", cg->io_queued.Read); + rrddim_set(cg->st_queued_ops, "write", cg->io_queued.Write); + rrdset_done(cg->st_queued_ops); + } + + if(likely(cg->io_merged.updated && cg->io_merged.enabled == CONFIG_BOOLEAN_YES)) { + if(unlikely(!cg->st_merged_ops)) { + snprintfz(title, CHART_TITLE_MAX, "Merged I/O Operations (all disks)"); + + cg->st_merged_ops = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "merged_ops" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? "k8s.cgroup.merged_ops" : "cgroup.merged_ops" + , title + , "operations/s" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2100 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(cg->st_merged_ops, cg->chart_labels); + + rrddim_add(cg->st_merged_ops, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(cg->st_merged_ops, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set(cg->st_merged_ops, "read", cg->io_merged.Read); + rrddim_set(cg->st_merged_ops, "write", cg->io_merged.Write); + rrdset_done(cg->st_merged_ops); + } + + if (cg->options & CGROUP_OPTIONS_IS_UNIFIED) { + struct pressure *res = &cg->cpu_pressure; + + if (likely(res->updated && res->some.enabled)) { + struct pressure_charts *pcs; + pcs = &res->some; + + if (unlikely(!pcs->share_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "CPU some pressure"); + chart = pcs->share_time.st = rrdset_create_localhost( + 
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_some_pressure" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_some_pressure" : "cgroup.cpu_some_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2200 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "CPU some pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_some_pressure_stall_time" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_some_pressure_stall_time" : "cgroup.cpu_some_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2220 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + update_pressure_charts(pcs); + } + if (likely(res->updated && res->full.enabled)) { + struct pressure_charts *pcs; + pcs = &res->full; + + if (unlikely(!pcs->share_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "CPU full pressure"); + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_full_pressure" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? 
"k8s.cgroup.cpu_full_pressure" : "cgroup.cpu_full_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2240 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "CPU full pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_full_pressure_stall_time" + , NULL + , "cpu" + , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_full_pressure_stall_time" : "cgroup.cpu_full_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2260 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + update_pressure_charts(pcs); + } + + res = &cg->memory_pressure; + + if (likely(res->updated && res->some.enabled)) { + struct pressure_charts *pcs; + pcs = &res->some; + + if (unlikely(!pcs->share_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "Memory some pressure"); + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_some_pressure" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? 
"k8s.cgroup.memory_some_pressure" : "cgroup.memory_some_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2300 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "Memory some pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "memory_some_pressure_stall_time" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? "k8s.cgroup.memory_some_pressure_stall_time" : "cgroup.memory_some_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2320 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + update_pressure_charts(pcs); + } + + if (likely(res->updated && res->full.enabled)) { + struct pressure_charts *pcs; + pcs = &res->full; + + if (unlikely(!pcs->share_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "Memory full pressure"); + + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_full_pressure" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? 
"k8s.cgroup.memory_full_pressure" : "cgroup.memory_full_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2340 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "Memory full pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "memory_full_pressure_stall_time" + , NULL + , "mem" + , k8s_is_kubepod(cg) ? "k8s.cgroup.memory_full_pressure_stall_time" : "cgroup.memory_full_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2360 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + update_pressure_charts(pcs); + } + + res = &cg->io_pressure; + + if (likely(res->updated && res->some.enabled)) { + struct pressure_charts *pcs; + pcs = &res->some; + + if (unlikely(!pcs->share_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "I/O some pressure"); + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "io_some_pressure" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? 
"k8s.cgroup.io_some_pressure" : "cgroup.io_some_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2400 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "I/O some pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "io_some_pressure_stall_time" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? "k8s.cgroup.io_some_pressure_stall_time" : "cgroup.io_some_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2420 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + update_pressure_charts(pcs); + } + + if (likely(res->updated && res->full.enabled)) { + struct pressure_charts *pcs; + pcs = &res->full; + + if (unlikely(!pcs->share_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "I/O full pressure"); + chart = pcs->share_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "io_full_pressure" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? 
"k8s.cgroup.io_full_pressure" : "cgroup.io_full_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2440 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels); + pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } + + if (unlikely(!pcs->total_time.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "I/O full pressure stall time"); + chart = pcs->total_time.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "io_full_pressure_stall_time" + , NULL + , "disk" + , k8s_is_kubepod(cg) ? "k8s.cgroup.io_full_pressure_stall_time" : "cgroup.io_full_pressure_stall_time" + , title + , "ms" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2460 + , update_every + , RRDSET_TYPE_LINE + ); + rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels); + pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + update_pressure_charts(pcs); + } + } + } + + if(likely(cgroup_enable_systemd_services)) + update_systemd_services_charts(update_every, services_do_cpu, services_do_mem_usage, services_do_mem_detailed + , services_do_mem_failcnt, services_do_swap_usage, services_do_io + , services_do_io_ops, services_do_throttle_io, services_do_throttle_ops + , services_do_queued_ops, services_do_merged_ops + ); + + debug(D_CGROUP, "done updating cgroups charts"); +}

// ----------------------------------------------------------------------------
// cgroups main

/*
 * Cleanup handler installed by cgroups_main() through
 * netdata_thread_cleanup_push().
 *
 * ptr is the struct netdata_static_thread describing this plugin thread. Its
 * `enabled` field is set to NETDATA_MAIN_THREAD_EXITING on entry and to
 * NETDATA_MAIN_THREAD_EXITED once the resources below have been released.
 *
 * The handler unregisters the worker, wakes the discovery thread (unless it
 * already reported `exited`), waits up to 2 seconds in 50 ms steps for it to
 * set discovery_thread.exited, and finally closes the eBPF semaphore, unmaps
 * the eBPF shared memory and closes its file descriptor.
 */
static void cgroup_main_cleanup(void *ptr) {
    worker_unregister();

    struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
    static_thread->enabled = NETDATA_MAIN_THREAD_EXITING;

    info("cleaning up...");

    // upper bound for the wait below; 2s is an exact multiple of the 50ms step,
    // so the unsigned countdown reaches 0 without wrapping
    usec_t max = 2 * USEC_PER_SEC, step = 50000;

    if (!discovery_thread.exited) {
        info("stopping discovery thread worker");
        // flag and signal are updated together under the mutex
        // NOTE(review): presumably the worker waits on cond_var until
        // start_discovery is set and then checks for shutdown - confirm in
        // cgroup_discovery_worker()
        uv_mutex_lock(&discovery_thread.mutex);
        discovery_thread.start_discovery = 1;
        uv_cond_signal(&discovery_thread.cond_var);
        uv_mutex_unlock(&discovery_thread.mutex);
    }

    info("waiting for discovery thread to finish...");

    while (!discovery_thread.exited && max > 0) {
        max -= step;
        sleep_usec(step);
    }

    if (shm_mutex_cgroup_ebpf != SEM_FAILED) {
        sem_close(shm_mutex_cgroup_ebpf);
    }

    if (shm_cgroup_ebpf.header) {
        munmap(shm_cgroup_ebpf.header, shm_cgroup_ebpf.header->body_length);
    }

    if (shm_fd_cgroup_ebpf > 0) {
        close(shm_fd_cgroup_ebpf);
    }

    static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
}

/*
 * Entry point of the cgroups plugin thread.
 *
 * Registers the worker and its job names, reads the plugin configuration,
 * initializes the eBPF shared memory and starts the discovery worker thread.
 * Then, once per cgroup_update_every seconds and until netdata_exit is set,
 * it reads all discovered cgroups and updates their charts while holding
 * cgroup_root_mutex. A discovery run is requested every
 * cgroup_check_for_new_every seconds, or as soon as cgroups_check is set when
 * not running inside Kubernetes.
 *
 * ptr is passed unchanged to cgroup_main_cleanup(), which runs on every exit
 * path (netdata_thread_cleanup_pop(1)). Always returns NULL.
 */
void *cgroups_main(void *ptr) {
    worker_register("CGROUPS");
    worker_register_job_name(WORKER_CGROUPS_LOCK, "lock");
    worker_register_job_name(WORKER_CGROUPS_READ, "read");
    worker_register_job_name(WORKER_CGROUPS_CHART, "chart");

    // No discovery thread exists yet. Keep `exited` set until the thread has
    // really been created, so that cgroup_main_cleanup() neither touches a
    // mutex/condition variable whose initialization failed nor waits 2 seconds
    // for a thread that was never started when we bail out early.
    discovery_thread.start_discovery = 0;
    discovery_thread.exited = 1;

    netdata_thread_cleanup_push(cgroup_main_cleanup, ptr);

    // both variables present => treat the agent as running inside Kubernetes
    if (getenv("KUBERNETES_SERVICE_HOST") != NULL && getenv("KUBERNETES_SERVICE_PORT") != NULL) {
        is_inside_k8s = 1;
        cgroup_enable_cpuacct_cpu_shares = CONFIG_BOOLEAN_YES;
    }

    read_cgroup_plugin_configuration();
    netdata_cgroup_ebpf_initialize_shm();

    if (uv_mutex_init(&cgroup_root_mutex)) {
        error("CGROUP: cannot initialize mutex for the main cgroup list");
        goto exit;
    }

    if (uv_mutex_init(&discovery_thread.mutex)) {
        error("CGROUP: cannot initialize mutex for discovery thread");
        goto exit;
    }
    if (uv_cond_init(&discovery_thread.cond_var)) {
        error("CGROUP: cannot initialize conditional variable for discovery thread");
        goto exit;
    }

    // dispatch a discovery worker thread
    // (`rc` instead of `error`, to avoid shadowing the error() logging call)
    discovery_thread.exited = 0;
    int rc = uv_thread_create(&discovery_thread.thread, cgroup_discovery_worker, NULL);
    if (rc) {
        discovery_thread.exited = 1; // nothing to stop or wait for in cleanup
        error("CGROUP: cannot create thread worker. uv_thread_create(): %s", uv_strerror(rc));
        goto exit;
    }
    uv_thread_set_name_np(discovery_thread.thread, "PLUGIN[cgroups]");

    heartbeat_t hb;
    heartbeat_init(&hb);
    usec_t step = cgroup_update_every * USEC_PER_SEC;
    usec_t find_every = cgroup_check_for_new_every * USEC_PER_SEC, find_dt = 0;

    while(!netdata_exit) {
        worker_is_idle();

        usec_t hb_dt = heartbeat_next(&hb, step);
        if(unlikely(netdata_exit)) break;

        find_dt += hb_dt;
        if (unlikely(find_dt >= find_every || (!is_inside_k8s && cgroups_check))) {
            // Set the flag and signal while holding the mutex, exactly as
            // cgroup_main_cleanup() does. Signalling first and setting the
            // flag afterwards, unlocked, lets a waiter that re-checks the flag
            // go back to sleep and miss this request until the next interval.
            uv_mutex_lock(&discovery_thread.mutex);
            discovery_thread.start_discovery = 1;
            uv_cond_signal(&discovery_thread.cond_var);
            uv_mutex_unlock(&discovery_thread.mutex);

            find_dt = 0;
            cgroups_check = 0;
        }

        worker_is_busy(WORKER_CGROUPS_LOCK);
        uv_mutex_lock(&cgroup_root_mutex);

        worker_is_busy(WORKER_CGROUPS_READ);
        read_all_discovered_cgroups(cgroup_root);

        worker_is_busy(WORKER_CGROUPS_CHART);
        update_cgroup_charts(cgroup_update_every);

        worker_is_idle();
        uv_mutex_unlock(&cgroup_root_mutex);
    }

exit:
    worker_unregister();
    netdata_thread_cleanup_pop(1);
    return NULL;
}

diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.h b/collectors/cgroups.plugin/sys_fs_cgroup.h new file mode 100644 index 0000000..d1adf8a --- /dev/null +++ b/collectors/cgroups.plugin/sys_fs_cgroup.h @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SYS_FS_CGROUP_H +#define NETDATA_SYS_FS_CGROUP_H 1 + +#include "daemon/common.h" + +#define CGROUP_OPTIONS_DISABLED_DUPLICATE 0x00000001 +#define CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE 0x00000002 +#define CGROUP_OPTIONS_IS_UNIFIED 0x00000004 + +typedef struct netdata_ebpf_cgroup_shm_header { + int cgroup_root_count; + int cgroup_max; + int systemd_enabled; + int __pad; + size_t body_length; +} netdata_ebpf_cgroup_shm_header_t; + +#define CGROUP_EBPF_NAME_SHARED_LENGTH 256 + +typedef struct 
netdata_ebpf_cgroup_shm_body { + // Considering what is exposed in this link https://en.wikipedia.org/wiki/Comparison_of_file_systems#Limits + // this length is enough to store what we want. + char name[CGROUP_EBPF_NAME_SHARED_LENGTH]; + uint32_t hash; + uint32_t options; + int enabled; + char path[FILENAME_MAX + 1]; +} netdata_ebpf_cgroup_shm_body_t; + +typedef struct netdata_ebpf_cgroup_shm { + netdata_ebpf_cgroup_shm_header_t *header; + netdata_ebpf_cgroup_shm_body_t *body; +} netdata_ebpf_cgroup_shm_t; + +#define NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME "netdata_shm_cgroup_ebpf" +#define NETDATA_NAMED_SEMAPHORE_EBPF_CGROUP_NAME "/netdata_sem_cgroup_ebpf" + +#include "../proc.plugin/plugin_proc.h" + +char *k8s_parse_resolved_name_and_labels(DICTIONARY *labels, char *data); + +#endif //NETDATA_SYS_FS_CGROUP_H diff --git a/collectors/cgroups.plugin/tests/test_cgroups_plugin.c b/collectors/cgroups.plugin/tests/test_cgroups_plugin.c new file mode 100644 index 0000000..25939a9 --- /dev/null +++ b/collectors/cgroups.plugin/tests/test_cgroups_plugin.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "test_cgroups_plugin.h" +#include "libnetdata/required_dummies.h" + +RRDHOST *localhost; +int netdata_zero_metrics_enabled = 1; +struct config netdata_config; +char *netdata_configured_primary_plugins_dir = NULL; + +struct k8s_test_data { + char *data; + char *name; + char *key[3]; + char *value[3]; + + const char *result_key[3]; + const char *result_value[3]; + int result_ls[3]; + int i; +}; + +static int read_label_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) +{ + struct k8s_test_data *test_data = (struct k8s_test_data *)data; + + test_data->result_key[test_data->i] = name; + test_data->result_value[test_data->i] = value; + test_data->result_ls[test_data->i] = ls; + + test_data->i++; + + return 1; +} + +static void test_k8s_parse_resolved_name(void **state) +{ + UNUSED(state); + + DICTIONARY *labels = 
rrdlabels_create(); + + struct k8s_test_data test_data[] = { + // One label + { .data = "name label1=\"value1\"", + .name = "name", + .key[0] = "label1", .value[0] = "value1" }, + + // Three labels + { .data = "name label1=\"value1\",label2=\"value2\",label3=\"value3\"", + .name = "name", + .key[0] = "label1", .value[0] = "value1", + .key[1] = "label2", .value[1] = "value2", + .key[2] = "label3", .value[2] = "value3" }, + + // Comma at the end of the data string + { .data = "name label1=\"value1\",", + .name = "name", + .key[0] = "label1", .value[0] = "value1" }, + + // Equals sign in the value + // { .data = "name label1=\"value=1\"", + // .name = "name", + // .key[0] = "label1", .value[0] = "value=1" }, + + // Double quotation mark in the value + // { .data = "name label1=\"value\"1\"", + // .name = "name", + // .key[0] = "label1", .value[0] = "value" }, + + // Escaped double quotation mark in the value + // { .data = "name label1=\"value\\\"1\"", + // .name = "name", + // .key[0] = "label1", .value[0] = "value\\\"1" }, + + // Equals sign in the key + // { .data = "name label=1=\"value1\"", + // .name = "name", + // .key[0] = "label", .value[0] = "1=\"value1\"" }, + + // Skipped value + // { .data = "name label1=,label2=\"value2\"", + // .name = "name", + // .key[0] = "label2", .value[0] = "value2" }, + + // A pair of equals signs + { .data = "name= =", + .name = "name=" }, + + // A pair of commas + { .data = "name, ,", + .name = "name," }, + + { .data = NULL } + }; + + for (int i = 0; test_data[i].data != NULL; i++) { + char *data = strdup(test_data[i].data); + + char *name = k8s_parse_resolved_name_and_labels(labels, data); + + assert_string_equal(name, test_data[i].name); + + rrdlabels_walkthrough_read(labels, read_label_callback, &test_data[i]); + + for (int l = 0; l < 3 && test_data[i].key[l] != NULL; l++) { + char *key = test_data[i].key[l]; + char *value = test_data[i].value[l]; + + const char *result_key = test_data[i].result_key[l]; + const char 
*result_value = test_data[i].result_value[l]; + int ls = test_data[i].result_ls[l]; + + assert_string_equal(key, result_key); + assert_string_equal(value, result_value); + assert_int_equal(RRDLABEL_SRC_AUTO | RRDLABEL_SRC_K8S, ls); + } + + free(data); + } +} + +int main(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_k8s_parse_resolved_name), + }; + + int test_res = cmocka_run_group_tests_name("test_k8s_parse_resolved_name", tests, NULL, NULL); + + return test_res; +} diff --git a/collectors/cgroups.plugin/tests/test_cgroups_plugin.h b/collectors/cgroups.plugin/tests/test_cgroups_plugin.h new file mode 100644 index 0000000..3d68e92 --- /dev/null +++ b/collectors/cgroups.plugin/tests/test_cgroups_plugin.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef TEST_CGROUPS_PLUGIN_H +#define TEST_CGROUPS_PLUGIN_H 1 + +#include "libnetdata/libnetdata.h" + +#include "../sys_fs_cgroup.h" + +#include <stdarg.h> +#include <stddef.h> +#include <setjmp.h> +#include <stdint.h> +#include <cmocka.h> + +#endif /* TEST_CGROUPS_PLUGIN_H */ diff --git a/collectors/cgroups.plugin/tests/test_doubles.c b/collectors/cgroups.plugin/tests/test_doubles.c new file mode 100644 index 0000000..498f649 --- /dev/null +++ b/collectors/cgroups.plugin/tests/test_doubles.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "test_cgroups_plugin.h" + +void rrdset_is_obsolete(RRDSET *st) +{ + UNUSED(st); +} + +void rrdset_isnot_obsolete(RRDSET *st) +{ + UNUSED(st); +} + +struct mountinfo *mountinfo_read(int do_statvfs) +{ + UNUSED(do_statvfs); + + return NULL; +} + +struct mountinfo * +mountinfo_find_by_filesystem_mount_source(struct mountinfo *root, const char *filesystem, const char *mount_source) +{ + UNUSED(root); + UNUSED(filesystem); + UNUSED(mount_source); + + return NULL; +} + +struct mountinfo * +mountinfo_find_by_filesystem_super_option(struct mountinfo *root, const char *filesystem, const char *super_options) +{ + 
UNUSED(root); + UNUSED(filesystem); + UNUSED(super_options); + + return NULL; +} + +void mountinfo_free_all(struct mountinfo *mi) +{ + UNUSED(mi); +} + +RRDSET *rrdset_create_custom( + RRDHOST *host, const char *type, const char *id, const char *name, const char *family, const char *context, + const char *title, const char *units, const char *plugin, const char *module, long priority, int update_every, + RRDSET_TYPE chart_type, RRD_MEMORY_MODE memory_mode, long history_entries) +{ + UNUSED(host); + UNUSED(type); + UNUSED(id); + UNUSED(name); + UNUSED(family); + UNUSED(context); + UNUSED(title); + UNUSED(units); + UNUSED(plugin); + UNUSED(module); + UNUSED(priority); + UNUSED(update_every); + UNUSED(chart_type); + UNUSED(memory_mode); + UNUSED(history_entries); + + return NULL; +} + +RRDDIM *rrddim_add_custom( + RRDSET *st, const char *id, const char *name, collected_number multiplier, collected_number divisor, + RRD_ALGORITHM algorithm, RRD_MEMORY_MODE memory_mode) +{ + UNUSED(st); + UNUSED(id); + UNUSED(name); + UNUSED(multiplier); + UNUSED(divisor); + UNUSED(algorithm); + UNUSED(memory_mode); + + return NULL; +} + +collected_number rrddim_set(RRDSET *st, const char *id, collected_number value) +{ + UNUSED(st); + UNUSED(id); + UNUSED(value); + + return 0; +} + +collected_number rrddim_set_by_pointer(RRDSET *st, RRDDIM *rd, collected_number value) +{ + UNUSED(st); + UNUSED(rd); + UNUSED(value); + + return 0; +} + +const RRDSETVAR_ACQUIRED *rrdsetvar_custom_chart_variable_add_and_acquire(RRDSET *st, const char *name) +{ + UNUSED(st); + UNUSED(name); + + return NULL; +} + +void rrdsetvar_custom_chart_variable_set(RRDSET *st, const RRDSETVAR_ACQUIRED *rsa, NETDATA_DOUBLE value) +{ + UNUSED(st); + UNUSED(rsa); + UNUSED(value); +} + +void rrdset_next_usec(RRDSET *st, usec_t microseconds) +{ + UNUSED(st); + UNUSED(microseconds); +} + +void rrdset_done(RRDSET *st) +{ + UNUSED(st); +} + +void update_pressure_charts(struct pressure_charts *charts) +{ + UNUSED(charts); +} + 
+// Test double: accepts a device rename request and ignores it.
+void netdev_rename_device_add(
+    const char *host_device, const char *container_device, const char *container_name, DICTIONARY *labels, const char *ctx_prefix)
+{
+    UNUSED(host_device);
+    UNUSED(container_device);
+    UNUSED(container_name);
+    UNUSED(labels);
+    UNUSED(ctx_prefix);
+}
+
+// Test double: accepts a device rename removal and ignores it.
+void netdev_rename_device_del(const char *host_device)
+{
+    UNUSED(host_device);
+}
+
+// Test double: does nothing. Uses UNUSED() and the brace placement of the
+// other stubs in this file instead of a bare (void) cast.
+void rrdcalc_update_rrdlabels(RRDSET *st)
+{
+    UNUSED(st);
+}
+
+// Test double: the command is ignored, nothing is executed.
+void db_execute(const char *cmd)
+{
+    UNUSED(cmd);
+}
|