diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-05-08 16:27:04 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-05-08 16:27:04 +0000 |
commit | a836a244a3d2bdd4da1ee2641e3e957850668cea (patch) | |
tree | cb87c75b3677fab7144f868435243f864048a1e6 /collectors/cgroups.plugin | |
parent | Adding upstream version 1.38.1. (diff) | |
download | netdata-a836a244a3d2bdd4da1ee2641e3e957850668cea.tar.xz netdata-a836a244a3d2bdd4da1ee2641e3e957850668cea.zip |
Adding upstream version 1.39.0.upstream/1.39.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'collectors/cgroups.plugin')
-rw-r--r-- | collectors/cgroups.plugin/README.md | 56 | ||||
-rwxr-xr-x | collectors/cgroups.plugin/cgroup-name.sh | 55 | ||||
-rw-r--r-- | collectors/cgroups.plugin/metrics.csv | 109 | ||||
-rw-r--r-- | collectors/cgroups.plugin/sys_fs_cgroup.c | 270 | ||||
-rw-r--r-- | collectors/cgroups.plugin/sys_fs_cgroup.h | 2 | ||||
-rw-r--r-- | collectors/cgroups.plugin/tests/test_cgroups_plugin.c | 8 |
6 files changed, 322 insertions, 178 deletions
diff --git a/collectors/cgroups.plugin/README.md b/collectors/cgroups.plugin/README.md index e58f1ba0..2e4fff23 100644 --- a/collectors/cgroups.plugin/README.md +++ b/collectors/cgroups.plugin/README.md @@ -4,10 +4,10 @@ custom_edit_url: "https://github.com/netdata/netdata/edit/master/collectors/cgro sidebar_label: "Monitor Cgroups" learn_status: "Published" learn_topic_type: "References" -learn_rel_path: "References/Collectors references/Virtualized environments/Containers" +learn_rel_path: "Integrations/Monitor/Virtualized environments/Containers" --> -# cgroups.plugin +# Monitor Cgroups (cgroups.plugin) You can monitor containers and virtual machines using **cgroups**. @@ -26,7 +26,7 @@ and **virtual machines** spawn by managers that register them with cgroups (qemu In general, no additional settings are required. Netdata discovers all available cgroups on the host system and collects their metrics. -### how Netdata finds the available cgroups +### How Netdata finds the available cgroups Linux exposes resource usage reporting and provides dynamic configuration for cgroups, using virtual files (usually) under `/sys/fs/cgroup`. Netdata reads `/proc/self/mountinfo` to detect the exact mount point of cgroups. Netdata also @@ -43,7 +43,7 @@ allows manual configuration of this mount point, using these settings: Netdata rescans these directories for added or removed cgroups every `check for new cgroups every` seconds. -### hierarchical search for cgroups +### Hierarchical search for cgroups Since cgroups are hierarchical, for each of the directories shown above, Netdata walks through the subdirectories recursively searching for cgroups (each subdirectory is another cgroup). @@ -61,7 +61,7 @@ cgroups ([systemd services are monitored by Netdata](#monitoring-systemd-service desktop and remote user sessions), qemu virtual machines (child cgroups of virtual machines) and `init.scope`. All others are enabled. 
-### unified cgroups (cgroups v2) support +### Unified cgroups (cgroups v2) support Netdata automatically detects cgroups version. If detection fails Netdata assumes v1. To switch to v2 manually add: @@ -75,19 +75,19 @@ To switch to v2 manually add: Unified cgroups use same name pattern matching as v1 cgroups. `cgroup_enable_systemd_services_detailed_memory` is currently unsupported when using unified cgroups. -### enabled cgroups +### Enabled cgroups To provide a sane default, Netdata uses the following [pattern list](https://github.com/netdata/netdata/blob/master/libnetdata/simple_pattern/README.md): -- checks the pattern against the path of the cgroup +- Checks the pattern against the path of the cgroup ```text [plugin:cgroups] enable by default cgroups matching = !*/init.scope *.scope !*/vcpu* !*/emulator !*.mount !*.partition !*.service !*.slice !*.swap !*.user !/ !/docker !/libvirt !/lxc !/lxc/*/ns !/lxc/*/ns/* !/machine !/qemu !/system !/systemd !/user * ``` -- checks the pattern against the name of the cgroup (as you see it on the dashboard) +- Checks the pattern against the name of the cgroup (as you see it on the dashboard) ```text [plugin:cgroups] @@ -120,10 +120,11 @@ container names. To do this, ensure `podman system service` is running and Netda to `/run/podman/podman.sock` (the default permissions as specified by upstream are `0600`, with owner `root`, so you will have to adjust the configuration). -[docker-socket-proxy](https://github.com/Tecnativa/docker-socket-proxy) can also be used to give Netdata restricted -access to the socket. Note that `PODMAN_HOST` in Netdata's environment should be set to the proxy's URL in this case. +[Docker Socket Proxy (HAProxy)](https://github.com/Tecnativa/docker-socket-proxy) or [CetusGuard](https://github.com/hectorm/cetusguard) +can also be used to give Netdata restricted access to the socket. Note that `PODMAN_HOST` in Netdata's environment should +be set to the proxy's URL in this case. 
-### charts with zero metrics +### Charts with zero metrics By default, Netdata will enable monitoring metrics only when they are not zero. If they are constantly zero they are ignored. Metrics that will start having values, after Netdata is started, will be detected and charts will be @@ -138,7 +139,7 @@ chart instead of `auto` to enable it permanently. For example: You can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero metrics for all internal Netdata plugins. -### alarms +### Alarms CPU and memory limits are watched and used to rise alarms. Memory usage for every cgroup is checked against `ram` and `ram+swap` limits. CPU usage for every cgroup is checked against `cpuset.cpus` and `cpu.cfs_period_us` + `cpu.cfs_quota_us` pair assigned for the cgroup. Configuration for the alarms is available in `health.d/cgroups.conf` @@ -190,7 +191,7 @@ Support per distribution: - Merged disk read operations - Merged disk write operations -### how to enable cgroup accounting on systemd systems that is by default disabled +### How to enable cgroup accounting on systemd systems that is by default disabled You can verify there is no accounting enabled, by running `systemd-cgtop`. The program will show only resources for cgroup `/`, but all services will show nothing. @@ -259,28 +260,17 @@ Which systemd services are monitored by Netdata is determined by the following p Netdata monitors containers automatically when it is installed at the host, or when it is installed in a container that has access to the `/proc` and `/sys` filesystems of the host. -Netdata prior to v1.6 had 2 issues when such containers were monitored: +Network interfaces and cgroups (containers) are self-cleaned. When a network interface or container stops, Netdata might log +a few errors in error.log complaining about files it cannot find, but immediately: -1. network interface alarms where triggering when containers were stopped - -2. 
charts were never cleaned up, so after some time dozens of containers were showing up on the dashboard, and they were - occupying memory. - -### the current Netdata - -network interfaces and cgroups (containers) are now self-cleaned. - -So, when a network interface or container stops, Netdata might log a few errors in error.log complaining about files it -cannot find, but immediately: - -1. it will detect this is a removed container or network interface -2. it will freeze/pause all alarms for them -3. it will mark their charts as obsolete -4. obsolete charts are not be offered on new dashboard sessions (so hit F5 and the charts are gone) -5. existing dashboard sessions will continue to see them, but of course they will not refresh -6. obsolete charts will be removed from memory, 1 hour after the last user viewed them (configurable +1. It will detect this is a removed container or network interface +2. It will freeze/pause all alarms for them +3. It will mark their charts as obsolete +4. Obsolete charts are not offered on new dashboard sessions (so hit F5 and the charts are gone) +5. Existing dashboard sessions will continue to see them, but of course they will not refresh +6. Obsolete charts will be removed from memory, 1 hour after the last user viewed them (configurable with `[global].cleanup obsolete charts after seconds = 3600` (at `netdata.conf`). -7. when obsolete charts are removed from memory they are also deleted from disk (configurable +7. 
When obsolete charts are removed from memory they are also deleted from disk (configurable with `[global].delete obsolete charts files = yes`) ### Monitored container metrics diff --git a/collectors/cgroups.plugin/cgroup-name.sh b/collectors/cgroups.plugin/cgroup-name.sh index 55b02ac7..9a5812f3 100755 --- a/collectors/cgroups.plugin/cgroup-name.sh +++ b/collectors/cgroups.plugin/cgroup-name.sh @@ -47,11 +47,14 @@ fatal() { function parse_docker_like_inspect_output() { local output="${1}" - eval "$(grep -E "^(NOMAD_NAMESPACE|NOMAD_JOB_NAME|NOMAD_TASK_NAME|NOMAD_SHORT_ALLOC_ID|CONT_NAME)=" <<<"$output")" + eval "$(grep -E "^(NOMAD_NAMESPACE|NOMAD_JOB_NAME|NOMAD_TASK_NAME|NOMAD_SHORT_ALLOC_ID|CONT_NAME|IMAGE_NAME)=" <<<"$output")" if [ -n "$NOMAD_NAMESPACE" ] && [ -n "$NOMAD_JOB_NAME" ] && [ -n "$NOMAD_TASK_NAME" ] && [ -n "$NOMAD_SHORT_ALLOC_ID" ]; then - echo "${NOMAD_NAMESPACE}-${NOMAD_JOB_NAME}-${NOMAD_TASK_NAME}-${NOMAD_SHORT_ALLOC_ID}" + NAME="${NOMAD_NAMESPACE}-${NOMAD_JOB_NAME}-${NOMAD_TASK_NAME}-${NOMAD_SHORT_ALLOC_ID}" else - echo "${CONT_NAME}" | sed 's|^/||' + NAME=$(echo "${CONT_NAME}" | sed 's|^/||') + fi + if [ -n "${IMAGE_NAME}" ]; then + LABELS="image=\"${IMAGE_NAME}\"" fi } @@ -59,9 +62,9 @@ function docker_like_get_name_command() { local command="${1}" local id="${2}" info "Running command: ${command} inspect --format='{{range .Config.Env}}{{println .}}{{end}}CONT_NAME={{ .Name}}' \"${id}\"" - if OUTPUT="$(${command} inspect --format='{{range .Config.Env}}{{println .}}{{end}}CONT_NAME={{ .Name}}' "${id}")" && + if OUTPUT="$(${command} inspect --format='{{range .Config.Env}}{{println .}}{{end}}CONT_NAME={{ .Name}}{{println}}IMAGE_NAME={{ .Config.Image}}' "${id}")" && [ -n "$OUTPUT" ]; then - NAME="$(parse_docker_like_inspect_output "$OUTPUT")" + parse_docker_like_inspect_output "$OUTPUT" fi return 0 } @@ -85,8 +88,8 @@ function docker_like_get_name_api() { info "Running API command: curl \"${host}${path}\"" JSON=$(curl -sS "${host}${path}") fi - if 
OUTPUT=$(echo "${JSON}" | jq -r '.Config.Env[],"CONT_NAME=\(.Name)"') && [ -n "$OUTPUT" ]; then - NAME="$(parse_docker_like_inspect_output "$OUTPUT")" + if OUTPUT=$(echo "${JSON}" | jq -r '.Config.Env[],"CONT_NAME=\(.Name)","IMAGE_NAME=\(.Config.Image)"') && [ -n "$OUTPUT" ]; then + parse_docker_like_inspect_output "$OUTPUT" fi return 0 } @@ -303,8 +306,14 @@ function k8s_get_kubepod_name() { fi fi - url="https://$host/api/v1/pods" - [ -n "$MY_NODE_NAME" ] && url+="?fieldSelector=spec.nodeName==$MY_NODE_NAME" + local url + if [ -n "${USE_KUBELET_FOR_PODS_METADATA}" ]; then + url="${KUBELET_URL:-https://localhost:10250}/pods" + else + url="https://$host/api/v1/pods" + [ -n "$MY_NODE_NAME" ] && url+="?fieldSelector=spec.nodeName==$MY_NODE_NAME" + fi + # FIX: check HTTP response code if ! pods=$(curl --fail -sSk -H "$header" "$url" 2>&1); then warning "${fn}: error on curl '${url}': ${pods}." @@ -401,6 +410,10 @@ function k8s_get_kubepod_name() { # jq filter nonexistent field and nonexistent label value is 'null' if [[ $name =~ _null(_|$) ]]; then warning "${fn}: invalid name: $name (cgroup '$id')" + if [ -n "${USE_KUBELET_FOR_PODS_METADATA}" ]; then + # local data is cached and may not contain the correct id + return 2 + fi return 1 fi @@ -413,20 +426,25 @@ function k8s_get_name() { local fn="${FUNCNAME[0]}" local cgroup_path="${1}" local id="${2}" + local kubepod_name="" - NAME=$(k8s_get_kubepod_name "$cgroup_path" "$id") + kubepod_name=$(k8s_get_kubepod_name "$cgroup_path" "$id") case "$?" 
in 0) - NAME="k8s_${NAME}" + kubepod_name="k8s_${kubepod_name}" local name labels - name=${NAME%% *} - labels=${NAME#* } + name=${kubepod_name%% *} + labels=${kubepod_name#* } + if [ "$name" != "$labels" ]; then info "${fn}: cgroup '${id}' has chart name '${name}', labels '${labels}" + NAME="$name" + LABELS="$labels" else info "${fn}: cgroup '${id}' has chart name '${NAME}'" + NAME="$name" fi EXIT_CODE=$EXIT_SUCCESS ;; @@ -512,6 +530,7 @@ EXIT_RETRY=2 EXIT_DISABLE=3 EXIT_CODE=$EXIT_SUCCESS NAME= +LABELS= # ----------------------------------------------------------------------------- @@ -591,7 +610,13 @@ if [ -z "${NAME}" ]; then [ ${#NAME} -gt 100 ] && NAME="${NAME:0:100}" fi -info "cgroup '${CGROUP}' is called '${NAME}'" -echo "${NAME}" +NAME="${NAME// /_}" + +info "cgroup '${CGROUP}' is called '${NAME}', labels '${LABELS}'" +if [ -n "$LABELS" ]; then + echo "${NAME} ${LABELS}" +else + echo "${NAME}" +fi exit ${EXIT_CODE} diff --git a/collectors/cgroups.plugin/metrics.csv b/collectors/cgroups.plugin/metrics.csv new file mode 100644 index 00000000..aae057ba --- /dev/null +++ b/collectors/cgroups.plugin/metrics.csv @@ -0,0 +1,109 @@ +metric,scope,dimensions,unit,description,chart_type,labels,plugin,module +cgroup.cpu_limit,cgroup,used,percentage,"CPU Usage within the limits",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.cpu,cgroup,"user, system",percentage,"CPU Usage (100% = 1 core)",stacked,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.cpu_per_core,cgroup,a dimension per core,percentage,"CPU Usage (100% = 1 core) Per Core",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.throttled,cgroup,throttled,percentage,"CPU Throttled Runnable Periods",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.throttled_duration,cgroup,duration,ms,"CPU Throttled Time Duration",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.cpu_shares,cgroup,shares,shares,"CPU Time Relative 
Share",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.mem,cgroup,"cache, rss, swap, rss_huge, mapped_file",MiB,"Memory Usage",stacked,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.writeback,cgroup,"dirty, writeback",MiB,"Writeback Memory",area,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.mem_activity,cgroup,"in, out",MiB/s,"Memory Activity",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.pgfaults,cgroup,"pgfault, swap",MiB/s,"Memory Page Faults",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.mem_usage,cgroup,"ram, swap",MiB,"Used Memory",stacked,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.mem_usage_limit,cgroup,"available, used",MiB,"Used RAM within the limits",stacked,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.mem_utilization,cgroup,utilization,percentage,"Memory Utilization",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.mem_failcnt,cgroup,failures,count,"Memory Limit Failures",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.io,cgroup,"read, write",KiB/s,"I/O Bandwidth (all disks)",area,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.serviced_ops,cgroup,"read, write",operations/s,"Serviced I/O Operations (all disks)",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.throttle_io,cgroup,"read, write",KiB/s,"Throttle I/O Bandwidth (all disks)",area,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.throttle_serviced_ops,cgroup,"read, write",operations/s,"Throttle Serviced I/O Operations (all disks)",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.queued_ops,cgroup,"read, write",operations,"Queued I/O Operations (all disks)",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.merged_ops,cgroup,"read, write",operations/s,"Merged I/O Operations (all disks)",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup 
+cgroup.cpu_some_pressure,cgroup,"some10, some60, some300",percentage,"CPU some pressure",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.cpu_some_pressure_stall_time,cgroup,time,ms,"CPU some pressure stall time",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.cpu_full_pressure,cgroup,"some10, some60, some300",percentage,"CPU full pressure",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.cpu_full_pressure_stall_time,cgroup,time,ms,"CPU full pressure stall time",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.memory_some_pressure,cgroup,"some10, some60, some300",percentage,"Memory some pressure",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.memory_some_pressure_stall_time,cgroup,time,ms,"Memory some pressure stall time",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.memory_full_pressure,cgroup,"some10, some60, some300",percentage,"Memory full pressure",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.memory_full_pressure_stall_time,cgroup,time,ms,"Memory full pressure stall time",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.io_some_pressure,cgroup,"some10, some60, some300",percentage,"I/O some pressure",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.io_some_pressure_stall_time,cgroup,time,ms,"I/O some pressure stall time",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.io_full_pressure,cgroup,"some10, some60, some300",percentage,"I/O full pressure",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.io_full_pressure_stall_time,cgroup,time,ms,"I/O full pressure stall time",line,"container_name, image",cgroups.plugin,/sys/fs/cgroup +cgroup.net_net,"cgroup, network device","received, sent",kilobits/s,"Bandwidth",area,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev +cgroup.net_packets,"cgroup, network device","received, sent, 
multicast",pps,"Packets",line,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev +cgroup.net_errors,"cgroup, network device","inbound, outbound",errors/s,"Interface Errors",line,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev +cgroup.net_drops,"cgroup, network device","inbound, outbound",errors/s,"Interface Drops",line,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev +cgroup.net_fifo,"cgroup, network device","receive, transmit",errors/s,"Interface FIFO Buffer Errors",line,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev +cgroup.net_compressed,"cgroup, network device","receive, sent",pps,"Interface FIFO Buffer Errors",line,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev +cgroup.net_events,"cgroup, network device","frames, collisions, carrier",events/s,"Network Interface Events",line,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev +cgroup.net_operstate,"cgroup, network device","up, down, notpresent, lowerlayerdown, testing, dormant, unknown",state,"Interface Operational State",line,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev +cgroup.net_carrier,"cgroup, network device","up, down",state,"Interface Physical Link State",line,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev +cgroup.net_mtu,"cgroup, network device",mtu,octets,"Interface MTU",line,"container_name, image, device, interface_type",cgroups.plugin,/proc/net/dev +k8s.cgroup.cpu_limit,k8s cgroup,used,percentage,"CPU Usage within the limits",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.cpu,k8s cgroup,"user, system",percentage,"CPU Usage (100% = 1000 mCPU)",stacked,"k8s_namespace, k8s_pod_name, k8s_pod_uid, 
k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.cpu_per_core,k8s cgroup,a dimension per core,percentage,"CPU Usage (100% = 1000 mCPU) Per Core",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.throttled,k8s cgroup,throttled,percentage,"CPU Throttled Runnable Periods",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.throttled_duration,k8s cgroup,duration,ms,"CPU Throttled Time Duration",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.cpu_shares,k8s cgroup,shares,shares,"CPU Time Relative Share",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.mem,k8s cgroup,"cache, rss, swap, rss_huge, mapped_file",MiB,"Memory Usage",stacked,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.writeback,k8s cgroup,"dirty, writeback",MiB,"Writeback Memory",area,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.mem_activity,k8s 
cgroup,"in, out",MiB/s,"Memory Activity",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.pgfaults,k8s cgroup,"pgfault, swap",MiB/s,"Memory Page Faults",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.mem_usage,k8s cgroup,"ram, swap",MiB,"Used Memory",stacked,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.mem_usage_limit,k8s cgroup,"available, used",MiB,"Used RAM within the limits",stacked,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.mem_utilization,k8s cgroup,utilization,percentage,"Memory Utilization",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.mem_failcnt,k8s cgroup,failures,count,"Memory Limit Failures",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.io,k8s cgroup,"read, write",KiB/s,"I/O Bandwidth (all disks)",area,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, 
k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.serviced_ops,k8s cgroup,"read, write",operations/s,"Serviced I/O Operations (all disks)",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.throttle_io,k8s cgroup,"read, write",KiB/s,"Throttle I/O Bandwidth (all disks)",area,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.throttle_serviced_ops,k8s cgroup,"read, write",operations/s,"Throttle Serviced I/O Operations (all disks)",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.queued_ops,k8s cgroup,"read, write",operations,"Queued I/O Operations (all disks)",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.merged_ops,k8s cgroup,"read, write",operations/s,"Merged I/O Operations (all disks)",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.cpu_some_pressure,k8s cgroup,"some10, some60, some300",percentage,"CPU some pressure",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.cpu_some_pressure_stall_time,k8s cgroup,time,ms,"CPU 
some pressure stall time",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.cpu_full_pressure,k8s cgroup,"some10, some60, some300",percentage,"CPU full pressure",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.cpu_full_pressure_stall_time,k8s cgroup,time,ms,"CPU full pressure stall time",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.memory_some_pressure,k8s cgroup,"some10, some60, some300",percentage,"Memory some pressure",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.memory_some_pressure_stall_time,k8s cgroup,time,ms,"Memory some pressure stall time",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.memory_full_pressure,k8s cgroup,"some10, some60, some300",percentage,"Memory full pressure",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.memory_full_pressure_stall_time,k8s cgroup,time,ms,"Memory full pressure stall time",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, 
k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.io_some_pressure,k8s cgroup,"some10, some60, some300",percentage,"I/O some pressure",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.io_some_pressure_stall_time,k8s cgroup,time,ms,"I/O some pressure stall time",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.io_full_pressure,k8s cgroup,"some10, some60, some300",percentage,"I/O full pressure",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.io_full_pressure_stall_time,k8s cgroup,time,ms,"I/O full pressure stall time",line,"k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/sys/fs/cgroup +k8s.cgroup.net_net,"k8s cgroup, network device","received, sent",kilobits/s,"Bandwidth",area,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/proc/net/dev +k8s.cgroup.net_packets,"k8s cgroup, network device","received, sent, multicast",pps,"Packets",line,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, 
k8s_cluster_id",cgroups.plugin,/proc/net/dev +k8s.cgroup.net_errors,"k8s cgroup, network device","inbound, outbound",errors/s,"Interface Errors",line,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/proc/net/dev +k8s.cgroup.net_drops,"k8s cgroup, network device","inbound, outbound",errors/s,"Interface Drops",line,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/proc/net/dev +k8s.cgroup.net_fifo,"k8s cgroup, network device","receive, transmit",errors/s,"Interface FIFO Buffer Errors",line,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/proc/net/dev +k8s.cgroup.net_compressed,"k8s cgroup, network device","receive, sent",pps,"Interface FIFO Buffer Errors",line,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/proc/net/dev +k8s.cgroup.net_events,"k8s cgroup, network device","frames, collisions, carrier",events/s,"Network Interface Events",line,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/proc/net/dev +k8s.cgroup.net_operstate,"k8s cgroup, network device","up, down, notpresent, lowerlayerdown, testing, dormant, unknown",state,"Interface Operational State",line,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, 
k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/proc/net/dev +k8s.cgroup.net_carrier,"k8s cgroup, network device","up, down",state,"Interface Physical Link State",line,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/proc/net/dev +k8s.cgroup.net_mtu,"k8s cgroup, network device",mtu,octets,"Interface MTU",line,"device, interface_type, k8s_namespace, k8s_pod_name, k8s_pod_uid, k8s_controller_kind, k8s_controller_name, k8s_node_name, k8s_container_name, k8s_container_id, k8s_kind, k8s_qos_class, k8s_cluster_id",cgroups.plugin,/proc/net/dev +services.cpu,,a dimension per systemd service,percentage,"Systemd Services CPU utilization (100% = 1 core)",stacked,,cgroups.plugin,systemd +services.mem_usage,,a dimension per systemd service,MiB,"Systemd Services Used Memory",stacked,,cgroups.plugin,systemd +services.mem_rss,,a dimension per systemd service,MiB,"Systemd Services RSS Memory",stacked,,cgroups.plugin,systemd +services.mem_mapped,,a dimension per systemd service,MiB,"Systemd Services Mapped Memory",stacked,,cgroups.plugin,systemd +services.mem_cache,,a dimension per systemd service,MiB,"Systemd Services Cache Memory",stacked,,cgroups.plugin,systemd +services.mem_writeback,,a dimension per systemd service,MiB,"Systemd Services Writeback Memory",stacked,,cgroups.plugin,systemd +services.mem_pgfault,,a dimension per systemd service,MiB/s,"Systemd Services Memory Minor Page Faults",stacked,,cgroups.plugin,systemd +services.mem_pgmajfault,,a dimension per systemd service,MiB/s,"Systemd Services Memory Major Page Faults",stacked,,cgroups.plugin,systemd +services.mem_pgpgin,,a dimension per systemd service,MiB/s,"Systemd Services Memory Charging Activity",stacked,,cgroups.plugin,systemd 
+services.mem_pgpgout,,a dimension per systemd service,MiB/s,"Systemd Services Memory Uncharging Activity",stacked,,cgroups.plugin,systemd +services.mem_failcnt,,a dimension per systemd service,failures,"Systemd Services Memory Limit Failures",stacked,,cgroups.plugin,systemd +services.swap_usage,,a dimension per systemd service,MiB,"Systemd Services Swap Memory Used",stacked,,cgroups.plugin,systemd +services.io_read,,a dimension per systemd service,KiB/s,"Systemd Services Disk Read Bandwidth",stacked,,cgroups.plugin,systemd +services.io_write,,a dimension per systemd service,KiB/s,"Systemd Services Disk Write Bandwidth",stacked,,cgroups.plugin,systemd +services.io_ops_read,,a dimension per systemd service,operations/s,"Systemd Services Disk Read Operations",stacked,,cgroups.plugin,systemd +services.io_ops_write,,a dimension per systemd service,operations/s,"Systemd Services Disk Write Operations",stacked,,cgroups.plugin,systemd +services.throttle_io_read,,a dimension per systemd service,KiB/s,"Systemd Services Throttle Disk Read Bandwidth",stacked,,cgroups.plugin,systemd +services.services.throttle_io_write,,a dimension per systemd service,KiB/s,"Systemd Services Throttle Disk Write Bandwidth",stacked,,cgroups.plugin,systemd +services.throttle_io_ops_read,,a dimension per systemd service,operations/s,"Systemd Services Throttle Disk Read Operations",stacked,,cgroups.plugin,systemd +throttle_io_ops_write,,a dimension per systemd service,operations/s,"Systemd Services Throttle Disk Write Operations",stacked,,cgroups.plugin,systemd +services.queued_io_ops_read,,a dimension per systemd service,operations/s,"Systemd Services Queued Disk Read Operations",stacked,,cgroups.plugin,systemd +services.queued_io_ops_write,,a dimension per systemd service,operations/s,"Systemd Services Queued Disk Write Operations",stacked,,cgroups.plugin,systemd +services.merged_io_ops_read,,a dimension per systemd service,operations/s,"Systemd Services Merged Disk Read 
Operations",stacked,,cgroups.plugin,systemd +services.merged_io_ops_write,,a dimension per systemd service,operations/s,"Systemd Services Merged Disk Write Operations",stacked,,cgroups.plugin,systemd
\ No newline at end of file diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c index e63e042d..007d4245 100644 --- a/collectors/cgroups.plugin/sys_fs_cgroup.c +++ b/collectors/cgroups.plugin/sys_fs_cgroup.c @@ -449,70 +449,70 @@ void read_cgroup_plugin_configuration() { config_get("plugin:cgroups", "enable by default cgroups matching", // ---------------------------------------------------------------- - " !*/init.scope " // ignore init.scope - " !/system.slice/run-*.scope " // ignore system.slice/run-XXXX.scope - " *.scope " // we need all other *.scope for sure - - // ---------------------------------------------------------------- - - " /machine.slice/*.service " // #3367 systemd-nspawn - - // ---------------------------------------------------------------- - - " */kubepods/pod*/* " // k8s containers - " */kubepods/*/pod*/* " // k8s containers - " */*-kubepods-pod*/* " // k8s containers - " */*-kubepods-*-pod*/* " // k8s containers - " !*kubepods* !*kubelet* " // all other k8s cgroups - - // ---------------------------------------------------------------- - - " !*/vcpu* " // libvirtd adds these sub-cgroups - " !*/emulator " // libvirtd adds these sub-cgroups - " !*.mount " - " !*.partition " - " !*.service " - " !*.socket " - " !*.slice " - " !*.swap " - " !*.user " - " !/ " - " !/docker " - " !*/libvirt " - " !/lxc " - " !/lxc/*/* " // #1397 #2649 - " !/lxc.monitor* " - " !/lxc.pivot " - " !/lxc.payload " - " !/machine " - " !/qemu " - " !/system " - " !/systemd " - " !/user " - " * " // enable anything else - ), NULL, SIMPLE_PATTERN_EXACT); + " !*/init.scope " // ignore init.scope + " !/system.slice/run-*.scope " // ignore system.slice/run-XXXX.scope + " *.scope " // we need all other *.scope for sure + + // ---------------------------------------------------------------- + + " /machine.slice/*.service " // #3367 systemd-nspawn + + // ---------------------------------------------------------------- + + " 
*/kubepods/pod*/* " // k8s containers + " */kubepods/*/pod*/* " // k8s containers + " */*-kubepods-pod*/* " // k8s containers + " */*-kubepods-*-pod*/* " // k8s containers + " !*kubepods* !*kubelet* " // all other k8s cgroups + + // ---------------------------------------------------------------- + + " !*/vcpu* " // libvirtd adds these sub-cgroups + " !*/emulator " // libvirtd adds these sub-cgroups + " !*.mount " + " !*.partition " + " !*.service " + " !*.socket " + " !*.slice " + " !*.swap " + " !*.user " + " !/ " + " !/docker " + " !*/libvirt " + " !/lxc " + " !/lxc/*/* " // #1397 #2649 + " !/lxc.monitor* " + " !/lxc.pivot " + " !/lxc.payload " + " !/machine " + " !/qemu " + " !/system " + " !/systemd " + " !/user " + " * " // enable anything else + ), NULL, SIMPLE_PATTERN_EXACT, true); enabled_cgroup_names = simple_pattern_create( config_get("plugin:cgroups", "enable by default cgroups names matching", - " * " - ), NULL, SIMPLE_PATTERN_EXACT); + " * " + ), NULL, SIMPLE_PATTERN_EXACT, true); search_cgroup_paths = simple_pattern_create( config_get("plugin:cgroups", "search for cgroups in subpaths matching", - " !*/init.scope " // ignore init.scope - " !*-qemu " // #345 - " !*.libvirt-qemu " // #3010 - " !/init.scope " - " !/system " - " !/systemd " - " !/user " - " !/user.slice " - " !/lxc/*/* " // #2161 #2649 - " !/lxc.monitor " - " !/lxc.payload/*/* " - " !/lxc.payload.* " - " * " - ), NULL, SIMPLE_PATTERN_EXACT); + " !*/init.scope " // ignore init.scope + " !*-qemu " // #345 + " !*.libvirt-qemu " // #3010 + " !/init.scope " + " !/system " + " !/systemd " + " !/user " + " !/user.slice " + " !/lxc/*/* " // #2161 #2649 + " !/lxc.monitor " + " !/lxc.payload/*/* " + " !/lxc.payload.* " + " * " + ), NULL, SIMPLE_PATTERN_EXACT, true); snprintfz(filename, FILENAME_MAX, "%s/cgroup-name.sh", netdata_configured_primary_plugins_dir); cgroups_rename_script = config_get("plugin:cgroups", "script to get cgroup names", filename); @@ -522,37 +522,37 @@ void 
read_cgroup_plugin_configuration() { enabled_cgroup_renames = simple_pattern_create( config_get("plugin:cgroups", "run script to rename cgroups matching", - " !/ " - " !*.mount " - " !*.socket " - " !*.partition " - " /machine.slice/*.service " // #3367 systemd-nspawn - " !*.service " - " !*.slice " - " !*.swap " - " !*.user " - " !init.scope " - " !*.scope/vcpu* " // libvirtd adds these sub-cgroups - " !*.scope/emulator " // libvirtd adds these sub-cgroups - " *.scope " - " *docker* " - " *lxc* " - " *qemu* " - " */kubepods/pod*/* " // k8s containers - " */kubepods/*/pod*/* " // k8s containers - " */*-kubepods-pod*/* " // k8s containers - " */*-kubepods-*-pod*/* " // k8s containers - " !*kubepods* !*kubelet* " // all other k8s cgroups - " *.libvirt-qemu " // #3010 - " * " - ), NULL, SIMPLE_PATTERN_EXACT); + " !/ " + " !*.mount " + " !*.socket " + " !*.partition " + " /machine.slice/*.service " // #3367 systemd-nspawn + " !*.service " + " !*.slice " + " !*.swap " + " !*.user " + " !init.scope " + " !*.scope/vcpu* " // libvirtd adds these sub-cgroups + " !*.scope/emulator " // libvirtd adds these sub-cgroups + " *.scope " + " *docker* " + " *lxc* " + " *qemu* " + " */kubepods/pod*/* " // k8s containers + " */kubepods/*/pod*/* " // k8s containers + " */*-kubepods-pod*/* " // k8s containers + " */*-kubepods-*-pod*/* " // k8s containers + " !*kubepods* !*kubelet* " // all other k8s cgroups + " *.libvirt-qemu " // #3010 + " * " + ), NULL, SIMPLE_PATTERN_EXACT, true); if(cgroup_enable_systemd_services) { systemd_services_cgroups = simple_pattern_create( config_get("plugin:cgroups", "cgroups to match as systemd services", - " !/system.slice/*/*.service " - " /system.slice/*.service " - ), NULL, SIMPLE_PATTERN_EXACT); + " !/system.slice/*/*.service " + " /system.slice/*.service " + ), NULL, SIMPLE_PATTERN_EXACT, true); } mountinfo_free_all(root); @@ -1089,10 +1089,10 @@ static inline void cgroup_read_cpuacct_stat(struct cpuacct_stat *cp) { uint32_t hash = simple_hash(s); 
if(unlikely(hash == user_hash && !strcmp(s, "user"))) - cp->user = str2ull(procfile_lineword(ff, i, 1)); + cp->user = str2ull(procfile_lineword(ff, i, 1), NULL); else if(unlikely(hash == system_hash && !strcmp(s, "system"))) - cp->system = str2ull(procfile_lineword(ff, i, 1)); + cp->system = str2ull(procfile_lineword(ff, i, 1), NULL); } cp->updated = 1; @@ -1138,11 +1138,11 @@ static inline void cgroup_read_cpuacct_cpu_stat(struct cpuacct_cpu_throttling *c uint32_t hash = simple_hash(s); if (unlikely(hash == nr_periods_hash && !strcmp(s, "nr_periods"))) { - cp->nr_periods = str2ull(procfile_lineword(ff, i, 1)); + cp->nr_periods = str2ull(procfile_lineword(ff, i, 1), NULL); } else if (unlikely(hash == nr_throttled_hash && !strcmp(s, "nr_throttled"))) { - cp->nr_throttled = str2ull(procfile_lineword(ff, i, 1)); + cp->nr_throttled = str2ull(procfile_lineword(ff, i, 1), NULL); } else if (unlikely(hash == throttled_time_hash && !strcmp(s, "throttled_time"))) { - cp->throttled_time = str2ull(procfile_lineword(ff, i, 1)); + cp->throttled_time = str2ull(procfile_lineword(ff, i, 1), NULL); } } cp->nr_throttled_perc = @@ -1195,15 +1195,15 @@ static inline void cgroup2_read_cpuacct_cpu_stat(struct cpuacct_stat *cp, struct uint32_t hash = simple_hash(s); if (unlikely(hash == user_usec_hash && !strcmp(s, "user_usec"))) { - cp->user = str2ull(procfile_lineword(ff, i, 1)); + cp->user = str2ull(procfile_lineword(ff, i, 1), NULL); } else if (unlikely(hash == system_usec_hash && !strcmp(s, "system_usec"))) { - cp->system = str2ull(procfile_lineword(ff, i, 1)); + cp->system = str2ull(procfile_lineword(ff, i, 1), NULL); } else if (unlikely(hash == nr_periods_hash && !strcmp(s, "nr_periods"))) { - cpt->nr_periods = str2ull(procfile_lineword(ff, i, 1)); + cpt->nr_periods = str2ull(procfile_lineword(ff, i, 1), NULL); } else if (unlikely(hash == nr_throttled_hash && !strcmp(s, "nr_throttled"))) { - cpt->nr_throttled = str2ull(procfile_lineword(ff, i, 1)); + cpt->nr_throttled = 
str2ull(procfile_lineword(ff, i, 1), NULL); } else if (unlikely(hash == throttled_usec_hash && !strcmp(s, "throttled_usec"))) { - cpt->throttled_time = str2ull(procfile_lineword(ff, i, 1)) * 1000; // usec -> ns + cpt->throttled_time = str2ull(procfile_lineword(ff, i, 1), NULL) * 1000; // usec -> ns } } cpt->nr_throttled_perc = @@ -1289,7 +1289,7 @@ static inline void cgroup_read_cpuacct_usage(struct cpuacct_usage *ca) { unsigned long long total = 0; for(i = 0; i < ca->cpus ;i++) { - unsigned long long n = str2ull(procfile_lineword(ff, 0, i)); + unsigned long long n = str2ull(procfile_lineword(ff, 0, i), NULL); ca->cpu_percpu[i] = n; total += n; } @@ -1346,10 +1346,10 @@ static inline void cgroup_read_blkio(struct blkio *io) { uint32_t hash = simple_hash(s); if(unlikely(hash == Read_hash && !strcmp(s, "Read"))) - io->Read += str2ull(procfile_lineword(ff, i, 2)); + io->Read += str2ull(procfile_lineword(ff, i, 2), NULL); else if(unlikely(hash == Write_hash && !strcmp(s, "Write"))) - io->Write += str2ull(procfile_lineword(ff, i, 2)); + io->Write += str2ull(procfile_lineword(ff, i, 2), NULL); /* else if(unlikely(hash == Sync_hash && !strcmp(s, "Sync"))) @@ -1409,8 +1409,8 @@ static inline void cgroup2_read_blkio(struct blkio *io, unsigned int word_offset io->Write = 0; for (i = 0; i < lines; i++) { - io->Read += str2ull(procfile_lineword(ff, i, 2 + word_offset)); - io->Write += str2ull(procfile_lineword(ff, i, 4 + word_offset)); + io->Read += str2ull(procfile_lineword(ff, i, 2 + word_offset), NULL); + io->Write += str2ull(procfile_lineword(ff, i, 4 + word_offset), NULL); } io->updated = 1; @@ -1452,13 +1452,13 @@ static inline void cgroup2_read_pressure(struct pressure *res) { res->some.share_time.value10 = strtod(procfile_lineword(ff, 0, 2), NULL); res->some.share_time.value60 = strtod(procfile_lineword(ff, 0, 4), NULL); res->some.share_time.value300 = strtod(procfile_lineword(ff, 0, 6), NULL); - res->some.total_time.value_total = str2ull(procfile_lineword(ff, 0, 8)) / 
1000; // us->ms + res->some.total_time.value_total = str2ull(procfile_lineword(ff, 0, 8), NULL) / 1000; // us->ms if (lines > 2) { res->full.share_time.value10 = strtod(procfile_lineword(ff, 1, 2), NULL); res->full.share_time.value60 = strtod(procfile_lineword(ff, 1, 4), NULL); res->full.share_time.value300 = strtod(procfile_lineword(ff, 1, 6), NULL); - res->full.total_time.value_total = str2ull(procfile_lineword(ff, 1, 8)) / 1000; // us->ms + res->full.total_time.value_total = str2ull(procfile_lineword(ff, 1, 8), NULL) / 1000; // us->ms } res->updated = 1; @@ -1769,13 +1769,13 @@ static inline void substitute_dots_in_id(char *s) { // ---------------------------------------------------------------------------- // parse k8s labels -char *k8s_parse_resolved_name_and_labels(DICTIONARY *labels, char *data) { +char *cgroup_parse_resolved_name_and_labels(DICTIONARY *labels, char *data) { // the first word, up to the first space is the name - char *name = mystrsep(&data, " "); + char *name = strsep_skip_consecutive_separators(&data, " "); // the rest are key=value pairs separated by comma while(data) { - char *pair = mystrsep(&data, ","); + char *pair = strsep_skip_consecutive_separators(&data, ","); rrdlabels_add_pair(labels, pair, RRDLABEL_SRC_AUTO| RRDLABEL_SRC_K8S); } @@ -1898,19 +1898,21 @@ static inline void discovery_rename_cgroup(struct cgroup *cg) { break; } - if(cg->pending_renames || cg->processed) return; - if(!new_name || !*new_name || *new_name == '\n') return; - if(!(new_name = trim(new_name))) return; + if (cg->pending_renames || cg->processed) + return; + if (!new_name || !*new_name || *new_name == '\n') + return; + if (!(new_name = trim(new_name))) + return; char *name = new_name; - if (!strncmp(new_name, "k8s_", 4)) { - if(!cg->chart_labels) cg->chart_labels = rrdlabels_create(); - // read the new labels and remove the obsolete ones - rrdlabels_unmark_all(cg->chart_labels); - name = k8s_parse_resolved_name_and_labels(cg->chart_labels, new_name); - 
rrdlabels_remove_all_unmarked(cg->chart_labels); - } + if (!cg->chart_labels) + cg->chart_labels = rrdlabels_create(); + // read the new labels and remove the obsolete ones + rrdlabels_unmark_all(cg->chart_labels); + name = cgroup_parse_resolved_name_and_labels(cg->chart_labels, new_name); + rrdlabels_remove_all_unmarked(cg->chart_labels); freez(cg->chart_title); cg->chart_title = cgroup_title_strdupz(name); @@ -2713,6 +2715,16 @@ static inline void discovery_process_cgroup(struct cgroup *cg) { return; } + if (!cg->chart_labels) + cg->chart_labels = rrdlabels_create(); + + if (!k8s_is_kubepod(cg)) { + rrdlabels_add(cg->chart_labels, "cgroup_name", cg->chart_id, RRDLABEL_SRC_AUTO); + if (!dictionary_get(cg->chart_labels, "image")) { + rrdlabels_add(cg->chart_labels, "image", "", RRDLABEL_SRC_AUTO); + } + } + worker_is_busy(WORKER_DISCOVERY_PROCESS_NETWORK); read_cgroup_network_interfaces(cg); } @@ -2784,10 +2796,10 @@ void cgroup_discovery_worker(void *ptr) worker_register_job_name(WORKER_DISCOVERY_LOCK, "lock"); entrypoint_parent_process_comm = simple_pattern_create( - " runc:[* " // http://terenceli.github.io/%E6%8A%80%E6%9C%AF/2021/12/28/runc-internals-3) - " exe ", // https://github.com/falcosecurity/falco/blob/9d41b0a151b83693929d3a9c84f7c5c85d070d3a/rules/falco_rules.yaml#L1961 - NULL, - SIMPLE_PATTERN_EXACT); + " runc:[* " // http://terenceli.github.io/%E6%8A%80%E6%9C%AF/2021/12/28/runc-internals-3) + " exe ", // https://github.com/falcosecurity/falco/blob/9d41b0a151b83693929d3a9c84f7c5c85d070d3a/rules/falco_rules.yaml#L1961 + NULL, + SIMPLE_PATTERN_EXACT, true); while (service_running(SERVICE_COLLECTORS)) { worker_is_idle(); @@ -3566,14 +3578,14 @@ static inline void update_cpu_limits2(struct cgroup *cg) { return; } - cg->cpu_cfs_period = str2ull(procfile_lineword(ff, 0, 1)); + cg->cpu_cfs_period = str2ull(procfile_lineword(ff, 0, 1), NULL); cg->cpuset_cpus = get_system_cpus(); char *s = "max\n\0"; if(strcmp(s, procfile_lineword(ff, 0, 0)) == 0){ 
cg->cpu_cfs_quota = cg->cpu_cfs_period * cg->cpuset_cpus; } else { - cg->cpu_cfs_quota = str2ull(procfile_lineword(ff, 0, 0)); + cg->cpu_cfs_quota = str2ull(procfile_lineword(ff, 0, 0), NULL); } debug(D_CGROUP, "CPU limits values: %llu %llu %llu", cg->cpu_cfs_period, cg->cpuset_cpus, cg->cpu_cfs_quota); return; @@ -3623,7 +3635,7 @@ static inline int update_memory_limits(char **filename, const RRDSETVAR_ACQUIRED rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value / (1024 * 1024))); return 1; } - *value = str2ull(buffer); + *value = str2ull(buffer, NULL); rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value / (1024 * 1024))); return 1; } @@ -3676,7 +3688,10 @@ void update_cgroup_charts(int update_every) { if(likely(cg->cpuacct_stat.updated && cg->cpuacct_stat.enabled == CONFIG_BOOLEAN_YES)) { if(unlikely(!cg->st_cpu)) { - snprintfz(title, CHART_TITLE_MAX, "CPU Usage (100%% = 1 core)"); + snprintfz( + title, + CHART_TITLE_MAX, + k8s_is_kubepod(cg) ? "CPU Usage (100%% = 1000 mCPU)" : "CPU Usage (100%% = 1 core)"); cg->st_cpu = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) @@ -3879,7 +3894,11 @@ void update_cgroup_charts(int update_every) { unsigned int i; if(unlikely(!cg->st_cpu_per_core)) { - snprintfz(title, CHART_TITLE_MAX, "CPU Usage (100%% = 1 core) Per Core"); + snprintfz( + title, + CHART_TITLE_MAX, + k8s_is_kubepod(cg) ? "CPU Usage (100%% = 1000 mCPU) Per Core" : + "CPU Usage (100%% = 1 core) Per Core"); cg->st_cpu_per_core = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) @@ -4111,7 +4130,7 @@ void update_cgroup_charts(int update_every) { if(likely(ff)) ff = procfile_readall(ff); if(likely(ff && procfile_lines(ff) && !strncmp(procfile_word(ff, 0), "MemTotal", 8))) - ram_total = str2ull(procfile_word(ff, 1)) * 1024; + ram_total = str2ull(procfile_word(ff, 1), NULL) * 1024; else { collector_error("Cannot read file %s. 
Will not update cgroup %s RAM limit anymore.", filename, cg->id); freez(cg->filename_memory_limit); @@ -4771,6 +4790,7 @@ static void cgroup_main_cleanup(void *ptr) { } if (shm_cgroup_ebpf.header) { + shm_cgroup_ebpf.header->cgroup_root_count = 0; munmap(shm_cgroup_ebpf.header, shm_cgroup_ebpf.header->body_length); } diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.h b/collectors/cgroups.plugin/sys_fs_cgroup.h index d1adf8a9..dc800ba9 100644 --- a/collectors/cgroups.plugin/sys_fs_cgroup.h +++ b/collectors/cgroups.plugin/sys_fs_cgroup.h @@ -39,6 +39,6 @@ typedef struct netdata_ebpf_cgroup_shm { #include "../proc.plugin/plugin_proc.h" -char *k8s_parse_resolved_name_and_labels(DICTIONARY *labels, char *data); +char *cgroup_parse_resolved_name_and_labels(DICTIONARY *labels, char *data); #endif //NETDATA_SYS_FS_CGROUP_H diff --git a/collectors/cgroups.plugin/tests/test_cgroups_plugin.c b/collectors/cgroups.plugin/tests/test_cgroups_plugin.c index 25939a9c..a0f91530 100644 --- a/collectors/cgroups.plugin/tests/test_cgroups_plugin.c +++ b/collectors/cgroups.plugin/tests/test_cgroups_plugin.c @@ -33,7 +33,7 @@ static int read_label_callback(const char *name, const char *value, RRDLABEL_SRC return 1; } -static void test_k8s_parse_resolved_name(void **state) +static void test_cgroup_parse_resolved_name(void **state) { UNUSED(state); @@ -96,7 +96,7 @@ static void test_k8s_parse_resolved_name(void **state) for (int i = 0; test_data[i].data != NULL; i++) { char *data = strdup(test_data[i].data); - char *name = k8s_parse_resolved_name_and_labels(labels, data); + char *name = cgroup_parse_resolved_name_and_labels(labels, data); assert_string_equal(name, test_data[i].name); @@ -122,10 +122,10 @@ static void test_k8s_parse_resolved_name(void **state) int main(void) { const struct CMUnitTest tests[] = { - cmocka_unit_test(test_k8s_parse_resolved_name), + cmocka_unit_test(test_cgroup_parse_resolved_name), }; - int test_res = 
cmocka_run_group_tests_name("test_k8s_parse_resolved_name", tests, NULL, NULL); + int test_res = cmocka_run_group_tests_name("test_cgroup_parse_resolved_name", tests, NULL, NULL); return test_res; } |