Diffstat (limited to 'collectors/cgroups.plugin')
-rwxr-xr-x  collectors/cgroups.plugin/cgroup-name.sh.in        |  33
-rwxr-xr-x  collectors/cgroups.plugin/cgroup-network-helper.sh |  17
-rw-r--r--  collectors/cgroups.plugin/sys_fs_cgroup.c          | 184
-rw-r--r--  collectors/cgroups.plugin/sys_fs_cgroup.h          |  34
4 files changed, 231 insertions, 37 deletions
diff --git a/collectors/cgroups.plugin/cgroup-name.sh.in b/collectors/cgroups.plugin/cgroup-name.sh.in
index 8ef8ab58e..1f31c49a7 100755
--- a/collectors/cgroups.plugin/cgroup-name.sh.in
+++ b/collectors/cgroups.plugin/cgroup-name.sh.in
@@ -118,24 +118,37 @@ function add_lbl_prefix() {
 # pod level cgroup name format: 'pod_<namespace>_<pod_name>'
 # container level cgroup name format: 'cntr_<namespace>_<pod_name>_<container_name>'
 function k8s_get_kubepod_name() {
-  # GKE /sys/fs/cgroup/*/ tree:
+  # GKE /sys/fs/cgroup/*/ (cri=docker, cgroups=v1):
   # |-- kubepods
   # |   |-- burstable
   # |   |   |-- pod98cee708-023b-11eb-933d-42010a800193
   # |   |   |   |-- 922161c98e6ea450bf665226cdc64ca2aa3e889934c2cff0aec4325f8f78ac03
-  # |   |   |   `-- a5d223eec35e00f5a1c6fa3e3a5faac6148cdc1f03a2e762e873b7efede012d7
   # |   `-- pode314bbac-d577-11ea-a171-42010a80013b
   # |       |-- 7d505356b04507de7b710016d540b2759483ed5f9136bb01a80872b08f771930
-  # |       `-- 88ab4683b99cfa7cc8c5f503adf7987dd93a3faa7c4ce0d17d419962b3220d50
   #
-  # Minikube (v1.8.2) /sys/fs/cgroup/*/ tree:
+  # GKE /sys/fs/cgroup/*/ (cri=containerd, cgroups=v1):
+  # |-- kubepods.slice
+  # |   |-- kubepods-besteffort.slice
+  # |   |   |-- kubepods-besteffort-pode1465238_4518_4c21_832f_fd9f87033dad.slice
+  # |   |   |   |-- cri-containerd-66be9b2efdf4d85288c319b8c1a2f50d2439b5617e36f45d9d0d0be1381113be.scope
+  # |   `-- kubepods-pod91f5b561_369f_4103_8015_66391059996a.slice
+  # |       |-- cri-containerd-24c53b774a586f06abc058619b47f71d9d869ac50c92898adbd199106fd0aaeb.scope
+  #
+  # GKE /sys/fs/cgroup/*/ (cri=crio, cgroups=v1):
+  # |-- kubepods.slice
+  # |   |-- kubepods-besteffort.slice
+  # |   |   |-- kubepods-besteffort-podad412dfe_3589_4056_965a_592356172968.slice
+  # |   |   |   |-- crio-77b019312fd9825828b70214b2c94da69c30621af2a7ee06f8beace4bc9439e5.scope
+  #
+  # Minikube (v1.8.2) /sys/fs/cgroup/*/ (cri=docker, cgroups=v1):
   # |-- kubepods.slice
   # |   |-- kubepods-besteffort.slice
   # |   |   |-- kubepods-besteffort-pod10fb5647_c724_400c_b9cc_0e6eae3110e7.slice
   # |   |   |   |-- docker-36e5eb5056dfdf6dbb75c0c44a1ecf23217fe2c50d606209d8130fcbb19fb5a7.scope
-  # |   |   |   `-- docker-87e18c2323621cf0f635c53c798b926e33e9665c348c60d489eef31ee1bd38d7.scope
   #
-  # NOTE: cgroups plugin uses '_' to join dir names, so it is <parent>_<child>_<child>_...
+  # NOTE: cgroups plugin
+  #  - uses '_' to join dir names (so it is <parent>_<child>_<child>_...)
+  #  - replaces '.' with '-'
 
   local fn="${FUNCNAME[0]}"
   local id="${1}"
@@ -157,9 +170,9 @@ function k8s_get_kubepod_name() {
     # kubepods_kubepods-<QOS_CLASS>
     name=${clean_id//-/_}
     name=${name/#kubepods_kubepods/kubepods}
-  elif [[ $clean_id =~ .+pod[a-f0-9_-]+_docker-([a-f0-9]+)$ ]]; then
-    # ...pod<POD_UID>_docker-<CONTAINER_ID> (POD_UID w/ "_")
-    cntr_id=${BASH_REMATCH[1]}
+  elif [[ $clean_id =~ .+pod[a-f0-9_-]+_(docker|crio|cri-containerd)-([a-f0-9]+)$ ]]; then
+    # ...pod<POD_UID>_(docker|crio|cri-containerd)-<CONTAINER_ID> (POD_UID w/ "_")
+    cntr_id=${BASH_REMATCH[2]}
   elif [[ $clean_id =~ .+pod[a-f0-9-]+_([a-f0-9]+)$ ]]; then
     # ...pod<POD_UID>_<CONTAINER_ID>
     cntr_id=${BASH_REMATCH[1]}
@@ -252,7 +265,7 @@ function k8s_get_kubepod_name() {
   jq_filter+='container_name=\"\(.name)\",'
   jq_filter+='container_id=\"\(.containerID)\"'
   jq_filter+='") | '
-  jq_filter+='sub("docker://";"")'  # containerID: docker://a346da9bc0e3eaba6b295f64ac16e02f2190db2cef570835706a9e7a36e2c722
+  jq_filter+='sub("(docker|cri-o|containerd)://";"")'  # containerID: docker://a346da9bc0e3eaba6b295f64ac16e02f2190db2cef570835706a9e7a36e2c722

   local containers
   if ! containers=$(jq -r "${jq_filter}" <<< "$pods" 2>&1); then
diff --git a/collectors/cgroups.plugin/cgroup-network-helper.sh b/collectors/cgroups.plugin/cgroup-network-helper.sh
index 1b60f452a..f355480b8 100755
--- a/collectors/cgroups.plugin/cgroup-network-helper.sh
+++ b/collectors/cgroups.plugin/cgroup-network-helper.sh
@@ -76,7 +76,7 @@ debug() {
 pid=
 cgroup=
 
-while [ ! -z "${1}" ]
+while [ -n "${1}" ]
 do
     case "${1}" in
         --cgroup) cgroup="${2}"; shift 1;;
@@ -164,7 +164,7 @@ virsh_find_all_interfaces_for_cgroup() {
     # shellcheck disable=SC2230
     virsh="$(which virsh 2>/dev/null || command -v virsh 2>/dev/null)"
 
-    if [ ! -z "${virsh}" ]
+    if [ -n "${virsh}" ]
     then
         local d
         d="$(virsh_cgroup_to_domain_name "${c}")"
@@ -172,7 +172,7 @@ virsh_find_all_interfaces_for_cgroup() {
         # e.g.: vm01\x2dweb => vm01-web (https://github.com/netdata/netdata/issues/11088#issuecomment-832618149)
         d="$(printf '%b' "${d}")"
 
-        if [ ! -z "${d}" ]
+        if [ -n "${d}" ]
         then
             debug "running: virsh domiflist ${d}; to find the network interfaces"
@@ -203,8 +203,11 @@ netnsid_find_all_interfaces_for_pid() {
     local pid="${1}"
     [ -z "${pid}" ] && return 1
 
-    local nsid=$(lsns -t net -p ${pid} -o NETNSID -nr)
-    [ -z "${nsid}" -o "${nsid}" = "unassigned" ] && return 1
+    local nsid
+    nsid=$(lsns -t net -p "${pid}" -o NETNSID -nr 2>/dev/null)
+    if [ -z "${nsid}" ] || [ "${nsid}" = "unassigned" ]; then
+      return 1
+    fi
 
     set_source "netnsid"
     ip link show |\
@@ -234,14 +237,14 @@ netnsid_find_all_interfaces_for_cgroup() {
 find_all_interfaces_of_pid_or_cgroup() {
     local p="${1}" c="${2}" # the pid and the cgroup path
 
-    if [ ! -z "${pid}" ]
+    if [ -n "${pid}" ]
     then
         # we have been called with a pid
 
         proc_pid_fdinfo_iff "${p}"
         netnsid_find_all_interfaces_for_pid "${p}"
 
-    elif [ ! -z "${c}" ]
+    elif [ -n "${c}" ]
     then
         # we have been called with a cgroup
 
diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c
index eea4d9ae7..92aa22c77 100644
--- a/collectors/cgroups.plugin/sys_fs_cgroup.c
+++ b/collectors/cgroups.plugin/sys_fs_cgroup.c
@@ -94,6 +94,11 @@ static struct cgroups_systemd_config_setting cgroups_systemd_options[] = {
     { .name = NULL, .setting = SYSTEMD_CGROUP_ERR },
 };
 
+// Shared memory with information from detected cgroups
+netdata_ebpf_cgroup_shm_t shm_cgroup_ebpf = {NULL, NULL};
+static int shm_fd_cgroup_ebpf = -1;
+sem_t *shm_mutex_cgroup_ebpf = SEM_FAILED;
+
 /* on Fed systemd is not in PATH for some reason */
 #define SYSTEMD_CMD_RHEL "/usr/lib/systemd/systemd --version"
 #define SYSTEMD_HIERARCHY_STRING "default-hierarchy="
@@ -168,8 +173,6 @@ static enum cgroups_type cgroups_try_detect_version()
     if (!statfs(filename, &fsinfo)) {
         if (fsinfo.f_type == CGROUP2_SUPER_MAGIC)
             return CGROUPS_V2;
-        if (fsinfo.f_type == CGROUP_SUPER_MAGIC)
-            return CGROUPS_V1;
     }
 #endif
 
@@ -463,6 +466,61 @@ void read_cgroup_plugin_configuration() {
     mountinfo_free_all(root);
 }
 
+void netdata_cgroup_ebpf_set_values(size_t length)
+{
+    sem_wait(shm_mutex_cgroup_ebpf);
+
+    shm_cgroup_ebpf.header->cgroup_max = cgroup_root_max;
+    shm_cgroup_ebpf.header->systemd_enabled = cgroup_enable_systemd_services |
+                                              cgroup_enable_systemd_services_detailed_memory |
+                                              cgroup_used_memory;
+    shm_cgroup_ebpf.header->body_length = length;
+
+    sem_post(shm_mutex_cgroup_ebpf);
+}
+
+void netdata_cgroup_ebpf_initialize_shm()
+{
+    shm_fd_cgroup_ebpf = shm_open(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME, O_CREAT | O_RDWR, 0660);
+    if (shm_fd_cgroup_ebpf < 0) {
+        error("Cannot initialize shared memory used by cgroup and eBPF, integration won't happen.");
+        return;
+    }
+
+    size_t length = sizeof(netdata_ebpf_cgroup_shm_header_t) + cgroup_root_max * sizeof(netdata_ebpf_cgroup_shm_body_t);
+    if (ftruncate(shm_fd_cgroup_ebpf, length)) {
+        error("Cannot set size for shared memory.");
+        goto end_init_shm;
+    }
+
+    shm_cgroup_ebpf.header = (netdata_ebpf_cgroup_shm_header_t *) mmap(NULL, length,
+                                                                       PROT_READ | PROT_WRITE, MAP_SHARED,
+                                                                       shm_fd_cgroup_ebpf, 0);
+
+    if (!shm_cgroup_ebpf.header) {
+        error("Cannot map shared memory used between cgroup and eBPF, integration won't happen");
+        goto end_init_shm;
+    }
+    shm_cgroup_ebpf.body = (netdata_ebpf_cgroup_shm_body_t *) ((char *)shm_cgroup_ebpf.header +
+                                                               sizeof(netdata_ebpf_cgroup_shm_header_t));
+
+    shm_mutex_cgroup_ebpf = sem_open(NETDATA_NAMED_SEMAPHORE_EBPF_CGROUP_NAME, O_CREAT,
+                                     S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH, 1);
+
+    if (shm_mutex_cgroup_ebpf != SEM_FAILED) {
+        netdata_cgroup_ebpf_set_values(length);
+        return;
+    }
+
+    error("Cannot create semaphore, integration between eBPF and cgroup won't happen");
+    munmap(shm_cgroup_ebpf.header, length);
+
+end_init_shm:
+    close(shm_fd_cgroup_ebpf);
+    shm_fd_cgroup_ebpf = -1;
+    shm_unlink(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME);
+}
+
 // ----------------------------------------------------------------------------
 // cgroup objects
 
@@ -597,10 +655,6 @@ struct cgroup_network_interface {
     struct cgroup_network_interface *next;
 };
 
-#define CGROUP_OPTIONS_DISABLED_DUPLICATE   0x00000001
-#define CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE 0x00000002
-#define CGROUP_OPTIONS_IS_UNIFIED           0x00000004
-
 // *** WARNING *** The fields are not thread safe. Take care of safe usage.
 struct cgroup {
     uint32_t options;
@@ -609,6 +663,7 @@ struct cgroup {
     char enabled;       // enabled in the config
 
     char pending_renames;
+    char *intermediate_id; // TODO: remove it when the renaming script is fixed
 
     char *id;
     uint32_t hash;
@@ -1313,13 +1368,16 @@ static inline char *cgroup_chart_id_strdupz(const char *s) {
     char *r = strdupz(s);
     netdata_fix_chart_id(r);
 
+    return r;
+}
+
+// TODO: move the code to cgroup_chart_id_strdupz() when the renaming script is fixed
+static inline void substitute_dots_in_id(char *s) {
     // dots are used to distinguish chart type and id in streaming, so we should replace them
-    for (char *d = r; *d; d++) {
+    for (char *d = s; *d; d++) {
         if (*d == '.')
             *d = '-';
     }
-
-    return r;
 }
 
 char *parse_k8s_data(struct label **labels, char *data)
@@ -1357,7 +1415,8 @@ static inline void cgroup_get_chart_name(struct cgroup *cg) {
     pid_t cgroup_pid;
     char command[CGROUP_CHARTID_LINE_MAX + 1];
 
-    snprintfz(command, CGROUP_CHARTID_LINE_MAX, "exec %s '%s'", cgroups_rename_script, cg->chart_id);
+    // TODO: use cg->id when the renaming script is fixed
+    snprintfz(command, CGROUP_CHARTID_LINE_MAX, "exec %s '%s'", cgroups_rename_script, cg->intermediate_id);
 
     debug(D_CGROUP, "executing command \"%s\" for cgroup '%s'", command, cg->chart_id);
     FILE *fp = mypopen(command, &cgroup_pid);
@@ -1394,6 +1453,7 @@ static inline void cgroup_get_chart_name(struct cgroup *cg) {
 
             freez(cg->chart_id);
             cg->chart_id = cgroup_chart_id_strdupz(name);
+            substitute_dots_in_id(cg->chart_id);
             cg->hash_chart = simple_hash(cg->chart_id);
         }
     }
@@ -1420,7 +1480,10 @@ static inline struct cgroup *cgroup_add(const char *id) {
 
     cg->chart_title = cgroup_title_strdupz(id);
 
+    cg->intermediate_id = cgroup_chart_id_strdupz(id);
+
     cg->chart_id = cgroup_chart_id_strdupz(id);
+    substitute_dots_in_id(cg->chart_id);
     cg->hash_chart = simple_hash(cg->chart_id);
 
     if(cgroup_use_unified_cgroups) cg->options |= CGROUP_OPTIONS_IS_UNIFIED;
@@ -1461,10 +1524,6 @@ static inline struct cgroup *cgroup_add(const char *id) {
             strncpy(buffer, cg->id, CGROUP_CHARTID_LINE_MAX);
             char *s = buffer;
 
-            //freez(cg->chart_id);
-            //cg->chart_id = cgroup_chart_id_strdupz(s);
-            //cg->hash_chart = simple_hash(cg->chart_id);
-
             // skip to the last slash
             size_t len = strlen(s);
             while(len--) if(unlikely(s[len] == '/')) break;
@@ -1588,6 +1647,7 @@ static inline void cgroup_free(struct cgroup *cg) {
     free_pressure(&cg->memory_pressure);
 
     freez(cg->id);
+    freez(cg->intermediate_id);
     freez(cg->chart_id);
     freez(cg->chart_title);
 
@@ -2056,6 +2116,69 @@ static inline void copy_discovered_cgroups()
     cgroup_root = discovered_cgroup_root;
 }
 
+static void is_there_cgroup_procs(netdata_ebpf_cgroup_shm_body_t *out, char *id)
+{
+    struct stat buf;
+
+    snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_cpuset_base, id);
+    if (likely(stat(out->path, &buf) == 0)) {
+        return;
+    }
+
+    snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_blkio_base, id);
+    if (likely(stat(out->path, &buf) == 0)) {
+        return;
+    }
+
+    snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_memory_base, id);
+    if (likely(stat(out->path, &buf) == 0)) {
+        return;
+    }
+
+    snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_devices_base, id);
+    if (likely(stat(out->path, &buf) == 0)) {
+        return;
+    }
+
+    out->path[0] = '\0';
+    out->enabled = 0;
+}
+
+static inline void share_cgroups()
+{
+    struct cgroup *cg;
+    int count;
+    struct stat buf;
+
+    if (shm_mutex_cgroup_ebpf == SEM_FAILED) {
+        return;
+    }
+    sem_wait(shm_mutex_cgroup_ebpf);
+
+    for (cg = cgroup_root, count = 0; cg ; cg = cg->next, count++) {
+        netdata_ebpf_cgroup_shm_body_t *ptr = &shm_cgroup_ebpf.body[count];
+        char *prefix = (cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE) ? "" : "cgroup_";
+        snprintfz(ptr->name, CGROUP_EBPF_NAME_SHARED_LENGTH - 1, "%s%s", prefix, cg->chart_title);
+        ptr->hash = simple_hash(ptr->name);
+        ptr->options = cg->options;
+        ptr->enabled = cg->enabled;
+        if (cgroup_use_unified_cgroups) {
+            snprintfz(ptr->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_unified_base, cg->id);
+            if (likely(stat(ptr->path, &buf) == -1)) {
+                ptr->path[0] = '\0';
+                ptr->enabled = 0;
+            }
+        } else {
+            is_there_cgroup_procs(ptr, cg->id);
+        }
+
+        debug(D_CGROUP, "cgroup shared: NAME=%s, ENABLED=%d", ptr->name, ptr->enabled);
+    }
+
+    shm_cgroup_ebpf.header->cgroup_root_count = count;
+    sem_post(shm_mutex_cgroup_ebpf);
+}
+
 static inline void find_all_cgroups() {
     debug(D_CGROUP, "searching for cgroups");
 
@@ -2112,6 +2235,8 @@ static inline void find_all_cgroups() {
     copy_discovered_cgroups();
     uv_mutex_unlock(&cgroup_root_mutex);
 
+    share_cgroups();
+
     debug(D_CGROUP, "done searching for cgroups");
 }
 
@@ -2743,7 +2868,7 @@ void update_systemd_services_charts(
                 if(unlikely(!cg->rd_mem_detailed_rss))
                     cg->rd_mem_detailed_rss = rrddim_add(st_mem_detailed_rss, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
 
-                rrddim_set_by_pointer(st_mem_detailed_rss, cg->rd_mem_detailed_rss, cg->memory.total_rss + cg->memory.total_rss_huge);
+                rrddim_set_by_pointer(st_mem_detailed_rss, cg->rd_mem_detailed_rss, cg->memory.total_rss);
 
                 if(unlikely(!cg->rd_mem_detailed_mapped))
                     cg->rd_mem_detailed_mapped = rrddim_add(st_mem_detailed_mapped, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
@@ -2792,7 +2917,15 @@ void update_systemd_services_charts(
                 if(unlikely(!cg->rd_swap_usage))
                     cg->rd_swap_usage = rrddim_add(st_swap_usage, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
 
-                rrddim_set_by_pointer(st_swap_usage, cg->rd_swap_usage, cg->memory.msw_usage_in_bytes);
+                if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
+                    rrddim_set_by_pointer(
+                        st_swap_usage,
+                        cg->rd_swap_usage,
+                        cg->memory.msw_usage_in_bytes > (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) ?
+                            cg->memory.msw_usage_in_bytes - (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) : 0);
+                } else {
+                    rrddim_set_by_pointer(st_swap_usage, cg->rd_swap_usage, cg->memory.msw_usage_in_bytes);
+                }
             }
 
             if(likely(do_io && cg->io_service_bytes.updated)) {
@@ -3482,8 +3615,8 @@ void update_cgroup_charts(int update_every) {
                 rrddim_set(
                     cg->st_mem_usage,
                     "swap",
-                    (cg->memory.msw_usage_in_bytes > cg->memory.usage_in_bytes) ?
-                        cg->memory.msw_usage_in_bytes - cg->memory.usage_in_bytes : 0);
+                    cg->memory.msw_usage_in_bytes > (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) ?
+                        cg->memory.msw_usage_in_bytes - (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) : 0);
             } else {
                 rrddim_set(cg->st_mem_usage, "swap", cg->memory.msw_usage_in_bytes);
             }
@@ -4022,6 +4155,18 @@ static void cgroup_main_cleanup(void *ptr) {
             sleep_usec(step);
     }
 
+    if (shm_mutex_cgroup_ebpf != SEM_FAILED) {
+        sem_close(shm_mutex_cgroup_ebpf);
+    }
+
+    if (shm_cgroup_ebpf.header) {
+        munmap(shm_cgroup_ebpf.header, shm_cgroup_ebpf.header->body_length);
+    }
+
+    if (shm_fd_cgroup_ebpf > 0) {
+        close(shm_fd_cgroup_ebpf);
+    }
+
     static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
 }
 
@@ -4034,6 +4179,7 @@ void *cgroups_main(void *ptr) {
     int vdo_cpu_netdata = config_get_boolean("plugin:cgroups", "cgroups plugin resource charts", 1);
 
     read_cgroup_plugin_configuration();
+    netdata_cgroup_ebpf_initialize_shm();
 
     RRDSET *stcpu_thread = NULL;
 
@@ -4057,7 +4203,7 @@ void *cgroups_main(void *ptr) {
 
     int error = uv_thread_create(&discovery_thread.thread, cgroup_discovery_worker, NULL);
     if (error) {
-        error("CGROUP: cannot create tread worker. uv_thread_create(): %s", uv_strerror(error));
+        error("CGROUP: cannot create thread worker. uv_thread_create(): %s", uv_strerror(error));
         goto exit;
     }
     uv_thread_set_name_np(discovery_thread.thread, "PLUGIN[cgroups]");
diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.h b/collectors/cgroups.plugin/sys_fs_cgroup.h
index 155330ff1..017aa8fb5 100644
--- a/collectors/cgroups.plugin/sys_fs_cgroup.h
+++ b/collectors/cgroups.plugin/sys_fs_cgroup.h
@@ -3,7 +3,7 @@
 #ifndef NETDATA_SYS_FS_CGROUP_H
 #define NETDATA_SYS_FS_CGROUP_H 1
 
-#include "../../daemon/common.h"
+#include "daemon/common.h"
 
 #if (TARGET_OS == OS_LINUX)
 
@@ -20,6 +20,38 @@
 
 extern void *cgroups_main(void *ptr);
 
+#define CGROUP_OPTIONS_DISABLED_DUPLICATE   0x00000001
+#define CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE 0x00000002
+#define CGROUP_OPTIONS_IS_UNIFIED           0x00000004
+
+typedef struct netdata_ebpf_cgroup_shm_header {
+    int cgroup_root_count;
+    int cgroup_max;
+    int systemd_enabled;
+    int __pad;
+    size_t body_length;
+} netdata_ebpf_cgroup_shm_header_t;
+
+#define CGROUP_EBPF_NAME_SHARED_LENGTH 256
+
+typedef struct netdata_ebpf_cgroup_shm_body {
+    // Considering what is exposed in this link https://en.wikipedia.org/wiki/Comparison_of_file_systems#Limits
+    // this length is enough to store what we want.
+    char name[CGROUP_EBPF_NAME_SHARED_LENGTH];
+    uint32_t hash;
+    uint32_t options;
+    int enabled;
+    char path[FILENAME_MAX + 1];
+} netdata_ebpf_cgroup_shm_body_t;
+
+typedef struct netdata_ebpf_cgroup_shm {
+    netdata_ebpf_cgroup_shm_header_t *header;
+    netdata_ebpf_cgroup_shm_body_t *body;
+} netdata_ebpf_cgroup_shm_t;
+
+#define NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME "netdata_shm_cgroup_ebpf"
+#define NETDATA_NAMED_SEMAPHORE_EBPF_CGROUP_NAME "/netdata_sem_cgroup_ebpf"
+
 #include "../proc.plugin/plugin_proc.h"
 
 #else // (TARGET_OS == OS_LINUX)
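The shared-memory interface added above (netdata_cgroup_ebpf_initialize_shm(), share_cgroups(), and the structures in sys_fs_cgroup.h) can be read from another process roughly as follows. This is a minimal sketch, not the actual ebpf.plugin consumer: the two NETDATA_* object names and the struct layouts are copied from the sys_fs_cgroup.h hunk, while main(), the error handling, and the printf() output are illustrative assumptions, and the mirrored structs (including FILENAME_MAX) must match the layout the agent was built with.

// reader_sketch.c -- illustrative reader for the cgroup/eBPF shared memory (assumption, not netdata code)
#include <fcntl.h>
#include <semaphore.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

// Names and layouts mirrored from the sys_fs_cgroup.h hunk above.
#define NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME "netdata_shm_cgroup_ebpf"
#define NETDATA_NAMED_SEMAPHORE_EBPF_CGROUP_NAME "/netdata_sem_cgroup_ebpf"
#define CGROUP_EBPF_NAME_SHARED_LENGTH 256

typedef struct netdata_ebpf_cgroup_shm_header {
    int cgroup_root_count;
    int cgroup_max;
    int systemd_enabled;
    int __pad;
    size_t body_length;
} netdata_ebpf_cgroup_shm_header_t;

typedef struct netdata_ebpf_cgroup_shm_body {
    char name[CGROUP_EBPF_NAME_SHARED_LENGTH];
    uint32_t hash;
    uint32_t options;
    int enabled;
    char path[FILENAME_MAX + 1];
} netdata_ebpf_cgroup_shm_body_t;

int main(void) {
    // Open the objects created by netdata_cgroup_ebpf_initialize_shm(); a reader must not O_CREAT them.
    int fd = shm_open(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME, O_RDWR, 0660);
    if (fd < 0) { perror("shm_open"); return 1; }

    sem_t *mutex = sem_open(NETDATA_NAMED_SEMAPHORE_EBPF_CGROUP_NAME, 0);
    if (mutex == SEM_FAILED) { perror("sem_open"); close(fd); return 1; }

    // The plugin sized the segment with ftruncate(), so fstat() reports header + cgroup_max bodies.
    struct stat st;
    if (fstat(fd, &st) || (size_t)st.st_size < sizeof(netdata_ebpf_cgroup_shm_header_t)) {
        sem_close(mutex); close(fd); return 1;
    }

    netdata_ebpf_cgroup_shm_header_t *header =
        mmap(NULL, (size_t)st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (header == MAP_FAILED) { perror("mmap"); sem_close(mutex); close(fd); return 1; }

    // Bodies start right after the header, exactly as in netdata_cgroup_ebpf_initialize_shm().
    netdata_ebpf_cgroup_shm_body_t *body =
        (netdata_ebpf_cgroup_shm_body_t *)((char *)header + sizeof(*header));

    // Reads are serialized against share_cgroups(), which holds the same named semaphore while writing.
    sem_wait(mutex);
    for (int i = 0; i < header->cgroup_root_count; i++)
        printf("cgroup '%s' enabled=%d procs=%s\n", body[i].name, body[i].enabled, body[i].path);
    sem_post(mutex);

    munmap(header, (size_t)st.st_size);
    sem_close(mutex);
    close(fd);
    return 0;
}

Because share_cgroups() refreshes every body while holding the semaphore, a reader should keep its critical section short to avoid stalling the cgroup discovery loop.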