From bb8713bbc1c4594366fc735c04910edbf4c61aab Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 31 Mar 2021 14:59:21 +0200 Subject: Merging upstream version 1.30.0. Signed-off-by: Daniel Baumann --- collectors/ebpf.plugin/ebpf_cachestat.c | 655 ++++++++++++++++++++++++++++++++ 1 file changed, 655 insertions(+) create mode 100644 collectors/ebpf.plugin/ebpf_cachestat.c (limited to 'collectors/ebpf.plugin/ebpf_cachestat.c') diff --git a/collectors/ebpf.plugin/ebpf_cachestat.c b/collectors/ebpf.plugin/ebpf_cachestat.c new file mode 100644 index 000000000..6516d4da2 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_cachestat.c @@ -0,0 +1,655 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_cachestat.h" + +static ebpf_data_t cachestat_data; +netdata_publish_cachestat_t **cachestat_pid; + +static struct bpf_link **probe_links = NULL; +static struct bpf_object *objects = NULL; + +static char *cachestat_counter_dimension_name[NETDATA_CACHESTAT_END] = { "ratio", "dirty", "hit", + "miss" }; +static netdata_syscall_stat_t cachestat_counter_aggregated_data[NETDATA_CACHESTAT_END]; +static netdata_publish_syscall_t cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_END]; + +netdata_cachestat_pid_t *cachestat_vector = NULL; + +static netdata_idx_t *cachestat_hash_values = NULL; + +static int read_thread_closed = 1; + +struct netdata_static_thread cachestat_threads = {"CACHESTAT KERNEL", + NULL, NULL, 1, NULL, + NULL, NULL}; + +static int *map_fd = NULL; + +struct config cachestat_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +/***************************************************************** + * + * FUNCTIONS TO CLOSE THE THREAD + * + *****************************************************************/ + +/** + * Clean PID structures + * + * Clean the allocated structures. + */ +static void clean_pid_structures() { + struct pid_stat *pids = root_of_pids; + while (pids) { + freez(cachestat_pid[pids->pid]); + + pids = pids->next; + } +} + +/** + * Clean up the main thread. + * + * @param ptr thread data. + */ +static void ebpf_cachestat_cleanup(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) + return; + + heartbeat_t hb; + heartbeat_init(&hb); + uint32_t tick = 2*USEC_PER_MS; + while (!read_thread_closed) { + usec_t dt = heartbeat_next(&hb, tick); + UNUSED(dt); + } + + clean_pid_structures(); + freez(cachestat_pid); + + ebpf_cleanup_publish_syscall(cachestat_counter_publish_aggregated); + + freez(cachestat_vector); + freez(cachestat_hash_values); + + struct bpf_program *prog; + size_t i = 0 ; + bpf_object__for_each_program(prog, objects) { + bpf_link__destroy(probe_links[i]); + i++; + } + bpf_object__close(objects); +} + +/***************************************************************** + * + * COMMON FUNCTIONS + * + *****************************************************************/ + +/** + * Update publish + * + * Update publish values before to write dimension. + * + * @param out strcuture that will receive data. + * @param mpa calls for mark_page_accessed during the last second. + * @param mbd calls for mark_buffer_dirty during the last second. + * @param apcl calls for add_to_page_cache_lru during the last second. + * @param apd calls for account_page_dirtied during the last second. + */ +void cachestat_update_publish(netdata_publish_cachestat_t *out, uint64_t mpa, uint64_t mbd, + uint64_t apcl, uint64_t apd) +{ + // Adapted algorithm from https://github.com/iovisor/bcc/blob/master/tools/cachestat.py#L126-L138 + calculated_number total = (calculated_number) (((long long)mpa) - ((long long)mbd)); + if (total < 0) + total = 0; + + calculated_number misses = (calculated_number) ( ((long long) apcl) - ((long long) apd) ); + if (misses < 0) + misses = 0; + + // If hits are < 0, then its possible misses are overestimate due to possibly page cache read ahead adding + // more pages than needed. In this case just assume misses as total and reset hits. + calculated_number hits = total - misses; + if (hits < 0 ) { + misses = total; + hits = 0; + } + + calculated_number ratio = (total > 0) ? hits/total : 0; + + out->ratio = (long long )(ratio*100); + out->hit = (long long)hits; + out->miss = (long long)misses; +} + +/** + * Save previous values + * + * Save values used this time. + * + * @param publish + */ +static void save_previous_values(netdata_publish_cachestat_t *publish) { + publish->prev.mark_page_accessed = cachestat_hash_values[NETDATA_KEY_CALLS_MARK_PAGE_ACCESSED]; + publish->prev.account_page_dirtied = cachestat_hash_values[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED]; + publish->prev.add_to_page_cache_lru = cachestat_hash_values[NETDATA_KEY_CALLS_ADD_TO_PAGE_CACHE_LRU]; + publish->prev.mark_buffer_dirty = cachestat_hash_values[NETDATA_KEY_CALLS_MARK_BUFFER_DIRTY]; +} + +/** + * Calculate statistics + * + * @param publish the structure where we will store the data. + */ +static void calculate_stats(netdata_publish_cachestat_t *publish) { + if (!publish->prev.mark_page_accessed) { + save_previous_values(publish); + return; + } + + uint64_t mpa = cachestat_hash_values[NETDATA_KEY_CALLS_MARK_PAGE_ACCESSED] - publish->prev.mark_page_accessed; + uint64_t mbd = cachestat_hash_values[NETDATA_KEY_CALLS_MARK_BUFFER_DIRTY] - publish->prev.mark_buffer_dirty; + uint64_t apcl = cachestat_hash_values[NETDATA_KEY_CALLS_ADD_TO_PAGE_CACHE_LRU] - publish->prev.add_to_page_cache_lru; + uint64_t apd = cachestat_hash_values[NETDATA_KEY_CALLS_ACCOUNT_PAGE_DIRTIED] - publish->prev.account_page_dirtied; + + save_previous_values(publish); + + // We are changing the original algorithm to have a smooth ratio. + cachestat_update_publish(publish, mpa, mbd, apcl, apd); +} + + +/***************************************************************** + * + * APPS + * + *****************************************************************/ + +/** + * Apps Accumulator + * + * Sum all values read from kernel and store in the first address. + * + * @param out the vector with read values. + */ +static void cachestat_apps_accumulator(netdata_cachestat_pid_t *out) +{ + int i, end = (running_on_kernel >= NETDATA_KERNEL_V4_15) ? ebpf_nprocs : 1; + netdata_cachestat_pid_t *total = &out[0]; + for (i = 1; i < end; i++) { + netdata_cachestat_pid_t *w = &out[i]; + total->account_page_dirtied += w->account_page_dirtied; + total->add_to_page_cache_lru += w->add_to_page_cache_lru; + total->mark_buffer_dirty += w->mark_buffer_dirty; + total->mark_page_accessed += w->mark_page_accessed; + } +} + +/** + * Save Pid values + * + * Save the current values inside the structure + * + * @param out vector used to plot charts + * @param publish vector with values read from hash tables. + */ +static inline void cachestat_save_pid_values(netdata_publish_cachestat_t *out, netdata_cachestat_pid_t *publish) +{ + if (!out->current.mark_page_accessed) { + memcpy(&out->current, &publish[0], sizeof(netdata_cachestat_pid_t)); + return; + } + + memcpy(&out->prev, &out->current, sizeof(netdata_cachestat_pid_t)); + memcpy(&out->current, &publish[0], sizeof(netdata_cachestat_pid_t)); +} + +/** + * Fill PID + * + * Fill PID structures + * + * @param current_pid pid that we are collecting data + * @param out values read from hash tables; + */ +static void cachestat_fill_pid(uint32_t current_pid, netdata_cachestat_pid_t *publish) +{ + netdata_publish_cachestat_t *curr = cachestat_pid[current_pid]; + if (!curr) { + curr = callocz(1, sizeof(netdata_publish_cachestat_t)); + cachestat_pid[current_pid] = curr; + + cachestat_save_pid_values(curr, publish); + return; + } + + cachestat_save_pid_values(curr, publish); +} + +/** + * Read APPS table + * + * Read the apps table and store data inside the structure. + */ +static void read_apps_table() +{ + netdata_cachestat_pid_t *cv = cachestat_vector; + uint32_t key; + struct pid_stat *pids = root_of_pids; + int fd = map_fd[NETDATA_CACHESTAT_PID_STATS]; + size_t length = sizeof(netdata_cachestat_pid_t)*ebpf_nprocs; + while (pids) { + key = pids->pid; + + if (bpf_map_lookup_elem(fd, &key, cv)) { + pids = pids->next; + continue; + } + + cachestat_apps_accumulator(cv); + + cachestat_fill_pid(key, cv); + + // We are cleaning to avoid passing data read from one process to other. + memset(cv, 0, length); + + pids = pids->next; + } +} + +/** + * Create apps charts + * + * Call ebpf_create_chart to create the charts on apps submenu. + * + * @param em a pointer to the structure with the default values. + */ +void ebpf_cachestat_create_apps_charts(struct ebpf_module *em, void *ptr) +{ + UNUSED(em); + struct target *root = ptr; + ebpf_create_charts_on_apps(NETDATA_CACHESTAT_HIT_RATIO_CHART, + "The ratio is calculated dividing the Hit pages per total cache accesses without counting dirties.", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_APPS_CACHESTAT_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20090, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root); + + ebpf_create_charts_on_apps(NETDATA_CACHESTAT_DIRTY_CHART, + "Number of pages marked as dirty. When a page is called dirty, this means that the data stored inside the page needs to be written to devices.", + EBPF_CACHESTAT_DIMENSION_PAGE, + NETDATA_APPS_CACHESTAT_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20091, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], + root); + + ebpf_create_charts_on_apps(NETDATA_CACHESTAT_HIT_CHART, + "Number of cache access without counting dirty pages and page additions.", + EBPF_CACHESTAT_DIMENSION_HITS, + NETDATA_APPS_CACHESTAT_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20092, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root); + + ebpf_create_charts_on_apps(NETDATA_CACHESTAT_MISSES_CHART, + "Page caches added without counting dirty pages", + EBPF_CACHESTAT_DIMENSION_MISSES, + NETDATA_APPS_CACHESTAT_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 20093, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX], + root); +} + +/***************************************************************** + * + * MAIN LOOP + * + *****************************************************************/ + +/** + * Read global counter + * + * Read the table with number of calls for all functions + */ +static void read_global_table() +{ + uint32_t idx; + netdata_idx_t *val = cachestat_hash_values; + netdata_idx_t stored; + int fd = map_fd[NETDATA_CACHESTAT_GLOBAL_STATS]; + + for (idx = NETDATA_KEY_CALLS_ADD_TO_PAGE_CACHE_LRU; idx < NETDATA_CACHESTAT_END; idx++) { + if (!bpf_map_lookup_elem(fd, &idx, &stored)) { + val[idx] = stored; + } + } +} + +/** + * Socket read hash + * + * This is the thread callback. + * This thread is necessary, because we cannot freeze the whole plugin to read the data on very busy socket. + * + * @param ptr It is a NULL value for this thread. + * + * @return It always returns NULL. + */ +void *ebpf_cachestat_read_hash(void *ptr) +{ + read_thread_closed = 0; + + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + + usec_t step = NETDATA_LATENCY_CACHESTAT_SLEEP_MS * em->update_time; + int apps = em->apps_charts; + while (!close_ebpf_plugin) { + usec_t dt = heartbeat_next(&hb, step); + (void)dt; + + read_global_table(); + + if (apps) + read_apps_table(); + } + read_thread_closed = 1; + + return NULL; +} + +/** + * Send global + * + * Send global charts to Netdata + */ +static void cachestat_send_global(netdata_publish_cachestat_t *publish) +{ + calculate_stats(publish); + + netdata_publish_syscall_t *ptr = cachestat_counter_publish_aggregated; + // The algorithm sets this value to zero sometimes, we are not written them to have a smooth chart + if (publish->ratio) { + ebpf_one_dimension_write_charts( + NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_HIT_RATIO_CHART, ptr[NETDATA_CACHESTAT_IDX_RATIO].dimension, + publish->ratio); + } + + ebpf_one_dimension_write_charts( + NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_DIRTY_CHART, ptr[NETDATA_CACHESTAT_IDX_DIRTY].dimension, + cachestat_hash_values[NETDATA_KEY_CALLS_MARK_BUFFER_DIRTY]); + + ebpf_one_dimension_write_charts( + NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_HIT_CHART, ptr[NETDATA_CACHESTAT_IDX_HIT].dimension, publish->hit); + + ebpf_one_dimension_write_charts( + NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_MISSES_CHART, ptr[NETDATA_CACHESTAT_IDX_MISS].dimension, + publish->miss); +} + +/** + * Cachestat sum PIDs + * + * Sum values for all PIDs associated to a group + * + * @param publish output structure. + * @param root structure with listed IPs + */ +void ebpf_cachestat_sum_pids(netdata_publish_cachestat_t *publish, struct pid_on_target *root) +{ + memcpy(&publish->prev, &publish->current,sizeof(publish->current)); + memset(&publish->current, 0, sizeof(publish->current)); + + netdata_cachestat_pid_t *dst = &publish->current; + while (root) { + int32_t pid = root->pid; + netdata_publish_cachestat_t *w = cachestat_pid[pid]; + if (w) { + netdata_cachestat_pid_t *src = &w->current; + dst->account_page_dirtied += src->account_page_dirtied; + dst->add_to_page_cache_lru += src->add_to_page_cache_lru; + dst->mark_buffer_dirty += src->mark_buffer_dirty; + dst->mark_page_accessed += src->mark_page_accessed; + } + + root = root->next; + } +} + +/** + * Send data to Netdata calling auxiliar functions. + * + * @param root the target list. +*/ +void ebpf_cache_send_apps_data(struct target *root) +{ + struct target *w; + collected_number value; + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_HIT_RATIO_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + ebpf_cachestat_sum_pids(&w->cachestat, w->root_pid); + netdata_cachestat_pid_t *current = &w->cachestat.current; + netdata_cachestat_pid_t *prev = &w->cachestat.prev; + + uint64_t mpa = current->mark_page_accessed - prev->mark_page_accessed; + uint64_t mbd = current->mark_buffer_dirty - prev->mark_buffer_dirty; + w->cachestat.dirty = current->mark_buffer_dirty; + uint64_t apcl = current->add_to_page_cache_lru - prev->add_to_page_cache_lru; + uint64_t apd = current->account_page_dirtied - prev->account_page_dirtied; + + cachestat_update_publish(&w->cachestat, mpa, mbd, apcl, apd); + value = (collected_number) w->cachestat.ratio; + // Here we are using different approach to have a chart more smooth + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_DIRTY_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = (collected_number) w->cachestat.dirty; + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_HIT_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = (collected_number) w->cachestat.hit; + write_chart_dimension(w->name, value); + } + } + write_end_chart(); + + write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_MISSES_CHART); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) { + value = (collected_number) w->cachestat.miss; + write_chart_dimension(w->name, value); + } + } + write_end_chart(); +} + +/** +* Main loop for this collector. +*/ +static void cachestat_collector(ebpf_module_t *em) +{ + cachestat_threads.thread = mallocz(sizeof(netdata_thread_t)); + cachestat_threads.start_routine = ebpf_cachestat_read_hash; + + map_fd = cachestat_data.map_fd; + + netdata_thread_create(cachestat_threads.thread, cachestat_threads.name, NETDATA_THREAD_OPTION_JOINABLE, + ebpf_cachestat_read_hash, em); + + netdata_publish_cachestat_t publish; + memset(&publish, 0, sizeof(publish)); + int apps = em->apps_charts; + while (!close_ebpf_plugin) { + pthread_mutex_lock(&collect_data_mutex); + pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); + + pthread_mutex_lock(&lock); + + cachestat_send_global(&publish); + + if (apps) + ebpf_cache_send_apps_data(apps_groups_root_target); + + pthread_mutex_unlock(&lock); + pthread_mutex_unlock(&collect_data_mutex); + } +} + +/***************************************************************** + * + * INITIALIZE THREAD + * + *****************************************************************/ + +/** + * Create global charts + * + * Call ebpf_create_chart to create the charts for the collector. + */ +static void ebpf_create_memory_charts() +{ + ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_HIT_RATIO_CHART, + "Hit is calculating using total cache added without dirties per total added because of red misses.", + EBPF_CACHESTAT_DIMENSION_HITS, NETDATA_CACHESTAT_SUBMENU, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + 21100, + ebpf_create_global_dimension, + cachestat_counter_publish_aggregated, 1); + + ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_DIRTY_CHART, + "Number of dirty pages added to the page cache.", + EBPF_CACHESTAT_DIMENSION_PAGE, NETDATA_CACHESTAT_SUBMENU, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + 21101, + ebpf_create_global_dimension, + &cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_DIRTY], 1); + + ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_HIT_CHART, + "Hits are function calls that Netdata counts.", + EBPF_CACHESTAT_DIMENSION_HITS, NETDATA_CACHESTAT_SUBMENU, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + 21102, + ebpf_create_global_dimension, + &cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_HIT], 1); + + ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_CACHESTAT_MISSES_CHART, + "Misses are function calls that Netdata counts.", + EBPF_CACHESTAT_DIMENSION_MISSES, NETDATA_CACHESTAT_SUBMENU, + NULL, + NETDATA_EBPF_CHART_TYPE_LINE, + 21103, + ebpf_create_global_dimension, + &cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_MISS], 1); + + fflush(stdout); +} + +/** + * Allocate vectors used with this thread. + * + * We are not testing the return, because callocz does this and shutdown the software + * case it was not possible to allocate. + * + * @param length is the length for the vectors used inside the collector. + */ +static void ebpf_cachestat_allocate_global_vectors(size_t length) +{ + cachestat_pid = callocz((size_t)pid_max, sizeof(netdata_publish_cachestat_t *)); + cachestat_vector = callocz((size_t)ebpf_nprocs, sizeof(netdata_cachestat_pid_t)); + + cachestat_hash_values = callocz(length, sizeof(netdata_idx_t)); + + memset(cachestat_counter_aggregated_data, 0, length * sizeof(netdata_syscall_stat_t)); + memset(cachestat_counter_publish_aggregated, 0, length * sizeof(netdata_publish_syscall_t)); +} + +/***************************************************************** + * + * MAIN THREAD + * + *****************************************************************/ + +/** + * Cachestat thread + * + * Thread used to make cachestat thread + * + * @param ptr a pointer to `struct ebpf_module` + * + * @return It always return NULL + */ +void *ebpf_cachestat_thread(void *ptr) +{ + netdata_thread_cleanup_push(ebpf_cachestat_cleanup, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + fill_ebpf_data(&cachestat_data); + + ebpf_update_module(em, &cachestat_config, NETDATA_CACHESTAT_CONFIG_FILE); + + if (!em->enabled) + goto endcachestat; + + pthread_mutex_lock(&lock); + ebpf_cachestat_allocate_global_vectors(NETDATA_CACHESTAT_END); + if (ebpf_update_kernel(&cachestat_data)) { + pthread_mutex_unlock(&lock); + goto endcachestat; + } + + probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects, cachestat_data.map_fd); + if (!probe_links) { + pthread_mutex_unlock(&lock); + goto endcachestat; + } + + int algorithms[NETDATA_CACHESTAT_END] = { + NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_ABSOLUTE_IDX, NETDATA_EBPF_ABSOLUTE_IDX + }; + + ebpf_global_labels(cachestat_counter_aggregated_data, cachestat_counter_publish_aggregated, + cachestat_counter_dimension_name, cachestat_counter_dimension_name, + algorithms, NETDATA_CACHESTAT_END); + + ebpf_create_memory_charts(); + + pthread_mutex_unlock(&lock); + + cachestat_collector(em); + +endcachestat: + netdata_thread_cleanup_pop(1); + return NULL; +} -- cgit v1.2.3