diff options
Diffstat (limited to '')
-rw-r--r-- | collectors/ebpf.plugin/ebpf_mdflush.c | 456 |
1 files changed, 456 insertions, 0 deletions
diff --git a/collectors/ebpf.plugin/ebpf_mdflush.c b/collectors/ebpf.plugin/ebpf_mdflush.c new file mode 100644 index 00000000..fe33ff6a --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_mdflush.c @@ -0,0 +1,456 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_mdflush.h" + +struct config mdflush_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +#define MDFLUSH_MAP_COUNT 0 +static ebpf_local_maps_t mdflush_maps[] = { + { + .name = "tbl_mdflush", + .internal_input = 1024, + .user_input = 0, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED, +#ifdef LIBBPF_MAJOR_VERSION + .map_type = BPF_MAP_TYPE_PERCPU_HASH +#endif + }, + /* end */ + { + .name = NULL, + .internal_input = 0, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + } +}; + +netdata_ebpf_targets_t mdflush_targets[] = { {.name = "md_flush_request", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; + + +// store for "published" data from the reader thread, which the collector +// thread will write to netdata agent. +static avl_tree_lock mdflush_pub; + +// tmp store for mdflush values we get from a per-CPU eBPF map. +static mdflush_ebpf_val_t *mdflush_ebpf_vals = NULL; + +#ifdef LIBBPF_MAJOR_VERSION +/** + * Disable probes + * + * Disable probes to use trampolines. + * + * @param obj the loaded object structure. + */ +static inline void ebpf_disable_probes(struct mdflush_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_md_flush_request_kprobe, false); +} + +/** + * Disable trampolines + * + * Disable trampoliness to use probes. + * + * @param obj the loaded object structure. + */ +static inline void ebpf_disable_trampoline(struct mdflush_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_md_flush_request_fentry, false); +} + +/** + * Set Trampoline + * + * Define target to attach trampoline + * + * @param obj the loaded object structure. + */ +static void ebpf_set_trampoline_target(struct mdflush_bpf *obj) +{ + bpf_program__set_attach_target(obj->progs.netdata_md_flush_request_fentry, 0, + mdflush_targets[NETDATA_MD_FLUSH_REQUEST].name); +} + +/** + * Load probe + * + * Load probe to monitor internal function. + * + * @param obj the loaded object structure. + */ +static inline int ebpf_load_probes(struct mdflush_bpf *obj) +{ + obj->links.netdata_md_flush_request_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_md_flush_request_kprobe, + false, + mdflush_targets[NETDATA_MD_FLUSH_REQUEST].name); + return libbpf_get_error(obj->links.netdata_md_flush_request_kprobe); +} + +/** + * Load and Attach + * + * Load and attach bpf codes according user selection. + * + * @param obj the loaded object structure. + * @param em the structure with configuration + */ +static inline int ebpf_mdflush_load_and_attach(struct mdflush_bpf *obj, ebpf_module_t *em) +{ + int mode = em->targets[NETDATA_MD_FLUSH_REQUEST].mode; + if (mode == EBPF_LOAD_TRAMPOLINE) { // trampoline + ebpf_disable_probes(obj); + + ebpf_set_trampoline_target(obj); + } else // kprobe + ebpf_disable_trampoline(obj); + + int ret = mdflush_bpf__load(obj); + if (ret) { + fprintf(stderr, "failed to load BPF object: %d\n", ret); + return -1; + } + + if (mode == EBPF_LOAD_TRAMPOLINE) + ret = mdflush_bpf__attach(obj); + else + ret = ebpf_load_probes(obj); + + return ret; +} + +#endif + +/** + * Obsolete global + * + * Obsolete global charts created by thread. + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_mdflush_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete("mdstat", + "mdstat_flush", + "", + "MD flushes", + "flushes", + "flush (eBPF)", + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + NETDATA_CHART_PRIO_MDSTAT_FLUSH, + em->update_every); +} + +/** + * MDflush exit + * + * Cancel thread and exit. + * + * @param ptr thread data. + */ +static void mdflush_exit(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + + ebpf_obsolete_mdflush_global(em); + + pthread_mutex_unlock(&lock); + fflush(stdout); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + + if (em->objects) { + ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } + + pthread_mutex_lock(&ebpf_exit_cleanup); + em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); + pthread_mutex_unlock(&ebpf_exit_cleanup); +} + +/** + * Compare mdflush values. + * + * @param a `netdata_mdflush_t *`. + * @param b `netdata_mdflush_t *`. + * + * @return 0 if a==b, 1 if a>b, -1 if a<b. +*/ +static int mdflush_val_cmp(void *a, void *b) +{ + netdata_mdflush_t *ptr1 = a; + netdata_mdflush_t *ptr2 = b; + + if (ptr1->unit > ptr2->unit) { + return 1; + } + else if (ptr1->unit < ptr2->unit) { + return -1; + } + else { + return 0; + } +} + +/** + * Read count map + * + * Read the hash table and store data to allocated vectors. + * + * @param maps_per_core do I need to read all cores? + */ +static void mdflush_read_count_map(int maps_per_core) +{ + int mapfd = mdflush_maps[MDFLUSH_MAP_COUNT].map_fd; + mdflush_ebpf_key_t curr_key = (uint32_t)-1; + mdflush_ebpf_key_t key = (uint32_t)-1; + netdata_mdflush_t search_v; + netdata_mdflush_t *v = NULL; + + while (bpf_map_get_next_key(mapfd, &curr_key, &key) == 0) { + curr_key = key; + + // get val for this key. + int test = bpf_map_lookup_elem(mapfd, &key, mdflush_ebpf_vals); + if (unlikely(test < 0)) { + continue; + } + + // is this record saved yet? + // + // if not, make a new one, mark it as unsaved for now, and continue; we + // will insert it at the end after all of its values are correctly set, + // so that we can safely publish it to the collector within a single, + // short locked operation. + // + // otherwise simply continue; we will only update the flush count, + // which can be republished safely without a lock. + // + // NOTE: lock isn't strictly necessary for this initial search, as only + // this thread does writing, but the AVL is using a read-write lock so + // there is no congestion. + bool v_is_new = false; + search_v.unit = key; + v = (netdata_mdflush_t *)avl_search_lock( + &mdflush_pub, + (avl_t *)&search_v + ); + if (unlikely(v == NULL)) { + // flush count can only be added reliably at a later time. + // when they're added, only then will we AVL insert. + v = callocz(1, sizeof(netdata_mdflush_t)); + v->unit = key; + sprintf(v->disk_name, "md%u", key); + v->dim_exists = false; + + v_is_new = true; + } + + // we must add up count value for this record across all CPUs. + uint64_t total_cnt = 0; + int i; + int end = (!maps_per_core) ? 1 : ebpf_nprocs; + for (i = 0; i < end; i++) { + total_cnt += mdflush_ebpf_vals[i]; + } + + // can now safely publish count for existing records. + v->cnt = total_cnt; + + // can now safely publish new record. + if (v_is_new) { + avl_t *check = avl_insert_lock(&mdflush_pub, (avl_t *)v); + if (check != (avl_t *)v) { + netdata_log_error("Internal error, cannot insert the AVL tree."); + } + } + } +} + +static void mdflush_create_charts(int update_every) +{ + ebpf_create_chart( + "mdstat", + "mdstat_flush", + "MD flushes", + "flushes", + "flush (eBPF)", + "md.flush", + NETDATA_EBPF_CHART_TYPE_STACKED, + NETDATA_CHART_PRIO_MDSTAT_FLUSH, + NULL, NULL, 0, update_every, + NETDATA_EBPF_MODULE_NAME_MDFLUSH + ); + + fflush(stdout); +} + +// callback for avl tree traversal on `mdflush_pub`. +static int mdflush_write_dims(void *entry, void *data) +{ + UNUSED(data); + + netdata_mdflush_t *v = entry; + + // records get dynamically added in, so add the dim if we haven't yet. + if (!v->dim_exists) { + ebpf_write_global_dimension( + v->disk_name, v->disk_name, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX] + ); + v->dim_exists = true; + } + + write_chart_dimension(v->disk_name, v->cnt); + + return 1; +} + +/** +* Main loop for this collector. +*/ +static void mdflush_collector(ebpf_module_t *em) +{ + mdflush_ebpf_vals = callocz(ebpf_nprocs, sizeof(mdflush_ebpf_val_t)); + + int update_every = em->update_every; + avl_init_lock(&mdflush_pub, mdflush_val_cmp); + + // create chart and static dims. + pthread_mutex_lock(&lock); + mdflush_create_charts(update_every); + ebpf_update_stats(&plugin_statistics, em); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); + pthread_mutex_unlock(&lock); + + // loop and read from published data until ebpf plugin is closed. + heartbeat_t hb; + heartbeat_init(&hb); + int counter = update_every - 1; + int maps_per_core = em->maps_per_core; + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_plugin_exit && running_time < lifetime) { + (void)heartbeat_next(&hb, USEC_PER_SEC); + + if (ebpf_plugin_exit || ++counter != update_every) + continue; + + counter = 0; + mdflush_read_count_map(maps_per_core); + pthread_mutex_lock(&lock); + // write dims now for all hitherto discovered devices. + ebpf_write_begin_chart("mdstat", "mdstat_flush", ""); + avl_traverse_lock(&mdflush_pub, mdflush_write_dims, NULL); + ebpf_write_end_chart(); + + pthread_mutex_unlock(&lock); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); + } +} + +/* + * Load BPF + * + * Load BPF files. + * + * @param em the structure with configuration + * + * @return It returns 0 on success and -1 otherwise. + */ +static int ebpf_mdflush_load_bpf(ebpf_module_t *em) +{ + int ret = 0; + if (em->load & EBPF_LOAD_LEGACY) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + ret = -1; + } + } +#ifdef LIBBPF_MAJOR_VERSION + else { + mdflush_bpf_obj = mdflush_bpf__open(); + if (!mdflush_bpf_obj) + ret = -1; + else { + ret = ebpf_mdflush_load_and_attach(mdflush_bpf_obj, em); + if (ret && em->targets[NETDATA_MD_FLUSH_REQUEST].mode == EBPF_LOAD_TRAMPOLINE) { + mdflush_bpf__destroy(mdflush_bpf_obj); + mdflush_bpf_obj = mdflush_bpf__open(); + if (!mdflush_bpf_obj) + ret = -1; + else { + em->targets[NETDATA_MD_FLUSH_REQUEST].mode = EBPF_LOAD_PROBE; + ret = ebpf_mdflush_load_and_attach(mdflush_bpf_obj, em); + } + } + } + } +#endif + + return ret; +} + + +/** + * mdflush thread. + * + * @param ptr a `ebpf_module_t *`. + * @return always NULL. + */ +void *ebpf_mdflush_thread(void *ptr) +{ + netdata_thread_cleanup_push(mdflush_exit, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = mdflush_maps; + + char *md_flush_request = ebpf_find_symbol("md_flush_request"); + if (!md_flush_request) { + netdata_log_error("Cannot monitor MD devices, because md is not loaded."); + goto endmdflush; + } + +#ifdef LIBBPF_MAJOR_VERSION + ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel); + ebpf_adjust_thread_load(em, default_btf); +#endif + if (ebpf_mdflush_load_bpf(em)) { + netdata_log_error("Cannot load eBPF software."); + goto endmdflush; + } + + mdflush_collector(em); + +endmdflush: + freez(md_flush_request); + ebpf_update_disabled_plugin_stats(em); + + netdata_thread_cleanup_pop(1); + + return NULL; +} |