diff options
Diffstat (limited to 'collectors')
161 files changed, 29639 insertions, 3248 deletions
diff --git a/collectors/all.h b/collectors/all.h index 653729bb..0ce40c75 100644 --- a/collectors/all.h +++ b/collectors/all.h @@ -51,6 +51,7 @@ #define NETDATA_CHART_PRIO_SYSTEM_INTERRUPTS 1000 #define NETDATA_CHART_PRIO_SYSTEM_DEV_INTR 1000 // freebsd only #define NETDATA_CHART_PRIO_SYSTEM_SOFT_INTR 1100 // freebsd only +#define NETDATA_CHART_PRIO_SYSTEM_FILES_NR 1000 #define NETDATA_CHART_PRIO_SYSTEM_ENTROPY 1000 #define NETDATA_CHART_PRIO_SYSTEM_UPTIME 1000 #define NETDATA_CHART_PRIO_CLOCK_SYNC_STATE 1100 @@ -395,16 +396,17 @@ #define ML_CHART_PRIO_DETECTOR_EVENTS 39183 // [netdata.ml] charts -#define NETDATA_ML_CHART_PRIO_MACHINE_LEARNING_STATUS 890001 -#define NETDATA_ML_CHART_PRIO_METRIC_TYPES 890002 -#define NETDATA_ML_CHART_PRIO_TRAINING_STATUS 890003 +#define NETDATA_ML_CHART_RUNNING 890001 +#define NETDATA_ML_CHART_PRIO_MACHINE_LEARNING_STATUS 890002 +#define NETDATA_ML_CHART_PRIO_METRIC_TYPES 890003 +#define NETDATA_ML_CHART_PRIO_TRAINING_STATUS 890004 -#define NETDATA_ML_CHART_PRIO_PREDICTION_USAGE 890004 -#define NETDATA_ML_CHART_PRIO_TRAINING_USAGE 890005 +#define NETDATA_ML_CHART_PRIO_PREDICTION_USAGE 890005 +#define NETDATA_ML_CHART_PRIO_TRAINING_USAGE 890006 -#define NETDATA_ML_CHART_PRIO_QUEUE_STATS 890006 -#define NETDATA_ML_CHART_PRIO_TRAINING_TIME_STATS 890007 -#define NETDATA_ML_CHART_PRIO_TRAINING_RESULTS 890008 +#define NETDATA_ML_CHART_PRIO_QUEUE_STATS 890007 +#define NETDATA_ML_CHART_PRIO_TRAINING_TIME_STATS 890008 +#define NETDATA_ML_CHART_PRIO_TRAINING_RESULTS 890009 #define NETDATA_ML_CHART_FAMILY "machine learning" #define NETDATA_ML_PLUGIN "ml.plugin" diff --git a/collectors/apps.plugin/apps_plugin.c b/collectors/apps.plugin/apps_plugin.c index 3132b224..105c5426 100644 --- a/collectors/apps.plugin/apps_plugin.c +++ b/collectors/apps.plugin/apps_plugin.c @@ -2,7 +2,7 @@ /* * netdata apps.plugin - * (C) Copyright 2016-2017 Costa Tsaousis <costa@tsaousis.gr> + * (C) Copyright 2023 Netdata Inc. 
* Released under GPL v3+ */ @@ -106,6 +106,7 @@ static int enable_file_charts = 1, max_fds_cache_seconds = 60, #endif + enable_function_cmdline = 0, enable_detailed_uptime_charts = 0, enable_users_charts = 1, enable_groups_charts = 1, @@ -140,6 +141,17 @@ static const char *proc_states[] = { // internal flags // handled in code (automatically set) +// log each problem once per process +// log flood protection flags (log_thrown) +typedef enum __attribute__((packed)) { + PID_LOG_IO = (1 << 0), + PID_LOG_STATUS = (1 << 1), + PID_LOG_CMDLINE = (1 << 2), + PID_LOG_FDS = (1 << 3), + PID_LOG_STAT = (1 << 4), + PID_LOG_LIMITS = (1 << 5), +} PID_LOG; + static int show_guest_time = 0, // 1 when guest values are collected show_guest_time_old = 0, @@ -168,7 +180,7 @@ static size_t // the metrics. This results in utilization that exceeds the total utilization // of the system. // -// With normalization we align the per-process utilization, to the total of +// During normalization, we align the per-process utilization, to the total of // the system. We first consume the exited children utilization and it the // collected values is above the total, we proportionally scale each reported // metric. 
@@ -210,6 +222,27 @@ struct openfds { kernel_uint_t other; }; +#define pid_openfds_sum(p) ((p)->openfds.files + (p)->openfds.pipes + (p)->openfds.sockets + (p)->openfds.inotifies + (p)->openfds.eventfds + (p)->openfds.timerfds + (p)->openfds.signalfds + (p)->openfds.eventpolls + (p)->openfds.other) + +struct pid_limits { +// kernel_uint_t max_cpu_time; +// kernel_uint_t max_file_size; +// kernel_uint_t max_data_size; +// kernel_uint_t max_stack_size; +// kernel_uint_t max_core_file_size; +// kernel_uint_t max_resident_set; +// kernel_uint_t max_processes; + kernel_uint_t max_open_files; +// kernel_uint_t max_locked_memory; +// kernel_uint_t max_address_space; +// kernel_uint_t max_file_locks; +// kernel_uint_t max_pending_signals; +// kernel_uint_t max_msgqueue_size; +// kernel_uint_t max_nice_priority; +// kernel_uint_t max_realtime_priority; +// kernel_uint_t max_realtime_timeout; +}; + // ---------------------------------------------------------------------------- // target // @@ -267,6 +300,8 @@ struct target { struct openfds openfds; + NETDATA_DOUBLE max_open_files_percent; + kernel_uint_t starttime; kernel_uint_t collected_starttime; kernel_uint_t uptime_min; @@ -317,12 +352,6 @@ struct pid_fd { struct pid_stat { int32_t pid; - char comm[MAX_COMPARE_NAME + 1]; - char *cmdline; - - uint32_t log_thrown; - - char state; int32_t ppid; // int32_t pgrp; // int32_t session; @@ -330,6 +359,11 @@ struct pid_stat { // int32_t tpgid; // uint64_t flags; + char state; + + char comm[MAX_COMPARE_NAME + 1]; + char *cmdline; + // these are raw values collected kernel_uint_t minflt_raw; kernel_uint_t cminflt_raw; @@ -414,22 +448,30 @@ struct pid_stat { kernel_uint_t io_storage_bytes_written; kernel_uint_t io_cancelled_write_bytes; + kernel_uint_t uptime; + struct pid_fd *fds; // array of fds it uses size_t fds_size; // the size of the fds array struct openfds openfds; + struct pid_limits limits; - int children_count; // number of processes directly referencing this - unsigned 
char keep:1; // 1 when we need to keep this process in memory even after it exited - int keeploops; // increases by 1 every time keep is 1 and updated 0 - unsigned char updated:1; // 1 when the process is currently running - unsigned char merged:1; // 1 when it has been merged to its parent - unsigned char read:1; // 1 when we have already read this process for this iteration + NETDATA_DOUBLE openfds_limits_percent; int sortlist; // higher numbers = top on the process tree // each process gets a unique number + int children_count; // number of processes directly referencing this + int keeploops; // increases by 1 every time keep is 1 and updated 0 + + PID_LOG log_thrown; + + bool keep; // true when we need to keep this process in memory even after it exited + bool updated; // true when the process is currently running + bool merged; // true when it has been merged to its parent + bool read; // true when we have already read this process for this iteration bool matched_by_config; + struct target *target; // app_groups.conf targets struct target *user_target; // uid based targets struct target *group_target; // gid based targets @@ -439,8 +481,7 @@ struct pid_stat { usec_t io_collected_usec; usec_t last_io_collected_usec; - - kernel_uint_t uptime; + usec_t last_limits_collected_usec; char *fds_dirname; // the full directory name in /proc/PID/fd @@ -448,6 +489,7 @@ struct pid_stat { char *status_filename; char *io_filename; char *cmdline_filename; + char *limits_filename; struct pid_stat *parent; struct pid_stat *prev; @@ -458,14 +500,6 @@ size_t pagesize; kernel_uint_t global_uptime; -// log each problem once per process -// log flood protection flags (log_thrown) -#define PID_LOG_IO 0x00000001 -#define PID_LOG_STATUS 0x00000002 -#define PID_LOG_CMDLINE 0x00000004 -#define PID_LOG_FDS 0x00000008 -#define PID_LOG_STAT 0x00000010 - static struct pid_stat *root_of_pids = NULL, // global list of all processes running **all_pids = NULL; // to avoid allocations, we 
pre-allocate @@ -524,8 +558,6 @@ static int all_files_len = 0, all_files_size = 0; -long currentmaxfds = 0; - // ---------------------------------------------------------------------------- // read users and groups from files @@ -665,7 +697,7 @@ int read_user_or_group_ids(struct user_or_group_ids *ids, struct timespec *last_ } else { if(unlikely(avl_insert(&ids->index, (avl_t *) user_or_group_id) != (void *) user_or_group_id)) { - error("INTERNAL ERROR: duplicate indexing of id during realloc"); + netdata_log_error("INTERNAL ERROR: duplicate indexing of id during realloc"); }; user_or_group_id->next = ids->root; @@ -681,7 +713,7 @@ int read_user_or_group_ids(struct user_or_group_ids *ids, struct timespec *last_ while(user_or_group_id) { if(unlikely(!user_or_group_id->updated)) { if(unlikely((struct user_or_group_id *)avl_remove(&ids->index, (avl_t *) user_or_group_id) != user_or_group_id)) - error("INTERNAL ERROR: removal of unused id from index, removed a different id"); + netdata_log_error("INTERNAL ERROR: removal of unused id from index, removed a different id"); if(prev_user_id) prev_user_id->next = user_or_group_id->next; @@ -946,7 +978,7 @@ static int read_apps_groups_conf(const char *path, const char *file) // add this target struct target *n = get_apps_groups_target(s, w, name); if(!n) { - error("Cannot create target '%s' (line %zu, word %zu)", s, line, word); + netdata_log_error("Cannot create target '%s' (line %zu, word %zu)", s, line, word); continue; } @@ -996,7 +1028,7 @@ static inline void del_pid_entry(pid_t pid) { struct pid_stat *p = all_pids[pid]; if(unlikely(!p)) { - error("attempted to free pid %d that is not allocated.", pid); + netdata_log_error("attempted to free pid %d that is not allocated.", pid); return; } @@ -1018,6 +1050,7 @@ static inline void del_pid_entry(pid_t pid) { freez(p->fds_dirname); freez(p->stat_filename); freez(p->status_filename); + freez(p->limits_filename); #ifndef __FreeBSD__ arl_free(p->status_arl); #endif @@ -1032,9 
+1065,9 @@ static inline void del_pid_entry(pid_t pid) { // ---------------------------------------------------------------------------- -static inline int managed_log(struct pid_stat *p, uint32_t log, int status) { +static inline int managed_log(struct pid_stat *p, PID_LOG log, int status) { if(unlikely(!status)) { - // error("command failed log %u, errno %d", log, errno); + // netdata_log_error("command failed log %u, errno %d", log, errno); if(unlikely(debug_enabled || errno != ENOENT)) { if(unlikely(debug_enabled || !(p->log_thrown & log))) { @@ -1042,41 +1075,48 @@ static inline int managed_log(struct pid_stat *p, uint32_t log, int status) { switch(log) { case PID_LOG_IO: #ifdef __FreeBSD__ - error("Cannot fetch process %d I/O info (command '%s')", p->pid, p->comm); + netdata_log_error("Cannot fetch process %d I/O info (command '%s')", p->pid, p->comm); #else - error("Cannot process %s/proc/%d/io (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); + netdata_log_error("Cannot process %s/proc/%d/io (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); #endif break; case PID_LOG_STATUS: #ifdef __FreeBSD__ - error("Cannot fetch process %d status info (command '%s')", p->pid, p->comm); + netdata_log_error("Cannot fetch process %d status info (command '%s')", p->pid, p->comm); #else - error("Cannot process %s/proc/%d/status (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); + netdata_log_error("Cannot process %s/proc/%d/status (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); #endif break; case PID_LOG_CMDLINE: #ifdef __FreeBSD__ - error("Cannot fetch process %d command line (command '%s')", p->pid, p->comm); + netdata_log_error("Cannot fetch process %d command line (command '%s')", p->pid, p->comm); #else - error("Cannot process %s/proc/%d/cmdline (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); + netdata_log_error("Cannot process %s/proc/%d/cmdline (command '%s')", 
netdata_configured_host_prefix, p->pid, p->comm); #endif break; case PID_LOG_FDS: #ifdef __FreeBSD__ - error("Cannot fetch process %d files (command '%s')", p->pid, p->comm); + netdata_log_error("Cannot fetch process %d files (command '%s')", p->pid, p->comm); #else - error("Cannot process entries in %s/proc/%d/fd (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); + netdata_log_error("Cannot process entries in %s/proc/%d/fd (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); #endif break; + case PID_LOG_LIMITS: + #ifdef __FreeBSD__ + ; + #else + netdata_log_error("Cannot process %s/proc/%d/limits (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); + #endif + case PID_LOG_STAT: break; default: - error("unhandled error for pid %d, command '%s'", p->pid, p->comm); + netdata_log_error("unhandled error for pid %d, command '%s'", p->pid, p->comm); break; } } @@ -1084,7 +1124,7 @@ static inline int managed_log(struct pid_stat *p, uint32_t log, int status) { errno = 0; } else if(unlikely(p->log_thrown & log)) { - // error("unsetting log %u on pid %d", log, p->pid); + // netdata_log_error("unsetting log %u on pid %d", log, p->pid); p->log_thrown &= ~log; } @@ -1276,8 +1316,7 @@ void arl_callback_status_voluntary_ctxt_switches(const char *name, uint32_t hash if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 2)) return; struct pid_stat *p = aptr->p; - pid_incremental_rate( - stat, p->status_voluntary_ctxt_switches, str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1))); + pid_incremental_rate(stat, p->status_voluntary_ctxt_switches, str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1))); } void arl_callback_status_nonvoluntary_ctxt_switches(const char *name, uint32_t hash, const char *value, void *dst) { @@ -1286,8 +1325,7 @@ void arl_callback_status_nonvoluntary_ctxt_switches(const char *name, uint32_t h if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 2)) return; struct pid_stat *p = aptr->p; - 
pid_incremental_rate( - stat, p->status_nonvoluntary_ctxt_switches, str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1))); + pid_incremental_rate(stat, p->status_nonvoluntary_ctxt_switches, str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1))); } static void update_proc_state_count(char proc_state) { @@ -1313,6 +1351,64 @@ static void update_proc_state_count(char proc_state) { } #endif // !__FreeBSD__ +#define MAX_PROC_PID_LIMITS 8192 +#define PROC_PID_LIMITS_MAX_OPEN_FILES_KEY "\nMax open files " + +static inline kernel_uint_t get_proc_pid_limits_limit(char *buf, const char *key, size_t key_len, kernel_uint_t def) { + char *line = strstr(buf, key); + if(!line) + return def; + + char *v = &line[key_len]; + while(isspace(*v)) v++; + + return str2ull(v, NULL); +} + +static inline int read_proc_pid_limits(struct pid_stat *p, void *ptr) { + (void)ptr; + +#ifdef __FreeBSD__ + return 0; +#else + static char proc_pid_limits_buffer[MAX_PROC_PID_LIMITS + 1]; + int ret = 0; + + kernel_uint_t all_fds = pid_openfds_sum(p); + if(all_fds < p->limits.max_open_files / 2 && p->io_collected_usec > p->last_limits_collected_usec && p->io_collected_usec - p->last_limits_collected_usec <= 60 * USEC_PER_SEC) + // too frequent, we want to collect limits once per minute + goto cleanup; + + if(unlikely(!p->limits_filename)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/limits", netdata_configured_host_prefix, p->pid); + p->limits_filename = strdupz(filename); + } + + int fd = open(p->limits_filename, procfile_open_flags, 0666); + if(unlikely(fd == -1)) goto cleanup; + + ssize_t bytes = read(fd, proc_pid_limits_buffer, MAX_PROC_PID_LIMITS); + close(fd); + + if(bytes <= 0) + goto cleanup; + + p->limits.max_open_files = get_proc_pid_limits_limit(proc_pid_limits_buffer, PROC_PID_LIMITS_MAX_OPEN_FILES_KEY, sizeof(PROC_PID_LIMITS_MAX_OPEN_FILES_KEY) - 1, 0); + p->last_limits_collected_usec = p->io_collected_usec; + + ret = 1; + +cleanup: + 
if(p->limits.max_open_files) + p->openfds_limits_percent = (NETDATA_DOUBLE)all_fds * 100.0 / (NETDATA_DOUBLE)p->limits.max_open_files; + else + p->openfds_limits_percent = 0.0; + + return ret; +#endif +} + static inline int read_proc_pid_status(struct pid_stat *p, void *ptr) { p->status_vmsize = 0; p->status_vmrss = 0; @@ -1470,7 +1566,7 @@ static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) { if(enable_guest_charts) { enable_guest_charts = 0; - info("Guest charts aren't supported by FreeBSD"); + netdata_log_info("Guest charts aren't supported by FreeBSD"); } #else pid_incremental_rate(stat, p->minflt, str2kernel_uint_t(procfile_lineword(ff, 0, 9))); @@ -1732,7 +1828,7 @@ cleanup: int file_descriptor_compare(void* a, void* b) { #ifdef NETDATA_INTERNAL_CHECKS if(((struct file_descriptor *)a)->magic != 0x0BADCAFE || ((struct file_descriptor *)b)->magic != 0x0BADCAFE) - error("Corrupted index data detected. Please report this."); + netdata_log_error("Corrupted index data detected. 
Please report this."); #endif /* NETDATA_INTERNAL_CHECKS */ if(((struct file_descriptor *)a)->hash < ((struct file_descriptor *)b)->hash) @@ -1776,7 +1872,7 @@ static inline void file_descriptor_not_used(int id) #ifdef NETDATA_INTERNAL_CHECKS if(all_files[id].magic != 0x0BADCAFE) { - error("Ignoring request to remove empty file id %d.", id); + netdata_log_error("Ignoring request to remove empty file id %d.", id); return; } #endif /* NETDATA_INTERNAL_CHECKS */ @@ -1790,7 +1886,7 @@ static inline void file_descriptor_not_used(int id) debug_log(" >> slot %d is empty.", id); if(unlikely(file_descriptor_remove(&all_files[id]) != (void *)&all_files[id])) - error("INTERNAL ERROR: removal of unused fd from index, removed a different fd"); + netdata_log_error("INTERNAL ERROR: removal of unused fd from index, removed a different fd"); #ifdef NETDATA_INTERNAL_CHECKS all_files[id].magic = 0x00000000; @@ -1799,9 +1895,14 @@ static inline void file_descriptor_not_used(int id) } } else - error("Request to decrease counter of fd %d (%s), while the use counter is 0", id, all_files[id].name); + netdata_log_error("Request to decrease counter of fd %d (%s), while the use counter is 0", + id, + all_files[id].name); } - else error("Request to decrease counter of fd %d, which is outside the array size (1 to %d)", id, all_files_size); + else + netdata_log_error("Request to decrease counter of fd %d, which is outside the array size (1 to %d)", + id, + all_files_size); } static inline void all_files_grow() { @@ -1823,7 +1924,7 @@ static inline void all_files_grow() { for(i = 0; i < all_files_size; i++) { if(!all_files[i].count) continue; if(unlikely(file_descriptor_add(&all_files[i]) != (void *)&all_files[i])) - error("INTERNAL ERROR: duplicate indexing of fd during realloc."); + netdata_log_error("INTERNAL ERROR: duplicate indexing of fd during realloc."); } debug_log(" >> re-indexing done."); @@ -1864,7 +1965,7 @@ static inline int file_descriptor_set_on_empty_slot(const char *name, 
uint32_t h #ifdef NETDATA_INTERNAL_CHECKS if(all_files[c].magic == 0x0BADCAFE && all_files[c].name && file_descriptor_find(all_files[c].name, all_files[c].hash)) - error("fd on position %d is not cleared properly. It still has %s in it.", c, all_files[c].name); + netdata_log_error("fd on position %d is not cleared properly. It still has %s in it.", c, all_files[c].name); #endif /* NETDATA_INTERNAL_CHECKS */ debug_log(" >> %s fd position %d for %s (last name: %s)", all_files[c].name?"re-using":"using", c, name, all_files[c].name); @@ -1895,7 +1996,7 @@ static inline int file_descriptor_set_on_empty_slot(const char *name, uint32_t h all_files[c].magic = 0x0BADCAFE; #endif /* NETDATA_INTERNAL_CHECKS */ if(unlikely(file_descriptor_add(&all_files[c]) != (void *)&all_files[c])) - error("INTERNAL ERROR: duplicate indexing of fd."); + netdata_log_error("INTERNAL ERROR: duplicate indexing of fd."); debug_log("using fd position %d (name: %s)", c, all_files[c].name); @@ -2013,13 +2114,13 @@ static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) { mib[3] = p->pid; if (unlikely(sysctl(mib, 4, NULL, &size, NULL, 0))) { - error("sysctl error: Can't get file descriptors data size for pid %d", p->pid); + netdata_log_error("sysctl error: Can't get file descriptors data size for pid %d", p->pid); return 0; } if (likely(size > 0)) fdsbuf = reallocz(fdsbuf, size); if (unlikely(sysctl(mib, 4, fdsbuf, &size, NULL, 0))) { - error("sysctl error: Can't get file descriptors data for pid %d", p->pid); + netdata_log_error("sysctl error: Can't get file descriptors data for pid %d", p->pid); return 0; } @@ -2192,7 +2293,7 @@ static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) { // cannot read the link if(debug_enabled || (p->target && p->target->debug_enabled)) - error("Cannot read link %s", p->fds[fdid].filename); + netdata_log_error("Cannot read link %s", p->fds[fdid].filename); if(unlikely(p->fds[fdid].fd < 0)) { 
file_descriptor_not_used(-p->fds[fdid].fd); @@ -2454,7 +2555,7 @@ static inline void process_exited_processes() { if(majflt) debug_find_lost_child(p, majflt, 2); } - p->keep = 1; + p->keep = true; debug_log(" > remaining resources - KEEP - for another loop: %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")" , p->comm @@ -2469,7 +2570,7 @@ static inline void process_exited_processes() { for(pp = p->parent; pp ; pp = pp->parent) { if(pp->updated) break; - pp->keep = 1; + pp->keep = true; debug_log(" > - KEEP - parent for another loop: %s (%d %s)" , pp->comm @@ -2523,7 +2624,7 @@ static inline void link_all_processes_to_their_parents(void) { } else { p->parent = NULL; - error("pid %d %s states parent %d, but the later does not exist.", p->pid, p->comm, p->ppid); + netdata_log_error("pid %d %s states parent %d, but the later does not exist.", p->pid, p->comm, p->ppid); } } } @@ -2561,13 +2662,13 @@ static int compar_pid(const void *pid1, const void *pid2) { static inline int collect_data_for_pid(pid_t pid, void *ptr) { if(unlikely(pid < 0 || pid > pid_max)) { - error("Invalid pid %d read (expected %d to %d). Ignoring process.", pid, 0, pid_max); + netdata_log_error("Invalid pid %d read (expected %d to %d). Ignoring process.", pid, 0, pid_max); return 0; } struct pid_stat *p = get_pid_entry(pid); if(unlikely(!p || p->read)) return 0; - p->read = 1; + p->read = true; // debug_log("Reading process %d (%s), sortlist %d", p->pid, p->comm, p->sortlist); @@ -2580,7 +2681,7 @@ static inline int collect_data_for_pid(pid_t pid, void *ptr) { // check its parent pid if(unlikely(p->ppid < 0 || p->ppid > pid_max)) { - error("Pid %d (command '%s') states invalid parent pid %d. Using 0.", pid, p->comm, p->ppid); + netdata_log_error("Pid %d (command '%s') states invalid parent pid %d. 
Using 0.", pid, p->comm, p->ppid); p->ppid = 0; } @@ -2599,8 +2700,10 @@ static inline int collect_data_for_pid(pid_t pid, void *ptr) { // -------------------------------------------------------------------- // /proc/<pid>/fd - if(enable_file_charts) - managed_log(p, PID_LOG_FDS, read_pid_file_descriptors(p, ptr)); + if(enable_file_charts) { + managed_log(p, PID_LOG_FDS, read_pid_file_descriptors(p, ptr)); + managed_log(p, PID_LOG_LIMITS, read_proc_pid_limits(p, ptr)); + } // -------------------------------------------------------------------- // done! @@ -2609,8 +2712,8 @@ static inline int collect_data_for_pid(pid_t pid, void *ptr) { debug_log("Read process %d (%s) sortlisted %d, but its parent %d (%s) sortlisted %d, is not read", p->pid, p->comm, p->sortlist, all_pids[p->ppid]->pid, all_pids[p->ppid]->comm, all_pids[p->ppid]->sortlist); // mark it as updated - p->updated = 1; - p->keep = 0; + p->updated = true; + p->keep = false; p->keeploops = 0; return 1; @@ -2632,7 +2735,7 @@ static int collect_data_for_all_processes(void) { int mib[3] = { CTL_KERN, KERN_PROC, KERN_PROC_PROC }; if (unlikely(sysctl(mib, 3, NULL, &new_procbase_size, NULL, 0))) { - error("sysctl error: Can't get processes data size"); + netdata_log_error("sysctl error: Can't get processes data size"); return 0; } @@ -2652,7 +2755,7 @@ static int collect_data_for_all_processes(void) { // get the processes from the system if (unlikely(sysctl(mib, 3, procbase, &new_procbase_size, NULL, 0))) { - error("sysctl error: Can't get processes data"); + netdata_log_error("sysctl error: Can't get processes data"); return 0; } @@ -2667,9 +2770,9 @@ static int collect_data_for_all_processes(void) { size_t slc = 0; #endif for(p = root_of_pids; p ; p = p->next) { - p->read = 0; // mark it as not read, so that collect_data_for_pid() will read it - p->updated = 0; - p->merged = 0; + p->read = false; // mark it as not read, so that collect_data_for_pid() will read it + p->updated = false; + p->merged = false; 
p->children_count = 0; p->parent = NULL; @@ -2680,7 +2783,7 @@ static int collect_data_for_all_processes(void) { #if (ALL_PIDS_ARE_READ_INSTANTLY == 0) if(unlikely(slc != all_pids_count)) { - error("Internal error: I was thinking I had %zu processes in my arrays, but it seems there are %zu.", all_pids_count, slc); + netdata_log_error("Internal error: I was thinking I had %zu processes in my arrays, but it seems there are %zu.", all_pids_count, slc); all_pids_count = slc; } @@ -2795,7 +2898,7 @@ static void cleanup_exited_pids(void) { } else { if(unlikely(p->keep)) p->keeploops++; - p->keep = 0; + p->keep = false; p = p->next; } } @@ -2811,7 +2914,7 @@ static void apply_apps_groups_targets_inheritance(void) { if(unlikely(debug_enabled)) loops++; found = 0; for(p = root_of_pids; p ; p = p->next) { - // if this process does not have a target + // if this process does not have a target, // and it has a parent // and its parent has a target // then, set the parent's target to this process @@ -2849,7 +2952,7 @@ static void apply_apps_groups_targets_inheritance(void) { )) { // mark it as merged p->parent->children_count--; - p->merged = 1; + p->merged = true; // the parent inherits the child's target, if it does not have a target itself if(unlikely(p->target && !p->parent->target)) { @@ -2878,7 +2981,7 @@ static void apply_apps_groups_targets_inheritance(void) { if(unlikely(debug_enabled)) loops++; for(p = root_of_pids; p ; p = p->next) { // if the process is not merged itself - // then is is a top level process + // then it is a top level process if(unlikely(!p->merged && !p->target)) p->target = apps_groups_default_target; @@ -2959,6 +3062,8 @@ static size_t zero_all_targets(struct target *root) { w->openfds.signalfds = 0; w->openfds.eventpolls = 0; w->openfds.other = 0; + + w->max_open_files_percent = 0.0; } w->collected_starttime = 0; @@ -3090,9 +3195,6 @@ static inline void aggregate_pid_fds_on_targets(struct pid_stat *p) { aggregate_fd_on_target(fd, u); 
aggregate_fd_on_target(fd, g); } - - if (currentfds >= currentmaxfds) - currentmaxfds = currentfds; } static inline void aggregate_pid_on_target(struct target *w, struct pid_stat *p, struct target *o) { @@ -3104,10 +3206,13 @@ static inline void aggregate_pid_on_target(struct target *w, struct pid_stat *p, } if(unlikely(!w)) { - error("pid %d %s was left without a target!", p->pid, p->comm); + netdata_log_error("pid %d %s was left without a target!", p->pid, p->comm); return; } + if(p->openfds_limits_percent > w->max_open_files_percent) + w->max_open_files_percent = p->openfds_limits_percent; + w->cutime += p->cutime; w->cstime += p->cstime; w->cgtime += p->cgtime; @@ -3712,30 +3817,82 @@ static void send_collected_data_to_netdata(struct target *root, const char *type send_END(); if(enable_file_charts) { - send_BEGIN(type, "files", dt); + send_BEGIN(type, "fds_open_limit", dt); for (w = root; w; w = w->next) { if (unlikely(w->exposed && w->processes)) - send_SET(w->name, w->openfds.files); + send_SET(w->name, w->max_open_files_percent * 100.0); } - if (!strcmp("apps", type)){ - kernel_uint_t usedfdpercentage = (kernel_uint_t) ((currentmaxfds * 100) / sysconf(_SC_OPEN_MAX)); - fprintf(stdout, "VARIABLE fdperc = " KERNEL_UINT_FORMAT "\n", usedfdpercentage); + send_END(); + + send_BEGIN(type, "fds_open", dt); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) + send_SET(w->name, pid_openfds_sum(w)); } send_END(); - send_BEGIN(type, "sockets", dt); + send_BEGIN(type, "fds_files", dt); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) + send_SET(w->name, w->openfds.files); + } + send_END(); + + send_BEGIN(type, "fds_sockets", dt); for (w = root; w; w = w->next) { if (unlikely(w->exposed && w->processes)) send_SET(w->name, w->openfds.sockets); } send_END(); - send_BEGIN(type, "pipes", dt); + send_BEGIN(type, "fds_pipes", dt); for (w = root; w; w = w->next) { if (unlikely(w->exposed && w->processes)) 
send_SET(w->name, w->openfds.pipes); } send_END(); + + send_BEGIN(type, "fds_inotifies", dt); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) + send_SET(w->name, w->openfds.inotifies); + } + send_END(); + + send_BEGIN(type, "fds_eventfds", dt); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) + send_SET(w->name, w->openfds.eventfds); + } + send_END(); + + send_BEGIN(type, "fds_timerfds", dt); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) + send_SET(w->name, w->openfds.timerfds); + } + send_END(); + + send_BEGIN(type, "fds_signalfds", dt); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) + send_SET(w->name, w->openfds.signalfds); + } + send_END(); + + send_BEGIN(type, "fds_eventpolls", dt); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) + send_SET(w->name, w->openfds.eventpolls); + } + send_END(); + + send_BEGIN(type, "fds_other", dt); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed && w->processes)) + send_SET(w->name, w->openfds.other); + } + send_END(); } } @@ -3958,7 +4115,23 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type #endif if(enable_file_charts) { - fprintf(stdout, "CHART %s.files '' '%s Open Files' 'open files' disk %s.files stacked 20050 %d\n", type, + fprintf(stdout, "CHART %s.fds_open_limit '' '%s Open File Descriptors Limit' '%%' fds %s.fds_open_limit line 20050 %d\n", type, + title, type, update_every); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 100\n", w->name); + } + APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.fds_open '' '%s Open File Descriptors' 'fds' fds %s.fds_open stacked 20051 %d\n", type, + title, type, update_every); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + } + 
APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.fds_files '' '%s Open Files' 'fds' fds %s.fds_files stacked 20052 %d\n", type, title, type, update_every); for (w = root; w; w = w->next) { if (unlikely(w->exposed)) @@ -3966,7 +4139,7 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type } APPS_PLUGIN_FUNCTIONS(); - fprintf(stdout, "CHART %s.sockets '' '%s Open Sockets' 'open sockets' net %s.sockets stacked 20051 %d\n", + fprintf(stdout, "CHART %s.fds_sockets '' '%s Open Sockets' 'fds' fds %s.fds_sockets stacked 20053 %d\n", type, title, type, update_every); for (w = root; w; w = w->next) { if (unlikely(w->exposed)) @@ -3974,13 +4147,61 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type } APPS_PLUGIN_FUNCTIONS(); - fprintf(stdout, "CHART %s.pipes '' '%s Pipes' 'open pipes' processes %s.pipes stacked 20053 %d\n", type, + fprintf(stdout, "CHART %s.fds_pipes '' '%s Pipes' 'fds' fds %s.fds_pipes stacked 20054 %d\n", type, title, type, update_every); for (w = root; w; w = w->next) { if (unlikely(w->exposed)) fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); } APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.fds_inotifies '' '%s iNotify File Descriptors' 'fds' fds %s.fds_inotifies stacked 20055 %d\n", type, + title, type, update_every); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + } + APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.fds_eventfds '' '%s Event File Descriptors' 'fds' fds %s.fds_eventfds stacked 20056 %d\n", type, + title, type, update_every); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + } + APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.fds_timerfds '' '%s Timer File Descriptors' 'fds' fds %s.fds_timerfds stacked 20057 %d\n", type, + title, type, update_every); + for (w = root; w; w = w->next) { + 
if (unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + } + APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.fds_signalfds '' '%s Signal File Descriptors' 'fds' fds %s.fds_signalfds stacked 20058 %d\n", type, + title, type, update_every); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + } + APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.fds_eventpolls '' '%s Event Poll File Descriptors' 'fds' fds %s.fds_eventpolls stacked 20059 %d\n", type, + title, type, update_every); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + } + APPS_PLUGIN_FUNCTIONS(); + + fprintf(stdout, "CHART %s.fds_other '' '%s Other File Descriptors' 'fds' fds %s.fds_other stacked 20060 %d\n", type, + title, type, update_every); + for (w = root; w; w = w->next) { + if (unlikely(w->exposed)) + fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name); + } + APPS_PLUGIN_FUNCTIONS(); } } @@ -4122,6 +4343,10 @@ static void parse_args(int argc, char **argv) enable_detailed_uptime_charts = 1; continue; } + if(strcmp("with-function-cmdline", argv[i]) == 0) { + enable_function_cmdline = 1; + continue; + } if(strcmp("-h", argv[i]) == 0 || strcmp("--help", argv[i]) == 0) { fprintf(stderr, @@ -4139,6 +4364,11 @@ static void parse_args(int argc, char **argv) "\n" " debug enable debugging (lot of output)\n" "\n" + " with-function-cmdline enable reporting the complete command line for processes\n" + " it includes the command and passed arguments\n" + " it may include sensitive data such as passwords and tokens\n" + " enabling this could be a security risk\n" + "\n" " with-childs\n" " without-childs enable / disable aggregating exited\n" " children resources into parents\n" @@ -4177,35 +4407,35 @@ static void parse_args(int argc, char **argv) exit(1); } - error("Cannot understand option %s", argv[i]); + 
netdata_log_error("Cannot understand option %s", argv[i]); exit(1); } if(freq > 0) update_every = freq; if(read_apps_groups_conf(user_config_dir, "groups")) { - info("Cannot read process groups configuration file '%s/apps_groups.conf'. Will try '%s/apps_groups.conf'", user_config_dir, stock_config_dir); + netdata_log_info("Cannot read process groups configuration file '%s/apps_groups.conf'. Will try '%s/apps_groups.conf'", user_config_dir, stock_config_dir); if(read_apps_groups_conf(stock_config_dir, "groups")) { - error("Cannot read process groups '%s/apps_groups.conf'. There are no internal defaults. Failing.", stock_config_dir); + netdata_log_error("Cannot read process groups '%s/apps_groups.conf'. There are no internal defaults. Failing.", stock_config_dir); exit(1); } else - info("Loaded config file '%s/apps_groups.conf'", stock_config_dir); + netdata_log_info("Loaded config file '%s/apps_groups.conf'", stock_config_dir); } else - info("Loaded config file '%s/apps_groups.conf'", user_config_dir); + netdata_log_info("Loaded config file '%s/apps_groups.conf'", user_config_dir); } static int am_i_running_as_root() { uid_t uid = getuid(), euid = geteuid(); if(uid == 0 || euid == 0) { - if(debug_enabled) info("I am running with escalated privileges, uid = %u, euid = %u.", uid, euid); + if(debug_enabled) netdata_log_info("I am running with escalated privileges, uid = %u, euid = %u.", uid, euid); return 1; } - if(debug_enabled) info("I am not running with escalated privileges, uid = %u, euid = %u.", uid, euid); + if(debug_enabled) netdata_log_info("I am not running with escalated privileges, uid = %u, euid = %u.", uid, euid); return 0; } @@ -4213,40 +4443,40 @@ static int am_i_running_as_root() { static int check_capabilities() { cap_t caps = cap_get_proc(); if(!caps) { - error("Cannot get current capabilities."); + netdata_log_error("Cannot get current capabilities."); return 0; } else if(debug_enabled) - info("Received my capabilities from the system."); + 
netdata_log_info("Received my capabilities from the system."); int ret = 1; cap_flag_value_t cfv = CAP_CLEAR; if(cap_get_flag(caps, CAP_DAC_READ_SEARCH, CAP_EFFECTIVE, &cfv) == -1) { - error("Cannot find if CAP_DAC_READ_SEARCH is effective."); + netdata_log_error("Cannot find if CAP_DAC_READ_SEARCH is effective."); ret = 0; } else { if(cfv != CAP_SET) { - error("apps.plugin should run with CAP_DAC_READ_SEARCH."); + netdata_log_error("apps.plugin should run with CAP_DAC_READ_SEARCH."); ret = 0; } else if(debug_enabled) - info("apps.plugin runs with CAP_DAC_READ_SEARCH."); + netdata_log_info("apps.plugin runs with CAP_DAC_READ_SEARCH."); } cfv = CAP_CLEAR; if(cap_get_flag(caps, CAP_SYS_PTRACE, CAP_EFFECTIVE, &cfv) == -1) { - error("Cannot find if CAP_SYS_PTRACE is effective."); + netdata_log_error("Cannot find if CAP_SYS_PTRACE is effective."); ret = 0; } else { if(cfv != CAP_SET) { - error("apps.plugin should run with CAP_SYS_PTRACE."); + netdata_log_error("apps.plugin should run with CAP_SYS_PTRACE."); ret = 0; } else if(debug_enabled) - info("apps.plugin runs with CAP_SYS_PTRACE."); + netdata_log_info("apps.plugin runs with CAP_SYS_PTRACE."); } cap_free(caps); @@ -4356,32 +4586,6 @@ static void apps_plugin_function_processes_help(const char *transaction) { pluginsd_function_result_end_to_stdout(); } -#define add_table_field(wb, key, name, visible, type, visualization, transform, decimal_points, units, max, sort, sortable, sticky, unique_key, pointer_to, summary, range) do { \ - buffer_json_member_add_object(wb, key); \ - buffer_json_member_add_uint64(wb, "index", fields_added); \ - buffer_json_member_add_boolean(wb, "unique_key", unique_key); \ - buffer_json_member_add_string(wb, "name", name); \ - buffer_json_member_add_boolean(wb, "visible", visible); \ - buffer_json_member_add_string(wb, "type", type); \ - buffer_json_member_add_string_or_omit(wb, "units", (char*)(units)); \ - buffer_json_member_add_string(wb, "visualization", visualization); \ - 
buffer_json_member_add_object(wb, "value_options"); \ - buffer_json_member_add_string_or_omit(wb, "units", (char*)(units)); \ - buffer_json_member_add_string(wb, "transform", transform); \ - buffer_json_member_add_uint64(wb, "decimal_points", decimal_points); \ - buffer_json_object_close(wb); \ - if(!isnan((NETDATA_DOUBLE)(max))) \ - buffer_json_member_add_double(wb, "max", (NETDATA_DOUBLE)(max)); \ - buffer_json_member_add_string_or_omit(wb, "pointer_to", (char *)(pointer_to)); \ - buffer_json_member_add_string(wb, "sort", sort); \ - buffer_json_member_add_boolean(wb, "sortable", sortable); \ - buffer_json_member_add_boolean(wb, "sticky", sticky); \ - buffer_json_member_add_string(wb, "summary", summary); \ - buffer_json_member_add_string(wb, "filter", (range)?"range":"multiselect"); \ - buffer_json_object_close(wb); \ - fields_added++; \ -} while(0) - #define add_value_field_llu_with_max(wb, key, value) do { \ unsigned long long _tmp = (value); \ key ## _max = (rows == 0) ? (_tmp) : MAX(key ## _max, _tmp); \ @@ -4398,7 +4602,7 @@ static void apps_plugin_function_processes(const char *transaction, char *functi struct pid_stat *p; char *words[PLUGINSD_MAX_WORDS] = { NULL }; - size_t num_words = pluginsd_split_words(function, words, PLUGINSD_MAX_WORDS); + size_t num_words = quoted_strings_splitter_pluginsd(function, words, PLUGINSD_MAX_WORDS); struct target *category = NULL, *user = NULL, *group = NULL; const char *process_name = NULL; @@ -4488,6 +4692,7 @@ static void apps_plugin_function_processes(const char *transaction, char *functi , Shared_max = 0.0 , Swap_max = 0.0 , Memory_max = 0.0 + , FDsLimitPercent_max = 0.0 ; unsigned long long @@ -4553,7 +4758,7 @@ static void apps_plugin_function_processes(const char *transaction, char *functi rows++; - buffer_json_add_array_item_array(wb); + buffer_json_add_array_item_array(wb); // for each pid // IMPORTANT! // THE ORDER SHOULD BE THE SAME WITH THE FIELDS! 
@@ -4564,10 +4769,10 @@ static void apps_plugin_function_processes(const char *transaction, char *functi // cmd buffer_json_add_array_item_string(wb, p->comm); -#ifdef NETDATA_DEV_MODE // cmdline - buffer_json_add_array_item_string(wb, (p->cmdline && *p->cmdline) ? p->cmdline : p->comm); -#endif + if (enable_function_cmdline) { + buffer_json_add_array_item_string(wb, (p->cmdline && *p->cmdline) ? p->cmdline : p->comm); + } // ppid buffer_json_add_array_item_uint64(wb, p->ppid); @@ -4633,7 +4838,8 @@ static void apps_plugin_function_processes(const char *transaction, char *functi add_value_field_llu_with_max(wb, TMajFlt, (p->majflt + p->cmajflt) / RATES_DETAIL); // open file descriptors - add_value_field_llu_with_max(wb, FDs, p->openfds.files + p->openfds.pipes + p->openfds.sockets + p->openfds.inotifies + p->openfds.eventfds + p->openfds.timerfds + p->openfds.signalfds + p->openfds.eventpolls + p->openfds.other); + add_value_field_ndd_with_max(wb, FDsLimitPercent, p->openfds_limits_percent); + add_value_field_llu_with_max(wb, FDs, pid_openfds_sum(p)); add_value_field_llu_with_max(wb, Files, p->openfds.files); add_value_field_llu_with_max(wb, Pipes, p->openfds.pipes); add_value_field_llu_with_max(wb, Sockets, p->openfds.sockets); @@ -4644,99 +4850,311 @@ static void apps_plugin_function_processes(const char *transaction, char *functi add_value_field_llu_with_max(wb, EvPollFDs, p->openfds.eventpolls); add_value_field_llu_with_max(wb, OtherFDs, p->openfds.other); + // processes, threads, uptime add_value_field_llu_with_max(wb, Processes, p->children_count); add_value_field_llu_with_max(wb, Threads, p->num_threads); add_value_field_llu_with_max(wb, Uptime, p->uptime); - buffer_json_array_close(wb); + buffer_json_array_close(wb); // for each pid } - buffer_json_array_close(wb); + buffer_json_array_close(wb); // data buffer_json_member_add_object(wb, "columns"); { - int fields_added = 0; + int field_id = 0; // IMPORTANT! // THE ORDER SHOULD BE THE SAME WITH THE VALUES! 
- add_table_field(wb, "PID", "Process ID", true, "integer", "value", "number", 0, NULL, NAN, "ascending", true, true, true, NULL, "count_unique", false); - add_table_field(wb, "Cmd", "Process Name", true, "string", "value", "none", 0, NULL, NAN, "ascending", true, true, false, NULL, "count_unique", false); + // wb, key, name, visible, type, visualization, transform, decimal_points, units, max, sort, sortable, sticky, unique_key, pointer_to, summary, range + buffer_rrdf_table_add_field(wb, field_id++, "PID", "Process ID", RRDF_FIELD_TYPE_INTEGER, + RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, 0, NULL, NAN, + RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY | + RRDF_FIELD_OPTS_UNIQUE_KEY, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "Cmd", "Process Name", RRDF_FIELD_TYPE_STRING, + RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN, + RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL); + + if (enable_function_cmdline) { + buffer_rrdf_table_add_field(wb, field_id++, "CmdLine", "Command Line", RRDF_FIELD_TYPE_STRING, + RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, + NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_NONE, NULL); + } -#ifdef NETDATA_DEV_MODE - add_table_field(wb, "CmdLine", "Command Line", false, "detail-string:Cmd", "value", "none", 0, NULL, NAN, "ascending", true, false, false, NULL, "count_unique", false); -#endif - add_table_field(wb, "PPID", "Parent Process ID", false, "integer", "value", "number", 0, NULL, NAN, "ascending", true, false, false, "PID", "count_unique", false); - add_table_field(wb, "Category", "Category (apps_groups.conf)", true, "string", "value", "none", 0, NULL, NAN, "ascending", true, true, false, NULL, "count_unique", false); - 
add_table_field(wb, "User", "User Owner", true, "string", "value", "none", 0, NULL, NAN, "ascending", true, false, false, NULL, "count_unique", false); - add_table_field(wb, "Uid", "User ID", false, "integer", "value", "number", 0, NULL, NAN, "ascending", true, false, false, NULL, "count_unique", false); - add_table_field(wb, "Group", "Group Owner", false, "string", "value", "none", 0, NULL, NAN, "ascending", true, false, false, NULL, "count_unique", false); - add_table_field(wb, "Gid", "Group ID", false, "integer", "value", "number", 0, NULL, NAN, "ascending", true, false, false, NULL, "count_unique", false); + buffer_rrdf_table_add_field(wb, field_id++, "PPID", "Parent Process ID", RRDF_FIELD_TYPE_INTEGER, + RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, 0, NULL, + NAN, RRDF_FIELD_SORT_ASCENDING, "PID", RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Category", "Category (apps_groups.conf)", RRDF_FIELD_TYPE_STRING, + RRDF_FIELD_VISUAL_VALUE, + RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "User", "User Owner", RRDF_FIELD_TYPE_STRING, + RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN, + RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Uid", "User ID", RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, + RRDF_FIELD_TRANSFORM_NUMBER, 0, NULL, NAN, + RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Group", "Group Owner", RRDF_FIELD_TYPE_STRING, + RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN, + 
RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Gid", "Group ID", RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, + RRDF_FIELD_TRANSFORM_NUMBER, 0, NULL, NAN, + RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_NONE, NULL); // CPU utilization - add_table_field(wb, "CPU", "Total CPU Time (100% = 1 core)", true, "bar-with-integer", "bar", "number", 2, "%", CPU_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "UserCPU", "User CPU time (100% = 1 core)", false, "bar-with-integer", "bar", "number", 2, "%", UserCPU_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "SysCPU", "System CPU Time (100% = 1 core)", false, "bar-with-integer", "bar", "number", 2, "%", SysCPU_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "GuestCPU", "Guest CPU Time (100% = 1 core)", false, "bar-with-integer", "bar", "number", 2, "%", GuestCPU_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "CUserCPU", "Children User CPU Time (100% = 1 core)", false, "bar-with-integer", "bar", "number", 2, "%", CUserCPU_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "CSysCPU", "Children System CPU Time (100% = 1 core)", false, "bar-with-integer", "bar", "number", 2, "%", CSysCPU_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "CGuestCPU", "Children Guest CPU Time (100% = 1 core)", false, "bar-with-integer", "bar", "number", 2, "%", CGuestCPU_max, "descending", true, false, false, NULL, "sum", true); + buffer_rrdf_table_add_field(wb, field_id++, "CPU", "Total CPU Time (100% = 1 core)", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 2, "%", CPU_max, RRDF_FIELD_SORT_DESCENDING, NULL, + 
RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "UserCPU", "User CPU time (100% = 1 core)", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "%", UserCPU_max, + RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "SysCPU", "System CPU Time (100% = 1 core)", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "%", SysCPU_max, + RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "GuestCPU", "Guest CPU Time (100% = 1 core)", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "%", GuestCPU_max, + RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "CUserCPU", "Children User CPU Time (100% = 1 core)", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 2, "%", CUserCPU_max, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "CSysCPU", "Children System CPU Time (100% = 1 core)", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 2, "%", CSysCPU_max, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "CGuestCPU", "Children Guest CPU Time (100% = 1 core)", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 2, "%", CGuestCPU_max, RRDF_FIELD_SORT_DESCENDING, + NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, 
RRDF_FIELD_OPTS_NONE, NULL); // CPU context switches - add_table_field(wb, "vCtxSwitch", "Voluntary Context Switches", false, "bar-with-integer", "bar", "number", 2, "switches/s", VoluntaryCtxtSwitches_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "iCtxSwitch", "Involuntary Context Switches", false, "bar-with-integer", "bar", "number", 2, "switches/s", NonVoluntaryCtxtSwitches_max, "descending", true, false, false, NULL, "sum", true); + buffer_rrdf_table_add_field(wb, field_id++, "vCtxSwitch", "Voluntary Context Switches", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "switches/s", + VoluntaryCtxtSwitches_max, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "iCtxSwitch", "Involuntary Context Switches", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "switches/s", + NonVoluntaryCtxtSwitches_max, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_NONE, NULL); // memory - if(MemTotal) - add_table_field(wb, "Memory", "Memory Percentage", true, "bar-with-integer", "bar", "number", 2, "%", 100.0, "descending", true, false, false, NULL, "sum", true); - - add_table_field(wb, "Resident", "Resident Set Size", true, "bar-with-integer", "bar", "number", 2, "MiB", RSS_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "Shared", "Shared Pages", true, "bar-with-integer", "bar", "number", 2, "MiB", Shared_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "Virtual", "Virtual Memory Size", true, "bar-with-integer", "bar", "number", 2, "MiB", VMSize_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "Swap", "Swap Memory", false, "bar-with-integer", "bar", "number", 2, "MiB", Swap_max, "descending", true, false, 
false, NULL, "sum", true); + if (MemTotal) + buffer_rrdf_table_add_field(wb, field_id++, "Memory", "Memory Percentage", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 2, "%", 100.0, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "Resident", "Resident Set Size", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, + 2, "MiB", RSS_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, + RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Shared", "Shared Pages", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, + "MiB", Shared_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, + RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Virtual", "Virtual Memory Size", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 2, "MiB", VMSize_max, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Swap", "Swap Memory", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, + "MiB", + Swap_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, + RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); // Physical I/O - add_table_field(wb, "PReads", "Physical I/O Reads", true, "bar-with-integer", "bar", "number", 2, "KiB/s", PReads_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "PWrites", "Physical I/O Writes", true, "bar-with-integer", "bar", "number", 2, "KiB/s", PWrites_max, "descending", true, false, false, NULL, "sum", true); + buffer_rrdf_table_add_field(wb, field_id++, "PReads", 
"Physical I/O Reads", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "KiB/s", PReads_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, + RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "PWrites", "Physical I/O Writes", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 2, "KiB/s", PWrites_max, RRDF_FIELD_SORT_DESCENDING, + NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); // Logical I/O #ifndef __FreeBSD__ - add_table_field(wb, "LReads", "Logical I/O Reads", true, "bar-with-integer", "bar", "number", 2, "KiB/s", LReads_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "LWrites", "Logical I/O Writes", true, "bar-with-integer", "bar", "number", 2, "KiB/s", LWrites_max, "descending", true, false, false, NULL, "sum", true); + buffer_rrdf_table_add_field(wb, field_id++, "LReads", "Logical I/O Reads", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "KiB/s", LReads_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, + RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "LWrites", "Logical I/O Writes", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, + 2, "KiB/s", LWrites_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, + RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); #endif // I/O calls - add_table_field(wb, "RCalls", "I/O Read Calls", true, "bar-with-integer", "bar", "number", 2, "calls/s", RCalls_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "WCalls", "I/O Write Calls", true, "bar-with-integer", "bar", "number", 2, "calls/s", WCalls_max, "descending", true, false, false, NULL, "sum", true); + buffer_rrdf_table_add_field(wb, field_id++, 
"RCalls", "I/O Read Calls", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, + "calls/s", RCalls_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, + RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "WCalls", "I/O Write Calls", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, + "calls/s", WCalls_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, + RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); // minor page faults - add_table_field(wb, "MinFlt", "Minor Page Faults/s", false, "bar-with-integer", "bar", "number", 2, "pgflts/s", MinFlt_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "CMinFlt", "Children Minor Page Faults/s", false, "bar-with-integer", "bar", "number", 2, "pgflts/s", CMinFlt_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "TMinFlt", "Total Minor Page Faults/s", false, "bar-with-integer", "bar", "number", 2, "pgflts/s", TMinFlt_max, "descending", true, false, false, NULL, "sum", true); + buffer_rrdf_table_add_field(wb, field_id++, "MinFlt", "Minor Page Faults/s", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, + 2, "pgflts/s", MinFlt_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, + RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "CMinFlt", "Children Minor Page Faults/s", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 2, "pgflts/s", CMinFlt_max, RRDF_FIELD_SORT_DESCENDING, + NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "TMinFlt", "Total Minor Page Faults/s", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 2, "pgflts/s", TMinFlt_max, 
RRDF_FIELD_SORT_DESCENDING, + NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); // major page faults - add_table_field(wb, "MajFlt", "Major Page Faults/s", false, "bar-with-integer", "bar", "number", 2, "pgflts/s", MajFlt_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "CMajFlt", "Children Major Page Faults/s", false, "bar-with-integer", "bar", "number", 2, "pgflts/s", CMajFlt_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "TMajFlt", "Total Major Page Faults/s", true, "bar-with-integer", "bar", "number", 2, "pgflts/s", TMajFlt_max, "descending", true, false, false, NULL, "sum", true); + buffer_rrdf_table_add_field(wb, field_id++, "MajFlt", "Major Page Faults/s", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, + 2, "pgflts/s", MajFlt_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, + RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "CMajFlt", "Children Major Page Faults/s", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 2, "pgflts/s", CMajFlt_max, RRDF_FIELD_SORT_DESCENDING, + NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "TMajFlt", "Total Major Page Faults/s", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 2, "pgflts/s", TMajFlt_max, RRDF_FIELD_SORT_DESCENDING, + NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); // open file descriptors - add_table_field(wb, "FDs", "All Open File Descriptors", true, "bar-with-integer", "bar", "number", 0, "fds", FDs_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "Files", "Open Files", true, "bar-with-integer", "bar", "number", 0, "fds", Files_max, "descending", true, false, false, 
NULL, "sum", true); - add_table_field(wb, "Pipes", "Open Pipes", true, "bar-with-integer", "bar", "number", 0, "fds", Pipes_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "Sockets", "Open Sockets", true, "bar-with-integer", "bar", "number", 0, "fds", Sockets_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "iNotiFDs", "Open iNotify Descriptors", false, "bar-with-integer", "bar", "number", 0, "fds", iNotiFDs_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "EventFDs", "Open Event Descriptors", false, "bar-with-integer", "bar", "number", 0, "fds", EventFDs_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "TimerFDs", "Open Timer Descriptors", false, "bar-with-integer", "bar", "number", 0, "fds", TimerFDs_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "SigFDs", "Open Signal Descriptors", false, "bar-with-integer", "bar", "number", 0, "fds", SigFDs_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "EvPollFDs", "Open Event Poll Descriptors", false, "bar-with-integer", "bar", "number", 0, "fds", EvPollFDs_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "OtherFDs", "Other Open Descriptors", false, "bar-with-integer", "bar", "number", 0, "fds", OtherFDs_max, "descending", true, false, false, NULL, "sum", true); + buffer_rrdf_table_add_field(wb, field_id++, "FDsLimitPercent", "Percentage of Open Descriptors vs Limits", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 2, "%", FDsLimitPercent_max, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_MAX, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "FDs", "All Open File Descriptors", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 0, "fds", FDs_max, 
RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Files", "Open Files", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 0, + "fds", + Files_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, + RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Pipes", "Open Pipes", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 0, + "fds", + Pipes_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, + RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Sockets", "Open Sockets", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 0, + "fds", Sockets_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, + RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "iNotiFDs", "Open iNotify Descriptors", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 0, "fds", iNotiFDs_max, RRDF_FIELD_SORT_DESCENDING, + NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "EventFDs", "Open Event Descriptors", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 0, "fds", EventFDs_max, RRDF_FIELD_SORT_DESCENDING, + NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "TimerFDs", "Open Timer Descriptors", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 0, "fds", TimerFDs_max, RRDF_FIELD_SORT_DESCENDING, + NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + 
buffer_rrdf_table_add_field(wb, field_id++, "SigFDs", "Open Signal Descriptors", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 0, "fds", SigFDs_max, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "EvPollFDs", "Open Event Poll Descriptors", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 0, "fds", EvPollFDs_max, + RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "OtherFDs", "Other Open Descriptors", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 0, "fds", OtherFDs_max, RRDF_FIELD_SORT_DESCENDING, + NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); // processes, threads, uptime - add_table_field(wb, "Processes", "Processes", true, "bar-with-integer", "bar", "number", 0, "processes", Processes_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "Threads", "Threads", true, "bar-with-integer", "bar", "number", 0, "threads", Threads_max, "descending", true, false, false, NULL, "sum", true); - add_table_field(wb, "Uptime", "Uptime in seconds", true, "duration", "bar", "duration", 2, "seconds", Uptime_max, "descending", true, false, false, NULL, "max", true); - } - buffer_json_object_close(wb); + buffer_rrdf_table_add_field(wb, field_id++, "Processes", "Processes", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 0, + "processes", Processes_max, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Threads", "Threads", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 0, + 
"threads", Threads_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, + RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Uptime", "Uptime in seconds", RRDF_FIELD_TYPE_DURATION, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_DURATION, 2, + "seconds", Uptime_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_MAX, + RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + } + buffer_json_object_close(wb); // columns buffer_json_member_add_string(wb, "default_sort_column", "CPU"); @@ -4953,6 +5371,20 @@ static void apps_plugin_function_processes(const char *transaction, char *functi } buffer_json_object_close(wb); // charts + buffer_json_member_add_array(wb, "default_charts"); + { + buffer_json_add_array_item_array(wb); + buffer_json_add_array_item_string(wb, "CPU"); + buffer_json_add_array_item_string(wb, "Category"); + buffer_json_array_close(wb); + + buffer_json_add_array_item_array(wb); + buffer_json_add_array_item_string(wb, "Memory"); + buffer_json_add_array_item_string(wb, "Category"); + buffer_json_array_close(wb); + } + buffer_json_array_close(wb); + buffer_json_member_add_object(wb, "group_by"); { // group by PID @@ -5026,7 +5458,7 @@ void *reader_main(void *arg __maybe_unused) { while(!apps_plugin_exit && (s = fgets(buffer, PLUGINSD_LINE_MAX, stdin))) { char *words[PLUGINSD_MAX_WORDS] = { NULL }; - size_t num_words = pluginsd_split_words(buffer, words, PLUGINSD_MAX_WORDS); + size_t num_words = quoted_strings_splitter_pluginsd(buffer, words, PLUGINSD_MAX_WORDS); const char *keyword = get_word(words, num_words, 0); @@ -5036,7 +5468,7 @@ void *reader_main(void *arg __maybe_unused) { char *function = get_word(words, num_words, 3); if(!transaction || !*transaction || !timeout_s || !*timeout_s || !function || !*function) { - error("Received incomplete %s (transaction = '%s', timeout = '%s', function = '%s'). 
Ignoring it.", + netdata_log_error("Received incomplete %s (transaction = '%s', timeout = '%s', function = '%s'). Ignoring it.", keyword, transaction?transaction:"(unset)", timeout_s?timeout_s:"(unset)", @@ -5062,12 +5494,12 @@ void *reader_main(void *arg __maybe_unused) { } } else - error("Received unknown command: %s", keyword?keyword:"(unset)"); + netdata_log_error("Received unknown command: %s", keyword?keyword:"(unset)"); } if(!s || feof(stdin) || ferror(stdin)) { apps_plugin_exit = true; - error("Received error on stdin."); + netdata_log_error("Received error on stdin."); } exit(1); @@ -5107,23 +5539,23 @@ int main(int argc, char **argv) { user_config_dir = getenv("NETDATA_USER_CONFIG_DIR"); if(user_config_dir == NULL) { - // info("NETDATA_CONFIG_DIR is not passed from netdata"); + // netdata_log_info("NETDATA_CONFIG_DIR is not passed from netdata"); user_config_dir = CONFIG_DIR; } - // else info("Found NETDATA_USER_CONFIG_DIR='%s'", user_config_dir); + // else netdata_log_info("Found NETDATA_USER_CONFIG_DIR='%s'", user_config_dir); stock_config_dir = getenv("NETDATA_STOCK_CONFIG_DIR"); if(stock_config_dir == NULL) { - // info("NETDATA_CONFIG_DIR is not passed from netdata"); + // netdata_log_info("NETDATA_CONFIG_DIR is not passed from netdata"); stock_config_dir = LIBCONFIG_DIR; } - // else info("Found NETDATA_USER_CONFIG_DIR='%s'", user_config_dir); + // else netdata_log_info("Found NETDATA_USER_CONFIG_DIR='%s'", user_config_dir); #ifdef NETDATA_INTERNAL_CHECKS if(debug_flags != 0) { struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY }; if(setrlimit(RLIMIT_CORE, &rl) != 0) - info("Cannot request unlimited core dumps for debugging... Proceeding anyway..."); + netdata_log_info("Cannot request unlimited core dumps for debugging... 
Proceeding anyway..."); #ifdef HAVE_SYS_PRCTL_H prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); #endif @@ -5147,14 +5579,14 @@ int main(int argc, char **argv) { if(!check_capabilities() && !am_i_running_as_root() && !check_proc_1_io()) { uid_t uid = getuid(), euid = geteuid(); #ifdef HAVE_CAPABILITY - error("apps.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. " + netdata_log_error("apps.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. " "Without these, apps.plugin cannot report disk I/O utilization of other processes. " "To enable capabilities run: sudo setcap cap_dac_read_search,cap_sys_ptrace+ep %s; " "To enable setuid to root run: sudo chown root:netdata %s; sudo chmod 4750 %s; " , uid, euid, argv[0], argv[0], argv[0] ); #else - error("apps.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. " + netdata_log_error("apps.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. " "Without these, apps.plugin cannot report disk I/O utilization of other processes. " "Your system does not support capabilities. " "To enable setuid to root run: sudo chown root:netdata %s; sudo chmod 4750 %s; " @@ -5163,7 +5595,7 @@ int main(int argc, char **argv) { #endif } - info("started on pid %d", getpid()); + netdata_log_info("started on pid %d", getpid()); snprintfz(all_user_ids.filename, FILENAME_MAX, "%s/etc/passwd", netdata_configured_host_prefix); debug_log("passwd file: '%s'", all_user_ids.filename); @@ -5215,14 +5647,13 @@ int main(int argc, char **argv) { get_MemTotal(); if(!collect_data_for_all_processes()) { - error("Cannot collect /proc data for running processes. Disabling apps.plugin..."); + netdata_log_error("Cannot collect /proc data for running processes. 
Disabling apps.plugin..."); printf("DISABLE\n"); netdata_mutex_unlock(&mutex); netdata_thread_cancel(reader_thread); exit(1); } - currentmaxfds = 0; calculate_netdata_statistics(); normalize_utilization(apps_groups_root_target); diff --git a/collectors/apps.plugin/metrics.csv b/collectors/apps.plugin/metrics.csv index e1ca3434..afda7a86 100644 --- a/collectors/apps.plugin/metrics.csv +++ b/collectors/apps.plugin/metrics.csv @@ -1,81 +1,81 @@ metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -system.processes_state,,"running, sleeping_interruptible, sleeping_uninterruptible, zombie, stopped",processes,"System Processes State",line,,apps.plugin, -apps.cpu,,a dimension per app group,percentage,"Apps CPU Time (100% = 1 core)",stacked,,apps.plugin, -apps.cpu_user,,a dimension per app group,percentage,"Apps CPU User Time (100% = 1 core)",stacked,,apps.plugin, -apps.cpu_system,,a dimension per app group,percentage,"Apps CPU System Time (100% = 1 core)",stacked,,apps.plugin, -apps.cpu_guest,,a dimension per app group,percentage,"Apps CPU Guest Time (100% = 1 core)",stacked,,apps.plugin, -apps.mem,,a dimension per app group,MiB,"Apps Real Memory (w/o shared)",stacked,,apps.plugin, -apps.rss,,a dimension per app group,MiB,"Apps Resident Set Size (w/shared)",stacked,,apps.plugin, -apps.vmem,,a dimension per app group,MiB,"Apps Virtual Memory Size",stacked,,apps.plugin, -apps.swap,,a dimension per app group,MiB,"Apps Swap Memory",stacked,,apps.plugin, -apps.major_faults,,a dimension per app group,"page faults/s","Apps Major Page Faults (swap read)",stacked,,apps.plugin, -apps.minor_faults,,a dimension per app group,"page faults/s","Apps Minor Page Faults (swap read)",stacked,,apps.plugin, -apps.preads,,a dimension per app group,"KiB/s","Apps Disk Reads",stacked,,apps.plugin, -apps.pwrites,,a dimension per app group,"KiB/s","Apps Disk Writes",stacked,,apps.plugin, -apps.lreads,,a dimension per app group,"KiB/s","Apps Disk Logical 
Reads",stacked,,apps.plugin, -apps.lwrites,,a dimension per app group,"KiB/s","Apps I/O Logical Writes",stacked,,apps.plugin, -apps.threads,,a dimension per app group,threads,"Apps Threads",stacked,,apps.plugin, -apps.processes,,a dimension per app group,processes,"Apps Processes",stacked,,apps.plugin, -apps.voluntary_ctxt_switches,,a dimension per app group,processes,"Apps Voluntary Context Switches",stacked,,apps.plugin, -apps.involuntary_ctxt_switches,,a dimension per app group,processes,"Apps Involuntary Context Switches",stacked,,apps.plugin, -apps.uptime,,a dimension per app group,seconds,"Apps Carried Over Uptime",line,,apps.plugin, -apps.uptime_min,,a dimension per app group,seconds,"Apps Minimum Uptime",line,,apps.plugin, -apps.uptime_avg,,a dimension per app group,seconds,"Apps Average Uptime",line,,apps.plugin, -apps.uptime_max,,a dimension per app group,seconds,"Apps Maximum Uptime",line,,apps.plugin, -apps.files,,a dimension per app group,"open files","Apps Open Files",stacked,,apps.plugin, -apps.sockets,,a dimension per app group,"open sockets","Apps Open Sockets",stacked,,apps.plugin, -apps.pipes,,a dimension per app group,"open pipes","Apps Open Pipes",stacked,,apps.plugin, -groups.cpu,,a dimension per user group,percentage,"User Groups CPU Time (100% = 1 core)",stacked,,apps.plugin, -groups.cpu_user,,a dimension per user group,percentage,"User Groups CPU User Time (100% = 1 core)",stacked,,apps.plugin, -groups.cpu_system,,a dimension per user group,percentage,"User Groups CPU System Time (100% = 1 core)",stacked,,apps.plugin, -groups.cpu_guest,,a dimension per user group,percentage,"User Groups CPU Guest Time (100% = 1 core)",stacked,,apps.plugin, -groups.mem,,a dimension per user group,MiB,"User Groups Real Memory (w/o shared)",stacked,,apps.plugin, -groups.rss,,a dimension per user group,MiB,"User Groups Resident Set Size (w/shared)",stacked,,apps.plugin, -groups.vmem,,a dimension per user group,MiB,"User Groups Virtual Memory 
Size",stacked,,apps.plugin, -groups.swap,,a dimension per user group,MiB,"User Groups Swap Memory",stacked,,apps.plugin, -groups.major_faults,,a dimension per user group,"page faults/s","User Groups Major Page Faults (swap read)",stacked,,apps.plugin, -groups.minor_faults,,a dimension per user group,"page faults/s","User Groups Page Faults (swap read)",stacked,,apps.plugin, -groups.preads,,a dimension per user group,"KiB/s","User Groups Disk Reads",stacked,,apps.plugin, -groups.pwrites,,a dimension per user group,"KiB/s","User Groups Disk Writes",stacked,,apps.plugin, -groups.lreads,,a dimension per user group,"KiB/s","User Groups Disk Logical Reads",stacked,,apps.plugin, -groups.lwrites,,a dimension per user group,"KiB/s","User Groups I/O Logical Writes",stacked,,apps.plugin, -groups.threads,,a dimension per user group,threads,"User Groups Threads",stacked,,apps.plugin, -groups.processes,,a dimension per user group,processes,"User Groups Processes",stacked,,apps.plugin, -groups.voluntary_ctxt_switches,,a dimension per app group,processes,"User Groups Voluntary Context Switches",stacked,,apps.plugin, -groups.involuntary_ctxt_switches,,a dimension per app group,processes,"User Groups Involuntary Context Switches",stacked,,apps.plugin, -groups.uptime,,a dimension per user group,seconds,"User Groups Carried Over Uptime",line,,apps.plugin, -groups.uptime_min,,a dimension per user group,seconds,"User Groups Minimum Uptime",line,,apps.plugin, -groups.uptime_avg,,a dimension per user group,seconds,"User Groups Average Uptime",line,,apps.plugin, -groups.uptime_max,,a dimension per user group,seconds,"User Groups Maximum Uptime",line,,apps.plugin, -groups.files,,a dimension per user group,"open files","User Groups Open Files",stacked,,apps.plugin, -groups.sockets,,a dimension per user group,"open sockets","User Groups Open Sockets",stacked,,apps.plugin, -groups.pipes,,a dimension per user group,"open pipes","User Groups Open Pipes",stacked,,apps.plugin, -users.cpu,,a 
dimension per user,percentage,"Users CPU Time (100% = 1 core)",stacked,,apps.plugin, -users.cpu_user,,a dimension per user,percentage,"Users CPU User Time (100% = 1 core)",stacked,,apps.plugin, -users.cpu_system,,a dimension per user,percentage,"Users CPU System Time (100% = 1 core)",stacked,,apps.plugin, -users.cpu_guest,,a dimension per user,percentage,"Users CPU Guest Time (100% = 1 core)",stacked,,apps.plugin, -users.mem,,a dimension per user,MiB,"Users Real Memory (w/o shared)",stacked,,apps.plugin, -users.rss,,a dimension per user,MiB,"Users Resident Set Size (w/shared)",stacked,,apps.plugin, -users.vmem,,a dimension per user,MiB,"Users Virtual Memory Size",stacked,,apps.plugin, -users.swap,,a dimension per user,MiB,"Users Swap Memory",stacked,,apps.plugin, -users.major_faults,,a dimension per user,"page faults/s","Users Major Page Faults (swap read)",stacked,,apps.plugin, -users.minor_faults,,a dimension per user,"page faults/s","Users Page Faults (swap read)",stacked,,apps.plugin, -users.preads,,a dimension per user,"KiB/s","Users Disk Reads",stacked,,apps.plugin, -users.pwrites,,a dimension per user,"KiB/s","Users Disk Writes",stacked,,apps.plugin, -users.lreads,,a dimension per user,"KiB/s","Users Disk Logical Reads",stacked,,apps.plugin, -users.lwrites,,a dimension per user,"KiB/s","Users I/O Logical Writes",stacked,,apps.plugin, -users.threads,,a dimension per user,threads,"Users Threads",stacked,,apps.plugin, -users.processes,,a dimension per user,processes,"Users Processes",stacked,,apps.plugin, -users.voluntary_ctxt_switches,,a dimension per app group,processes,"Users Voluntary Context Switches",stacked,,apps.plugin, -users.involuntary_ctxt_switches,,a dimension per app group,processes,"Users Involuntary Context Switches",stacked,,apps.plugin, -users.uptime,,a dimension per user,seconds,"Users Carried Over Uptime",line,,apps.plugin, -users.uptime_min,,a dimension per user,seconds,"Users Minimum Uptime",line,,apps.plugin, -users.uptime_avg,,a 
dimension per user,seconds,"Users Average Uptime",line,,apps.plugin, -users.uptime_max,,a dimension per user,seconds,"Users Maximum Uptime",line,,apps.plugin, -users.files,,a dimension per user,"open files","Users Open Files",stacked,,apps.plugin, -users.sockets,,a dimension per user,"open sockets","Users Open Sockets",stacked,,apps.plugin, -users.pipes,,a dimension per user,"open pipes","Users Open Pipes",stacked,,apps.plugin, -netdata.apps_cpu,,"user, system",milliseconds/s,"Apps Plugin CPU",stacked,,apps.plugin, -netdata.apps_sizes,,"calls, files, filenames, inode_changes, link_changes, pids, fds, targets, new_pids",files/s,"Apps Plugin Files",line,,apps.plugin, -netdata.apps_fix,,"utime, stime, gtime, minflt, majflt",percentage,"Apps Plugin Normalization Ratios",line,,apps.plugin, -netdata.apps_children_fix,,"utime, stime, gtime, minflt, majflt",percentage,"Apps Plugin Exited Children Normalization Ratios",line,,apps.plugin,
\ No newline at end of file +system.processes_state,,"running, sleeping_interruptible, sleeping_uninterruptible, zombie, stopped",processes,"System Processes State",line,,apps.plugin,system +apps.cpu,,a dimension per app group,percentage,"Apps CPU Time (100% = 1 core)",stacked,,apps.plugin,apps +apps.cpu_user,,a dimension per app group,percentage,"Apps CPU User Time (100% = 1 core)",stacked,,apps.plugin,apps +apps.cpu_system,,a dimension per app group,percentage,"Apps CPU System Time (100% = 1 core)",stacked,,apps.plugin,apps +apps.cpu_guest,,a dimension per app group,percentage,"Apps CPU Guest Time (100% = 1 core)",stacked,,apps.plugin,apps +apps.mem,,a dimension per app group,MiB,"Apps Real Memory (w/o shared)",stacked,,apps.plugin,apps +apps.rss,,a dimension per app group,MiB,"Apps Resident Set Size (w/shared)",stacked,,apps.plugin,apps +apps.vmem,,a dimension per app group,MiB,"Apps Virtual Memory Size",stacked,,apps.plugin,apps +apps.swap,,a dimension per app group,MiB,"Apps Swap Memory",stacked,,apps.plugin,apps +apps.major_faults,,a dimension per app group,"page faults/s","Apps Major Page Faults (swap read)",stacked,,apps.plugin,apps +apps.minor_faults,,a dimension per app group,"page faults/s","Apps Minor Page Faults (swap read)",stacked,,apps.plugin,apps +apps.preads,,a dimension per app group,"KiB/s","Apps Disk Reads",stacked,,apps.plugin,apps +apps.pwrites,,a dimension per app group,"KiB/s","Apps Disk Writes",stacked,,apps.plugin,apps +apps.lreads,,a dimension per app group,"KiB/s","Apps Disk Logical Reads",stacked,,apps.plugin,apps +apps.lwrites,,a dimension per app group,"KiB/s","Apps I/O Logical Writes",stacked,,apps.plugin,apps +apps.threads,,a dimension per app group,threads,"Apps Threads",stacked,,apps.plugin,apps +apps.processes,,a dimension per app group,processes,"Apps Processes",stacked,,apps.plugin,apps +apps.voluntary_ctxt_switches,,a dimension per app group,processes,"Apps Voluntary Context Switches",stacked,,apps.plugin,apps 
+apps.involuntary_ctxt_switches,,a dimension per app group,processes,"Apps Involuntary Context Switches",stacked,,apps.plugin,apps +apps.uptime,,a dimension per app group,seconds,"Apps Carried Over Uptime",line,,apps.plugin,apps +apps.uptime_min,,a dimension per app group,seconds,"Apps Minimum Uptime",line,,apps.plugin,apps +apps.uptime_avg,,a dimension per app group,seconds,"Apps Average Uptime",line,,apps.plugin,apps +apps.uptime_max,,a dimension per app group,seconds,"Apps Maximum Uptime",line,,apps.plugin,apps +apps.files,,a dimension per app group,"open files","Apps Open Files",stacked,,apps.plugin,apps +apps.sockets,,a dimension per app group,"open sockets","Apps Open Sockets",stacked,,apps.plugin,apps +apps.pipes,,a dimension per app group,"open pipes","Apps Open Pipes",stacked,,apps.plugin,apps +groups.cpu,,a dimension per user group,percentage,"User Groups CPU Time (100% = 1 core)",stacked,,apps.plugin,groups +groups.cpu_user,,a dimension per user group,percentage,"User Groups CPU User Time (100% = 1 core)",stacked,,apps.plugin,groups +groups.cpu_system,,a dimension per user group,percentage,"User Groups CPU System Time (100% = 1 core)",stacked,,apps.plugin,groups +groups.cpu_guest,,a dimension per user group,percentage,"User Groups CPU Guest Time (100% = 1 core)",stacked,,apps.plugin,groups +groups.mem,,a dimension per user group,MiB,"User Groups Real Memory (w/o shared)",stacked,,apps.plugin,groups +groups.rss,,a dimension per user group,MiB,"User Groups Resident Set Size (w/shared)",stacked,,apps.plugin,groups +groups.vmem,,a dimension per user group,MiB,"User Groups Virtual Memory Size",stacked,,apps.plugin,groups +groups.swap,,a dimension per user group,MiB,"User Groups Swap Memory",stacked,,apps.plugin,groups +groups.major_faults,,a dimension per user group,"page faults/s","User Groups Major Page Faults (swap read)",stacked,,apps.plugin,groups +groups.minor_faults,,a dimension per user group,"page faults/s","User Groups Page Faults (swap 
read)",stacked,,apps.plugin,groups +groups.preads,,a dimension per user group,"KiB/s","User Groups Disk Reads",stacked,,apps.plugin,groups +groups.pwrites,,a dimension per user group,"KiB/s","User Groups Disk Writes",stacked,,apps.plugin,groups +groups.lreads,,a dimension per user group,"KiB/s","User Groups Disk Logical Reads",stacked,,apps.plugin,groups +groups.lwrites,,a dimension per user group,"KiB/s","User Groups I/O Logical Writes",stacked,,apps.plugin,groups +groups.threads,,a dimension per user group,threads,"User Groups Threads",stacked,,apps.plugin,groups +groups.processes,,a dimension per user group,processes,"User Groups Processes",stacked,,apps.plugin,groups +groups.voluntary_ctxt_switches,,a dimension per app group,processes,"User Groups Voluntary Context Switches",stacked,,apps.plugin,groups +groups.involuntary_ctxt_switches,,a dimension per app group,processes,"User Groups Involuntary Context Switches",stacked,,apps.plugin,groups +groups.uptime,,a dimension per user group,seconds,"User Groups Carried Over Uptime",line,,apps.plugin,groups +groups.uptime_min,,a dimension per user group,seconds,"User Groups Minimum Uptime",line,,apps.plugin,groups +groups.uptime_avg,,a dimension per user group,seconds,"User Groups Average Uptime",line,,apps.plugin,groups +groups.uptime_max,,a dimension per user group,seconds,"User Groups Maximum Uptime",line,,apps.plugin,groups +groups.files,,a dimension per user group,"open files","User Groups Open Files",stacked,,apps.plugin,groups +groups.sockets,,a dimension per user group,"open sockets","User Groups Open Sockets",stacked,,apps.plugin,groups +groups.pipes,,a dimension per user group,"open pipes","User Groups Open Pipes",stacked,,apps.plugin,groups +users.cpu,,a dimension per user,percentage,"Users CPU Time (100% = 1 core)",stacked,,apps.plugin,users +users.cpu_user,,a dimension per user,percentage,"Users CPU User Time (100% = 1 core)",stacked,,apps.plugin,users +users.cpu_system,,a dimension per 
user,percentage,"Users CPU System Time (100% = 1 core)",stacked,,apps.plugin,users +users.cpu_guest,,a dimension per user,percentage,"Users CPU Guest Time (100% = 1 core)",stacked,,apps.plugin,users +users.mem,,a dimension per user,MiB,"Users Real Memory (w/o shared)",stacked,,apps.plugin,users +users.rss,,a dimension per user,MiB,"Users Resident Set Size (w/shared)",stacked,,apps.plugin,users +users.vmem,,a dimension per user,MiB,"Users Virtual Memory Size",stacked,,apps.plugin,users +users.swap,,a dimension per user,MiB,"Users Swap Memory",stacked,,apps.plugin,users +users.major_faults,,a dimension per user,"page faults/s","Users Major Page Faults (swap read)",stacked,,apps.plugin,users +users.minor_faults,,a dimension per user,"page faults/s","Users Page Faults (swap read)",stacked,,apps.plugin,users +users.preads,,a dimension per user,"KiB/s","Users Disk Reads",stacked,,apps.plugin,users +users.pwrites,,a dimension per user,"KiB/s","Users Disk Writes",stacked,,apps.plugin,users +users.lreads,,a dimension per user,"KiB/s","Users Disk Logical Reads",stacked,,apps.plugin,users +users.lwrites,,a dimension per user,"KiB/s","Users I/O Logical Writes",stacked,,apps.plugin,users +users.threads,,a dimension per user,threads,"Users Threads",stacked,,apps.plugin,users +users.processes,,a dimension per user,processes,"Users Processes",stacked,,apps.plugin,users +users.voluntary_ctxt_switches,,a dimension per app group,processes,"Users Voluntary Context Switches",stacked,,apps.plugin,users +users.involuntary_ctxt_switches,,a dimension per app group,processes,"Users Involuntary Context Switches",stacked,,apps.plugin,users +users.uptime,,a dimension per user,seconds,"Users Carried Over Uptime",line,,apps.plugin,users +users.uptime_min,,a dimension per user,seconds,"Users Minimum Uptime",line,,apps.plugin,users +users.uptime_avg,,a dimension per user,seconds,"Users Average Uptime",line,,apps.plugin,users +users.uptime_max,,a dimension per user,seconds,"Users Maximum 
Uptime",line,,apps.plugin,users +users.files,,a dimension per user,"open files","Users Open Files",stacked,,apps.plugin,users +users.sockets,,a dimension per user,"open sockets","Users Open Sockets",stacked,,apps.plugin,users +users.pipes,,a dimension per user,"open pipes","Users Open Pipes",stacked,,apps.plugin,users +netdata.apps_cpu,,"user, system",milliseconds/s,"Apps Plugin CPU",stacked,,apps.plugin,netdata +netdata.apps_sizes,,"calls, files, filenames, inode_changes, link_changes, pids, fds, targets, new_pids",files/s,"Apps Plugin Files",line,,apps.plugin,netdata +netdata.apps_fix,,"utime, stime, gtime, minflt, majflt",percentage,"Apps Plugin Normalization Ratios",line,,apps.plugin,netdata +netdata.apps_children_fix,,"utime, stime, gtime, minflt, majflt",percentage,"Apps Plugin Exited Children Normalization Ratios",line,,apps.plugin,netdata
\ No newline at end of file diff --git a/collectors/apps.plugin/multi_metadata.yaml b/collectors/apps.plugin/multi_metadata.yaml new file mode 100644 index 00000000..2bdb3dbf --- /dev/null +++ b/collectors/apps.plugin/multi_metadata.yaml @@ -0,0 +1,662 @@ +name: apps.plugin +modules: + # removed system.processes_state + - meta: + plugin_name: apps.plugin + module_name: apps + monitored_instance: + name: Applications + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: + - applications + - processes + - os + - host monitoring + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: apps.cpu + description: Apps CPU Time (100% = 1 core) + unit: "percentage" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.cpu_user + description: Apps CPU User Time (100% = 1 core) + unit: "percentage" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.cpu_system + description: Apps CPU System Time (100% = 1 core) + unit: "percentage" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.cpu_guest + description: Apps CPU Guest Time 
(100% = 1 core) + unit: "percentage" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.mem + description: Apps Real Memory (w/o shared) + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.rss + description: Apps Resident Set Size (w/shared) + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vmem + description: Apps Virtual Memory Size + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.swap + description: Apps Swap Memory + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.major_faults + description: Apps Major Page Faults (swap read) + unit: "page faults/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.minor_faults + description: Apps Minor Page Faults (swap read) + unit: "page faults/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.preads + description: Apps Disk Reads + unit: "KiB/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.pwrites + description: Apps Disk Writes + unit: "KiB/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.lreads + description: Apps Disk Logical Reads + unit: "KiB/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.lwrites + description: Apps I/O Logical Writes + unit: "KiB/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.threads + description: Apps Threads + unit: "threads" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.processes + description: Apps Processes + unit: "processes" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.voluntary_ctxt_switches + description: Apps Voluntary Context 
Switches + unit: "processes" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.involuntary_ctxt_switches + description: Apps Involuntary Context Switches + unit: "processes" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.uptime + description: Apps Carried Over Uptime + unit: "seconds" + chart_type: line + dimensions: + - name: a dimension per app group + - name: apps.uptime_min + description: Apps Minimum Uptime + unit: "seconds" + chart_type: line + dimensions: + - name: a dimension per app group + - name: apps.uptime_avg + description: Apps Average Uptime + unit: "seconds" + chart_type: line + dimensions: + - name: a dimension per app group + - name: apps.uptime_max + description: Apps Maximum Uptime + unit: "seconds" + chart_type: line + dimensions: + - name: a dimension per app group + - name: apps.files + description: Apps Open Files + unit: "open files" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.sockets + description: Apps Open Sockets + unit: "open sockets" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.pipes + description: Apps Open Pipes + unit: "open pipes" + chart_type: stacked + dimensions: + - name: a dimension per app group + - meta: + plugin_name: apps.plugin + module_name: groups + monitored_instance: + name: User Groups + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: + - groups + - processes + - user auditing + - authorization + - os + - host monitoring + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + 
performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: groups.cpu + description: User Groups CPU Time (100% = 1 core) + unit: "percentage" + chart_type: stacked + dimensions: + - name: a dimension per user group + - name: groups.cpu_user + description: User Groups CPU User Time (100% = 1 core) + unit: "percentage" + chart_type: stacked + dimensions: + - name: a dimension per user group + - name: groups.cpu_system + description: User Groups CPU System Time (100% = 1 core) + unit: "percentage" + chart_type: stacked + dimensions: + - name: a dimension per user group + - name: groups.cpu_guest + description: User Groups CPU Guest Time (100% = 1 core) + unit: "percentage" + chart_type: stacked + dimensions: + - name: a dimension per user group + - name: groups.mem + description: User Groups Real Memory (w/o shared) + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per user group + - name: groups.rss + description: User Groups Resident Set Size (w/shared) + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per user group + - name: groups.vmem + description: User Groups Virtual Memory Size + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per user group + - name: groups.swap + description: User Groups Swap Memory + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per user group + - name: groups.major_faults + description: User Groups Major Page Faults (swap read) + unit: "page faults/s" + chart_type: stacked + dimensions: + - name: a dimension per user 
group + - name: groups.minor_faults + description: User Groups Page Faults (swap read) + unit: "page faults/s" + chart_type: stacked + dimensions: + - name: a dimension per user group + - name: groups.preads + description: User Groups Disk Reads + unit: "KiB/s" + chart_type: stacked + dimensions: + - name: a dimension per user group + - name: groups.pwrites + description: User Groups Disk Writes + unit: "KiB/s" + chart_type: stacked + dimensions: + - name: a dimension per user group + - name: groups.lreads + description: User Groups Disk Logical Reads + unit: "KiB/s" + chart_type: stacked + dimensions: + - name: a dimension per user group + - name: groups.lwrites + description: User Groups I/O Logical Writes + unit: "KiB/s" + chart_type: stacked + dimensions: + - name: a dimension per user group + - name: groups.threads + description: User Groups Threads + unit: "threads" + chart_type: stacked + dimensions: + - name: a dimension per user group + - name: groups.processes + description: User Groups Processes + unit: "processes" + chart_type: stacked + dimensions: + - name: a dimension per user group + - name: groups.voluntary_ctxt_switches + description: User Groups Voluntary Context Switches + unit: "processes" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: groups.involuntary_ctxt_switches + description: User Groups Involuntary Context Switches + unit: "processes" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: groups.uptime + description: User Groups Carried Over Uptime + unit: "seconds" + chart_type: line + dimensions: + - name: a dimension per user group + - name: groups.uptime_min + description: User Groups Minimum Uptime + unit: "seconds" + chart_type: line + dimensions: + - name: a dimension per user group + - name: groups.uptime_avg + description: User Groups Average Uptime + unit: "seconds" + chart_type: line + dimensions: + - name: a dimension per user group + - name: groups.uptime_max + 
description: User Groups Maximum Uptime + unit: "seconds" + chart_type: line + dimensions: + - name: a dimension per user group + - name: groups.files + description: User Groups Open Files + unit: "open files" + chart_type: stacked + dimensions: + - name: a dimension per user group + - name: groups.sockets + description: User Groups Open Sockets + unit: "open sockets" + chart_type: stacked + dimensions: + - name: a dimension per user group + - name: groups.pipes + description: User Groups Open Pipes + unit: "open pipes" + chart_type: stacked + dimensions: + - name: a dimension per user group + - meta: + plugin_name: apps.plugin + module_name: users + monitored_instance: + name: Users + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: + - users + - processes + - os + - host monitoring + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: users.cpu + description: Users CPU Time (100% = 1 core) + unit: "percentage" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: users.cpu_user + description: Users CPU User Time (100% = 1 core) + unit: "percentage" + chart_type: stacked + dimensions: + - 
name: a dimension per user + - name: users.cpu_system + description: Users CPU System Time (100% = 1 core) + unit: "percentage" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: users.cpu_guest + description: Users CPU Guest Time (100% = 1 core) + unit: "percentage" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: users.mem + description: Users Real Memory (w/o shared) + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: users.rss + description: Users Resident Set Size (w/shared) + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: users.vmem + description: Users Virtual Memory Size + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: users.swap + description: Users Swap Memory + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: users.major_faults + description: Users Major Page Faults (swap read) + unit: "page faults/s" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: users.minor_faults + description: Users Minor Page Faults + unit: "page faults/s" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: users.preads + description: Users Disk Reads + unit: "KiB/s" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: users.pwrites + description: Users Disk Writes + unit: "KiB/s" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: users.lreads + description: Users Disk Logical Reads + unit: "KiB/s" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: users.lwrites + description: Users I/O Logical Writes + unit: "KiB/s" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: users.threads + description: Users Threads + unit: "threads" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: 
users.processes + description: Users Processes + unit: "processes" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: users.voluntary_ctxt_switches + description: Users Voluntary Context Switches + unit: "switches/s" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: users.involuntary_ctxt_switches + description: Users Involuntary Context Switches + unit: "switches/s" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: users.uptime + description: Users Carried Over Uptime + unit: "seconds" + chart_type: line + dimensions: + - name: a dimension per user + - name: users.uptime_min + description: Users Minimum Uptime + unit: "seconds" + chart_type: line + dimensions: + - name: a dimension per user + - name: users.uptime_avg + description: Users Average Uptime + unit: "seconds" + chart_type: line + dimensions: + - name: a dimension per user + - name: users.uptime_max + description: Users Maximum Uptime + unit: "seconds" + chart_type: line + dimensions: + - name: a dimension per user + - name: users.files + description: Users Open Files + unit: "open files" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: users.sockets + description: Users Open Sockets + unit: "open sockets" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: users.pipes + description: Users Open Pipes + unit: "open pipes" + chart_type: stacked + dimensions: + - name: a dimension per user diff --git a/collectors/cgroups.plugin/cgroup-name.sh b/collectors/cgroups.plugin/cgroup-name.sh index 9a5812f3..6edd9d9f 100755 --- a/collectors/cgroups.plugin/cgroup-name.sh +++ b/collectors/cgroups.plugin/cgroup-name.sh @@ -132,6 +132,22 @@ function add_lbl_prefix() { echo "${new_labels:0:-1}" # trim last ',' } +function remove_lbl() { + local orig_labels lbl_name + orig_labels="${1}" + lbl_name="${2}" + + IFS=, read -ra labels <<< "$orig_labels" + + local new_labels + for l in 
"${labels[@]}"; do + IFS="=" read -r lname lval <<< "$l" + [ "$lbl_name" != "$lname" ] && new_labels+="${l}," + done + + echo "${new_labels:0:-1}" # trim last ',' +} + function k8s_is_pause_container() { local cgroup_path="${1}" @@ -385,6 +401,8 @@ function k8s_get_kubepod_name() { name+="_$(get_lbl_val "$labels" namespace)" name+="_$(get_lbl_val "$labels" pod_name)" name+="_$(get_lbl_val "$labels" container_name)" + labels=$(remove_lbl "$labels" "container_id") + labels=$(remove_lbl "$labels" "pod_uid") labels=$(add_lbl_prefix "$labels" "k8s_") name+=" $labels" else @@ -400,6 +418,7 @@ function k8s_get_kubepod_name() { name="pod" name+="_$(get_lbl_val "$labels" namespace)" name+="_$(get_lbl_val "$labels" pod_name)" + labels=$(remove_lbl "$labels" "pod_uid") labels=$(add_lbl_prefix "$labels" "k8s_") name+=" $labels" else diff --git a/collectors/cgroups.plugin/multi_metadata.yaml b/collectors/cgroups.plugin/multi_metadata.yaml new file mode 100644 index 00000000..b2b13c2d --- /dev/null +++ b/collectors/cgroups.plugin/multi_metadata.yaml @@ -0,0 +1,1023 @@ +name: cgroups.plugin +modules: + - &module + meta: &meta + plugin_name: cgroups.plugin + module_name: /sys/fs/cgroup + monitored_instance: + name: Containers + link: "" + categories: + - data-collection.containers-and-vms + icon_filename: netdata.png + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - containers + most_popular: true + overview: + data_collection: + metrics_description: "" + method_description: "" + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: "" + description: "" + options: + description: "" + folding: + title: "" + enabled: true + list: [] + examples: + folding: + 
enabled: true + title: "" + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: cgroup_10min_cpu_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf + metric: cgroup.cpu_limit + info: average cgroup CPU utilization over the last 10 minutes + - name: cgroup_ram_in_use + link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf + metric: cgroup.mem_usage + info: cgroup memory utilization + - name: cgroup_1m_received_packets_rate + link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf + metric: cgroup.net_packets + info: average number of packets received by the network interface ${label:device} over the last minute + - name: cgroup_10s_received_packets_storm + link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf + metric: cgroup.net_packets + info: + ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over + the last minute + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: cgroup + description: "" + labels: + - name: container_name + description: TBD + - name: image + description: TBD + metrics: + - name: cgroup.cpu_limit + description: CPU Usage within the limits + unit: "percentage" + chart_type: line + dimensions: + - name: used + - name: cgroup.cpu + description: CPU Usage (100% = 1 core) + unit: "percentage" + chart_type: stacked + dimensions: + - name: user + - name: system + - name: cgroup.cpu_per_core + description: CPU Usage (100% = 1 core) Per Core + unit: "percentage" + chart_type: line + dimensions: + - name: a dimension per core + - name: cgroup.throttled + description: CPU Throttled Runnable Periods + unit: "percentage" + chart_type: line + dimensions: + - name: throttled + - name: cgroup.throttled_duration + description: CPU Throttled Time Duration + unit: "ms" + chart_type: 
line + dimensions: + - name: duration + - name: cgroup.cpu_shares + description: CPU Time Relative Share + unit: "shares" + chart_type: line + dimensions: + - name: shares + - name: cgroup.mem + description: Memory Usage + unit: "MiB" + chart_type: stacked + dimensions: + - name: cache + - name: rss + - name: swap + - name: rss_huge + - name: mapped_file + - name: cgroup.writeback + description: Writeback Memory + unit: "MiB" + chart_type: area + dimensions: + - name: dirty + - name: writeback + - name: cgroup.mem_activity + description: Memory Activity + unit: "MiB/s" + chart_type: line + dimensions: + - name: in + - name: out + - name: cgroup.pgfaults + description: Memory Page Faults + unit: "MiB/s" + chart_type: line + dimensions: + - name: pgfault + - name: swap + - name: cgroup.mem_usage + description: Used Memory + unit: "MiB" + chart_type: stacked + dimensions: + - name: ram + - name: swap + - name: cgroup.mem_usage_limit + description: Used RAM within the limits + unit: "MiB" + chart_type: stacked + dimensions: + - name: available + - name: used + - name: cgroup.mem_utilization + description: Memory Utilization + unit: "percentage" + chart_type: line + dimensions: + - name: utilization + - name: cgroup.mem_failcnt + description: Memory Limit Failures + unit: "count" + chart_type: line + dimensions: + - name: failures + - name: cgroup.io + description: I/O Bandwidth (all disks) + unit: "KiB/s" + chart_type: area + dimensions: + - name: read + - name: write + - name: cgroup.serviced_ops + description: Serviced I/O Operations (all disks) + unit: "operations/s" + chart_type: line + dimensions: + - name: read + - name: write + - name: cgroup.throttle_io + description: Throttle I/O Bandwidth (all disks) + unit: "KiB/s" + chart_type: area + dimensions: + - name: read + - name: write + - name: cgroup.throttle_serviced_ops + description: Throttle Serviced I/O Operations (all disks) + unit: "operations/s" + chart_type: line + dimensions: + - name: read + - name: 
write + - name: cgroup.queued_ops + description: Queued I/O Operations (all disks) + unit: "operations" + chart_type: line + dimensions: + - name: read + - name: write + - name: cgroup.merged_ops + description: Merged I/O Operations (all disks) + unit: "operations/s" + chart_type: line + dimensions: + - name: read + - name: write + - name: cgroup.cpu_some_pressure + description: CPU some pressure + unit: "percentage" + chart_type: line + dimensions: + - name: some10 + - name: some60 + - name: some300 + - name: cgroup.cpu_some_pressure_stall_time + description: CPU some pressure stall time + unit: "ms" + chart_type: line + dimensions: + - name: time + - name: cgroup.cpu_full_pressure + description: CPU full pressure + unit: "percentage" + chart_type: line + dimensions: + - name: some10 + - name: some60 + - name: some300 + - name: cgroup.cpu_full_pressure_stall_time + description: CPU full pressure stall time + unit: "ms" + chart_type: line + dimensions: + - name: time + - name: cgroup.memory_some_pressure + description: Memory some pressure + unit: "percentage" + chart_type: line + dimensions: + - name: some10 + - name: some60 + - name: some300 + - name: cgroup.memory_some_pressure_stall_time + description: Memory some pressure stall time + unit: "ms" + chart_type: line + dimensions: + - name: time + - name: cgroup.memory_full_pressure + description: Memory full pressure + unit: "percentage" + chart_type: line + dimensions: + - name: some10 + - name: some60 + - name: some300 + - name: cgroup.memory_full_pressure_stall_time + description: Memory full pressure stall time + unit: "ms" + chart_type: line + dimensions: + - name: time + - name: cgroup.io_some_pressure + description: I/O some pressure + unit: "percentage" + chart_type: line + dimensions: + - name: some10 + - name: some60 + - name: some300 + - name: cgroup.io_some_pressure_stall_time + description: I/O some pressure stall time + unit: "ms" + chart_type: line + dimensions: + - name: time + - name: 
cgroup.io_full_pressure + description: I/O full pressure + unit: "percentage" + chart_type: line + dimensions: + - name: some10 + - name: some60 + - name: some300 + - name: cgroup.io_full_pressure_stall_time + description: I/O full pressure stall time + unit: "ms" + chart_type: line + dimensions: + - name: time + - name: cgroup network device + description: "" + labels: + - name: container_name + description: TBD + - name: image + description: TBD + - name: device + description: TBD + - name: interface_type + description: TBD + metrics: + - name: cgroup.net_net + description: Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: received + - name: sent + - name: cgroup.net_packets + description: Packets + unit: "pps" + chart_type: line + dimensions: + - name: received + - name: sent + - name: multicast + - name: cgroup.net_errors + description: Interface Errors + unit: "errors/s" + chart_type: line + dimensions: + - name: inbound + - name: outbound + - name: cgroup.net_drops + description: Interface Drops + unit: "errors/s" + chart_type: line + dimensions: + - name: inbound + - name: outbound + - name: cgroup.net_fifo + description: Interface FIFO Buffer Errors + unit: "errors/s" + chart_type: line + dimensions: + - name: receive + - name: transmit + - name: cgroup.net_compressed + description: Compressed Packets + unit: "pps" + chart_type: line + dimensions: + - name: receive + - name: sent + - name: cgroup.net_events + description: Network Interface Events + unit: "events/s" + chart_type: line + dimensions: + - name: frames + - name: collisions + - name: carrier + - name: cgroup.net_operstate + description: Interface Operational State + unit: "state" + chart_type: line + dimensions: + - name: up + - name: down + - name: notpresent + - name: lowerlayerdown + - name: testing + - name: dormant + - name: unknown + - name: cgroup.net_carrier + description: Interface Physical Link State + unit: "state" + chart_type: line + dimensions: + - 
name: up + - name: down + - name: cgroup.net_mtu + description: Interface MTU + unit: "octets" + chart_type: line + dimensions: + - name: mtu + + - <<: *module + meta: + <<: *meta + monitored_instance: + name: Kubernetes Containers + link: https://kubernetes.io/ + icon_filename: k8s.png + categories: + - data-collection.containers-vms + - data-collection.kubernetes + keywords: + - k8s + - kubernetes + - pods + - containers + alerts: + - name: k8s_cgroup_10min_cpu_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf + metric: k8s.cgroup.cpu_limit + info: average cgroup CPU utilization over the last 10 minutes + - name: k8s_cgroup_ram_in_use + link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf + metric: k8s.cgroup.mem_usage + info: cgroup memory utilization + - name: k8s_cgroup_1m_received_packets_rate + link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf + metric: k8s.cgroup.net_packets + info: average number of packets received by the network interface ${label:device} over the last minute + - name: k8s_cgroup_10s_received_packets_storm + link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf + metric: k8s.cgroup.net_packets + info: + ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over + the last minute + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: k8s cgroup + description: "" + labels: + - name: k8s_namespace + description: TBD + - name: k8s_pod_name + description: TBD + - name: k8s_pod_uid + description: TBD + - name: k8s_controller_kind + description: TBD + - name: k8s_controller_name + description: TBD + - name: k8s_node_name + description: TBD + - name: k8s_container_name + description: TBD + - name: k8s_container_id + description: TBD + - name: k8s_kind + description: TBD + - name: 
k8s_qos_class + description: TBD + - name: k8s_cluster_id + description: TBD + metrics: + - name: k8s.cgroup.cpu_limit + description: CPU Usage within the limits + unit: "percentage" + chart_type: line + dimensions: + - name: used + - name: k8s.cgroup.cpu + description: CPU Usage (100% = 1000 mCPU) + unit: "percentage" + chart_type: stacked + dimensions: + - name: user + - name: system + - name: k8s.cgroup.cpu_per_core + description: CPU Usage (100% = 1000 mCPU) Per Core + unit: "percentage" + chart_type: line + dimensions: + - name: a dimension per core + - name: k8s.cgroup.throttled + description: CPU Throttled Runnable Periods + unit: "percentage" + chart_type: line + dimensions: + - name: throttled + - name: k8s.cgroup.throttled_duration + description: CPU Throttled Time Duration + unit: "ms" + chart_type: line + dimensions: + - name: duration + - name: k8s.cgroup.cpu_shares + description: CPU Time Relative Share + unit: "shares" + chart_type: line + dimensions: + - name: shares + - name: k8s.cgroup.mem + description: Memory Usage + unit: "MiB" + chart_type: stacked + dimensions: + - name: cache + - name: rss + - name: swap + - name: rss_huge + - name: mapped_file + - name: k8s.cgroup.writeback + description: Writeback Memory + unit: "MiB" + chart_type: area + dimensions: + - name: dirty + - name: writeback + - name: k8s.cgroup.mem_activity + description: Memory Activity + unit: "MiB/s" + chart_type: line + dimensions: + - name: in + - name: out + - name: k8s.cgroup.pgfaults + description: Memory Page Faults + unit: "MiB/s" + chart_type: line + dimensions: + - name: pgfault + - name: swap + - name: k8s.cgroup.mem_usage + description: Used Memory + unit: "MiB" + chart_type: stacked + dimensions: + - name: ram + - name: swap + - name: k8s.cgroup.mem_usage_limit + description: Used RAM within the limits + unit: "MiB" + chart_type: stacked + dimensions: + - name: available + - name: used + - name: k8s.cgroup.mem_utilization + description: Memory Utilization + unit: 
"percentage" + chart_type: line + dimensions: + - name: utilization + - name: k8s.cgroup.mem_failcnt + description: Memory Limit Failures + unit: "count" + chart_type: line + dimensions: + - name: failures + - name: k8s.cgroup.io + description: I/O Bandwidth (all disks) + unit: "KiB/s" + chart_type: area + dimensions: + - name: read + - name: write + - name: k8s.cgroup.serviced_ops + description: Serviced I/O Operations (all disks) + unit: "operations/s" + chart_type: line + dimensions: + - name: read + - name: write + - name: k8s.cgroup.throttle_io + description: Throttle I/O Bandwidth (all disks) + unit: "KiB/s" + chart_type: area + dimensions: + - name: read + - name: write + - name: k8s.cgroup.throttle_serviced_ops + description: Throttle Serviced I/O Operations (all disks) + unit: "operations/s" + chart_type: line + dimensions: + - name: read + - name: write + - name: k8s.cgroup.queued_ops + description: Queued I/O Operations (all disks) + unit: "operations" + chart_type: line + dimensions: + - name: read + - name: write + - name: k8s.cgroup.merged_ops + description: Merged I/O Operations (all disks) + unit: "operations/s" + chart_type: line + dimensions: + - name: read + - name: write + - name: k8s.cgroup.cpu_some_pressure + description: CPU some pressure + unit: "percentage" + chart_type: line + dimensions: + - name: some10 + - name: some60 + - name: some300 + - name: k8s.cgroup.cpu_some_pressure_stall_time + description: CPU some pressure stall time + unit: "ms" + chart_type: line + dimensions: + - name: time + - name: k8s.cgroup.cpu_full_pressure + description: CPU full pressure + unit: "percentage" + chart_type: line + dimensions: + - name: some10 + - name: some60 + - name: some300 + - name: k8s.cgroup.cpu_full_pressure_stall_time + description: CPU full pressure stall time + unit: "ms" + chart_type: line + dimensions: + - name: time + - name: k8s.cgroup.memory_some_pressure + description: Memory some pressure + unit: "percentage" + chart_type: line + 
dimensions: + - name: some10 + - name: some60 + - name: some300 + - name: k8s.cgroup.memory_some_pressure_stall_time + description: Memory some pressure stall time + unit: "ms" + chart_type: line + dimensions: + - name: time + - name: k8s.cgroup.memory_full_pressure + description: Memory full pressure + unit: "percentage" + chart_type: line + dimensions: + - name: some10 + - name: some60 + - name: some300 + - name: k8s.cgroup.memory_full_pressure_stall_time + description: Memory full pressure stall time + unit: "ms" + chart_type: line + dimensions: + - name: time + - name: k8s.cgroup.io_some_pressure + description: I/O some pressure + unit: "percentage" + chart_type: line + dimensions: + - name: some10 + - name: some60 + - name: some300 + - name: k8s.cgroup.io_some_pressure_stall_time + description: I/O some pressure stall time + unit: "ms" + chart_type: line + dimensions: + - name: time + - name: k8s.cgroup.io_full_pressure + description: I/O full pressure + unit: "percentage" + chart_type: line + dimensions: + - name: some10 + - name: some60 + - name: some300 + - name: k8s.cgroup.io_full_pressure_stall_time + description: I/O full pressure stall time + unit: "ms" + chart_type: line + dimensions: + - name: time + - name: k8s cgroup network device + description: "" + labels: + - name: device + description: TBD + - name: interface_type + description: TBD + - name: k8s_namespace + description: TBD + - name: k8s_pod_name + description: TBD + - name: k8s_pod_uid + description: TBD + - name: k8s_controller_kind + description: TBD + - name: k8s_controller_name + description: TBD + - name: k8s_node_name + description: TBD + - name: k8s_container_name + description: TBD + - name: k8s_container_id + description: TBD + - name: k8s_kind + description: TBD + - name: k8s_qos_class + description: TBD + - name: k8s_cluster_id + description: TBD + metrics: + - name: k8s.cgroup.net_net + description: Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: received 
+ - name: sent + - name: k8s.cgroup.net_packets + description: Packets + unit: "pps" + chart_type: line + dimensions: + - name: received + - name: sent + - name: multicast + - name: k8s.cgroup.net_errors + description: Interface Errors + unit: "errors/s" + chart_type: line + dimensions: + - name: inbound + - name: outbound + - name: k8s.cgroup.net_drops + description: Interface Drops + unit: "errors/s" + chart_type: line + dimensions: + - name: inbound + - name: outbound + - name: k8s.cgroup.net_fifo + description: Interface FIFO Buffer Errors + unit: "errors/s" + chart_type: line + dimensions: + - name: receive + - name: transmit + - name: k8s.cgroup.net_compressed + description: Compressed Packets + unit: "pps" + chart_type: line + dimensions: + - name: receive + - name: sent + - name: k8s.cgroup.net_events + description: Network Interface Events + unit: "events/s" + chart_type: line + dimensions: + - name: frames + - name: collisions + - name: carrier + - name: k8s.cgroup.net_operstate + description: Interface Operational State + unit: "state" + chart_type: line + dimensions: + - name: up + - name: down + - name: notpresent + - name: lowerlayerdown + - name: testing + - name: dormant + - name: unknown + - name: k8s.cgroup.net_carrier + description: Interface Physical Link State + unit: "state" + chart_type: line + dimensions: + - name: up + - name: down + - name: k8s.cgroup.net_mtu + description: Interface MTU + unit: "octets" + chart_type: line + dimensions: + - name: mtu + + - <<: *module + meta: + <<: *meta + monitored_instance: + name: Systemd Services + link: "" + icon_filename: systemd.png + categories: + - data-collection.systemd + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: services.cpu + description: Systemd Services CPU utilization (100% = 1 core) + unit: "percentage" + chart_type: stacked + dimensions: + 
- name: a dimension per systemd service + - name: services.mem_usage + description: Systemd Services Used Memory + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.mem_rss + description: Systemd Services RSS Memory + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.mem_mapped + description: Systemd Services Mapped Memory + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.mem_cache + description: Systemd Services Cache Memory + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.mem_writeback + description: Systemd Services Writeback Memory + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.mem_pgfault + description: Systemd Services Memory Minor Page Faults + unit: "MiB/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.mem_pgmajfault + description: Systemd Services Memory Major Page Faults + unit: "MiB/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.mem_pgpgin + description: Systemd Services Memory Charging Activity + unit: "MiB/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.mem_pgpgout + description: Systemd Services Memory Uncharging Activity + unit: "MiB/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.mem_failcnt + description: Systemd Services Memory Limit Failures + unit: "failures" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.swap_usage + description: Systemd Services Swap Memory Used + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.io_read + 
description: Systemd Services Disk Read Bandwidth + unit: "KiB/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.io_write + description: Systemd Services Disk Write Bandwidth + unit: "KiB/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.io_ops_read + description: Systemd Services Disk Read Operations + unit: "operations/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.io_ops_write + description: Systemd Services Disk Write Operations + unit: "operations/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.throttle_io_read + description: Systemd Services Throttle Disk Read Bandwidth + unit: "KiB/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.throttle_io_write + description: Systemd Services Throttle Disk Write Bandwidth + unit: "KiB/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.throttle_io_ops_read + description: Systemd Services Throttle Disk Read Operations + unit: "operations/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.throttle_io_ops_write + description: Systemd Services Throttle Disk Write Operations + unit: "operations/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.queued_io_ops_read + description: Systemd Services Queued Disk Read Operations + unit: "operations/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.queued_io_ops_write + description: Systemd Services Queued Disk Write Operations + unit: "operations/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.merged_io_ops_read + description: Systemd Services Merged Disk Read Operations + unit: 
"operations/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.merged_io_ops_write + description: Systemd Services Merged Disk Write Operations + unit: "operations/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - <<: *module + meta: + <<: *meta + monitored_instance: + name: Virtual Machines + link: "" + icon_filename: k8s.png + categories: + - data-collection.containers-vms + - data-collection.kubernetes + keywords: + - vms + - virtualization + - container + - <<: *module + meta: + <<: *meta + monitored_instance: + name: LXC Containers + link: "" + icon_filename: lxc.png + categories: + - data-collection.containers-vms + keywords: + - lxc + - lxd + - container + - <<: *module + meta: + <<: *meta + monitored_instance: + name: Libvirt Containers + link: "" + icon_filename: libvirt.png + categories: + - data-collection.containers-vms + keywords: + - libvirt + - container + - <<: *module + meta: + <<: *meta + monitored_instance: + name: oVirt Containers + link: "" + icon_filename: ovirt.png + categories: + - data-collection.containers-vms + keywords: + - ovirt + - container + - <<: *module + meta: + <<: *meta + monitored_instance: + name: Proxmox Containers + link: "" + icon_filename: proxmox.png + categories: + - data-collection.containers-vms + keywords: + - proxmox + - container diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c index d9049b2f..fb805e63 100644 --- a/collectors/cgroups.plugin/sys_fs_cgroup.c +++ b/collectors/cgroups.plugin/sys_fs_cgroup.c @@ -427,10 +427,12 @@ void read_cgroup_plugin_configuration() { //TODO: can there be more than 1 cgroup2 mount point? 
mi = mountinfo_find_by_filesystem_super_option(root, "cgroup2", "rw"); //there is no cgroup2 specific super option - for now use 'rw' option - if(mi) debug(D_CGROUP, "found unified cgroup root using super options, with path: '%s'", mi->mount_point); + if(mi) + netdata_log_debug(D_CGROUP, "found unified cgroup root using super options, with path: '%s'", mi->mount_point); if(!mi) { mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup2", "cgroup"); - if(mi) debug(D_CGROUP, "found unified cgroup root using mountsource info, with path: '%s'", mi->mount_point); + if(mi) + netdata_log_debug(D_CGROUP, "found unified cgroup root using mountsource info, with path: '%s'", mi->mount_point); } if(!mi) { collector_error("CGROUP: cannot find cgroup2 mountinfo. Assuming default: /sys/fs/cgroup"); @@ -439,7 +441,7 @@ void read_cgroup_plugin_configuration() { else s = mi->mount_point; set_cgroup_base_path(filename, s); cgroup_unified_base = config_get("plugin:cgroups", "path to unified cgroups", filename); - debug(D_CGROUP, "using cgroup root: '%s'", cgroup_unified_base); + netdata_log_debug(D_CGROUP, "using cgroup root: '%s'", cgroup_unified_base); } cgroup_root_max = (int)config_get_number("plugin:cgroups", "max cgroups to allow", cgroup_root_max); @@ -982,13 +984,13 @@ static int k8s_get_container_first_proc_comm(const char *id, char *comm) { ff = procfile_reopen(ff, filename, NULL, CGROUP_PROCFILE_FLAG); if (unlikely(!ff)) { - debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename); + netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename); return 1; } ff = procfile_readall(ff); if (unlikely(!ff)) { - debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename); + netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename); return 1; } @@ -1006,13 +1008,13 @@ static int k8s_get_container_first_proc_comm(const char *id, char *comm) { ff = 
procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_DEFAULT); if (unlikely(!ff)) { - debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename); + netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename); return 1; } ff = procfile_readall(ff); if (unlikely(!ff)) { - debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename); + netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename); return 1; } @@ -1612,7 +1614,7 @@ memory_next: } static inline void read_cgroup(struct cgroup *cg) { - debug(D_CGROUP, "reading metrics for cgroups '%s'", cg->id); + netdata_log_debug(D_CGROUP, "reading metrics for cgroups '%s'", cg->id); if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { cgroup_read_cpuacct_stat(&cg->cpuacct_stat); cgroup_read_cpuacct_usage(&cg->cpuacct_usage); @@ -1640,7 +1642,7 @@ static inline void read_cgroup(struct cgroup *cg) { } static inline void read_all_discovered_cgroups(struct cgroup *root) { - debug(D_CGROUP, "reading metrics for all cgroups"); + netdata_log_debug(D_CGROUP, "reading metrics for all cgroups"); struct cgroup *cg; for (cg = root; cg; cg = cg->next) { @@ -1655,7 +1657,7 @@ static inline void read_all_discovered_cgroups(struct cgroup *root) { #define CGROUP_NETWORK_INTERFACE_MAX_LINE 2048 static inline void read_cgroup_network_interfaces(struct cgroup *cg) { - debug(D_CGROUP, "looking for the network interfaces of cgroup '%s' with chart id '%s' and title '%s'", cg->id, cg->chart_id, cg->chart_title); + netdata_log_debug(D_CGROUP, "looking for the network interfaces of cgroup '%s' with chart id '%s' and title '%s'", cg->id, cg->chart_id, cg->chart_title); pid_t cgroup_pid; char cgroup_identifier[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; @@ -1667,7 +1669,7 @@ static inline void read_cgroup_network_interfaces(struct cgroup *cg) { snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", 
cgroup_unified_base, cg->id); } - debug(D_CGROUP, "executing cgroup_identifier %s --cgroup '%s' for cgroup '%s'", cgroups_network_interface_script, cgroup_identifier, cg->id); + netdata_log_debug(D_CGROUP, "executing cgroup_identifier %s --cgroup '%s' for cgroup '%s'", cgroups_network_interface_script, cgroup_identifier, cg->id); FILE *fp_child_input, *fp_child_output; (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_network_interface_script, "--cgroup", cgroup_identifier); if(!fp_child_output) { @@ -1713,7 +1715,7 @@ static inline void read_cgroup_network_interfaces(struct cgroup *cg) { } netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); - // debug(D_CGROUP, "closed cgroup_identifier for cgroup '%s'", cg->id); + // netdata_log_debug(D_CGROUP, "closed cgroup_identifier for cgroup '%s'", cg->id); } static inline void free_cgroup_network_interfaces(struct cgroup *cg) { @@ -1793,7 +1795,7 @@ static inline void free_pressure(struct pressure *res) { } static inline void cgroup_free(struct cgroup *cg) { - debug(D_CGROUP, "Removing cgroup '%s' with chart id '%s' (was %s and %s)", cg->id, cg->chart_id, (cg->enabled)?"enabled":"disabled", (cg->available)?"available":"not available"); + netdata_log_debug(D_CGROUP, "Removing cgroup '%s' with chart id '%s' (was %s and %s)", cg->id, cg->chart_id, (cg->enabled)?"enabled":"disabled", (cg->available)?"available":"not available"); if(cg->st_cpu) rrdset_is_obsolete(cg->st_cpu); if(cg->st_cpu_limit) rrdset_is_obsolete(cg->st_cpu_limit); @@ -1870,8 +1872,8 @@ static inline void discovery_rename_cgroup(struct cgroup *cg) { } cg->pending_renames--; - debug(D_CGROUP, "looking for the name of cgroup '%s' with chart id '%s' and title '%s'", cg->id, cg->chart_id, cg->chart_title); - debug(D_CGROUP, "executing command %s \"%s\" for cgroup '%s'", cgroups_rename_script, cg->intermediate_id, cg->chart_id); + netdata_log_debug(D_CGROUP, "looking for the name of cgroup '%s' 
with chart id '%s' and title '%s'", cg->id, cg->chart_id, cg->chart_title); + netdata_log_debug(D_CGROUP, "executing command %s \"%s\" for cgroup '%s'", cgroups_rename_script, cg->intermediate_id, cg->chart_id); pid_t cgroup_pid; FILE *fp_child_input, *fp_child_output; @@ -1984,7 +1986,7 @@ static inline void convert_cgroup_to_systemd_service(struct cgroup *cg) { } static inline struct cgroup *discovery_cgroup_add(const char *id) { - debug(D_CGROUP, "adding to list, cgroup with id '%s'", id); + netdata_log_debug(D_CGROUP, "adding to list, cgroup with id '%s'", id); struct cgroup *cg = callocz(1, sizeof(struct cgroup)); cg->id = strdupz(id); @@ -2011,7 +2013,7 @@ static inline struct cgroup *discovery_cgroup_add(const char *id) { } static inline struct cgroup *discovery_cgroup_find(const char *id) { - debug(D_CGROUP, "searching for cgroup '%s'", id); + netdata_log_debug(D_CGROUP, "searching for cgroup '%s'", id); uint32_t hash = simple_hash(id); @@ -2021,7 +2023,7 @@ static inline struct cgroup *discovery_cgroup_find(const char *id) { break; } - debug(D_CGROUP, "cgroup '%s' %s in memory", id, (cg)?"found":"not found"); + netdata_log_debug(D_CGROUP, "cgroup '%s' %s in memory", id, (cg)?"found":"not found"); return cg; } @@ -2029,7 +2031,7 @@ static inline void discovery_find_cgroup_in_dir_callback(const char *dir) { if (!dir || !*dir) { dir = "/"; } - debug(D_CGROUP, "examining cgroup dir '%s'", dir); + netdata_log_debug(D_CGROUP, "examining cgroup dir '%s'", dir); struct cgroup *cg = discovery_cgroup_find(dir); if (cg) { @@ -2058,7 +2060,7 @@ static inline void discovery_find_cgroup_in_dir_callback(const char *dir) { static inline int discovery_find_dir_in_subdirs(const char *base, const char *this, void (*callback)(const char *)) { if(!this) this = base; - debug(D_CGROUP, "searching for directories in '%s' (base '%s')", this?this:"", base); + netdata_log_debug(D_CGROUP, "searching for directories in '%s' (base '%s')", this?this:"", base); size_t dirlen = 
strlen(this), baselen = strlen(base); @@ -2112,7 +2114,7 @@ static inline int discovery_find_dir_in_subdirs(const char *base, const char *th } static inline void discovery_mark_all_cgroups_as_unavailable() { - debug(D_CGROUP, "marking all cgroups as not available"); + netdata_log_debug(D_CGROUP, "marking all cgroups as not available"); struct cgroup *cg; for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { cg->available = 0; @@ -2126,7 +2128,7 @@ static inline void discovery_update_filenames() { if(unlikely(!cg->available || !cg->enabled || cg->pending_renames)) continue; - debug(D_CGROUP, "checking paths for cgroup '%s'", cg->id); + netdata_log_debug(D_CGROUP, "checking paths for cgroup '%s'", cg->id); // check for newly added cgroups // and update the filenames they read @@ -2143,10 +2145,10 @@ static inline void discovery_update_filenames() { cg->filename_cpu_cfs_period = strdupz(filename); snprintfz(filename, FILENAME_MAX, "%s%s/cpu.cfs_quota_us", cgroup_cpuacct_base, cg->id); cg->filename_cpu_cfs_quota = strdupz(filename); - debug(D_CGROUP, "cpuacct.stat filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_stat.filename); + netdata_log_debug(D_CGROUP, "cpuacct.stat filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_stat.filename); } else - debug(D_CGROUP, "cpuacct.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "cpuacct.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); } if(unlikely(cgroup_enable_cpuacct_usage && !cg->cpuacct_usage.filename && !is_cgroup_systemd_service(cg))) { @@ -2154,20 +2156,20 @@ static inline void discovery_update_filenames() { if(likely(stat(filename, &buf) != -1)) { cg->cpuacct_usage.filename = strdupz(filename); cg->cpuacct_usage.enabled = cgroup_enable_cpuacct_usage; - debug(D_CGROUP, "cpuacct.usage_percpu filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_usage.filename); + netdata_log_debug(D_CGROUP, "cpuacct.usage_percpu filename for cgroup 
'%s': '%s'", cg->id, cg->cpuacct_usage.filename); } else - debug(D_CGROUP, "cpuacct.usage_percpu file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "cpuacct.usage_percpu file for cgroup '%s': '%s' does not exist.", cg->id, filename); } if(unlikely(cgroup_enable_cpuacct_cpu_throttling && !cg->cpuacct_cpu_throttling.filename && !is_cgroup_systemd_service(cg))) { snprintfz(filename, FILENAME_MAX, "%s%s/cpu.stat", cgroup_cpuacct_base, cg->id); if(likely(stat(filename, &buf) != -1)) { cg->cpuacct_cpu_throttling.filename = strdupz(filename); cg->cpuacct_cpu_throttling.enabled = cgroup_enable_cpuacct_cpu_throttling; - debug(D_CGROUP, "cpu.stat filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_cpu_throttling.filename); + netdata_log_debug(D_CGROUP, "cpu.stat filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_cpu_throttling.filename); } else - debug(D_CGROUP, "cpu.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "cpu.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); } if (unlikely( cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename && @@ -2176,10 +2178,10 @@ static inline void discovery_update_filenames() { if (likely(stat(filename, &buf) != -1)) { cg->cpuacct_cpu_shares.filename = strdupz(filename); cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares; - debug( + netdata_log_debug( D_CGROUP, "cpu.shares filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_cpu_shares.filename); } else - debug(D_CGROUP, "cpu.shares file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "cpu.shares file for cgroup '%s': '%s' does not exist.", cg->id, filename); } if(unlikely((cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed && (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) { @@ -2187,10 +2189,10 @@ static inline 
void discovery_update_filenames() { if(likely(stat(filename, &buf) != -1)) { cg->memory.filename_detailed = strdupz(filename); cg->memory.enabled_detailed = (cgroup_enable_detailed_memory == CONFIG_BOOLEAN_YES)?CONFIG_BOOLEAN_YES:CONFIG_BOOLEAN_AUTO; - debug(D_CGROUP, "memory.stat filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_detailed); + netdata_log_debug(D_CGROUP, "memory.stat filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_detailed); } else - debug(D_CGROUP, "memory.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "memory.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); } if(unlikely(cgroup_enable_memory && !cg->memory.filename_usage_in_bytes)) { @@ -2198,12 +2200,12 @@ static inline void discovery_update_filenames() { if(likely(stat(filename, &buf) != -1)) { cg->memory.filename_usage_in_bytes = strdupz(filename); cg->memory.enabled_usage_in_bytes = cgroup_enable_memory; - debug(D_CGROUP, "memory.usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_usage_in_bytes); + netdata_log_debug(D_CGROUP, "memory.usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_usage_in_bytes); snprintfz(filename, FILENAME_MAX, "%s%s/memory.limit_in_bytes", cgroup_memory_base, cg->id); cg->filename_memory_limit = strdupz(filename); } else - debug(D_CGROUP, "memory.usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "memory.usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); } if(unlikely(cgroup_enable_swap && !cg->memory.filename_msw_usage_in_bytes)) { @@ -2213,10 +2215,10 @@ static inline void discovery_update_filenames() { cg->memory.enabled_msw_usage_in_bytes = cgroup_enable_swap; snprintfz(filename, FILENAME_MAX, "%s%s/memory.memsw.limit_in_bytes", cgroup_memory_base, cg->id); cg->filename_memoryswap_limit = strdupz(filename); - debug(D_CGROUP, 
"memory.msw_usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_msw_usage_in_bytes); + netdata_log_debug(D_CGROUP, "memory.msw_usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_msw_usage_in_bytes); } else - debug(D_CGROUP, "memory.msw_usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "memory.msw_usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); } if(unlikely(cgroup_enable_memory_failcnt && !cg->memory.filename_failcnt)) { @@ -2224,10 +2226,10 @@ static inline void discovery_update_filenames() { if(likely(stat(filename, &buf) != -1)) { cg->memory.filename_failcnt = strdupz(filename); cg->memory.enabled_failcnt = cgroup_enable_memory_failcnt; - debug(D_CGROUP, "memory.failcnt filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_failcnt); + netdata_log_debug(D_CGROUP, "memory.failcnt filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_failcnt); } else - debug(D_CGROUP, "memory.failcnt file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "memory.failcnt file for cgroup '%s': '%s' does not exist.", cg->id, filename); } if(unlikely(cgroup_enable_blkio_io && !cg->io_service_bytes.filename)) { @@ -2235,16 +2237,16 @@ static inline void discovery_update_filenames() { if (unlikely(stat(filename, &buf) != -1)) { cg->io_service_bytes.filename = strdupz(filename); cg->io_service_bytes.enabled = cgroup_enable_blkio_io; - debug(D_CGROUP, "blkio.io_service_bytes_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename); + netdata_log_debug(D_CGROUP, "blkio.io_service_bytes_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename); } else { - debug(D_CGROUP, "blkio.io_service_bytes_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "blkio.io_service_bytes_recursive file for cgroup '%s': 
'%s' does not exist.", cg->id, filename); snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_service_bytes", cgroup_blkio_base, cg->id); if (likely(stat(filename, &buf) != -1)) { cg->io_service_bytes.filename = strdupz(filename); cg->io_service_bytes.enabled = cgroup_enable_blkio_io; - debug(D_CGROUP, "blkio.io_service_bytes filename for cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename); + netdata_log_debug(D_CGROUP, "blkio.io_service_bytes filename for cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename); } else { - debug(D_CGROUP, "blkio.io_service_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "blkio.io_service_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); } } } @@ -2254,16 +2256,16 @@ static inline void discovery_update_filenames() { if (unlikely(stat(filename, &buf) != -1)) { cg->io_serviced.filename = strdupz(filename); cg->io_serviced.enabled = cgroup_enable_blkio_ops; - debug(D_CGROUP, "blkio.io_serviced_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_serviced.filename); + netdata_log_debug(D_CGROUP, "blkio.io_serviced_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_serviced.filename); } else { - debug(D_CGROUP, "blkio.io_serviced_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "blkio.io_serviced_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_serviced", cgroup_blkio_base, cg->id); if (likely(stat(filename, &buf) != -1)) { cg->io_serviced.filename = strdupz(filename); cg->io_serviced.enabled = cgroup_enable_blkio_ops; - debug(D_CGROUP, "blkio.io_serviced filename for cgroup '%s': '%s'", cg->id, cg->io_serviced.filename); + netdata_log_debug(D_CGROUP, "blkio.io_serviced filename for cgroup '%s': '%s'", cg->id, cg->io_serviced.filename); } else { - debug(D_CGROUP, "blkio.io_serviced file for cgroup '%s': '%s' does not 
exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "blkio.io_serviced file for cgroup '%s': '%s' does not exist.", cg->id, filename); } } } @@ -2273,17 +2275,17 @@ static inline void discovery_update_filenames() { if (unlikely(stat(filename, &buf) != -1)) { cg->throttle_io_service_bytes.filename = strdupz(filename); cg->throttle_io_service_bytes.enabled = cgroup_enable_blkio_throttle_io; - debug(D_CGROUP,"blkio.throttle.io_service_bytes_recursive filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_service_bytes.filename); + netdata_log_debug(D_CGROUP,"blkio.throttle.io_service_bytes_recursive filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_service_bytes.filename); } else { - debug(D_CGROUP, "blkio.throttle.io_service_bytes_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "blkio.throttle.io_service_bytes_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); snprintfz( filename, FILENAME_MAX, "%s%s/blkio.throttle.io_service_bytes", cgroup_blkio_base, cg->id); if (likely(stat(filename, &buf) != -1)) { cg->throttle_io_service_bytes.filename = strdupz(filename); cg->throttle_io_service_bytes.enabled = cgroup_enable_blkio_throttle_io; - debug(D_CGROUP, "blkio.throttle.io_service_bytes filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_service_bytes.filename); + netdata_log_debug(D_CGROUP, "blkio.throttle.io_service_bytes filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_service_bytes.filename); } else { - debug(D_CGROUP, "blkio.throttle.io_service_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "blkio.throttle.io_service_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); } } } @@ -2293,16 +2295,16 @@ static inline void discovery_update_filenames() { if (unlikely(stat(filename, &buf) != -1)) { cg->throttle_io_serviced.filename = strdupz(filename); cg->throttle_io_serviced.enabled = 
cgroup_enable_blkio_throttle_ops; - debug(D_CGROUP, "blkio.throttle.io_serviced_recursive filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_serviced.filename); + netdata_log_debug(D_CGROUP, "blkio.throttle.io_serviced_recursive filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_serviced.filename); } else { - debug(D_CGROUP, "blkio.throttle.io_serviced_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "blkio.throttle.io_serviced_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_serviced", cgroup_blkio_base, cg->id); if (likely(stat(filename, &buf) != -1)) { cg->throttle_io_serviced.filename = strdupz(filename); cg->throttle_io_serviced.enabled = cgroup_enable_blkio_throttle_ops; - debug(D_CGROUP, "blkio.throttle.io_serviced filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_serviced.filename); + netdata_log_debug(D_CGROUP, "blkio.throttle.io_serviced filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_serviced.filename); } else { - debug(D_CGROUP, "blkio.throttle.io_serviced file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "blkio.throttle.io_serviced file for cgroup '%s': '%s' does not exist.", cg->id, filename); } } } @@ -2312,16 +2314,16 @@ static inline void discovery_update_filenames() { if (unlikely(stat(filename, &buf) != -1)) { cg->io_merged.filename = strdupz(filename); cg->io_merged.enabled = cgroup_enable_blkio_merged_ops; - debug(D_CGROUP, "blkio.io_merged_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_merged.filename); + netdata_log_debug(D_CGROUP, "blkio.io_merged_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_merged.filename); } else { - debug(D_CGROUP, "blkio.io_merged_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "blkio.io_merged_recursive file for cgroup '%s': '%s' does 
not exist.", cg->id, filename); snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_merged", cgroup_blkio_base, cg->id); if (likely(stat(filename, &buf) != -1)) { cg->io_merged.filename = strdupz(filename); cg->io_merged.enabled = cgroup_enable_blkio_merged_ops; - debug(D_CGROUP, "blkio.io_merged filename for cgroup '%s': '%s'", cg->id, cg->io_merged.filename); + netdata_log_debug(D_CGROUP, "blkio.io_merged filename for cgroup '%s': '%s'", cg->id, cg->io_merged.filename); } else { - debug(D_CGROUP, "blkio.io_merged file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "blkio.io_merged file for cgroup '%s': '%s' does not exist.", cg->id, filename); } } } @@ -2331,16 +2333,16 @@ static inline void discovery_update_filenames() { if (unlikely(stat(filename, &buf) != -1)) { cg->io_queued.filename = strdupz(filename); cg->io_queued.enabled = cgroup_enable_blkio_queued_ops; - debug(D_CGROUP, "blkio.io_queued_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_queued.filename); + netdata_log_debug(D_CGROUP, "blkio.io_queued_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_queued.filename); } else { - debug(D_CGROUP, "blkio.io_queued_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "blkio.io_queued_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename); snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_queued", cgroup_blkio_base, cg->id); if (likely(stat(filename, &buf) != -1)) { cg->io_queued.filename = strdupz(filename); cg->io_queued.enabled = cgroup_enable_blkio_queued_ops; - debug(D_CGROUP, "blkio.io_queued filename for cgroup '%s': '%s'", cg->id, cg->io_queued.filename); + netdata_log_debug(D_CGROUP, "blkio.io_queued filename for cgroup '%s': '%s'", cg->id, cg->io_queued.filename); } else { - debug(D_CGROUP, "blkio.io_queued file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "blkio.io_queued file for 
cgroup '%s': '%s' does not exist.", cg->id, filename); } } } @@ -2351,18 +2353,18 @@ static inline void discovery_update_filenames() { if(likely(stat(filename, &buf) != -1)) { cg->io_service_bytes.filename = strdupz(filename); cg->io_service_bytes.enabled = cgroup_enable_blkio_io; - debug(D_CGROUP, "io.stat filename for unified cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename); + netdata_log_debug(D_CGROUP, "io.stat filename for unified cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename); } else - debug(D_CGROUP, "io.stat file for unified cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "io.stat file for unified cgroup '%s': '%s' does not exist.", cg->id, filename); } if (unlikely(cgroup_enable_blkio_ops && !cg->io_serviced.filename)) { snprintfz(filename, FILENAME_MAX, "%s%s/io.stat", cgroup_unified_base, cg->id); if (likely(stat(filename, &buf) != -1)) { cg->io_serviced.filename = strdupz(filename); cg->io_serviced.enabled = cgroup_enable_blkio_ops; - debug(D_CGROUP, "io.stat filename for unified cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename); + netdata_log_debug(D_CGROUP, "io.stat filename for unified cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename); } else - debug(D_CGROUP, "io.stat file for unified cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "io.stat file for unified cgroup '%s': '%s' does not exist.", cg->id, filename); } if (unlikely( (cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_cpu_throttling) && @@ -2376,19 +2378,19 @@ static inline void discovery_update_filenames() { cg->filename_cpu_cfs_period = NULL; snprintfz(filename, FILENAME_MAX, "%s%s/cpu.max", cgroup_unified_base, cg->id); cg->filename_cpu_cfs_quota = strdupz(filename); - debug(D_CGROUP, "cpu.stat filename for unified cgroup '%s': '%s'", cg->id, cg->cpuacct_stat.filename); + netdata_log_debug(D_CGROUP, "cpu.stat filename for unified cgroup '%s': '%s'", cg->id, 
cg->cpuacct_stat.filename); } else - debug(D_CGROUP, "cpu.stat file for unified cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "cpu.stat file for unified cgroup '%s': '%s' does not exist.", cg->id, filename); } if (unlikely(cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename)) { snprintfz(filename, FILENAME_MAX, "%s%s/cpu.weight", cgroup_unified_base, cg->id); if (likely(stat(filename, &buf) != -1)) { cg->cpuacct_cpu_shares.filename = strdupz(filename); cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares; - debug(D_CGROUP, "cpu.weight filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_cpu_shares.filename); + netdata_log_debug(D_CGROUP, "cpu.weight filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_cpu_shares.filename); } else - debug(D_CGROUP, "cpu.weight file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "cpu.weight file for cgroup '%s': '%s' does not exist.", cg->id, filename); } if(unlikely((cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed && (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) { @@ -2396,10 +2398,10 @@ static inline void discovery_update_filenames() { if(likely(stat(filename, &buf) != -1)) { cg->memory.filename_detailed = strdupz(filename); cg->memory.enabled_detailed = (cgroup_enable_detailed_memory == CONFIG_BOOLEAN_YES)?CONFIG_BOOLEAN_YES:CONFIG_BOOLEAN_AUTO; - debug(D_CGROUP, "memory.stat filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_detailed); + netdata_log_debug(D_CGROUP, "memory.stat filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_detailed); } else - debug(D_CGROUP, "memory.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "memory.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); } if(unlikely(cgroup_enable_memory && 
!cg->memory.filename_usage_in_bytes)) { @@ -2407,12 +2409,12 @@ static inline void discovery_update_filenames() { if(likely(stat(filename, &buf) != -1)) { cg->memory.filename_usage_in_bytes = strdupz(filename); cg->memory.enabled_usage_in_bytes = cgroup_enable_memory; - debug(D_CGROUP, "memory.current filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_usage_in_bytes); + netdata_log_debug(D_CGROUP, "memory.current filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_usage_in_bytes); snprintfz(filename, FILENAME_MAX, "%s%s/memory.max", cgroup_unified_base, cg->id); cg->filename_memory_limit = strdupz(filename); } else - debug(D_CGROUP, "memory.current file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "memory.current file for cgroup '%s': '%s' does not exist.", cg->id, filename); } if(unlikely(cgroup_enable_swap && !cg->memory.filename_msw_usage_in_bytes)) { @@ -2422,10 +2424,10 @@ static inline void discovery_update_filenames() { cg->memory.enabled_msw_usage_in_bytes = cgroup_enable_swap; snprintfz(filename, FILENAME_MAX, "%s%s/memory.swap.max", cgroup_unified_base, cg->id); cg->filename_memoryswap_limit = strdupz(filename); - debug(D_CGROUP, "memory.swap.current filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_msw_usage_in_bytes); + netdata_log_debug(D_CGROUP, "memory.swap.current filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_msw_usage_in_bytes); } else - debug(D_CGROUP, "memory.swap file for cgroup '%s': '%s' does not exist.", cg->id, filename); + netdata_log_debug(D_CGROUP, "memory.swap file for cgroup '%s': '%s' does not exist.", cg->id, filename); } if (unlikely(cgroup_enable_pressure_cpu && !cg->cpu_pressure.filename)) { @@ -2434,9 +2436,9 @@ static inline void discovery_update_filenames() { cg->cpu_pressure.filename = strdupz(filename); cg->cpu_pressure.some.enabled = cgroup_enable_pressure_cpu; cg->cpu_pressure.full.enabled = CONFIG_BOOLEAN_NO; - debug(D_CGROUP, 
"cpu.pressure filename for cgroup '%s': '%s'", cg->id, cg->cpu_pressure.filename); + netdata_log_debug(D_CGROUP, "cpu.pressure filename for cgroup '%s': '%s'", cg->id, cg->cpu_pressure.filename); } else { - debug(D_CGROUP, "cpu.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); + netdata_log_debug(D_CGROUP, "cpu.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); } } @@ -2446,9 +2448,9 @@ static inline void discovery_update_filenames() { cg->io_pressure.filename = strdupz(filename); cg->io_pressure.some.enabled = cgroup_enable_pressure_io_some; cg->io_pressure.full.enabled = cgroup_enable_pressure_io_full; - debug(D_CGROUP, "io.pressure filename for cgroup '%s': '%s'", cg->id, cg->io_pressure.filename); + netdata_log_debug(D_CGROUP, "io.pressure filename for cgroup '%s': '%s'", cg->id, cg->io_pressure.filename); } else { - debug(D_CGROUP, "io.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); + netdata_log_debug(D_CGROUP, "io.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); } } @@ -2458,9 +2460,9 @@ static inline void discovery_update_filenames() { cg->memory_pressure.filename = strdupz(filename); cg->memory_pressure.some.enabled = cgroup_enable_pressure_memory_some; cg->memory_pressure.full.enabled = cgroup_enable_pressure_memory_full; - debug(D_CGROUP, "memory.pressure filename for cgroup '%s': '%s'", cg->id, cg->memory_pressure.filename); + netdata_log_debug(D_CGROUP, "memory.pressure filename for cgroup '%s': '%s'", cg->id, cg->memory_pressure.filename); } else { - debug(D_CGROUP, "memory.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); + netdata_log_debug(D_CGROUP, "memory.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); } } } @@ -2477,7 +2479,7 @@ static inline void discovery_cleanup_all_cgroups() { struct cgroup *t; for(t = discovered_cgroup_root; t ; t = t->discovered_next) { if(t != cg && t->available && !t->enabled && 
t->options & CGROUP_OPTIONS_DISABLED_DUPLICATE && t->hash_chart == cg->hash_chart && !strcmp(t->chart_id, cg->chart_id)) { - debug(D_CGROUP, "Enabling duplicate of cgroup '%s' with id '%s', because the original with id '%s' stopped.", t->chart_id, t->id, cg->id); + netdata_log_debug(D_CGROUP, "Enabling duplicate of cgroup '%s' with id '%s', because the original with id '%s' stopped.", t->chart_id, t->id, cg->id); t->enabled = 1; t->options &= ~CGROUP_OPTIONS_DISABLED_DUPLICATE; break; @@ -2505,7 +2507,7 @@ static inline void discovery_cleanup_all_cgroups() { } static inline void discovery_copy_discovered_cgroups_to_reader() { - debug(D_CGROUP, "copy discovered cgroups to the main group list"); + netdata_log_debug(D_CGROUP, "copy discovered cgroups to the main group list"); struct cgroup *cg; @@ -2543,7 +2545,7 @@ static inline void discovery_share_cgroups_with_ebpf() { is_cgroup_procs_exist(ptr, cg->id); } - debug(D_CGROUP, "cgroup shared: NAME=%s, ENABLED=%d", ptr->name, ptr->enabled); + netdata_log_debug(D_CGROUP, "cgroup shared: NAME=%s, ENABLED=%d", ptr->name, ptr->enabled); } shm_cgroup_ebpf.header->cgroup_root_count = count; @@ -2633,13 +2635,13 @@ static inline void discovery_process_first_time_seen_cgroup(struct cgroup *cg) { } if (cgroup_enable_systemd_services && matches_systemd_services_cgroups(cg->id)) { - debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'cgroups to match as systemd services'", cg->id, cg->chart_title); + netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'cgroups to match as systemd services'", cg->id, cg->chart_title); convert_cgroup_to_systemd_service(cg); return; } if (matches_enabled_cgroup_renames(cg->id)) { - debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'run script to rename cgroups matching', will try to rename it", cg->id, cg->chart_title); + netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'run script to rename cgroups matching', will try to rename it", cg->id, cg->chart_title); if (is_inside_k8s && 
k8s_is_container(cg->id)) { // it may take up to a minute for the K8s API to return data for the container // tested on AWS K8s cluster with 100% CPU utilization @@ -2664,7 +2666,7 @@ static int discovery_is_cgroup_duplicate(struct cgroup *cg) { static inline void discovery_process_cgroup(struct cgroup *cg) { if (!cg) { - debug(D_CGROUP, "discovery_process_cgroup() received NULL"); + netdata_log_debug(D_CGROUP, "discovery_process_cgroup() received NULL"); return; } if (!cg->available || cg->processed) { @@ -2700,12 +2702,12 @@ static inline void discovery_process_cgroup(struct cgroup *cg) { } if (!(cg->enabled = matches_enabled_cgroup_names(cg->chart_title))) { - debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups names matching'", cg->id, cg->chart_title); + netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups names matching'", cg->id, cg->chart_title); return; } if (!(cg->enabled = matches_enabled_cgroup_paths(cg->id))) { - debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups matching'", cg->id, cg->chart_title); + netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups matching'", cg->id, cg->chart_title); return; } @@ -2730,7 +2732,7 @@ static inline void discovery_process_cgroup(struct cgroup *cg) { } static inline void discovery_find_all_cgroups() { - debug(D_CGROUP, "searching for cgroups"); + netdata_log_debug(D_CGROUP, "searching for cgroups"); worker_is_busy(WORKER_DISCOVERY_INIT); discovery_mark_all_cgroups_as_unavailable(); @@ -2765,7 +2767,7 @@ static inline void discovery_find_all_cgroups() { worker_is_busy(WORKER_DISCOVERY_SHARE); discovery_share_cgroups_with_ebpf(); - debug(D_CGROUP, "done searching for cgroups"); + netdata_log_debug(D_CGROUP, "done searching for cgroups"); } static void cgroup_discovery_cleanup(void *ptr) { @@ -3587,7 +3589,7 @@ static inline void update_cpu_limits2(struct cgroup *cg) { } else { 
cg->cpu_cfs_quota = str2ull(procfile_lineword(ff, 0, 0), NULL); } - debug(D_CGROUP, "CPU limits values: %llu %llu %llu", cg->cpu_cfs_period, cg->cpuset_cpus, cg->cpu_cfs_quota); + netdata_log_debug(D_CGROUP, "CPU limits values: %llu %llu %llu", cg->cpu_cfs_period, cg->cpuset_cpus, cg->cpu_cfs_quota); return; cpu_limits2_err: @@ -3645,7 +3647,7 @@ static inline int update_memory_limits(char **filename, const RRDSETVAR_ACQUIRED } void update_cgroup_charts(int update_every) { - debug(D_CGROUP, "updating cgroups charts"); + netdata_log_debug(D_CGROUP, "updating cgroups charts"); char type[RRD_ID_LENGTH_MAX + 1]; char title[CHART_TITLE_MAX + 1]; @@ -4754,7 +4756,7 @@ void update_cgroup_charts(int update_every) { , services_do_queued_ops, services_do_merged_ops ); - debug(D_CGROUP, "done updating cgroups charts"); + netdata_log_debug(D_CGROUP, "done updating cgroups charts"); } // ---------------------------------------------------------------------------- diff --git a/collectors/charts.d.plugin/ap/metadata.yaml b/collectors/charts.d.plugin/ap/metadata.yaml new file mode 100644 index 00000000..344b6817 --- /dev/null +++ b/collectors/charts.d.plugin/ap/metadata.yaml @@ -0,0 +1,107 @@ +meta: + plugin_name: charts.d.plugin + module_name: ap + monitored_instance: + name: Access Points + link: '' + categories: + - data-collection.networking-stack-and-network-interfaces + icon_filename: 'netdata.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Maintain surveillance over Access Points, ensuring optimal wireless network connectivity and performance. Monitor and troubleshoot in realtime for high-quality network operations.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: wireless device + description: "" + labels: [] + metrics: + - name: ap.clients + description: Connected clients to ${ssid} on ${dev} + unit: "clients" + chart_type: line + dimensions: + - name: clients + - name: ap.net + description: Bandwidth for ${ssid} on ${dev} + unit: "kilobits/s" + chart_type: area + dimensions: + - name: received + - name: sent + - name: ap.packets + description: Packets for ${ssid} on ${dev} + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ap.issues + description: Transmit Issues for ${ssid} on ${dev} + unit: "issues/s" + chart_type: line + dimensions: + - name: retries + - name: failures + - name: ap.signal + description: Average Signal for ${ssid} on ${dev} + unit: "dBm" + chart_type: line + dimensions: + - name: average signal + - name: ap.bitrate + description: Bitrate for ${ssid} on ${dev} + unit: "Mbps" + chart_type: line + dimensions: + - name: receive + - name: transmit + - name: expected diff --git a/collectors/charts.d.plugin/apcupsd/metadata.yaml b/collectors/charts.d.plugin/apcupsd/metadata.yaml new file mode 100644 index 00000000..203f0482 --- /dev/null +++ b/collectors/charts.d.plugin/apcupsd/metadata.yaml @@ -0,0 +1,144 @@ +meta: + plugin_name: charts.d.plugin + module_name: apcupsd + monitored_instance: + name: APC UPS + link: 
'' + categories: + - data-collection.ups + icon_filename: 'apc.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor APC UPS performance with Netdata for optimal uninterruptible power supply operations. Enhance your power supply reliability with real-time APC UPS metrics.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: +- name: apcupsd_ups_charge + link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf + metric: apcupsd.charge + info: average UPS charge over the last minute + os: "*" +- name: apcupsd_10min_ups_load + link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf + metric: apcupsd.load + info: average UPS load over the last 10 minutes + os: "*" +- name: apcupsd_last_collected_secs + link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf + metric: apcupsd.load + info: number of seconds since the last successful data collection +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: ups + description: "" + labels: [] + metrics: + - name: apcupsd.charge + description: UPS Charge + unit: "percentage" + chart_type: area + dimensions: + - name: charge + - name: apcupsd.battery.voltage + description: UPS Battery Voltage + unit: "Volts" + chart_type: line + dimensions: + - name: 
voltage + - name: nominal + - name: apcupsd.input.voltage + description: UPS Input Voltage + unit: "Volts" + chart_type: line + dimensions: + - name: voltage + - name: min + - name: max + - name: apcupsd.output.voltage + description: UPS Output Voltage + unit: "Volts" + chart_type: line + dimensions: + - name: absolute + - name: nominal + - name: apcupsd.input.frequency + description: UPS Input Voltage + unit: "Hz" + chart_type: line + dimensions: + - name: frequency + - name: apcupsd.load + description: UPS Load + unit: "percentage" + chart_type: area + dimensions: + - name: load + - name: apcupsd.load_usage + description: UPS Load Usage + unit: "Watts" + chart_type: area + dimensions: + - name: load + - name: apcupsd.temperature + description: UPS Temperature + unit: "Celsius" + chart_type: line + dimensions: + - name: temp + - name: apcupsd.time + description: UPS Time Remaining + unit: "Minutes" + chart_type: area + dimensions: + - name: time + - name: apcupsd.online + description: UPS ONLINE flag + unit: "boolean" + chart_type: line + dimensions: + - name: online diff --git a/collectors/charts.d.plugin/libreswan/metadata.yaml b/collectors/charts.d.plugin/libreswan/metadata.yaml new file mode 100644 index 00000000..480db363 --- /dev/null +++ b/collectors/charts.d.plugin/libreswan/metadata.yaml @@ -0,0 +1,79 @@ +meta: + plugin_name: charts.d.plugin + module_name: libreswan + monitored_instance: + name: Libreswan + link: '' + categories: + - data-collection.vpns + icon_filename: 'libreswan.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor Libreswan performance for optimal IPsec VPN operations. Improve your VPN operations with Netdata''s real-time metrics and built-in alerts.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: IPSEC tunnel + description: "" + labels: [] + metrics: + - name: libreswan.net + description: LibreSWAN Tunnel ${name} Traffic + unit: "kilobits/s" + chart_type: area + dimensions: + - name: in + - name: out + - name: libreswan.uptime + description: LibreSWAN Tunnel ${name} Uptime + unit: "seconds" + chart_type: line + dimensions: + - name: uptime diff --git a/collectors/charts.d.plugin/nut/metadata.yaml b/collectors/charts.d.plugin/nut/metadata.yaml new file mode 100644 index 00000000..ce6e5700 --- /dev/null +++ b/collectors/charts.d.plugin/nut/metadata.yaml @@ -0,0 +1,152 @@ +meta: + plugin_name: charts.d.plugin + module_name: nut + monitored_instance: + name: UPS/PDU + link: '' + categories: + - data-collection.ups + icon_filename: 'plug-circle-bolt.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Examine UPS/PDU metrics with Netdata for insights into power device performance. Improve your power device performance with comprehensive dashboards and anomaly detection.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: +- name: nut_ups_charge + link: https://github.com/netdata/netdata/blob/master/health/health.d/nut.conf + metric: nut.charge + info: average UPS charge over the last minute + os: "*" +- name: nut_10min_ups_load + link: https://github.com/netdata/netdata/blob/master/health/health.d/nut.conf + metric: nut.load + info: average UPS load over the last 10 minutes + os: "*" +- name: nut_last_collected_secs + link: https://github.com/netdata/netdata/blob/master/health/health.d/nut.conf + metric: nut.load + info: number of seconds since the last successful data collection +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: ups + description: "" + labels: [] + metrics: + - name: nut.charge + description: UPS Charge + unit: "percentage" + chart_type: area + dimensions: + - name: charge + - name: nut.runtime + description: UPS Runtime + unit: "seconds" + chart_type: line + dimensions: + - name: runtime + - name: nut.battery.voltage + description: UPS Battery Voltage + unit: "Volts" + chart_type: line + dimensions: + - name: voltage + - name: high + - name: low + - name: nominal + - name: nut.input.voltage + description: UPS Input Voltage + unit: "Volts" + chart_type: line + dimensions: + - name: voltage + - name: fault + - name: nominal + - name: nut.input.current + description: UPS Input Current + unit: "Ampere" + chart_type: line + dimensions: + - name: nominal + - name: 
nut.input.frequency + description: UPS Input Frequency + unit: "Hz" + chart_type: line + dimensions: + - name: frequency + - name: nominal + - name: nut.output.voltage + description: UPS Output Voltage + unit: "Volts" + chart_type: line + dimensions: + - name: voltage + - name: nut.load + description: UPS Load + unit: "percentage" + chart_type: area + dimensions: + - name: load + - name: nut.load_usage + description: UPS Load Usage + unit: "Watts" + chart_type: area + dimensions: + - name: load_usage + - name: nut.temperature + description: UPS Temperature + unit: "temperature" + chart_type: line + dimensions: + - name: temp + - name: nut.clients + description: UPS Connected Clients + unit: "clients" + chart_type: area + dimensions: + - name: clients diff --git a/collectors/charts.d.plugin/opensips/metadata.yaml b/collectors/charts.d.plugin/opensips/metadata.yaml new file mode 100644 index 00000000..90a2ebe7 --- /dev/null +++ b/collectors/charts.d.plugin/opensips/metadata.yaml @@ -0,0 +1,215 @@ +meta: + plugin_name: charts.d.plugin + module_name: opensips + monitored_instance: + name: OpenSIPS + link: '' + categories: + - data-collection.telephony-servers + icon_filename: 'opensips.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Examine OpenSIPS metrics for insights into SIP server operations. Study call rates, error rates, and response times for reliable voice over IP services.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: opensips.dialogs_active + description: OpenSIPS Active Dialogs + unit: "dialogs" + chart_type: area + dimensions: + - name: active + - name: early + - name: opensips.users + description: OpenSIPS Users + unit: "users" + chart_type: line + dimensions: + - name: registered + - name: location + - name: contacts + - name: expires + - name: opensips.registrar + description: OpenSIPS Registrar + unit: "registrations/s" + chart_type: line + dimensions: + - name: accepted + - name: rejected + - name: opensips.transactions + description: OpenSIPS Transactions + unit: "transactions/s" + chart_type: line + dimensions: + - name: UAS + - name: UAC + - name: opensips.core_rcv + description: OpenSIPS Core Receives + unit: "queries/s" + chart_type: line + dimensions: + - name: requests + - name: replies + - name: opensips.core_fwd + description: OpenSIPS Core Forwards + unit: "queries/s" + chart_type: line + dimensions: + - name: requests + - name: replies + - name: opensips.core_drop + description: OpenSIPS Core Drops + unit: "queries/s" + chart_type: line + dimensions: + - name: requests + - name: replies + - name: opensips.core_err + description: OpenSIPS Core Errors + unit: "queries/s" + chart_type: line + dimensions: + - name: requests + - name: replies + - name: 
opensips.core_bad + description: OpenSIPS Core Bad + unit: "queries/s" + chart_type: line + dimensions: + - name: bad_URIs_rcvd + - name: unsupported_methods + - name: bad_msg_hdr + - name: opensips.tm_replies + description: OpenSIPS TM Replies + unit: "replies/s" + chart_type: line + dimensions: + - name: received + - name: relayed + - name: local + - name: opensips.transactions_status + description: OpenSIPS Transactions Status + unit: "transactions/s" + chart_type: line + dimensions: + - name: 2xx + - name: 3xx + - name: 4xx + - name: 5xx + - name: 6xx + - name: opensips.transactions_inuse + description: OpenSIPS InUse Transactions + unit: "transactions" + chart_type: line + dimensions: + - name: inuse + - name: opensips.sl_replies + description: OpenSIPS SL Replies + unit: "replies/s" + chart_type: line + dimensions: + - name: 1xx + - name: 2xx + - name: 3xx + - name: 4xx + - name: 5xx + - name: 6xx + - name: sent + - name: error + - name: ACKed + - name: opensips.dialogs + description: OpenSIPS Dialogs + unit: "dialogs/s" + chart_type: line + dimensions: + - name: processed + - name: expire + - name: failed + - name: opensips.net_waiting + description: OpenSIPS Network Waiting + unit: "kilobytes" + chart_type: line + dimensions: + - name: UDP + - name: TCP + - name: opensips.uri_checks + description: OpenSIPS URI Checks + unit: "checks / sec" + chart_type: line + dimensions: + - name: positive + - name: negative + - name: opensips.traces + description: OpenSIPS Traces + unit: "traces / sec" + chart_type: line + dimensions: + - name: requests + - name: replies + - name: opensips.shmem + description: OpenSIPS Shared Memory + unit: "kilobytes" + chart_type: line + dimensions: + - name: total + - name: used + - name: real_used + - name: max_used + - name: free + - name: opensips.shmem_fragment + description: OpenSIPS Shared Memory Fragmentation + unit: "fragments" + chart_type: line + dimensions: + - name: fragments diff --git 
a/collectors/charts.d.plugin/sensors/metadata.yaml b/collectors/charts.d.plugin/sensors/metadata.yaml new file mode 100644 index 00000000..e56b97e6 --- /dev/null +++ b/collectors/charts.d.plugin/sensors/metadata.yaml @@ -0,0 +1,107 @@ +meta: + plugin_name: charts.d.plugin + module_name: sensors + monitored_instance: + name: charts.d sensors + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: sensor chip + description: "" + labels: [] + metrics: + - name: sensors.temp + description: Temperature + unit: "Celsius" + chart_type: line + dimensions: + - name: '{filename}' + - name: sensors.volt + description: Voltage + unit: "Volts" + chart_type: line + dimensions: + - name: '{filename}' + - name: sensors.curr + description: Current + unit: "Ampere" + chart_type: line + dimensions: + - name: '{filename}' + - name: sensors.power + description: Power + unit: "Watt" + chart_type: line + dimensions: + - name: '{filename}' + - name: sensors.fans + description: Fans Speed + unit: "Rotations / Minute" + chart_type: line + dimensions: + - name: '{filename}' + - name: sensors.energy + description: Energy + unit: "Joule" + chart_type: 
area + dimensions: + - name: '{filename}' + - name: sensors.humidity + description: Humidity + unit: "Percent" + chart_type: line + dimensions: + - name: '{filename}' diff --git a/collectors/cups.plugin/cups_plugin.c b/collectors/cups.plugin/cups_plugin.c index ecadc4ec..ce7f05d4 100644 --- a/collectors/cups.plugin/cups_plugin.c +++ b/collectors/cups.plugin/cups_plugin.c @@ -17,7 +17,7 @@ static int debug = 0; static int netdata_update_every = 1; -static int netdata_priority = 100004; +static uint32_t netdata_priority = 100004; http_t *http; // connection to the cups daemon @@ -25,7 +25,9 @@ http_t *http; // connection to the cups daemon * Used to aggregate job metrics for a destination (and all destinations). */ struct job_metrics { - int is_collected; // flag if this was collected in the current cycle + uint32_t id; + + bool is_collected; // flag if this was collected in the current cycle int num_pending; int num_processing; @@ -102,7 +104,7 @@ void parse_command_line(int argc, char **argv) { if (freq >= netdata_update_every) { netdata_update_every = freq; } else if (freq) { - error("update frequency %d seconds is too small for CUPS. Using %d.", freq, netdata_update_every); + netdata_log_error("update frequency %d seconds is too small for CUPS. 
Using %d.", freq, netdata_update_every); } } @@ -140,7 +142,7 @@ getIntegerOption( static int reset_job_metrics(const DICTIONARY_ITEM *item __maybe_unused, void *entry, void *data __maybe_unused) { struct job_metrics *jm = (struct job_metrics *)entry; - jm->is_collected = 0; + jm->is_collected = false; jm->num_held = 0; jm->num_pending = 0; jm->num_processing = 0; @@ -151,28 +153,34 @@ static int reset_job_metrics(const DICTIONARY_ITEM *item __maybe_unused, void *e return 0; } +void send_job_charts_definitions_to_netdata(const char *name, uint32_t job_id, bool obsolete) { + printf("CHART cups.job_num_%s '' 'Active jobs of %s' jobs '%s' cups.destination_job_num stacked %u %i %s\n", + name, name, name, netdata_priority + job_id, netdata_update_every, obsolete?"obsolete":""); + printf("DIMENSION pending '' absolute 1 1\n"); + printf("DIMENSION held '' absolute 1 1\n"); + printf("DIMENSION processing '' absolute 1 1\n"); + + printf("CHART cups.job_size_%s '' 'Active jobs size of %s' KB '%s' cups.destination_job_size stacked %u %i %s\n", + name, name, name, netdata_priority + 1 + job_id, netdata_update_every, obsolete?"obsolete":""); + printf("DIMENSION pending '' absolute 1 1\n"); + printf("DIMENSION held '' absolute 1 1\n"); + printf("DIMENSION processing '' absolute 1 1\n"); +} + struct job_metrics *get_job_metrics(char *dest) { struct job_metrics *jm = dictionary_get(dict_dest_job_metrics, dest); if (unlikely(!jm)) { - struct job_metrics new_job_metrics; - reset_job_metrics(NULL, &new_job_metrics, NULL); + static uint32_t job_id = 0; + struct job_metrics new_job_metrics = { .id = ++job_id }; jm = dictionary_set(dict_dest_job_metrics, dest, &new_job_metrics, sizeof(struct job_metrics)); - - printf("CHART cups.job_num_%s '' 'Active jobs of %s' jobs '%s' cups.destination_job_num stacked %i %i\n", dest, dest, dest, netdata_priority++, netdata_update_every); - printf("DIMENSION pending '' absolute 1 1\n"); - printf("DIMENSION held '' absolute 1 1\n"); - printf("DIMENSION 
processing '' absolute 1 1\n"); - - printf("CHART cups.job_size_%s '' 'Active jobs size of %s' KB '%s' cups.destination_job_size stacked %i %i\n", dest, dest, dest, netdata_priority++, netdata_update_every); - printf("DIMENSION pending '' absolute 1 1\n"); - printf("DIMENSION held '' absolute 1 1\n"); - printf("DIMENSION processing '' absolute 1 1\n"); + send_job_charts_definitions_to_netdata(dest, jm->id, false); }; + return jm; } -int collect_job_metrics(const DICTIONARY_ITEM *item, void *entry, void *data __maybe_unused) { +int send_job_metrics_to_netdata(const DICTIONARY_ITEM *item, void *entry, void *data __maybe_unused) { const char *name = dictionary_acquired_item_name(item); struct job_metrics *jm = (struct job_metrics *)entry; @@ -192,16 +200,12 @@ int collect_job_metrics(const DICTIONARY_ITEM *item, void *entry, void *data __m "SET processing = %d\n" "END\n", name, jm->size_pending, jm->size_held, jm->size_processing); - } else { - printf("CHART cups.job_num_%s '' 'Active jobs of %s' jobs '%s' cups.destination_job_num stacked 1 %i 'obsolete'\n", name, name, name, netdata_update_every); - printf("DIMENSION pending '' absolute 1 1\n"); - printf("DIMENSION held '' absolute 1 1\n"); - printf("DIMENSION processing '' absolute 1 1\n"); - - printf("CHART cups.job_size_%s '' 'Active jobs size of %s' KB '%s' cups.destination_job_size stacked 1 %i 'obsolete'\n", name, name, name, netdata_update_every); - printf("DIMENSION pending '' absolute 1 1\n"); - printf("DIMENSION held '' absolute 1 1\n"); - printf("DIMENSION processing '' absolute 1 1\n"); + } + else { + // mark it obsolete + send_job_charts_definitions_to_netdata(name, jm->id, true); + + // delete it dictionary_del(dict_dest_job_metrics, name); } @@ -255,14 +259,11 @@ int main(int argc, char **argv) { heartbeat_t hb; heartbeat_init(&hb); - for (iteration = 0; 1; iteration++) - { + for (iteration = 0; 1; iteration++) { heartbeat_next(&hb, step); if (unlikely(netdata_exit)) - { break; - } reset_metrics(); @@ 
-274,7 +275,7 @@ int main(int argc, char **argv) { httpClose(http); http = httpConnect2(cupsServer(), ippPort(), NULL, AF_UNSPEC, cupsEncryption(), 0, netdata_update_every * 1000, NULL); if(http == NULL) { - error("cups daemon is not running. Exiting!"); + netdata_log_error("cups daemon is not running. Exiting!"); exit(1); } } @@ -320,7 +321,7 @@ int main(int argc, char **argv) { fprintf(stderr, "printer state is missing for destination %s", curr_dest->name); break; default: - error("Unknown printer state (%d) found.", printer_state); + netdata_log_error("Unknown printer state (%d) found.", printer_state); break; } @@ -329,7 +330,7 @@ int main(int argc, char **argv) { * This is needed to report also destinations with zero active jobs. */ struct job_metrics *jm = get_job_metrics(curr_dest->name); - jm->is_collected = 1; + jm->is_collected = true; } cupsFreeDests(num_dest_total, dests); @@ -341,7 +342,7 @@ int main(int argc, char **argv) { int i; for (i = num_jobs, curr_job = jobs; i > 0; i--, curr_job++) { struct job_metrics *jm = get_job_metrics(curr_job->dest); - jm->is_collected = 1; + jm->is_collected = true; switch (curr_job->state) { case IPP_JOB_PENDING: @@ -363,13 +364,14 @@ int main(int argc, char **argv) { global_job_metrics.size_processing += curr_job->size; break; default: - error("Unsupported job state (%u) found.", curr_job->state); + netdata_log_error("Unsupported job state (%u) found.", curr_job->state); break; } } cupsFreeJobs(num_jobs, jobs); - dictionary_walkthrough_write(dict_dest_job_metrics, collect_job_metrics, NULL); + dictionary_walkthrough_write(dict_dest_job_metrics, send_job_metrics_to_netdata, NULL); + dictionary_garbage_collect(dict_dest_job_metrics); static int cups_printer_by_option_created = 0; if (unlikely(!cups_printer_by_option_created)) @@ -436,5 +438,5 @@ int main(int argc, char **argv) { } httpClose(http); - info("CUPS process exiting"); + netdata_log_info("CUPS process exiting"); } diff --git 
a/collectors/cups.plugin/metadata.yaml b/collectors/cups.plugin/metadata.yaml new file mode 100644 index 00000000..c8a7e083 --- /dev/null +++ b/collectors/cups.plugin/metadata.yaml @@ -0,0 +1,118 @@ +meta: + plugin_name: cups.plugin + module_name: cups.plugin + monitored_instance: + name: CUPS + link: '' + categories: + - data-collection.hardware-devices-and-sensors + icon_filename: 'cups.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor CUPS performance for achieving optimal printing system operations. Monitor job statuses, queue lengths, and error rates to ensure smooth printing tasks.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: cups.dests_state + description: Destinations by state + unit: "dests" + chart_type: stacked + dimensions: + - name: idle + - name: printing + - name: stopped + - name: cups.dests_option + description: Destinations by option + unit: "dests" + chart_type: line + dimensions: + - name: total + - name: acceptingjobs + - name: shared + - name: cups.job_num + description: Active jobs + unit: "jobs" + chart_type: stacked + dimensions: + - name: pending + - name: held + - name: processing + - name: cups.job_size + 
description: Active jobs size + unit: "KB" + chart_type: stacked + dimensions: + - name: pending + - name: held + - name: processing + - name: destination + description: "" + labels: [] + metrics: + - name: cups.destination_job_num + description: Active jobs of {destination} + unit: "jobs" + chart_type: stacked + dimensions: + - name: pending + - name: held + - name: processing + - name: cups.destination_job_size + description: Active jobs size of {destination} + unit: "KB" + chart_type: stacked + dimensions: + - name: pending + - name: held + - name: processing diff --git a/collectors/debugfs.plugin/debugfs_plugin.c b/collectors/debugfs.plugin/debugfs_plugin.c index 9713be32..1c5bf106 100644 --- a/collectors/debugfs.plugin/debugfs_plugin.c +++ b/collectors/debugfs.plugin/debugfs_plugin.c @@ -30,18 +30,18 @@ static int debugfs_check_capabilities() { cap_t caps = cap_get_proc(); if (!caps) { - error("Cannot get current capabilities."); + netdata_log_error("Cannot get current capabilities."); return 0; } int ret = 1; cap_flag_value_t cfv = CAP_CLEAR; if (cap_get_flag(caps, CAP_DAC_READ_SEARCH, CAP_EFFECTIVE, &cfv) == -1) { - error("Cannot find if CAP_DAC_READ_SEARCH is effective."); + netdata_log_error("Cannot find if CAP_DAC_READ_SEARCH is effective."); ret = 0; } else { if (cfv != CAP_SET) { - error("debugfs.plugin should run with CAP_DAC_READ_SEARCH."); + netdata_log_error("debugfs.plugin should run with CAP_DAC_READ_SEARCH."); ret = 0; } } @@ -176,7 +176,7 @@ int main(int argc, char **argv) stock_config_dir = getenv("NETDATA_STOCK_CONFIG_DIR"); if (stock_config_dir == NULL) { - // info("NETDATA_CONFIG_DIR is not passed from netdata"); + // netdata_log_info("NETDATA_CONFIG_DIR is not passed from netdata"); stock_config_dir = LIBCONFIG_DIR; } @@ -186,7 +186,7 @@ int main(int argc, char **argv) if (!debugfs_check_capabilities() && !debugfs_am_i_running_as_root() && !debugfs_check_sys_permission()) { uid_t uid = getuid(), euid = geteuid(); #ifdef HAVE_CAPABILITY - 
error( + netdata_log_error( "debugfs.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. " "Without these, debugfs.plugin cannot access /sys/kernel/debug. " "To enable capabilities run: sudo setcap cap_dac_read_search,cap_sys_ptrace+ep %s; " @@ -197,7 +197,7 @@ int main(int argc, char **argv) argv[0], argv[0]); #else - error( + netdata_log_error( "debugfs.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. " "Without these, debugfs.plugin cannot access /sys/kernel/debug." "Your system does not support capabilities. " @@ -235,7 +235,7 @@ int main(int argc, char **argv) enabled++; } if (!enabled) { - info("all modules are disabled, exiting..."); + netdata_log_info("all modules are disabled, exiting..."); return 1; } } diff --git a/collectors/debugfs.plugin/debugfs_zswap.c b/collectors/debugfs.plugin/debugfs_zswap.c index a2991b9f..c8fc0f03 100644 --- a/collectors/debugfs.plugin/debugfs_zswap.c +++ b/collectors/debugfs.plugin/debugfs_zswap.c @@ -251,7 +251,7 @@ int zswap_collect_data(struct netdata_zswap_metric *metric) snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, metric->filename); if (read_single_number_file(filename, (unsigned long long *)&metric->value)) { - error("Cannot read file %s", filename); + netdata_log_error("Cannot read file %s", filename); return 1; } @@ -383,7 +383,7 @@ int do_debugfs_zswap(int update_every, const char *name) static int check_if_enabled = 1; if (likely(check_if_enabled && debugfs_is_zswap_enabled())) { - info("Zswap is disabled"); + netdata_log_info("Zswap is disabled"); return 1; } diff --git a/collectors/debugfs.plugin/multi_metadata.yaml b/collectors/debugfs.plugin/multi_metadata.yaml new file mode 100644 index 00000000..652aff7a --- /dev/null +++ b/collectors/debugfs.plugin/multi_metadata.yaml @@ -0,0 +1,233 @@ +name: debugfs.plugin +modules: + - meta: + plugin_name: debugfs.plugin + module_name: 
/sys/kernel/debug/extfrag + monitored_instance: + name: debugfs /sys/kernel/debug/extfrag + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: node + description: "" + labels: + - name: numa_node + description: TBD + metrics: + - name: mem.fragmentation_index_dma + description: Memory fragmentation index for each order + unit: "index" + chart_type: line + dimensions: + - name: order0 + - name: order1 + - name: order2 + - name: order3 + - name: order4 + - name: order5 + - name: order6 + - name: order7 + - name: order8 + - name: order9 + - name: order10 + - name: mem.fragmentation_index_dma32 + description: Memory fragmentation index for each order + unit: "index" + chart_type: line + dimensions: + - name: order0 + - name: order1 + - name: order2 + - name: order3 + - name: order4 + - name: order5 + - name: order6 + - name: order7 + - name: order8 + - name: order9 + - name: order10 + - name: mem.fragmentation_index_normal + description: Memory fragmentation index for each order + unit: "index" + chart_type: line + dimensions: + - name: order0 + - name: order1 + - name: order2 + - name: order3 + - name: order4 + - name: 
order5 + - name: order6 + - name: order7 + - name: order8 + - name: order9 + - name: order10 + - meta: + plugin_name: debugfs.plugin + module_name: /sys/kernel/debug/zswap + monitored_instance: + name: debugfs /sys/kernel/debug/zswap + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.zswap_pool_compression_ratio + description: Zswap compression ratio + unit: "ratio" + chart_type: line + dimensions: + - name: compression_ratio + - name: system.zswap_pool_compressed_size + description: Zswap compressed bytes currently stored + unit: "bytes" + chart_type: area + dimensions: + - name: compressed_size + - name: system.zswap_pool_raw_size + description: Zswap uncompressed bytes currently stored + unit: "bytes" + chart_type: area + dimensions: + - name: uncompressed_size + - name: system.zswap_rejections + description: Zswap rejections + unit: "rejections/s" + chart_type: stacked + dimensions: + - name: compress_poor + - name: kmemcache_fail + - name: alloc_fail + - name: reclaim_fail + - name: system.zswap_pool_limit_hit + description: Zswap pool limit was 
reached + unit: "events/s" + chart_type: line + dimensions: + - name: limit + - name: system.zswap_written_back_raw_bytes + description: Zswap uncompressed bytes written back when pool limit was reached + unit: "bytes/s" + chart_type: area + dimensions: + - name: written_back + - name: system.zswap_same_filled_raw_size + description: Zswap same-value filled uncompressed bytes currently stored + unit: "bytes" + chart_type: area + dimensions: + - name: same_filled + - name: system.zswap_duplicate_entry + description: Zswap duplicate store was encountered + unit: "entries/s" + chart_type: line + dimensions: + - name: duplicate diff --git a/collectors/diskspace.plugin/metadata.yaml b/collectors/diskspace.plugin/metadata.yaml new file mode 100644 index 00000000..0e8e7f35 --- /dev/null +++ b/collectors/diskspace.plugin/metadata.yaml @@ -0,0 +1,98 @@ +meta: + plugin_name: diskspace.plugin + module_name: diskspace.plugin + monitored_instance: + name: Disk space + link: '' + categories: + - data-collection.linux-systems + icon_filename: 'hard-drive.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor Disk space metrics for proficient storage management. Keep track of usage, free space, and error rates to prevent disk space issues.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: +- name: disk_space_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf + metric: disk.space + info: disk ${label:mount_point} space utilization + os: "linux freebsd" +- name: disk_inode_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf + metric: disk.inodes + info: disk ${label:mount_point} inode utilization + os: "linux freebsd" +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: mount point + description: "" + labels: + - name: mount_point + description: TBD + - name: filesystem + description: TBD + - name: mount_root + description: TBD + metrics: + - name: disk.space + description: Disk Space Usage + unit: "GiB" + chart_type: stacked + dimensions: + - name: avail + - name: used + - name: reserved_for_root + - name: disk.inodes + description: Disk Files (inodes) Usage + unit: "inodes" + chart_type: stacked + dimensions: + - name: avail + - name: used + - name: reserved_for_root diff --git a/collectors/ebpf.plugin/README.md b/collectors/ebpf.plugin/README.md index 94bbc184..fb036a5a 100644 --- a/collectors/ebpf.plugin/README.md +++ b/collectors/ebpf.plugin/README.md @@ -235,13 +235,12 @@ Linux metrics: The eBPF collector enables and runs the following eBPF programs by default: +- `cachestat`: Netdata's eBPF data collector creates charts about the memory page cache. 
When the integration with + [`apps.plugin`](https://github.com/netdata/netdata/blob/master/collectors/apps.plugin/README.md) is enabled, this collector creates charts for the whole host _and_ + for each application. - `fd` : This eBPF program creates charts that show information about calls to open files. - `mount`: This eBPF program creates charts that show calls to syscalls mount(2) and umount(2). - `shm`: This eBPF program creates charts that show calls to syscalls shmget(2), shmat(2), shmdt(2) and shmctl(2). -- `sync`: Monitor calls to syscalls sync(2), fsync(2), fdatasync(2), syncfs(2), msync(2), and sync_file_range(2). -- `network viewer`: This eBPF program creates charts with information about `TCP` and `UDP` functions, including the - bandwidth consumed by each. -- `vfs`: This eBPF program creates charts that show information about VFS (Virtual File System) functions. - `process`: This eBPF program creates charts that show information about process life. When in `return` mode, it also creates charts showing errors when these operations are executed. - `hardirq`: This eBPF program creates charts that show information about time spent servicing individual hardware @@ -254,9 +253,6 @@ The eBPF collector enables and runs the following eBPF programs by default: You can also enable the following eBPF programs: -- `cachestat`: Netdata's eBPF data collector creates charts about the memory page cache. When the integration with - [`apps.plugin`](https://github.com/netdata/netdata/blob/master/collectors/apps.plugin/README.md) is enabled, this collector creates charts for the whole host _and_ - for each application. - `dcstat` : This eBPF program creates charts that show information about file access using directory cache. It appends `kprobes` for `lookup_fast()` and `d_lookup()` to identify if files are inside directory cache, outside and files are not found. 
@@ -264,7 +260,11 @@ You can also enable the following eBPF programs: - `filesystem` : This eBPF program creates charts that show information about some filesystem latency. - `swap` : This eBPF program creates charts that show information about swap access. - `mdflush`: This eBPF program creates charts that show information about +- `sync`: Monitor calls to syscalls sync(2), fsync(2), fdatasync(2), syncfs(2), msync(2), and sync_file_range(2). +- `network viewer`: This eBPF program creates charts with information about `TCP` and `UDP` functions, including the + bandwidth consumed by each. multi-device software flushes. +- `vfs`: This eBPF program creates charts that show information about VFS (Virtual File System) functions. ### Configuring eBPF threads @@ -989,3 +989,50 @@ shows how the lockdown module impacts `ebpf.plugin` based on the selected option If you or your distribution compiled the kernel with the last combination, your system cannot load shared libraries required to run `ebpf.plugin`. + +## Function + +The eBPF plugin has a [function](https://github.com/netdata/netdata/blob/master/docs/cloud/netdata-functions.md) named +`ebpf_thread` that controls its internal threads and helps to reduce the overhead on the host. Using the function you +can run the plugin with all threads disabled and enable them only when you want to take a look at specific areas. + +### List threads + +To list the status of all threads, you can query the function endpoint directly: + +`http://localhost:19999/api/v1/function?function=ebpf_thread` + +It is also possible to query a specific thread by adding the keyword `thread` and the thread name: + +`http://localhost:19999/api/v1/function?function=ebpf_thread%20thread:mount` + +### Enable thread + +It is possible to enable a specific thread using the keyword `enable`: + +`http://localhost:19999/api/v1/function?function=ebpf_thread%20enable:mount` + +This will run thread `mount` for 300 seconds (5 minutes). 
You can specify a specific period by appending the period +after the thread name: + +`http://localhost:19999/api/v1/function?function=ebpf_thread%20enable:mount:600` + +in this example thread `mount` will run during 600 seconds (10 minutes). + +### Disable thread + +It is also possible to stop any thread running using the keyword `disable`. For example, to disable `cachestat` you can +request: + +`http://localhost:19999/api/v1/function?function=ebpf_thread%20disable:cachestat` + +### Debugging threads + +You can verify the impact of threads on the host by running the +[ebpf_thread_function.sh](https://github.com/netdata/netdata/blob/master/tests/ebpf/ebpf_thread_function.sh) +script on your environment. + +You can check the results of having threads running on your environment in the Netdata monitoring section on your +dashboard + +<img src="https://github.com/netdata/netdata/assets/49162938/91823573-114c-4c16-b634-cc46f7bb1bcf" alt="Threads running." /> diff --git a/collectors/ebpf.plugin/ebpf.c b/collectors/ebpf.plugin/ebpf.c index ffab37de..72aedba6 100644 --- a/collectors/ebpf.plugin/ebpf.c +++ b/collectors/ebpf.plugin/ebpf.c @@ -30,6 +30,8 @@ int ebpf_nprocs; int isrh = 0; int main_thread_id = 0; int process_pid_fd = -1; +static size_t global_iterations_counter = 1; +bool publish_internal_metrics = true; pthread_mutex_t lock; pthread_mutex_t ebpf_exit_cleanup; @@ -47,7 +49,8 @@ struct netdata_static_thread cgroup_integration_thread = { }; ebpf_module_t ebpf_modules[] = { - { .thread_name = "process", .config_name = "process", .enabled = 0, .start_routine = ebpf_process_thread, + { .thread_name = "process", .config_name = "process", .thread_description = NETDATA_EBPF_MODULE_PROCESS_DESC, + .enabled = 0, .start_routine = ebpf_process_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, 
.apps_routine = ebpf_process_create_apps_charts, .maps = NULL, @@ -56,8 +59,9 @@ ebpf_module_t ebpf_modules[] = { .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_10 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "socket", .config_name = "socket", .enabled = 0, .start_routine = ebpf_socket_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0 }, + { .thread_name = "socket", .config_name = "socket", .thread_description = NETDATA_EBPF_SOCKET_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_socket_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_socket_create_apps_charts, .maps = NULL, @@ -65,8 +69,9 @@ ebpf_module_t ebpf_modules[] = { .config_file = NETDATA_NETWORK_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = socket_targets, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "cachestat", .config_name = "cachestat", .enabled = 0, .start_routine = ebpf_cachestat_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "cachestat", .config_name = "cachestat", .thread_description = NETDATA_EBPF_CACHESTAT_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_cachestat_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 
0, .apps_routine = ebpf_cachestat_create_apps_charts, .maps = cachestat_maps, @@ -75,8 +80,9 @@ ebpf_module_t ebpf_modules[] = { .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18| NETDATA_V5_4 | NETDATA_V5_14 | NETDATA_V5_15 | NETDATA_V5_16, .load = EBPF_LOAD_LEGACY, .targets = cachestat_targets, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "sync", .config_name = "sync", .enabled = 0, .start_routine = ebpf_sync_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "sync", .config_name = "sync", .thread_description = NETDATA_EBPF_SYNC_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_sync_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &sync_config, @@ -84,8 +90,9 @@ ebpf_module_t ebpf_modules[] = { // All syscalls have the same kernels .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = sync_targets, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "dc", .config_name = "dc", .enabled = 0, .start_routine = ebpf_dcstat_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "dc", .config_name = "dc", .thread_description = NETDATA_EBPF_DC_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_dcstat_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = 
MODE_ENTRY, .optional = 0, .apps_routine = ebpf_dcstat_create_apps_charts, .maps = dcstat_maps, @@ -93,8 +100,9 @@ ebpf_module_t ebpf_modules[] = { .config_file = NETDATA_DIRECTORY_DCSTAT_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = dc_targets, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "swap", .config_name = "swap", .enabled = 0, .start_routine = ebpf_swap_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "swap", .config_name = "swap", .thread_description = NETDATA_EBPF_SWAP_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_swap_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_swap_create_apps_charts, .maps = NULL, @@ -102,8 +110,9 @@ ebpf_module_t ebpf_modules[] = { .config_file = NETDATA_DIRECTORY_SWAP_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = swap_targets, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "vfs", .config_name = "vfs", .enabled = 0, .start_routine = ebpf_vfs_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "vfs", .config_name = "vfs", .thread_description = NETDATA_EBPF_VFS_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_vfs_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = 
MODE_ENTRY, .optional = 0, .apps_routine = ebpf_vfs_create_apps_charts, .maps = NULL, @@ -111,32 +120,36 @@ ebpf_module_t ebpf_modules[] = { .config_file = NETDATA_DIRECTORY_VFS_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = vfs_targets, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "filesystem", .config_name = "filesystem", .enabled = 0, .start_routine = ebpf_filesystem_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "filesystem", .config_name = "filesystem", .thread_description = NETDATA_EBPF_FS_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_filesystem_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &fs_config, .config_file = NETDATA_FILESYSTEM_CONFIG_FILE, //We are setting kernels as zero, because we load eBPF programs according the kernel running. 
.kernels = 0, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES }, - { .thread_name = "disk", .config_name = "disk", .enabled = 0, .start_routine = ebpf_disk_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "disk", .config_name = "disk", .thread_description = NETDATA_EBPF_DISK_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_disk_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &disk_config, .config_file = NETDATA_DISK_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "mount", .config_name = "mount", .enabled = 0, .start_routine = ebpf_mount_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "mount", .config_name = "mount", .thread_description = NETDATA_EBPF_MOUNT_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_mount_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &mount_config, .config_file = NETDATA_MOUNT_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, 
.targets = mount_targets, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "fd", .config_name = "fd", .enabled = 0, .start_routine = ebpf_fd_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "fd", .config_name = "fd", .thread_description = NETDATA_EBPF_FD_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_fd_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_fd_create_apps_charts, .maps = NULL, @@ -145,24 +158,27 @@ ebpf_module_t ebpf_modules[] = { .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_11 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = fd_targets, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "hardirq", .config_name = "hardirq", .enabled = 0, .start_routine = ebpf_hardirq_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "hardirq", .config_name = "hardirq", .thread_description = NETDATA_EBPF_HARDIRQ_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_hardirq_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &hardirq_config, .config_file = NETDATA_HARDIRQ_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, 
.objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "softirq", .config_name = "softirq", .enabled = 0, .start_routine = ebpf_softirq_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "softirq", .config_name = "softirq", .thread_description = NETDATA_EBPF_SOFTIRQ_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_softirq_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &softirq_config, .config_file = NETDATA_SOFTIRQ_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "oomkill", .config_name = "oomkill", .enabled = 0, .start_routine = ebpf_oomkill_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "oomkill", .config_name = "oomkill", .thread_description = NETDATA_EBPF_OOMKILL_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_oomkill_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_oomkill_create_apps_charts, .maps = NULL, @@ -170,8 +186,9 @@ ebpf_module_t ebpf_modules[] = { .config_file = NETDATA_OOMKILL_CONFIG_FILE, .kernels = NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects 
= NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "shm", .config_name = "shm", .enabled = 0, .start_routine = ebpf_shm_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "shm", .config_name = "shm", .thread_description = NETDATA_EBPF_SHM_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_shm_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = ebpf_shm_create_apps_charts, .maps = NULL, @@ -179,15 +196,25 @@ ebpf_module_t ebpf_modules[] = { .config_file = NETDATA_DIRECTORY_SHM_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = shm_targets, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, - { .thread_name = "mdflush", .config_name = "mdflush", .enabled = 0, .start_routine = ebpf_mdflush_thread, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "mdflush", .config_name = "mdflush", .thread_description = NETDATA_EBPF_MD_MODULE_DESC, + .enabled = 0, .start_routine = ebpf_mdflush_thread, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &mdflush_config, .config_file = NETDATA_DIRECTORY_MDFLUSH_CONFIG_FILE, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, + .load = EBPF_LOAD_LEGACY, .targets = mdflush_targets, .probe_links = NULL, 
.objects = NULL, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, + { .thread_name = "functions", .config_name = "functions", .thread_description = NETDATA_EBPF_FUNCTIONS_MODULE_DESC, + .enabled = 1, .start_routine = ebpf_function_thread, + .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, + .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, + .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = NULL, + .config_file = NETDATA_DIRECTORY_FUNCTIONS_CONFIG_FILE, + .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL, - .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES}, + .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0}, { .thread_name = NULL, .enabled = 0, .start_routine = NULL, .update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 0, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, @@ -357,6 +384,20 @@ struct netdata_static_thread ebpf_threads[] = { .start_routine = NULL }, { + .name = "EBPF FUNCTIONS", + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, +#ifdef NETDATA_DEV_MODE + .enabled = 1, +#else + .enabled = 0, +#endif + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + }, + { .name = NULL, .config_section = NULL, .config_name = NULL, @@ -378,7 +419,13 @@ ebpf_filesystem_partitions_t localfs[] = .enabled = CONFIG_BOOLEAN_YES, .addresses = {.function = NULL, .addr = 0}, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .fs_maps = NULL}, + .fs_maps 
= NULL, + .fs_obj = NULL, + .functions = { "ext4_file_read_iter", + "ext4_file_write_iter", + "ext4_file_open", + "ext4_sync_file", + NULL }}, {.filesystem = "xfs", .optional_filesystem = NULL, .family = "xfs", @@ -388,7 +435,13 @@ ebpf_filesystem_partitions_t localfs[] = .enabled = CONFIG_BOOLEAN_YES, .addresses = {.function = NULL, .addr = 0}, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .fs_maps = NULL}, + .fs_maps = NULL, + .fs_obj = NULL, + .functions = { "xfs_file_read_iter", + "xfs_file_write_iter", + "xfs_file_open", + "xfs_file_fsync", + NULL }}, {.filesystem = "nfs", .optional_filesystem = "nfs4", .family = "nfs", @@ -398,7 +451,13 @@ ebpf_filesystem_partitions_t localfs[] = .enabled = CONFIG_BOOLEAN_YES, .addresses = {.function = NULL, .addr = 0}, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .fs_maps = NULL}, + .fs_maps = NULL, + .fs_obj = NULL, + .functions = { "nfs_file_read", + "nfs_file_write", + "nfs_open", + "nfs_getattr", + NULL }}, // // "nfs4_file_open" - not present on all kernels {.filesystem = "zfs", .optional_filesystem = NULL, .family = "zfs", @@ -408,7 +467,13 @@ ebpf_filesystem_partitions_t localfs[] = .enabled = CONFIG_BOOLEAN_YES, .addresses = {.function = NULL, .addr = 0}, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4, - .fs_maps = NULL}, + .fs_maps = NULL, + .fs_obj = NULL, + .functions = { "zpl_iter_read", + "zpl_iter_write", + "zpl_open", + "zpl_fsync", + NULL }}, {.filesystem = "btrfs", .optional_filesystem = NULL, .family = "btrfs", @@ -418,7 +483,13 @@ ebpf_filesystem_partitions_t localfs[] = .enabled = CONFIG_BOOLEAN_YES, .addresses = {.function = "btrfs_file_operations", .addr = 0}, .kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_10, - .fs_maps = NULL}, + .fs_maps = NULL, + .fs_obj = NULL, + .functions = { "btrfs_file_read_iter", + 
"btrfs_file_write_iter", + "btrfs_file_open", + "btrfs_sync_file", + NULL }}, {.filesystem = NULL, .optional_filesystem = NULL, .family = NULL, @@ -427,7 +498,7 @@ ebpf_filesystem_partitions_t localfs[] = .flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION, .enabled = CONFIG_BOOLEAN_YES, .addresses = {.function = NULL, .addr = 0}, - .kernels = 0, .fs_maps = NULL}}; + .kernels = 0, .fs_maps = NULL, .fs_obj = NULL}}; ebpf_sync_syscalls_t local_syscalls[] = { {.syscall = NETDATA_SYSCALLS_SYNC, .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL, @@ -493,7 +564,10 @@ ebpf_plugin_stats_t plugin_statistics = {.core = 0, .legacy = 0, .running = 0, . struct btf *default_btf = NULL; struct cachestat_bpf *cachestat_bpf_obj = NULL; struct dc_bpf *dc_bpf_obj = NULL; +struct disk_bpf *disk_bpf_obj = NULL; struct fd_bpf *fd_bpf_obj = NULL; +struct hardirq_bpf *hardirq_bpf_obj = NULL; +struct mdflush_bpf *mdflush_bpf_obj = NULL; struct mount_bpf *mount_bpf_obj = NULL; struct shm_bpf *shm_bpf_obj = NULL; struct socket_bpf *socket_bpf_obj = NULL; @@ -524,7 +598,7 @@ ARAL *ebpf_allocate_pid_aral(char *name, size_t size) { static size_t max_elements = NETDATA_EBPF_ALLOC_MAX_PID; if (max_elements < NETDATA_EBPF_ALLOC_MIN_ELEMENTS) { - error("Number of elements given is too small, adjusting it for %d", NETDATA_EBPF_ALLOC_MIN_ELEMENTS); + netdata_log_error("Number of elements given is too small, adjusting it for %d", NETDATA_EBPF_ALLOC_MIN_ELEMENTS); max_elements = NETDATA_EBPF_ALLOC_MIN_ELEMENTS; } @@ -553,14 +627,14 @@ static inline void ebpf_check_before2go() int j; pthread_mutex_lock(&ebpf_exit_cleanup); for (j = 0; ebpf_modules[j].thread_name != NULL; j++) { - if (ebpf_modules[j].enabled == NETDATA_THREAD_EBPF_RUNNING) + if (ebpf_modules[j].enabled < NETDATA_THREAD_EBPF_STOPPING) i++; } pthread_mutex_unlock(&ebpf_exit_cleanup); } if (i) { - error("eBPF cannot unload all threads on time, but it will go away"); + netdata_log_error("eBPF cannot unload all threads on time, 
but it will go away"); } } @@ -581,10 +655,10 @@ static void ebpf_exit() char filename[FILENAME_MAX + 1]; ebpf_pid_file(filename, FILENAME_MAX); if (unlink(filename)) - error("Cannot remove PID file %s", filename); + netdata_log_error("Cannot remove PID file %s", filename); #ifdef NETDATA_INTERNAL_CHECKS - error("Good bye world! I was PID %d", main_thread_id); + netdata_log_error("Good bye world! I was PID %d", main_thread_id); #endif fprintf(stdout, "EXIT\n"); fflush(stdout); @@ -632,12 +706,12 @@ static void ebpf_unload_unique_maps() int i; for (i = 0; ebpf_modules[i].thread_name; i++) { // These threads are cleaned with other functions - if (i > EBPF_MODULE_SOCKET_IDX) + if (i != EBPF_MODULE_SOCKET_IDX) continue; if (ebpf_modules[i].enabled != NETDATA_THREAD_EBPF_STOPPED) { if (ebpf_modules[i].enabled != NETDATA_THREAD_EBPF_NOT_RUNNING) - error("Cannot unload maps for thread %s, because it is not stopped.", ebpf_modules[i].thread_name); + netdata_log_error("Cannot unload maps for thread %s, because it is not stopped.", ebpf_modules[i].thread_name); continue; } @@ -647,13 +721,10 @@ static void ebpf_unload_unique_maps() continue; } - if (i == EBPF_MODULE_SOCKET_IDX) { #ifdef LIBBPF_MAJOR_VERSION - if (socket_bpf_obj) - socket_bpf__destroy(socket_bpf_obj); + if (socket_bpf_obj) + socket_bpf__destroy(socket_bpf_obj); #endif - } - } } @@ -665,7 +736,7 @@ static void ebpf_unload_unique_maps() static void ebpf_unload_filesystems() { if (ebpf_modules[EBPF_MODULE_FILESYSTEM_IDX].enabled == NETDATA_THREAD_EBPF_NOT_RUNNING || - ebpf_modules[EBPF_MODULE_FILESYSTEM_IDX].enabled == NETDATA_THREAD_EBPF_RUNNING || + ebpf_modules[EBPF_MODULE_FILESYSTEM_IDX].enabled < NETDATA_THREAD_EBPF_STOPPING || ebpf_modules[EBPF_MODULE_FILESYSTEM_IDX].load != EBPF_LOAD_LEGACY) return; @@ -686,7 +757,7 @@ static void ebpf_unload_filesystems() static void ebpf_unload_sync() { if (ebpf_modules[EBPF_MODULE_SYNC_IDX].enabled == NETDATA_THREAD_EBPF_NOT_RUNNING || - 
ebpf_modules[EBPF_MODULE_SYNC_IDX].enabled == NETDATA_THREAD_EBPF_RUNNING) + ebpf_modules[EBPF_MODULE_SYNC_IDX].enabled < NETDATA_THREAD_EBPF_STOPPING) return; int i; @@ -724,10 +795,10 @@ static void ebpf_stop_threads(int sig) only_one = 1; int i; for (i = 0; ebpf_modules[i].thread_name != NULL; i++) { - if (ebpf_modules[i].enabled == NETDATA_THREAD_EBPF_RUNNING) { + if (ebpf_modules[i].enabled < NETDATA_THREAD_EBPF_STOPPING) { netdata_thread_cancel(*ebpf_modules[i].thread->thread); #ifdef NETDATA_DEV_MODE - info("Sending cancel for thread %s", ebpf_modules[i].thread_name); + netdata_log_info("Sending cancel for thread %s", ebpf_modules[i].thread_name); #endif } } @@ -736,7 +807,7 @@ static void ebpf_stop_threads(int sig) pthread_mutex_lock(&mutex_cgroup_shm); netdata_thread_cancel(*cgroup_integration_thread.thread); #ifdef NETDATA_DEV_MODE - info("Sending cancel for thread %s", cgroup_integration_thread.name); + netdata_log_info("Sending cancel for thread %s", cgroup_integration_thread.name); #endif pthread_mutex_unlock(&mutex_cgroup_shm); @@ -760,6 +831,19 @@ static void ebpf_stop_threads(int sig) *****************************************************************/ /** + * Create apps for module + * + * Create apps chart that will be used with specific module + * + * @param em the module main structure. + * @param root a pointer for the targets. + */ +static inline void ebpf_create_apps_for_module(ebpf_module_t *em, struct ebpf_target *root) { + if (em->enabled < NETDATA_THREAD_EBPF_STOPPING && em->apps_charts && em->apps_routine) + em->apps_routine(em, root); +} + +/** * Create apps charts * * Call ebpf_create_chart to create the charts on apps submenu. 
@@ -800,14 +884,21 @@ static void ebpf_create_apps_charts(struct ebpf_target *root) } } - if (!newly_added) + int i; + if (!newly_added) { + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX ; i++) { + ebpf_module_t *current = &ebpf_modules[i]; + if (current->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) + continue; + + ebpf_create_apps_for_module(current, root); + } return; + } - int counter; - for (counter = 0; ebpf_modules[counter].thread_name; counter++) { - ebpf_module_t *current = &ebpf_modules[counter]; - if (current->enabled == NETDATA_THREAD_EBPF_RUNNING && current->apps_charts && current->apps_routine) - current->apps_routine(current, root); + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX ; i++) { + ebpf_module_t *current = &ebpf_modules[i]; + ebpf_create_apps_for_module(current, root); } } @@ -1136,7 +1227,7 @@ void write_histogram_chart(char *family, char *name, const netdata_idx_t *hist, * @param name the name used to create aral * @param em a pointer to the structure with the default values. */ -void ebpf_statistic_create_aral_chart(char *name, ebpf_module_t *em) +int ebpf_statistic_create_aral_chart(char *name, ebpf_module_t *em) { static int priority = 140100; char *mem = { NETDATA_EBPF_STAT_DIMENSION_MEMORY }; @@ -1174,6 +1265,40 @@ void ebpf_statistic_create_aral_chart(char *name, ebpf_module_t *em) ebpf_write_global_dimension(aral, aral, ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + return priority - 2; +} + +/** + * ARAL Charts + * + * Add chart to monitor ARAL usage + * Caller must call this function with mutex locked. + * + * @param em a pointer to the structure with the default values. + * @param prio the initial priority used to disable charts. 
+ */ +void ebpf_statistic_obsolete_aral_chart(ebpf_module_t *em, int prio) +{ + ebpf_write_chart_obsolete(NETDATA_MONITORING_FAMILY, + em->memory_allocations, + "Calls to allocate memory.", + "calls", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_STACKED, + "netdata.ebpf_aral_stat_alloc", + prio++, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_MONITORING_FAMILY, + em->memory_allocations, + "Calls to allocate memory.", + "calls", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_STACKED, + "netdata.ebpf_aral_stat_alloc", + prio++, + em->update_every); } /** @@ -1247,7 +1372,7 @@ void ebpf_global_labels(netdata_syscall_stat_t *is, netdata_publish_syscall_t *p static inline void ebpf_set_thread_mode(netdata_run_mode_t lmode) { int i; - for (i = 0; ebpf_modules[i].thread_name; i++) { + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { ebpf_modules[i].mode = lmode; } } @@ -1256,16 +1381,15 @@ static inline void ebpf_set_thread_mode(netdata_run_mode_t lmode) * Enable specific charts selected by user. * * @param em the structure that will be changed - * @param disable_apps the status about the apps charts. * @param disable_cgroup the status about the cgroups charts. */ -static inline void ebpf_enable_specific_chart(struct ebpf_module *em, int disable_apps, int disable_cgroup) +static inline void ebpf_enable_specific_chart(struct ebpf_module *em, int disable_cgroup) { em->enabled = CONFIG_BOOLEAN_YES; // oomkill stores data inside apps submenu, so it always need to have apps_enabled for plugin to create // its chart, without this comparison eBPF.plugin will try to store invalid data when apps is disabled. 
- if (!disable_apps || !strcmp(em->thread_name, "oomkill")) { + if (!strcmp(em->thread_name, "oomkill")) { em->apps_charts = NETDATA_EBPF_APPS_FLAG_YES; } @@ -1277,20 +1401,6 @@ static inline void ebpf_enable_specific_chart(struct ebpf_module *em, int disabl } /** - * Enable all charts - * - * @param apps what is the current status of apps - * @param cgroups what is the current status of cgroups - */ -static inline void ebpf_enable_all_charts(int apps, int cgroups) -{ - int i; - for (i = 0; ebpf_modules[i].thread_name; i++) { - ebpf_enable_specific_chart(&ebpf_modules[i], apps, cgroups); - } -} - -/** * Disable all Global charts * * Disable charts @@ -1304,38 +1414,23 @@ static inline void disable_all_global_charts() } } - /** * Enable the specified chart group * * @param idx the index of ebpf_modules that I am enabling - * @param disable_apps should I keep apps charts? */ -static inline void ebpf_enable_chart(int idx, int disable_apps, int disable_cgroup) +static inline void ebpf_enable_chart(int idx, int disable_cgroup) { int i; for (i = 0; ebpf_modules[i].thread_name; i++) { if (i == idx) { - ebpf_enable_specific_chart(&ebpf_modules[i], disable_apps, disable_cgroup); + ebpf_enable_specific_chart(&ebpf_modules[i], disable_cgroup); break; } } } /** - * Disable APPs - * - * Disable charts for apps loading only global charts. - */ -static inline void ebpf_disable_apps() -{ - int i; - for (i = 0; ebpf_modules[i].thread_name; i++) { - ebpf_modules[i].apps_charts = NETDATA_EBPF_APPS_FLAG_NO; - } -} - -/** * Disable Cgroups * * Disable charts for apps loading only global charts. 
@@ -1508,8 +1603,7 @@ uint32_t ebpf_enable_tracepoints(ebpf_tracepoint_t *tps) uint32_t cnt = 0; for (int i = 0; tps[i].class != NULL; i++) { if (ebpf_enable_tracepoint(&tps[i]) == -1) { - infoerr("failed to enable tracepoint %s:%s", - tps[i].class, tps[i].event); + netdata_log_error("Failed to enable tracepoint %s:%s", tps[i].class, tps[i].event); } else { cnt += 1; @@ -1572,7 +1666,7 @@ static void read_local_addresses() { struct ifaddrs *ifaddr, *ifa; if (getifaddrs(&ifaddr) == -1) { - error("Cannot get the local IP addresses, it is no possible to do separation between inbound and outbound connections"); + netdata_log_error("Cannot get the local IP addresses, it is no possible to do separation between inbound and outbound connections"); return; } @@ -1639,32 +1733,10 @@ void ebpf_start_pthread_variables() } /** - * Am I collecting PIDs? - * - * Test if eBPF plugin needs to collect PID information. - * - * @return It returns 1 if at least one thread needs to collect the data, or zero otherwise. - */ -static inline uint32_t ebpf_am_i_collect_pids() -{ - uint32_t ret = 0; - int i; - for (i = 0; ebpf_modules[i].thread_name; i++) { - ret |= ebpf_modules[i].cgroup_charts | (ebpf_modules[i].apps_charts & NETDATA_EBPF_APPS_FLAG_YES); - } - - return ret; -} - -/** * Allocate the vectors used for all threads. 
*/ static void ebpf_allocate_common_vectors() { - if (unlikely(!ebpf_am_i_collect_pids())) { - return; - } - ebpf_all_pids = callocz((size_t)pid_max, sizeof(struct ebpf_pid_stat *)); ebpf_aral_init(); } @@ -1674,17 +1746,31 @@ static void ebpf_allocate_common_vectors() * * @param ptr the option given by users */ -static inline void how_to_load(char *ptr) +static inline void ebpf_how_to_load(char *ptr) { if (!strcasecmp(ptr, EBPF_CFG_LOAD_MODE_RETURN)) ebpf_set_thread_mode(MODE_RETURN); else if (!strcasecmp(ptr, EBPF_CFG_LOAD_MODE_DEFAULT)) ebpf_set_thread_mode(MODE_ENTRY); else - error("the option %s for \"ebpf load mode\" is not a valid option.", ptr); + netdata_log_error("the option %s for \"ebpf load mode\" is not a valid option.", ptr); } /** + * Define whether we should have charts for apps + * + * @param lmode the mode that will be used for them. + */ +static inline void ebpf_set_apps_mode(netdata_apps_integration_flags_t value) +{ + int i; + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { + ebpf_modules[i].apps_charts = value; + } +} + + +/** * Update interval * * Update default interval with value from user @@ -1717,6 +1803,21 @@ static void ebpf_update_table_size() } /** + * Update lifetime + * + * Update the period of time that specific thread will run + */ +static void ebpf_update_lifetime() +{ + int i; + uint32_t value = (uint32_t) appconfig_get_number(&collector_config, EBPF_GLOBAL_SECTION, + EBPF_CFG_LIFETIME, EBPF_DEFAULT_LIFETIME); + for (i = 0; ebpf_modules[i].thread_name; i++) { + ebpf_modules[i].lifetime = value; + } +} + +/** * Set Load mode * * @param origin specify the configuration file loaded @@ -1761,12 +1862,11 @@ static void ebpf_update_map_per_core() /** * Read collector values * - * @param disable_apps variable to store information related to apps. * @param disable_cgroups variable to store information related to cgroups. * @param update_every value to overwrite the update frequency set by the server. 
* @param origin specify the configuration file loaded */ -static void read_collector_values(int *disable_apps, int *disable_cgroups, +static void read_collector_values(int *disable_cgroups, int update_every, netdata_ebpf_load_mode_t origin) { // Read global section @@ -1778,7 +1878,7 @@ static void read_collector_values(int *disable_apps, int *disable_cgroups, value = appconfig_get(&collector_config, EBPF_GLOBAL_SECTION, EBPF_CFG_LOAD_MODE, EBPF_CFG_LOAD_MODE_DEFAULT); - how_to_load(value); + ebpf_how_to_load(value); btf_path = appconfig_get(&collector_config, EBPF_GLOBAL_SECTION, EBPF_CFG_PROGRAM_PATH, EBPF_DEFAULT_BTF_PATH); @@ -1795,6 +1895,8 @@ static void read_collector_values(int *disable_apps, int *disable_cgroups, ebpf_update_table_size(); + ebpf_update_lifetime(); + // This is kept to keep compatibility uint32_t enabled = appconfig_get_boolean(&collector_config, EBPF_GLOBAL_SECTION, "disable apps", CONFIG_BOOLEAN_NO); @@ -1804,7 +1906,8 @@ static void read_collector_values(int *disable_apps, int *disable_cgroups, CONFIG_BOOLEAN_YES); enabled = (enabled == CONFIG_BOOLEAN_NO)?CONFIG_BOOLEAN_YES:CONFIG_BOOLEAN_NO; } - *disable_apps = (int)enabled; + + ebpf_set_apps_mode(!enabled); // Cgroup is a positive sentence, so we need to invert the values to disable apps. 
// We are using the same pattern for cgroup and apps @@ -1816,10 +1919,8 @@ static void read_collector_values(int *disable_apps, int *disable_cgroups, // Read ebpf programs section enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, ebpf_modules[EBPF_MODULE_PROCESS_IDX].config_name, CONFIG_BOOLEAN_YES); - int started = 0; if (enabled) { - ebpf_enable_chart(EBPF_MODULE_PROCESS_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_PROCESS_IDX, *disable_cgroups); } // This is kept to keep compatibility @@ -1830,8 +1931,7 @@ static void read_collector_values(int *disable_apps, int *disable_cgroups, ebpf_modules[EBPF_MODULE_SOCKET_IDX].config_name, CONFIG_BOOLEAN_NO); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_SOCKET_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_SOCKET_IDX, *disable_cgroups); } // This is kept to keep compatibility @@ -1843,123 +1943,98 @@ static void read_collector_values(int *disable_apps, int *disable_cgroups, network_viewer_opt.enabled = enabled; if (enabled) { if (!ebpf_modules[EBPF_MODULE_SOCKET_IDX].enabled) - ebpf_enable_chart(EBPF_MODULE_SOCKET_IDX, *disable_apps, *disable_cgroups); + ebpf_enable_chart(EBPF_MODULE_SOCKET_IDX, *disable_cgroups); // Read network viewer section if network viewer is enabled // This is kept here to keep backward compatibility parse_network_viewer_section(&collector_config); parse_service_name_section(&collector_config); - started++; } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "cachestat", CONFIG_BOOLEAN_NO); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_CACHESTAT_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_CACHESTAT_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "sync", CONFIG_BOOLEAN_YES); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_SYNC_IDX, *disable_apps, *disable_cgroups); - started++; + 
ebpf_enable_chart(EBPF_MODULE_SYNC_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "dcstat", CONFIG_BOOLEAN_NO); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_DCSTAT_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_DCSTAT_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "swap", CONFIG_BOOLEAN_NO); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_SWAP_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_SWAP_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "vfs", CONFIG_BOOLEAN_NO); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_VFS_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_VFS_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "filesystem", CONFIG_BOOLEAN_NO); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_FILESYSTEM_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_FILESYSTEM_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "disk", CONFIG_BOOLEAN_NO); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_DISK_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_DISK_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "mount", CONFIG_BOOLEAN_YES); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_MOUNT_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_MOUNT_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "fd", CONFIG_BOOLEAN_YES); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_FD_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_FD_IDX, *disable_cgroups); } enabled = 
appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "hardirq", CONFIG_BOOLEAN_YES); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_HARDIRQ_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_HARDIRQ_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "softirq", CONFIG_BOOLEAN_YES); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_SOFTIRQ_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_SOFTIRQ_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "oomkill", CONFIG_BOOLEAN_YES); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_OOMKILL_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_OOMKILL_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "shm", CONFIG_BOOLEAN_YES); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_SHM_IDX, *disable_apps, *disable_cgroups); - started++; + ebpf_enable_chart(EBPF_MODULE_SHM_IDX, *disable_cgroups); } enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "mdflush", CONFIG_BOOLEAN_NO); if (enabled) { - ebpf_enable_chart(EBPF_MODULE_MDFLUSH_IDX, *disable_apps, *disable_cgroups); - started++; - } - - if (!started){ - ebpf_enable_all_charts(*disable_apps, *disable_cgroups); - // Read network viewer section - // This is kept here to keep backward compatibility - if (network_viewer_opt.enabled) { - parse_network_viewer_section(&collector_config); - parse_service_name_section(&collector_config); - } + ebpf_enable_chart(EBPF_MODULE_MDFLUSH_IDX, *disable_cgroups); } } @@ -1967,13 +2042,12 @@ static void read_collector_values(int *disable_apps, int *disable_cgroups, * Load collector config * * @param path the path where the file ebpf.conf is stored. - * @param disable_apps variable to store the information about apps plugin status. 
* @param disable_cgroups variable to store the information about cgroups plugin status. * @param update_every value to overwrite the update frequency set by the server. * * @return 0 on success and -1 otherwise. */ -static int load_collector_config(char *path, int *disable_apps, int *disable_cgroups, int update_every) +static int ebpf_load_collector_config(char *path, int *disable_cgroups, int update_every) { char lpath[4096]; netdata_ebpf_load_mode_t origin; @@ -1988,7 +2062,7 @@ static int load_collector_config(char *path, int *disable_apps, int *disable_cgr } else origin = EBPF_LOADED_FROM_USER; - read_collector_values(disable_apps, disable_cgroups, update_every, origin); + read_collector_values(disable_cgroups, update_every, origin); return 0; } @@ -2018,7 +2092,7 @@ void set_global_variables() ebpf_nprocs = (int)sysconf(_SC_NPROCESSORS_ONLN); if (ebpf_nprocs < 0) { ebpf_nprocs = NETDATA_MAX_PROCESSOR; - error("Cannot identify number of process, using default value %d", ebpf_nprocs); + netdata_log_error("Cannot identify number of process, using default value %d", ebpf_nprocs); } isrh = get_redhat_release(); @@ -2032,7 +2106,7 @@ void set_global_variables() static inline void ebpf_load_thread_config() { int i; - for (i = 0; ebpf_modules[i].thread_name; i++) { + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { ebpf_update_module(&ebpf_modules[i], default_btf, running_on_kernel, isrh); } } @@ -2047,12 +2121,12 @@ static inline void ebpf_load_thread_config() int ebpf_check_conditions() { if (!has_condition_to_run(running_on_kernel)) { - error("The current collector cannot run on this kernel."); + netdata_log_error("The current collector cannot run on this kernel."); return -1; } if (!am_i_running_as_root()) { - error( + netdata_log_error( "ebpf.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities..", (unsigned int)getuid(), (unsigned int)geteuid()); return -1; @@ -2072,7 +2146,7 @@ int ebpf_adjust_memory_limit() { struct 
rlimit r = { RLIM_INFINITY, RLIM_INFINITY }; if (setrlimit(RLIMIT_MEMLOCK, &r)) { - error("Setrlimit(RLIMIT_MEMLOCK)"); + netdata_log_error("Setrlimit(RLIMIT_MEMLOCK)"); return -1; } @@ -2087,7 +2161,6 @@ int ebpf_adjust_memory_limit() */ static void ebpf_parse_args(int argc, char **argv) { - int disable_apps = 0; int disable_cgroups = 1; int freq = 0; int option_index = 0; @@ -2134,12 +2207,12 @@ static void ebpf_parse_args(int argc, char **argv) if (!freq) freq = EBPF_DEFAULT_UPDATE_EVERY; - if (load_collector_config(ebpf_user_config_dir, &disable_apps, &disable_cgroups, freq)) { - info( + if (ebpf_load_collector_config(ebpf_user_config_dir, &disable_cgroups, freq)) { + netdata_log_info( "Does not have a configuration file inside `%s/ebpf.d.conf. It will try to load stock file.", ebpf_user_config_dir); - if (load_collector_config(ebpf_stock_config_dir, &disable_apps, &disable_cgroups, freq)) { - info("Does not have a stock file. It is starting with default options."); + if (ebpf_load_collector_config(ebpf_stock_config_dir, &disable_cgroups, freq)) { + netdata_log_info("Does not have a stock file. 
It is starting with default options."); } } @@ -2154,120 +2227,120 @@ static void ebpf_parse_args(int argc, char **argv) case EBPF_MODULE_PROCESS_IDX: { select_threads |= 1<<EBPF_MODULE_PROCESS_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF enabling \"PROCESS\" charts, because it was started with the option \"[-]-process\"."); + netdata_log_info("EBPF enabling \"PROCESS\" charts, because it was started with the option \"[-]-process\"."); #endif break; } case EBPF_MODULE_SOCKET_IDX: { select_threads |= 1<<EBPF_MODULE_SOCKET_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF enabling \"NET\" charts, because it was started with the option \"[-]-net\"."); + netdata_log_info("EBPF enabling \"NET\" charts, because it was started with the option \"[-]-net\"."); #endif break; } case EBPF_MODULE_CACHESTAT_IDX: { select_threads |= 1<<EBPF_MODULE_CACHESTAT_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF enabling \"CACHESTAT\" charts, because it was started with the option \"[-]-cachestat\"."); + netdata_log_info("EBPF enabling \"CACHESTAT\" charts, because it was started with the option \"[-]-cachestat\"."); #endif break; } case EBPF_MODULE_SYNC_IDX: { select_threads |= 1<<EBPF_MODULE_SYNC_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF enabling \"SYNC\" chart, because it was started with the option \"[-]-sync\"."); + netdata_log_info("EBPF enabling \"SYNC\" chart, because it was started with the option \"[-]-sync\"."); #endif break; } case EBPF_MODULE_DCSTAT_IDX: { select_threads |= 1<<EBPF_MODULE_DCSTAT_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF enabling \"DCSTAT\" charts, because it was started with the option \"[-]-dcstat\"."); + netdata_log_info("EBPF enabling \"DCSTAT\" charts, because it was started with the option \"[-]-dcstat\"."); #endif break; } case EBPF_MODULE_SWAP_IDX: { select_threads |= 1<<EBPF_MODULE_SWAP_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF enabling \"SWAP\" chart, because it was started with the option \"[-]-swap\"."); + 
netdata_log_info("EBPF enabling \"SWAP\" chart, because it was started with the option \"[-]-swap\"."); #endif break; } case EBPF_MODULE_VFS_IDX: { select_threads |= 1<<EBPF_MODULE_VFS_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF enabling \"VFS\" chart, because it was started with the option \"[-]-vfs\"."); + netdata_log_info("EBPF enabling \"VFS\" chart, because it was started with the option \"[-]-vfs\"."); #endif break; } case EBPF_MODULE_FILESYSTEM_IDX: { select_threads |= 1<<EBPF_MODULE_FILESYSTEM_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF enabling \"FILESYSTEM\" chart, because it was started with the option \"[-]-filesystem\"."); + netdata_log_info("EBPF enabling \"FILESYSTEM\" chart, because it was started with the option \"[-]-filesystem\"."); #endif break; } case EBPF_MODULE_DISK_IDX: { select_threads |= 1<<EBPF_MODULE_DISK_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF enabling \"DISK\" chart, because it was started with the option \"[-]-disk\"."); + netdata_log_info("EBPF enabling \"DISK\" chart, because it was started with the option \"[-]-disk\"."); #endif break; } case EBPF_MODULE_MOUNT_IDX: { select_threads |= 1<<EBPF_MODULE_MOUNT_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF enabling \"MOUNT\" chart, because it was started with the option \"[-]-mount\"."); + netdata_log_info("EBPF enabling \"MOUNT\" chart, because it was started with the option \"[-]-mount\"."); #endif break; } case EBPF_MODULE_FD_IDX: { select_threads |= 1<<EBPF_MODULE_FD_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF enabling \"FILEDESCRIPTOR\" chart, because it was started with the option \"[-]-filedescriptor\"."); + netdata_log_info("EBPF enabling \"FILEDESCRIPTOR\" chart, because it was started with the option \"[-]-filedescriptor\"."); #endif break; } case EBPF_MODULE_HARDIRQ_IDX: { select_threads |= 1<<EBPF_MODULE_HARDIRQ_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF enabling \"HARDIRQ\" chart, because it was started with the option \"[-]-hardirq\"."); + 
netdata_log_info("EBPF enabling \"HARDIRQ\" chart, because it was started with the option \"[-]-hardirq\"."); #endif break; } case EBPF_MODULE_SOFTIRQ_IDX: { select_threads |= 1<<EBPF_MODULE_SOFTIRQ_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF enabling \"SOFTIRQ\" chart, because it was started with the option \"[-]-softirq\"."); + netdata_log_info("EBPF enabling \"SOFTIRQ\" chart, because it was started with the option \"[-]-softirq\"."); #endif break; } case EBPF_MODULE_OOMKILL_IDX: { select_threads |= 1<<EBPF_MODULE_OOMKILL_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF enabling \"OOMKILL\" chart, because it was started with the option \"[-]-oomkill\"."); + netdata_log_info("EBPF enabling \"OOMKILL\" chart, because it was started with the option \"[-]-oomkill\"."); #endif break; } case EBPF_MODULE_SHM_IDX: { select_threads |= 1<<EBPF_MODULE_SHM_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF enabling \"SHM\" chart, because it was started with the option \"[-]-shm\"."); + netdata_log_info("EBPF enabling \"SHM\" chart, because it was started with the option \"[-]-shm\"."); #endif break; } case EBPF_MODULE_MDFLUSH_IDX: { select_threads |= 1<<EBPF_MODULE_MDFLUSH_IDX; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF enabling \"MDFLUSH\" chart, because it was started with the option \"[-]-mdflush\"."); + netdata_log_info("EBPF enabling \"MDFLUSH\" chart, because it was started with the option \"[-]-mdflush\"."); #endif break; } case EBPF_OPTION_ALL_CHARTS: { - disable_apps = 0; + ebpf_set_apps_mode(NETDATA_EBPF_APPS_FLAG_YES); disable_cgroups = 0; #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF running with all chart groups, because it was started with the option \"[-]-all\"."); + netdata_log_info("EBPF running with all chart groups, because it was started with the option \"[-]-all\"."); #endif break; } @@ -2280,31 +2353,30 @@ static void ebpf_parse_args(int argc, char **argv) exit(0); } case EBPF_OPTION_GLOBAL_CHART: { - disable_apps = 1; disable_cgroups = 1; #ifdef 
NETDATA_INTERNAL_CHECKS - info("EBPF running with global chart group, because it was started with the option \"[-]-global\"."); + netdata_log_info("EBPF running with global chart group, because it was started with the option \"[-]-global\"."); #endif break; } case EBPF_OPTION_RETURN_MODE: { ebpf_set_thread_mode(MODE_RETURN); #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF running in \"RETURN\" mode, because it was started with the option \"[-]-return\"."); + netdata_log_info("EBPF running in \"RETURN\" mode, because it was started with the option \"[-]-return\"."); #endif break; } case EBPF_OPTION_LEGACY: { ebpf_set_load_mode(EBPF_LOAD_LEGACY, EBPF_LOADED_FROM_USER); #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF running with \"LEGACY\" code, because it was started with the option \"[-]-legacy\"."); + netdata_log_info("EBPF running with \"LEGACY\" code, because it was started with the option \"[-]-legacy\"."); #endif break; } case EBPF_OPTION_CORE: { ebpf_set_load_mode(EBPF_LOAD_CORE, EBPF_LOADED_FROM_USER); #ifdef NETDATA_INTERNAL_CHECKS - info("EBPF running with \"CO-RE\" code, because it was started with the option \"[-]-core\"."); + netdata_log_info("EBPF running with \"CO-RE\" code, because it was started with the option \"[-]-core\"."); #endif break; } @@ -2341,10 +2413,7 @@ unittest: } } - if (disable_apps || disable_cgroups) { - if (disable_apps) - ebpf_disable_apps(); - + if (disable_cgroups) { if (disable_cgroups) ebpf_disable_cgroups(); } @@ -2354,23 +2423,301 @@ unittest: uint64_t idx; for (idx = 0; idx < EBPF_OPTION_ALL_CHARTS; idx++) { if (select_threads & 1<<idx) - ebpf_enable_specific_chart(&ebpf_modules[idx], disable_apps, disable_cgroups); + ebpf_enable_specific_chart(&ebpf_modules[idx], disable_cgroups); } } // Load apps_groups.conf if (ebpf_read_apps_groups_conf( &apps_groups_default_target, &apps_groups_root_target, ebpf_user_config_dir, "groups")) { - info("Cannot read process groups configuration file '%s/apps_groups.conf'. 
Will try '%s/apps_groups.conf'", + netdata_log_info("Cannot read process groups configuration file '%s/apps_groups.conf'. Will try '%s/apps_groups.conf'", ebpf_user_config_dir, ebpf_stock_config_dir); if (ebpf_read_apps_groups_conf( &apps_groups_default_target, &apps_groups_root_target, ebpf_stock_config_dir, "groups")) { - error("Cannot read process groups '%s/apps_groups.conf'. There are no internal defaults. Failing.", + netdata_log_error("Cannot read process groups '%s/apps_groups.conf'. There are no internal defaults. Failing.", ebpf_stock_config_dir); ebpf_exit(); } } else - info("Loaded config file '%s/apps_groups.conf'", ebpf_user_config_dir); + netdata_log_info("Loaded config file '%s/apps_groups.conf'", ebpf_user_config_dir); +} + +/***************************************************************** + * + * Collector charts + * + *****************************************************************/ + +static char *load_event_stat[NETDATA_EBPF_LOAD_STAT_END] = {"legacy", "co-re"}; +static char *memlock_stat = {"memory_locked"}; +static char *hash_table_stat = {"hash_table"}; +static char *hash_table_core[NETDATA_EBPF_LOAD_STAT_END] = {"per_core", "unique"}; + +/** + * Send Statistic Data + * + * Send statistic information to netdata. + */ +void ebpf_send_statistic_data() +{ + if (!publish_internal_metrics) + return; + + write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_THREADS); + int i; + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { + ebpf_module_t *wem = &ebpf_modules[i]; + write_chart_dimension((char *)wem->thread_name, (wem->enabled < NETDATA_THREAD_EBPF_STOPPING) ? 1 : 0); + } + write_end_chart(); + + write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_LIFE_TIME); + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX ; i++) { + ebpf_module_t *wem = &ebpf_modules[i]; + // Threads like VFS is slow to load and this can create an invalid number, this is the motive + // we are also testing wem->lifetime value. 
+ write_chart_dimension((char *)wem->thread_name, + (wem->lifetime && wem->enabled < NETDATA_THREAD_EBPF_STOPPING) ? + (long long) (wem->lifetime - wem->running_time): + 0) ; + } + write_end_chart(); + + write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_LOAD_METHOD); + write_chart_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_LEGACY], (long long)plugin_statistics.legacy); + write_chart_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_CORE], (long long)plugin_statistics.core); + write_end_chart(); + + write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_KERNEL_MEMORY); + write_chart_dimension(memlock_stat, (long long)plugin_statistics.memlock_kern); + write_end_chart(); + + write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_LOADED); + write_chart_dimension(hash_table_stat, (long long)plugin_statistics.hash_tables); + write_end_chart(); + + write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_PER_CORE); + write_chart_dimension(hash_table_core[NETDATA_EBPF_THREAD_PER_CORE], (long long)plugin_statistics.hash_percpu); + write_chart_dimension(hash_table_core[NETDATA_EBPF_THREAD_UNIQUE], (long long)plugin_statistics.hash_unique); + write_end_chart(); +} + +/** + * Update Internal Metric variable + * + * By default eBPF.plugin sends internal metrics for netdata, but user can + * disable this. + * + * The function updates the variable used to send charts. + */ +static void update_internal_metric_variable() +{ + const char *s = getenv("NETDATA_INTERNALS_MONITORING"); + if (s && *s && strcmp(s, "NO") == 0) + publish_internal_metrics = false; +} + +/** + * Create chart for Statistic Thread + * + * Write to standard output current values for threads. 
+ * + * @param update_every time used to update charts + */ +static inline void ebpf_create_statistic_thread_chart(int update_every) +{ + ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, + NETDATA_EBPF_THREADS, + "Threads running.", + "boolean", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_EBPF_ORDER_STAT_THREADS, + update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + + int i; + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { + ebpf_write_global_dimension((char *)ebpf_modules[i].thread_name, + (char *)ebpf_modules[i].thread_name, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + } +} + +/** + * Create lifetime Thread Chart + * + * Write to standard output current values for threads lifetime. + * + * @param update_every time used to update charts + */ +static inline void ebpf_create_lifetime_thread_chart(int update_every) +{ + ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, + NETDATA_EBPF_LIFE_TIME, + "Threads running.", + "seconds", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_EBPF_ORDER_STAT_LIFE_TIME, + update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + + int i; + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { + ebpf_write_global_dimension((char *)ebpf_modules[i].thread_name, + (char *)ebpf_modules[i].thread_name, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + } +} + +/** + * Create chart for Load Thread + * + * Write to standard output current values for load mode. 
+ * + * @param update_every time used to update charts + */ +static inline void ebpf_create_statistic_load_chart(int update_every) +{ + ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, + NETDATA_EBPF_LOAD_METHOD, + "Load info.", + "methods", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_EBPF_ORDER_STAT_LOAD_METHOD, + update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + + ebpf_write_global_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_LEGACY], + load_event_stat[NETDATA_EBPF_LOAD_STAT_LEGACY], + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + ebpf_write_global_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_CORE], + load_event_stat[NETDATA_EBPF_LOAD_STAT_CORE], + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); +} + +/** + * Create chart for Kernel Memory + * + * Write to standard output current values for allocated memory. + * + * @param update_every time used to update charts + */ +static inline void ebpf_create_statistic_kernel_memory(int update_every) +{ + ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, + NETDATA_EBPF_KERNEL_MEMORY, + "Memory allocated for hash tables.", + "bytes", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_EBPF_ORDER_STAT_KERNEL_MEMORY, + update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + + ebpf_write_global_dimension(memlock_stat, + memlock_stat, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); +} + +/** + * Create chart Hash Table + * + * Write to standard output number of hash tables used with this software. 
+ * + * @param update_every time used to update charts + */ +static inline void ebpf_create_statistic_hash_tables(int update_every) +{ + ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, + NETDATA_EBPF_HASH_TABLES_LOADED, + "Number of hash tables loaded.", + "hash tables", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_EBPF_ORDER_STAT_HASH_TABLES, + update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + + ebpf_write_global_dimension(hash_table_stat, + hash_table_stat, + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); +} + +/** + * Create chart for percpu stats + * + * Write to standard output current values for threads. + * + * @param update_every time used to update charts + */ +static inline void ebpf_create_statistic_hash_per_core(int update_every) +{ + ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, + NETDATA_EBPF_HASH_TABLES_PER_CORE, + "How threads are loading hash/array tables.", + "threads", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_EBPF_ORDER_STAT_HASH_CORE, + update_every, + NETDATA_EBPF_MODULE_NAME_PROCESS); + + ebpf_write_global_dimension(hash_table_core[NETDATA_EBPF_THREAD_PER_CORE], + hash_table_core[NETDATA_EBPF_THREAD_PER_CORE], + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + ebpf_write_global_dimension(hash_table_core[NETDATA_EBPF_THREAD_UNIQUE], + hash_table_core[NETDATA_EBPF_THREAD_UNIQUE], + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); +} + + +/** + * Create Statistics Charts + * + * Create charts that will show statistics related to eBPF plugin. 
+ * + * @param update_every time used to update charts + */ +static void ebpf_create_statistic_charts(int update_every) +{ + static char create_charts = 1; + update_internal_metric_variable(); + if (!publish_internal_metrics) + return; + + if (!create_charts) + return; + + create_charts = 0; + + ebpf_create_statistic_thread_chart(update_every); +#ifdef NETDATA_DEV_MODE + EBPF_PLUGIN_FUNCTIONS(EBPF_FUNCTION_THREAD, EBPF_PLUGIN_THREAD_FUNCTION_DESCRIPTION); +#endif + + ebpf_create_lifetime_thread_chart(update_every); +#ifdef NETDATA_DEV_MODE + EBPF_PLUGIN_FUNCTIONS(EBPF_FUNCTION_THREAD, EBPF_PLUGIN_THREAD_FUNCTION_DESCRIPTION); +#endif + + ebpf_create_statistic_load_chart(update_every); + + ebpf_create_statistic_kernel_memory(update_every); + + ebpf_create_statistic_hash_tables(update_every); + + ebpf_create_statistic_hash_per_core(update_every); } /***************************************************************** @@ -2412,7 +2759,7 @@ static char *ebpf_get_process_name(pid_t pid) procfile *ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT); if(unlikely(!ff)) { - error("Cannot open %s", filename); + netdata_log_error("Cannot open %s", filename); return name; } @@ -2605,11 +2952,11 @@ int main(int argc, char **argv) ebpf_module_t *em = &ebpf_modules[i]; em->thread = st; - // We always initialize process, because it is responsible to take care of apps integration - if (em->enabled || !i) { + em->thread_id = i; + if (em->enabled) { st->thread = mallocz(sizeof(netdata_thread_t)); - em->thread_id = i; em->enabled = NETDATA_THREAD_EBPF_RUNNING; + em->lifetime = EBPF_NON_FUNCTION_LIFE_TIME; netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, em); } else { em->enabled = NETDATA_THREAD_EBPF_NOT_RUNNING; @@ -2623,23 +2970,30 @@ int main(int argc, char **argv) int update_apps_list = update_apps_every - 1; int process_maps_per_core = ebpf_modules[EBPF_MODULE_PROCESS_IDX].maps_per_core; //Plugin will be killed when it receives a 
signal - while (!ebpf_exit_plugin) { + for ( ; !ebpf_exit_plugin ; global_iterations_counter++) { (void)heartbeat_next(&hb, step); + if (global_iterations_counter % EBPF_DEFAULT_UPDATE_EVERY == 0) { + pthread_mutex_lock(&lock); + ebpf_create_statistic_charts(EBPF_DEFAULT_UPDATE_EVERY); + + ebpf_send_statistic_data(); + pthread_mutex_unlock(&lock); + fflush(stdout); + } + pthread_mutex_lock(&ebpf_exit_cleanup); - if (process_pid_fd != -1) { - pthread_mutex_lock(&collect_data_mutex); - if (++update_apps_list == update_apps_every) { - update_apps_list = 0; - cleanup_exited_pids(); - collect_data_for_all_processes(process_pid_fd, process_maps_per_core); - - pthread_mutex_lock(&lock); - ebpf_create_apps_charts(apps_groups_root_target); - pthread_mutex_unlock(&lock); - } - pthread_mutex_unlock(&collect_data_mutex); + pthread_mutex_lock(&collect_data_mutex); + if (++update_apps_list == update_apps_every) { + update_apps_list = 0; + cleanup_exited_pids(); + collect_data_for_all_processes(process_pid_fd, process_maps_per_core); + + pthread_mutex_lock(&lock); + ebpf_create_apps_charts(apps_groups_root_target); + pthread_mutex_unlock(&lock); } + pthread_mutex_unlock(&collect_data_mutex); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -2647,4 +3001,3 @@ int main(int argc, char **argv) return 0; } - diff --git a/collectors/ebpf.plugin/ebpf.d.conf b/collectors/ebpf.plugin/ebpf.d.conf index 8807f9a3..5cb844b2 100644 --- a/collectors/ebpf.plugin/ebpf.d.conf +++ b/collectors/ebpf.plugin/ebpf.d.conf @@ -19,6 +19,8 @@ # # The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.15. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. 
+# [global] ebpf load mode = entry apps = no @@ -27,6 +29,7 @@ pid table size = 32768 btf path = /sys/kernel/btf/ maps per core = yes + lifetime = 300 # # eBPF Programs diff --git a/collectors/ebpf.plugin/ebpf.d/cachestat.conf b/collectors/ebpf.plugin/ebpf.d/cachestat.conf index 82f870c9..9c51b2c5 100644 --- a/collectors/ebpf.plugin/ebpf.d/cachestat.conf +++ b/collectors/ebpf.plugin/ebpf.d/cachestat.conf @@ -26,6 +26,8 @@ # # The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# # Uncomment lines to define specific options for thread. [global] # ebpf load mode = entry @@ -37,3 +39,4 @@ ebpf co-re tracing = trampoline collect pid = real parent # maps per core = yes + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/dcstat.conf b/collectors/ebpf.plugin/ebpf.d/dcstat.conf index f741b62a..614d814e 100644 --- a/collectors/ebpf.plugin/ebpf.d/dcstat.conf +++ b/collectors/ebpf.plugin/ebpf.d/dcstat.conf @@ -24,6 +24,8 @@ # # The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# # Uncomment lines to define specific options for thread. [global] # ebpf load mode = entry @@ -35,3 +37,4 @@ ebpf co-re tracing = trampoline collect pid = real parent # maps per core = yes + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/disk.conf b/collectors/ebpf.plugin/ebpf.d/disk.conf index 4adf88e7..c5a0a270 100644 --- a/collectors/ebpf.plugin/ebpf.d/disk.conf +++ b/collectors/ebpf.plugin/ebpf.d/disk.conf @@ -3,7 +3,10 @@ # `return` : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors.
# -#[global] +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# +[global] # ebpf load mode = entry # update every = 10 + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/fd.conf b/collectors/ebpf.plugin/ebpf.d/fd.conf index 30a5fcfd..d4823032 100644 --- a/collectors/ebpf.plugin/ebpf.d/fd.conf +++ b/collectors/ebpf.plugin/ebpf.d/fd.conf @@ -12,6 +12,8 @@ # # The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# # Uncomment lines to define specific options for thread. [global] # ebpf load mode = entry @@ -22,3 +24,4 @@ ebpf type format = auto ebpf co-re tracing = trampoline # maps per core = yes + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/filesystem.conf b/collectors/ebpf.plugin/ebpf.d/filesystem.conf index c5eb01e5..209abba7 100644 --- a/collectors/ebpf.plugin/ebpf.d/filesystem.conf +++ b/collectors/ebpf.plugin/ebpf.d/filesystem.conf @@ -3,13 +3,16 @@ # `return` : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# # The eBPF collector also creates charts for each running application through an integration with the `apps plugin`. # If you want to disable the integration with `apps.plugin` along with the above charts, change the setting `apps` to # 'no'. # -#[global] +[global] # ebpf load mode = entry # update every = 10 + lifetime = 300 # All filesystems are named as 'NAMEdist' where NAME is the filesystem name while 'dist' is a reference for distribution.
[filesystem] diff --git a/collectors/ebpf.plugin/ebpf.d/functions.conf b/collectors/ebpf.plugin/ebpf.d/functions.conf new file mode 100644 index 00000000..a4f57f64 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/functions.conf @@ -0,0 +1,3 @@ +#[global] +# update every = 5 + diff --git a/collectors/ebpf.plugin/ebpf.d/hardirq.conf b/collectors/ebpf.plugin/ebpf.d/hardirq.conf index f2bae1d5..6a47a94b 100644 --- a/collectors/ebpf.plugin/ebpf.d/hardirq.conf +++ b/collectors/ebpf.plugin/ebpf.d/hardirq.conf @@ -3,6 +3,9 @@ # `return` : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors. # -#[global] +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# +[global] # ebpf load mode = entry # update every = 10 + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/mdflush.conf b/collectors/ebpf.plugin/ebpf.d/mdflush.conf index e65e8672..ea97ebe8 100644 --- a/collectors/ebpf.plugin/ebpf.d/mdflush.conf +++ b/collectors/ebpf.plugin/ebpf.d/mdflush.conf @@ -2,6 +2,10 @@ # `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. # `return` : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors. -#[global] +# +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# +[global] # ebpf load mode = entry # update every = 1 + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/mount.conf b/collectors/ebpf.plugin/ebpf.d/mount.conf index fdd82f22..ff9a2948 100644 --- a/collectors/ebpf.plugin/ebpf.d/mount.conf +++ b/collectors/ebpf.plugin/ebpf.d/mount.conf @@ -12,8 +12,12 @@ # `trampoline`: This is the default mode used by the eBPF collector, due to the small overhead added to the host.
# `tracepoint`: When available, the eBPF collector will use kernel tracepoints to monitor syscalls. # `probe` : This is the same as legacy code. +# +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# [global] # ebpf load mode = entry # update every = 1 ebpf type format = auto ebpf co-re tracing = trampoline + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/network.conf b/collectors/ebpf.plugin/ebpf.d/network.conf index 75644a77..00cbf2e8 100644 --- a/collectors/ebpf.plugin/ebpf.d/network.conf +++ b/collectors/ebpf.plugin/ebpf.d/network.conf @@ -26,6 +26,8 @@ # # The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# # Uncomment lines to define specific options for thread. [global] # ebpf load mode = entry @@ -39,6 +41,7 @@ ebpf type format = auto ebpf co-re tracing = trampoline maps per core = no + lifetime = 300 # # Network Connection diff --git a/collectors/ebpf.plugin/ebpf.d/oomkill.conf b/collectors/ebpf.plugin/ebpf.d/oomkill.conf index e65e8672..ea97ebe8 100644 --- a/collectors/ebpf.plugin/ebpf.d/oomkill.conf +++ b/collectors/ebpf.plugin/ebpf.d/oomkill.conf @@ -2,6 +2,10 @@ # `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. # `return` : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors. -#[global] +# +# The `lifetime` defines the time length a thread will run when it is enabled by a function.
+# +[global] # ebpf load mode = entry # update every = 1 + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/process.conf b/collectors/ebpf.plugin/ebpf.d/process.conf index f5e8804c..150c5792 100644 --- a/collectors/ebpf.plugin/ebpf.d/process.conf +++ b/collectors/ebpf.plugin/ebpf.d/process.conf @@ -17,6 +17,8 @@ # # The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# # Uncomment lines to define specific options for thread. [global] # ebpf load mode = entry @@ -26,3 +28,4 @@ # pid table size = 32768 collect pid = real parent # maps per core = yes + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/shm.conf b/collectors/ebpf.plugin/ebpf.d/shm.conf index f8ec1a18..95fb54e0 100644 --- a/collectors/ebpf.plugin/ebpf.d/shm.conf +++ b/collectors/ebpf.plugin/ebpf.d/shm.conf @@ -20,6 +20,8 @@ # # The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# # Uncomment lines to define specific options for thread. [global] # ebpf load mode = entry @@ -30,6 +32,7 @@ ebpf type format = auto ebpf co-re tracing = trampoline # maps per core = yes + lifetime = 300 # List of monitored syscalls [syscalls] diff --git a/collectors/ebpf.plugin/ebpf.d/softirq.conf b/collectors/ebpf.plugin/ebpf.d/softirq.conf index f2bae1d5..6a47a94b 100644 --- a/collectors/ebpf.plugin/ebpf.d/softirq.conf +++ b/collectors/ebpf.plugin/ebpf.d/softirq.conf @@ -3,6 +3,9 @@ # `return` : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors. # -#[global] +# The `lifetime` defines the time length a thread will run when it is enabled by a function.
+# +[global] # ebpf load mode = entry # update every = 10 + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/swap.conf b/collectors/ebpf.plugin/ebpf.d/swap.conf index 5bad0442..29d9b420 100644 --- a/collectors/ebpf.plugin/ebpf.d/swap.conf +++ b/collectors/ebpf.plugin/ebpf.d/swap.conf @@ -19,6 +19,8 @@ # # The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# # Uncomment lines to define specific options for thread. [global] # ebpf load mode = entry @@ -29,3 +31,4 @@ ebpf type format = auto ebpf co-re tracing = trampoline # maps per core = yes + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.d/sync.conf b/collectors/ebpf.plugin/ebpf.d/sync.conf index fefbd4ee..a086ed4d 100644 --- a/collectors/ebpf.plugin/ebpf.d/sync.conf +++ b/collectors/ebpf.plugin/ebpf.d/sync.conf @@ -19,6 +19,8 @@ # `probe` : This is the same as legacy code. # # The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. +# +# The `lifetime` defines the time length a thread will run when it is enabled by a function. # # Uncomment lines to define specific options for thread. [global] @@ -29,6 +31,7 @@ ebpf type format = auto ebpf co-re tracing = trampoline # maps per core = yes + lifetime = 300 # List of monitored syscalls [syscalls] diff --git a/collectors/ebpf.plugin/ebpf.d/vfs.conf b/collectors/ebpf.plugin/ebpf.d/vfs.conf index b4e5daac..f511581b 100644 --- a/collectors/ebpf.plugin/ebpf.d/vfs.conf +++ b/collectors/ebpf.plugin/ebpf.d/vfs.conf @@ -20,6 +20,8 @@ # # The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. # +# The `lifetime` defines the time length a thread will run when it is enabled by a function. +# # Uncomment lines to define specific options for thread. 
[global] # ebpf load mode = entry @@ -30,3 +32,4 @@ ebpf type format = auto ebpf co-re tracing = trampoline # maps per core = yes + lifetime = 300 diff --git a/collectors/ebpf.plugin/ebpf.h b/collectors/ebpf.plugin/ebpf.h index ae24c302..f008d21a 100644 --- a/collectors/ebpf.plugin/ebpf.h +++ b/collectors/ebpf.plugin/ebpf.h @@ -39,7 +39,10 @@ #ifdef LIBBPF_MAJOR_VERSION // BTF code #include "includes/cachestat.skel.h" #include "includes/dc.skel.h" +#include "includes/disk.skel.h" #include "includes/fd.skel.h" +#include "includes/hardirq.skel.h" +#include "includes/mdflush.skel.h" #include "includes/mount.skel.h" #include "includes/shm.skel.h" #include "includes/socket.skel.h" @@ -48,8 +51,11 @@ extern struct cachestat_bpf *cachestat_bpf_obj; extern struct dc_bpf *dc_bpf_obj; +extern struct disk_bpf *disk_bpf_obj; extern struct fd_bpf *fd_bpf_obj; +extern struct hardirq_bpf *hardirq_bpf_obj; extern struct mount_bpf *mount_bpf_obj; +extern struct mdflush_bpf *mdflush_bpf_obj; extern struct shm_bpf *shm_bpf_obj; extern struct socket_bpf *socket_bpf_obj; extern struct swap_bpf *bpf_obj; @@ -112,6 +118,7 @@ enum ebpf_main_index { EBPF_MODULE_OOMKILL_IDX, EBPF_MODULE_SHM_IDX, EBPF_MODULE_MDFLUSH_IDX, + EBPF_MODULE_FUNCTION_IDX, /* THREADS MUST BE INCLUDED BEFORE THIS COMMENT */ EBPF_OPTION_ALL_CHARTS, EBPF_OPTION_VERSION, @@ -157,6 +164,7 @@ typedef struct ebpf_tracepoint { // Statistics charts #define NETDATA_EBPF_THREADS "ebpf_threads" +#define NETDATA_EBPF_LIFE_TIME "ebpf_life_time" #define NETDATA_EBPF_LOAD_METHOD "ebpf_load_methods" #define NETDATA_EBPF_KERNEL_MEMORY "ebpf_kernel_memory" #define NETDATA_EBPF_HASH_TABLES_LOADED "ebpf_hash_tables_count" diff --git a/collectors/ebpf.plugin/ebpf_apps.c b/collectors/ebpf.plugin/ebpf_apps.c index 3826f8ef..c7c0cbbb 100644 --- a/collectors/ebpf.plugin/ebpf_apps.c +++ b/collectors/ebpf.plugin/ebpf_apps.c @@ -35,7 +35,7 @@ void ebpf_aral_init(void) { size_t max_elements = NETDATA_EBPF_ALLOC_MAX_PID; if (max_elements < 
NETDATA_EBPF_ALLOC_MIN_ELEMENTS) { - error("Number of elements given is too small, adjusting it for %d", NETDATA_EBPF_ALLOC_MIN_ELEMENTS); + netdata_log_error("Number of elements given is too small, adjusting it for %d", NETDATA_EBPF_ALLOC_MIN_ELEMENTS); max_elements = NETDATA_EBPF_ALLOC_MIN_ELEMENTS; } @@ -44,7 +44,7 @@ void ebpf_aral_init(void) ebpf_aral_process_stat = ebpf_allocate_pid_aral(NETDATA_EBPF_PROC_ARAL_NAME, sizeof(ebpf_process_stat_t)); #ifdef NETDATA_DEV_MODE - info("Plugin is using ARAL with values %d", NETDATA_EBPF_ALLOC_MAX_PID); + netdata_log_info("Plugin is using ARAL with values %d", NETDATA_EBPF_ALLOC_MAX_PID); #endif } @@ -652,7 +652,7 @@ int ebpf_read_apps_groups_conf(struct ebpf_target **agdt, struct ebpf_target **a // add this target struct ebpf_target *n = get_apps_groups_target(agrt, s, w, name); if (!n) { - error("Cannot create target '%s' (line %zu, word %zu)", s, line, word); + netdata_log_error("Cannot create target '%s' (line %zu, word %zu)", s, line, word); continue; } @@ -755,32 +755,32 @@ static inline void debug_log_dummy(void) static inline int managed_log(struct ebpf_pid_stat *p, uint32_t log, int status) { if (unlikely(!status)) { - // error("command failed log %u, errno %d", log, errno); + // netdata_log_error("command failed log %u, errno %d", log, errno); if (unlikely(debug_enabled || errno != ENOENT)) { if (unlikely(debug_enabled || !(p->log_thrown & log))) { p->log_thrown |= log; switch (log) { case PID_LOG_IO: - error( + netdata_log_error( "Cannot process %s/proc/%d/io (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); break; case PID_LOG_STATUS: - error( + netdata_log_error( "Cannot process %s/proc/%d/status (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); break; case PID_LOG_CMDLINE: - error( + netdata_log_error( "Cannot process %s/proc/%d/cmdline (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); break; case PID_LOG_FDS: - error( + netdata_log_error( "Cannot process 
entries in %s/proc/%d/fd (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); break; @@ -789,14 +789,14 @@ static inline int managed_log(struct ebpf_pid_stat *p, uint32_t log, int status) break; default: - error("unhandled error for pid %d, command '%s'", p->pid, p->comm); + netdata_log_error("unhandled error for pid %d, command '%s'", p->pid, p->comm); break; } } } errno = 0; } else if (unlikely(p->log_thrown & log)) { - // error("unsetting log %u on pid %d", log, p->pid); + // netdata_log_error("unsetting log %u on pid %d", log, p->pid); p->log_thrown &= ~log; } @@ -1005,7 +1005,7 @@ static inline int read_proc_pid_stat(struct ebpf_pid_stat *p, void *ptr) static inline int collect_data_for_pid(pid_t pid, void *ptr) { if (unlikely(pid < 0 || pid > pid_max)) { - error("Invalid pid %d read (expected %d to %d). Ignoring process.", pid, 0, pid_max); + netdata_log_error("Invalid pid %d read (expected %d to %d). Ignoring process.", pid, 0, pid_max); return 0; } @@ -1020,7 +1020,7 @@ static inline int collect_data_for_pid(pid_t pid, void *ptr) // check its parent pid if (unlikely(p->ppid < 0 || p->ppid > pid_max)) { - error("Pid %d (command '%s') states invalid parent pid %d. Using 0.", pid, p->comm, p->ppid); + netdata_log_error("Pid %d (command '%s') states invalid parent pid %d. 
Using 0.", pid, p->comm, p->ppid); p->ppid = 0; } @@ -1220,7 +1220,7 @@ static inline void del_pid_entry(pid_t pid) struct ebpf_pid_stat *p = ebpf_all_pids[pid]; if (unlikely(!p)) { - error("attempted to free pid %d that is not allocated.", pid); + netdata_log_error("attempted to free pid %d that is not allocated.", pid); return; } @@ -1338,8 +1338,10 @@ void cleanup_exited_pids() p = p->next; // Clean process structure - ebpf_process_stat_release(global_process_stats[r]); - global_process_stats[r] = NULL; + if (global_process_stats) { + ebpf_process_stat_release(global_process_stats[r]); + global_process_stats[r] = NULL; + } cleanup_variables_from_other_threads(r); @@ -1403,7 +1405,7 @@ static inline void aggregate_pid_on_target(struct ebpf_target *w, struct ebpf_pi } if (unlikely(!w)) { - error("pid %d %s was left without a target!", p->pid, p->comm); + netdata_log_error("pid %d %s was left without a target!", p->pid, p->comm); return; } @@ -1471,36 +1473,40 @@ void collect_data_for_all_processes(int tbl_pid_stats_fd, int maps_per_core) uint32_t key; pids = ebpf_root_of_pids; // global list of all processes running // while (bpf_map_get_next_key(tbl_pid_stats_fd, &key, &next_key) == 0) { - size_t length = sizeof(ebpf_process_stat_t); - if (maps_per_core) - length *= ebpf_nprocs; - while (pids) { - key = pids->pid; - ebpf_process_stat_t *w = global_process_stats[key]; - if (!w) { - w = ebpf_process_stat_get(); - global_process_stats[key] = w; - } + if (tbl_pid_stats_fd != -1) { + size_t length = sizeof(ebpf_process_stat_t); + if (maps_per_core) + length *= ebpf_nprocs; - if (bpf_map_lookup_elem(tbl_pid_stats_fd, &key, process_stat_vector)) { - // Clean Process structures - ebpf_process_stat_release(w); - global_process_stats[key] = NULL; + while (pids) { + key = pids->pid; - cleanup_variables_from_other_threads(key); + ebpf_process_stat_t *w = global_process_stats[key]; + if (!w) { + w = ebpf_process_stat_get(); + global_process_stats[key] = w; + } - pids = 
pids->next; - continue; - } + if (bpf_map_lookup_elem(tbl_pid_stats_fd, &key, process_stat_vector)) { + // Clean Process structures + ebpf_process_stat_release(w); + global_process_stats[key] = NULL; - ebpf_process_apps_accumulator(process_stat_vector, maps_per_core); + cleanup_variables_from_other_threads(key); - memcpy(w, process_stat_vector, sizeof(ebpf_process_stat_t)); + pids = pids->next; + continue; + } - memset(process_stat_vector, 0, length); + ebpf_process_apps_accumulator(process_stat_vector, maps_per_core); - pids = pids->next; + memcpy(w, process_stat_vector, sizeof(ebpf_process_stat_t)); + + memset(process_stat_vector, 0, length); + + pids = pids->next; + } } link_all_processes_to_their_parents(); diff --git a/collectors/ebpf.plugin/ebpf_apps.h b/collectors/ebpf.plugin/ebpf_apps.h index ad2e338d..fc894a55 100644 --- a/collectors/ebpf.plugin/ebpf_apps.h +++ b/collectors/ebpf.plugin/ebpf_apps.h @@ -21,6 +21,7 @@ #include "ebpf_disk.h" #include "ebpf_fd.h" #include "ebpf_filesystem.h" +#include "ebpf_functions.h" #include "ebpf_hardirq.h" #include "ebpf_cachestat.h" #include "ebpf_mdflush.h" diff --git a/collectors/ebpf.plugin/ebpf_cachestat.c b/collectors/ebpf.plugin/ebpf_cachestat.c index c287136c..72c33794 100644 --- a/collectors/ebpf.plugin/ebpf_cachestat.c +++ b/collectors/ebpf.plugin/ebpf_cachestat.c @@ -58,6 +58,10 @@ netdata_ebpf_targets_t cachestat_targets[] = { {.name = "add_to_page_cache_lru", static char *account_page[NETDATA_CACHESTAT_ACCOUNT_DIRTY_END] ={ "account_page_dirtied", "__set_page_dirty", "__folio_mark_dirty" }; +#ifdef NETDATA_DEV_MODE +int cachestat_disable_priority; +#endif + #ifdef LIBBPF_MAJOR_VERSION /** * Disable probe @@ -336,6 +340,179 @@ static inline int ebpf_cachestat_load_and_attach(struct cachestat_bpf *obj, ebpf * *****************************************************************/ +static void ebpf_obsolete_specific_cachestat_charts(char *type, int update_every); + +/** + * Obsolete services + * + * Obsolete all 
service charts created + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_services(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_CACHESTAT_HIT_RATIO_CHART, + "Hit ratio", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_SYSTEMD_CACHESTAT_HIT_RATIO_CONTEXT, + 21100, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_CACHESTAT_DIRTY_CHART, + "Number of dirty pages", + EBPF_CACHESTAT_DIMENSION_PAGE, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_SYSTEMD_CACHESTAT_MODIFIED_CACHE_CONTEXT, + 21101, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_CACHESTAT_HIT_CHART, + "Number of accessed files", + EBPF_CACHESTAT_DIMENSION_HITS, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_SYSTEMD_CACHESTAT_HIT_FILE_CONTEXT, + 21102, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_CACHESTAT_MISSES_CHART, + "Files out of page cache", + EBPF_CACHESTAT_DIMENSION_MISSES, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_SYSTEMD_CACHESTAT_MISS_FILES_CONTEXT, + 21103, + em->update_every); +} + +/** + * Obsolete cgroup chart + * + * Send obsolete for all charts created before to close. + * + * @param em a pointer to `struct ebpf_module` + */ +static inline void ebpf_obsolete_cachestat_cgroup_charts(ebpf_module_t *em) { + pthread_mutex_lock(&mutex_cgroup_shm); + + ebpf_obsolete_services(em); + + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + ebpf_obsolete_specific_cachestat_charts(ect->name, em->update_every); + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Obsolete global + * + * Obsolete global charts created by thread. 
+ * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_cachestat_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, + NETDATA_CACHESTAT_HIT_RATIO_CHART, + "Hit ratio", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21100, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, + NETDATA_CACHESTAT_DIRTY_CHART, + "Number of dirty pages", + EBPF_CACHESTAT_DIMENSION_PAGE, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21101, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, + NETDATA_CACHESTAT_HIT_CHART, + "Number of accessed files", + EBPF_CACHESTAT_DIMENSION_HITS, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21102, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, + NETDATA_CACHESTAT_MISSES_CHART, + "Files out of page cache", + EBPF_CACHESTAT_DIMENSION_MISSES, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21103, + em->update_every); +} + +/** + * Obsolette apps charts + * + * Obsolete apps charts. + * + * @param em a pointer to the structure with the default values. 
+ */ +void ebpf_obsolete_cachestat_apps_charts(struct ebpf_module *em) +{ + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_CACHESTAT_HIT_RATIO_CHART, + "Hit ratio", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 20090, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_CACHESTAT_DIRTY_CHART, + "Number of dirty pages", + EBPF_CACHESTAT_DIMENSION_PAGE, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20091, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_HIT_CHART, + "Number of accessed files", + EBPF_CACHESTAT_DIMENSION_HITS, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20092, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_CACHESTAT_MISSES_CHART, + "Files out of page cache", + EBPF_CACHESTAT_DIMENSION_MISSES, + NETDATA_CACHESTAT_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20093, + em->update_every); +} + /** * Cachestat exit. 
* @@ -347,17 +524,47 @@ static void ebpf_cachestat_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + if (em->cgroup_charts) { + ebpf_obsolete_cachestat_cgroup_charts(em); + fflush(stdout); + } + + if (em->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + ebpf_obsolete_cachestat_apps_charts(em); + } + + ebpf_obsolete_cachestat_global(em); + +#ifdef NETDATA_DEV_MODE + if (ebpf_aral_cachestat_pid) + ebpf_statistic_obsolete_aral_chart(em, cachestat_disable_priority); +#endif + + + fflush(stdout); + pthread_mutex_unlock(&lock); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + #ifdef LIBBPF_MAJOR_VERSION - if (cachestat_bpf_obj) + if (cachestat_bpf_obj) { cachestat_bpf__destroy(cachestat_bpf_obj); + cachestat_bpf_obj = NULL; + } #endif if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; } pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -1079,7 +1286,9 @@ static void cachestat_collector(ebpf_module_t *em) heartbeat_init(&hb); int counter = update_every - 1; //This will be cancelled by its parent - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -1112,6 +1321,15 @@ static void cachestat_collector(ebpf_module_t *em) pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ 
-1220,7 +1438,7 @@ static int ebpf_cachestat_set_internal_value() } if (!address.addr) { - error("%s cachestat.", NETDATA_EBPF_DEFAULT_FNT_NOT_FOUND); + netdata_log_error("%s cachestat.", NETDATA_EBPF_DEFAULT_FNT_NOT_FOUND); return -1; } @@ -1261,7 +1479,7 @@ static int ebpf_cachestat_load_bpf(ebpf_module_t *em) #endif if (ret) - error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); return ret; } @@ -1307,11 +1525,11 @@ void *ebpf_cachestat_thread(void *ptr) pthread_mutex_lock(&lock); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); ebpf_create_memory_charts(em); #ifdef NETDATA_DEV_MODE if (ebpf_aral_cachestat_pid) - ebpf_statistic_create_aral_chart(NETDATA_EBPF_CACHESTAT_ARAL_NAME, em); + cachestat_disable_priority = ebpf_statistic_create_aral_chart(NETDATA_EBPF_CACHESTAT_ARAL_NAME, em); #endif pthread_mutex_unlock(&lock); diff --git a/collectors/ebpf.plugin/ebpf_cachestat.h b/collectors/ebpf.plugin/ebpf_cachestat.h index 2c1f171c..ba2b1283 100644 --- a/collectors/ebpf.plugin/ebpf_cachestat.h +++ b/collectors/ebpf.plugin/ebpf_cachestat.h @@ -3,8 +3,9 @@ #ifndef NETDATA_EBPF_CACHESTAT_H #define NETDATA_EBPF_CACHESTAT_H 1 -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_CACHESTAT "cachestat" +#define NETDATA_EBPF_CACHESTAT_MODULE_DESC "Monitor Linux page cache internal functions. This thread is integrated with apps and cgroup." 
// charts #define NETDATA_CACHESTAT_HIT_RATIO_CHART "cachestat_ratio" diff --git a/collectors/ebpf.plugin/ebpf_cgroup.c b/collectors/ebpf.plugin/ebpf_cgroup.c index 6d7c555b..fd4e783d 100644 --- a/collectors/ebpf.plugin/ebpf_cgroup.c +++ b/collectors/ebpf.plugin/ebpf_cgroup.c @@ -28,7 +28,7 @@ static inline void *ebpf_cgroup_map_shm_locally(int fd, size_t length) value = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (!value) { - error("Cannot map shared memory used between eBPF and cgroup, integration between processes won't happen"); + netdata_log_error("Cannot map shared memory used between eBPF and cgroup, integration between processes won't happen"); close(shm_fd_ebpf_cgroup); shm_fd_ebpf_cgroup = -1; shm_unlink(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME); @@ -71,7 +71,7 @@ void ebpf_map_cgroup_shared_memory() shm_fd_ebpf_cgroup = shm_open(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME, O_RDWR, 0660); if (shm_fd_ebpf_cgroup < 0) { if (limit_try == NETDATA_EBPF_CGROUP_MAX_TRIES) - error("Shared memory was not initialized, integration between processes won't happen."); + netdata_log_error("Shared memory was not initialized, integration between processes won't happen."); return; } @@ -103,7 +103,7 @@ void ebpf_map_cgroup_shared_memory() shm_sem_ebpf_cgroup = sem_open(NETDATA_NAMED_SEMAPHORE_EBPF_CGROUP_NAME, O_CREAT, 0660, 1); if (shm_sem_ebpf_cgroup == SEM_FAILED) { - error("Cannot create semaphore, integration between eBPF and cgroup won't happen"); + netdata_log_error("Cannot create semaphore, integration between eBPF and cgroup won't happen"); limit_try = NETDATA_EBPF_CGROUP_MAX_TRIES + 1; munmap(ebpf_mapped_memory, length); shm_ebpf_cgroup.header = NULL; @@ -303,7 +303,7 @@ void ebpf_parse_cgroup_shm_data() sem_post(shm_sem_ebpf_cgroup); pthread_mutex_unlock(&mutex_cgroup_shm); #ifdef NETDATA_DEV_MODE - info("Updating cgroup %d (Previous: %d, Current: %d)", + netdata_log_info("Updating cgroup %d (Previous: %d, Current: %d)", send_cgroup_chart, 
previous, shm_ebpf_cgroup.header->cgroup_root_count); #endif diff --git a/collectors/ebpf.plugin/ebpf_dcstat.c b/collectors/ebpf.plugin/ebpf_dcstat.c index 4157f0c8..dba3f44d 100644 --- a/collectors/ebpf.plugin/ebpf_dcstat.c +++ b/collectors/ebpf.plugin/ebpf_dcstat.c @@ -59,6 +59,10 @@ netdata_ebpf_targets_t dc_targets[] = { {.name = "lookup_fast", .mode = EBPF_LOA {.name = "d_lookup", .mode = EBPF_LOAD_TRAMPOLINE}, {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; +#ifdef NETDATA_DEV_MODE +int dcstat_disable_priority; +#endif + #ifdef LIBBPF_MAJOR_VERSION /** * Disable probe @@ -195,7 +199,7 @@ netdata_ebpf_program_loaded_t ebpf_dc_update_load(ebpf_module_t *em) return EBPF_LOAD_TRAMPOLINE; if (em->targets[NETDATA_DC_TARGET_LOOKUP_FAST].mode != EBPF_LOAD_RETPROBE) - info("When your kernel was compiled the symbol %s was modified, instead to use `trampoline`, the plugin will use `probes`.", + netdata_log_info("When your kernel was compiled the symbol %s was modified, instead to use `trampoline`, the plugin will use `probes`.", dc_optional_name[NETDATA_DC_TARGET_LOOKUP_FAST].function_to_attach); return EBPF_LOAD_RETPROBE; @@ -285,6 +289,160 @@ void dcstat_update_publish(netdata_publish_dcstat_t *out, uint64_t cache_access, * *****************************************************************/ +static void ebpf_obsolete_specific_dc_charts(char *type, int update_every); + +/** + * Obsolete services + * + * Obsolete all service charts created + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_dc_services(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_DC_HIT_CHART, + "Percentage of files inside directory cache", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_SYSTEMD_DC_HIT_RATIO_CONTEXT, + 21200, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_DC_REFERENCE_CHART, + "Count file access", + 
EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_SYSTEMD_DC_REFERENCE_CONTEXT, + 21201, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_DC_REQUEST_NOT_CACHE_CHART, + "Files not present inside directory cache", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_SYSTEMD_DC_NOT_CACHE_CONTEXT, + 21202, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_DC_REQUEST_NOT_FOUND_CHART, + "Files not found", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_SYSTEMD_DC_NOT_FOUND_CONTEXT, + 21202, + em->update_every); +} + +/** + * Obsolete cgroup chart + * + * Send obsolete for all charts created before to close. + * + * @param em a pointer to `struct ebpf_module` + */ +static inline void ebpf_obsolete_dc_cgroup_charts(ebpf_module_t *em) { + pthread_mutex_lock(&mutex_cgroup_shm); + + ebpf_obsolete_dc_services(em); + + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + ebpf_obsolete_specific_dc_charts(ect->name, em->update_every); + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Obsolette apps charts + * + * Obsolete apps charts. + * + * @param em a pointer to the structure with the default values. 
+ */ +void ebpf_obsolete_dc_apps_charts(struct ebpf_module *em) +{ + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_DC_HIT_CHART, + "Percentage of files inside directory cache", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 20100, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_DC_REFERENCE_CHART, + "Count file access", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20101, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_DC_REQUEST_NOT_CACHE_CHART, + "Files not present inside directory cache", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20102, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_DC_REQUEST_NOT_FOUND_CHART, + "Files not found", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20103, + em->update_every); +} + +/** + * Obsolete global + * + * Obsolete global charts created by thread. 
+ * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_dc_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_DC_HIT_CHART, + "Percentage of files inside directory cache", + EBPF_COMMON_DIMENSION_PERCENTAGE, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21200, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_DC_REFERENCE_CHART, + "Variables used to calculate hit ratio.", + EBPF_COMMON_DIMENSION_FILES, + NETDATA_DIRECTORY_CACHE_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21201, + em->update_every); +} + /** * DCstat exit * @@ -296,16 +454,46 @@ static void ebpf_dcstat_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + if (em->cgroup_charts) { + ebpf_obsolete_dc_cgroup_charts(em); + fflush(stdout); + } + + if (em->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + ebpf_obsolete_dc_apps_charts(em); + } + + ebpf_obsolete_dc_global(em); + +#ifdef NETDATA_DEV_MODE + if (ebpf_aral_dcstat_pid) + ebpf_statistic_obsolete_aral_chart(em, dcstat_disable_priority); +#endif + + fflush(stdout); + pthread_mutex_unlock(&lock); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + #ifdef LIBBPF_MAJOR_VERSION - if (dc_bpf_obj) + if (dc_bpf_obj) { dc_bpf__destroy(dc_bpf_obj); + dc_bpf_obj = NULL; + } #endif - if (em->objects) + if (em->objects){ ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -979,7 +1167,9 @@ static void dcstat_collector(ebpf_module_t *em) heartbeat_init(&hb); int counter = update_every - 1; int maps_per_core = em->maps_per_core; - while 
(!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -1012,6 +1202,15 @@ static void dcstat_collector(ebpf_module_t *em) pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -1028,7 +1227,7 @@ static void dcstat_collector(ebpf_module_t *em) * * @param update_every value to overwrite the update frequency set by the server. */ -static void ebpf_create_filesystem_charts(int update_every) +static void ebpf_create_dc_global_charts(int update_every) { ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, NETDATA_DC_HIT_CHART, "Percentage of files inside directory cache", @@ -1112,7 +1311,7 @@ static int ebpf_dcstat_load_bpf(ebpf_module_t *em) #endif if (ret) - error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); return ret; } @@ -1156,12 +1355,12 @@ void *ebpf_dcstat_thread(void *ptr) algorithms, NETDATA_DCSTAT_IDX_END); pthread_mutex_lock(&lock); - ebpf_create_filesystem_charts(em->update_every); + ebpf_create_dc_global_charts(em->update_every); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); #ifdef NETDATA_DEV_MODE if (ebpf_aral_dcstat_pid) - ebpf_statistic_create_aral_chart(NETDATA_EBPF_DCSTAT_ARAL_NAME, em); + dcstat_disable_priority = ebpf_statistic_create_aral_chart(NETDATA_EBPF_DCSTAT_ARAL_NAME, em); #endif pthread_mutex_unlock(&lock); diff --git a/collectors/ebpf.plugin/ebpf_dcstat.h 
b/collectors/ebpf.plugin/ebpf_dcstat.h index 5c9eed4d..845b6590 100644 --- a/collectors/ebpf.plugin/ebpf_dcstat.h +++ b/collectors/ebpf.plugin/ebpf_dcstat.h @@ -3,8 +3,9 @@ #ifndef NETDATA_EBPF_DCSTAT_H #define NETDATA_EBPF_DCSTAT_H 1 -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_DCSTAT "dcstat" +#define NETDATA_EBPF_DC_MODULE_DESC "Monitor file access using directory cache. This thread is integrated with apps and cgroup." // charts #define NETDATA_DC_HIT_CHART "dc_hit_ratio" diff --git a/collectors/ebpf.plugin/ebpf_disk.c b/collectors/ebpf.plugin/ebpf_disk.c index 231186b8..87945627 100644 --- a/collectors/ebpf.plugin/ebpf_disk.c +++ b/collectors/ebpf.plugin/ebpf_disk.c @@ -52,6 +52,39 @@ static netdata_idx_t *disk_hash_values = NULL; ebpf_publish_disk_t *plot_disks = NULL; pthread_mutex_t plot_mutex; +#ifdef LIBBPF_MAJOR_VERSION +/** + * Set hash table + * + * Set the values for maps according the value given by kernel. + * + * @param obj is the main structure for bpf objects. + */ +static inline void ebpf_disk_set_hash_table(struct disk_bpf *obj) + { + disk_maps[NETDATA_DISK_IO].map_fd = bpf_map__fd(obj->maps.tbl_disk_iocall); + } + +/** + * Load and attach + * + * Load and attach the eBPF code in kernel. + * + * @param obj is the main structure for bpf objects. 
+ * + * @return it returns 0 on success and -1 otherwise + */ +static inline int ebpf_disk_load_and_attach(struct disk_bpf *obj) +{ + int ret = disk_bpf__load(obj); + if (ret) { + return ret; + } + + return disk_bpf__attach(obj); +} +#endif + /***************************************************************** * * FUNCTIONS TO MANIPULATE HARD DISKS @@ -308,10 +341,10 @@ static void update_disk_table(char *name, int major, int minor, time_t current_t netdata_ebpf_disks_t *check; check = (netdata_ebpf_disks_t *) avl_insert_lock(&disk_tree, (avl_t *)w); if (check != w) - error("Internal error, cannot insert the AVL tree."); + netdata_log_error("Internal error, cannot insert the AVL tree."); #ifdef NETDATA_INTERNAL_CHECKS - info("The Latency is monitoring the hard disk %s (Major = %d, Minor = %d, Device = %u)", name, major, minor,w->dev); + netdata_log_info("The Latency is monitoring the hard disk %s (Major = %d, Minor = %d, Device = %u)", name, major, minor,w->dev); #endif w->flags |= NETDATA_DISK_IS_HERE; @@ -391,12 +424,12 @@ static void ebpf_disk_disable_tracepoints() char *default_message = { "Cannot disable the tracepoint" }; if (!was_block_issue_enabled) { if (ebpf_disable_tracing_values(tracepoint_block_type, tracepoint_block_issue)) - error("%s %s/%s.", default_message, tracepoint_block_type, tracepoint_block_issue); + netdata_log_error("%s %s/%s.", default_message, tracepoint_block_type, tracepoint_block_issue); } if (!was_block_rq_complete_enabled) { if (ebpf_disable_tracing_values(tracepoint_block_type, tracepoint_block_rq_complete)) - error("%s %s/%s.", default_message, tracepoint_block_type, tracepoint_block_rq_complete); + netdata_log_error("%s %s/%s.", default_message, tracepoint_block_type, tracepoint_block_rq_complete); } } @@ -415,6 +448,7 @@ static void ebpf_cleanup_plot_disks() move = next; } + plot_disks = NULL; } /** @@ -432,6 +466,36 @@ static void ebpf_cleanup_disk_list() move = next; } + disk_list = NULL; +} + +/** + * Obsolete global + * + * 
Obsolete global charts created by thread. + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_disk_global(ebpf_module_t *em) +{ + ebpf_publish_disk_t *move = plot_disks; + while (move) { + netdata_ebpf_disks_t *ned = move->plot; + uint32_t flags = ned->flags; + if (flags & NETDATA_DISK_CHART_CREATED) { + ebpf_write_chart_obsolete(ned->histogram.name, + ned->family, + "Disk latency", + EBPF_COMMON_DIMENSION_CALL, + ned->family, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + ned->histogram.order, + em->update_every); + } + + move = move->next; + } } /** @@ -445,15 +509,29 @@ static void ebpf_disk_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - if (em->objects) - ebpf_unload_legacy_code(em->objects, em->probe_links); + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + ebpf_obsolete_disk_global(em); + + pthread_mutex_unlock(&lock); + fflush(stdout); + } ebpf_disk_disable_tracepoints(); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, disk_maps, EBPF_ACTION_STAT_REMOVE); + + if (em->objects) { + ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } + if (dimensions) ebpf_histogram_dimension_cleanup(dimensions, NETDATA_EBPF_HIST_MAX_BINS); freez(disk_hash_values); + disk_hash_values = NULL; pthread_mutex_destroy(&plot_mutex); ebpf_cleanup_plot_disks(); @@ -461,6 +539,7 @@ static void ebpf_disk_exit(void *ptr) pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -607,6 +686,8 @@ static void ebpf_create_hd_charts(netdata_ebpf_disks_t *w, int update_every) order++; w->flags |= NETDATA_DISK_CHART_CREATED; + + fflush(stdout); } /** @@ -695,14 +776,16 @@ static void disk_collector(ebpf_module_t *em) heartbeat_init(&hb); int counter = update_every - 1; int maps_per_core = em->maps_per_core; - while 
(!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) continue; counter = 0; - read_hard_disk_tables(disk_maps[NETDATA_DISK_READ].map_fd, maps_per_core); + read_hard_disk_tables(disk_maps[NETDATA_DISK_IO].map_fd, maps_per_core); pthread_mutex_lock(&lock); ebpf_remove_pointer_from_plot_disk(em); ebpf_latency_send_hd_data(update_every); @@ -710,6 +793,15 @@ static void disk_collector(ebpf_module_t *em) pthread_mutex_unlock(&lock); ebpf_update_disks(em); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -749,6 +841,43 @@ static int ebpf_disk_enable_tracepoints() return 0; } +/* + * Load BPF + * + * Load BPF files. + * + * @param em the structure with configuration + * + * @return It returns 0 on success and -1 otherwise. 
+ */ +static int ebpf_disk_load_bpf(ebpf_module_t *em) +{ + int ret = 0; + if (em->load & EBPF_LOAD_LEGACY) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + ret = -1; + } + } +#ifdef LIBBPF_MAJOR_VERSION + else { + disk_bpf_obj = disk_bpf__open(); + if (!disk_bpf_obj) + ret = -1; + else { + ret = ebpf_disk_load_and_attach(disk_bpf_obj); + if (!ret) + ebpf_disk_set_hash_table(disk_bpf_obj); + } + } +#endif + + if (ret) + netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + + return ret; +} + /** * Disk thread * @@ -775,15 +904,15 @@ void *ebpf_disk_thread(void *ptr) } if (pthread_mutex_init(&plot_mutex, NULL)) { - error("Cannot initialize local mutex"); + netdata_log_error("Cannot initialize local mutex"); goto enddisk; } #ifdef LIBBPF_MAJOR_VERSION ebpf_define_map_type(disk_maps, em->maps_per_core, running_on_kernel); + ebpf_adjust_thread_load(em, default_btf); #endif - em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); - if (!em->probe_links) { + if (ebpf_disk_load_bpf(em)) { goto enddisk; } @@ -796,7 +925,7 @@ void *ebpf_disk_thread(void *ptr) pthread_mutex_lock(&lock); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, disk_maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, disk_maps, EBPF_ACTION_STAT_ADD); pthread_mutex_unlock(&lock); disk_collector(em); diff --git a/collectors/ebpf.plugin/ebpf_disk.h b/collectors/ebpf.plugin/ebpf_disk.h index 69c70587..487ed376 100644 --- a/collectors/ebpf.plugin/ebpf_disk.h +++ b/collectors/ebpf.plugin/ebpf_disk.h @@ -3,8 +3,9 @@ #ifndef NETDATA_EBPF_DISK_H #define NETDATA_EBPF_DISK_H 1 -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_DISK "disk" +#define NETDATA_EBPF_DISK_MODULE_DESC "Monitor disk latency independent of filesystem." 
#include "libnetdata/avl/avl.h" #include "libnetdata/ebpf/ebpf.h" @@ -54,10 +55,7 @@ typedef struct netdata_ebpf_disks { struct netdata_ebpf_disks *next; } netdata_ebpf_disks_t; -enum ebpf_disk_tables { - NETDATA_DISK_READ, - NETDATA_DISK_TMP -}; +enum ebpf_disk_tables { NETDATA_DISK_IO }; typedef struct block_key { uint32_t bin; diff --git a/collectors/ebpf.plugin/ebpf_fd.c b/collectors/ebpf.plugin/ebpf_fd.c index d39e6ae2..214d2955 100644 --- a/collectors/ebpf.plugin/ebpf_fd.c +++ b/collectors/ebpf.plugin/ebpf_fd.c @@ -57,6 +57,10 @@ netdata_ebpf_targets_t fd_targets[] = { {.name = "open", .mode = EBPF_LOAD_TRAMP {.name = "close", .mode = EBPF_LOAD_TRAMPOLINE}, {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; +#ifdef NETDATA_DEV_MODE +int fd_disable_priority; +#endif + #ifdef LIBBPF_MAJOR_VERSION /** * Disable probe @@ -326,7 +330,7 @@ static inline int ebpf_fd_load_and_attach(struct fd_bpf *obj, ebpf_module_t *em) netdata_ebpf_program_loaded_t test = mt[NETDATA_FD_SYSCALL_OPEN].mode; if (ebpf_fd_set_target_values()) { - error("%s file descriptor.", NETDATA_EBPF_DEFAULT_FNT_NOT_FOUND); + netdata_log_error("%s file descriptor.", NETDATA_EBPF_DEFAULT_FNT_NOT_FOUND); return -1; } @@ -369,6 +373,170 @@ static inline int ebpf_fd_load_and_attach(struct fd_bpf *obj, ebpf_module_t *em) * *****************************************************************/ +static void ebpf_obsolete_specific_fd_charts(char *type, ebpf_module_t *em); + +/** + * Obsolete services + * + * Obsolete all service charts created + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_fd_services(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_FILE_OPEN, + "Number of open files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NETDATA_CGROUP_FD_OPEN_CONTEXT, + 20061, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + 
NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, + "Fails to open files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NETDATA_CGROUP_FD_OPEN_ERR_CONTEXT, + 20062, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_FILE_CLOSED, + "Files closed", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NETDATA_CGROUP_FD_CLOSE_CONTEXT, + 20063, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, + "Fails to close files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NETDATA_CGROUP_FD_CLOSE_ERR_CONTEXT, + 20064, + em->update_every); + } +} + +/** + * Obsolete cgroup chart + * + * Send obsolete for all charts created before to close. + * + * @param em a pointer to `struct ebpf_module` + */ +static inline void ebpf_obsolete_fd_cgroup_charts(ebpf_module_t *em) { + pthread_mutex_lock(&mutex_cgroup_shm); + + ebpf_obsolete_fd_services(em); + + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + ebpf_obsolete_specific_fd_charts(ect->name, em); + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Obsolette apps charts + * + * Obsolete apps charts. + * + * @param em a pointer to the structure with the default values. 
+ */ +void ebpf_obsolete_fd_apps_charts(struct ebpf_module *em) +{ + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_FILE_OPEN, + "Number of open files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20061, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, + "Fails to open files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20062, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_FILE_CLOSED, + "Files closed", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20063, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, + "Fails to close files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20064, + em->update_every); + } +} + +/** + * Obsolete global + * + * Obsolete global charts created by thread. 
+ * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_fd_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_FILE_OPEN_CLOSE_COUNT, + "Open and close calls", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_EBPF_FD_CHARTS, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_FILE_OPEN_ERR_COUNT, + "Open fails", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_FILE_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_EBPF_FD_CHARTS + 1, + em->update_every); + } +} + /** * FD Exit * @@ -380,15 +548,46 @@ static void ebpf_fd_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + if (em->cgroup_charts) { + ebpf_obsolete_fd_cgroup_charts(em); + fflush(stdout); + } + + if (em->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + ebpf_obsolete_fd_apps_charts(em); + } + + ebpf_obsolete_fd_global(em); + +#ifdef NETDATA_DEV_MODE + if (ebpf_aral_fd_pid) + ebpf_statistic_obsolete_aral_chart(em, fd_disable_priority); +#endif + + + fflush(stdout); + pthread_mutex_unlock(&lock); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + #ifdef LIBBPF_MAJOR_VERSION - if (fd_bpf_obj) + if (fd_bpf_obj) { fd_bpf__destroy(fd_bpf_obj); + fd_bpf_obj = NULL; + } #endif - if (em->objects) + if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -935,7 +1134,9 @@ static void fd_collector(ebpf_module_t *em) int update_every = em->update_every; int counter = update_every - 1; int maps_per_core = 
em->maps_per_core; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -968,6 +1169,15 @@ static void fd_collector(ebpf_module_t *em) pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -1066,6 +1276,8 @@ static void ebpf_create_fd_global_charts(ebpf_module_t *em) NETDATA_FD_SYSCALL_END, em->update_every, NETDATA_EBPF_MODULE_NAME_FD); } + + fflush(stdout); } /***************************************************************** @@ -1125,7 +1337,7 @@ static int ebpf_fd_load_bpf(ebpf_module_t *em) #endif if (ret) - error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); return ret; } @@ -1165,10 +1377,10 @@ void *ebpf_fd_thread(void *ptr) pthread_mutex_lock(&lock); ebpf_create_fd_global_charts(em); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); #ifdef NETDATA_DEV_MODE if (ebpf_aral_fd_pid) - ebpf_statistic_create_aral_chart(NETDATA_EBPF_FD_ARAL_NAME, em); + fd_disable_priority = ebpf_statistic_create_aral_chart(NETDATA_EBPF_FD_ARAL_NAME, em); #endif pthread_mutex_unlock(&lock); diff --git a/collectors/ebpf.plugin/ebpf_fd.h b/collectors/ebpf.plugin/ebpf_fd.h index 85dfd36e..00986673 100644 --- a/collectors/ebpf.plugin/ebpf_fd.h +++ b/collectors/ebpf.plugin/ebpf_fd.h @@ -3,8 +3,9 @@ #ifndef NETDATA_EBPF_FD_H #define NETDATA_EBPF_FD_H 1 -// Module name +// Module name & File description #define 
NETDATA_EBPF_MODULE_NAME_FD "filedescriptor" +#define NETDATA_EBPF_FD_MODULE_DESC "Monitor when files are open and closed. This thread is integrated with apps and cgroup." // Menu group #define NETDATA_FILE_GROUP "file_access" diff --git a/collectors/ebpf.plugin/ebpf_filesystem.c b/collectors/ebpf.plugin/ebpf_filesystem.c index 63f592eb..2bff738c 100644 --- a/collectors/ebpf.plugin/ebpf_filesystem.c +++ b/collectors/ebpf.plugin/ebpf_filesystem.c @@ -131,6 +131,202 @@ static netdata_publish_syscall_t filesystem_publish_aggregated[NETDATA_EBPF_HIST char **dimensions = NULL; static netdata_idx_t *filesystem_hash_values = NULL; +#ifdef LIBBPF_MAJOR_VERSION +/** + * FS disable kprobe + * + * Disable kprobes, because system will use trampolines. + * We are not calling this function for while, because we are prioritizing kprobes. We opted by this road, because + * distribution are still not deliverying necessary btf files per FS. + * + * @param obj FS object loaded. + */ +static void ebpf_fs_disable_kprobe(struct filesystem_bpf *obj) + { + // kprobe + bpf_program__set_autoload(obj->progs.netdata_fs_file_read_probe, false); + bpf_program__set_autoload(obj->progs.netdata_fs_file_write_probe, false); + bpf_program__set_autoload(obj->progs.netdata_fs_file_open_probe, false); + bpf_program__set_autoload(obj->progs.netdata_fs_2nd_file_open_probe, false); + bpf_program__set_autoload(obj->progs.netdata_fs_getattr_probe, false); + // kretprobe + bpf_program__set_autoload(obj->progs.netdata_fs_file_read_retprobe, false); + bpf_program__set_autoload(obj->progs.netdata_fs_file_write_retprobe, false); + bpf_program__set_autoload(obj->progs.netdata_fs_file_open_retprobe, false); + bpf_program__set_autoload(obj->progs.netdata_fs_2nd_file_open_retprobe, false); + bpf_program__set_autoload(obj->progs.netdata_fs_getattr_retprobe, false); + } + + /** + * Disable trampoline + * + * Disable trampolines to use kprobes. + * + * @param obj FS object loaded. 
+ */ + static void ebpf_fs_disable_trampoline(struct filesystem_bpf *obj) + { + // entry + bpf_program__set_autoload(obj->progs.netdata_fs_file_read_entry, false); + bpf_program__set_autoload(obj->progs.netdata_fs_file_write_entry, false); + bpf_program__set_autoload(obj->progs.netdata_fs_file_open_entry, false); + bpf_program__set_autoload(obj->progs.netdata_fs_getattr_entry, false); + bpf_program__set_autoload(obj->progs.netdata_fs_2nd_file_open_entry, false); + + // exit + bpf_program__set_autoload(obj->progs.netdata_fs_file_read_exit, false); + bpf_program__set_autoload(obj->progs.netdata_fs_file_write_exit, false); + bpf_program__set_autoload(obj->progs.netdata_fs_file_open_exit, false); + bpf_program__set_autoload(obj->progs.netdata_fs_getattr_exit, false); + bpf_program__set_autoload(obj->progs.netdata_fs_2nd_file_open_exit, false); + } + + /** + * Set targets + * + * Set targets for each objects. + * + * @param obj FS object loaded. + * @param functions array with function names. 
+ */ + static void ebpf_fs_set_target(struct filesystem_bpf *obj, const char **functions) +{ + // entry + bpf_program__set_attach_target(obj->progs.netdata_fs_file_read_entry, 0, + functions[NETDATA_KEY_BTF_READ]); + bpf_program__set_attach_target(obj->progs.netdata_fs_file_write_entry, 0, + functions[NETDATA_KEY_BTF_WRITE]); + bpf_program__set_attach_target(obj->progs.netdata_fs_file_open_entry, 0, + functions[NETDATA_KEY_BTF_OPEN]); + bpf_program__set_attach_target(obj->progs.netdata_fs_getattr_entry, 0, + functions[NETDATA_KEY_BTF_SYNC_ATTR]); + + // exit + bpf_program__set_attach_target(obj->progs.netdata_fs_file_read_exit, 0, + functions[NETDATA_KEY_BTF_READ]); + bpf_program__set_attach_target(obj->progs.netdata_fs_file_write_exit, 0, + functions[NETDATA_KEY_BTF_WRITE]); + bpf_program__set_attach_target(obj->progs.netdata_fs_file_open_exit, 0, + functions[NETDATA_KEY_BTF_OPEN]); + bpf_program__set_attach_target(obj->progs.netdata_fs_getattr_exit, 0, + functions[NETDATA_KEY_BTF_SYNC_ATTR]); + + if (functions[NETDATA_KEY_BTF_OPEN2]) { + bpf_program__set_attach_target(obj->progs.netdata_fs_2nd_file_open_entry, 0, + functions[NETDATA_KEY_BTF_OPEN2]); + bpf_program__set_attach_target(obj->progs.netdata_fs_2nd_file_open_exit, 0, + functions[NETDATA_KEY_BTF_OPEN2]); + } else { + bpf_program__set_autoload(obj->progs.netdata_fs_2nd_file_open_entry, false); + bpf_program__set_autoload(obj->progs.netdata_fs_2nd_file_open_exit, false); + } +} + +/** + * Attach Kprobe + * + * Attach kprobe on targets + * + * @param obj FS object loaded. + * @param functions array with function names. 
+ */ +static int ebpf_fs_attach_kprobe(struct filesystem_bpf *obj, const char **functions) +{ + // kprobe + obj->links.netdata_fs_file_read_probe = bpf_program__attach_kprobe(obj->progs.netdata_fs_file_read_probe, + false, functions[NETDATA_KEY_BTF_READ]); + if (libbpf_get_error(obj->links.netdata_fs_file_read_probe)) + return -1; + + obj->links.netdata_fs_file_write_probe = bpf_program__attach_kprobe(obj->progs.netdata_fs_file_write_probe, + false, functions[NETDATA_KEY_BTF_WRITE]); + if (libbpf_get_error(obj->links.netdata_fs_file_write_probe)) + return -1; + + obj->links.netdata_fs_file_open_probe = bpf_program__attach_kprobe(obj->progs.netdata_fs_file_open_probe, + false, functions[NETDATA_KEY_BTF_OPEN]); + if (libbpf_get_error(obj->links.netdata_fs_file_open_probe)) + return -1; + + obj->links.netdata_fs_getattr_probe = bpf_program__attach_kprobe(obj->progs.netdata_fs_getattr_probe, + false, functions[NETDATA_KEY_BTF_SYNC_ATTR]); + if (libbpf_get_error(obj->links.netdata_fs_getattr_probe)) + return -1; + + // kretprobe + obj->links.netdata_fs_file_read_retprobe = bpf_program__attach_kprobe(obj->progs.netdata_fs_file_read_retprobe, + false, functions[NETDATA_KEY_BTF_READ]); + if (libbpf_get_error(obj->links.netdata_fs_file_read_retprobe)) + return -1; + + obj->links.netdata_fs_file_write_retprobe = bpf_program__attach_kprobe(obj->progs.netdata_fs_file_write_retprobe, + false, functions[NETDATA_KEY_BTF_WRITE]); + if (libbpf_get_error(obj->links.netdata_fs_file_write_retprobe)) + return -1; + + obj->links.netdata_fs_file_open_retprobe = bpf_program__attach_kprobe(obj->progs.netdata_fs_file_open_retprobe, + false, functions[NETDATA_KEY_BTF_OPEN]); + if (libbpf_get_error(obj->links.netdata_fs_file_open_retprobe)) + return -1; + + obj->links.netdata_fs_getattr_retprobe = bpf_program__attach_kprobe(obj->progs.netdata_fs_getattr_retprobe, + false, functions[NETDATA_KEY_BTF_SYNC_ATTR]); + if (libbpf_get_error(obj->links.netdata_fs_getattr_retprobe)) + return -1; + + if 
(functions[NETDATA_KEY_BTF_OPEN2]) { + obj->links.netdata_fs_2nd_file_open_probe = bpf_program__attach_kprobe(obj->progs.netdata_fs_2nd_file_open_probe, + false, functions[NETDATA_KEY_BTF_OPEN2]); + if (libbpf_get_error(obj->links.netdata_fs_2nd_file_open_probe)) + return -1; + + obj->links.netdata_fs_2nd_file_open_retprobe = bpf_program__attach_kprobe(obj->progs.netdata_fs_2nd_file_open_retprobe, + false, functions[NETDATA_KEY_BTF_OPEN2]); + if (libbpf_get_error(obj->links.netdata_fs_2nd_file_open_retprobe)) + return -1; + } + + return 0; +} + +/** + * Load and Attach + * + * Load binary and attach to targets. + * + * @param map Structure with information about maps. + * @param obj FS object loaded. + * @param functions array with function names. + * @param bf sttruct with btf file loaded. + */ +static inline int ebpf_fs_load_and_attach(ebpf_local_maps_t *map, struct filesystem_bpf *obj, + const char **functions, struct btf *bf) +{ + if (bf) { + ebpf_fs_disable_kprobe(obj); + ebpf_fs_set_target(obj, functions); + } else { + ebpf_fs_disable_trampoline(obj); + } + + int ret = filesystem_bpf__load(obj); + if (ret) { + fprintf(stderr, "failed to load BPF object: %d\n", ret); + return -1; + } + + if (bf) + ret = filesystem_bpf__attach(obj); + else + ret = ebpf_fs_attach_kprobe(obj, functions); + + if (!ret) + map->map_fd = bpf_map__fd(obj->maps.tbl_fs);; + + return ret; +} +#endif + /***************************************************************** * * COMMON FUNCTIONS @@ -199,13 +395,15 @@ static void ebpf_create_fs_charts(int update_every) snprintfz(chart_name, 63, "%s_read_latency", efp->filesystem); efp->hread.name = strdupz(chart_name); efp->hread.title = strdupz(title); + efp->hread.ctx = NULL; efp->hread.order = order; efp->family_name = strdupz(family); ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hread.name, - title, - EBPF_COMMON_DIMENSION_CALL, family, - "filesystem.read_latency", NETDATA_EBPF_CHART_TYPE_STACKED, order, ebpf_create_global_dimension, + 
efp->hread.title, + EBPF_COMMON_DIMENSION_CALL, efp->family_name, + "filesystem.read_latency", NETDATA_EBPF_CHART_TYPE_STACKED, order, + ebpf_create_global_dimension, filesystem_publish_aggregated, NETDATA_EBPF_HIST_MAX_BINS, update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM); order++; @@ -214,11 +412,13 @@ static void ebpf_create_fs_charts(int update_every) snprintfz(chart_name, 63, "%s_write_latency", efp->filesystem); efp->hwrite.name = strdupz(chart_name); efp->hwrite.title = strdupz(title); + efp->hwrite.ctx = NULL; efp->hwrite.order = order; ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hwrite.name, - title, - EBPF_COMMON_DIMENSION_CALL, family, - "filesystem.write_latency", NETDATA_EBPF_CHART_TYPE_STACKED, order, ebpf_create_global_dimension, + efp->hwrite.title, + EBPF_COMMON_DIMENSION_CALL, efp->family_name, + "filesystem.write_latency", NETDATA_EBPF_CHART_TYPE_STACKED, order, + ebpf_create_global_dimension, filesystem_publish_aggregated, NETDATA_EBPF_HIST_MAX_BINS, update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM); order++; @@ -227,11 +427,13 @@ static void ebpf_create_fs_charts(int update_every) snprintfz(chart_name, 63, "%s_open_latency", efp->filesystem); efp->hopen.name = strdupz(chart_name); efp->hopen.title = strdupz(title); + efp->hopen.ctx = NULL; efp->hopen.order = order; ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hopen.name, - title, - EBPF_COMMON_DIMENSION_CALL, family, - "filesystem.open_latency", NETDATA_EBPF_CHART_TYPE_STACKED, order, ebpf_create_global_dimension, + efp->hopen.title, + EBPF_COMMON_DIMENSION_CALL, efp->family_name, + "filesystem.open_latency", NETDATA_EBPF_CHART_TYPE_STACKED, order, + ebpf_create_global_dimension, filesystem_publish_aggregated, NETDATA_EBPF_HIST_MAX_BINS, update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM); order++; @@ -242,9 +444,10 @@ static void ebpf_create_fs_charts(int update_every) snprintfz(ctx, 63, "filesystem.%s_latency", type); efp->hadditional.name = strdupz(chart_name); 
efp->hadditional.title = strdupz(title); + efp->hadditional.ctx = strdupz(ctx); efp->hadditional.order = order; - ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hadditional.name, title, - EBPF_COMMON_DIMENSION_CALL, family, + ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hadditional.name, efp->hadditional.title, + EBPF_COMMON_DIMENSION_CALL, efp->family_name, ctx, NETDATA_EBPF_CHART_TYPE_STACKED, order, ebpf_create_global_dimension, filesystem_publish_aggregated, NETDATA_EBPF_HIST_MAX_BINS, update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM); @@ -252,6 +455,8 @@ static void ebpf_create_fs_charts(int update_every) efp->flags |= NETDATA_FILESYSTEM_FLAG_CHART_CREATED; } } + + fflush(stdout); } /** @@ -263,6 +468,7 @@ static void ebpf_create_fs_charts(int update_every) */ int ebpf_filesystem_initialize_ebpf_data(ebpf_module_t *em) { + pthread_mutex_lock(&lock); int i; const char *saved_name = em->thread_name; uint64_t kernels = em->kernels; @@ -275,17 +481,32 @@ int ebpf_filesystem_initialize_ebpf_data(ebpf_module_t *em) #ifdef LIBBPF_MAJOR_VERSION ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel); #endif - efp->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &efp->objects); - if (!efp->probe_links) { - em->thread_name = saved_name; - em->kernels = kernels; - em->maps = NULL; - return -1; + if (em->load & EBPF_LOAD_LEGACY) { + efp->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &efp->objects); + if (!efp->probe_links) { + em->thread_name = saved_name; + em->kernels = kernels; + em->maps = NULL; + pthread_mutex_unlock(&lock); + return -1; + } } +#ifdef LIBBPF_MAJOR_VERSION + else { + efp->fs_obj = filesystem_bpf__open(); + if (!efp->fs_obj) { + em->thread_name = saved_name; + em->kernels = kernels; + return -1; + } else { + if (ebpf_fs_load_and_attach(em->maps, efp->fs_obj, + efp->functions, NULL)) + return -1; + } + } +#endif efp->flags |= NETDATA_FILESYSTEM_FLAG_HAS_PARTITION; - 
pthread_mutex_lock(&lock); ebpf_update_kernel_memory(&plugin_statistics, efp->fs_maps, EBPF_ACTION_STAT_ADD); - pthread_mutex_unlock(&lock); // Nedeed for filesystems like btrfs if ((efp->flags & NETDATA_FILESYSTEM_FILL_ADDRESS_TABLE) && (efp->addresses.function)) { @@ -295,6 +516,7 @@ int ebpf_filesystem_initialize_ebpf_data(ebpf_module_t *em) efp->flags &= ~NETDATA_FILESYSTEM_LOAD_EBPF_PROGRAM; } em->thread_name = saved_name; + pthread_mutex_unlock(&lock); em->kernels = kernels; em->maps = NULL; @@ -405,43 +627,88 @@ void ebpf_filesystem_cleanup_ebpf_data() ebpf_filesystem_partitions_t *efp = &localfs[i]; if (efp->probe_links) { freez(efp->family_name); + efp->family_name = NULL; freez(efp->hread.name); + efp->hread.name = NULL; freez(efp->hread.title); + efp->hread.title = NULL; freez(efp->hwrite.name); + efp->hwrite.name = NULL; freez(efp->hwrite.title); + efp->hwrite.title = NULL; freez(efp->hopen.name); + efp->hopen.name = NULL; freez(efp->hopen.title); + efp->hopen.title = NULL; freez(efp->hadditional.name); + efp->hadditional.name = NULL; freez(efp->hadditional.title); + efp->hadditional.title = NULL; + freez(efp->hadditional.ctx); + efp->hadditional.ctx = NULL; } } } /** - * Filesystem Free + * Obsolete global * - * Cleanup variables after child threads to stop + * Obsolete global charts created by thread. * - * @param ptr thread data. 
+ * @param em a pointer to `struct ebpf_module` */ -static void ebpf_filesystem_free(ebpf_module_t *em) +static void ebpf_obsolete_filesystem_global(ebpf_module_t *em) { - pthread_mutex_lock(&ebpf_exit_cleanup); - em->enabled = NETDATA_THREAD_EBPF_STOPPING; - pthread_mutex_unlock(&ebpf_exit_cleanup); - - ebpf_filesystem_cleanup_ebpf_data(); - if (dimensions) - ebpf_histogram_dimension_cleanup(dimensions, NETDATA_EBPF_HIST_MAX_BINS); - freez(filesystem_hash_values); + int i; + for (i = 0; localfs[i].filesystem; i++) { + ebpf_filesystem_partitions_t *efp = &localfs[i]; + if (!efp->objects) + continue; - pthread_mutex_lock(&ebpf_exit_cleanup); - em->enabled = NETDATA_THREAD_EBPF_STOPPED; - pthread_mutex_unlock(&ebpf_exit_cleanup); + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + efp->hread.name, + efp->hread.title, + EBPF_COMMON_DIMENSION_CALL, + efp->family_name, + NETDATA_EBPF_CHART_TYPE_STACKED, + "filesystem.read_latency", + efp->hread.order, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + efp->hwrite.name, + efp->hwrite.title, + EBPF_COMMON_DIMENSION_CALL, + efp->family_name, + NETDATA_EBPF_CHART_TYPE_STACKED, + "filesystem.write_latency", + efp->hwrite.order, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + efp->hopen.name, + efp->hopen.title, + EBPF_COMMON_DIMENSION_CALL, + efp->family_name, + NETDATA_EBPF_CHART_TYPE_STACKED, + "filesystem.open_latency", + efp->hopen.order, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + efp->hadditional.name, + efp->hadditional.title, + EBPF_COMMON_DIMENSION_CALL, + efp->family_name, + NETDATA_EBPF_CHART_TYPE_STACKED, + efp->hadditional.ctx, + efp->hadditional.order, + em->update_every); + } } /** @@ -454,7 +721,39 @@ static void ebpf_filesystem_free(ebpf_module_t *em) static void ebpf_filesystem_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - ebpf_filesystem_free(em); + + if (em->enabled == 
NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + ebpf_obsolete_filesystem_global(em); + + pthread_mutex_unlock(&lock); + fflush(stdout); + } + + ebpf_filesystem_cleanup_ebpf_data(); + if (dimensions) { + ebpf_histogram_dimension_cleanup(dimensions, NETDATA_EBPF_HIST_MAX_BINS); + dimensions = NULL; + } + + freez(filesystem_hash_values); + + int i; + for (i = 0; localfs[i].filesystem; i++) { + ebpf_filesystem_partitions_t *efp = &localfs[i]; + if (!efp->probe_links) + continue; + + ebpf_unload_legacy_code(efp->objects, efp->probe_links); + efp->objects = NULL; + efp->probe_links = NULL; + efp->flags = NETDATA_FILESYSTEM_FLAG_NO_PARTITION; + } + + pthread_mutex_lock(&ebpf_exit_cleanup); + em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); + pthread_mutex_unlock(&ebpf_exit_cleanup); } /***************************************************************** @@ -608,7 +907,9 @@ static void filesystem_collector(ebpf_module_t *em) heartbeat_t hb; heartbeat_init(&hb); int counter = update_every - 1; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -622,6 +923,15 @@ static void filesystem_collector(ebpf_module_t *em) ebpf_histogram_send_data(); pthread_mutex_unlock(&lock); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -683,9 +993,12 @@ void *ebpf_filesystem_thread(void *ptr) // Initialize optional as zero, to identify when there are not partitions to monitor em->optional = 0; +#ifdef LIBBPF_MAJOR_VERSION + ebpf_adjust_thread_load(em, default_btf); +#endif if (ebpf_update_partitions(em)) { if (em->optional) - info("Netdata cannot 
monitor the filesystems used on this host."); + netdata_log_info("Netdata cannot monitor the filesystems used on this host."); goto endfilesystem; } diff --git a/collectors/ebpf.plugin/ebpf_filesystem.h b/collectors/ebpf.plugin/ebpf_filesystem.h index b1126bad..f58d7fbe 100644 --- a/collectors/ebpf.plugin/ebpf_filesystem.h +++ b/collectors/ebpf.plugin/ebpf_filesystem.h @@ -3,10 +3,14 @@ #ifndef NETDATA_EBPF_FILESYSTEM_H #define NETDATA_EBPF_FILESYSTEM_H 1 -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_FILESYSTEM "filesystem" +#define NETDATA_EBPF_FS_MODULE_DESC "Monitor filesystem latency for: btrfs, ext4, nfs, xfs and zfs." #include "ebpf.h" +#ifdef LIBBPF_MAJOR_VERSION +#include "includes/filesystem.skel.h" +#endif #define NETDATA_FS_MAX_DIST_NAME 64UL diff --git a/collectors/ebpf.plugin/ebpf_functions.c b/collectors/ebpf.plugin/ebpf_functions.c new file mode 100644 index 00000000..cc26044c --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_functions.c @@ -0,0 +1,419 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_functions.h" + +/***************************************************************** + * EBPF SELECT MODULE + *****************************************************************/ + +/** + * Select Module + * + * @param thread_name name of the thread we are looking for. + * + * @return it returns a pointer for the module that has thread_name on success or NULL otherwise. + */ +ebpf_module_t *ebpf_functions_select_module(const char *thread_name) { + int i; + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { + if (strcmp(ebpf_modules[i].thread_name, thread_name) == 0) { + return &ebpf_modules[i]; + } + } + + return NULL; +} + +/***************************************************************** + * EBPF HELP FUNCTIONS + *****************************************************************/ + +/** + * Thread Help + * + * Shows help with all options accepted by thread function. 
+ * + * @param transaction the transaction id that Netdata sent for this function execution +*/ +static void ebpf_function_thread_manipulation_help(const char *transaction) { + pthread_mutex_lock(&lock); + pluginsd_function_result_begin_to_stdout(transaction, HTTP_RESP_OK, "text/plain", now_realtime_sec() + 3600); + fprintf(stdout, "%s", + "ebpf.plugin / thread\n" + "\n" + "Function `thread` allows user to control eBPF threads.\n" + "\n" + "The following filters are supported:\n" + "\n" + " thread:NAME\n" + " Shows information for the thread NAME. Names are listed inside `ebpf.d.conf`.\n" + "\n" + " enable:NAME:PERIOD\n" + " Enable a specific thread named `NAME` to run a specific PERIOD in seconds. When PERIOD is not\n" + " specified plugin will use the default 300 seconds\n" + "\n" + " disable:NAME\n" + " Disable a sp.\n" + "\n" + "Filters can be combined. Each filter can be given only one time.\n" + "Process thread is not controlled by functions until we finish the creation of functions per thread..\n" + ); + pluginsd_function_result_end_to_stdout(); + fflush(stdout); + pthread_mutex_unlock(&lock); +} + + +/***************************************************************** + * EBPF ERROR FUNCTIONS + *****************************************************************/ + +/** + * Function error + * + * Show error when a wrong function is given + * + * @param transaction the transaction id that Netdata sent for this function execution + * @param code the error code to show with the message. 
+ * @param msg the error message + */ +static void ebpf_function_error(const char *transaction, int code, const char *msg) { + char buffer[PLUGINSD_LINE_MAX + 1]; + json_escape_string(buffer, msg, PLUGINSD_LINE_MAX); + + pluginsd_function_result_begin_to_stdout(transaction, code, "application/json", now_realtime_sec()); + fprintf(stdout, "{\"status\":%d,\"error_message\":\"%s\"}", code, buffer); + pluginsd_function_result_end_to_stdout(); +} + +/***************************************************************** + * EBPF THREAD FUNCTION + *****************************************************************/ + +/** + * Function enable + * + * Enable a specific thread. + * + * @param transaction the transaction id that Netdata sent for this function execution + * @param function function name and arguments given to thread. + * @param line_buffer buffer used to parse args + * @param line_max Number of arguments given + * @param timeout The function timeout + * @param em The structure with thread information + */ +static void ebpf_function_thread_manipulation(const char *transaction, + char *function __maybe_unused, + char *line_buffer __maybe_unused, + int line_max __maybe_unused, + int timeout __maybe_unused, + ebpf_module_t *em) +{ + char *words[PLUGINSD_MAX_WORDS] = { NULL }; + char message[512]; + uint32_t show_specific_thread = 0; + size_t num_words = quoted_strings_splitter_pluginsd(function, words, PLUGINSD_MAX_WORDS); + for(int i = 1; i < PLUGINSD_MAX_WORDS ;i++) { + const char *keyword = get_word(words, num_words, i); + if (!keyword) + break; + + ebpf_module_t *lem; + if(strncmp(keyword, EBPF_THREADS_ENABLE_CATEGORY, sizeof(EBPF_THREADS_ENABLE_CATEGORY) -1) == 0) { + char thread_name[128]; + int period = -1; + const char *name = &keyword[sizeof(EBPF_THREADS_ENABLE_CATEGORY) - 1]; + char *separator = strchr(name, ':'); + if (separator) { + strncpyz(thread_name, name, separator - name); + period = str2i(++separator); + } else { + strncpyz(thread_name, name, 
strlen(name)); + } + + lem = ebpf_functions_select_module(thread_name); + if (!lem) { + snprintfz(message, 511, "%s%s", EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND, name); + ebpf_function_error(transaction, HTTP_RESP_NOT_FOUND, message); + return; + } + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (lem->enabled > NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + struct netdata_static_thread *st = lem->thread; + // Load configuration again + ebpf_update_module(lem, default_btf, running_on_kernel, isrh); + + // another request for thread that already ran, cleanup and restart + if (st->thread) + freez(st->thread); + + if (period <= 0) + period = EBPF_DEFAULT_LIFETIME; + + st->thread = mallocz(sizeof(netdata_thread_t)); + lem->enabled = NETDATA_THREAD_EBPF_FUNCTION_RUNNING; + lem->lifetime = period; + +#ifdef NETDATA_INTERNAL_CHECKS + netdata_log_info("Starting thread %s with lifetime = %d", thread_name, period); +#endif + + netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, + st->start_routine, lem); + } else { + lem->running_time = 0; + if (period > 0) // user is modifying period to run + lem->lifetime = period; +#ifdef NETDATA_INTERNAL_CHECKS + netdata_log_info("Thread %s had lifetime updated for %d", thread_name, period); +#endif + } + pthread_mutex_unlock(&ebpf_exit_cleanup); + } else if(strncmp(keyword, EBPF_THREADS_DISABLE_CATEGORY, sizeof(EBPF_THREADS_DISABLE_CATEGORY) -1) == 0) { + const char *name = &keyword[sizeof(EBPF_THREADS_DISABLE_CATEGORY) - 1]; + lem = ebpf_functions_select_module(name); + if (!lem) { + snprintfz(message, 511, "%s%s", EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND, name); + ebpf_function_error(transaction, HTTP_RESP_NOT_FOUND, message); + return; + } + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (lem->enabled < NETDATA_THREAD_EBPF_STOPPING && lem->thread->thread) { + lem->lifetime = 0; + lem->running_time = lem->update_every; + netdata_thread_cancel(*lem->thread->thread); + } + 
pthread_mutex_unlock(&ebpf_exit_cleanup); + } else if(strncmp(keyword, EBPF_THREADS_SELECT_THREAD, sizeof(EBPF_THREADS_SELECT_THREAD) -1) == 0) { + const char *name = &keyword[sizeof(EBPF_THREADS_SELECT_THREAD) - 1]; + lem = ebpf_functions_select_module(name); + if (!lem) { + snprintfz(message, 511, "%s%s", EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND, name); + ebpf_function_error(transaction, HTTP_RESP_NOT_FOUND, message); + return; + } + + show_specific_thread |= 1<<lem->thread_id; + } else if(strncmp(keyword, "help", 4) == 0) { + ebpf_function_thread_manipulation_help(transaction); + return; + } + } + + time_t expires = now_realtime_sec() + em->update_every; + + BUFFER *wb = buffer_create(PLUGINSD_LINE_MAX, NULL); + buffer_json_initialize(wb, "\"", "\"", 0, true, false); + buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); + buffer_json_member_add_string(wb, "type", "table"); + buffer_json_member_add_time_t(wb, "update_every", em->update_every); + buffer_json_member_add_string(wb, "help", EBPF_PLUGIN_THREAD_FUNCTION_DESCRIPTION); + + // Collect data + buffer_json_member_add_array(wb, "data"); + int i; + for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) { + if (show_specific_thread && !(show_specific_thread & 1<<i)) + continue; + + ebpf_module_t *wem = &ebpf_modules[i]; + buffer_json_add_array_item_array(wb); + + // IMPORTANT! + // THE ORDER SHOULD BE THE SAME WITH THE FIELDS! + + // thread name + buffer_json_add_array_item_string(wb, wem->thread_name); + + // description + buffer_json_add_array_item_string(wb, wem->thread_description); + // Either it is not running or received a disabled signal and it is stopping. 
+ if (wem->enabled > NETDATA_THREAD_EBPF_FUNCTION_RUNNING || + (!wem->lifetime && (int)wem->running_time == wem->update_every)) { + // status + buffer_json_add_array_item_string(wb, EBPF_THREAD_STATUS_STOPPED); + + // Time remaining + buffer_json_add_array_item_uint64(wb, 0); + + // action + buffer_json_add_array_item_string(wb, "NULL"); + } else { + // status + buffer_json_add_array_item_string(wb, EBPF_THREAD_STATUS_RUNNING); + + // Time remaining + buffer_json_add_array_item_uint64(wb, (wem->lifetime) ? (wem->lifetime - wem->running_time) : 0); + + // action + buffer_json_add_array_item_string(wb, "Enabled/Disabled"); + } + + buffer_json_array_close(wb); + } + + buffer_json_array_close(wb); // data + + buffer_json_member_add_object(wb, "columns"); + { + int fields_id = 0; + + // IMPORTANT! + // THE ORDER SHOULD BE THE SAME WITH THE VALUES! + buffer_rrdf_table_add_field(wb, fields_id++, "Thread", "Thread Name", RRDF_FIELD_TYPE_STRING, + RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN, + RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL); + + buffer_rrdf_table_add_field(wb, fields_id++, "Description", "Thread Desc", RRDF_FIELD_TYPE_STRING, + RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN, + RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL); + + buffer_rrdf_table_add_field(wb, fields_id++, "Status", "Thread Status", RRDF_FIELD_TYPE_STRING, + RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN, + RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL); + + buffer_rrdf_table_add_field(wb, fields_id++, "Time", "Time Remaining", RRDF_FIELD_TYPE_INTEGER, + RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, 0, NULL, + NAN, 
RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, fields_id++, "Action", "Thread Action", RRDF_FIELD_TYPE_STRING, + RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN, + RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL); + } + buffer_json_object_close(wb); // columns + + buffer_json_member_add_string(wb, "default_sort_column", "Thread"); + + buffer_json_member_add_object(wb, "charts"); + { + // Threads + buffer_json_member_add_object(wb, "eBPFThreads"); + { + buffer_json_member_add_string(wb, "name", "Threads"); + buffer_json_member_add_string(wb, "type", "line"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "Threads"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + + // Life Time + buffer_json_member_add_object(wb, "eBPFLifeTime"); + { + buffer_json_member_add_string(wb, "name", "LifeTime"); + buffer_json_member_add_string(wb, "type", "line"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "Threads"); + buffer_json_add_array_item_string(wb, "Time"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); // charts + + // Do we use only on fields that can be groupped? 
+ buffer_json_member_add_object(wb, "group_by"); + { + // group by Status + buffer_json_member_add_object(wb, "Status"); + { + buffer_json_member_add_string(wb, "name", "Thread status"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "Status"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); // group_by + + buffer_json_member_add_time_t(wb, "expires", expires); + buffer_json_finalize(wb); + + // Lock necessary to avoid race condition + pthread_mutex_lock(&lock); + pluginsd_function_result_begin_to_stdout(transaction, HTTP_RESP_OK, "application/json", expires); + + fwrite(buffer_tostring(wb), buffer_strlen(wb), 1, stdout); + + pluginsd_function_result_end_to_stdout(); + fflush(stdout); + pthread_mutex_unlock(&lock); + + buffer_free(wb); +} + + +/***************************************************************** + * EBPF FUNCTION THREAD + *****************************************************************/ + +/** + * FUNCTION thread. + * + * @param ptr a `ebpf_module_t *`. + * + * @return always NULL. + */ +void *ebpf_function_thread(void *ptr) +{ + ebpf_module_t *em = (ebpf_module_t *)ptr; + char buffer[PLUGINSD_LINE_MAX + 1]; + + char *s = NULL; + while(!ebpf_exit_plugin && (s = fgets(buffer, PLUGINSD_LINE_MAX, stdin))) { + char *words[PLUGINSD_MAX_WORDS] = { NULL }; + size_t num_words = quoted_strings_splitter_pluginsd(buffer, words, PLUGINSD_MAX_WORDS); + + const char *keyword = get_word(words, num_words, 0); + + if(keyword && strcmp(keyword, PLUGINSD_KEYWORD_FUNCTION) == 0) { + char *transaction = get_word(words, num_words, 1); + char *timeout_s = get_word(words, num_words, 2); + char *function = get_word(words, num_words, 3); + + if(!transaction || !*transaction || !timeout_s || !*timeout_s || !function || !*function) { + netdata_log_error("Received incomplete %s (transaction = '%s', timeout = '%s', function = '%s'). 
Ignoring it.", + keyword, + transaction?transaction:"(unset)", + timeout_s?timeout_s:"(unset)", + function?function:"(unset)"); + } + else { + int timeout = str2i(timeout_s); + if (!strncmp(function, EBPF_FUNCTION_THREAD, sizeof(EBPF_FUNCTION_THREAD) - 1)) + ebpf_function_thread_manipulation(transaction, + function, + buffer, + PLUGINSD_LINE_MAX + 1, + timeout, + em); + else + ebpf_function_error(transaction, + HTTP_RESP_NOT_FOUND, + "No function with this name found in ebpf.plugin."); + } + } + else + netdata_log_error("Received unknown command: %s", keyword ? keyword : "(unset)"); + } + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_functions.h b/collectors/ebpf.plugin/ebpf_functions.h new file mode 100644 index 00000000..b20dab63 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_functions.h @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_FUNCTIONS_H +#define NETDATA_EBPF_FUNCTIONS_H 1 + +// configuration file & description +#define NETDATA_DIRECTORY_FUNCTIONS_CONFIG_FILE "functions.conf" +#define NETDATA_EBPF_FUNCTIONS_MODULE_DESC "Show information about current function status." + +// function list +#define EBPF_FUNCTION_THREAD "ebpf_thread" + +#define EBPF_PLUGIN_THREAD_FUNCTION_DESCRIPTION "Detailed information about eBPF threads." 
+#define EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND "ebpf.plugin does not have thread named " + +#define EBPF_PLUGIN_FUNCTIONS(NAME, DESC) do { \ + fprintf(stdout, PLUGINSD_KEYWORD_FUNCTION " \"" NAME "\" 10 \"%s\"\n", DESC); \ +} while(0) + +#define EBPF_THREADS_SELECT_THREAD "thread:" +#define EBPF_THREADS_ENABLE_CATEGORY "enable:" +#define EBPF_THREADS_DISABLE_CATEGORY "disable:" + +#define EBPF_THREAD_STATUS_RUNNING "running" +#define EBPF_THREAD_STATUS_STOPPED "stopped" + +void *ebpf_function_thread(void *ptr); + +#endif diff --git a/collectors/ebpf.plugin/ebpf_hardirq.c b/collectors/ebpf.plugin/ebpf_hardirq.c index f714c261..9092c7ac 100644 --- a/collectors/ebpf.plugin/ebpf_hardirq.c +++ b/collectors/ebpf.plugin/ebpf_hardirq.c @@ -9,8 +9,6 @@ struct config hardirq_config = { .first_section = NULL, .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, .rwlock = AVL_LOCK_INITIALIZER } }; -#define HARDIRQ_MAP_LATENCY 0 -#define HARDIRQ_MAP_LATENCY_STATIC 1 static ebpf_local_maps_t hardirq_maps[] = { { .name = "tbl_hardirq", @@ -138,6 +136,36 @@ static hardirq_static_val_t hardirq_static_vals[] = { // thread will write to netdata agent. static avl_tree_lock hardirq_pub; +#ifdef LIBBPF_MAJOR_VERSION +/** + * Set hash table + * + * Set the values for maps according the value given by kernel. + * + * @param obj is the main structure for bpf objects. + */ +static inline void ebpf_hardirq_set_hash_table(struct hardirq_bpf *obj) +{ + hardirq_maps[HARDIRQ_MAP_LATENCY].map_fd = bpf_map__fd(obj->maps.tbl_hardirq); + hardirq_maps[HARDIRQ_MAP_LATENCY_STATIC].map_fd = bpf_map__fd(obj->maps.tbl_hardirq_static); +} + +/** + * Load and Attach + * + * Load and attach bpf software. 
+ */ +static inline int ebpf_hardirq_load_and_attach(struct hardirq_bpf *obj) +{ + int ret = hardirq_bpf__load(obj); + if (ret) { + return -1; + } + + return hardirq_bpf__attach(obj); +} +#endif + /***************************************************************** * * ARAL SECTION @@ -188,6 +216,27 @@ void ebpf_hardirq_release(hardirq_val_t *stat) *****************************************************************/ /** + * Obsolete global + * + * Obsolete global charts created by thread. + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_hardirq_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, + "hardirq_latency", + "Hardware IRQ latency", + EBPF_COMMON_DIMENSION_MILLISECONDS, + "interrupts", + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + NETDATA_CHART_PRIO_HARDIRQ_LATENCY, + em->update_every + ); +} + +/** * Hardirq Exit * * Cancel child and exit. @@ -198,8 +247,22 @@ static void hardirq_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - if (em->objects) + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + + ebpf_obsolete_hardirq_global(em); + + pthread_mutex_unlock(&lock); + fflush(stdout); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + + if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } for (int i = 0; hardirq_tracepoints[i].class != NULL; i++) { ebpf_disable_tracepoint(&hardirq_tracepoints[i]); @@ -207,6 +270,7 @@ static void hardirq_exit(void *ptr) pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -378,7 +442,7 @@ static int hardirq_read_latency_map(int mapfd) avl_t *check = avl_insert_lock(&hardirq_pub, (avl_t *)v); if (check != (avl_t *)v) { - error("Internal error, cannot insert the AVL 
tree."); + netdata_log_error("Internal error, cannot insert the AVL tree."); } } @@ -505,7 +569,7 @@ static void hardirq_collector(ebpf_module_t *em) hardirq_create_charts(em->update_every); hardirq_create_static_dims(); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); pthread_mutex_unlock(&lock); // loop and read from published data until ebpf plugin is closed. @@ -514,7 +578,9 @@ static void hardirq_collector(ebpf_module_t *em) int update_every = em->update_every; int counter = update_every - 1; //This will be cancelled by its parent - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -533,6 +599,15 @@ static void hardirq_collector(ebpf_module_t *em) write_end_chart(); pthread_mutex_unlock(&lock); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -540,6 +615,40 @@ static void hardirq_collector(ebpf_module_t *em) * EBPF HARDIRQ THREAD *****************************************************************/ +/* + * Load BPF + * + * Load BPF files. + * + * @param em the structure with configuration + * + * @return It returns 0 on success and -1 otherwise. 
+ */ +static int ebpf_hardirq_load_bpf(ebpf_module_t *em) +{ + int ret = 0; + if (em->load & EBPF_LOAD_LEGACY) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + ret = -1; + } + } +#ifdef LIBBPF_MAJOR_VERSION + else { + hardirq_bpf_obj = hardirq_bpf__open(); + if (!hardirq_bpf_obj) + ret = -1; + else { + ret = ebpf_hardirq_load_and_attach(hardirq_bpf_obj); + if (!ret) + ebpf_hardirq_set_hash_table(hardirq_bpf_obj); + } + } +#endif + + return ret; +} + /** * Hard IRQ latency thread. * @@ -559,9 +668,9 @@ void *ebpf_hardirq_thread(void *ptr) #ifdef LIBBPF_MAJOR_VERSION ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel); + ebpf_adjust_thread_load(em, default_btf); #endif - em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); - if (!em->probe_links) { + if (ebpf_hardirq_load_bpf(em)) { goto endhardirq; } diff --git a/collectors/ebpf.plugin/ebpf_hardirq.h b/collectors/ebpf.plugin/ebpf_hardirq.h index 52dea1e5..35b03b76 100644 --- a/collectors/ebpf.plugin/ebpf_hardirq.h +++ b/collectors/ebpf.plugin/ebpf_hardirq.h @@ -3,6 +3,9 @@ #ifndef NETDATA_EBPF_HARDIRQ_H #define NETDATA_EBPF_HARDIRQ_H 1 +// Module description +#define NETDATA_EBPF_HARDIRQ_MODULE_DESC "Show time spent servicing individual hardware interrupt requests (hard IRQs)." 
+ #include <stdint.h> #include "libnetdata/avl/avl.h" @@ -34,6 +37,11 @@ enum hardirq_ebpf_static { HARDIRQ_EBPF_STATIC_END }; +enum hardirq_maps { + HARDIRQ_MAP_LATENCY, + HARDIRQ_MAP_LATENCY_STATIC +}; + typedef struct hardirq_ebpf_static_val { uint64_t latency; uint64_t ts; diff --git a/collectors/ebpf.plugin/ebpf_mdflush.c b/collectors/ebpf.plugin/ebpf_mdflush.c index 65ed860a..3548d673 100644 --- a/collectors/ebpf.plugin/ebpf_mdflush.c +++ b/collectors/ebpf.plugin/ebpf_mdflush.c @@ -31,6 +31,10 @@ static ebpf_local_maps_t mdflush_maps[] = { } }; +netdata_ebpf_targets_t mdflush_targets[] = { {.name = "md_flush_request", .mode = EBPF_LOAD_TRAMPOLINE}, + {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; + + // store for "published" data from the reader thread, which the collector // thread will write to netdata agent. static avl_tree_lock mdflush_pub; @@ -38,6 +42,113 @@ static avl_tree_lock mdflush_pub; // tmp store for mdflush values we get from a per-CPU eBPF map. static mdflush_ebpf_val_t *mdflush_ebpf_vals = NULL; +#ifdef LIBBPF_MAJOR_VERSION +/** + * Disable probes + * + * Disable probes to use trampolines. + * + * @param obj the loaded object structure. + */ +static inline void ebpf_disable_probes(struct mdflush_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_md_flush_request_kprobe, false); +} + +/** + * Disable trampolines + * + * Disable trampoliness to use probes. + * + * @param obj the loaded object structure. + */ +static inline void ebpf_disable_trampoline(struct mdflush_bpf *obj) +{ + bpf_program__set_autoload(obj->progs.netdata_md_flush_request_fentry, false); +} + +/** + * Set Trampoline + * + * Define target to attach trampoline + * + * @param obj the loaded object structure. 
+ */ +static void ebpf_set_trampoline_target(struct mdflush_bpf *obj) +{ + bpf_program__set_attach_target(obj->progs.netdata_md_flush_request_fentry, 0, + mdflush_targets[NETDATA_MD_FLUSH_REQUEST].name); +} + +/** + * Load probe + * + * Load probe to monitor internal function. + * + * @param obj the loaded object structure. + */ +static inline int ebpf_load_probes(struct mdflush_bpf *obj) +{ + obj->links.netdata_md_flush_request_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_md_flush_request_kprobe, + false, + mdflush_targets[NETDATA_MD_FLUSH_REQUEST].name); + return libbpf_get_error(obj->links.netdata_md_flush_request_kprobe); +} + +/** + * Load and Attach + * + * Load and attach bpf codes according user selection. + * + * @param obj the loaded object structure. + * @param em the structure with configuration + */ +static inline int ebpf_mdflush_load_and_attach(struct mdflush_bpf *obj, ebpf_module_t *em) +{ + int mode = em->targets[NETDATA_MD_FLUSH_REQUEST].mode; + if (mode == EBPF_LOAD_TRAMPOLINE) { // trampoline + ebpf_disable_probes(obj); + + ebpf_set_trampoline_target(obj); + } else // kprobe + ebpf_disable_trampoline(obj); + + int ret = mdflush_bpf__load(obj); + if (ret) { + fprintf(stderr, "failed to load BPF object: %d\n", ret); + return -1; + } + + if (mode == EBPF_LOAD_TRAMPOLINE) + ret = mdflush_bpf__attach(obj); + else + ret = ebpf_load_probes(obj); + + return ret; +} + +#endif + +/** + * Obsolete global + * + * Obsolete global charts created by thread. 
+ * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_mdflush_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete("mdstat", + "mdstat_flush", + "MD flushes", + "flushes", + "flush (eBPF)", + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + NETDATA_CHART_PRIO_MDSTAT_FLUSH, + em->update_every); +} + /** * MDflush exit * @@ -49,11 +160,26 @@ static void mdflush_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - if (em->objects) + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + + ebpf_obsolete_mdflush_global(em); + + pthread_mutex_unlock(&lock); + fflush(stdout); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + + if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -150,7 +276,7 @@ static void mdflush_read_count_map(int maps_per_core) if (v_is_new) { avl_t *check = avl_insert_lock(&mdflush_pub, (avl_t *)v); if (check != (avl_t *)v) { - error("Internal error, cannot insert the AVL tree."); + netdata_log_error("Internal error, cannot insert the AVL tree."); } } } @@ -209,7 +335,7 @@ static void mdflush_collector(ebpf_module_t *em) pthread_mutex_lock(&lock); mdflush_create_charts(update_every); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); pthread_mutex_unlock(&lock); // loop and read from published data until ebpf plugin is closed. 
@@ -217,7 +343,9 @@ static void mdflush_collector(ebpf_module_t *em) heartbeat_init(&hb); int counter = update_every - 1; int maps_per_core = em->maps_per_core; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) @@ -232,9 +360,61 @@ static void mdflush_collector(ebpf_module_t *em) write_end_chart(); pthread_mutex_unlock(&lock); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); + } +} + +/* + * Load BPF + * + * Load BPF files. + * + * @param em the structure with configuration + * + * @return It returns 0 on success and -1 otherwise. + */ +static int ebpf_mdflush_load_bpf(ebpf_module_t *em) +{ + int ret = 0; + if (em->load & EBPF_LOAD_LEGACY) { + em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); + if (!em->probe_links) { + ret = -1; + } + } +#ifdef LIBBPF_MAJOR_VERSION + else { + mdflush_bpf_obj = mdflush_bpf__open(); + if (!mdflush_bpf_obj) + ret = -1; + else { + ret = ebpf_mdflush_load_and_attach(mdflush_bpf_obj, em); + if (ret && em->targets[NETDATA_MD_FLUSH_REQUEST].mode == EBPF_LOAD_TRAMPOLINE) { + mdflush_bpf__destroy(mdflush_bpf_obj); + mdflush_bpf_obj = mdflush_bpf__open(); + if (!mdflush_bpf_obj) + ret = -1; + else { + em->targets[NETDATA_MD_FLUSH_REQUEST].mode = EBPF_LOAD_PROBE; + ret = ebpf_mdflush_load_and_attach(mdflush_bpf_obj, em); + } + } + } } +#endif + + return ret; } + /** * mdflush thread. 
* @@ -250,15 +430,16 @@ void *ebpf_mdflush_thread(void *ptr) char *md_flush_request = ebpf_find_symbol("md_flush_request"); if (!md_flush_request) { - error("Cannot monitor MD devices, because md is not loaded."); + netdata_log_error("Cannot monitor MD devices, because md is not loaded."); goto endmdflush; } #ifdef LIBBPF_MAJOR_VERSION ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel); + ebpf_adjust_thread_load(em, default_btf); #endif - em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects); - if (!em->probe_links) { + if (ebpf_mdflush_load_bpf(em)) { + netdata_log_error("Cannot load eBPF software."); goto endmdflush; } diff --git a/collectors/ebpf.plugin/ebpf_mdflush.h b/collectors/ebpf.plugin/ebpf_mdflush.h index 4913ad01..62955074 100644 --- a/collectors/ebpf.plugin/ebpf_mdflush.h +++ b/collectors/ebpf.plugin/ebpf_mdflush.h @@ -3,8 +3,9 @@ #ifndef NETDATA_EBPF_MDFLUSH_H #define NETDATA_EBPF_MDFLUSH_H 1 -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_MDFLUSH "mdflush" +#define NETDATA_EBPF_MD_MODULE_DESC "Show information about multi-device software flushes." // charts #define NETDATA_MDFLUSH_GLOBAL_CHART "mdflush" @@ -33,8 +34,15 @@ typedef struct netdata_mdflush { uint64_t cnt; } netdata_mdflush_t; +enum netdata_mdflush_targets { + NETDATA_MD_FLUSH_REQUEST, + + NETDATA_MD_FLUSH_END +}; + void *ebpf_mdflush_thread(void *ptr); extern struct config mdflush_config; +extern netdata_ebpf_targets_t mdflush_targets[]; #endif diff --git a/collectors/ebpf.plugin/ebpf_mount.c b/collectors/ebpf.plugin/ebpf_mount.c index e48c8922..57ea5b2f 100644 --- a/collectors/ebpf.plugin/ebpf_mount.c +++ b/collectors/ebpf.plugin/ebpf_mount.c @@ -223,6 +223,36 @@ static inline int ebpf_mount_load_and_attach(struct mount_bpf *obj, ebpf_module_ *****************************************************************/ /** + * Obsolete global + * + * Obsolete global charts created by thread. 
+ * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_mount_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_EBPF_MOUNT_GLOBAL_FAMILY, + NETDATA_EBPF_MOUNT_CALLS, + "Calls to mount and umount syscalls", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_EBPF_MOUNT_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_EBPF_MOUNT_CHARTS, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_EBPF_MOUNT_GLOBAL_FAMILY, + NETDATA_EBPF_MOUNT_ERRORS, + "Errors to mount and umount file systems", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_EBPF_MOUNT_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_EBPF_MOUNT_CHARTS + 1, + em->update_every); +} + +/** * Mount Exit * * Cancel child thread. @@ -233,15 +263,32 @@ static void ebpf_mount_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + + ebpf_obsolete_mount_global(em); + + fflush(stdout); + pthread_mutex_unlock(&lock); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + #ifdef LIBBPF_MAJOR_VERSION - if (mount_bpf_obj) + if (mount_bpf_obj) { mount_bpf__destroy(mount_bpf_obj); + mount_bpf_obj = NULL; + } #endif - if (em->objects) + if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -318,7 +365,9 @@ static void mount_collector(ebpf_module_t *em) int update_every = em->update_every; int counter = update_every - 1; int maps_per_core = em->maps_per_core; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || 
++counter != update_every) continue; @@ -330,6 +379,15 @@ static void mount_collector(ebpf_module_t *em) ebpf_mount_send_data(); pthread_mutex_unlock(&lock); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -408,7 +466,7 @@ static int ebpf_mount_load_bpf(ebpf_module_t *em) #endif if (ret) - error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); return ret; } @@ -444,7 +502,7 @@ void *ebpf_mount_thread(void *ptr) pthread_mutex_lock(&lock); ebpf_create_mount_charts(em->update_every); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); pthread_mutex_unlock(&lock); mount_collector(em); diff --git a/collectors/ebpf.plugin/ebpf_mount.h b/collectors/ebpf.plugin/ebpf_mount.h index 11b21f83..768914b0 100644 --- a/collectors/ebpf.plugin/ebpf_mount.h +++ b/collectors/ebpf.plugin/ebpf_mount.h @@ -3,8 +3,9 @@ #ifndef NETDATA_EBPF_MOUNT_H #define NETDATA_EBPF_MOUNT_H 1 -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_MOUNT "mount" +#define NETDATA_EBPF_MOUNT_MODULE_DESC "Show calls to syscalls mount(2) and umount(2)." 
#define NETDATA_EBPF_MOUNT_SYSCALL 2 diff --git a/collectors/ebpf.plugin/ebpf_oomkill.c b/collectors/ebpf.plugin/ebpf_oomkill.c index c80f4487..66421d27 100644 --- a/collectors/ebpf.plugin/ebpf_oomkill.c +++ b/collectors/ebpf.plugin/ebpf_oomkill.c @@ -44,6 +44,71 @@ static netdata_publish_syscall_t oomkill_publish_aggregated = {.name = "oomkill" .algorithm = "absolute", .next = NULL}; +static void ebpf_create_specific_oomkill_charts(char *type, int update_every); + +/** + * Obsolete services + * + * Obsolete all service charts created + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_oomkill_services(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_OOMKILL_CHART, + "OOM kills. This chart is provided by eBPF plugin.", + EBPF_COMMON_DIMENSION_KILLS, + NETDATA_EBPF_MEMORY_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 20191, + em->update_every); +} + +/** + * Obsolete cgroup chart + * + * Send obsolete for all charts created before to close. + * + * @param em a pointer to `struct ebpf_module` + */ +static inline void ebpf_obsolete_oomkill_cgroup_charts(ebpf_module_t *em) +{ + pthread_mutex_lock(&mutex_cgroup_shm); + + ebpf_obsolete_oomkill_services(em); + + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + ebpf_create_specific_oomkill_charts(ect->name, em->update_every); + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Obsolete global + * + * Obsolete global charts created by thread. + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_oomkill_apps(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_OOMKILL_CHART, + "OOM kills", + EBPF_COMMON_DIMENSION_KILLS, + "mem", + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20020, + em->update_every); +} + /** * Clean up the main thread. 
* @@ -53,11 +118,30 @@ static void oomkill_cleanup(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - if (em->objects) + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + + if (em->cgroup_charts) { + ebpf_obsolete_oomkill_cgroup_charts(em); + } + + ebpf_obsolete_oomkill_apps(em); + + fflush(stdout); + pthread_mutex_unlock(&lock); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + + if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -259,7 +343,7 @@ static uint32_t oomkill_read_data(int32_t *keys) if (unlikely(test < 0)) { // since there's only 1 thread doing these deletions, it should be // impossible to get this condition. - error("key unexpectedly not available for deletion."); + netdata_log_error("key unexpectedly not available for deletion."); } } @@ -294,6 +378,30 @@ static void ebpf_update_oomkill_cgroup(int32_t *keys, uint32_t total) } /** + * Update OOMkill period + * + * Update oomkill period according function arguments. + * + * @param running_time current value of running_value. + * @param em the thread main structure. + * + * @return It returns new running_time value. + */ +static int ebpf_update_oomkill_period(int running_time, ebpf_module_t *em) +{ + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = em->update_every; + else + running_time += em->update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); + + return running_time; +} + +/** * Main loop for this collector. * * @param em the thread main structure. 
@@ -309,7 +417,9 @@ static void oomkill_collector(ebpf_module_t *em) heartbeat_t hb; heartbeat_init(&hb); int counter = update_every - 1; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) continue; @@ -317,8 +427,10 @@ static void oomkill_collector(ebpf_module_t *em) counter = 0; uint32_t count = oomkill_read_data(keys); - if (!count) + if (!count) { + running_time = ebpf_update_oomkill_period(running_time, em); continue; + } pthread_mutex_lock(&collect_data_mutex); pthread_mutex_lock(&lock); @@ -335,6 +447,8 @@ static void oomkill_collector(ebpf_module_t *em) } pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); + + running_time = ebpf_update_oomkill_period(running_time, em); } } @@ -379,14 +493,14 @@ void *ebpf_oomkill_thread(void *ptr) // we need to disable it. pthread_mutex_lock(&ebpf_exit_cleanup); if (em->enabled) - info("%s apps integration is completely disabled.", NETDATA_DEFAULT_OOM_DISABLED_MSG); + netdata_log_info("%s apps integration is completely disabled.", NETDATA_DEFAULT_OOM_DISABLED_MSG); pthread_mutex_unlock(&ebpf_exit_cleanup); goto endoomkill; } else if (running_on_kernel < NETDATA_EBPF_KERNEL_4_14) { pthread_mutex_lock(&ebpf_exit_cleanup); if (em->enabled) - info("%s kernel does not have necessary tracepoints.", NETDATA_DEFAULT_OOM_DISABLED_MSG); + netdata_log_info("%s kernel does not have necessary tracepoints.", NETDATA_DEFAULT_OOM_DISABLED_MSG); pthread_mutex_unlock(&ebpf_exit_cleanup); goto endoomkill; @@ -406,7 +520,7 @@ void *ebpf_oomkill_thread(void *ptr) pthread_mutex_lock(&lock); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); pthread_mutex_unlock(&lock); 
oomkill_collector(em); diff --git a/collectors/ebpf.plugin/ebpf_oomkill.h b/collectors/ebpf.plugin/ebpf_oomkill.h index f921f9d8..4a5fa62a 100644 --- a/collectors/ebpf.plugin/ebpf_oomkill.h +++ b/collectors/ebpf.plugin/ebpf_oomkill.h @@ -3,6 +3,9 @@ #ifndef NETDATA_EBPF_OOMKILL_H #define NETDATA_EBPF_OOMKILL_H 1 +// Module description +#define NETDATA_EBPF_OOMKILL_MODULE_DESC "Show OOM kills for all applications recognized via the apps.plugin." + /***************************************************************** * copied from kernel-collectors repo, with modifications needed * for inclusion here. diff --git a/collectors/ebpf.plugin/ebpf_process.c b/collectors/ebpf.plugin/ebpf_process.c index 17a9809d..4d915e13 100644 --- a/collectors/ebpf.plugin/ebpf_process.c +++ b/collectors/ebpf.plugin/ebpf_process.c @@ -59,20 +59,15 @@ ebpf_process_stat_t *process_stat_vector = NULL; static netdata_syscall_stat_t process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_END]; static netdata_publish_syscall_t process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_END]; -int process_enabled = 0; -bool publish_internal_metrics = true; - struct config process_config = { .first_section = NULL, .last_section = NULL, .mutex = NETDATA_MUTEX_INITIALIZER, .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, .rwlock = AVL_LOCK_INITIALIZER } }; -static char *threads_stat[NETDATA_EBPF_THREAD_STAT_END] = {"total", "running"}; -static char *load_event_stat[NETDATA_EBPF_LOAD_STAT_END] = {"legacy", "co-re"}; -static char *memlock_stat = {"memory_locked"}; -static char *hash_table_stat = {"hash_table"}; -static char *hash_table_core[NETDATA_EBPF_LOAD_STAT_END] = {"per_core", "unique"}; +#ifdef NETDATA_DEV_MODE +int process_disable_priority; +#endif /***************************************************************** * @@ -427,182 +422,8 @@ static void ebpf_create_global_charts(ebpf_module_t *em) &process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK], 2, em->update_every, 
NETDATA_EBPF_MODULE_NAME_PROCESS); } -} - -/** - * Create chart for Statistic Thread - * - * Write to standard output current values for threads. - * - * @param em a pointer to the structure with the default values. - */ -static inline void ebpf_create_statistic_thread_chart(ebpf_module_t *em) -{ - ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, - NETDATA_EBPF_THREADS, - "Threads info.", - "threads", - NETDATA_EBPF_FAMILY, - NETDATA_EBPF_CHART_TYPE_LINE, - NULL, - 140000, - em->update_every, - NETDATA_EBPF_MODULE_NAME_PROCESS); - - ebpf_write_global_dimension(threads_stat[NETDATA_EBPF_THREAD_STAT_TOTAL], - threads_stat[NETDATA_EBPF_THREAD_STAT_TOTAL], - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); - - ebpf_write_global_dimension(threads_stat[NETDATA_EBPF_THREAD_STAT_RUNNING], - threads_stat[NETDATA_EBPF_THREAD_STAT_RUNNING], - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); -} - -/** - * Create chart for Load Thread - * - * Write to standard output current values for load mode. - * - * @param em a pointer to the structure with the default values. - */ -static inline void ebpf_create_statistic_load_chart(ebpf_module_t *em) -{ - ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, - NETDATA_EBPF_LOAD_METHOD, - "Load info.", - "methods", - NETDATA_EBPF_FAMILY, - NETDATA_EBPF_CHART_TYPE_LINE, - NULL, - 140001, - em->update_every, - NETDATA_EBPF_MODULE_NAME_PROCESS); - - ebpf_write_global_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_LEGACY], - load_event_stat[NETDATA_EBPF_LOAD_STAT_LEGACY], - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); - - ebpf_write_global_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_CORE], - load_event_stat[NETDATA_EBPF_LOAD_STAT_CORE], - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); -} - -/** - * Create chart for Kernel Memory - * - * Write to standard output current values for allocated memory. - * - * @param em a pointer to the structure with the default values. 
- */ -static inline void ebpf_create_statistic_kernel_memory(ebpf_module_t *em) -{ - ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, - NETDATA_EBPF_KERNEL_MEMORY, - "Memory allocated for hash tables.", - "bytes", - NETDATA_EBPF_FAMILY, - NETDATA_EBPF_CHART_TYPE_LINE, - NULL, - 140002, - em->update_every, - NETDATA_EBPF_MODULE_NAME_PROCESS); - - ebpf_write_global_dimension(memlock_stat, - memlock_stat, - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); -} - -/** - * Create chart Hash Table - * - * Write to standard output number of hash tables used with this software. - * - * @param em a pointer to the structure with the default values. - */ -static inline void ebpf_create_statistic_hash_tables(ebpf_module_t *em) -{ - ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, - NETDATA_EBPF_HASH_TABLES_LOADED, - "Number of hash tables loaded.", - "hash tables", - NETDATA_EBPF_FAMILY, - NETDATA_EBPF_CHART_TYPE_LINE, - NULL, - 140003, - em->update_every, - NETDATA_EBPF_MODULE_NAME_PROCESS); - - ebpf_write_global_dimension(hash_table_stat, - hash_table_stat, - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); -} - -/** - * Create chart for percpu stats - * - * Write to standard output current values for threads. - * - * @param em a pointer to the structure with the default values. 
- */ -static inline void ebpf_create_statistic_hash_per_core(ebpf_module_t *em) -{ - ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, - NETDATA_EBPF_HASH_TABLES_PER_CORE, - "How threads are loading hash/array tables.", - "threads", - NETDATA_EBPF_FAMILY, - NETDATA_EBPF_CHART_TYPE_LINE, - NULL, - 140004, - em->update_every, - NETDATA_EBPF_MODULE_NAME_PROCESS); - - ebpf_write_global_dimension(hash_table_core[NETDATA_EBPF_THREAD_PER_CORE], - hash_table_core[NETDATA_EBPF_THREAD_PER_CORE], - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); - - ebpf_write_global_dimension(hash_table_core[NETDATA_EBPF_THREAD_UNIQUE], - hash_table_core[NETDATA_EBPF_THREAD_UNIQUE], - ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); -} - -/** - * Update Internal Metric variable - * - * By default eBPF.plugin sends internal metrics for netdata, but user can - * disable this. - * - * The function updates the variable used to send charts. - */ -static void update_internal_metric_variable() -{ - const char *s = getenv("NETDATA_INTERNALS_MONITORING"); - if (s && *s && strcmp(s, "NO") == 0) - publish_internal_metrics = false; -} - -/** - * Create Statistics Charts - * - * Create charts that will show statistics related to eBPF plugin. - * - * @param em a pointer to the structure with the default values. 
- */ -static void ebpf_create_statistic_charts(ebpf_module_t *em) -{ - update_internal_metric_variable(); - if (!publish_internal_metrics) - return; - ebpf_create_statistic_thread_chart(em); - - ebpf_create_statistic_load_chart(em); - - ebpf_create_statistic_kernel_memory(em); - - ebpf_create_statistic_hash_tables(em); - - ebpf_create_statistic_hash_per_core(em); + fflush(stdout); } /** @@ -673,6 +494,206 @@ void ebpf_process_create_apps_charts(struct ebpf_module *em, void *ptr) * *****************************************************************/ +static void ebpf_obsolete_specific_process_charts(char *type, ebpf_module_t *em); + +/** + * Obsolete services + * + * Obsolete all service charts created + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_process_services(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_TASK_PROCESS, + "Process started", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20065, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_TASK_THREAD, + "Threads started", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20066, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_TASK_CLOSE, + "Tasks starts exit process.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20067, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_TASK_EXIT, + "Tasks closed", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20068, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_TASK_ERROR, + "Errors to create process or threads.", + 
EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20069, + em->update_every); + } +} + +/** + * Obsolete cgroup chart + * + * Send obsolete for all charts created before to close. + * + * @param em a pointer to `struct ebpf_module` + */ +static inline void ebpf_obsolete_process_cgroup_charts(ebpf_module_t *em) { + pthread_mutex_lock(&mutex_cgroup_shm); + + ebpf_obsolete_process_services(em); + + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + ebpf_obsolete_specific_process_charts(ect->name, em); + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Obsolette apps charts + * + * Obsolete apps charts. + * + * @param em a pointer to the structure with the default values. + */ +void ebpf_obsolete_process_apps_charts(struct ebpf_module *em) +{ + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_TASK_PROCESS, + "Process started", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20065, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_TASK_THREAD, + "Threads started", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20066, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_TASK_EXIT, + "Tasks starts exit process.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20067, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_TASK_CLOSE, + "Tasks closed", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20068, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_TASK_ERROR, + "Errors to create process or threads.", + 
EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20069, + em->update_every); + } +} + +/** + * Obsolete global + * + * Obsolete global charts created by thread. + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_process_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, + NETDATA_PROCESS_SYSCALL, + "Start process", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21002, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, + NETDATA_EXIT_SYSCALL, + "Exit process", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21003, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, + NETDATA_PROCESS_STATUS_NAME, + "Process not closed", + EBPF_COMMON_DIMENSION_DIFFERENCE, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21004, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, + NETDATA_PROCESS_ERROR_NAME, + "Fails to create process", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_PROCESS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21005, + em->update_every); + } +} + /** * Process disable tracepoints * @@ -683,17 +704,17 @@ static void ebpf_process_disable_tracepoints() char *default_message = { "Cannot disable the tracepoint" }; if (!was_sched_process_exit_enabled) { if (ebpf_disable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_exit)) - error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_exit); + netdata_log_error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_exit); } if (!was_sched_process_exec_enabled) { if (ebpf_disable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_exec)) - error("%s %s/%s.", default_message, tracepoint_sched_type, 
tracepoint_sched_process_exec); + netdata_log_error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_exec); } if (!was_sched_process_fork_enabled) { if (ebpf_disable_tracing_values(tracepoint_sched_type, tracepoint_sched_process_fork)) - error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_fork); + netdata_log_error("%s %s/%s.", default_message, tracepoint_sched_type, tracepoint_sched_process_fork); } } @@ -708,6 +729,37 @@ static void ebpf_process_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + if (em->cgroup_charts) { + ebpf_obsolete_process_cgroup_charts(em); + fflush(stdout); + } + + if (em->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + ebpf_obsolete_process_apps_charts(em); + } + + ebpf_obsolete_process_global(em); + +#ifdef NETDATA_DEV_MODE + if (ebpf_aral_process_stat) + ebpf_statistic_obsolete_aral_chart(em, process_disable_priority); +#endif + + + fflush(stdout); + pthread_mutex_unlock(&lock); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + + if (em->objects) { + ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } + freez(process_hash_values); freez(process_stat_vector); @@ -716,6 +768,7 @@ static void ebpf_process_exit(void *ptr) pthread_mutex_lock(&ebpf_exit_cleanup); process_pid_fd = -1; em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -740,14 +793,14 @@ static void ebpf_process_sum_cgroup_pids(ebpf_process_stat_t *ps, struct pid_on_ memset(&accumulator, 0, sizeof(accumulator)); while (pids) { - ebpf_process_stat_t *ps = &pids->ps; + ebpf_process_stat_t *pps = &pids->ps; - accumulator.exit_call += ps->exit_call; - accumulator.release_call += ps->release_call; - accumulator.create_process 
+= ps->create_process; - accumulator.create_thread += ps->create_thread; + accumulator.exit_call += pps->exit_call; + accumulator.release_call += pps->release_call; + accumulator.create_process += pps->create_process; + accumulator.create_thread += pps->create_thread; - accumulator.task_err += ps->task_err; + accumulator.task_err += pps->task_err; pids = pids->next; } @@ -1047,40 +1100,6 @@ void ebpf_process_update_cgroup_algorithm() } /** - * Send Statistic Data - * - * Send statistic information to netdata. - */ -void ebpf_send_statistic_data() -{ - if (!publish_internal_metrics) - return; - - write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_THREADS); - write_chart_dimension(threads_stat[NETDATA_EBPF_THREAD_STAT_TOTAL], (long long)plugin_statistics.threads); - write_chart_dimension(threads_stat[NETDATA_EBPF_THREAD_STAT_RUNNING], (long long)plugin_statistics.running); - write_end_chart(); - - write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_LOAD_METHOD); - write_chart_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_LEGACY], (long long)plugin_statistics.legacy); - write_chart_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_CORE], (long long)plugin_statistics.core); - write_end_chart(); - - write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_KERNEL_MEMORY); - write_chart_dimension(memlock_stat, (long long)plugin_statistics.memlock_kern); - write_end_chart(); - - write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_LOADED); - write_chart_dimension(hash_table_stat, (long long)plugin_statistics.hash_tables); - write_end_chart(); - - write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_PER_CORE); - write_chart_dimension(hash_table_core[NETDATA_EBPF_THREAD_PER_CORE], (long long)plugin_statistics.hash_percpu); - write_chart_dimension(hash_table_core[NETDATA_EBPF_THREAD_UNIQUE], (long long)plugin_statistics.hash_unique); - write_end_chart(); -} - -/** * Main loop for this collector. 
* * @param em the structure with thread information @@ -1092,7 +1111,6 @@ static void process_collector(ebpf_module_t *em) int publish_global = em->global_charts; int cgroups = em->cgroup_charts; pthread_mutex_lock(&ebpf_exit_cleanup); - int thread_enabled = em->enabled; process_pid_fd = process_maps[NETDATA_PROCESS_PID_TABLE].map_fd; pthread_mutex_unlock(&ebpf_exit_cleanup); if (cgroups) @@ -1101,7 +1119,9 @@ static void process_collector(ebpf_module_t *em) int update_every = em->update_every; int counter = update_every - 1; int maps_per_core = em->maps_per_core; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { usec_t dt = heartbeat_next(&hb, USEC_PER_SEC); (void)dt; if (ebpf_exit_plugin) @@ -1122,28 +1142,35 @@ static void process_collector(ebpf_module_t *em) } pthread_mutex_lock(&lock); - ebpf_send_statistic_data(); - if (thread_enabled == NETDATA_THREAD_EBPF_RUNNING) { - if (publish_global) { - ebpf_process_send_data(em); - } + if (publish_global) { + ebpf_process_send_data(em); + } - if (apps_enabled & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { - ebpf_process_send_apps_data(apps_groups_root_target, em); - } + if (apps_enabled & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + ebpf_process_send_apps_data(apps_groups_root_target, em); + } #ifdef NETDATA_DEV_MODE - if (ebpf_aral_process_stat) - ebpf_send_data_aral_chart(ebpf_aral_process_stat, em); + if (ebpf_aral_process_stat) + ebpf_send_data_aral_chart(ebpf_aral_process_stat, em); #endif - if (cgroups && shm_ebpf_cgroup.header) { - ebpf_process_send_cgroup_data(em); - } + if (cgroups && shm_ebpf_cgroup.header) { + ebpf_process_send_cgroup_data(em); } + pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + 
pthread_mutex_unlock(&ebpf_exit_cleanup); } fflush(stdout); @@ -1254,7 +1281,6 @@ void *ebpf_process_thread(void *ptr) if (ebpf_process_enable_tracepoints()) { em->enabled = em->global_charts = em->apps_charts = em->cgroup_charts = NETDATA_THREAD_EBPF_STOPPING; } - process_enabled = em->enabled; pthread_mutex_unlock(&ebpf_exit_cleanup); pthread_mutex_lock(&lock); @@ -1276,27 +1302,22 @@ void *ebpf_process_thread(void *ptr) process_aggregated_data, process_publish_aggregated, process_dimension_names, process_id_names, algorithms, NETDATA_KEY_PUBLISH_PROCESS_END); - if (process_enabled == NETDATA_THREAD_EBPF_RUNNING) { - ebpf_create_global_charts(em); - } + ebpf_create_global_charts(em); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); #ifdef NETDATA_DEV_MODE if (ebpf_aral_process_stat) - ebpf_statistic_create_aral_chart(NETDATA_EBPF_PROC_ARAL_NAME, em); + process_disable_priority = ebpf_statistic_create_aral_chart(NETDATA_EBPF_PROC_ARAL_NAME, em); #endif - ebpf_create_statistic_charts(em); - pthread_mutex_unlock(&lock); process_collector(em); pthread_mutex_lock(&ebpf_exit_cleanup); - if (em->enabled == NETDATA_THREAD_EBPF_RUNNING) - ebpf_update_disabled_plugin_stats(em); + ebpf_update_disabled_plugin_stats(em); pthread_mutex_unlock(&ebpf_exit_cleanup); netdata_thread_cleanup_pop(1); diff --git a/collectors/ebpf.plugin/ebpf_process.h b/collectors/ebpf.plugin/ebpf_process.h index bccdc0eb..48267d87 100644 --- a/collectors/ebpf.plugin/ebpf_process.h +++ b/collectors/ebpf.plugin/ebpf_process.h @@ -3,8 +3,9 @@ #ifndef NETDATA_EBPF_PROCESS_H #define NETDATA_EBPF_PROCESS_H 1 -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_PROCESS "process" +#define NETDATA_EBPF_MODULE_PROCESS_DESC "Monitor information about process life. This thread is integrated with apps and cgroup." 
// Groups used on Dashboard #define NETDATA_PROCESS_GROUP "processes" @@ -41,12 +42,13 @@ #define NETDATA_EBPF_CGROUP_UPDATE 30 -// Statistical information -enum netdata_ebpf_thread_stats{ - NETDATA_EBPF_THREAD_STAT_TOTAL, - NETDATA_EBPF_THREAD_STAT_RUNNING, - - NETDATA_EBPF_THREAD_STAT_END +enum netdata_ebpf_stats_order { + NETDATA_EBPF_ORDER_STAT_THREADS = 140000, + NETDATA_EBPF_ORDER_STAT_LIFE_TIME, + NETDATA_EBPF_ORDER_STAT_LOAD_METHOD, + NETDATA_EBPF_ORDER_STAT_KERNEL_MEMORY, + NETDATA_EBPF_ORDER_STAT_HASH_TABLES, + NETDATA_EBPF_ORDER_STAT_HASH_CORE }; enum netdata_ebpf_load_mode_stats{ diff --git a/collectors/ebpf.plugin/ebpf_shm.c b/collectors/ebpf.plugin/ebpf_shm.c index 94ac624b..78ada81f 100644 --- a/collectors/ebpf.plugin/ebpf_shm.c +++ b/collectors/ebpf.plugin/ebpf_shm.c @@ -50,6 +50,10 @@ netdata_ebpf_targets_t shm_targets[] = { {.name = "shmget", .mode = EBPF_LOAD_TR {.name = "shmctl", .mode = EBPF_LOAD_TRAMPOLINE}, {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; +#ifdef NETDATA_DEV_MODE +int shm_disable_priority; +#endif + #ifdef LIBBPF_MAJOR_VERSION /***************************************************************** * @@ -288,6 +292,150 @@ static inline int ebpf_shm_load_and_attach(struct shm_bpf *obj, ebpf_module_t *e * FUNCTIONS TO CLOSE THE THREAD *****************************************************************/ +static void ebpf_obsolete_specific_shm_charts(char *type, int update_every); + +/** + * Obsolete services + * + * Obsolete all service charts created + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_shm_services(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SHMGET_CHART, + "Calls to syscall <code>shmget(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20191, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SHMAT_CHART, + "Calls to syscall <code>shmat(2)</code>.", 
+ EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20192, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SHMDT_CHART, + "Calls to syscall <code>shmdt(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20193, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SHMCTL_CHART, + "Calls to syscall <code>shmctl(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20193, + em->update_every); +} + +/** + * Obsolete cgroup chart + * + * Send obsolete for all charts created before to close. + * + * @param em a pointer to `struct ebpf_module` + */ +static inline void ebpf_obsolete_shm_cgroup_charts(ebpf_module_t *em) { + pthread_mutex_lock(&mutex_cgroup_shm); + + ebpf_obsolete_shm_services(em); + + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + ebpf_obsolete_specific_shm_charts(ect->name, em->update_every); + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Obsolette apps charts + * + * Obsolete apps charts. + * + * @param em a pointer to the structure with the default values. 
+ */ +void ebpf_obsolete_shm_apps_charts(struct ebpf_module *em) +{ + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SHMGET_CHART, + "Calls to syscall <code>shmget(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20191, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SHMAT_CHART, + "Calls to syscall <code>shmat(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20192, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SHMDT_CHART, + "Calls to syscall <code>shmdt(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20193, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SHMCTL_CHART, + "Calls to syscall <code>shmctl(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_APPS_IPC_SHM_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20194, + em->update_every); +} + +/** + * Obsolete global + * + * Obsolete global charts created by thread. 
+ * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_shm_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, + NETDATA_SHM_GLOBAL_CHART, + "Calls to shared memory system calls", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SYSTEM_IPC_SHM_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_SYSTEM_IPC_SHARED_MEM_CALLS, + em->update_every); +} + /** * SHM Exit * @@ -299,16 +447,46 @@ static void ebpf_shm_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + if (em->cgroup_charts) { + ebpf_obsolete_shm_cgroup_charts(em); + fflush(stdout); + } + + if (em->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + ebpf_obsolete_shm_apps_charts(em); + } + + ebpf_obsolete_shm_global(em); + +#ifdef NETDATA_DEV_MODE + if (ebpf_aral_shm_pid) + ebpf_statistic_obsolete_aral_chart(em, shm_disable_priority); +#endif + + fflush(stdout); + pthread_mutex_unlock(&lock); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + #ifdef LIBBPF_MAJOR_VERSION - if (shm_bpf_obj) + if (shm_bpf_obj) { shm_bpf__destroy(shm_bpf_obj); + shm_bpf_obj = NULL; + } #endif - if (em->objects) + if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -859,7 +1037,9 @@ static void shm_collector(ebpf_module_t *em) heartbeat_init(&hb); int counter = update_every - 1; int maps_per_core = em->maps_per_core; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) 
continue; @@ -895,6 +1075,15 @@ static void shm_collector(ebpf_module_t *em) pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -1037,7 +1226,7 @@ static int ebpf_shm_load_bpf(ebpf_module_t *em) if (ret) - error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); return ret; } @@ -1084,10 +1273,10 @@ void *ebpf_shm_thread(void *ptr) pthread_mutex_lock(&lock); ebpf_create_shm_charts(em->update_every); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); #ifdef NETDATA_DEV_MODE if (ebpf_aral_shm_pid) - ebpf_statistic_create_aral_chart(NETDATA_EBPF_SHM_ARAL_NAME, em); + shm_disable_priority = ebpf_statistic_create_aral_chart(NETDATA_EBPF_SHM_ARAL_NAME, em); #endif pthread_mutex_unlock(&lock); diff --git a/collectors/ebpf.plugin/ebpf_shm.h b/collectors/ebpf.plugin/ebpf_shm.h index f58eaa6c..a415006e 100644 --- a/collectors/ebpf.plugin/ebpf_shm.h +++ b/collectors/ebpf.plugin/ebpf_shm.h @@ -3,8 +3,9 @@ #ifndef NETDATA_EBPF_SHM_H #define NETDATA_EBPF_SHM_H 1 -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_SHM "shm" +#define NETDATA_EBPF_SHM_MODULE_DESC "Show calls to syscalls shmget(2), shmat(2), shmdt(2) and shmctl(2). This thread is integrated with apps and cgroup." 
// charts #define NETDATA_SHM_GLOBAL_CHART "shared_memory_calls" diff --git a/collectors/ebpf.plugin/ebpf_socket.c b/collectors/ebpf.plugin/ebpf_socket.c index b45dec7d..2cad8bdf 100644 --- a/collectors/ebpf.plugin/ebpf_socket.c +++ b/collectors/ebpf.plugin/ebpf_socket.c @@ -130,6 +130,10 @@ struct netdata_static_thread socket_threads = { .start_routine = NULL }; +#ifdef NETDATA_DEV_MODE +int socket_disable_priority; +#endif + #ifdef LIBBPF_MAJOR_VERSION /** * Disable Probe @@ -646,6 +650,8 @@ static void ebpf_socket_free(ebpf_module_t *em ) pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -1217,6 +1223,8 @@ static void ebpf_create_global_charts(ebpf_module_t *em) &socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF], 2, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); } + + fflush(stdout); } /** @@ -1844,7 +1852,7 @@ static void fill_last_nv_dimension(netdata_socket_plot_t *ptr, int is_outbound) fill_resolved_name(ptr, hostname, 10 + NETDATA_DOTS_PROTOCOL_COMBINED_LENGTH, service_name, is_outbound); #ifdef NETDATA_INTERNAL_CHECKS - info("Last %s dimension added: ID = %u, IP = OTHER, NAME = %s, DIM1 = %s, DIM2 = %s, DIM3 = %s", + netdata_log_info("Last %s dimension added: ID = %u, IP = OTHER, NAME = %s, DIM1 = %s, DIM2 = %s, DIM3 = %s", (is_outbound)?"outbound":"inbound", network_viewer_opt.max_dim - 1, ptr->resolved_name, ptr->dimension_recv, ptr->dimension_sent, ptr->dimension_retransmit); #endif @@ -1927,12 +1935,12 @@ static void store_socket_inside_avl(netdata_vector_plot_t *out, netdata_socket_t netdata_socket_plot_t *check ; check = (netdata_socket_plot_t *) avl_insert_lock(&out->tree, (avl_t *)w); if (check != w) - error("Internal error, cannot insert the AVL tree."); + netdata_log_error("Internal error, cannot insert the AVL tree."); 
#ifdef NETDATA_INTERNAL_CHECKS char iptext[INET6_ADDRSTRLEN]; if (inet_ntop(family, &w->index.daddr.addr8, iptext, sizeof(iptext))) - info("New %s dimension added: ID = %u, IP = %s, NAME = %s, DIM1 = %s, DIM2 = %s, DIM3 = %s", + netdata_log_info("New %s dimension added: ID = %u, IP = %s, NAME = %s, DIM1 = %s, DIM2 = %s, DIM3 = %s", (out == &inbound_vectors)?"inbound":"outbound", curr, iptext, w->resolved_name, w->dimension_recv, w->dimension_sent, w->dimension_retransmit); #endif @@ -2120,7 +2128,7 @@ void update_listen_table(uint16_t value, uint16_t proto, netdata_passive_connect fill_nv_port_list(w, value, proto, in); #ifdef NETDATA_INTERNAL_CHECKS - info("The network viewer is monitoring inbound connections for port %u", ntohs(value)); + netdata_log_info("The network viewer is monitoring inbound connections for port %u", ntohs(value)); #endif } @@ -2177,7 +2185,9 @@ void *ebpf_socket_read_hash(void *ptr) int fd_ipv6 = socket_maps[NETDATA_SOCKET_TABLE_IPV6].map_fd; int maps_per_core = em->maps_per_core; // This thread is cancelled from another thread - for (;;) { + uint32_t running_time; + uint32_t lifetime = em->lifetime; + for (running_time = 0;!ebpf_exit_plugin && running_time < lifetime; running_time++) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin) break; @@ -2918,7 +2928,9 @@ static void socket_collector(ebpf_module_t *em) int update_every = em->update_every; int maps_per_core = em->maps_per_core; int counter = update_every - 1; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) continue; @@ -2973,6 +2985,15 @@ static void socket_collector(ebpf_module_t *em) } pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + 
else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -3044,14 +3065,14 @@ static inline void fill_port_list(ebpf_network_viewer_port_list_t **out, ebpf_ne uint16_t cmp_last = ntohs(move->last); if (cmp_first <= first && first <= cmp_last && cmp_first <= last && last <= cmp_last ) { - info("The range/value (%u, %u) is inside the range/value (%u, %u) already inserted, it will be ignored.", + netdata_log_info("The range/value (%u, %u) is inside the range/value (%u, %u) already inserted, it will be ignored.", first, last, cmp_first, cmp_last); freez(in->value); freez(in); return; } else if (first <= cmp_first && cmp_first <= last && first <= cmp_last && cmp_last <= last) { - info("The range (%u, %u) is bigger than previous range (%u, %u) already inserted, the previous will be ignored.", + netdata_log_info("The range (%u, %u) is bigger than previous range (%u, %u) already inserted, the previous will be ignored.", first, last, cmp_first, cmp_last); freez(move->value); move->value = in->value; @@ -3071,7 +3092,7 @@ static inline void fill_port_list(ebpf_network_viewer_port_list_t **out, ebpf_ne } #ifdef NETDATA_INTERNAL_CHECKS - info("Adding values %s( %u, %u) to %s port list used on network viewer", + netdata_log_info("Adding values %s( %u, %u) to %s port list used on network viewer", in->value, ntohs(in->first), ntohs(in->last), (*out == network_viewer_opt.included_port)?"included":"excluded"); #endif @@ -3091,7 +3112,7 @@ static void parse_service_list(void **out, char *service) serv = getservbyname((const char *)service, "udp"); if (!serv) { - info("Cannot resolv the service '%s' with protocols TCP and UDP, it will be ignored", service); + netdata_log_info("Cannot resolv the service '%s' with protocols TCP and UDP, it will be ignored", service); return; } @@ -3165,7 +3186,7 @@ static inline in_addr_t ipv4_network(in_addr_t addr, int prefix) static inline int ip2nl(uint8_t *dst, char *ip, int 
domain, char *source) { if (inet_pton(domain, ip, dst) <= 0) { - error("The address specified (%s) is invalid ", source); + netdata_log_error("The address specified (%s) is invalid ", source); return -1; } @@ -3301,7 +3322,7 @@ void ebpf_fill_ip_list(ebpf_network_viewer_ip_list_t **out, ebpf_network_viewer_ while (move) { if (in->ver == move->ver && ebpf_is_ip_inside_range(&move->first, &move->last, &in->first, &in->last, in->ver)) { - info("The range/value (%s) is inside the range/value (%s) already inserted, it will be ignored.", + netdata_log_info("The range/value (%s) is inside the range/value (%s) already inserted, it will be ignored.", in->value, move->value); freez(in->value); freez(in); @@ -3319,14 +3340,14 @@ void ebpf_fill_ip_list(ebpf_network_viewer_ip_list_t **out, ebpf_network_viewer_ #ifdef NETDATA_INTERNAL_CHECKS char first[256], last[512]; if (in->ver == AF_INET) { - info("Adding values %s: (%u - %u) to %s IP list \"%s\" used on network viewer", + netdata_log_info("Adding values %s: (%u - %u) to %s IP list \"%s\" used on network viewer", in->value, in->first.addr32[0], in->last.addr32[0], (*out == network_viewer_opt.included_ips)?"included":"excluded", table); } else { if (inet_ntop(AF_INET6, in->first.addr8, first, INET6_ADDRSTRLEN) && inet_ntop(AF_INET6, in->last.addr8, last, INET6_ADDRSTRLEN)) - info("Adding values %s - %s to %s IP list \"%s\" used on network viewer", + netdata_log_info("Adding values %s - %s to %s IP list \"%s\" used on network viewer", first, last, (*out == network_viewer_opt.included_ips)?"included":"excluded", table); @@ -3373,7 +3394,7 @@ static void ebpf_parse_ip_list(void **out, char *ip) select = (*end == '/') ? 
0 : 1; *end++ = '\0'; if (*end == '!') { - info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup); + netdata_log_info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup); goto cleanipdup; } @@ -3384,7 +3405,7 @@ static void ebpf_parse_ip_list(void **out, char *ip) select = (int) str2i(end); if (select < NETDATA_MINIMUM_IPV4_CIDR || select > NETDATA_MAXIMUM_IPV4_CIDR) { - info("The specified CIDR %s is not valid, the IP %s will be ignored.", end, ip); + netdata_log_info("The specified CIDR %s is not valid, the IP %s will be ignored.", end, ip); goto cleanipdup; } @@ -3400,7 +3421,7 @@ static void ebpf_parse_ip_list(void **out, char *ip) ipv4_convert.s_addr = ipv4_test; char ipv4_msg[INET_ADDRSTRLEN]; if(inet_ntop(AF_INET, &ipv4_convert, ipv4_msg, INET_ADDRSTRLEN)) - info("The network value of CIDR %s was updated for %s .", ipdup, ipv4_msg); + netdata_log_info("The network value of CIDR %s was updated for %s .", ipdup, ipv4_msg); } } else { // Range select = ip2nl(first.addr8, ip, AF_INET, ipdup); @@ -3413,7 +3434,7 @@ static void ebpf_parse_ip_list(void **out, char *ip) } if (htonl(first.addr32[0]) > htonl(last.addr32[0])) { - info("The specified range %s is invalid, the second address is smallest than the first, it will be ignored.", + netdata_log_info("The specified range %s is invalid, the second address is smallest than the first, it will be ignored.", ipdup); goto cleanipdup; } @@ -3427,7 +3448,7 @@ static void ebpf_parse_ip_list(void **out, char *ip) } else if (*end == '-') { *end++ = 0x00; if (*end == '!') { - info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup); + netdata_log_info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup); goto cleanipdup; } @@ -3441,13 +3462,13 @@ static void ebpf_parse_ip_list(void **out, char *ip) } else { // CIDR *end++ = 0x00; if (*end == '!') { - info("The exclusion 
cannot be in the second part of the range %s, it will be ignored.", ipdup); + netdata_log_info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup); goto cleanipdup; } select = str2i(end); if (select < 0 || select > 128) { - info("The CIDR %s is not valid, the address %s will be ignored.", end, ip); + netdata_log_info("The CIDR %s is not valid, the address %s will be ignored.", end, ip); goto cleanipdup; } @@ -3469,14 +3490,14 @@ static void ebpf_parse_ip_list(void **out, char *ip) char ipv6_msg[INET6_ADDRSTRLEN]; if(inet_ntop(AF_INET6, &ipv6_convert, ipv6_msg, INET6_ADDRSTRLEN)) - info("The network value of CIDR %s was updated for %s .", ipdup, ipv6_msg); + netdata_log_info("The network value of CIDR %s was updated for %s .", ipdup, ipv6_msg); } } if ((be64toh(*(uint64_t *)&first.addr32[2]) > be64toh(*(uint64_t *)&last.addr32[2]) && !memcmp(first.addr32, last.addr32, 2*sizeof(uint32_t))) || (be64toh(*(uint64_t *)&first.addr32) > be64toh(*(uint64_t *)&last.addr32)) ) { - info("The specified range %s is invalid, the second address is smallest than the first, it will be ignored.", + netdata_log_info("The specified range %s is invalid, the second address is smallest than the first, it will be ignored.", ipdup); goto cleanipdup; } @@ -3580,7 +3601,7 @@ static void parse_port_list(void **out, char *range) if (likely(*end)) { *end++ = '\0'; if (*end == '!') { - info("The exclusion cannot be in the second part of the range, the range %s will be ignored.", copied); + netdata_log_info("The exclusion cannot be in the second part of the range, the range %s will be ignored.", copied); freez(copied); return; } @@ -3591,7 +3612,7 @@ static void parse_port_list(void **out, char *range) first = str2i((const char *)range); if (first < NETDATA_MINIMUM_PORT_VALUE || first > NETDATA_MAXIMUM_PORT_VALUE) { - info("The first port %d of the range \"%s\" is invalid and it will be ignored!", first, copied); + netdata_log_info("The first port %d of the 
range \"%s\" is invalid and it will be ignored!", first, copied); freez(copied); return; } @@ -3600,13 +3621,13 @@ static void parse_port_list(void **out, char *range) last = first; if (last < NETDATA_MINIMUM_PORT_VALUE || last > NETDATA_MAXIMUM_PORT_VALUE) { - info("The second port %d of the range \"%s\" is invalid and the whole range will be ignored!", last, copied); + netdata_log_info("The second port %d of the range \"%s\" is invalid and the whole range will be ignored!", last, copied); freez(copied); return; } if (first > last) { - info("The specified order %s is wrong, the smallest value is always the first, it will be ignored!", copied); + netdata_log_info("The specified order %s is wrong, the smallest value is always the first, it will be ignored!", copied); freez(copied); return; } @@ -3639,14 +3660,14 @@ static void read_max_dimension(struct config *cfg) EBPF_MAXIMUM_DIMENSIONS, NETDATA_NV_CAP_VALUE); if (maxdim < 0) { - error("'maximum dimensions = %d' must be a positive number, Netdata will change for default value %ld.", + netdata_log_error("'maximum dimensions = %d' must be a positive number, Netdata will change for default value %ld.", maxdim, NETDATA_NV_CAP_VALUE); maxdim = NETDATA_NV_CAP_VALUE; } maxdim /= 2; if (!maxdim) { - info("The number of dimensions is too small (%u), we are setting it to minimum 2", network_viewer_opt.max_dim); + netdata_log_info("The number of dimensions is too small (%u), we are setting it to minimum 2", network_viewer_opt.max_dim); network_viewer_opt.max_dim = 1; return; } @@ -3714,7 +3735,7 @@ static void link_hostname(ebpf_network_viewer_hostname_list_t **out, ebpf_networ ebpf_network_viewer_hostname_list_t *move = *out; for (; move->next ; move = move->next ) { if (move->hash == in->hash && !strcmp(move->value, in->value)) { - info("The hostname %s was already inserted, it will be ignored.", in->value); + netdata_log_info("The hostname %s was already inserted, it will be ignored.", in->value); freez(in->value); 
simple_pattern_free(in->value_pattern); freez(in); @@ -3727,7 +3748,7 @@ static void link_hostname(ebpf_network_viewer_hostname_list_t **out, ebpf_networ *out = in; } #ifdef NETDATA_INTERNAL_CHECKS - info("Adding value %s to %s hostname list used on network viewer", + netdata_log_info("Adding value %s to %s hostname list used on network viewer", in->value, (*out == network_viewer_opt.included_hostnames)?"included":"excluded"); #endif @@ -3806,7 +3827,7 @@ void parse_network_viewer_section(struct config *cfg) value = appconfig_get(cfg, EBPF_NETWORK_VIEWER_SECTION, EBPF_CONFIG_HOSTNAMES, NULL); link_hostnames(value); } else { - info("Name resolution is disabled, collector will not parser \"hostnames\" list."); + netdata_log_info("Name resolution is disabled, collector will not parser \"hostnames\" list."); } value = appconfig_get(cfg, EBPF_NETWORK_VIEWER_SECTION, @@ -3827,7 +3848,7 @@ static void link_dimension_name(char *port, uint32_t hash, char *value) { int test = str2i(port); if (test < NETDATA_MINIMUM_PORT_VALUE || test > NETDATA_MAXIMUM_PORT_VALUE){ - error("The dimension given (%s = %s) has an invalid value and it will be ignored.", port, value); + netdata_log_error("The dimension given (%s = %s) has an invalid value and it will be ignored.", port, value); return; } @@ -3845,7 +3866,7 @@ static void link_dimension_name(char *port, uint32_t hash, char *value) } else { for (; names->next; names = names->next) { if (names->port == w->port) { - info("Duplicated definition for a service, the name %s will be ignored. ", names->name); + netdata_log_info("Duplicated definition for a service, the name %s will be ignored. 
", names->name); freez(names->name); names->name = w->name; names->hash = w->hash; @@ -3857,7 +3878,7 @@ static void link_dimension_name(char *port, uint32_t hash, char *value) } #ifdef NETDATA_INTERNAL_CHECKS - info("Adding values %s( %u) to dimension name list used on network viewer", w->name, htons(w->port)); + netdata_log_info("Adding values %s( %u) to dimension name list used on network viewer", w->name, htons(w->port)); #endif } @@ -3950,7 +3971,7 @@ static int ebpf_socket_load_bpf(ebpf_module_t *em) #endif if (ret) { - error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); } return ret; @@ -3975,7 +3996,7 @@ void *ebpf_socket_thread(void *ptr) parse_table_size_options(&socket_config); if (pthread_mutex_init(&nv_mutex, NULL)) { - error("Cannot initialize local mutex"); + netdata_log_error("Cannot initialize local mutex"); goto endsocket; } @@ -4015,11 +4036,11 @@ void *ebpf_socket_thread(void *ptr) ebpf_create_global_charts(em); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); #ifdef NETDATA_DEV_MODE if (ebpf_aral_socket_pid) - ebpf_statistic_create_aral_chart(NETDATA_EBPF_SOCKET_ARAL_NAME, em); + socket_disable_priority = ebpf_statistic_create_aral_chart(NETDATA_EBPF_SOCKET_ARAL_NAME, em); #endif pthread_mutex_unlock(&lock); diff --git a/collectors/ebpf.plugin/ebpf_socket.h b/collectors/ebpf.plugin/ebpf_socket.h index 1ba20e65..ae2ee28a 100644 --- a/collectors/ebpf.plugin/ebpf_socket.h +++ b/collectors/ebpf.plugin/ebpf_socket.h @@ -4,8 +4,9 @@ #include <stdint.h> #include "libnetdata/avl/avl.h" -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_SOCKET "socket" +#define NETDATA_EBPF_SOCKET_MODULE_DESC "Monitors TCP and UDP bandwidth. This thread is integrated with apps and cgroup." 
// Vector indexes #define NETDATA_UDP_START 3 diff --git a/collectors/ebpf.plugin/ebpf_softirq.c b/collectors/ebpf.plugin/ebpf_softirq.c index b5c77bf0..8d8930a1 100644 --- a/collectors/ebpf.plugin/ebpf_softirq.c +++ b/collectors/ebpf.plugin/ebpf_softirq.c @@ -61,6 +61,26 @@ static softirq_val_t softirq_vals[] = { static softirq_ebpf_val_t *softirq_ebpf_vals = NULL; /** + * Obsolete global + * + * Obsolete global charts created by thread. + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_softirq_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, + "softirq_latency", + "Software IRQ latency", + EBPF_COMMON_DIMENSION_MILLISECONDS, + "softirqs", + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + NETDATA_CHART_PRIO_SYSTEM_SOFTIRQS+1, + em->update_every); +} + +/** * Cleanup * * Clean up allocated memory. @@ -71,16 +91,32 @@ static void softirq_cleanup(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - if (em->objects) + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + + ebpf_obsolete_softirq_global(em); + + pthread_mutex_unlock(&lock); + fflush(stdout); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + + if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } for (int i = 0; softirq_tracepoints[i].class != NULL; i++) { ebpf_disable_tracepoint(&softirq_tracepoints[i]); } freez(softirq_ebpf_vals); + softirq_ebpf_vals = NULL; pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -170,7 +206,7 @@ static void softirq_collector(ebpf_module_t *em) softirq_create_charts(em->update_every); softirq_create_dims(); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + 
ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); pthread_mutex_unlock(&lock); // loop and read from published data until ebpf plugin is closed. @@ -180,7 +216,9 @@ static void softirq_collector(ebpf_module_t *em) int counter = update_every - 1; int maps_per_core = em->maps_per_core; //This will be cancelled by its parent - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) continue; @@ -195,6 +233,15 @@ static void softirq_collector(ebpf_module_t *em) write_end_chart(); pthread_mutex_unlock(&lock); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } diff --git a/collectors/ebpf.plugin/ebpf_softirq.h b/collectors/ebpf.plugin/ebpf_softirq.h index eea2a184..4ef36775 100644 --- a/collectors/ebpf.plugin/ebpf_softirq.h +++ b/collectors/ebpf.plugin/ebpf_softirq.h @@ -3,6 +3,9 @@ #ifndef NETDATA_EBPF_SOFTIRQ_H #define NETDATA_EBPF_SOFTIRQ_H 1 +// Module observation +#define NETDATA_EBPF_SOFTIRQ_MODULE_DESC "Show time spent servicing individual software interrupt requests (soft IRQs)." + /***************************************************************** * copied from kernel-collectors repo, with modifications needed * for inclusion here. 
diff --git a/collectors/ebpf.plugin/ebpf_swap.c b/collectors/ebpf.plugin/ebpf_swap.c index 492b5967..9a1640a3 100644 --- a/collectors/ebpf.plugin/ebpf_swap.c +++ b/collectors/ebpf.plugin/ebpf_swap.c @@ -229,6 +229,109 @@ static inline int ebpf_swap_load_and_attach(struct swap_bpf *obj, ebpf_module_t * *****************************************************************/ +static void ebpf_obsolete_specific_swap_charts(char *type, int update_every); + +/** + * Obsolete services + * + * Obsolete all service charts created + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_swap_services(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_MEM_SWAP_READ_CHART, + "Calls to function <code>swap_readpage</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_SWAP_READ_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5100, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_MEM_SWAP_WRITE_CHART, + "Calls to function <code>swap_writepage</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_SWAP_WRITE_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5101, + em->update_every); +} + +/** + * Obsolete cgroup chart + * + * Send obsolete for all charts created before to close. + * + * @param em a pointer to `struct ebpf_module` + */ +static inline void ebpf_obsolete_swap_cgroup_charts(ebpf_module_t *em) { + pthread_mutex_lock(&mutex_cgroup_shm); + + ebpf_obsolete_swap_services(em); + + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + ebpf_obsolete_specific_swap_charts(ect->name, em->update_every); + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Obsolette apps charts + * + * Obsolete apps charts. 
+ * + * @param em a pointer to the structure with the default values. + */ +void ebpf_obsolete_swap_apps_charts(struct ebpf_module *em) +{ + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_MEM_SWAP_READ_CHART, + "Calls to function <code>swap_readpage</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20191, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_MEM_SWAP_WRITE_CHART, + "Calls to function <code>swap_writepage</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20192, + em->update_every); +} + +/** + * Obsolete global + * + * Obsolete global charts created by thread. + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_swap_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP, + NETDATA_MEM_SWAP_CHART, + "Calls to access swap memory", + EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_SWAP_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_SYSTEM_SWAP_CALLS, + em->update_every); +} + /** * Swap exit * @@ -240,15 +343,40 @@ static void ebpf_swap_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + if (em->cgroup_charts) { + ebpf_obsolete_swap_cgroup_charts(em); + fflush(stdout); + } + + if (em->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + ebpf_obsolete_swap_apps_charts(em); + } + + ebpf_obsolete_swap_global(em); + + fflush(stdout); + pthread_mutex_unlock(&lock); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + #ifdef LIBBPF_MAJOR_VERSION - if (bpf_obj) + if (bpf_obj) { swap_bpf__destroy(bpf_obj); + bpf_obj = NULL; + } #endif - if (em->objects) + if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } 
pthread_mutex_lock(&ebpf_exit_cleanup); em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -674,7 +802,9 @@ static void swap_collector(ebpf_module_t *em) heartbeat_init(&hb); int counter = update_every - 1; int maps_per_core = em->maps_per_core; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) continue; @@ -701,6 +831,15 @@ static void swap_collector(ebpf_module_t *em) pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -784,6 +923,8 @@ static void ebpf_create_swap_charts(int update_every) ebpf_create_global_dimension, swap_publish_aggregated, NETDATA_SWAP_END, update_every, NETDATA_EBPF_MODULE_NAME_SWAP); + + fflush(stdout); } /* @@ -818,7 +959,7 @@ static int ebpf_swap_load_bpf(ebpf_module_t *em) #endif if (ret) - error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); + netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name); return ret; } @@ -857,7 +998,7 @@ void *ebpf_swap_thread(void *ptr) pthread_mutex_lock(&lock); ebpf_create_swap_charts(em->update_every); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); pthread_mutex_unlock(&lock); swap_collector(em); diff --git a/collectors/ebpf.plugin/ebpf_swap.h b/collectors/ebpf.plugin/ebpf_swap.h index 8ca980bf..79e9a01a 100644 --- a/collectors/ebpf.plugin/ebpf_swap.h +++ b/collectors/ebpf.plugin/ebpf_swap.h @@ -3,8 
+3,9 @@ #ifndef NETDATA_EBPF_SWAP_H #define NETDATA_EBPF_SWAP_H 1 -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_SWAP "swap" +#define NETDATA_EBPF_SWAP_MODULE_DESC "Monitor swap space usage. This thread is integrated with apps and cgroup." #define NETDATA_SWAP_SLEEP_MS 850000ULL diff --git a/collectors/ebpf.plugin/ebpf_sync.c b/collectors/ebpf.plugin/ebpf_sync.c index 9f1c0159..521d39f3 100644 --- a/collectors/ebpf.plugin/ebpf_sync.c +++ b/collectors/ebpf.plugin/ebpf_sync.c @@ -248,7 +248,6 @@ static inline int ebpf_sync_load_and_attach(struct sync_bpf *obj, ebpf_module_t * *****************************************************************/ -#ifdef LIBBPF_MAJOR_VERSION /** * Cleanup Objects * @@ -259,28 +258,86 @@ void ebpf_sync_cleanup_objects() int i; for (i = 0; local_syscalls[i].syscall; i++) { ebpf_sync_syscalls_t *w = &local_syscalls[i]; - if (w->sync_obj) +#ifdef LIBBPF_MAJOR_VERSION + if (w->sync_obj) { sync_bpf__destroy(w->sync_obj); + w->sync_obj = NULL; + } +#endif + if (w->probe_links) { + ebpf_unload_legacy_code(w->objects, w->probe_links); + w->objects = NULL; + w->probe_links = NULL; + } } } -#endif + +/* + static void ebpf_create_sync_chart(char *id, + char *title, + int order, + int idx, + int end, + int update_every) + { + ebpf_write_chart_cmd(NETDATA_EBPF_MEMORY_GROUP, id, title, EBPF_COMMON_DIMENSION_CALL, + NETDATA_EBPF_SYNC_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NULL, order, + update_every, + NETDATA_EBPF_MODULE_NAME_SYNC); + */ /** - * Sync Free + * Obsolete global * - * Cleanup variables after child threads to stop + * Obsolete global charts created by thread. * - * @param ptr thread data. 
+ * @param em a pointer to `struct ebpf_module` */ -static void ebpf_sync_free(ebpf_module_t *em) +static void ebpf_obsolete_sync_global(ebpf_module_t *em) { -#ifdef LIBBPF_MAJOR_VERSION - ebpf_sync_cleanup_objects(); -#endif + if (local_syscalls[NETDATA_SYNC_FSYNC_IDX].enabled && local_syscalls[NETDATA_SYNC_FDATASYNC_IDX].enabled) + ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, + NETDATA_EBPF_FILE_SYNC_CHART, + "Monitor calls for <code>fsync(2)</code> and <code>fdatasync(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_EBPF_SYNC_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21300, + em->update_every); - pthread_mutex_lock(&ebpf_exit_cleanup); - em->enabled = NETDATA_THREAD_EBPF_STOPPED; - pthread_mutex_unlock(&ebpf_exit_cleanup); + if (local_syscalls[NETDATA_SYNC_MSYNC_IDX].enabled) + ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, + NETDATA_EBPF_MSYNC_CHART, + "Monitor calls for <code>msync(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_EBPF_SYNC_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21301, + em->update_every); + + if (local_syscalls[NETDATA_SYNC_SYNC_IDX].enabled && local_syscalls[NETDATA_SYNC_SYNCFS_IDX].enabled) + ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, + NETDATA_EBPF_SYNC_CHART, + "Monitor calls for <code>sync(2)</code> and <code>syncfs(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_EBPF_SYNC_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21302, + em->update_every); + + if (local_syscalls[NETDATA_SYNC_SYNC_FILE_RANGE_IDX].enabled) + ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP, + NETDATA_EBPF_FILE_SEGMENT_CHART, + "Monitor calls for <code>sync_file_range(2)</code>.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_EBPF_SYNC_SUBMENU, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + 21303, + em->update_every); } /** @@ -293,7 +350,19 @@ static void ebpf_sync_free(ebpf_module_t *em) static void ebpf_sync_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; - ebpf_sync_free(em); + + if (em->enabled == 
NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + ebpf_obsolete_sync_global(em); + pthread_mutex_unlock(&lock); + } + + ebpf_sync_cleanup_objects(); + + pthread_mutex_lock(&ebpf_exit_cleanup); + em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); + pthread_mutex_unlock(&ebpf_exit_cleanup); } /***************************************************************** @@ -373,7 +442,7 @@ static int ebpf_sync_initialize_syscall(ebpf_module_t *em) } } } else { - info("Cannot find syscall %s we are not going to monitor it.", syscall); + netdata_log_info("Cannot find syscall %s we are not going to monitor it.", syscall); w->enabled = false; } @@ -489,7 +558,9 @@ static void sync_collector(ebpf_module_t *em) int update_every = em->update_every; int counter = update_every - 1; int maps_per_core = em->maps_per_core; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) continue; @@ -501,6 +572,15 @@ static void sync_collector(ebpf_module_t *em) sync_send_data(); pthread_mutex_unlock(&lock); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -574,6 +654,8 @@ static void ebpf_create_sync_charts(int update_every) ebpf_create_sync_chart(NETDATA_EBPF_FILE_SEGMENT_CHART, "Monitor calls for <code>sync_file_range(2)</code>.", 21303, NETDATA_SYNC_SYNC_FILE_RANGE_IDX, NETDATA_SYNC_SYNC_FILE_RANGE_IDX, update_every); + + fflush(stdout); } /** diff --git a/collectors/ebpf.plugin/ebpf_sync.h b/collectors/ebpf.plugin/ebpf_sync.h index cace2a1c..bd1bb78b 100644 --- a/collectors/ebpf.plugin/ebpf_sync.h +++ b/collectors/ebpf.plugin/ebpf_sync.h @@ -7,8 +7,9 @@ #include 
"includes/sync.skel.h" #endif -// Module name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_SYNC "sync" +#define NETDATA_EBPF_SYNC_MODULE_DESC "Monitor calls to syscalls sync(2), fsync(2), fdatasync(2), syncfs(2), msync(2), and sync_file_range(2)." // charts #define NETDATA_EBPF_SYNC_CHART "sync" diff --git a/collectors/ebpf.plugin/ebpf_vfs.c b/collectors/ebpf.plugin/ebpf_vfs.c index 6cafafc3..5747a240 100644 --- a/collectors/ebpf.plugin/ebpf_vfs.c +++ b/collectors/ebpf.plugin/ebpf_vfs.c @@ -60,6 +60,10 @@ netdata_ebpf_targets_t vfs_targets[] = { {.name = "vfs_write", .mode = EBPF_LOAD {.name = "release_task", .mode = EBPF_LOAD_TRAMPOLINE}, {.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}}; +#ifdef NETDATA_DEV_MODE +int vfs_disable_priority; +#endif + #ifdef LIBBPF_MAJOR_VERSION /** * Disable probe @@ -403,6 +407,447 @@ static inline int ebpf_vfs_load_and_attach(struct vfs_bpf *obj, ebpf_module_t *e * *****************************************************************/ +static void ebpf_obsolete_specific_vfs_charts(char *type, ebpf_module_t *em); + +/** + * Obsolete services + * + * Obsolete all service charts created + * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_vfs_services(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_FILE_DELETED, + "Files deleted", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20065, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, + "Write to disk", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20066, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, + "Fails to write", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + 
NULL, + 20067, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_READ_CALLS, + "Read from disk", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20068, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, + "Fails to read", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20069, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, + "Bytes written on disk", + EBPF_COMMON_DIMENSION_BYTES, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20070, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_READ_BYTES, + "Bytes read from disk", + EBPF_COMMON_DIMENSION_BYTES, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20071, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_FSYNC, + "Calls to <code>vfs_fsync</code>", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20072, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, + "Sync error", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20073, + em->update_every); + } + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_OPEN, + "Calls to <code>vfs_open</code>", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20074, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, + "Open 
error", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20075, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_CREATE, + "Calls to <code>vfs_create</code>", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20076, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, + NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, + "Create error", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_CGROUP_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20077, + em->update_every); + } +} + +/** + * Obsolete cgroup chart + * + * Send obsolete for all charts created before to close. + * + * @param em a pointer to `struct ebpf_module` + */ +static inline void ebpf_obsolete_vfs_cgroup_charts(ebpf_module_t *em) { + pthread_mutex_lock(&mutex_cgroup_shm); + + ebpf_obsolete_vfs_services(em); + + ebpf_cgroup_target_t *ect; + for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { + if (ect->systemd) + continue; + + ebpf_obsolete_specific_vfs_charts(ect->name, em); + } + pthread_mutex_unlock(&mutex_cgroup_shm); +} + +/** + * Obsolette apps charts + * + * Obsolete apps charts. + * + * @param em a pointer to the structure with the default values. 
+ */ +void ebpf_obsolete_vfs_apps_charts(struct ebpf_module *em) +{ + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_FILE_DELETED, + "Files deleted", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20065, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, + "Write to disk", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20066, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, + "Fails to write", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20067, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_READ_CALLS, + "Read from disk", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20068, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, + "Fails to read", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20069, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, + "Bytes written on disk", + EBPF_COMMON_DIMENSION_BYTES, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20070, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_READ_BYTES, + "Bytes read from disk", + EBPF_COMMON_DIMENSION_BYTES, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20071, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_FSYNC, + "Calls for <code>vfs_fsync</code>", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + 
NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20072, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, + "Sync error", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20073, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_OPEN, + "Calls for <code>vfs_open</code>", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20074, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, + "Open error", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20075, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_CREATE, + "Calls for <code>vfs_create</code>", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20076, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, + NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, + "Create error", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_STACKED, + NULL, + 20077, + em->update_every); + } +} + +/** + * Obsolete global + * + * Obsolete global charts created by thread. 
+ * + * @param em a pointer to `struct ebpf_module` + */ +static void ebpf_obsolete_vfs_global(ebpf_module_t *em) +{ + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_FILE_CLEAN_COUNT, + "Remove files", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_CLEAN, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_FILE_IO_COUNT, + "Calls to IO", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_COUNT, + em->update_every); + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_IO_FILE_BYTES, + "Bytes written and read", + EBPF_COMMON_DIMENSION_BYTES, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_BYTES, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_FILE_ERR_COUNT, + "Fails to write or read", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_EBYTES, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_FSYNC, + "Calls for <code>vfs_fsync</code>", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_FSYNC, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_FSYNC_ERR, + "Fails to synchronize", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_EFSYNC, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_OPEN, + "Calls for <code>vfs_open</code>", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + 
NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_OPEN, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_OPEN_ERR, + "Fails to open a file", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_EOPEN, + em->update_every); + } + + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_CREATE, + "Calls for <code>vfs_create</code>", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_CREATE, + em->update_every); + + if (em->mode < MODE_ENTRY) { + ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, + NETDATA_VFS_CREATE_ERR, + "Fails to create a file.", + EBPF_COMMON_DIMENSION_CALL, + NETDATA_VFS_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NULL, + NETDATA_CHART_PRIO_FILESYSTEM_VFS_IO_ECREATE, + em->update_every); + } +} + /** * Exit * @@ -414,15 +859,45 @@ static void ebpf_vfs_exit(void *ptr) { ebpf_module_t *em = (ebpf_module_t *)ptr; + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { + pthread_mutex_lock(&lock); + if (em->cgroup_charts) { + ebpf_obsolete_vfs_cgroup_charts(em); + fflush(stdout); + } + + if (em->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) { + ebpf_obsolete_vfs_apps_charts(em); + } + + ebpf_obsolete_vfs_global(em); + +#ifdef NETDATA_DEV_MODE + if (ebpf_aral_vfs_pid) + ebpf_statistic_obsolete_aral_chart(em, vfs_disable_priority); +#endif + + fflush(stdout); + pthread_mutex_unlock(&lock); + } + + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); + #ifdef LIBBPF_MAJOR_VERSION - if (vfs_bpf_obj) + if (vfs_bpf_obj) { vfs_bpf__destroy(vfs_bpf_obj); + vfs_bpf_obj = NULL; + } #endif - if (em->objects) + if (em->objects) { ebpf_unload_legacy_code(em->objects, em->probe_links); + em->objects = NULL; + em->probe_links = NULL; + } pthread_mutex_lock(&ebpf_exit_cleanup); 
em->enabled = NETDATA_THREAD_EBPF_STOPPED; + ebpf_update_stats(&plugin_statistics, em); pthread_mutex_unlock(&ebpf_exit_cleanup); } @@ -1486,7 +1961,9 @@ static void vfs_collector(ebpf_module_t *em) int update_every = em->update_every; int counter = update_every - 1; int maps_per_core = em->maps_per_core; - while (!ebpf_exit_plugin) { + uint32_t running_time = 0; + uint32_t lifetime = em->lifetime; + while (!ebpf_exit_plugin && running_time < lifetime) { (void)heartbeat_next(&hb, USEC_PER_SEC); if (ebpf_exit_plugin || ++counter != update_every) continue; @@ -1519,6 +1996,15 @@ static void vfs_collector(ebpf_module_t *em) pthread_mutex_unlock(&lock); pthread_mutex_unlock(&collect_data_mutex); + + pthread_mutex_lock(&ebpf_exit_cleanup); + if (running_time && !em->running_time) + running_time = update_every; + else + running_time += update_every; + + em->running_time = running_time; + pthread_mutex_unlock(&ebpf_exit_cleanup); } } @@ -1690,6 +2176,8 @@ static void ebpf_create_global_charts(ebpf_module_t *em) &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE], 1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS); } + + fflush(stdout); } /** @@ -1934,10 +2422,10 @@ void *ebpf_vfs_thread(void *ptr) pthread_mutex_lock(&lock); ebpf_create_global_charts(em); ebpf_update_stats(&plugin_statistics, em); - ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps); + ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); #ifdef NETDATA_DEV_MODE if (ebpf_aral_vfs_pid) - ebpf_statistic_create_aral_chart(NETDATA_EBPF_VFS_ARAL_NAME, em); + vfs_disable_priority = ebpf_statistic_create_aral_chart(NETDATA_EBPF_VFS_ARAL_NAME, em); #endif pthread_mutex_unlock(&lock); diff --git a/collectors/ebpf.plugin/ebpf_vfs.h b/collectors/ebpf.plugin/ebpf_vfs.h index 45a1df4b..8fe12a7e 100644 --- a/collectors/ebpf.plugin/ebpf_vfs.h +++ b/collectors/ebpf.plugin/ebpf_vfs.h @@ -3,8 +3,9 @@ #ifndef NETDATA_EBPF_VFS_H #define NETDATA_EBPF_VFS_H 1 -// Module 
name +// Module name & description #define NETDATA_EBPF_MODULE_NAME_VFS "vfs" +#define NETDATA_EBPF_VFS_MODULE_DESC "Monitor VFS (Virtual File System) functions. This thread is integrated with apps and cgroup." #define NETDATA_DIRECTORY_VFS_CONFIG_FILE "vfs.conf" diff --git a/collectors/ebpf.plugin/multi_metadata.yaml b/collectors/ebpf.plugin/multi_metadata.yaml new file mode 100644 index 00000000..9a31a403 --- /dev/null +++ b/collectors/ebpf.plugin/multi_metadata.yaml @@ -0,0 +1,2360 @@ +name: ebpf.plugin +modules: + - meta: + plugin_name: ebpf.plugin + module_name: filedescriptor + monitored_instance: + name: ebpf filedescriptor + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.fd_open + description: Number of open files + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: cgroup.fd_open_error + description: Fails to open files + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: cgroup.fd_closed + description: Files closed + unit: "calls/s" + chart_type: line + dimensions: + - name: close + - name: 
cgroup.fd_close_error + description: Fails to close files + unit: "calls/s" + chart_type: line + dimensions: + - name: close + - name: global + description: "" + labels: [] + metrics: + - name: services.file_open + description: Number of open files + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.file_open_error + description: Fails to open files + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.file_closed + description: Files closed + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.file_close_error + description: Fails to close files + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: apps.file_open + description: Number of open files + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.file_open_error + description: Fails to open files + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.file_closed + description: Files closed + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.file_close_error + description: Fails to close files + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: filesystem.file_descriptor + description: Open and close calls + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: close + - name: filesystem.file_error + description: Open fails + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: close + - meta: + plugin_name: ebpf.plugin + module_name: processes + monitored_instance: + name: ebpf processes + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: 
'' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.process_thread + description: Start process + unit: "calls/s" + chart_type: line + dimensions: + - name: process + - name: system.process_status + description: Process not closed + unit: "difference" + chart_type: line + dimensions: + - name: process + - name: zombie + - name: system.exit + description: Exit process + unit: "calls/s" + chart_type: line + dimensions: + - name: process + - name: system.task_error + description: Fails to create process + unit: "calls/s" + chart_type: line + dimensions: + - name: task + - name: apps.process_create + description: Process started + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.thread_create + description: Threads started + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.task_exit + description: Tasks starts exit process + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.task_close + description: Tasks closed + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.task_error + description: Errors 
to create process or threads + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.process_create + description: Process started + unit: "calls/s" + chart_type: line + dimensions: + - name: process + - name: cgroup.thread_create + description: Threads started + unit: "calls/s" + chart_type: line + dimensions: + - name: thread + - name: cgroup.task_exit + description: Tasks starts exit process + unit: "calls/s" + chart_type: line + dimensions: + - name: exit + - name: cgroup.task_close + description: Tasks closed + unit: "calls/s" + chart_type: line + dimensions: + - name: process + - name: cgroup.task_error + description: Errors to create process or threads + unit: "calls/s" + chart_type: line + dimensions: + - name: process + - name: services.process_create + description: Process started + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.thread_create + description: Threads started + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.task_close + description: Tasks starts exit process + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.task_exit + description: Tasks closed + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.task_error + description: Errors to create process or threads + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - meta: + plugin_name: ebpf.plugin + module_name: disk + monitored_instance: + name: ebpf disk + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + 
metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: disk + description: "" + labels: [] + metrics: + - name: disk.latency_io + description: Disk latency + unit: "calls/s" + chart_type: stacked + dimensions: + - name: latency + - meta: + plugin_name: ebpf.plugin + module_name: hardirq + monitored_instance: + name: ebpf hardirq + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.hardirq_latency + description: Hardware IRQ 
latency + unit: "milliseconds" + chart_type: stacked + dimensions: + - name: hardirq names + - meta: + plugin_name: ebpf.plugin + module_name: cachestat + monitored_instance: + name: ebpf cachestat + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: apps.cachestat_ratio + description: Hit ratio + unit: "%" + chart_type: line + dimensions: + - name: a dimension per app group + - name: apps.cachestat_dirties + description: Number of dirty pages + unit: "page/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.cachestat_hits + description: Number of accessed files + unit: "hits/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.cachestat_misses + description: Files out of page cache + unit: "misses/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: services.cachestat_ratio + description: Hit ratio + unit: "%" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: services.cachestat_dirties + description: Number of dirty 
pages + unit: "page/s" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: services.cachestat_hits + description: Number of accessed files + unit: "hits/s" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: services.cachestat_misses + description: Files out of page cache + unit: "misses/s" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: mem.cachestat_ratio + description: Hit ratio + unit: "%" + chart_type: line + dimensions: + - name: ratio + - name: mem.cachestat_dirties + description: Number of dirty pages + unit: "page/s" + chart_type: line + dimensions: + - name: dirty + - name: mem.cachestat_hits + description: Number of accessed files + unit: "hits/s" + chart_type: line + dimensions: + - name: hit + - name: mem.cachestat_misses + description: Files out of page cache + unit: "misses/s" + chart_type: line + dimensions: + - name: miss + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.cachestat_ratio + description: Hit ratio + unit: "%" + chart_type: line + dimensions: + - name: ratio + - name: cgroup.cachestat_dirties + description: Number of dirty pages + unit: "page/s" + chart_type: line + dimensions: + - name: dirty + - name: cgroup.cachestat_hits + description: Number of accessed files + unit: "hits/s" + chart_type: line + dimensions: + - name: hit + - name: cgroup.cachestat_misses + description: Files out of page cache + unit: "misses/s" + chart_type: line + dimensions: + - name: miss + - meta: + plugin_name: ebpf.plugin + module_name: sync + monitored_instance: + name: ebpf sync + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + 
additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: sync_freq + link: https://github.com/netdata/netdata/blob/master/health/health.d/synchronization.conf + metric: mem.sync + info: number of sync() system calls. Every call causes all pending modifications to filesystem metadata and cached file data to be written to the underlying filesystems. + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: mem.file_sync + description: Monitor calls for <code>fsync(2)</code> and <code>fdatasync(2)</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: fsync + - name: fdatasync + - name: mem.meory_map + description: Monitor calls for <code>msync(2)</code>. + unit: "calls/s" + chart_type: line + dimensions: + - name: msync + - name: mem.sync + description: Monitor calls for <code>sync(2)</code> and <code>syncfs(2)</code>. + unit: "calls/s" + chart_type: line + dimensions: + - name: sync + - name: syncfs + - name: mem.file_segment + description: Monitor calls for <code>sync_file_range(2)</code>. 
+ unit: "calls/s" + chart_type: line + dimensions: + - name: sync_file_range + - meta: + plugin_name: ebpf.plugin + module_name: mdflush + monitored_instance: + name: ebpf mdflush + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: mdstat.mdstat_flush + description: MD flushes + unit: "flushes" + chart_type: stacked + dimensions: + - name: disk + - meta: + plugin_name: ebpf.plugin + module_name: swap + monitored_instance: + name: ebpf swap + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + 
description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.swap_read + description: Calls to function <code>swap_readpage</code>. + unit: "calls/s" + chart_type: line + dimensions: + - name: read + - name: cgroup.swap_write + description: Calls to function <code>swap_writepage</code>. + unit: "calls/s" + chart_type: line + dimensions: + - name: write + - name: global + description: "" + labels: [] + metrics: + - name: services.swap_read + description: Calls to <code>swap_readpage</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.swap_write + description: Calls to function <code>swap_writepage</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: apps.swap_read_call + description: Calls to function <code>swap_readpage</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.swap_write_call + description: Calls to function <code>swap_writepage</code>. 
+ unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: system.swapcalls + description: Calls to access swap memory + unit: "calls/s" + chart_type: line + dimensions: + - name: write + - name: read + - meta: + plugin_name: ebpf.plugin + module_name: oomkill + monitored_instance: + name: ebpf oomkill + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.oomkills + description: OOM kills. This chart is provided by eBPF plugin. + unit: "kills" + chart_type: line + dimensions: + - name: cgroup name + - name: global + description: "" + labels: [] + metrics: + - name: services.oomkills + description: OOM kills. This chart is provided by eBPF plugin. 
+ unit: "kills" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: apps.oomkills + description: OOM kills + unit: "kills" + chart_type: stacked + dimensions: + - name: a dimension per app group + - meta: + plugin_name: ebpf.plugin + module_name: socket + monitored_instance: + name: ebpf socket + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: ip.inbound_conn + description: Inbound connections. + unit: "connections/s" + chart_type: line + dimensions: + - name: connection_tcp + - name: ip.tcp_outbound_conn + description: TCP outbound connections. 
+ unit: "connections/s" + chart_type: line + dimensions: + - name: received + - name: ip.tcp_functions + description: Calls to internal functions + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - name: send + - name: closed + - name: ip.total_tcp_bandwidth + description: TCP bandwidth + unit: "kilobits/s" + chart_type: line + dimensions: + - name: received + - name: send + - name: ip.tcp_error + description: TCP errors + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - name: send + - name: ip.tcp_retransmit + description: Packages retransmitted + unit: "calls/s" + chart_type: line + dimensions: + - name: retransmited + - name: ip.udp_functions + description: UDP calls + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - name: send + - name: ip.total_udp_bandwidth + description: UDP bandwidth + unit: "kilobits/s" + chart_type: line + dimensions: + - name: received + - name: send + - name: ip.udp_error + description: UDP errors + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - name: send + - name: apps.outbound_conn_v4 + description: Calls to tcp_v4_connection + unit: "connections/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.outbound_conn_v6 + description: Calls to tcp_v6_connection + unit: "connections/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.total_bandwidth_sent + description: Bytes sent + unit: "kilobits/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.total_bandwidth_recv + description: bytes received + unit: "kilobits/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.bandwidth_tcp_send + description: Calls for tcp_sendmsg + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.bandwidth_tcp_recv + description: Calls for tcp_cleanup_rbuf + unit: 
"calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.bandwidth_tcp_retransmit + description: Calls for tcp_retransmit + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.bandwidth_udp_send + description: Calls for udp_sendmsg + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.bandwidth_udp_recv + description: Calls for udp_recvmsg + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: services.net_conn_ipv4 + description: Calls to tcp_v4_connection + unit: "connections/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_conn_ipv6 + description: Calls to tcp_v6_connection + unit: "connections/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_bytes_recv + description: Bytes received + unit: "kilobits/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_bytes_sent + description: Bytes sent + unit: "kilobits/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_tcp_recv + description: Calls to tcp_cleanup_rbuf. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_tcp_send + description: Calls to tcp_sendmsg. 
+ unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_tcp_retransmit + description: Calls to tcp_retransmit + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_udp_send + description: Calls to udp_sendmsg + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.net_udp_recv + description: Calls to udp_recvmsg + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.net_conn_ipv4 + description: Calls to tcp_v4_connection + unit: "connections/s" + chart_type: line + dimensions: + - name: connected_v4 + - name: cgroup.net_conn_ipv6 + description: Calls to tcp_v6_connection + unit: "connections/s" + chart_type: line + dimensions: + - name: connected_v6 + - name: cgroup.net_bytes_recv + description: Bytes received + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - name: cgroup.net_bytes_sent + description: Bytes sent + unit: "calls/s" + chart_type: line + dimensions: + - name: sent + - name: cgroup.net_tcp_recv + description: Calls to tcp_cleanup_rbuf. + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - name: cgroup.net_tcp_send + description: Calls to tcp_sendmsg. + unit: "calls/s" + chart_type: line + dimensions: + - name: sent + - name: cgroup.net_retransmit + description: Calls to tcp_retransmit. 
+ unit: "calls/s" + chart_type: line + dimensions: + - name: retransmitted + - name: cgroup.net_udp_send + description: Calls to udp_sendmsg + unit: "calls/s" + chart_type: line + dimensions: + - name: sent + - name: cgroup.net_udp_recv + description: Calls to udp_recvmsg + unit: "calls/s" + chart_type: line + dimensions: + - name: received + - meta: + plugin_name: ebpf.plugin + module_name: dcstat + monitored_instance: + name: ebpf dcstat + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: apps.dc_ratio + description: Percentage of files inside directory cache + unit: "%" + chart_type: line + dimensions: + - name: a dimension per app group + - name: apps.dc_reference + description: Count file access + unit: "files" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.dc_not_cache + description: Files not present inside directory cache + unit: "files" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.dc_not_found + description: Files not found + unit: "files" + chart_type: stacked + 
dimensions: + - name: a dimension per app group + - name: services.dc_ratio + description: Percentage of files inside directory cache + unit: "%" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: services.dc_reference + description: Count file access + unit: "files" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: services.dc_not_cache + description: Files not present inside directory cache + unit: "files" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: services.dc_not_found + description: Files not found + unit: "files" + chart_type: line + dimensions: + - name: a dimension per systemd service + - name: filesystem.dc_hit_ratio + description: Percentage of files inside directory cache + unit: "%" + chart_type: line + dimensions: + - name: ratio + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.dc_ratio + description: Percentage of files inside directory cache + unit: "%" + chart_type: line + dimensions: + - name: ratio + - name: cgroup.dc_reference + description: Count file access + unit: "files" + chart_type: line + dimensions: + - name: reference + - name: cgroup.dc_not_cache + description: Files not present inside directory cache + unit: "files" + chart_type: line + dimensions: + - name: slow + - name: cgroup.dc_not_found + description: Files not found + unit: "files" + chart_type: line + dimensions: + - name: miss + - name: filesystem + description: "" + labels: [] + metrics: + - name: filesystem.dc_reference + description: Variables used to calculate hit ratio. 
+ unit: "files" + chart_type: line + dimensions: + - name: reference + - name: slow + - name: miss + - meta: + plugin_name: ebpf.plugin + module_name: filesystem + monitored_instance: + name: ebpf filesystem + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: filesystem + description: "" + labels: [] + metrics: + - name: filesystem.read_latency + description: ext4 latency for each read request. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: latency period + - name: filesystem.open_latency + description: ext4 latency for each open request. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: latency period + - name: filesystem.sync_latency + description: ext4 latency for each sync request. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: latency period + - name: filesystem + description: "" + labels: [] + metrics: + - name: filesystem.write_latency + description: ext4 latency for each write request. 
+ unit: "calls/s" + chart_type: stacked + dimensions: + - name: latency period + - name: global + description: "" + labels: [] + metrics: + - name: filesystem.attributte_latency + description: nfs latency for each attribute request. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: latency period + - meta: + plugin_name: ebpf.plugin + module_name: shm + monitored_instance: + name: ebpf shm + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.shmget + description: Calls to syscall <code>shmget(2)</code>. + unit: "calls/s" + chart_type: line + dimensions: + - name: get + - name: cgroup.shmat + description: Calls to syscall <code>shmat(2)</code>. + unit: "calls/s" + chart_type: line + dimensions: + - name: at + - name: cgroup.shmdt + description: Calls to syscall <code>shmdt(2)</code>. + unit: "calls/s" + chart_type: line + dimensions: + - name: dt + - name: cgroup.shmctl + description: Calls to syscall <code>shmctl(2)</code>. 
+ unit: "calls/s" + chart_type: line + dimensions: + - name: ctl + - name: global + description: "" + labels: [] + metrics: + - name: services.shmget + description: Calls to syscall <code>shmget(2)</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.shmat + description: Calls to syscall <code>shmat(2)</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.shmdt + description: Calls to syscall <code>shmdt(2)</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.shmctl + description: Calls to syscall <code>shmctl(2)</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: apps.shmget_call + description: Calls to syscall <code>shmget(2)</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.shmat_call + description: Calls to syscall <code>shmat(2)</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.shmdt_call + description: Calls to syscall <code>shmdt(2)</code>. + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.shmctl_call + description: Calls to syscall <code>shmctl(2)</code>. 
+ unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: system.shared_memory_calls + description: Calls to shared memory system calls + unit: "calls/s" + chart_type: line + dimensions: + - name: get + - name: at + - name: dt + - name: ctl + - meta: + plugin_name: ebpf.plugin + module_name: softirq + monitored_instance: + name: ebpf softirq + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.softirq_latency + description: Software IRQ latency + unit: "milliseconds" + chart_type: stacked + dimensions: + - name: soft IRQs + - meta: + plugin_name: ebpf.plugin + module_name: mount + monitored_instance: + name: ebpf mount + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + 
default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: mount_points.call + description: Calls to mount and umount syscalls + unit: "calls/s" + chart_type: line + dimensions: + - name: mount + - name: umount + - name: mount_points.error + description: Errors to mount and umount file systems + unit: "calls/s" + chart_type: line + dimensions: + - name: mount + - name: umount + - meta: + plugin_name: ebpf.plugin + module_name: vfs + monitored_instance: + name: ebpf vfs + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: cgroup + description: "" + labels: [] + metrics: + - name: cgroup.vfs_unlink + 
description: Files deleted + unit: "calls/s" + chart_type: line + dimensions: + - name: delete + - name: cgroup.vfs_write + description: Write to disk + unit: "calls/s" + chart_type: line + dimensions: + - name: write + - name: cgroup.vfs_write_error + description: Fails to write + unit: "calls/s" + chart_type: line + dimensions: + - name: write + - name: cgroup.vfs_read + description: Read from disk + unit: "calls/s" + chart_type: line + dimensions: + - name: read + - name: cgroup.vfs_read_error + description: Fails to read + unit: "calls/s" + chart_type: line + dimensions: + - name: read + - name: cgroup.vfs_write_bytes + description: Bytes written on disk + unit: "bytes/s" + chart_type: line + dimensions: + - name: write + - name: cgroup.vfs_read_bytes + description: Bytes read from disk + unit: "bytes/s" + chart_type: line + dimensions: + - name: read + - name: cgroup.vfs_fsync + description: Calls for <code>vfs_fsync</code> + unit: "calls/s" + chart_type: line + dimensions: + - name: fsync + - name: cgroup.vfs_fsync_error + description: Sync error + unit: "calls/s" + chart_type: line + dimensions: + - name: fsync + - name: cgroup.vfs_open + description: Calls for <code>vfs_open</code> + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: cgroup.vfs_open_error + description: Open error + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: cgroup.vfs_create + description: Calls for <code>vfs_create</code> + unit: "calls/s" + chart_type: line + dimensions: + - name: create + - name: cgroup.vfs_create_error + description: Create error + unit: "calls/s" + chart_type: line + dimensions: + - name: create + - name: global + description: "" + labels: [] + metrics: + - name: services.vfs_unlink + description: Files deleted + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_write + description: Write to disk + unit: "calls/s" + chart_type: stacked + dimensions: 
+ - name: a dimension per systemd service + - name: services.vfs_write_error + description: Fails to write + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_read + description: Read from disk + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_read_error + description: Fails to read + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_write_bytes + description: Bytes written on disk + unit: "bytes/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_read_bytes + description: Bytes read from disk + unit: "bytes/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_fsync + description: Calls to <code>vfs_fsync</code> + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_fsync_error + description: Sync error + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_open + description: Calls to <code>vfs_open</code> + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_open_error + description: Open error + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_create + description: Calls to <code>vfs_create</code> + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: services.vfs_create_error + description: Create error + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per systemd service + - name: filesystem.vfs_deleted_objects + description: Remove files + unit: "calls/s" + chart_type: line + dimensions: + - name: delete + - name: 
filesystem.vfs_io + description: Calls to IO + unit: "calls/s" + chart_type: line + dimensions: + - name: read + - name: write + - name: filesystem.vfs_io_bytes + description: Bytes written and read + unit: "bytes/s" + chart_type: line + dimensions: + - name: read + - name: write + - name: filesystem.vfs_io_error + description: Fails to write or read + unit: "calls/s" + chart_type: line + dimensions: + - name: read + - name: write + - name: filesystem.vfs_fsync + description: Calls for <code>vfs_fsync</code> + unit: "calls/s" + chart_type: line + dimensions: + - name: fsync + - name: filesystem.vfs_fsync_error + description: Fails to synchronize + unit: "calls/s" + chart_type: line + dimensions: + - name: fsync + - name: filesystem.vfs_open + description: Calls for <code>vfs_open</code> + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: filesystem.vfs_open_error + description: Fails to open a file + unit: "calls/s" + chart_type: line + dimensions: + - name: open + - name: filesystem.vfs_create + description: Calls for <code>vfs_create</code> + unit: "calls/s" + chart_type: line + dimensions: + - name: create + - name: filesystem.vfs_create_error + description: Fails to create a file. 
+ unit: "calls/s" + chart_type: line + dimensions: + - name: create + - name: apps.file_deleted + description: Files deleted + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_write_call + description: Write to disk + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_write_error + description: Fails to write + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_read_call + description: Read from disk + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_read_error + description: Fails to read + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_write_bytes + description: Bytes written on disk + unit: "bytes/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_read_bytes + description: Bytes read on disk + unit: "bytes/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_fsync + description: Calls for <code>vfs_fsync</code> + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_fsync_error + description: Sync error + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_open + description: Calls for <code>vfs_open</code> + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_open_error + description: Open error + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_create + description: Calls for <code>vfs_create</code> + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per app group + - name: apps.vfs_create_error + description: Create error + unit: "calls/s" + chart_type: 
stacked + dimensions: + - name: a dimension per app group + - meta: + plugin_name: ebpf.plugin + module_name: process + monitored_instance: + name: ebpf process + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: netdata.ebpf_aral_stat_size + description: Bytes allocated for ARAL. + unit: "bytes" + chart_type: stacked + dimensions: + - name: memory + - name: netdata.ebpf_aral_stat_alloc + description: Calls to allocate memory. + unit: "calls" + chart_type: stacked + dimensions: + - name: aral + - name: netdata.ebpf_threads + description: Threads info + unit: "threads" + chart_type: line + dimensions: + - name: total + - name: running + - name: netdata.ebpf_load_methods + description: Load info + unit: "methods" + chart_type: line + dimensions: + - name: legacy + - name: co-re + - name: netdata.ebpf_kernel_memory + description: Memory allocated for hash tables. 
+ unit: "bytes" + chart_type: line + dimensions: + - name: memory_locked + - name: netdata.ebpf_hash_tables_count + description: Number of hash tables loaded + unit: "hash tables" + chart_type: line + dimensions: + - name: hash_table + - name: netdata.ebpf_aral_stat_size + description: Bytes allocated for ARAL + unit: "bytes" + chart_type: stacked + dimensions: + - name: memory + - name: netdata.ebpf_aral_stat_alloc + description: Calls to allocate memory + unit: "calls" + chart_type: stacked + dimensions: + - name: aral + - name: netdata.ebpf_aral_stat_size + description: Bytes allocated for ARAL. + unit: "bytes" + chart_type: stacked + dimensions: + - name: memory + - name: netdata.ebpf_aral_stat_alloc + description: Calls to allocate memory + unit: "calls" + chart_type: stacked + dimensions: + - name: aral diff --git a/collectors/freebsd.plugin/freebsd_sysctl.c b/collectors/freebsd.plugin/freebsd_sysctl.c index a154c635..c8aa5dad 100644 --- a/collectors/freebsd.plugin/freebsd_sysctl.c +++ b/collectors/freebsd.plugin/freebsd_sysctl.c @@ -459,9 +459,9 @@ int do_dev_cpu_temperature(int update_every, usec_t dt) { static RRDDIM **rd_pcpu_temperature; if (unlikely(number_of_cpus != old_number_of_cpus)) { - rd_pcpu_temperature = reallocz(rd_pcpu_temperature, sizeof(RRDDIM) * number_of_cpus); + rd_pcpu_temperature = reallocz(rd_pcpu_temperature, sizeof(RRDDIM *) * number_of_cpus); if (unlikely(number_of_cpus > old_number_of_cpus)) - memset(&rd_pcpu_temperature[old_number_of_cpus], 0, sizeof(RRDDIM) * (number_of_cpus - old_number_of_cpus)); + memset(&rd_pcpu_temperature[old_number_of_cpus], 0, sizeof(RRDDIM *) * (number_of_cpus - old_number_of_cpus)); } if (unlikely(!st)) { diff --git a/collectors/freebsd.plugin/multi_metadata.yaml b/collectors/freebsd.plugin/multi_metadata.yaml new file mode 100644 index 00000000..6928df64 --- /dev/null +++ b/collectors/freebsd.plugin/multi_metadata.yaml @@ -0,0 +1,3031 @@ +name: freebsd.plugin +modules: + - meta: + plugin_name: 
freebsd.plugin + module_name: vm.loadavg + monitored_instance: + name: freebsd vm.loadavg + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: load_cpu_number + link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf + metric: system.load + info: number of active CPU cores in the system + os: "linux" + - name: load_average_15 + link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf + metric: system.load + info: system fifteen-minute load average + os: "linux" + - name: load_average_5 + link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf + metric: system.load + info: system five-minute load average + os: "linux" + - name: load_average_1 + link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf + metric: system.load + info: system one-minute load average + os: "linux" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.load + description: System Load Average + unit: "load" + chart_type: line + dimensions: + - name: load1 + - name: load5 + - name: load15 + - meta: + plugin_name: freebsd.plugin + 
module_name: vm.vmtotal + monitored_instance: + name: freebsd vm.vmtotal + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: active_processes + link: https://github.com/netdata/netdata/blob/master/health/health.d/processes.conf + metric: system.active_processes + info: system process IDs (PID) space utilization + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.active_processes + description: System Active Processes + unit: "processes" + chart_type: line + dimensions: + - name: active + - name: system.processes + description: System Processes + unit: "processes" + chart_type: line + dimensions: + - name: running + - name: blocked + - name: mem.real + description: Total Real Memory In Use + unit: "MiB" + chart_type: area + dimensions: + - name: used + - meta: + plugin_name: freebsd.plugin + module_name: kern.cp_time + monitored_instance: + name: freebsd kern.cp_time + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + 
metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: 10min_cpu_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf + metric: system.cpu + info: average CPU utilization over the last 10 minutes (excluding iowait, nice and steal) + os: "linux" + - name: 10min_cpu_iowait + link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf + metric: system.cpu + info: average CPU iowait time over the last 10 minutes + os: "linux" + - name: 20min_steal_cpu + link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf + metric: system.cpu + info: average CPU steal time over the last 20 minutes + os: "linux" + - name: 10min_cpu_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf + metric: system.cpu + info: average CPU utilization over the last 10 minutes (excluding nice) + os: "freebsd" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.cpu + description: Total CPU utilization + unit: "percentage" + chart_type: stacked + dimensions: + - name: nice + - name: system + - name: user + - name: interrupt + - name: idle + - name: core + description: "" + labels: [] + metrics: + - name: cpu.cpu + description: Core utilization + unit: "percentage" + chart_type: stacked + dimensions: + - name: nice + - name: system + - name: user + - 
name: interrupt + - name: idle + - meta: + plugin_name: freebsd.plugin + module_name: dev.cpu.temperature + monitored_instance: + name: freebsd dev.cpu.temperature + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: cpu.temperature + description: Core temperature + unit: "Celsius" + chart_type: line + dimensions: + - name: a dimension per core + - meta: + plugin_name: freebsd.plugin + module_name: dev.cpu.0.freq + monitored_instance: + name: freebsd dev.cpu.0.freq + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: 
'' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: cpu.scaling_cur_freq + description: Current CPU Scaling Frequency + unit: "MHz" + chart_type: line + dimensions: + - name: frequency + - meta: + plugin_name: freebsd.plugin + module_name: hw.intrcnt + monitored_instance: + name: freebsd hw.intrcnt + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.intr + description: Total Hardware Interrupts + unit: "interrupts/s" + chart_type: line + dimensions: + - name: interrupts + - name: system.interrupts + description: System interrupts + unit: "interrupts/s" + chart_type: stacked + dimensions: + - name: a dimension per interrupt + - meta: + plugin_name: freebsd.plugin + module_name: vm.stats.sys.v_intr + monitored_instance: + name: freebsd 
vm.stats.sys.v_intr + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.dev_intr + description: Device Interrupts + unit: "interrupts/s" + chart_type: line + dimensions: + - name: interrupts + - meta: + plugin_name: freebsd.plugin + module_name: vm.stats.sys.v_soft + monitored_instance: + name: freebsd vm.stats.sys.v_soft + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + 
list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.soft_intr + description: Software Interrupts + unit: "interrupts/s" + chart_type: line + dimensions: + - name: interrupts + - meta: + plugin_name: freebsd.plugin + module_name: vm.stats.sys.v_swtch + monitored_instance: + name: freebsd vm.stats.sys.v_swtch + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.ctxt + description: CPU Context Switches + unit: "context switches/s" + chart_type: line + dimensions: + - name: switches + - name: system.forks + description: Started Processes + unit: "processes/s" + chart_type: line + dimensions: + - name: started + - meta: + plugin_name: freebsd.plugin + module_name: vm.swap_info + monitored_instance: + name: freebsd vm.swap_info + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + 
description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: used_swap + link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf + metric: system.swap + info: swap memory utilization + os: "linux freebsd" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.swap + description: System Swap + unit: "MiB" + chart_type: stacked + dimensions: + - name: free + - name: used + - meta: + plugin_name: freebsd.plugin + module_name: system.ram + monitored_instance: + name: freebsd system.ram + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + 
troubleshooting: + problems: + list: [] + alerts: + - name: ram_in_use + link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf + metric: system.ram + info: system memory utilization + os: "linux" + - name: ram_in_use + link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf + metric: system.ram + info: system memory utilization + os: "freebsd" + - name: ram_available + link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf + metric: mem.available + info: percentage of estimated amount of RAM available for userspace processes, without causing swapping + os: "linux" + - name: ram_available + link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf + metric: mem.available + info: percentage of estimated amount of RAM available for userspace processes, without causing swapping + os: "freebsd" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.ram + description: System RAM + unit: "MiB" + chart_type: stacked + dimensions: + - name: free + - name: active + - name: inactive + - name: wired + - name: cache + - name: laundry + - name: buffers + - name: mem.available + description: Available RAM for applications + unit: "MiB" + chart_type: line + dimensions: + - name: avail + - meta: + plugin_name: freebsd.plugin + module_name: vm.stats.vm.v_swappgs + monitored_instance: + name: freebsd vm.stats.vm.v_swappgs + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + 
description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: 30min_ram_swapped_out + link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf + metric: system.swapio + info: percentage of the system RAM swapped in the last 30 minutes + os: "linux freebsd" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.swapio + description: Swap I/O + unit: "KiB/s" + chart_type: area + dimensions: + - name: in + - name: out + - meta: + plugin_name: freebsd.plugin + module_name: vm.stats.vm.v_pgfaults + monitored_instance: + name: freebsd vm.stats.vm.v_pgfaults + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: mem.pgfaults + description: 
Memory Page Faults + unit: "page faults/s" + chart_type: line + dimensions: + - name: memory + - name: io_requiring + - name: cow + - name: cow_optimized + - name: in_transit + - meta: + plugin_name: freebsd.plugin + module_name: kern.ipc.sem + monitored_instance: + name: freebsd kern.ipc.sem + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: semaphores_used + link: https://github.com/netdata/netdata/blob/master/health/health.d/ipc.conf + metric: system.ipc_semaphores + info: IPC semaphore utilization + os: "linux" + - name: semaphore_arrays_used + link: https://github.com/netdata/netdata/blob/master/health/health.d/ipc.conf + metric: system.ipc_semaphore_arrays + info: IPC semaphore arrays utilization + os: "linux" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.ipc_semaphores + description: IPC Semaphores + unit: "semaphores" + chart_type: area + dimensions: + - name: semaphores + - name: system.ipc_semaphore_arrays + description: IPC Semaphore Arrays + unit: "arrays" + chart_type: area + dimensions: + - name: arrays + - meta: + plugin_name: freebsd.plugin + module_name: kern.ipc.shm + 
monitored_instance: + name: freebsd kern.ipc.shm + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.ipc_shared_mem_segs + description: IPC Shared Memory Segments + unit: "segments" + chart_type: area + dimensions: + - name: segments + - name: system.ipc_shared_mem_size + description: IPC Shared Memory Segments Size + unit: "KiB" + chart_type: area + dimensions: + - name: allocated + - meta: + plugin_name: freebsd.plugin + module_name: kern.ipc.msq + monitored_instance: + name: freebsd kern.ipc.msq + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + 
configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.ipc_msq_queues + description: Number of IPC Message Queues + unit: "queues" + chart_type: area + dimensions: + - name: queues + - name: system.ipc_msq_messages + description: Number of Messages in IPC Message Queues + unit: "messages" + chart_type: area + dimensions: + - name: messages + - name: system.ipc_msq_size + description: Size of IPC Message Queues + unit: "bytes" + chart_type: line + dimensions: + - name: allocated + - name: used + - meta: + plugin_name: freebsd.plugin + module_name: uptime + monitored_instance: + name: freebsd uptime + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.uptime + description: System Uptime + 
unit: "seconds" + chart_type: line + dimensions: + - name: uptime + - meta: + plugin_name: freebsd.plugin + module_name: net.isr + monitored_instance: + name: freebsd net.isr + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: 1min_netdev_backlog_exceeded + link: https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf + metric: system.softnet_stat + info: average number of dropped packets in the last minute due to exceeded net.core.netdev_max_backlog + os: "linux" + - name: 1min_netdev_budget_ran_outs + link: https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf + metric: system.softnet_stat + info: average number of times ksoftirq ran out of sysctl net.core.netdev_budget or net.core.netdev_budget_usecs with work remaining over the last minute (this can be a cause for dropped packets) + os: "linux" + - name: 10min_netisr_backlog_exceeded + link: https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf + metric: system.softnet_stat + info: average number of drops in the last minute due to exceeded sysctl net.route.netisr_maxqlen (this can be a cause for dropped packets) + os: "freebsd" + metrics: + folding: + title: Metrics + enabled: false + description: "" + 
availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.softnet_stat + description: System softnet_stat + unit: "events/s" + chart_type: line + dimensions: + - name: dispatched + - name: hybrid_dispatched + - name: qdrops + - name: queued + - name: core + description: "" + labels: [] + metrics: + - name: cpu.softnet_stat + description: Per CPU netisr statistics + unit: "events/s" + chart_type: line + dimensions: + - name: dispatched + - name: hybrid_dispatched + - name: qdrops + - name: queued + - meta: + plugin_name: freebsd.plugin + module_name: devstat + monitored_instance: + name: freebsd devstat + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: 10min_disk_utilization + link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf + metric: disk.util + info: average percentage of time ${label:device} disk was busy over the last 10 minutes + os: "linux freebsd" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.io + description: Disk I/O + unit: "KiB/s" + chart_type: area + dimensions: + - name: in + - name: out + - name: 
disk + description: "" + labels: [] + metrics: + - name: disk.io + description: Disk I/O Bandwidth + unit: "KiB/s" + chart_type: area + dimensions: + - name: reads + - name: writes + - name: frees + - name: disk.ops + description: Disk Completed I/O Operations + unit: "operations/s" + chart_type: line + dimensions: + - name: reads + - name: writes + - name: other + - name: frees + - name: disk.qops + description: Disk Current I/O Operations + unit: "operations" + chart_type: line + dimensions: + - name: operations + - name: disk.util + description: Disk Utilization Time + unit: "% of time working" + chart_type: line + dimensions: + - name: utilization + - name: disk.iotime + description: Disk Total I/O Time + unit: "milliseconds/s" + chart_type: line + dimensions: + - name: reads + - name: writes + - name: other + - name: frees + - name: disk.await + description: Average Completed I/O Operation Time + unit: "milliseconds/operation" + chart_type: line + dimensions: + - name: reads + - name: writes + - name: other + - name: frees + - name: disk.avgsz + description: Average Completed I/O Operation Bandwidth + unit: "KiB/operation" + chart_type: area + dimensions: + - name: reads + - name: writes + - name: frees + - name: disk.svctm + description: Average Service Time + unit: "milliseconds/operation" + chart_type: line + dimensions: + - name: svctm + - meta: + plugin_name: freebsd.plugin + module_name: net.inet.tcp.states + monitored_instance: + name: freebsd net.inet.tcp.states + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: 
+ description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: tcp_connections + link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_conn.conf + metric: ipv4.tcpsock + info: IPv4 TCP connections utilization + os: "linux" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: ipv4.tcpsock + description: IPv4 TCP Connections + unit: "active connections" + chart_type: line + dimensions: + - name: connections + - meta: + plugin_name: freebsd.plugin + module_name: net.inet.tcp.stats + monitored_instance: + name: freebsd net.inet.tcp.stats + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: 1m_ipv4_tcp_resets_sent + link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf + metric: ipv4.tcphandshake + info: average number of sent TCP RESETS over the last minute + os: "linux" + - name: 10s_ipv4_tcp_resets_sent + link: 
https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf + metric: ipv4.tcphandshake + info: average number of sent TCP RESETS over the last 10 seconds. This can indicate a port scan, or that a service running on this host has crashed. Netdata will not send a clear notification for this alarm. + os: "linux" + - name: 1m_ipv4_tcp_resets_received + link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf + metric: ipv4.tcphandshake + info: average number of received TCP RESETS over the last minute + os: "linux freebsd" + - name: 10s_ipv4_tcp_resets_received + link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf + metric: ipv4.tcphandshake + info: average number of received TCP RESETS over the last 10 seconds. This can be an indication that a service this host needs has crashed. Netdata will not send a clear notification for this alarm. + os: "linux freebsd" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: ipv4.tcppackets + description: IPv4 TCP Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipv4.tcperrors + description: IPv4 TCP Errors + unit: "packets/s" + chart_type: line + dimensions: + - name: InErrs + - name: InCsumErrors + - name: RetransSegs + - name: ipv4.tcphandshake + description: IPv4 TCP Handshake Issues + unit: "events/s" + chart_type: line + dimensions: + - name: EstabResets + - name: ActiveOpens + - name: PassiveOpens + - name: AttemptFails + - name: ipv4.tcpconnaborts + description: TCP Connection Aborts + unit: "connections/s" + chart_type: line + dimensions: + - name: baddata + - name: userclosed + - name: nomemory + - name: timeout + - name: linger + - name: ipv4.tcpofo + description: TCP Out-Of-Order Queue + unit: "packets/s" + chart_type: line + dimensions: + - name: inqueue + - name: 
ipv4.tcpsyncookies + description: TCP SYN Cookies + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: failed + - name: ipv4.tcplistenissues + description: TCP Listen Socket Issues + unit: "packets/s" + chart_type: line + dimensions: + - name: overflows + - name: ipv4.ecnpkts + description: IPv4 ECN Statistics + unit: "packets/s" + chart_type: line + dimensions: + - name: InCEPkts + - name: InECT0Pkts + - name: InECT1Pkts + - name: OutECT0Pkts + - name: OutECT1Pkts + - meta: + plugin_name: freebsd.plugin + module_name: net.inet.udp.stats + monitored_instance: + name: freebsd net.inet.udp.stats + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: 1m_ipv4_udp_receive_buffer_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf + metric: ipv4.udperrors + info: average number of UDP receive buffer errors over the last minute + os: "linux freebsd" + - name: 1m_ipv4_udp_send_buffer_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf + metric: ipv4.udperrors + info: average number of UDP send buffer errors over the last minute + os: "linux" + metrics: + folding: + title: Metrics + enabled: false + 
description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: ipv4.udppackets + description: IPv4 UDP Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipv4.udperrors + description: IPv4 UDP Errors + unit: "events/s" + chart_type: line + dimensions: + - name: InErrors + - name: NoPorts + - name: RcvbufErrors + - name: InCsumErrors + - name: IgnoredMulti + - meta: + plugin_name: freebsd.plugin + module_name: net.inet.icmp.stats + monitored_instance: + name: freebsd net.inet.icmp.stats + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: ipv4.icmp + description: IPv4 ICMP Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipv4.icmp_errors + description: IPv4 ICMP Errors + unit: "packets/s" + chart_type: line + dimensions: + - name: InErrors + - name: OutErrors + - name: InCsumErrors + - name: ipv4.icmpmsg + description: IPv4 ICMP Messages + unit: "packets/s" + chart_type: line + dimensions: + - name: 
InEchoReps + - name: OutEchoReps + - name: InEchos + - name: OutEchos + - meta: + plugin_name: freebsd.plugin + module_name: net.inet.ip.stats + monitored_instance: + name: freebsd net.inet.ip.stats + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: ipv4.packets + description: IPv4 Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: forwarded + - name: delivered + - name: ipv4.fragsout + description: IPv4 Fragments Sent + unit: "packets/s" + chart_type: line + dimensions: + - name: ok + - name: failed + - name: created + - name: ipv4.fragsin + description: IPv4 Fragments Reassembly + unit: "packets/s" + chart_type: line + dimensions: + - name: ok + - name: failed + - name: all + - name: ipv4.errors + description: IPv4 Errors + unit: "packets/s" + chart_type: line + dimensions: + - name: InDiscards + - name: OutDiscards + - name: InHdrErrors + - name: OutNoRoutes + - name: InAddrErrors + - name: InUnknownProtos + - meta: + plugin_name: freebsd.plugin + module_name: net.inet6.ip6.stats + monitored_instance: 
+ name: freebsd net.inet6.ip6.stats + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: ipv6.packets + description: IPv6 Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: forwarded + - name: delivers + - name: ipv6.fragsout + description: IPv6 Fragments Sent + unit: "packets/s" + chart_type: line + dimensions: + - name: ok + - name: failed + - name: all + - name: ipv6.fragsin + description: IPv6 Fragments Reassembly + unit: "packets/s" + chart_type: line + dimensions: + - name: ok + - name: failed + - name: timeout + - name: all + - name: ipv6.errors + description: IPv6 Errors + unit: "packets/s" + chart_type: line + dimensions: + - name: InDiscards + - name: OutDiscards + - name: InHdrErrors + - name: InAddrErrors + - name: InTruncatedPkts + - name: InNoRoutes + - name: OutNoRoutes + - meta: + plugin_name: freebsd.plugin + module_name: net.inet6.icmp6.stats + monitored_instance: + name: freebsd net.inet6.icmp6.stats + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + 
list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: ipv6.icmp + description: IPv6 ICMP Messages + unit: "messages/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipv6.icmpredir + description: IPv6 ICMP Redirects + unit: "redirects/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipv6.icmperrors + description: IPv6 ICMP Errors + unit: "errors/s" + chart_type: line + dimensions: + - name: InErrors + - name: OutErrors + - name: InCsumErrors + - name: InDestUnreachs + - name: InPktTooBigs + - name: InTimeExcds + - name: InParmProblems + - name: OutDestUnreachs + - name: OutTimeExcds + - name: OutParmProblems + - name: ipv6.icmpechos + description: IPv6 ICMP Echo + unit: "messages/s" + chart_type: line + dimensions: + - name: InEchos + - name: OutEchos + - name: InEchoReplies + - name: OutEchoReplies + - name: ipv6.icmprouter + description: IPv6 Router Messages + unit: "messages/s" + chart_type: line + dimensions: + - name: InSolicits + - name: OutSolicits + - name: InAdvertisements + - name: OutAdvertisements + - name: ipv6.icmpneighbor + description: IPv6 Neighbor Messages + unit: 
"messages/s" + chart_type: line + dimensions: + - name: InSolicits + - name: OutSolicits + - name: InAdvertisements + - name: OutAdvertisements + - name: ipv6.icmptypes + description: IPv6 ICMP Types + unit: "messages/s" + chart_type: line + dimensions: + - name: InType1 + - name: InType128 + - name: InType129 + - name: InType136 + - name: OutType1 + - name: OutType128 + - name: OutType129 + - name: OutType133 + - name: OutType135 + - name: OutType143 + - meta: + plugin_name: freebsd.plugin + module_name: ipfw + monitored_instance: + name: freebsd ipfw + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: ipfw.mem + description: Memory allocated by rules + unit: "bytes" + chart_type: stacked + dimensions: + - name: dynamic + - name: static + - name: ipfw.packets + description: Packets + unit: "packets/s" + chart_type: stacked + dimensions: + - name: a dimension per static rule + - name: ipfw.bytes + description: Bytes + unit: "bytes/s" + chart_type: stacked + dimensions: + - name: a dimension per static rule + - name: ipfw.active + description: Active rules + 
unit: "rules" + chart_type: stacked + dimensions: + - name: a dimension per dynamic rule + - name: ipfw.expired + description: Expired rules + unit: "rules" + chart_type: stacked + dimensions: + - name: a dimension per dynamic rule + - meta: + plugin_name: freebsd.plugin + module_name: getifaddrs + monitored_instance: + name: freebsd getifaddrs + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: interface_speed + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.net + info: network interface ${label:device} current speed + os: "*" + - name: 1m_received_traffic_overflow + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.net + info: average inbound utilization for the network interface ${label:device} over the last minute + os: "linux" + - name: 1m_sent_traffic_overflow + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.net + info: average outbound utilization for the network interface ${label:device} over the last minute + os: "linux" + - name: inbound_packets_dropped_ratio + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.packets + info: ratio of 
inbound dropped packets for the network interface ${label:device} over the last 10 minutes + os: "linux" + - name: outbound_packets_dropped_ratio + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.packets + info: ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes + os: "linux" + - name: wifi_inbound_packets_dropped_ratio + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.packets + info: ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes + os: "linux" + - name: wifi_outbound_packets_dropped_ratio + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.packets + info: ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes + os: "linux" + - name: 1m_received_packets_rate + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.packets + info: average number of packets received by the network interface ${label:device} over the last minute + os: "linux freebsd" + - name: 10s_received_packets_storm + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.packets + info: ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute + os: "linux freebsd" + - name: interface_inbound_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.errors + info: number of inbound errors for the network interface ${label:device} in the last 10 minutes + os: "freebsd" + - name: interface_outbound_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.errors + info: number of outbound errors for the network interface ${label:device} in the last 10 minutes + os: "freebsd" 
+ - name: inbound_packets_dropped + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.drops + info: number of inbound dropped packets for the network interface ${label:device} in the last 10 minutes + os: "linux" + - name: outbound_packets_dropped + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.drops + info: number of outbound dropped packets for the network interface ${label:device} in the last 10 minutes + os: "linux" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.net + description: Network Traffic + unit: "kilobits/s" + chart_type: area + dimensions: + - name: received + - name: sent + - name: system.packets + description: Network Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: multicast_received + - name: multicast_sent + - name: system.ipv4 + description: IPv4 Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: received + - name: sent + - name: system.ipv6 + description: IPv6 Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: received + - name: sent + - name: network device + description: "" + labels: [] + metrics: + - name: net.net + description: Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: received + - name: sent + - name: net.packets + description: Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: multicast_received + - name: multicast_sent + - name: net.errors + description: Interface Errors + unit: "errors/s" + chart_type: line + dimensions: + - name: inbound + - name: outbound + - name: net.drops + description: Interface Drops + unit: "drops/s" + chart_type: line + dimensions: + - name: inbound + - name: outbound + - name: net.events + description: Network 
Interface Events + unit: "events/s" + chart_type: line + dimensions: + - name: collisions + - meta: + plugin_name: freebsd.plugin + module_name: getmntinfo + monitored_instance: + name: freebsd getmntinfo + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: disk_space_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf + metric: disk.space + info: disk ${label:mount_point} space utilization + os: "linux freebsd" + - name: disk_inode_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf + metric: disk.inodes + info: disk ${label:mount_point} inode utilization + os: "linux freebsd" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: mount point + description: "" + labels: [] + metrics: + - name: disk.space + description: Disk Space Usage for {mounted dir} [{mounted filesystem}] + unit: "GiB" + chart_type: stacked + dimensions: + - name: avail + - name: used + - name: reserved_for_root + - name: disk.inodes + description: Disk Files (inodes) Usage for {mounted dir} [{mounted filesystem}] + unit: "inodes" + chart_type: stacked + dimensions: + - name: avail + - name: used + - name: 
reserved_for_root + - meta: + plugin_name: freebsd.plugin + module_name: zfs + monitored_instance: + name: freebsd zfs + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: zfs_memory_throttle + link: https://github.com/netdata/netdata/blob/master/health/health.d/zfs.conf + metric: zfs.memory_ops + info: number of times ZFS had to limit the ARC growth in the last 10 minutes + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: zfs.arc_size + description: ZFS ARC Size + unit: "MiB" + chart_type: area + dimensions: + - name: arcsz + - name: target + - name: min + - name: max + - name: zfs.l2_size + description: ZFS L2 ARC Size + unit: "MiB" + chart_type: area + dimensions: + - name: actual + - name: size + - name: zfs.reads + description: ZFS Reads + unit: "reads/s" + chart_type: area + dimensions: + - name: arc + - name: demand + - name: prefetch + - name: metadata + - name: l2 + - name: zfs.bytes + description: ZFS ARC L2 Read/Write Rate + unit: "KiB/s" + chart_type: area + dimensions: + - name: read + - name: write + - name: zfs.hits + description: ZFS ARC Hits + unit: "percentage" + chart_type: stacked + 
dimensions: + - name: hits + - name: misses + - name: zfs.hits_rate + description: ZFS ARC Hits Rate + unit: "events/s" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.dhits + description: ZFS Demand Hits + unit: "percentage" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.dhits_rate + description: ZFS Demand Hits Rate + unit: "events/s" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.phits + description: ZFS Prefetch Hits + unit: "percentage" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.phits_rate + description: ZFS Prefetch Hits Rate + unit: "events/s" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.mhits + description: ZFS Metadata Hits + unit: "percentage" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.mhits_rate + description: ZFS Metadata Hits Rate + unit: "events/s" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.l2hits + description: ZFS L2 Hits + unit: "percentage" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.l2hits_rate + description: ZFS L2 Hits Rate + unit: "events/s" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.list_hits + description: ZFS List Hits + unit: "hits/s" + chart_type: area + dimensions: + - name: mfu + - name: mfu_ghost + - name: mru + - name: mru_ghost + - name: zfs.arc_size_breakdown + description: ZFS ARC Size Breakdown + unit: "percentage" + chart_type: stacked + dimensions: + - name: recent + - name: frequent + - name: zfs.memory_ops + description: ZFS Memory Operations + unit: "operations/s" + chart_type: line + dimensions: + - name: throttled + - name: zfs.important_ops + description: ZFS Important Operations + unit: "operations/s" + chart_type: line + dimensions: + - name: evict_skip + - name: deleted + - name: mutex_miss 
+ - name: hash_collisions + - name: zfs.actual_hits + description: ZFS Actual Cache Hits + unit: "percentage" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.actual_hits_rate + description: ZFS Actual Cache Hits Rate + unit: "events/s" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.demand_data_hits + description: ZFS Data Demand Efficiency + unit: "percentage" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.demand_data_hits_rate + description: ZFS Data Demand Efficiency Rate + unit: "events/s" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.prefetch_data_hits + description: ZFS Data Prefetch Efficiency + unit: "percentage" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.prefetch_data_hits_rate + description: ZFS Data Prefetch Efficiency Rate + unit: "events/s" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.hash_elements + description: ZFS ARC Hash Elements + unit: "elements" + chart_type: line + dimensions: + - name: current + - name: max + - name: zfs.hash_chains + description: ZFS ARC Hash Chains + unit: "chains" + chart_type: line + dimensions: + - name: current + - name: max + - name: zfs.trim_bytes + description: Successfully TRIMmed bytes + unit: "bytes" + chart_type: line + dimensions: + - name: TRIMmed + - name: zfs.trim_requests + description: TRIM requests + unit: "requests" + chart_type: line + dimensions: + - name: successful + - name: failed + - name: unsupported diff --git a/collectors/freebsd.plugin/plugin_freebsd.c b/collectors/freebsd.plugin/plugin_freebsd.c index e47b224c..976fe26f 100644 --- a/collectors/freebsd.plugin/plugin_freebsd.c +++ b/collectors/freebsd.plugin/plugin_freebsd.c @@ -121,7 +121,7 @@ void *freebsd_main(void *ptr) if (unlikely(!pm->enabled)) continue; - debug(D_PROCNETDEV_LOOP, "FREEBSD calling %s.", pm->name); + 
netdata_log_debug(D_PROCNETDEV_LOOP, "FREEBSD calling %s.", pm->name); worker_is_busy(i); pm->enabled = !pm->func(localhost->rrd_update_every, hb_dt); diff --git a/collectors/freeipmi.plugin/README.md b/collectors/freeipmi.plugin/README.md index 47decd7f..5a9fd93c 100644 --- a/collectors/freeipmi.plugin/README.md +++ b/collectors/freeipmi.plugin/README.md @@ -11,7 +11,10 @@ learn_rel_path: "Integrations/Monitor/Devices" Netdata has a [freeipmi](https://www.gnu.org/software/freeipmi/) plugin. -> FreeIPMI provides in-band and out-of-band IPMI software based on the IPMI v1.5/2.0 specification. The IPMI specification defines a set of interfaces for platform management and is implemented by a number vendors for system management. The features of IPMI that most users will be interested in are sensor monitoring, system event monitoring, power control, and serial-over-LAN (SOL). +> FreeIPMI provides in-band and out-of-band IPMI software based on the IPMI v1.5/2.0 specification. The IPMI +> specification defines a set of interfaces for platform management and is implemented by a number vendors for system +> management. The features of IPMI that most users will be interested in are sensor monitoring, system event monitoring, +> power control, and serial-over-LAN (SOL). ## Installing the FreeIPMI plugin @@ -22,7 +25,8 @@ installed automatically due to the large number of dependencies it requires. When using a static build of Netdata, the FreeIPMI plugin will be included and installed automatically, though you will still need to have FreeIPMI installed on your system to be able to use the plugin. -When using a local build of Netdata, you need to ensure that the FreeIPMI development packages (typically called `libipmimonitoring-dev`, `libipmimonitoring-devel`, or `freeipmi-devel`) are installed when building Netdata. 
+When using a local build of Netdata, you need to ensure that the FreeIPMI development packages (typically +called `libipmimonitoring-dev`, `libipmimonitoring-devel`, or `freeipmi-devel`) are installed when building Netdata. ### Special Considerations @@ -30,7 +34,9 @@ Accessing IPMI requires root access, so the FreeIPMI plugin is automatically ins FreeIPMI does not work correctly on IBM POWER systems, thus Netdata’s FreeIPMI plugin is not usable on such systems. -If you have not previously used IPMI on your system, you will probably need to run the `ipmimonitoring` command as root to initiailze IPMI settings so that the Netdata plugin works correctly. It should return information about available seensors on the system. +If you have not previously used IPMI on your system, you will probably need to run the `ipmimonitoring` command as root +to initialize IPMI settings so that the Netdata plugin works correctly. It should return information about available +sensors on the system. In some distributions `libipmimonitoring.pc` is located in a non-standard directory, which can cause building the plugin to fail when building Netdata from source. In that case you @@ -38,37 +44,68 @@ should find the file and link it to the standard pkg-config directory. Usually, /usr/lib/$(uname -m)-linux-gnu/pkgconfig/libipmimonitoring.pc/libipmimonitoring.pc /usr/lib/pkgconfig/libipmimonitoring.pc` resolves this issue. -## Netdata use +## Metrics -The plugin creates (up to) 8 charts, based on the information collected from IPMI: +The plugin does a speed test when it starts, to find out the duration needed by the IPMI processor to respond. Depending +on the speed of your IPMI processor, charts may need several seconds to show up on the dashboard. -1. number of sensors by state -2. number of events in SEL -3. Temperatures CELSIUS -4. Temperatures FAHRENHEIT -5. Voltages -6. Currents -7. Power -8. Fans +Metrics grouped by *scope*. 
-It also adds 2 alarms: +The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. -1. Sensors in non-nominal state (i.e. warning and critical) -2. SEL is non empty +### global -![image](https://cloud.githubusercontent.com/assets/2662304/23674138/88926a20-037d-11e7-89c0-20e74ee10cd1.png) +These metrics refer to the monitored host. -The plugin does a speed test when it starts, to find out the duration needed by the IPMI processor to respond. Depending on the speed of your IPMI processor, charts may need several seconds to show up on the dashboard. +This scope has no labels. -## `freeipmi.plugin` configuration +Metrics: + +| Metric | Dimensions | Unit | +|----------|:----------:|:------:| +| ipmi.sel | events | events | + +### sensor + +These metrics refer to the sensor. + +Labels: + +| Label | Description | +|-----------|-----------------------------------------------------------------------------------------------------------------| +| sensor | Sensor name. Same value as the "Name" column in the `ipmi-sensors` output. | +| type | Sensor type. Same value as the "Type" column in the `ipmi-sensors` output. | +| component | General sensor component. Identified by Netdata based on sensor name and type (e.g. System, Processor, Memory). | + +Metrics: + +| Metric | Dimensions | Unit | +|-----------------------------|:-----------------------------------:|:----------:| +| ipmi.sensor_state | nominal, critical, warning, unknown | state | +| ipmi.sensor_temperature_c | temperature | Celsius | +| ipmi.sensor_temperature_f | temperature | Fahrenheit | +| ipmi.sensor_voltage | voltage | Volts | +| ipmi.sensor_ampere | ampere | Amps | +| ipmi.sensor_fan_speed | rotations | RPM | +| ipmi.sensor_power | power | Watts | +| ipmi.sensor_reading_percent | percentage | % | + +## Alarms + +There are 2 alarms: + +- The sensor is in a warning or critical state. +- System Event Log (SEL) is non-empty. 
+ +## Configuration The plugin supports a few options. To see them, run: ```text -# /usr/libexec/netdata/plugins.d/freeipmi.plugin -h +# ./freeipmi.plugin --help - netdata freeipmi.plugin 1.8.0-546-g72ce5d6b_rolling - Copyright (C) 2016-2017 Costa Tsaousis <costa@tsaousis.gr> + netdata freeipmi.plugin v1.40.0-137-gf162c25bd + Copyright (C) 2023 Netdata Inc. Released under GNU General Public License v3 or later. All rights reserved. @@ -86,17 +123,49 @@ The plugin supports a few options. To see them, run: no-sel enable/disable SEL collection default: enabled + reread-sdr-cache re-read SDR cache on every iteration + default: disabled + + interpret-oem-data attempt to parse OEM data + default: disabled + + assume-system-event-record + tread illegal SEL events records as normal + default: disabled + + ignore-non-interpretable-sensors + do not read sensors that cannot be interpreted + default: disabled + + bridge-sensors bridge sensors not owned by the BMC + default: disabled + + shared-sensors enable shared sensors, if found + default: disabled + + no-discrete-reading do not read sensors that their event/reading type code is invalid + default: enabled + + ignore-scanning-disabled + Ignore the scanning bit and read sensors no matter what + default: disabled + + assume-bmc-owner assume the BMC is the sensor owner no matter what + (usually bridging is required too) + default: disabled + hostname HOST username USER password PASS connect to remote IPMI host default: local IPMI processor + no-auth-code-check noauthcodecheck don't check the authentication codes returned - driver-type IPMIDRIVER - Specify the driver type to use instead of doing an auto selection. - The currently available outofband drivers are LAN and LAN_2_0, - which perform IPMI 1.5 and IPMI 2.0 respectively. + driver-type IPMIDRIVER + Specify the driver type to use instead of doing an auto selection. 
+ The currently available outofband drivers are LAN and LAN_2_0, + which perform IPMI 1.5 and IPMI 2.0 respectively. The currently available inband drivers are KCS, SSIF, OPENIPMI and SUNBMC. sdr-cache-dir PATH directory for SDR cache files @@ -105,9 +174,15 @@ The plugin supports a few options. To see them, run: sensor-config-file FILE filename to read sensor configuration default: system default + sel-config-file FILE filename to read sel configuration + default: system default + ignore N1,N2,N3,... sensor IDs to ignore default: none + ignore-status N1,N2,N3,... sensor IDs to ignore status (nominal/warning/critical) + default: none + -v -V version print version and exit @@ -131,13 +206,17 @@ You can set these options in `/etc/netdata/netdata.conf` at this section: command options = ``` -Append to `command options =` the settings you need. The minimum `update every` is 5 (enforced internally by the plugin). IPMI is slow and CPU hungry. So, once every 5 seconds is pretty acceptable. +Append to `command options =` the settings you need. The minimum `update every` is 5 (enforced internally by the +plugin). IPMI is slow and CPU hungry. So, once every 5 seconds is pretty acceptable. ## Ignoring specific sensors -Specific sensor IDs can be excluded from freeipmi tools by editing `/etc/freeipmi/freeipmi.conf` and setting the IDs to be ignored at `ipmi-sensors-exclude-record-ids`. **However this file is not used by `libipmimonitoring`** (the library used by Netdata's `freeipmi.plugin`). +Specific sensor IDs can be excluded from freeipmi tools by editing `/etc/freeipmi/freeipmi.conf` and setting the IDs to +be ignored at `ipmi-sensors-exclude-record-ids`. **However this file is not used by `libipmimonitoring`** (the library +used by Netdata's `freeipmi.plugin`). -So, `freeipmi.plugin` supports the option `ignore` that accepts a comma separated list of sensor IDs to ignore. 
To configure it, edit `/etc/netdata/netdata.conf` and set: +So, `freeipmi.plugin` supports the option `ignore` that accepts a comma separated list of sensor IDs to ignore. To +configure it, edit `/etc/netdata/netdata.conf` and set: ``` [plugin:freeipmi] @@ -196,7 +275,9 @@ You can also permanently set the above setting by creating the file `/etc/modpro options ipmi_si kipmid_max_busy_us=10 ``` -This instructs the kernel IPMI module to pause for a tick between checking IPMI. Querying IPMI will be a lot slower now (e.g. several seconds for IPMI to respond), but `kipmi` will not use any noticeable CPU. You can also use a higher number (this is the number of microseconds to poll IPMI for a response, before waiting for a tick). +This instructs the kernel IPMI module to pause for a tick between checking IPMI. Querying IPMI will be a lot slower +now (e.g. several seconds for IPMI to respond), but `kipmi` will not use any noticeable CPU. You can also use a higher +number (this is the number of microseconds to poll IPMI for a response, before waiting for a tick). If you need to disable IPMI for Netdata, edit `/etc/netdata/netdata.conf` and set: diff --git a/collectors/freeipmi.plugin/freeipmi_plugin.c b/collectors/freeipmi.plugin/freeipmi_plugin.c index a2251891..bfd867cc 100644 --- a/collectors/freeipmi.plugin/freeipmi_plugin.c +++ b/collectors/freeipmi.plugin/freeipmi_plugin.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-3.0-or-later /* * netdata freeipmi.plugin - * Copyright (C) 2017 Costa Tsaousis + * Copyright (C) 2023 Netdata Inc. 
* GPL v3+ * * Based on: @@ -15,9 +15,59 @@ * UCRL-CODE-222073 */ +// ---------------------------------------------------------------------------- +// BEGIN NETDATA CODE + +// #define NETDATA_TIMING_REPORT 1 #include "libnetdata/libnetdata.h" #include "libnetdata/required_dummies.h" +// component names, based on our patterns +#define NETDATA_SENSOR_COMPONENT_MEMORY_MODULE "Memory Module" +#define NETDATA_SENSOR_COMPONENT_MEMORY "Memory" +#define NETDATA_SENSOR_COMPONENT_PROCESSOR "Processor" +#define NETDATA_SENSOR_COMPONENT_IPU "Image Processor" +#define NETDATA_SENSOR_COMPONENT_STORAGE "Storage" +#define NETDATA_SENSOR_COMPONENT_MOTHERBOARD "Motherboard" +#define NETDATA_SENSOR_COMPONENT_NETWORK "Network" +#define NETDATA_SENSOR_COMPONENT_POWER_SUPPLY "Power Supply" +#define NETDATA_SENSOR_COMPONENT_SYSTEM "System" +#define NETDATA_SENSOR_COMPONENT_PERIPHERAL "Peripheral" + +// netdata plugin defaults +#define SENSORS_DICT_KEY_SIZE 2048 // the max size of the key for the dictionary of sensors +#define SPEED_TEST_ITERATIONS 5 // how many times to repeat data collection to decide latency +#define IPMI_SENSORS_DASHBOARD_PRIORITY 90000 // the priority of the sensors charts on the dashboard +#define IPMI_SEL_DASHBOARD_PRIORITY 99000 // the priority of the SEL events chart on the dashboard +#define IPMI_SENSORS_MIN_UPDATE_EVERY 5 // the minimum data collection frequency for sensors +#define IPMI_SEL_MIN_UPDATE_EVERY 30 // the minimum data collection frequency for SEL events +#define IPMI_ENABLE_SEL_BY_DEFAULT true // true/false, to enable/disable SEL by default +#define IPMI_RESTART_EVERY_SECONDS 14400 // restart the plugin every this many seconds + // this is to prevent possible bugs/leaks in ipmi libraries +#define IPMI_RESTART_IF_SENSORS_DONT_ITERATE_EVERY_SECONDS (10 * 60) // stale data collection detection time + +// forward definition of functions and structures +struct netdata_ipmi_state; +static void netdata_update_ipmi_sensor_reading( + int record_id + , int 
sensor_number + , int sensor_type + , int sensor_state + , int sensor_units + , int sensor_reading_type + , char *sensor_name + , void *sensor_reading + , int event_reading_type_code + , int sensor_bitmask_type + , int sensor_bitmask + , char **sensor_bitmask_strings + , struct netdata_ipmi_state *state +); +static void netdata_update_ipmi_sel_events_count(struct netdata_ipmi_state *state, uint32_t events); + +// END NETDATA CODE +// ---------------------------------------------------------------------------- + #include <stdio.h> #include <stdlib.h> #include <stdint.h> @@ -27,22 +77,9 @@ #include <unistd.h> #include <sys/time.h> -#define IPMI_PARSE_DEVICE_LAN_STR "lan" -#define IPMI_PARSE_DEVICE_LAN_2_0_STR "lan_2_0" -#define IPMI_PARSE_DEVICE_LAN_2_0_STR2 "lan20" -#define IPMI_PARSE_DEVICE_LAN_2_0_STR3 "lan_20" -#define IPMI_PARSE_DEVICE_LAN_2_0_STR4 "lan2_0" -#define IPMI_PARSE_DEVICE_LAN_2_0_STR5 "lanplus" -#define IPMI_PARSE_DEVICE_KCS_STR "kcs" -#define IPMI_PARSE_DEVICE_SSIF_STR "ssif" -#define IPMI_PARSE_DEVICE_OPENIPMI_STR "openipmi" -#define IPMI_PARSE_DEVICE_OPENIPMI_STR2 "open" -#define IPMI_PARSE_DEVICE_SUNBMC_STR "sunbmc" -#define IPMI_PARSE_DEVICE_SUNBMC_STR2 "bmc" -#define IPMI_PARSE_DEVICE_INTELDCMI_STR "inteldcmi" - #include <ipmi_monitoring.h> #include <ipmi_monitoring_bitmasks.h> +#include <ipmi_monitoring_offsets.h> /* Communication Configuration - Initialize accordingly */ @@ -50,53 +87,38 @@ char *hostname = NULL; /* In-band Communication Configuration */ -int driver_type = -1; // IPMI_MONITORING_DRIVER_TYPE_KCS; /* or -1 for default */ -int disable_auto_probe = 0; /* probe for in-band device */ -unsigned int driver_address = 0; /* not used if probing */ -unsigned int register_spacing = 0; /* not used if probing */ -char *driver_device = NULL; /* not used if probing */ +int driver_type = -1; // IPMI_MONITORING_DRIVER_TYPE_KCS, etc. 
or -1 for default +int disable_auto_probe = 0; /* probe for in-band device */ +unsigned int driver_address = 0; /* not used if probing */ +unsigned int register_spacing = 0; /* not used if probing */ +char *driver_device = NULL; /* not used if probing */ /* Out-of-band Communication Configuration */ -int protocol_version = -1; //IPMI_MONITORING_PROTOCOL_VERSION_1_5; /* or -1 for default */ -char *username = "foousername"; -char *password = "foopassword"; -unsigned char *ipmi_k_g = NULL; -unsigned int ipmi_k_g_len = 0; -int privilege_level = -1; // IPMI_MONITORING_PRIVILEGE_LEVEL_USER; /* or -1 for default */ -int authentication_type = -1; // IPMI_MONITORING_AUTHENTICATION_TYPE_MD5; /* or -1 for default */ -int cipher_suite_id = 0; /* or -1 for default */ +int protocol_version = -1; // IPMI_MONITORING_PROTOCOL_VERSION_1_5, etc. or -1 for default +char *username = ""; +char *password = ""; +unsigned char *k_g = NULL; +unsigned int k_g_len = 0; +int privilege_level = -1; // IPMI_MONITORING_PRIVILEGE_LEVEL_USER, etc. or -1 for default +int authentication_type = -1; // IPMI_MONITORING_AUTHENTICATION_TYPE_MD5, etc. or -1 for default +int cipher_suite_id = -1; /* 0 or -1 for default */ int session_timeout = 0; /* 0 for default */ int retransmission_timeout = 0; /* 0 for default */ /* Workarounds - specify workaround flags if necessary */ unsigned int workaround_flags = 0; -/* Initialize w/ record id numbers to only monitor specific record ids */ -unsigned int record_ids[] = {0}; -unsigned int record_ids_length = 0; - -/* Initialize w/ sensor types to only monitor specific sensor types - * see ipmi_monitoring.h sensor types list. 
- */ -unsigned int sensor_types[] = {0}; -unsigned int sensor_types_length = 0; - /* Set to an appropriate alternate if desired */ char *sdr_cache_directory = "/tmp"; +char *sdr_sensors_cache_format = ".netdata-freeipmi-sensors-%H-on-%L.sdr"; +char *sdr_sel_cache_format = ".netdata-freeipmi-sel-%H-on-%L.sdr"; char *sensor_config_file = NULL; +char *sel_config_file = NULL; -/* Set to 1 or 0 to enable these sensor reading flags - * - See ipmi_monitoring.h for descriptions of these flags. - */ -int reread_sdr_cache = 0; -int ignore_non_interpretable_sensors = 0; -int bridge_sensors = 0; -int interpret_oem_data = 0; -int shared_sensors = 0; -int discrete_reading = 1; -int ignore_scanning_disabled = 0; -int assume_bmc_owner = 0; -int entity_sensor_names = 0; +// controlled via command line options +unsigned int global_sel_flags = IPMI_MONITORING_SEL_FLAGS_REREAD_SDR_CACHE; +unsigned int global_sensor_reading_flags = IPMI_MONITORING_SENSOR_READING_FLAGS_DISCRETE_READING|IPMI_MONITORING_SENSOR_READING_FLAGS_REREAD_SDR_CACHE; +bool remove_reread_sdr_after_first_use = true; /* Initialization flags * @@ -106,26 +128,10 @@ int entity_sensor_names = 0; */ unsigned int ipmimonitoring_init_flags = 0; -int errnum; - -// ---------------------------------------------------------------------------- -// SEL only variables - -/* Initialize w/ date range to only monitoring specific date range */ -char *date_begin = NULL; /* use MM/DD/YYYY format */ -char *date_end = NULL; /* use MM/DD/YYYY format */ - -int assume_system_event_record = 0; - -char *sel_config_file = NULL; - - // ---------------------------------------------------------------------------- // functions common to sensors and SEL -static void -_init_ipmi_config (struct ipmi_monitoring_ipmi_config *ipmi_config) -{ +static void initialize_ipmi_config (struct ipmi_monitoring_ipmi_config *ipmi_config) { fatal_assert(ipmi_config); ipmi_config->driver_type = driver_type; @@ -137,8 +143,8 @@ _init_ipmi_config (struct 
ipmi_monitoring_ipmi_config *ipmi_config) ipmi_config->protocol_version = protocol_version; ipmi_config->username = username; ipmi_config->password = password; - ipmi_config->k_g = ipmi_k_g; - ipmi_config->k_g_len = ipmi_k_g_len; + ipmi_config->k_g = k_g; + ipmi_config->k_g_len = k_g_len; ipmi_config->privilege_level = privilege_level; ipmi_config->authentication_type = authentication_type; ipmi_config->cipher_suite_id = cipher_suite_id; @@ -148,414 +154,566 @@ _init_ipmi_config (struct ipmi_monitoring_ipmi_config *ipmi_config) ipmi_config->workaround_flags = workaround_flags; } -#ifdef NETDATA_COMMENTED -static const char * -_get_sensor_type_string (int sensor_type) -{ - switch (sensor_type) - { +static const char *netdata_ipmi_get_sensor_type_string (int sensor_type, const char **component) { + switch (sensor_type) { case IPMI_MONITORING_SENSOR_TYPE_RESERVED: return ("Reserved"); + case IPMI_MONITORING_SENSOR_TYPE_TEMPERATURE: return ("Temperature"); + case IPMI_MONITORING_SENSOR_TYPE_VOLTAGE: return ("Voltage"); + case IPMI_MONITORING_SENSOR_TYPE_CURRENT: return ("Current"); + case IPMI_MONITORING_SENSOR_TYPE_FAN: return ("Fan"); + case IPMI_MONITORING_SENSOR_TYPE_PHYSICAL_SECURITY: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Physical Security"); + case IPMI_MONITORING_SENSOR_TYPE_PLATFORM_SECURITY_VIOLATION_ATTEMPT: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Platform Security Violation Attempt"); + case IPMI_MONITORING_SENSOR_TYPE_PROCESSOR: + *component = NETDATA_SENSOR_COMPONENT_PROCESSOR; return ("Processor"); + case IPMI_MONITORING_SENSOR_TYPE_POWER_SUPPLY: + *component = NETDATA_SENSOR_COMPONENT_POWER_SUPPLY; return ("Power Supply"); + case IPMI_MONITORING_SENSOR_TYPE_POWER_UNIT: + *component = NETDATA_SENSOR_COMPONENT_POWER_SUPPLY; return ("Power Unit"); + case IPMI_MONITORING_SENSOR_TYPE_COOLING_DEVICE: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Cooling Device"); + case 
IPMI_MONITORING_SENSOR_TYPE_OTHER_UNITS_BASED_SENSOR: return ("Other Units Based Sensor"); + case IPMI_MONITORING_SENSOR_TYPE_MEMORY: + *component = NETDATA_SENSOR_COMPONENT_MEMORY; return ("Memory"); + case IPMI_MONITORING_SENSOR_TYPE_DRIVE_SLOT: + *component = NETDATA_SENSOR_COMPONENT_STORAGE; return ("Drive Slot"); + case IPMI_MONITORING_SENSOR_TYPE_POST_MEMORY_RESIZE: + *component = NETDATA_SENSOR_COMPONENT_MEMORY; return ("POST Memory Resize"); + case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_FIRMWARE_PROGRESS: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("System Firmware Progress"); + case IPMI_MONITORING_SENSOR_TYPE_EVENT_LOGGING_DISABLED: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Event Logging Disabled"); + case IPMI_MONITORING_SENSOR_TYPE_WATCHDOG1: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Watchdog 1"); + case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_EVENT: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("System Event"); + case IPMI_MONITORING_SENSOR_TYPE_CRITICAL_INTERRUPT: return ("Critical Interrupt"); + case IPMI_MONITORING_SENSOR_TYPE_BUTTON_SWITCH: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Button/Switch"); + case IPMI_MONITORING_SENSOR_TYPE_MODULE_BOARD: return ("Module/Board"); + case IPMI_MONITORING_SENSOR_TYPE_MICROCONTROLLER_COPROCESSOR: + *component = NETDATA_SENSOR_COMPONENT_PROCESSOR; return ("Microcontroller/Coprocessor"); + case IPMI_MONITORING_SENSOR_TYPE_ADD_IN_CARD: return ("Add In Card"); + case IPMI_MONITORING_SENSOR_TYPE_CHASSIS: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Chassis"); + case IPMI_MONITORING_SENSOR_TYPE_CHIP_SET: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Chip Set"); + case IPMI_MONITORING_SENSOR_TYPE_OTHER_FRU: return ("Other Fru"); + case IPMI_MONITORING_SENSOR_TYPE_CABLE_INTERCONNECT: return ("Cable/Interconnect"); + case IPMI_MONITORING_SENSOR_TYPE_TERMINATOR: return ("Terminator"); + case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_BOOT_INITIATED: 
+ *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("System Boot Initiated"); + case IPMI_MONITORING_SENSOR_TYPE_BOOT_ERROR: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Boot Error"); + case IPMI_MONITORING_SENSOR_TYPE_OS_BOOT: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("OS Boot"); + case IPMI_MONITORING_SENSOR_TYPE_OS_CRITICAL_STOP: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("OS Critical Stop"); + case IPMI_MONITORING_SENSOR_TYPE_SLOT_CONNECTOR: return ("Slot/Connector"); + case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_ACPI_POWER_STATE: return ("System ACPI Power State"); + case IPMI_MONITORING_SENSOR_TYPE_WATCHDOG2: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Watchdog 2"); + case IPMI_MONITORING_SENSOR_TYPE_PLATFORM_ALERT: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Platform Alert"); + case IPMI_MONITORING_SENSOR_TYPE_ENTITY_PRESENCE: return ("Entity Presence"); + case IPMI_MONITORING_SENSOR_TYPE_MONITOR_ASIC_IC: return ("Monitor ASIC/IC"); + case IPMI_MONITORING_SENSOR_TYPE_LAN: + *component = NETDATA_SENSOR_COMPONENT_NETWORK; return ("LAN"); + case IPMI_MONITORING_SENSOR_TYPE_MANAGEMENT_SUBSYSTEM_HEALTH: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Management Subsystem Health"); + case IPMI_MONITORING_SENSOR_TYPE_BATTERY: return ("Battery"); + case IPMI_MONITORING_SENSOR_TYPE_SESSION_AUDIT: return ("Session Audit"); + case IPMI_MONITORING_SENSOR_TYPE_VERSION_CHANGE: return ("Version Change"); + case IPMI_MONITORING_SENSOR_TYPE_FRU_STATE: return ("FRU State"); - } - - return ("Unrecognized"); -} -#endif // NETDATA_COMMENTED + case IPMI_MONITORING_SENSOR_TYPE_UNKNOWN: + return ("Unknown"); -// ---------------------------------------------------------------------------- -// BEGIN NETDATA CODE + default: + if(sensor_type >= IPMI_MONITORING_SENSOR_TYPE_OEM_MIN && sensor_type <= IPMI_MONITORING_SENSOR_TYPE_OEM_MAX) + return ("OEM"); -static int debug = 0; + return ("Unrecognized"); + } +} 
-static int netdata_update_every = 5; // this is the minimum update frequency -static int netdata_priority = 90000; -static int netdata_do_sel = 1; +#define netdata_ipmi_get_value_int(var, func, ctx) do { \ + (var) = func(ctx); \ + if( (var) < 0) { \ + collector_error("%s(): call to " #func " failed: %s", \ + __FUNCTION__, ipmi_monitoring_ctx_errormsg(ctx)); \ + goto cleanup; \ + } \ + timing_step(TIMING_STEP_FREEIPMI_READ_ ## var); \ +} while(0) + +#define netdata_ipmi_get_value_ptr(var, func, ctx) do { \ + (var) = func(ctx); \ + if(!(var)) { \ + collector_error("%s(): call to " #func " failed: %s", \ + __FUNCTION__, ipmi_monitoring_ctx_errormsg(ctx)); \ + goto cleanup; \ + } \ + timing_step(TIMING_STEP_FREEIPMI_READ_ ## var); \ +} while(0) + +#define netdata_ipmi_get_value_no_check(var, func, ctx) do { \ + (var) = func(ctx); \ + timing_step(TIMING_STEP_FREEIPMI_READ_ ## var); \ +} while(0) + +static int netdata_read_ipmi_sensors(struct ipmi_monitoring_ipmi_config *ipmi_config, struct netdata_ipmi_state *state) { + timing_init(); -static size_t netdata_sensors_updated = 0; -static size_t netdata_sensors_collected = 0; -static size_t netdata_sel_events = 0; -static size_t netdata_sensors_states_nominal = 0; -static size_t netdata_sensors_states_warning = 0; -static size_t netdata_sensors_states_critical = 0; + ipmi_monitoring_ctx_t ctx = NULL; + unsigned int sensor_reading_flags = global_sensor_reading_flags; + int i; + int sensor_count; + int rv = -1; -struct sensor { - int record_id; - int sensor_number; - int sensor_type; - int sensor_state; - int sensor_units; - char *sensor_name; + if (!(ctx = ipmi_monitoring_ctx_create ())) { + collector_error("ipmi_monitoring_ctx_create()"); + goto cleanup; + } - int sensor_reading_type; - union { - uint8_t bool_value; - uint32_t uint32_value; - double double_value; - } sensor_reading; + timing_step(TIMING_STEP_FREEIPMI_CTX_CREATE); - int sent; - int ignore; - int exposed; - int updated; - struct sensor *next; -} 
*sensors_root = NULL; + if (sdr_cache_directory) { + if (ipmi_monitoring_ctx_sdr_cache_directory (ctx, sdr_cache_directory) < 0) { + collector_error("ipmi_monitoring_ctx_sdr_cache_directory(): %s\n", ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } + if (sdr_sensors_cache_format) { + if (ipmi_monitoring_ctx_sdr_cache_filenames(ctx, sdr_sensors_cache_format) < 0) { + collector_error("ipmi_monitoring_ctx_sdr_cache_filenames(): %s\n", ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } -static void netdata_mark_as_not_updated() { - struct sensor *sn; - for(sn = sensors_root; sn ;sn = sn->next) - sn->updated = sn->sent = 0; + timing_step(TIMING_STEP_FREEIPMI_DSR_CACHE_DIR); - netdata_sensors_updated = 0; - netdata_sensors_collected = 0; - netdata_sel_events = 0; + // Must call otherwise only default interpretations ever used + // sensor_config_file can be NULL + if (ipmi_monitoring_ctx_sensor_config_file (ctx, sensor_config_file) < 0) { + collector_error( "ipmi_monitoring_ctx_sensor_config_file(): %s\n", ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } - netdata_sensors_states_nominal = 0; - netdata_sensors_states_warning = 0; - netdata_sensors_states_critical = 0; -} + timing_step(TIMING_STEP_FREEIPMI_SENSOR_CONFIG_FILE); + + if ((sensor_count = ipmi_monitoring_sensor_readings_by_record_id (ctx, + hostname, + ipmi_config, + sensor_reading_flags, + NULL, + 0, + NULL, + NULL)) < 0) { + collector_error( "ipmi_monitoring_sensor_readings_by_record_id(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } -static void send_chart_to_netdata_for_units(int units) { - struct sensor *sn, *sn_stored; - int dupfound, multiplier; + timing_step(TIMING_STEP_FREEIPMI_SENSOR_READINGS_BY_X); - switch(units) { - case IPMI_MONITORING_SENSOR_UNITS_CELSIUS: - printf("CHART ipmi.temperatures_c '' 'System Celsius Temperatures read by IPMI' 'Celsius' 'temperatures' 'ipmi.temperatures_c' 'line' %d %d\n" - , netdata_priority + 10 - , netdata_update_every 
- ); - break; + for (i = 0; i < sensor_count; i++, ipmi_monitoring_sensor_iterator_next (ctx)) { + int record_id, sensor_number, sensor_type, sensor_state, sensor_units, + sensor_bitmask_type, sensor_bitmask, event_reading_type_code, sensor_reading_type; - case IPMI_MONITORING_SENSOR_UNITS_FAHRENHEIT: - printf("CHART ipmi.temperatures_f '' 'System Fahrenheit Temperatures read by IPMI' 'Fahrenheit' 'temperatures' 'ipmi.temperatures_f' 'line' %d %d\n" - , netdata_priority + 11 - , netdata_update_every - ); - break; + char **sensor_bitmask_strings = NULL; + char *sensor_name = NULL; + void *sensor_reading; - case IPMI_MONITORING_SENSOR_UNITS_VOLTS: - printf("CHART ipmi.volts '' 'System Voltages read by IPMI' 'Volts' 'voltages' 'ipmi.voltages' 'line' %d %d\n" - , netdata_priority + 12 - , netdata_update_every - ); - break; + netdata_ipmi_get_value_int(record_id, ipmi_monitoring_sensor_read_record_id, ctx); + netdata_ipmi_get_value_int(sensor_number, ipmi_monitoring_sensor_read_sensor_number, ctx); + netdata_ipmi_get_value_int(sensor_type, ipmi_monitoring_sensor_read_sensor_type, ctx); + netdata_ipmi_get_value_ptr(sensor_name, ipmi_monitoring_sensor_read_sensor_name, ctx); + netdata_ipmi_get_value_int(sensor_state, ipmi_monitoring_sensor_read_sensor_state, ctx); + netdata_ipmi_get_value_int(sensor_units, ipmi_monitoring_sensor_read_sensor_units, ctx); + netdata_ipmi_get_value_int(sensor_bitmask_type, ipmi_monitoring_sensor_read_sensor_bitmask_type, ctx); + netdata_ipmi_get_value_int(sensor_bitmask, ipmi_monitoring_sensor_read_sensor_bitmask, ctx); + // it's ok for this to be NULL, i.e. 
sensor_bitmask == IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN + netdata_ipmi_get_value_no_check(sensor_bitmask_strings, ipmi_monitoring_sensor_read_sensor_bitmask_strings, ctx); + netdata_ipmi_get_value_int(sensor_reading_type, ipmi_monitoring_sensor_read_sensor_reading_type, ctx); + // whatever we read from the sensor, it is ok + netdata_ipmi_get_value_no_check(sensor_reading, ipmi_monitoring_sensor_read_sensor_reading, ctx); + netdata_ipmi_get_value_int(event_reading_type_code, ipmi_monitoring_sensor_read_event_reading_type_code, ctx); + + netdata_update_ipmi_sensor_reading( + record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type, sensor_name, + sensor_reading, event_reading_type_code, sensor_bitmask_type, sensor_bitmask, sensor_bitmask_strings, + state + ); - case IPMI_MONITORING_SENSOR_UNITS_AMPS: - printf("CHART ipmi.amps '' 'System Current read by IPMI' 'Amps' 'current' 'ipmi.amps' 'line' %d %d\n" - , netdata_priority + 13 - , netdata_update_every - ); - break; +#ifdef NETDATA_COMMENTED + /* It is possible you may want to monitor specific event + * conditions that may occur. If that is the case, you may want + * to check out what specific bitmask type and bitmask events + * occurred. See ipmi_monitoring_bitmasks.h for a list of + * bitmasks and types. 
+ */ - case IPMI_MONITORING_SENSOR_UNITS_RPM: - printf("CHART ipmi.rpm '' 'System Fans read by IPMI' 'RPM' 'fans' 'ipmi.rpm' 'line' %d %d\n" - , netdata_priority + 14 - , netdata_update_every - ); - break; + if (sensor_bitmask_type != IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN) + printf (", %Xh", sensor_bitmask); + else + printf (", N/A"); - case IPMI_MONITORING_SENSOR_UNITS_WATTS: - printf("CHART ipmi.watts '' 'System Power read by IPMI' 'Watts' 'power' 'ipmi.watts' 'line' %d %d\n" - , netdata_priority + 5 - , netdata_update_every - ); - break; + if (sensor_bitmask_type != IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN + && sensor_bitmask_strings) + { + unsigned int i = 0; - case IPMI_MONITORING_SENSOR_UNITS_PERCENT: - printf("CHART ipmi.percent '' 'System Metrics read by IPMI' '%%' 'other' 'ipmi.percent' 'line' %d %d\n" - , netdata_priority + 15 - , netdata_update_every - ); - break; + printf (","); - default: - for(sn = sensors_root; sn; sn = sn->next) - if(sn->sensor_units == units) - sn->ignore = 1; - return; - } + while (sensor_bitmask_strings[i]) + { + printf (" "); - for(sn = sensors_root; sn; sn = sn->next) { - dupfound = 0; - if(sn->sensor_units == units && sn->updated && !sn->ignore) { - sn->exposed = 1; - multiplier = 1; - - switch(sn->sensor_reading_type) { - case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE: - multiplier = 1000; - // fallthrough - case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL: - case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32: - for (sn_stored = sensors_root; sn_stored; sn_stored = sn_stored->next) { - if (sn_stored == sn) continue; - // If the name is a duplicate, append the sensor number - if ( !strcmp(sn_stored->sensor_name, sn->sensor_name) ) { - dupfound = 1; - printf("DIMENSION i%d_n%d_r%d '%s i%d' absolute 1 %d\n" - , sn->sensor_number - , sn->record_id - , sn->sensor_reading_type - , sn->sensor_name - , sn->sensor_number - , multiplier - ); - break; - } - } - // No duplicate name was found, display it 
just with Name - if (!dupfound) { - // display without ID - printf("DIMENSION i%d_n%d_r%d '%s' absolute 1 %d\n" - , sn->sensor_number - , sn->record_id - , sn->sensor_reading_type - , sn->sensor_name - , multiplier - ); - } - break; + printf ("'%s'", + sensor_bitmask_strings[i]); - default: - sn->ignore = 1; - break; + i++; } } + else + printf (", N/A"); + + printf ("\n"); +#endif // NETDATA_COMMENTED } -} -static void send_metrics_to_netdata_for_units(int units) { - struct sensor *sn; + rv = 0; - switch(units) { - case IPMI_MONITORING_SENSOR_UNITS_CELSIUS: - printf("BEGIN ipmi.temperatures_c\n"); - break; +cleanup: + if (ctx) + ipmi_monitoring_ctx_destroy (ctx); - case IPMI_MONITORING_SENSOR_UNITS_FAHRENHEIT: - printf("BEGIN ipmi.temperatures_f\n"); - break; + timing_report(); - case IPMI_MONITORING_SENSOR_UNITS_VOLTS: - printf("BEGIN ipmi.volts\n"); - break; + if(remove_reread_sdr_after_first_use) + global_sensor_reading_flags &= ~(IPMI_MONITORING_SENSOR_READING_FLAGS_REREAD_SDR_CACHE); - case IPMI_MONITORING_SENSOR_UNITS_AMPS: - printf("BEGIN ipmi.amps\n"); - break; + return (rv); +} - case IPMI_MONITORING_SENSOR_UNITS_RPM: - printf("BEGIN ipmi.rpm\n"); - break; - case IPMI_MONITORING_SENSOR_UNITS_WATTS: - printf("BEGIN ipmi.watts\n"); - break; +static int netdata_get_ipmi_sel_events_count(struct ipmi_monitoring_ipmi_config *ipmi_config, struct netdata_ipmi_state *state) { + timing_init(); - case IPMI_MONITORING_SENSOR_UNITS_PERCENT: - printf("BEGIN ipmi.percent\n"); - break; + ipmi_monitoring_ctx_t ctx = NULL; + unsigned int sel_flags = global_sel_flags; + int sel_count; + int rv = -1; - default: - for(sn = sensors_root; sn; sn = sn->next) - if(sn->sensor_units == units) - sn->ignore = 1; - return; + if (!(ctx = ipmi_monitoring_ctx_create ())) { + collector_error("ipmi_monitoring_ctx_create()"); + goto cleanup; } - for(sn = sensors_root; sn; sn = sn->next) { - if(sn->sensor_units == units && sn->updated && !sn->sent && !sn->ignore) { - 
netdata_sensors_updated++; + if (sdr_cache_directory) { + if (ipmi_monitoring_ctx_sdr_cache_directory (ctx, sdr_cache_directory) < 0) { + collector_error( "ipmi_monitoring_ctx_sdr_cache_directory(): %s", ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } + if (sdr_sel_cache_format) { + if (ipmi_monitoring_ctx_sdr_cache_filenames(ctx, sdr_sel_cache_format) < 0) { + collector_error("ipmi_monitoring_ctx_sdr_cache_filenames(): %s\n", ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } - sn->sent = 1; + // Must call otherwise only default interpretations ever used + // sel_config_file can be NULL + if (ipmi_monitoring_ctx_sel_config_file (ctx, sel_config_file) < 0) { + collector_error( "ipmi_monitoring_ctx_sel_config_file(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } - switch(sn->sensor_reading_type) { - case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL: - printf("SET i%d_n%d_r%d = %u\n" - , sn->sensor_number - , sn->record_id - , sn->sensor_reading_type - , sn->sensor_reading.bool_value - ); - break; + if ((sel_count = ipmi_monitoring_sel_by_record_id (ctx, + hostname, + ipmi_config, + sel_flags, + NULL, + 0, + NULL, + NULL)) < 0) { + collector_error( "ipmi_monitoring_sel_by_record_id(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } - case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32: - printf("SET i%d_n%d_r%d = %u\n" - , sn->sensor_number - , sn->record_id - , sn->sensor_reading_type - , sn->sensor_reading.uint32_value - ); - break; + netdata_update_ipmi_sel_events_count(state, sel_count); - case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE: - printf("SET i%d_n%d_r%d = %lld\n" - , sn->sensor_number - , sn->record_id - , sn->sensor_reading_type - , (long long int)(sn->sensor_reading.double_value * 1000) - ); - break; + rv = 0; - default: - sn->ignore = 1; - break; - } - } - } +cleanup: + if (ctx) + ipmi_monitoring_ctx_destroy (ctx); + + timing_report(); - printf("END\n"); + 
if(remove_reread_sdr_after_first_use) + global_sel_flags &= ~(IPMI_MONITORING_SEL_FLAGS_REREAD_SDR_CACHE); + + return (rv); } -static void send_metrics_to_netdata() { - static int sel_chart_generated = 0, sensors_states_chart_generated = 0; - struct sensor *sn; +// ---------------------------------------------------------------------------- +// copied from freeipmi codebase commit 8dea6dec4012d0899901e595f2c868a05e1cefed +// added netdata_ in-front to not overwrite library functions + +// FROM: common/miscutil/network.c +static int netdata_host_is_localhost (const char *host) { + /* Ordered by my assumption of most popular */ + if (!strcasecmp (host, "localhost") + || !strcmp (host, "127.0.0.1") + || !strcasecmp (host, "ipv6-localhost") + || !strcmp (host, "::1") + || !strcasecmp (host, "ip6-localhost") + || !strcmp (host, "0:0:0:0:0:0:0:1")) + return (1); - if(netdata_do_sel && !sel_chart_generated) { - sel_chart_generated = 1; - printf("CHART ipmi.events '' 'IPMI Events' 'events' 'events' ipmi.sel area %d %d\n" - , netdata_priority + 2 - , netdata_update_every - ); - printf("DIMENSION events '' absolute 1 1\n"); - } + return (0); +} - if(!sensors_states_chart_generated) { - sensors_states_chart_generated = 1; - printf("CHART ipmi.sensors_states '' 'IPMI Sensors State' 'sensors' 'states' ipmi.sensors_states line %d %d\n" - , netdata_priority + 1 - , netdata_update_every - ); - printf("DIMENSION nominal '' absolute 1 1\n"); - printf("DIMENSION critical '' absolute 1 1\n"); - printf("DIMENSION warning '' absolute 1 1\n"); - } +// FROM: common/parsecommon/parse-common.h +#define IPMI_PARSE_DEVICE_LAN_STR "lan" +#define IPMI_PARSE_DEVICE_LAN_2_0_STR "lan_2_0" +#define IPMI_PARSE_DEVICE_LAN_2_0_STR2 "lan20" +#define IPMI_PARSE_DEVICE_LAN_2_0_STR3 "lan_20" +#define IPMI_PARSE_DEVICE_LAN_2_0_STR4 "lan2_0" +#define IPMI_PARSE_DEVICE_LAN_2_0_STR5 "lanplus" +#define IPMI_PARSE_DEVICE_KCS_STR "kcs" +#define IPMI_PARSE_DEVICE_SSIF_STR "ssif" +#define 
IPMI_PARSE_DEVICE_OPENIPMI_STR "openipmi" +#define IPMI_PARSE_DEVICE_OPENIPMI_STR2 "open" +#define IPMI_PARSE_DEVICE_SUNBMC_STR "sunbmc" +#define IPMI_PARSE_DEVICE_SUNBMC_STR2 "bmc" +#define IPMI_PARSE_DEVICE_INTELDCMI_STR "inteldcmi" - // generate the CHART/DIMENSION lines, if we have to - for(sn = sensors_root; sn; sn = sn->next) - if(sn->updated && !sn->exposed && !sn->ignore) - send_chart_to_netdata_for_units(sn->sensor_units); +// FROM: common/parsecommon/parse-common.c +// changed the return values to match ipmi_monitoring.h +static int netdata_parse_outofband_driver_type (const char *str) { + if (strcasecmp (str, IPMI_PARSE_DEVICE_LAN_STR) == 0) + return (IPMI_MONITORING_PROTOCOL_VERSION_1_5); - if(netdata_do_sel) { - printf( - "BEGIN ipmi.events\n" - "SET events = %zu\n" - "END\n" - , netdata_sel_events - ); - } + /* support "lanplus" for those that might be used to ipmitool. + * support typo variants to ease. + */ + else if (strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR) == 0 + || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR2) == 0 + || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR3) == 0 + || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR4) == 0 + || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR5) == 0) + return (IPMI_MONITORING_PROTOCOL_VERSION_2_0); - printf( - "BEGIN ipmi.sensors_states\n" - "SET nominal = %zu\n" - "SET warning = %zu\n" - "SET critical = %zu\n" - "END\n" - , netdata_sensors_states_nominal - , netdata_sensors_states_warning - , netdata_sensors_states_critical - ); - - // send metrics to netdata - for(sn = sensors_root; sn; sn = sn->next) - if(sn->updated && sn->exposed && !sn->sent && !sn->ignore) - send_metrics_to_netdata_for_units(sn->sensor_units); + return (-1); +} +// FROM: common/parsecommon/parse-common.c +// changed the return values to match ipmi_monitoring.h +static int netdata_parse_inband_driver_type (const char *str) { + if (strcasecmp (str, IPMI_PARSE_DEVICE_KCS_STR) == 0) + return (IPMI_MONITORING_DRIVER_TYPE_KCS); + 
else if (strcasecmp (str, IPMI_PARSE_DEVICE_SSIF_STR) == 0) + return (IPMI_MONITORING_DRIVER_TYPE_SSIF); + /* support "open" for those that might be used to + * ipmitool. + */ + else if (strcasecmp (str, IPMI_PARSE_DEVICE_OPENIPMI_STR) == 0 + || strcasecmp (str, IPMI_PARSE_DEVICE_OPENIPMI_STR2) == 0) + return (IPMI_MONITORING_DRIVER_TYPE_OPENIPMI); + /* support "bmc" for those that might be used to + * ipmitool. + */ + else if (strcasecmp (str, IPMI_PARSE_DEVICE_SUNBMC_STR) == 0 + || strcasecmp (str, IPMI_PARSE_DEVICE_SUNBMC_STR2) == 0) + return (IPMI_MONITORING_DRIVER_TYPE_SUNBMC); + +#ifdef IPMI_MONITORING_DRIVER_TYPE_INTELDCMI + else if (strcasecmp (str, IPMI_PARSE_DEVICE_INTELDCMI_STR) == 0) + return (IPMI_MONITORING_DRIVER_TYPE_INTELDCMI); +#endif // IPMI_MONITORING_DRIVER_TYPE_INTELDCMI + + return (-1); } +// ---------------------------------------------------------------------------- +// BEGIN NETDATA CODE + +typedef enum __attribute__((packed)) { + IPMI_COLLECT_TYPE_SENSORS = (1 << 0), + IPMI_COLLECT_TYPE_SEL = (1 << 1), +} IPMI_COLLECTION_TYPE; + +struct sensor { + int sensor_type; + int sensor_state; + int sensor_units; + char *sensor_name; + + int sensor_reading_type; + union { + uint8_t bool_value; + uint32_t uint32_value; + double double_value; + } sensor_reading; + + // netdata provided + const char *context; + const char *title; + const char *units; + const char *family; + const char *chart_type; + const char *dimension; + int priority; + + const char *type; + const char *component; + + int multiplier; + bool do_metric; + bool do_state; + bool metric_chart_sent; + bool state_chart_sent; + usec_t last_collected_metric_ut; + usec_t last_collected_state_ut; +}; + +typedef enum __attribute__((packed)) { + ICS_INIT, + ICS_INIT_FAILED, + ICS_RUNNING, + ICS_FAILED, +} IPMI_COLLECTOR_STATUS; + +struct netdata_ipmi_state { + bool debug; + + struct { + IPMI_COLLECTOR_STATUS status; + usec_t last_iteration_ut; + size_t collected; + usec_t now_ut; + usec_t 
freq_ut; + int priority; + DICTIONARY *dict; + } sensors; + + struct { + IPMI_COLLECTOR_STATUS status; + usec_t last_iteration_ut; + size_t events; + usec_t now_ut; + usec_t freq_ut; + int priority; + } sel; + + struct { + usec_t now_ut; + } updates; +}; + +// ---------------------------------------------------------------------------- +// excluded record ids maintenance (both for sensor data and state) + static int *excluded_record_ids = NULL; size_t excluded_record_ids_length = 0; -static void excluded_record_ids_parse(const char *s) { +static void excluded_record_ids_parse(const char *s, bool debug) { if(!s) return; while(*s) { @@ -567,18 +725,14 @@ static void excluded_record_ids_parse(const char *s) { s = e; if(n != 0) { - excluded_record_ids = realloc(excluded_record_ids, (excluded_record_ids_length + 1) * sizeof(int)); - if(!excluded_record_ids) { - fprintf(stderr, "freeipmi.plugin: failed to allocate memory. Exiting."); - exit(1); - } + excluded_record_ids = reallocz(excluded_record_ids, (excluded_record_ids_length + 1) * sizeof(int)); excluded_record_ids[excluded_record_ids_length++] = (int)n; } } } if(debug) { - fprintf(stderr, "freeipmi.plugin: excluded record ids:"); + fprintf(stderr, "%s: excluded record ids:", program_name); size_t i; for(i = 0; i < excluded_record_ids_length; i++) { fprintf(stderr, " %d", excluded_record_ids[i]); @@ -590,7 +744,7 @@ static void excluded_record_ids_parse(const char *s) { static int *excluded_status_record_ids = NULL; size_t excluded_status_record_ids_length = 0; -static void excluded_status_record_ids_parse(const char *s) { +static void excluded_status_record_ids_parse(const char *s, bool debug) { if(!s) return; while(*s) { @@ -602,18 +756,14 @@ static void excluded_status_record_ids_parse(const char *s) { s = e; if(n != 0) { - excluded_status_record_ids = realloc(excluded_status_record_ids, (excluded_status_record_ids_length + 1) * sizeof(int)); - if(!excluded_status_record_ids) { - fprintf(stderr, "freeipmi.plugin: 
failed to allocate memory. Exiting."); - exit(1); - } + excluded_status_record_ids = reallocz(excluded_status_record_ids, (excluded_status_record_ids_length + 1) * sizeof(int)); excluded_status_record_ids[excluded_status_record_ids_length++] = (int)n; } } } if(debug) { - fprintf(stderr, "freeipmi.plugin: excluded status record ids:"); + fprintf(stderr, "%s: excluded status record ids:", program_name); size_t i; for(i = 0; i < excluded_status_record_ids_length; i++) { fprintf(stderr, " %d", excluded_status_record_ids[i]); @@ -645,959 +795,649 @@ static int excluded_status_record_ids_check(int record_id) { return 0; } -static void netdata_get_sensor( - int record_id - , int sensor_number - , int sensor_type - , int sensor_state - , int sensor_units - , int sensor_reading_type - , char *sensor_name - , void *sensor_reading -) { - // find the sensor record - struct sensor *sn; - for(sn = sensors_root; sn ;sn = sn->next) - if( sn->record_id == record_id && - sn->sensor_number == sensor_number && - sn->sensor_reading_type == sensor_reading_type && - sn->sensor_units == sensor_units && - !strcmp(sn->sensor_name, sensor_name) - ) - break; +// ---------------------------------------------------------------------------- +// data collection functions - if(!sn) { - // not found, create it - // check if it is excluded - if(excluded_record_ids_check(record_id)) { - if(debug) fprintf(stderr, "Sensor '%s' is excluded by excluded_record_ids_check()\n", sensor_name); - return; - } +struct { + const char *search; + SIMPLE_PATTERN *pattern; + const char *label; +} sensors_component_patterns[] = { - if(debug) fprintf(stderr, "Allocating new sensor data record for sensor '%s', id %d, number %d, type %d, state %d, units %d, reading_type %d\n", sensor_name, record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type); + // The order is important! 
+ // They are evaluated top to bottom + // The first the matches is used - sn = calloc(1, sizeof(struct sensor)); - if(!sn) { - fatal("cannot allocate %zu bytes of memory.", sizeof(struct sensor)); - } + { + .search = "*DIMM*|*_DIM*|*VTT*|*VDDQ*|*ECC*|*MEM*CRC*|*MEM*BD*", + .label = NETDATA_SENSOR_COMPONENT_MEMORY_MODULE, + }, + { + .search = "*CPU*|SOC_*|*VDDCR*|P*_VDD*|*_DTS|*VCORE*|*PROC*", + .label = NETDATA_SENSOR_COMPONENT_PROCESSOR, + }, + { + .search = "IPU*", + .label = NETDATA_SENSOR_COMPONENT_IPU, + }, + { + .search = "M2_*|*SSD*|*HSC*|*HDD*|*NVME*", + .label = NETDATA_SENSOR_COMPONENT_STORAGE, + }, + { + .search = "MB_*|*PCH*|*VBAT*|*I/O*BD*|*IO*BD*", + .label = NETDATA_SENSOR_COMPONENT_MOTHERBOARD, + }, + { + .search = "Watchdog|SEL|SYS_*|*CHASSIS*", + .label = NETDATA_SENSOR_COMPONENT_SYSTEM, + }, + { + .search = "PS*|P_*|*PSU*|*PWR*|*TERMV*|*D2D*", + .label = NETDATA_SENSOR_COMPONENT_POWER_SUPPLY, + }, - sn->record_id = record_id; - sn->sensor_number = sensor_number; - sn->sensor_type = sensor_type; - sn->sensor_state = sensor_state; - sn->sensor_units = sensor_units; - sn->sensor_reading_type = sensor_reading_type; - sn->sensor_name = strdup(sensor_name); - if(!sn->sensor_name) { - fatal("cannot allocate %zu bytes of memory.", strlen(sensor_name)); + // fallback components + { + .search = "VR_P*|*VRMP*", + .label = NETDATA_SENSOR_COMPONENT_PROCESSOR, + }, + { + .search = "*VSB*|*PS*", + .label = NETDATA_SENSOR_COMPONENT_POWER_SUPPLY, + }, + { + .search = "*MEM*|*MEM*RAID*", + .label = NETDATA_SENSOR_COMPONENT_MEMORY, + }, + { + .search = "*RAID*", // there is also "Memory RAID", so keep this after memory + .label = NETDATA_SENSOR_COMPONENT_STORAGE, + }, + { + .search = "*PERIPHERAL*|*USB*", + .label = NETDATA_SENSOR_COMPONENT_PERIPHERAL, + }, + { + .search = "*FAN*|*12V*|*VCC*|*PCI*|*CHIPSET*|*AMP*|*BD*", + .label = NETDATA_SENSOR_COMPONENT_SYSTEM, + }, + + // terminator + { + .search = NULL, + .label = NULL, } +}; - sn->next = sensors_root; - 
sensors_root = sn; - } - else { - if(debug) fprintf(stderr, "Reusing sensor record for sensor '%s', id %d, number %d, type %d, state %d, units %d, reading_type %d\n", sensor_name, record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type); +static const char *netdata_sensor_name_to_component(const char *sensor_name) { + for(int i = 0; sensors_component_patterns[i].search ;i++) { + if(!sensors_component_patterns[i].pattern) + sensors_component_patterns[i].pattern = simple_pattern_create(sensors_component_patterns[i].search, "|", SIMPLE_PATTERN_EXACT, false); + + if(simple_pattern_matches(sensors_component_patterns[i].pattern, sensor_name)) + return sensors_component_patterns[i].label; } - switch(sensor_reading_type) { + return "Other"; +} + +const char *netdata_collect_type_to_string(IPMI_COLLECTION_TYPE type) { + if((type & (IPMI_COLLECT_TYPE_SENSORS|IPMI_COLLECT_TYPE_SEL)) == (IPMI_COLLECT_TYPE_SENSORS|IPMI_COLLECT_TYPE_SEL)) + return "sensors,sel"; + if(type & IPMI_COLLECT_TYPE_SEL) + return "sel"; + if(type & IPMI_COLLECT_TYPE_SENSORS) + return "sensors"; + + return "unknown"; +} + +static void netdata_sensor_set_value(struct sensor *sn, void *sensor_reading, struct netdata_ipmi_state *state __maybe_unused) { + switch(sn->sensor_reading_type) { case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL: sn->sensor_reading.bool_value = *((uint8_t *)sensor_reading); - sn->updated = 1; - netdata_sensors_collected++; break; case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32: sn->sensor_reading.uint32_value = *((uint32_t *)sensor_reading); - sn->updated = 1; - netdata_sensors_collected++; break; case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE: sn->sensor_reading.double_value = *((double *)sensor_reading); - sn->updated = 1; - netdata_sensors_collected++; break; default: - if(debug) fprintf(stderr, "Unknown reading type - Ignoring sensor record for sensor '%s', id %d, number %d, type %d, state %d, units %d, reading_type 
%d\n", sensor_name, record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type); - sn->ignore = 1; + case IPMI_MONITORING_SENSOR_READING_TYPE_UNKNOWN: + sn->do_metric = false; break; } +} - // check if it is excluded - if(excluded_status_record_ids_check(record_id)) { - if(debug) fprintf(stderr, "Sensor '%s' is excluded for status check, by excluded_status_record_ids_check()\n", sensor_name); +static void netdata_update_ipmi_sensor_reading( + int record_id + , int sensor_number + , int sensor_type + , int sensor_state + , int sensor_units + , int sensor_reading_type + , char *sensor_name + , void *sensor_reading + , int event_reading_type_code __maybe_unused + , int sensor_bitmask_type __maybe_unused + , int sensor_bitmask __maybe_unused + , char **sensor_bitmask_strings __maybe_unused + , struct netdata_ipmi_state *state +) { + if(unlikely(sensor_state == IPMI_MONITORING_STATE_UNKNOWN && + sensor_type == IPMI_MONITORING_SENSOR_TYPE_UNKNOWN && + sensor_units == IPMI_MONITORING_SENSOR_UNITS_UNKNOWN && + sensor_reading_type == IPMI_MONITORING_SENSOR_READING_TYPE_UNKNOWN && + (!sensor_name || !*sensor_name))) + // we can't do anything about this sensor - everything is unknown return; - } - switch(sensor_state) { - case IPMI_MONITORING_STATE_NOMINAL: - netdata_sensors_states_nominal++; - break; + if(unlikely(!sensor_name || !*sensor_name)) + sensor_name = "UNNAMED"; - case IPMI_MONITORING_STATE_WARNING: - netdata_sensors_states_warning++; - break; + state->sensors.collected++; - case IPMI_MONITORING_STATE_CRITICAL: - netdata_sensors_states_critical++; - break; + char key[SENSORS_DICT_KEY_SIZE + 1]; + snprintfz(key, SENSORS_DICT_KEY_SIZE, "i%d_n%d_t%d_u%d_%s", + record_id, sensor_number, sensor_reading_type, sensor_units, sensor_name); - default: - break; - } -} + // find the sensor record + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(state->sensors.dict, key); + if(likely(item)) { + // recurring collection -static void 
netdata_get_sel( - int record_id - , int record_type_class - , int sel_state -) { - (void)record_id; - (void)record_type_class; - (void)sel_state; + if(state->debug) + fprintf(stderr, "%s: reusing sensor record for sensor '%s', id %d, number %d, type %d, state %d, units %d, reading_type %d\n", + program_name, sensor_name, record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type); - netdata_sel_events++; -} + struct sensor *sn = dictionary_acquired_item_value(item); + if(sensor_reading) { + netdata_sensor_set_value(sn, sensor_reading, state); + sn->last_collected_metric_ut = state->sensors.now_ut; + } -// END NETDATA CODE -// ---------------------------------------------------------------------------- + sn->sensor_state = sensor_state; + sn->last_collected_state_ut = state->sensors.now_ut; -static int -_ipmimonitoring_sensors (struct ipmi_monitoring_ipmi_config *ipmi_config) -{ - ipmi_monitoring_ctx_t ctx = NULL; - unsigned int sensor_reading_flags = 0; - int i; - int sensor_count; - int rv = -1; + dictionary_acquired_item_release(state->sensors.dict, item); - if (!(ctx = ipmi_monitoring_ctx_create ())) { - collector_error("ipmi_monitoring_ctx_create()"); - goto cleanup; + return; } - if (sdr_cache_directory) - { - if (ipmi_monitoring_ctx_sdr_cache_directory (ctx, - sdr_cache_directory) < 0) - { - collector_error("ipmi_monitoring_ctx_sdr_cache_directory(): %s\n", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - } + if(state->debug) + fprintf(stderr, "Allocating new sensor data record for sensor '%s', id %d, number %d, type %d, state %d, units %d, reading_type %d\n", + sensor_name, record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type); - /* Must call otherwise only default interpretations ever used */ - if (sensor_config_file) - { - if (ipmi_monitoring_ctx_sensor_config_file (ctx, - sensor_config_file) < 0) - { - collector_error( "ipmi_monitoring_ctx_sensor_config_file(): %s\n", - 
ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + // check if it is excluded + bool excluded_metric = excluded_record_ids_check(record_id); + bool excluded_state = excluded_status_record_ids_check(record_id); + + if(excluded_metric) { + if(state->debug) + fprintf(stderr, "Sensor '%s' is excluded by excluded_record_ids_check()\n", sensor_name); } - else - { - if (ipmi_monitoring_ctx_sensor_config_file (ctx, NULL) < 0) - { - collector_error( "ipmi_monitoring_ctx_sensor_config_file(): %s\n", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + + if(excluded_state) { + if(state->debug) + fprintf(stderr, "Sensor '%s' is excluded for status check, by excluded_status_record_ids_check()\n", sensor_name); } - if (reread_sdr_cache) - sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_REREAD_SDR_CACHE; + struct sensor t = { + .sensor_type = sensor_type, + .sensor_state = sensor_state, + .sensor_units = sensor_units, + .sensor_reading_type = sensor_reading_type, + .sensor_name = strdupz(sensor_name), + .component = netdata_sensor_name_to_component(sensor_name), + .do_state = !excluded_state, + .do_metric = !excluded_metric, + }; - if (ignore_non_interpretable_sensors) - sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_IGNORE_NON_INTERPRETABLE_SENSORS; + t.type = netdata_ipmi_get_sensor_type_string(t.sensor_type, &t.component); - if (bridge_sensors) - sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_BRIDGE_SENSORS; + switch(t.sensor_units) { + case IPMI_MONITORING_SENSOR_UNITS_CELSIUS: + t.dimension = "temperature"; + t.context = "ipmi.sensor_temperature_c"; + t.title = "IPMI Sensor Temperature Celsius"; + t.units = "Celsius"; + t.family = "temperatures"; + t.chart_type = "line"; + t.priority = state->sensors.priority + 10; + break; - if (interpret_oem_data) - sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_INTERPRET_OEM_DATA; + case IPMI_MONITORING_SENSOR_UNITS_FAHRENHEIT: + t.dimension = "temperature"; + 
t.context = "ipmi.sensor_temperature_f"; + t.title = "IPMI Sensor Temperature Fahrenheit"; + t.units = "Fahrenheit"; + t.family = "temperatures"; + t.chart_type = "line"; + t.priority = state->sensors.priority + 20; + break; - if (shared_sensors) - sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_SHARED_SENSORS; + case IPMI_MONITORING_SENSOR_UNITS_VOLTS: + t.dimension = "voltage"; + t.context = "ipmi.sensor_voltage"; + t.title = "IPMI Sensor Voltage"; + t.units = "Volts"; + t.family = "voltages"; + t.chart_type = "line"; + t.priority = state->sensors.priority + 30; + break; - if (discrete_reading) - sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_DISCRETE_READING; + case IPMI_MONITORING_SENSOR_UNITS_AMPS: + t.dimension = "ampere"; + t.context = "ipmi.sensor_ampere"; + t.title = "IPMI Sensor Current"; + t.units = "Amps"; + t.family = "current"; + t.chart_type = "line"; + t.priority = state->sensors.priority + 40; + break; - if (ignore_scanning_disabled) - sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_IGNORE_SCANNING_DISABLED; + case IPMI_MONITORING_SENSOR_UNITS_RPM: + t.dimension = "rotations"; + t.context = "ipmi.sensor_fan_speed"; + t.title = "IPMI Sensor Fans Speed"; + t.units = "RPM"; + t.family = "fans"; + t.chart_type = "line"; + t.priority = state->sensors.priority + 50; + break; - if (assume_bmc_owner) - sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_ASSUME_BMC_OWNER; + case IPMI_MONITORING_SENSOR_UNITS_WATTS: + t.dimension = "power"; + t.context = "ipmi.sensor_power"; + t.title = "IPMI Sensor Power"; + t.units = "Watts"; + t.family = "power"; + t.chart_type = "line"; + t.priority = state->sensors.priority + 60; + break; -#ifdef IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES - if (entity_sensor_names) - sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES; -#endif // IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES + case IPMI_MONITORING_SENSOR_UNITS_PERCENT: 
+ t.dimension = "percentage"; + t.context = "ipmi.sensor_reading_percent"; + t.title = "IPMI Sensor Reading Percentage"; + t.units = "%%"; + t.family = "other"; + t.chart_type = "line"; + t.priority = state->sensors.priority + 70; + break; - if (!record_ids_length && !sensor_types_length) - { - if ((sensor_count = ipmi_monitoring_sensor_readings_by_record_id (ctx, - hostname, - ipmi_config, - sensor_reading_flags, - NULL, - 0, - NULL, - NULL)) < 0) - { - collector_error( "ipmi_monitoring_sensor_readings_by_record_id(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - } - else if (record_ids_length) - { - if ((sensor_count = ipmi_monitoring_sensor_readings_by_record_id (ctx, - hostname, - ipmi_config, - sensor_reading_flags, - record_ids, - record_ids_length, - NULL, - NULL)) < 0) - { - collector_error( "ipmi_monitoring_sensor_readings_by_record_id(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - } - else - { - if ((sensor_count = ipmi_monitoring_sensor_readings_by_sensor_type (ctx, - hostname, - ipmi_config, - sensor_reading_flags, - sensor_types, - sensor_types_length, - NULL, - NULL)) < 0) - { - collector_error( "ipmi_monitoring_sensor_readings_by_sensor_type(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + default: + t.priority = state->sensors.priority + 80; + t.do_metric = false; + break; } -#ifdef NETDATA_COMMENTED - printf ("%s, %s, %s, %s, %s, %s, %s, %s, %s, %s\n", - "Record ID", - "Sensor Name", - "Sensor Number", - "Sensor Type", - "Sensor State", - "Sensor Reading", - "Sensor Units", - "Sensor Event/Reading Type Code", - "Sensor Event Bitmask", - "Sensor Event String"); -#endif // NETDATA_COMMENTED - - for (i = 0; i < sensor_count; i++, ipmi_monitoring_sensor_iterator_next (ctx)) - { - int record_id, sensor_number, sensor_type, sensor_state, sensor_units, - sensor_reading_type; - -#ifdef NETDATA_COMMENTED - int sensor_bitmask_type, sensor_bitmask, event_reading_type_code; - char 
**sensor_bitmask_strings = NULL; - const char *sensor_type_str; - const char *sensor_state_str; -#endif // NETDATA_COMMENTED + switch(sensor_reading_type) { + case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE: + t.multiplier = 1000; + break; - char *sensor_name = NULL; - void *sensor_reading; + case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL: + case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32: + t.multiplier = 1; + break; - if ((record_id = ipmi_monitoring_sensor_read_record_id (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sensor_read_record_id(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + default: + t.do_metric = false; + break; + } - if ((sensor_number = ipmi_monitoring_sensor_read_sensor_number (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sensor_read_sensor_number(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + if(sensor_reading) { + netdata_sensor_set_value(&t, sensor_reading, state); + t.last_collected_metric_ut = state->sensors.now_ut; + } + t.last_collected_state_ut = state->sensors.now_ut; - if ((sensor_type = ipmi_monitoring_sensor_read_sensor_type (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sensor_read_sensor_type(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + dictionary_set(state->sensors.dict, key, &t, sizeof(t)); +} - if (!(sensor_name = ipmi_monitoring_sensor_read_sensor_name (ctx))) - { - collector_error( "ipmi_monitoring_sensor_read_sensor_name(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } +static void netdata_update_ipmi_sel_events_count(struct netdata_ipmi_state *state, uint32_t events) { + state->sel.events = events; +} - if ((sensor_state = ipmi_monitoring_sensor_read_sensor_state (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sensor_read_sensor_state(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } +int netdata_ipmi_collect_data(struct ipmi_monitoring_ipmi_config *ipmi_config, 
IPMI_COLLECTION_TYPE type, struct netdata_ipmi_state *state) { + errno = 0; - if ((sensor_units = ipmi_monitoring_sensor_read_sensor_units (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sensor_read_sensor_units(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + if(type & IPMI_COLLECT_TYPE_SENSORS) { + state->sensors.collected = 0; + state->sensors.now_ut = now_monotonic_usec(); -#ifdef NETDATA_COMMENTED - if ((sensor_bitmask_type = ipmi_monitoring_sensor_read_sensor_bitmask_type (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sensor_read_sensor_bitmask_type(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - if ((sensor_bitmask = ipmi_monitoring_sensor_read_sensor_bitmask (ctx)) < 0) - { - collector_error("ipmi_monitoring_sensor_read_sensor_bitmask(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + if (netdata_read_ipmi_sensors(ipmi_config, state) < 0) return -1; + } - /* it's ok for this to be NULL, i.e. sensor_bitmask == - * IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN - */ - sensor_bitmask_strings = ipmi_monitoring_sensor_read_sensor_bitmask_strings (ctx); - - - -#endif // NETDATA_COMMENTED + if(type & IPMI_COLLECT_TYPE_SEL) { + state->sel.events = 0; + state->sel.now_ut = now_monotonic_usec(); + if(netdata_get_ipmi_sel_events_count(ipmi_config, state) < 0) return -2; + } - if ((sensor_reading_type = ipmi_monitoring_sensor_read_sensor_reading_type (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sensor_read_sensor_reading_type(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + return 0; +} - sensor_reading = ipmi_monitoring_sensor_read_sensor_reading (ctx); +int netdata_ipmi_detect_speed_secs(struct ipmi_monitoring_ipmi_config *ipmi_config, IPMI_COLLECTION_TYPE type, struct netdata_ipmi_state *state) { + int i, checks = SPEED_TEST_ITERATIONS, successful = 0; + usec_t total = 0; -#ifdef NETDATA_COMMENTED - if ((event_reading_type_code = 
ipmi_monitoring_sensor_read_event_reading_type_code (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sensor_read_event_reading_type_code(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } -#endif // NETDATA_COMMENTED + for(i = 0 ; i < checks ; i++) { + if(unlikely(state->debug)) + fprintf(stderr, "%s: checking %s data collection speed iteration %d of %d\n", + program_name, netdata_collect_type_to_string(type), i + 1, checks); - netdata_get_sensor( - record_id - , sensor_number - , sensor_type - , sensor_state - , sensor_units - , sensor_reading_type - , sensor_name - , sensor_reading - ); + // measure the time a data collection needs + usec_t start = now_realtime_usec(); -#ifdef NETDATA_COMMENTED - if (!strlen (sensor_name)) - sensor_name = "N/A"; - - sensor_type_str = _get_sensor_type_string (sensor_type); - - printf ("%d, %s, %d, %s", - record_id, - sensor_name, - sensor_number, - sensor_type_str); - - if (sensor_state == IPMI_MONITORING_STATE_NOMINAL) - sensor_state_str = "Nominal"; - else if (sensor_state == IPMI_MONITORING_STATE_WARNING) - sensor_state_str = "Warning"; - else if (sensor_state == IPMI_MONITORING_STATE_CRITICAL) - sensor_state_str = "Critical"; - else - sensor_state_str = "N/A"; + if(netdata_ipmi_collect_data(ipmi_config, type, state) < 0) + continue; - printf (", %s", sensor_state_str); + usec_t end = now_realtime_usec(); - if (sensor_reading) - { - const char *sensor_units_str; - - if (sensor_reading_type == IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL) - printf (", %s", - (*((uint8_t *)sensor_reading) ? 
"true" : "false")); - else if (sensor_reading_type == IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32) - printf (", %u", - *((uint32_t *)sensor_reading)); - else if (sensor_reading_type == IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE) - printf (", %.2f", - *((double *)sensor_reading)); - else - printf (", N/A"); - - if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_CELSIUS) - sensor_units_str = "C"; - else if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_FAHRENHEIT) - sensor_units_str = "F"; - else if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_VOLTS) - sensor_units_str = "V"; - else if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_AMPS) - sensor_units_str = "A"; - else if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_RPM) - sensor_units_str = "RPM"; - else if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_WATTS) - sensor_units_str = "W"; - else if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_PERCENT) - sensor_units_str = "%"; - else - sensor_units_str = "N/A"; - - printf (", %s", sensor_units_str); - } - else - printf (", N/A, N/A"); + successful++; - printf (", %Xh", event_reading_type_code); + if(unlikely(state->debug)) + fprintf(stderr, "%s: %s data collection speed was %llu usec\n", + program_name, netdata_collect_type_to_string(type), end - start); - /* It is possible you may want to monitor specific event - * conditions that may occur. If that is the case, you may want - * to check out what specific bitmask type and bitmask events - * occurred. See ipmi_monitoring_bitmasks.h for a list of - * bitmasks and types. 
- */ + // add it to our total + total += end - start; - if (sensor_bitmask_type != IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN) - printf (", %Xh", sensor_bitmask); - else - printf (", N/A"); + // wait the same time + // to avoid flooding the IPMI processor with requests + sleep_usec(end - start); + } - if (sensor_bitmask_type != IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN - && sensor_bitmask_strings) - { - unsigned int i = 0; + if(!successful) + return 0; - printf (","); + // so, we assume it needed 2x the time + // we find the average in microseconds + // and we round-up to the closest second - while (sensor_bitmask_strings[i]) - { - printf (" "); + return (int)(( total * 2 / successful / USEC_PER_SEC ) + 1); +} - printf ("'%s'", - sensor_bitmask_strings[i]); +// ---------------------------------------------------------------------------- +// data collection threads - i++; - } - } - else - printf (", N/A"); +struct ipmi_collection_thread { + struct ipmi_monitoring_ipmi_config ipmi_config; + int freq_s; + bool debug; + IPMI_COLLECTION_TYPE type; + SPINLOCK spinlock; + struct netdata_ipmi_state state; +}; - printf ("\n"); -#endif // NETDATA_COMMENTED - } +void *netdata_ipmi_collection_thread(void *ptr) { + struct ipmi_collection_thread *t = ptr; - rv = 0; - cleanup: - if (ctx) - ipmi_monitoring_ctx_destroy (ctx); - return (rv); -} + if(t->debug) fprintf(stderr, "%s: calling initialize_ipmi_config() for %s\n", + program_name, netdata_collect_type_to_string(t->type)); + initialize_ipmi_config(&t->ipmi_config); -static int -_ipmimonitoring_sel (struct ipmi_monitoring_ipmi_config *ipmi_config) -{ - ipmi_monitoring_ctx_t ctx = NULL; - unsigned int sel_flags = 0; - int i; - int sel_count; - int rv = -1; + if(t->debug) fprintf(stderr, "%s: detecting IPMI minimum update frequency for %s...\n", + program_name, netdata_collect_type_to_string(t->type)); - if (!(ctx = ipmi_monitoring_ctx_create ())) - { - collector_error("ipmi_monitoring_ctx_create()"); - goto cleanup; - } - - 
if (sdr_cache_directory) - { - if (ipmi_monitoring_ctx_sdr_cache_directory (ctx, - sdr_cache_directory) < 0) - { - collector_error( "ipmi_monitoring_ctx_sdr_cache_directory(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; + int freq_s = netdata_ipmi_detect_speed_secs(&t->ipmi_config, t->type, &t->state); + if(!freq_s) { + if(t->type & IPMI_COLLECT_TYPE_SENSORS) { + t->state.sensors.status = ICS_INIT_FAILED; + t->state.sensors.last_iteration_ut = 0; } - } - /* Must call otherwise only default interpretations ever used */ - if (sel_config_file) - { - if (ipmi_monitoring_ctx_sel_config_file (ctx, - sel_config_file) < 0) - { - collector_error( "ipmi_monitoring_ctx_sel_config_file(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - } - else - { - if (ipmi_monitoring_ctx_sel_config_file (ctx, NULL) < 0) - { - collector_error( "ipmi_monitoring_ctx_sel_config_file(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; + if(t->type & IPMI_COLLECT_TYPE_SEL) { + t->state.sel.status = ICS_INIT_FAILED; + t->state.sel.last_iteration_ut = 0; } - } - if (reread_sdr_cache) - sel_flags |= IPMI_MONITORING_SEL_FLAGS_REREAD_SDR_CACHE; - - if (interpret_oem_data) - sel_flags |= IPMI_MONITORING_SEL_FLAGS_INTERPRET_OEM_DATA; - - if (assume_system_event_record) - sel_flags |= IPMI_MONITORING_SEL_FLAGS_ASSUME_SYSTEM_EVENT_RECORD; - -#ifdef IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES - if (entity_sensor_names) - sel_flags |= IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES; -#endif // IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES - - if (record_ids_length) - { - if ((sel_count = ipmi_monitoring_sel_by_record_id (ctx, - hostname, - ipmi_config, - sel_flags, - record_ids, - record_ids_length, - NULL, - NULL)) < 0) - { - collector_error( "ipmi_monitoring_sel_by_record_id(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - } - else if (sensor_types_length) - { - if ((sel_count = ipmi_monitoring_sel_by_sensor_type (ctx, - hostname, - 
ipmi_config, - sel_flags, - sensor_types, - sensor_types_length, - NULL, - NULL)) < 0) - { - collector_error( "ipmi_monitoring_sel_by_sensor_type(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - } - else if (date_begin - || date_end) - { - if ((sel_count = ipmi_monitoring_sel_by_date_range (ctx, - hostname, - ipmi_config, - sel_flags, - date_begin, - date_end, - NULL, - NULL)) < 0) - { - collector_error( "ipmi_monitoring_sel_by_sensor_type(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + return ptr; } - else - { - if ((sel_count = ipmi_monitoring_sel_by_record_id (ctx, - hostname, - ipmi_config, - sel_flags, - NULL, - 0, - NULL, - NULL)) < 0) - { - collector_error( "ipmi_monitoring_sel_by_record_id(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - } - -#ifdef NETDATA_COMMENTED - printf ("%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s\n", - "Record ID", - "Record Type", - "SEL State", - "Timestamp", - "Sensor Name", - "Sensor Type", - "Event Direction", - "Event Type Code", - "Event Data", - "Event Offset", - "Event Offset String"); -#endif // NETDATA_COMMENTED - - for (i = 0; i < sel_count; i++, ipmi_monitoring_sel_iterator_next (ctx)) - { - int record_id, record_type, sel_state, record_type_class; -#ifdef NETDATA_COMMENTED - int sensor_type, sensor_number, event_direction, - event_offset_type, event_offset, event_type_code, manufacturer_id; - unsigned int timestamp, event_data1, event_data2, event_data3; - char *event_offset_string = NULL; - const char *sensor_type_str; - const char *event_direction_str; - const char *sel_state_str; - char *sensor_name = NULL; - unsigned char oem_data[64]; - int oem_data_len; - unsigned int j; -#endif // NETDATA_COMMENTED - - if ((record_id = ipmi_monitoring_sel_read_record_id (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_record_id(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; + else { + if(t->type & IPMI_COLLECT_TYPE_SENSORS) { + 
t->state.sensors.status = ICS_RUNNING; } - if ((record_type = ipmi_monitoring_sel_read_record_type (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_record_type(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; + if(t->type & IPMI_COLLECT_TYPE_SEL) { + t->state.sel.status = ICS_RUNNING; } + } - if ((record_type_class = ipmi_monitoring_sel_read_record_type_class (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_record_type_class(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + t->freq_s = freq_s = MAX(t->freq_s, freq_s); - if ((sel_state = ipmi_monitoring_sel_read_sel_state (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_sel_state(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + if(t->debug) { + fprintf(stderr, "%s: IPMI minimum update frequency of %s was calculated to %d seconds.\n", + program_name, netdata_collect_type_to_string(t->type), t->freq_s); - netdata_get_sel( - record_id - , record_type_class - , sel_state - ); + fprintf(stderr, "%s: starting data collection of %s\n", + program_name, netdata_collect_type_to_string(t->type)); + } -#ifdef NETDATA_COMMENTED - if (sel_state == IPMI_MONITORING_STATE_NOMINAL) - sel_state_str = "Nominal"; - else if (sel_state == IPMI_MONITORING_STATE_WARNING) - sel_state_str = "Warning"; - else if (sel_state == IPMI_MONITORING_STATE_CRITICAL) - sel_state_str = "Critical"; - else - sel_state_str = "N/A"; + size_t iteration = 0, failures = 0; + usec_t step = t->freq_s * USEC_PER_SEC; - printf ("%d, %d, %s", - record_id, - record_type, - sel_state_str); + heartbeat_t hb; + heartbeat_init(&hb); + while(++iteration) { + heartbeat_next(&hb, step); - if (record_type_class == IPMI_MONITORING_SEL_RECORD_TYPE_CLASS_SYSTEM_EVENT_RECORD - || record_type_class == IPMI_MONITORING_SEL_RECORD_TYPE_CLASS_TIMESTAMPED_OEM_RECORD) - { + if(t->debug) + fprintf(stderr, "%s: calling netdata_ipmi_collect_data() for %s\n", + program_name, 
netdata_collect_type_to_string(t->type)); - if (ipmi_monitoring_sel_read_timestamp (ctx, ×tamp) < 0) - { - collector_error( "ipmi_monitoring_sel_read_timestamp(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + struct netdata_ipmi_state tmp_state = t->state; - /* XXX: This should be converted to a nice date output using - * your favorite timestamp -> string conversion functions. - */ - printf (", %u", timestamp); + if(t->type & IPMI_COLLECT_TYPE_SENSORS) { + tmp_state.sensors.last_iteration_ut = now_monotonic_usec(); + tmp_state.sensors.freq_ut = t->freq_s * USEC_PER_SEC; } - else - printf (", N/A"); - - if (record_type_class == IPMI_MONITORING_SEL_RECORD_TYPE_CLASS_SYSTEM_EVENT_RECORD) - { - /* If you are integrating ipmimonitoring SEL into a monitoring application, - * you may wish to count the number of times a specific error occurred - * and report that to the monitoring application. - * - * In this particular case, you'll probably want to check out - * what sensor type each SEL event is reporting, the - * event offset type, and the specific event offset that occurred. - * - * See ipmi_monitoring_offsets.h for a list of event offsets - * and types. 
- */ - - if (!(sensor_name = ipmi_monitoring_sel_read_sensor_name (ctx))) - { - collector_error( "ipmi_monitoring_sel_read_sensor_name(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - - if ((sensor_type = ipmi_monitoring_sel_read_sensor_type (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_sensor_type(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - - if ((sensor_number = ipmi_monitoring_sel_read_sensor_number (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_sensor_number(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - - if ((event_direction = ipmi_monitoring_sel_read_event_direction (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_event_direction(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - - if ((event_type_code = ipmi_monitoring_sel_read_event_type_code (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_event_type_code(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - if (ipmi_monitoring_sel_read_event_data (ctx, - &event_data1, - &event_data2, - &event_data3) < 0) - { - collector_error( "ipmi_monitoring_sel_read_event_data(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + if(t->type & IPMI_COLLECT_TYPE_SEL) { + tmp_state.sel.last_iteration_ut = now_monotonic_usec(); + tmp_state.sel.freq_ut = t->freq_s * USEC_PER_SEC; + } - if ((event_offset_type = ipmi_monitoring_sel_read_event_offset_type (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_event_offset_type(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + if(netdata_ipmi_collect_data(&t->ipmi_config, t->type, &tmp_state) != 0) + failures++; + else + failures = 0; - if ((event_offset = ipmi_monitoring_sel_read_event_offset (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_event_offset(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + if(failures > 10) { + collector_error("%s() failed to 
collect %s data for %zu consecutive times, having made %zu iterations.", + __FUNCTION__, netdata_collect_type_to_string(t->type), failures, iteration); - if (!(event_offset_string = ipmi_monitoring_sel_read_event_offset_string (ctx))) - { - collector_error( "ipmi_monitoring_sel_read_event_offset_string(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; + if(t->type & IPMI_COLLECT_TYPE_SENSORS) { + t->state.sensors.status = ICS_FAILED; + t->state.sensors.last_iteration_ut = 0; } - if (!strlen (sensor_name)) - sensor_name = "N/A"; - - sensor_type_str = _get_sensor_type_string (sensor_type); - - if (event_direction == IPMI_MONITORING_SEL_EVENT_DIRECTION_ASSERTION) - event_direction_str = "Assertion"; - else - event_direction_str = "Deassertion"; - - printf (", %s, %s, %d, %s, %Xh, %Xh-%Xh-%Xh", - sensor_name, - sensor_type_str, - sensor_number, - event_direction_str, - event_type_code, - event_data1, - event_data2, - event_data3); - - if (event_offset_type != IPMI_MONITORING_EVENT_OFFSET_TYPE_UNKNOWN) - printf (", %Xh", event_offset); - else - printf (", N/A"); - - if (event_offset_type != IPMI_MONITORING_EVENT_OFFSET_TYPE_UNKNOWN) - printf (", %s", event_offset_string); - else - printf (", N/A"); - } - else if (record_type_class == IPMI_MONITORING_SEL_RECORD_TYPE_CLASS_TIMESTAMPED_OEM_RECORD - || record_type_class == IPMI_MONITORING_SEL_RECORD_TYPE_CLASS_NON_TIMESTAMPED_OEM_RECORD) - { - if (record_type_class == IPMI_MONITORING_SEL_RECORD_TYPE_CLASS_TIMESTAMPED_OEM_RECORD) - { - if ((manufacturer_id = ipmi_monitoring_sel_read_manufacturer_id (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_manufacturer_id(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - - printf (", Manufacturer ID = %Xh", manufacturer_id); - } - - if ((oem_data_len = ipmi_monitoring_sel_read_oem_data (ctx, oem_data, 1024)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_oem_data(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; + 
if(t->type & IPMI_COLLECT_TYPE_SEL) { + t->state.sel.status = ICS_FAILED; + t->state.sel.last_iteration_ut = 0; } - printf (", OEM Data = "); - - for (j = 0; j < oem_data_len; j++) - printf ("%02Xh ", oem_data[j]); + break; } - else - printf (", N/A, N/A, N/A, N/A, N/A, N/A, N/A"); - printf ("\n"); -#endif // NETDATA_COMMENTED + spinlock_lock(&t->spinlock); + t->state = tmp_state; + spinlock_unlock(&t->spinlock); } - rv = 0; - cleanup: - if (ctx) - ipmi_monitoring_ctx_destroy (ctx); - return (rv); + return ptr; } // ---------------------------------------------------------------------------- -// MAIN PROGRAM FOR NETDATA PLUGIN - -int ipmi_collect_data(struct ipmi_monitoring_ipmi_config *ipmi_config) { - errno = 0; +// sending data to netdata - if (_ipmimonitoring_sensors(ipmi_config) < 0) return -1; +static inline bool is_sensor_updated(usec_t last_collected_ut, usec_t now_ut, usec_t freq) { + return (now_ut - last_collected_ut < freq * 2) ? true : false; +} - if(netdata_do_sel) { - if(_ipmimonitoring_sel(ipmi_config) < 0) return -2; +static size_t send_ipmi_sensor_metrics_to_netdata(struct netdata_ipmi_state *state) { + if(state->sensors.status != ICS_RUNNING) { + if(unlikely(state->debug)) + fprintf(stderr, "%s: %s() sensors state is not RUNNING\n", + program_name, __FUNCTION__ ); + return 0; } - return 0; -} + size_t total_sensors_sent = 0; + int update_every = (int)(state->sensors.freq_ut / USEC_PER_SEC); + struct sensor *sn; -int ipmi_detect_speed_secs(struct ipmi_monitoring_ipmi_config *ipmi_config) { - int i, checks = 10; - unsigned long long total = 0; + // generate the CHART/DIMENSION lines, if we have to + dfe_start_reentrant(state->sensors.dict, sn) { + if(unlikely(!sn->do_metric && !sn->do_state)) + continue; - for(i = 0 ; i < checks ; i++) { - if(debug) fprintf(stderr, "freeipmi.plugin: checking data collection speed iteration %d of %d\n", i+1, checks); + bool did_metric = false, did_state = false; - // measure the time a data collection needs - 
unsigned long long start = now_realtime_usec(); - if(ipmi_collect_data(ipmi_config) < 0) - fatal("freeipmi.plugin: data collection failed."); + if(likely(sn->do_metric)) { + if(unlikely(!is_sensor_updated(sn->last_collected_metric_ut, state->updates.now_ut, state->sensors.freq_ut))) { + if(unlikely(state->debug)) + fprintf(stderr, "%s: %s() sensor '%s' metric is not UPDATED (last updated %llu, now %llu, freq %llu\n", + program_name, __FUNCTION__, sn->sensor_name, sn->last_collected_metric_ut, state->updates.now_ut, state->sensors.freq_ut); + } + else { + if (unlikely(!sn->metric_chart_sent)) { + sn->metric_chart_sent = true; - unsigned long long end = now_realtime_usec(); + printf("CHART '%s_%s' '' '%s' '%s' '%s' '%s' '%s' %d %d '' '%s' '%s'\n", + sn->context, sn_dfe.name, sn->title, sn->units, sn->family, sn->context, + sn->chart_type, sn->priority + 1, update_every, program_name, "sensors"); - if(debug) fprintf(stderr, "freeipmi.plugin: data collection speed was %llu usec\n", end - start); + printf("CLABEL 'sensor' '%s' 1\n", sn->sensor_name); + printf("CLABEL 'type' '%s' 1\n", sn->type); + printf("CLABEL 'component' '%s' 1\n", sn->component); + printf("CLABEL_COMMIT\n"); - // add it to our total - total += end - start; + printf("DIMENSION '%s' '' absolute 1 %d\n", sn->dimension, sn->multiplier); + } - // wait the same time - // to avoid flooding the IPMI processor with requests - sleep_usec(end - start); - } + printf("BEGIN '%s_%s'\n", sn->context, sn_dfe.name); + + switch (sn->sensor_reading_type) { + case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32: + printf("SET '%s' = %u\n", sn->dimension, sn->sensor_reading.uint32_value + ); + break; + + case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE: + printf("SET '%s' = %lld\n", sn->dimension, + (long long int) (sn->sensor_reading.double_value * sn->multiplier) + ); + break; + + case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL: + printf("SET '%s' = %u\n", sn->dimension, 
sn->sensor_reading.bool_value + ); + break; + + default: + case IPMI_MONITORING_SENSOR_READING_TYPE_UNKNOWN: + // this should never happen because we also do the same check at netdata_get_sensor() + sn->do_metric = false; + break; + } - // so, we assume it needed 2x the time - // we find the average in microseconds - // and we round-up to the closest second + printf("END\n"); + did_metric = true; + } + } - return (int)(( total * 2 / checks / 1000000 ) + 1); -} + if(likely(sn->do_state)) { + if(unlikely(!is_sensor_updated(sn->last_collected_state_ut, state->updates.now_ut, state->sensors.freq_ut))) { + if (unlikely(state->debug)) + fprintf(stderr, "%s: %s() sensor '%s' state is not UPDATED (last updated %llu, now %llu, freq %llu\n", + program_name, __FUNCTION__, sn->sensor_name, sn->last_collected_state_ut, state->updates.now_ut, state->sensors.freq_ut); + } + else { + if (unlikely(!sn->state_chart_sent)) { + sn->state_chart_sent = true; + + printf("CHART 'ipmi.sensor_state_%s' '' 'IPMI Sensor State' 'state' 'states' 'ipmi.sensor_state' 'line' %d %d '' '%s' '%s'\n", + sn_dfe.name, sn->priority, update_every, program_name, "sensors"); + + printf("CLABEL 'sensor' '%s' 1\n", sn->sensor_name); + printf("CLABEL 'type' '%s' 1\n", sn->type); + printf("CLABEL 'component' '%s' 1\n", sn->component); + printf("CLABEL_COMMIT\n"); + + printf("DIMENSION 'nominal' '' absolute 1 1\n"); + printf("DIMENSION 'warning' '' absolute 1 1\n"); + printf("DIMENSION 'critical' '' absolute 1 1\n"); + printf("DIMENSION 'unknown' '' absolute 1 1\n"); + } -int parse_inband_driver_type (const char *str) -{ - fatal_assert(str); + printf("BEGIN 'ipmi.sensor_state_%s'\n", sn_dfe.name); + printf("SET 'nominal' = %lld\n", sn->sensor_state == IPMI_MONITORING_STATE_NOMINAL ? 1LL : 0LL); + printf("SET 'warning' = %lld\n", sn->sensor_state == IPMI_MONITORING_STATE_WARNING ? 1LL : 0LL); + printf("SET 'critical' = %lld\n", sn->sensor_state == IPMI_MONITORING_STATE_CRITICAL ? 
1LL : 0LL); + printf("SET 'unknown' = %lld\n", sn->sensor_state == IPMI_MONITORING_STATE_UNKNOWN ? 1LL : 0LL); + printf("END\n"); + did_state = true; + } + } - if (strcasecmp (str, IPMI_PARSE_DEVICE_KCS_STR) == 0) - return (IPMI_MONITORING_DRIVER_TYPE_KCS); - else if (strcasecmp (str, IPMI_PARSE_DEVICE_SSIF_STR) == 0) - return (IPMI_MONITORING_DRIVER_TYPE_SSIF); - /* support "open" for those that might be used to - * ipmitool. - */ - else if (strcasecmp (str, IPMI_PARSE_DEVICE_OPENIPMI_STR) == 0 - || strcasecmp (str, IPMI_PARSE_DEVICE_OPENIPMI_STR2) == 0) - return (IPMI_MONITORING_DRIVER_TYPE_OPENIPMI); - /* support "bmc" for those that might be used to - * ipmitool. - */ - else if (strcasecmp (str, IPMI_PARSE_DEVICE_SUNBMC_STR) == 0 - || strcasecmp (str, IPMI_PARSE_DEVICE_SUNBMC_STR2) == 0) - return (IPMI_MONITORING_DRIVER_TYPE_SUNBMC); + if(likely(did_metric || did_state)) + total_sensors_sent++; + } + dfe_done(sn); - return (-1); + return total_sensors_sent; } -int parse_outofband_driver_type (const char *str) -{ - fatal_assert(str); +static size_t send_ipmi_sel_metrics_to_netdata(struct netdata_ipmi_state *state) { + static bool sel_chart_generated = false; + + if(likely(state->sel.status == ICS_RUNNING)) { + if(unlikely(!sel_chart_generated)) { + sel_chart_generated = true; + printf("CHART ipmi.events '' 'IPMI Events' 'events' 'events' ipmi.sel area %d %d '' '%s' '%s'\n" + , state->sel.priority + 2 + , (int)(state->sel.freq_ut / USEC_PER_SEC) + , program_name + , "sel" + ); + printf("DIMENSION events '' absolute 1 1\n"); + } - if (strcasecmp (str, IPMI_PARSE_DEVICE_LAN_STR) == 0) - return (IPMI_MONITORING_PROTOCOL_VERSION_1_5); - /* support "lanplus" for those that might be used to ipmitool. - * support typo variants to ease. 
- */ - else if (strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR) == 0 - || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR2) == 0 - || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR3) == 0 - || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR4) == 0 - || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR5) == 0) - return (IPMI_MONITORING_PROTOCOL_VERSION_2_0); + printf( + "BEGIN ipmi.events\n" + "SET events = %zu\n" + "END\n" + , state->sel.events + ); + } - return (-1); + return state->sel.events; } -int host_is_local(const char *host) -{ - if (host && (!strcmp(host, "localhost") || !strcmp(host, "127.0.0.1") || !strcmp(host, "::1"))) - return (1); - - return (0); -} +// ---------------------------------------------------------------------------- +// main, command line arguments parsing int main (int argc, char **argv) { + bool netdata_do_sel = IPMI_ENABLE_SEL_BY_DEFAULT; + stderror = stderr; clocks_init(); + int update_every = IPMI_SENSORS_MIN_UPDATE_EVERY; // this is the minimum update frequency + int update_every_sel = IPMI_SEL_MIN_UPDATE_EVERY; // this is the minimum update frequency for SEL events + bool debug = false; + // ------------------------------------------------------------------------ // initialization of netdata plugin @@ -1610,40 +1450,85 @@ int main (int argc, char **argv) { error_log_errors_per_period = 100; error_log_throttle_period = 3600; - // ------------------------------------------------------------------------ // parse command line parameters - int i, freq = 0; + int i, freq_s = 0; for(i = 1; i < argc ; i++) { - if(isdigit(*argv[i]) && !freq) { + if(isdigit(*argv[i]) && !freq_s) { int n = str2i(argv[i]); if(n > 0 && n < 86400) { - freq = n; + freq_s = n; continue; } } else if(strcmp("version", argv[i]) == 0 || strcmp("-version", argv[i]) == 0 || strcmp("--version", argv[i]) == 0 || strcmp("-v", argv[i]) == 0 || strcmp("-V", argv[i]) == 0) { - printf("freeipmi.plugin %s\n", VERSION); + printf("%s %s\n", program_name, VERSION); exit(0); } else 
if(strcmp("debug", argv[i]) == 0) { - debug = 1; + debug = true; continue; } else if(strcmp("sel", argv[i]) == 0) { - netdata_do_sel = 1; + netdata_do_sel = true; continue; } else if(strcmp("no-sel", argv[i]) == 0) { - netdata_do_sel = 0; + netdata_do_sel = false; continue; } + else if(strcmp("reread-sdr-cache", argv[i]) == 0) { + global_sel_flags |= IPMI_MONITORING_SEL_FLAGS_REREAD_SDR_CACHE; + global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_REREAD_SDR_CACHE; + remove_reread_sdr_after_first_use = false; + if (debug) fprintf(stderr, "%s: reread-sdr-cache enabled for both sensors and SEL\n", program_name); + } + else if(strcmp("interpret-oem-data", argv[i]) == 0) { + global_sel_flags |= IPMI_MONITORING_SEL_FLAGS_INTERPRET_OEM_DATA; + global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_INTERPRET_OEM_DATA; + if (debug) fprintf(stderr, "%s: interpret-oem-data enabled for both sensors and SEL\n", program_name); + } + else if(strcmp("assume-system-event-record", argv[i]) == 0) { + global_sel_flags |= IPMI_MONITORING_SEL_FLAGS_ASSUME_SYSTEM_EVENT_RECORD; + if (debug) fprintf(stderr, "%s: assume-system-event-record enabled\n", program_name); + } + else if(strcmp("ignore-non-interpretable-sensors", argv[i]) == 0) { + global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_IGNORE_NON_INTERPRETABLE_SENSORS; + if (debug) fprintf(stderr, "%s: ignore-non-interpretable-sensors enabled\n", program_name); + } + else if(strcmp("bridge-sensors", argv[i]) == 0) { + global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_BRIDGE_SENSORS; + if (debug) fprintf(stderr, "%s: bridge-sensors enabled\n", program_name); + } + else if(strcmp("shared-sensors", argv[i]) == 0) { + global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_SHARED_SENSORS; + if (debug) fprintf(stderr, "%s: shared-sensors enabled\n", program_name); + } + else if(strcmp("no-discrete-reading", argv[i]) == 0) { + global_sensor_reading_flags &= 
~(IPMI_MONITORING_SENSOR_READING_FLAGS_DISCRETE_READING); + if (debug) fprintf(stderr, "%s: discrete-reading disabled\n", program_name); + } + else if(strcmp("ignore-scanning-disabled", argv[i]) == 0) { + global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_IGNORE_SCANNING_DISABLED; + if (debug) fprintf(stderr, "%s: ignore-scanning-disabled enabled\n", program_name); + } + else if(strcmp("assume-bmc-owner", argv[i]) == 0) { + global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_ASSUME_BMC_OWNER; + if (debug) fprintf(stderr, "%s: assume-bmc-owner enabled\n", program_name); + } +#if defined(IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES) && defined(IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES) + else if(strcmp("entity-sensor-names", argv[i]) == 0) { + global_sel_flags |= IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES; + global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES; + if (debug) fprintf(stderr, "%s: entity-sensor-names enabled for both sensors and SEL\n", program_name); + } +#endif else if(strcmp("-h", argv[i]) == 0 || strcmp("--help", argv[i]) == 0) { fprintf(stderr, "\n" - " netdata freeipmi.plugin %s\n" - " Copyright (C) 2016-2017 Costa Tsaousis <costa@tsaousis.gr>\n" + " netdata %s %s\n" + " Copyright (C) 2023 Netdata Inc.\n" " Released under GNU General Public License v3 or later.\n" " All rights reserved.\n" "\n" @@ -1661,16 +1546,53 @@ int main (int argc, char **argv) { " no-sel enable/disable SEL collection\n" " default: %s\n" "\n" + " reread-sdr-cache re-read SDR cache on every iteration\n" + " default: disabled\n" + "\n" + " interpret-oem-data attempt to parse OEM data\n" + " default: disabled\n" + "\n" + " assume-system-event-record \n" + " tread illegal SEL events records as normal\n" + " default: disabled\n" + "\n" + " ignore-non-interpretable-sensors \n" + " do not read sensors that cannot be interpreted\n" + " default: disabled\n" + "\n" + " bridge-sensors bridge sensors 
not owned by the BMC\n" + " default: disabled\n" + "\n" + " shared-sensors enable shared sensors, if found\n" + " default: disabled\n" + "\n" + " no-discrete-reading do not read sensors that their event/reading type code is invalid\n" + " default: enabled\n" + "\n" + " ignore-scanning-disabled \n" + " Ignore the scanning bit and read sensors no matter what\n" + " default: disabled\n" + "\n" + " assume-bmc-owner assume the BMC is the sensor owner no matter what\n" + " (usually bridging is required too)\n" + " default: disabled\n" + "\n" +#if defined(IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES) && defined(IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES) + " entity-sensor-names sensor names prefixed with entity id and instance\n" + " default: disabled\n" + "\n" +#endif " hostname HOST\n" " username USER\n" " password PASS connect to remote IPMI host\n" " default: local IPMI processor\n" "\n" + " no-auth-code-check\n" " noauthcodecheck don't check the authentication codes returned\n" "\n" " driver-type IPMIDRIVER\n" " Specify the driver type to use instead of doing an auto selection. \n" - " The currently available outofband drivers are LAN and LAN_2_0,\n" + " The currently available outofband drivers are LAN and LAN_2_0,\n" " which perform IPMI 1.5 and IPMI 2.0 respectively. \n" " The currently available inband drivers are KCS, SSIF, OPENIPMI and SUNBMC.\n" "\n" @@ -1680,6 +1602,9 @@ int main (int argc, char **argv) { " sensor-config-file FILE filename to read sensor configuration\n" " default: %s\n" "\n" + " sel-config-file FILE filename to read sel configuration\n" + " default: %s\n" + "\n" " ignore N1,N2,N3,... 
sensor IDs to ignore\n" " default: none\n" "\n" @@ -1700,11 +1625,12 @@ int main (int argc, char **argv) { " For more information:\n" " https://github.com/netdata/netdata/tree/master/collectors/freeipmi.plugin\n" "\n" - , VERSION - , netdata_update_every + , program_name, VERSION + , update_every , netdata_do_sel?"enabled":"disabled" , sdr_cache_directory?sdr_cache_directory:"system default" , sensor_config_file?sensor_config_file:"system default" + , sel_config_file?sel_config_file:"system default" ); exit(1); } @@ -1713,7 +1639,7 @@ int main (int argc, char **argv) { char *s = argv[i]; // mask it be hidden from the process tree while(*s) *s++ = 'x'; - if(debug) fprintf(stderr, "freeipmi.plugin: hostname set to '%s'\n", hostname); + if(debug) fprintf(stderr, "%s: hostname set to '%s'\n", program_name, hostname); continue; } else if(i < argc && strcmp("username", argv[i]) == 0) { @@ -1721,7 +1647,7 @@ int main (int argc, char **argv) { char *s = argv[i]; // mask it be hidden from the process tree while(*s) *s++ = 'x'; - if(debug) fprintf(stderr, "freeipmi.plugin: username set to '%s'\n", username); + if(debug) fprintf(stderr, "%s: username set to '%s'\n", program_name, username); continue; } else if(i < argc && strcmp("password", argv[i]) == 0) { @@ -1729,149 +1655,247 @@ int main (int argc, char **argv) { char *s = argv[i]; // mask it be hidden from the process tree while(*s) *s++ = 'x'; - if(debug) fprintf(stderr, "freeipmi.plugin: password set to '%s'\n", password); + if(debug) fprintf(stderr, "%s: password set to '%s'\n", program_name, password); continue; } else if(strcmp("driver-type", argv[i]) == 0) { if (hostname) { - protocol_version=parse_outofband_driver_type(argv[++i]); - if(debug) fprintf(stderr, "freeipmi.plugin: outband protocol version set to '%d'\n", protocol_version); + protocol_version = netdata_parse_outofband_driver_type(argv[++i]); + if(debug) fprintf(stderr, "%s: outband protocol version set to '%d'\n", + program_name, protocol_version); } 
else { - driver_type=parse_inband_driver_type(argv[++i]); - if(debug) fprintf(stderr, "freeipmi.plugin: inband driver type set to '%d'\n", driver_type); + driver_type = netdata_parse_inband_driver_type(argv[++i]); + if(debug) fprintf(stderr, "%s: inband driver type set to '%d'\n", + program_name, driver_type); } continue; - } else if (i < argc && strcmp("noauthcodecheck", argv[i]) == 0) { - if (!hostname || host_is_local(hostname)) { + } else if (i < argc && (strcmp("noauthcodecheck", argv[i]) == 0 || strcmp("no-auth-code-check", argv[i]) == 0)) { + if (!hostname || netdata_host_is_localhost(hostname)) { if (debug) - fprintf( - stderr, - "freeipmi.plugin: noauthcodecheck workaround flag is ignored for inband configuration\n"); - } else if (protocol_version < 0 || protocol_version == IPMI_MONITORING_PROTOCOL_VERSION_1_5) { + fprintf(stderr, "%s: noauthcodecheck workaround flag is ignored for inband configuration\n", + program_name); + + } + else if (protocol_version < 0 || protocol_version == IPMI_MONITORING_PROTOCOL_VERSION_1_5) { workaround_flags |= IPMI_MONITORING_WORKAROUND_FLAGS_PROTOCOL_VERSION_1_5_NO_AUTH_CODE_CHECK; + if (debug) - fprintf(stderr, "freeipmi.plugin: noauthcodecheck workaround flag enabled\n"); - } else { + fprintf(stderr, "%s: noauthcodecheck workaround flag enabled\n", program_name); + } + else { if (debug) - fprintf( - stderr, - "freeipmi.plugin: noauthcodecheck workaround flag is ignored for protocol version 2.0\n"); + fprintf(stderr, "%s: noauthcodecheck workaround flag is ignored for protocol version 2.0\n", + program_name); } continue; } else if(i < argc && strcmp("sdr-cache-dir", argv[i]) == 0) { sdr_cache_directory = argv[++i]; - if(debug) fprintf(stderr, "freeipmi.plugin: SDR cache directory set to '%s'\n", sdr_cache_directory); + + if(debug) + fprintf(stderr, "%s: SDR cache directory set to '%s'\n", program_name, sdr_cache_directory); + continue; } else if(i < argc && strcmp("sensor-config-file", argv[i]) == 0) { sensor_config_file = 
argv[++i]; - if(debug) fprintf(stderr, "freeipmi.plugin: sensor config file set to '%s'\n", sensor_config_file); + if(debug) fprintf(stderr, "%s: sensor config file set to '%s'\n", program_name, sensor_config_file); + continue; + } + else if(i < argc && strcmp("sel-config-file", argv[i]) == 0) { + sel_config_file = argv[++i]; + if(debug) fprintf(stderr, "%s: sel config file set to '%s'\n", program_name, sel_config_file); continue; } else if(i < argc && strcmp("ignore", argv[i]) == 0) { - excluded_record_ids_parse(argv[++i]); + excluded_record_ids_parse(argv[++i], debug); continue; } else if(i < argc && strcmp("ignore-status", argv[i]) == 0) { - excluded_status_record_ids_parse(argv[++i]); + excluded_status_record_ids_parse(argv[++i], debug); continue; } - collector_error("freeipmi.plugin: ignoring parameter '%s'", argv[i]); + collector_error("%s(): ignoring parameter '%s'", __FUNCTION__, argv[i]); } errno = 0; - if(freq >= netdata_update_every) - netdata_update_every = freq; - - else if(freq) - collector_error("update frequency %d seconds is too small for IPMI. Using %d.", freq, netdata_update_every); + if(freq_s && freq_s < update_every) + collector_error("%s(): update frequency %d seconds is too small for IPMI. 
Using %d.", + __FUNCTION__, freq_s, update_every); + update_every = freq_s = MAX(freq_s, update_every); + update_every_sel = MAX(update_every, update_every_sel); // ------------------------------------------------------------------------ // initialize IPMI - struct ipmi_monitoring_ipmi_config ipmi_config; - - if(debug) fprintf(stderr, "freeipmi.plugin: calling _init_ipmi_config()\n"); - - _init_ipmi_config(&ipmi_config); - if(debug) { - fprintf(stderr, "freeipmi.plugin: calling ipmi_monitoring_init()\n"); - ipmimonitoring_init_flags|=IPMI_MONITORING_FLAGS_DEBUG|IPMI_MONITORING_FLAGS_DEBUG_IPMI_PACKETS; + fprintf(stderr, "%s: calling ipmi_monitoring_init()\n", program_name); + ipmimonitoring_init_flags |= IPMI_MONITORING_FLAGS_DEBUG|IPMI_MONITORING_FLAGS_DEBUG_IPMI_PACKETS; } - if(ipmi_monitoring_init(ipmimonitoring_init_flags, &errnum) < 0) - fatal("ipmi_monitoring_init: %s", ipmi_monitoring_ctx_strerror(errnum)); - - if(debug) fprintf(stderr, "freeipmi.plugin: detecting IPMI minimum update frequency...\n"); - freq = ipmi_detect_speed_secs(&ipmi_config); - if(debug) fprintf(stderr, "freeipmi.plugin: IPMI minimum update frequency was calculated to %d seconds.\n", freq); - - if(freq > netdata_update_every) { - collector_info("enforcing minimum data collection frequency, calculated to %d seconds.", freq); - netdata_update_every = freq; - } + int rc; + if(ipmi_monitoring_init(ipmimonitoring_init_flags, &rc) < 0) + fatal("ipmi_monitoring_init: %s", ipmi_monitoring_ctx_strerror(rc)); + // ------------------------------------------------------------------------ + // create the data collection threads + + struct ipmi_collection_thread sensors_data = { + .type = IPMI_COLLECT_TYPE_SENSORS, + .freq_s = update_every, + .spinlock = NETDATA_SPINLOCK_INITIALIZER, + .debug = debug, + .state = { + .debug = debug, + .sensors = { + .status = ICS_INIT, + .last_iteration_ut = now_monotonic_usec(), + .freq_ut = update_every * USEC_PER_SEC, + .priority = IPMI_SENSORS_DASHBOARD_PRIORITY, 
+ .dict = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE|DICT_OPTION_FIXED_SIZE, NULL, sizeof(struct sensor)), + }, + }, + }, sel_data = { + .type = IPMI_COLLECT_TYPE_SEL, + .freq_s = update_every_sel, + .spinlock = NETDATA_SPINLOCK_INITIALIZER, + .debug = debug, + .state = { + .debug = debug, + .sel = { + .status = ICS_INIT, + .last_iteration_ut = now_monotonic_usec(), + .freq_ut = update_every_sel * USEC_PER_SEC, + .priority = IPMI_SEL_DASHBOARD_PRIORITY, + }, + }, + }; + + netdata_thread_t sensors_thread = 0, sel_thread = 0; + + netdata_thread_create(&sensors_thread, "IPMI[sensors]", NETDATA_THREAD_OPTION_DONT_LOG, netdata_ipmi_collection_thread, &sensors_data); + + if(netdata_do_sel) + netdata_thread_create(&sel_thread, "IPMI[sel]", NETDATA_THREAD_OPTION_DONT_LOG, netdata_ipmi_collection_thread, &sel_data); // ------------------------------------------------------------------------ // the main loop - if(debug) fprintf(stderr, "freeipmi.plugin: starting data collection\n"); + if(debug) fprintf(stderr, "%s: starting data collection\n", program_name); time_t started_t = now_monotonic_sec(); size_t iteration = 0; - usec_t step = netdata_update_every * USEC_PER_SEC; + usec_t step = 100 * USEC_PER_MS; + bool global_chart_created = false; + bool tty = isatty(fileno(stderr)) == 1; heartbeat_t hb; heartbeat_init(&hb); for(iteration = 0; 1 ; iteration++) { usec_t dt = heartbeat_next(&hb, step); - if (iteration) { - if (iteration == 1) { - fprintf( - stdout, - "CHART netdata.freeipmi_availability_status '' 'Plugin availability status' 'status' plugins netdata.plugin_availability_status line 146000 %d\n" - "DIMENSION available '' absolute 1 1\n", - netdata_update_every); + if(!tty) + fprintf(stdout, "\n"); // keepalive to avoid parser read timeout (2 minutes) during ipmi_detect_speed_secs() + + struct netdata_ipmi_state state = {0 }; + + spinlock_lock(&sensors_data.spinlock); + state.sensors = sensors_data.state.sensors; + 
spinlock_unlock(&sensors_data.spinlock); + + spinlock_lock(&sel_data.spinlock); + state.sel = sel_data.state.sel; + spinlock_unlock(&sel_data.spinlock); + + switch(state.sensors.status) { + case ICS_RUNNING: + step = update_every * USEC_PER_SEC; + if(state.sensors.last_iteration_ut < now_monotonic_usec() - IPMI_RESTART_IF_SENSORS_DONT_ITERATE_EVERY_SECONDS * USEC_PER_SEC) { + collector_error("%s(): sensors have not be collected for %zu seconds. Exiting to restart.", + __FUNCTION__, (size_t)((now_monotonic_usec() - state.sensors.last_iteration_ut) / USEC_PER_SEC)); + + fprintf(stdout, "EXIT\n"); + fflush(stdout); + exit(0); + } + break; + + case ICS_INIT: + continue; + + case ICS_INIT_FAILED: + collector_error("%s(): sensors failed to initialize. Calling DISABLE.", __FUNCTION__); + fprintf(stdout, "DISABLE\n"); + fflush(stdout); + exit(0); + + case ICS_FAILED: + collector_error("%s(): sensors fails repeatedly to collect metrics. Exiting to restart.", __FUNCTION__); + fprintf(stdout, "EXIT\n"); + fflush(stdout); + exit(0); + } + + if(netdata_do_sel) { + switch (state.sensors.status) { + case ICS_RUNNING: + case ICS_INIT: + break; + + case ICS_INIT_FAILED: + case ICS_FAILED: + collector_error("%s(): SEL fails to collect events. 
Disabling SEL collection.", __FUNCTION__); + netdata_do_sel = false; + break; } - fprintf( - stdout, - "BEGIN netdata.freeipmi_availability_status\n" - "SET available = 1\n" - "END\n"); } - if(debug && iteration) - fprintf(stderr, "freeipmi.plugin: iteration %zu, dt %llu usec, sensors collected %zu, sensors sent to netdata %zu \n" + if(unlikely(debug)) + fprintf(stderr, "%s: calling send_ipmi_sensor_metrics_to_netdata()\n", program_name); + + state.updates.now_ut = now_monotonic_usec(); + send_ipmi_sensor_metrics_to_netdata(&state); + + if(netdata_do_sel) + send_ipmi_sel_metrics_to_netdata(&state); + + if(unlikely(debug)) + fprintf(stderr, "%s: iteration %zu, dt %llu usec, sensors ever collected %zu, sensors last collected %zu \n" + , program_name , iteration , dt - , netdata_sensors_collected - , netdata_sensors_updated + , dictionary_entries(state.sensors.dict) + , state.sensors.collected ); - netdata_mark_as_not_updated(); + if (!global_chart_created) { + global_chart_created = true; - if(debug) fprintf(stderr, "freeipmi.plugin: calling ipmi_collect_data()\n"); - if(ipmi_collect_data(&ipmi_config) < 0) - fatal("data collection failed."); + fprintf(stdout, + "CHART netdata.freeipmi_availability_status '' 'Plugin availability status' 'status' " + "plugins netdata.plugin_availability_status line 146000 %d '' '%s' '%s'\n" + "DIMENSION available '' absolute 1 1\n", + update_every, program_name, ""); + } - if(debug) fprintf(stderr, "freeipmi.plugin: calling send_metrics_to_netdata()\n"); - send_metrics_to_netdata(); - fflush(stdout); + fprintf(stdout, + "BEGIN netdata.freeipmi_availability_status\n" + "SET available = 1\n" + "END\n"); // restart check (14400 seconds) - if (now_monotonic_sec() - started_t > 14400) { + if (now_monotonic_sec() - started_t > IPMI_RESTART_EVERY_SECONDS) { + collector_error("%s(): reached my lifetime expectancy. 
Exiting to restart.", __FUNCTION__); fprintf(stdout, "EXIT\n"); fflush(stdout); exit(0); } + + fflush(stdout); } } - diff --git a/collectors/freeipmi.plugin/metrics.csv b/collectors/freeipmi.plugin/metrics.csv index 9d493a53..4c90d5c1 100644 --- a/collectors/freeipmi.plugin/metrics.csv +++ b/collectors/freeipmi.plugin/metrics.csv @@ -1,10 +1,10 @@ metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -ipmi.sel,,events,events,"IPMI Events",area,,freeipmi.plugin, -ipmi.sensors_states,,"nominal, critical, warning",sensors,"IPMI Sensors State",line,,freeipmi.plugin, -ipmi.temperatures_c,,a dimension per sensor,Celsius,"System Celsius Temperatures read by IPMI",line,,freeipmi.plugin, -ipmi.temperatures_f,,a dimension per sensor,Fahrenheit,"System Celsius Temperatures read by IPMI",line,,freeipmi.plugin, -ipmi.voltages,,a dimension per sensor,Volts,"System Voltages read by IPMI",line,,freeipmi.plugin, -ipmi.amps,,a dimension per sensor,Amps,"System Current read by IPMI",line,,freeipmi.plugin, -ipmi.rpm,,a dimension per sensor,RPM,"System Fans read by IPMI",line,,freeipmi.plugin, -ipmi.watts,,a dimension per sensor,Watts,"System Power read by IPMI",line,,freeipmi.plugin, -ipmi.percent,,a dimension per sensor,%,"System Metrics read by IPMI",line,,freeipmi.plugin,
\ No newline at end of file +ipmi.sel,,events,events,"IPMI Events",area,,freeipmi.plugin,sel +ipmi.sensor_state,sensor,"nominal, critical, warning, unknown",state,"IPMI Sensors State",line,"sensor, type, component",freeipmi.plugin,sensors +ipmi.sensor_temperature_c,sensor,temperature,Celsius,"IPMI Sensor Temperature Celsius",line,"sensor, type, component",freeipmi.plugin,sensors +ipmi.sensor_temperature_f,sensor,temperature,Fahrenheit,"IPMI Sensor Temperature Fahrenheit",line,"sensor, type, component",freeipmi.plugin,sensors +ipmi.sensor_voltage,sensor,voltage,Volts,"IPMI Sensor Voltage",line,"sensor, type, component",freeipmi.plugin,sensors +ipmi.sensor_ampere,sensor,ampere,Amps,"IPMI Sensor Current",line,"sensor, type, component",freeipmi.plugin,sensors +ipmi.sensor_fan_speed,sensor,rotations,RPM,"IPMI Sensor Fans Speed",line,"sensor, type, component",freeipmi.plugin,sensors +ipmi.sensor_power,sensor,power,Watts,"IPMI Sensor Power",line,"sensor, type, component",freeipmi.plugin,sensors +ipmi.sensor_reading_percent,sensor,percentage,%,"IPMI Sensor Reading Percentage",line,"sensor, type, component",freeipmi.plugin,sensors diff --git a/collectors/freeipmi.plugin/multi_metadata.yaml b/collectors/freeipmi.plugin/multi_metadata.yaml new file mode 100644 index 00000000..21333278 --- /dev/null +++ b/collectors/freeipmi.plugin/multi_metadata.yaml @@ -0,0 +1,199 @@ +name: freeipmi.plugin +modules: + - meta: + plugin_name: freeipmi.plugin + module_name: sel + monitored_instance: + name: freeipmi sel + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + 
performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: ipmi.sel + description: IPMI Events + unit: "events" + chart_type: area + dimensions: + - name: events + - meta: + plugin_name: freeipmi.plugin + module_name: sensors + monitored_instance: + name: freeipmi sensors + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: ipmi_sensor_state + link: https://github.com/netdata/netdata/blob/master/health/health.d/ipmi.conf + metric: ipmi.sensor_state + info: IPMI sensor ${label:sensor} (${label:component}) state + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: sensor + description: "" + labels: + - name: sensor + description: TBD + - name: type + description: TBD + - name: component + description: TBD + metrics: + - name: 
ipmi.sensor_state + description: IPMI Sensors State + unit: "state" + chart_type: line + dimensions: + - name: nominal + - name: critical + - name: warning + - name: unknown + - name: ipmi.sensor_temperature_c + description: IPMI Sensor Temperature Celsius + unit: "Celsius" + chart_type: line + dimensions: + - name: temperature + - name: ipmi.sensor_temperature_f + description: IPMI Sensor Temperature Fahrenheit + unit: "Fahrenheit" + chart_type: line + dimensions: + - name: temperature + - name: ipmi.sensor_voltage + description: IPMI Sensor Voltage + unit: "Volts" + chart_type: line + dimensions: + - name: voltage + - name: ipmi.sensor_ampere + description: IPMI Sensor Current + unit: "Amps" + chart_type: line + dimensions: + - name: ampere + - name: ipmi.sensor_fan_speed + description: IPMI Sensor Fans Speed + unit: "RPM" + chart_type: line + dimensions: + - name: rotations + - name: ipmi.sensor_power + description: IPMI Sensor Power + unit: "Watts" + chart_type: line + dimensions: + - name: power + - name: ipmi.sensor_reading_percent + description: IPMI Sensor Reading Percentage + unit: "%" + chart_type: line + dimensions: + - name: percentage diff --git a/collectors/idlejitter.plugin/metadata.yaml b/collectors/idlejitter.plugin/metadata.yaml new file mode 100644 index 00000000..cd1737d3 --- /dev/null +++ b/collectors/idlejitter.plugin/metadata.yaml @@ -0,0 +1,74 @@ +meta: + plugin_name: idlejitter.plugin + module_name: idlejitter.plugin + monitored_instance: + name: IdleJitter + link: '' + categories: + - data-collection.synthetic-checks + icon_filename: 'syslog.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor IdleJitter performance for optimal system idle process operations. Monitor CPU idle times, wake-ups, and power states to optimize system resource usage.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.idlejitter + description: CPU Idle Jitter + unit: "microseconds lost/s" + chart_type: line + dimensions: + - name: min + - name: max + - name: average diff --git a/collectors/ioping.plugin/metadata.yaml b/collectors/ioping.plugin/metadata.yaml new file mode 100644 index 00000000..52264f17 --- /dev/null +++ b/collectors/ioping.plugin/metadata.yaml @@ -0,0 +1,76 @@ +meta: + plugin_name: ioping.plugin + module_name: ioping.plugin + monitored_instance: + name: IOPing + link: '' + categories: + - data-collection.synthetic-checks + icon_filename: 'syslog.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor IOPing metrics for efficient disk I/O latency tracking. Keep track of read/write speeds, latency, and error rates for optimized disk operations.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: +- name: ioping_disk_latency + link: https://github.com/netdata/netdata/blob/master/health/health.d/ioping.conf + metric: ioping.latency + info: average I/O latency over the last 10 seconds +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: disk + description: "" + labels: [] + metrics: + - name: ioping.latency + description: Read Latency + unit: "microseconds" + chart_type: line + dimensions: + - name: latency diff --git a/collectors/macos.plugin/multi_metadata.yaml b/collectors/macos.plugin/multi_metadata.yaml new file mode 100644 index 00000000..38668fdc --- /dev/null +++ b/collectors/macos.plugin/multi_metadata.yaml @@ -0,0 +1,797 @@ +name: macos.plugin +modules: + - meta: + plugin_name: macos.plugin + module_name: mach_smi + monitored_instance: + name: macos mach_smi + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: 
'' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: 10min_cpu_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf + metric: system.cpu + info: average CPU utilization over the last 10 minutes (excluding iowait, nice and steal) + os: "linux" + - name: 10min_cpu_iowait + link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf + metric: system.cpu + info: average CPU iowait time over the last 10 minutes + os: "linux" + - name: 20min_steal_cpu + link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf + metric: system.cpu + info: average CPU steal time over the last 20 minutes + os: "linux" + - name: 10min_cpu_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf + metric: system.cpu + info: average CPU utilization over the last 10 minutes (excluding nice) + os: "freebsd" + - name: ram_in_use + link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf + metric: system.ram + info: system memory utilization + os: "linux" + - name: ram_in_use + link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf + metric: system.ram + info: system memory utilization + os: "freebsd" + - name: 30min_ram_swapped_out + link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf + metric: system.swapio + info: percentage of the system RAM swapped in the last 30 minutes + os: "linux freebsd" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.cpu + description: Total CPU utilization + unit: "percentage" + chart_type: stacked + dimensions: + - name: user + - name: nice + - name: system + - name: idle + - name: system.ram + description: System RAM + 
unit: "MiB" + chart_type: stacked + dimensions: + - name: active + - name: wired + - name: throttled + - name: compressor + - name: inactive + - name: purgeable + - name: speculative + - name: free + - name: system.swapio + description: Swap I/O + unit: "KiB/s" + chart_type: area + dimensions: + - name: io + - name: out + - name: mem.pgfaults + description: Memory Page Faults + unit: "faults/s" + chart_type: line + dimensions: + - name: memory + - name: cow + - name: pagein + - name: pageout + - name: compress + - name: decompress + - name: zero_fill + - name: reactivate + - name: purge + - meta: + plugin_name: macos.plugin + module_name: sysctl + monitored_instance: + name: macos sysctl + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: load_cpu_number + link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf + metric: system.load + info: number of active CPU cores in the system + os: "linux" + - name: load_average_15 + link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf + metric: system.load + info: system fifteen-minute load average + os: "linux" + - name: load_average_5 + link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf + metric: 
system.load + info: system five-minute load average + os: "linux" + - name: load_average_1 + link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf + metric: system.load + info: system one-minute load average + os: "linux" + - name: used_swap + link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf + metric: system.swap + info: swap memory utilization + os: "linux freebsd" + - name: 1m_ipv4_tcp_resets_sent + link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf + metric: ipv4.tcphandshake + info: average number of sent TCP RESETS over the last minute + os: "linux" + - name: 10s_ipv4_tcp_resets_sent + link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf + metric: ipv4.tcphandshake + info: average number of sent TCP RESETS over the last 10 seconds. This can indicate a port scan, or that a service running on this host has crashed. Netdata will not send a clear notification for this alarm. + os: "linux" + - name: 1m_ipv4_tcp_resets_received + link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf + metric: ipv4.tcphandshake + info: average number of received TCP RESETS over the last minute + os: "linux freebsd" + - name: 10s_ipv4_tcp_resets_received + link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf + metric: ipv4.tcphandshake + info: average number of received TCP RESETS over the last 10 seconds. This can be an indication that a service this host needs has crashed. Netdata will not send a clear notification for this alarm. 
+ os: "linux freebsd" + - name: 1m_ipv4_udp_receive_buffer_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf + metric: ipv4.udperrors + info: average number of UDP receive buffer errors over the last minute + os: "linux freebsd" + - name: 1m_ipv4_udp_send_buffer_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf + metric: ipv4.udperrors + info: average number of UDP send buffer errors over the last minute + os: "linux" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.load + description: System Load Average + unit: "load" + chart_type: line + dimensions: + - name: load1 + - name: load5 + - name: load15 + - name: system.swap + description: System Swap + unit: "MiB" + chart_type: stacked + dimensions: + - name: free + - name: used + - name: system.ipv4 + description: IPv4 Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: received + - name: sent + - name: ipv4.tcppackets + description: IPv4 TCP Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipv4.tcperrors + description: IPv4 TCP Errors + unit: "packets/s" + chart_type: line + dimensions: + - name: InErrs + - name: InCsumErrors + - name: RetransSegs + - name: ipv4.tcphandshake + description: IPv4 TCP Handshake Issues + unit: "events/s" + chart_type: line + dimensions: + - name: EstabResets + - name: ActiveOpens + - name: PassiveOpens + - name: AttemptFails + - name: ipv4.tcpconnaborts + description: TCP Connection Aborts + unit: "connections/s" + chart_type: line + dimensions: + - name: baddata + - name: userclosed + - name: nomemory + - name: timeout + - name: ipv4.tcpofo + description: TCP Out-Of-Order Queue + unit: "packets/s" + chart_type: line + dimensions: + - name: inqueue + - name: ipv4.tcpsyncookies + description: TCP 
SYN Cookies + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: failed + - name: ipv4.ecnpkts + description: IPv4 ECN Statistics + unit: "packets/s" + chart_type: line + dimensions: + - name: CEP + - name: NoECTP + - name: ipv4.udppackets + description: IPv4 UDP Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipv4.udperrors + description: IPv4 UDP Errors + unit: "events/s" + chart_type: line + dimensions: + - name: RcvbufErrors + - name: InErrors + - name: NoPorts + - name: InCsumErrors + - name: IgnoredMulti + - name: ipv4.icmp + description: IPv4 ICMP Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipv4.icmp_errors + description: IPv4 ICMP Errors + unit: "packets/s" + chart_type: line + dimensions: + - name: InErrors + - name: OutErrors + - name: InCsumErrors + - name: ipv4.icmpmsg + description: IPv4 ICMP Messages + unit: "packets/s" + chart_type: line + dimensions: + - name: InEchoReps + - name: OutEchoReps + - name: InEchos + - name: OutEchos + - name: ipv4.packets + description: IPv4 Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: forwarded + - name: delivered + - name: ipv4.fragsout + description: IPv4 Fragments Sent + unit: "packets/s" + chart_type: line + dimensions: + - name: ok + - name: failed + - name: created + - name: ipv4.fragsin + description: IPv4 Fragments Reassembly + unit: "packets/s" + chart_type: line + dimensions: + - name: ok + - name: failed + - name: all + - name: ipv4.errors + description: IPv4 Errors + unit: "packets/s" + chart_type: line + dimensions: + - name: InDiscards + - name: OutDiscards + - name: InHdrErrors + - name: OutNoRoutes + - name: InAddrErrors + - name: InUnknownProtos + - name: ipv6.packets + description: IPv6 Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - 
name: forwarded + - name: delivers + - name: ipv6.fragsout + description: IPv6 Fragments Sent + unit: "packets/s" + chart_type: line + dimensions: + - name: ok + - name: failed + - name: all + - name: ipv6.fragsin + description: IPv6 Fragments Reassembly + unit: "packets/s" + chart_type: line + dimensions: + - name: ok + - name: failed + - name: timeout + - name: all + - name: ipv6.errors + description: IPv6 Errors + unit: "packets/s" + chart_type: line + dimensions: + - name: InDiscards + - name: OutDiscards + - name: InHdrErrors + - name: InAddrErrors + - name: InTruncatedPkts + - name: InNoRoutes + - name: OutNoRoutes + - name: ipv6.icmp + description: IPv6 ICMP Messages + unit: "messages/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipv6.icmpredir + description: IPv6 ICMP Redirects + unit: "redirects/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipv6.icmperrors + description: IPv6 ICMP Errors + unit: "errors/s" + chart_type: line + dimensions: + - name: InErrors + - name: OutErrors + - name: InCsumErrors + - name: InDestUnreachs + - name: InPktTooBigs + - name: InTimeExcds + - name: InParmProblems + - name: OutDestUnreachs + - name: OutTimeExcds + - name: OutParmProblems + - name: ipv6.icmpechos + description: IPv6 ICMP Echo + unit: "messages/s" + chart_type: line + dimensions: + - name: InEchos + - name: OutEchos + - name: InEchoReplies + - name: OutEchoReplies + - name: ipv6.icmprouter + description: IPv6 Router Messages + unit: "messages/s" + chart_type: line + dimensions: + - name: InSolicits + - name: OutSolicits + - name: InAdvertisements + - name: OutAdvertisements + - name: ipv6.icmpneighbor + description: IPv6 Neighbor Messages + unit: "messages/s" + chart_type: line + dimensions: + - name: InSolicits + - name: OutSolicits + - name: InAdvertisements + - name: OutAdvertisements + - name: ipv6.icmptypes + description: IPv6 ICMP Types + unit: "messages/s" + chart_type: line + dimensions: 
+ - name: InType1 + - name: InType128 + - name: InType129 + - name: InType136 + - name: OutType1 + - name: OutType128 + - name: OutType129 + - name: OutType133 + - name: OutType135 + - name: OutType143 + - name: system.uptime + description: System Uptime + unit: "seconds" + chart_type: line + dimensions: + - name: uptime + - meta: + plugin_name: macos.plugin + module_name: iokit + monitored_instance: + name: macos iokit + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: 10min_disk_utilization + link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf + metric: disk.util + info: average percentage of time ${label:device} disk was busy over the last 10 minutes + os: "linux freebsd" + - name: disk_space_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf + metric: disk.space + info: disk ${label:mount_point} space utilization + os: "linux freebsd" + - name: disk_inode_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf + metric: disk.inodes + info: disk ${label:mount_point} inode utilization + os: "linux freebsd" + - name: interface_speed + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: 
net.net + info: network interface ${label:device} current speed + os: "*" + - name: 1m_received_traffic_overflow + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.net + info: average inbound utilization for the network interface ${label:device} over the last minute + os: "linux" + - name: 1m_sent_traffic_overflow + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.net + info: average outbound utilization for the network interface ${label:device} over the last minute + os: "linux" + - name: inbound_packets_dropped_ratio + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.packets + info: ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes + os: "linux" + - name: outbound_packets_dropped_ratio + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.packets + info: ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes + os: "linux" + - name: wifi_inbound_packets_dropped_ratio + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.packets + info: ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes + os: "linux" + - name: wifi_outbound_packets_dropped_ratio + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.packets + info: ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes + os: "linux" + - name: 1m_received_packets_rate + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.packets + info: average number of packets received by the network interface ${label:device} over the last minute + os: "linux freebsd" + - name: 10s_received_packets_storm + link: 
https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.packets + info: ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute + os: "linux freebsd" + - name: interface_inbound_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.errors + info: number of inbound errors for the network interface ${label:device} in the last 10 minutes + os: "freebsd" + - name: interface_outbound_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.errors + info: number of outbound errors for the network interface ${label:device} in the last 10 minutes + os: "freebsd" + - name: inbound_packets_dropped + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.drops + info: number of inbound dropped packets for the network interface ${label:device} in the last 10 minutes + os: "linux" + - name: outbound_packets_dropped + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.drops + info: number of outbound dropped packets for the network interface ${label:device} in the last 10 minutes + os: "linux" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: disk + description: "" + labels: [] + metrics: + - name: disk.io + description: Disk I/O Bandwidth + unit: "KiB/s" + chart_type: area + dimensions: + - name: read + - name: writes + - name: disk.ops + description: Disk Completed I/O Operations + unit: "operations/s" + chart_type: line + dimensions: + - name: read + - name: writes + - name: disk.util + description: Disk Utilization Time + unit: "% of time working" + chart_type: area + dimensions: + - name: utilization + - name: disk.iotime + description: Disk Total I/O Time + unit: "milliseconds/s" + chart_type: line + dimensions: + - 
name: reads + - name: writes + - name: disk.await + description: Average Completed I/O Operation Time + unit: "milliseconds/operation" + chart_type: line + dimensions: + - name: reads + - name: writes + - name: disk.avgsz + description: Average Completed I/O Operation Bandwidth + unit: "KiB/operation" + chart_type: line + dimensions: + - name: reads + - name: writes + - name: disk.svctm + description: Average Service Time + unit: "milliseconds/operation" + chart_type: line + dimensions: + - name: svctm + - name: global + description: "" + labels: [] + metrics: + - name: system.io + description: Disk I/O + unit: "KiB/s" + chart_type: area + dimensions: + - name: in + - name: out + - name: mount point + description: "" + labels: [] + metrics: + - name: disk.space + description: Disk Space Usage for {mounted dir} [{mounted filesystem}] + unit: "GiB" + chart_type: stacked + dimensions: + - name: avail + - name: used + - name: reserved_for_root + - name: disk.inodes + description: Disk Files (inodes) Usage for {mounted dir} [{mounted filesystem}] + unit: "inodes" + chart_type: stacked + dimensions: + - name: avail + - name: used + - name: reserved_for_root + - name: network device + description: "" + labels: [] + metrics: + - name: net.net + description: Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: received + - name: sent + - name: net.packets + description: Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: multicast_received + - name: multicast_sent + - name: net.errors + description: Interface Errors + unit: "errors/s" + chart_type: line + dimensions: + - name: inbound + - name: outbound + - name: net.drops + description: Interface Drops + unit: "drops/s" + chart_type: line + dimensions: + - name: inbound + - name: net.events + description: Network Interface Events + unit: "events/s" + chart_type: line + dimensions: + - name: frames + - name: collisions + - name: carrier diff --git 
a/collectors/macos.plugin/plugin_macos.c b/collectors/macos.plugin/plugin_macos.c index f3b86051..3aaa46c7 100644 --- a/collectors/macos.plugin/plugin_macos.c +++ b/collectors/macos.plugin/plugin_macos.c @@ -66,7 +66,7 @@ void *macos_main(void *ptr) if (unlikely(!pm->enabled)) continue; - debug(D_PROCNETDEV_LOOP, "macos calling %s.", pm->name); + netdata_log_debug(D_PROCNETDEV_LOOP, "macos calling %s.", pm->name); worker_is_busy(i); pm->enabled = !pm->func(localhost->rrd_update_every, hb_dt); diff --git a/collectors/metadata/schemas/multi-module.json b/collectors/metadata/schemas/multi-module.json new file mode 100644 index 00000000..6c332b81 --- /dev/null +++ b/collectors/metadata/schemas/multi-module.json @@ -0,0 +1,16 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "plugin_name": { + "type": "string" + }, + "modules": { + "type": "array", + "description": "A list of single module templates", + "items": { + "$ref": "./single-module.json" + } + } + } +}
\ No newline at end of file diff --git a/collectors/metadata/schemas/single-module.json b/collectors/metadata/schemas/single-module.json new file mode 100644 index 00000000..75052b07 --- /dev/null +++ b/collectors/metadata/schemas/single-module.json @@ -0,0 +1,662 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "title": "Netdata collector single module meta", + "properties": { + "meta": { + "type": "object", + "properties": { + "plugin_name": { + "type": "string", + "description": "Plugin name (e.g. apps.plugin, proc.plugin, go.d.plugin). It must match the name of the executable file in the plugins.d directory." + }, + "module_name": { + "type": "string", + "description": "Module name (e.g. apache, /proc/stat, httpcheck). It usually has the same name as the module configuration file (external plugin) or the section name in netdata.conf (internal plugin)." + }, + "monitored_instance": { + "type": "object", + "description": "Information about the monitored instance (metrics source).", + "properties": { + "name": { + "description": "Metrics source name (e.g. VerneMQ, Network interfaces, Files and directories). Use official spelling for applications.", + "type": "string" + }, + "link": { + "description": "Link to the monitored instance official website if any.", + "type": "string" + }, + "categories": { + "type": "array", + "description": "Category IDs that this integration falls into. IDs can be found in integrations/categories.yaml", + "items": { + "type": "string", + "description": "String defining integration category" + } + }, + "icon_filename": { + "type": "string", + "description": "The filename of the integration's icon, as sourced from https://github.com/netdata/website/tree/master/themes/tailwind/static/img." 
+ } + }, + "required": [ + "name", + "link", + "categories", + "icon_filename" + ] + }, + "keywords": { + "type": "array", + "description": "An array of terms related to the integration.", + "items": { + "type": "string" + } + }, + "related_resources": { + "type": "object", + "description": "Available related resources for the monitored instance.", + "properties": { + "integrations": { + "type": "object", + "description": "All collectors that provide additional metrics for the monitored instance. This may include generic collectors, e.g. 'httpcheck' for web servers to monitor specific endpoints.", + "properties": { + "list": { + "type": "array", + "description": "List of related integrations.", + "items": { + "type": "object", + "properties": { + "plugin_name": { + "type": "string", + "description": "Related integration plugin name." + }, + "module_name": { + "type": "string", + "description": "Related integration module name." + } + }, + "required": [ + "plugin_name", + "module_name" + ] + } + } + }, + "required": [ + "list" + ] + } + }, + "required": [ + "integrations" + ] + }, + "info_provided_to_referring_integrations": { + "type": "object", + "description": "Information that this collector can provide about itself when other integrations mention it. This text will not appear on this collector's page.", + "properties": { + "description": { + "type": "string", + "description": "TODO" + } + }, + "required": [ + "description" + ] + }, + "most_popular": { + "type": "boolean", + "description": "Whether or not the integration is to be flagged as most-popular, meaning it will show up at the top of the menu." 
+ } + }, + "required": [ + "plugin_name", + "module_name", + "monitored_instance", + "keywords", + "related_resources", + "info_provided_to_referring_integrations", + "most_popular" + ] + }, + "overview": { + "type": "object", + "properties": { + "data_collection": { + "type": "object", + "description": "An overview of the collected metrics and a detailed description of the data collection method.", + "properties": { + "metrics_description": { + "type": "string", + "description": "Brief description of what metrics are collected. A suggested approach here is to talk about the areas covered (e.g. health, performance, errors) and the metric scopes." + }, + "method_description": { + "type": "string", + "description": "Description of how metrics are collected (e.g. HTTP requests, establish a TCP connection and send a command, a specific binary execution). A suggested approach here is to provide a detailed description of how the collector gathers metrics: how many connections are established, exact requests/commands executed, exact endpoints used." + } + }, + "required": [ + "metrics_description", + "method_description" + ] + }, + "supported_platforms": { + "type": "object", + "description": "Supported OS/platforms. By default, all platforms supported by Netdata are considered supported. See https://learn.netdata.cloud/docs/installing/platform-support-policy#currently-supported-platforms.", + "properties": { + "include": { + "type": "array", + "description": "Only supported OS/platforms. Platforms supported by Netdata will be ignored, only those listed are considered supported.", + "items": { + "type": "string", + "minLength": 2 + } + }, + "exclude": { + "type": "array", + "description": "Unsupported OS/platforms. 
The result set is all platforms supported by Netdata except for those excluded.", + "items": { + "type": "string", + "minLength": 2 + } + } + }, + "required": [ + "include", + "exclude" + ] + }, + "multi-instance": { + "type": "boolean", + "description": "Whether this collector supports collecting metrics from multiple (for example, local and remote) instances." + }, + "additional_permissions": { + "type": "object", + "description": "Information about additional permissions other than those required by the Netdata process (e.g. setuid, specific Linux capabilities).", + "properties": { + "description": { + "type": "string" + } + }, + "required": [ + "description" + ] + }, + "default_behavior": { + "type": "object", + "description": "Descriptions of how the data collector works with the default configuration.", + "properties": { + "auto_detection": { + "type": "object", + "description": "Information about detecting (discovering) monitored instances with default configuration. Example: tries to connect to Apache running on localhost on ports 80, 443 and 8080.", + "properties": { + "description": { + "type": "string" + } + }, + "required": [ + "description" + ] + }, + "limits": { + "type": "object", + "description": "Information about limiting data collection, taking into account the default values of any configuration settings that restrict data collection (including filtering metrics).", + "properties": { + "description": { + "type": "string" + } + }, + "required": [ + "description" + ] + }, + "performance_impact": { + "type": "object", + "description": "Information about the known impact on the performance of the monitored application or system.", + "properties": { + "description": { + "type": "string" + } + }, + "required": [ + "description" + ] + } + }, + "required": [ + "auto_detection", + "limits", + "performance_impact" + ] + } + }, + "required": [ + "data_collection", + "supported_platforms", + "multi-instance", + "additional_permissions", + "default_behavior" 
+ ] + }, + "setup": { + "type": "object", + "description": "Complete information that is needed to enable and configure the data collector.", + "properties": { + "prerequisites": { + "type": "object", + "description": "Actions the user must take to make the collector work, if any. It includes both configuring Netdata (e.g. if the collector is disabled by default) and configuring the monitored instance (e.g. enabling Apache mod_stats).", + "properties": { + "list": { + "type": "array", + "description": "List of prerequisites.", + "items": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "Title should reflect the description, be short and in the form of a command (e.g. Create netdata user, Enable built-in web server)." + }, + "description": { + "type": "string", + "description": "Steps to follow to meet this prerequisite." + } + }, + "required": [ + "title", + "description" + ] + } + } + }, + "required": [ + "list" + ] + }, + "configuration": { + "description": "Information on how to configure the collector and available configuration options.", + "type": "object", + "properties": { + "file": { + "type": "object", + "description": "Configuration file.", + "properties": { + "name": { + "type": "string", + "description": "Configuration file name." + }, + "section_name": { + "type": "string", + "description": "The name of the section in the configuration file. Only for data collectors whose configuration is in netdata.conf (e.g. proc.plugin modules)." + } + }, + "required": [ + "name" + ] + }, + "options": { + "type": "object", + "description": "All information about the available configuration options.", + "properties": { + "description": { + "type": "string", + "description": "Optional common information about options." 
+ }, + "folding": { + "$ref": "#/$defs/_folding" + }, + "list": { + "type": "array", + "description": "List of configuration options.", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Option name." + }, + "description": { + "type": "string", + "description": "Option description. Must be short. Use 'detailed_description' for a long description." + }, + "detailed_description": { + "type": "string", + "description": "Option detailed description. Use it to describe complex options in detail." + }, + "default_value": { + "type": [ + "string", + "number", + "boolean" + ], + "description": "Default value. Leave empty if none." + }, + "required": { + "type": "boolean", + "description": "Indicates whether this option is required or not. An option is required if the collector does not work when it is not set." + } + }, + "required": [ + "name", + "description", + "default_value", + "required" + ] + } + } + }, + "required": [ + "description", + "folding", + "list" + ] + }, + "examples": { + "type": "object", + "description": "Configuration examples. The more examples the better!", + "properties": { + "folding": { + "$ref": "#/$defs/_folding" + }, + "list": { + "type": "array", + "description": "List of configuration examples.", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Example name." + }, + "folding": { + "$ref": "#/$defs/_folding_relaxed" + }, + "description": { + "type": "string", + "description": "Example description." + }, + "config": { + "type": "string", + "description": "Example configuration." 
+ } + }, + "required": [ + "name", + "description", + "config" + ] + } + } + }, + "required": [ + "folding", + "list" + ] + } + }, + "required": [ + "file", + "options", + "examples" + ] + } + }, + "required": [ + "prerequisites", + "configuration" + ] + }, + "troubleshooting": { + "type": "object", + "description": "Information needed to troubleshoot issues with this collector.", + "properties": { + "problems": { + "type": "object", + "description": "Common problems that users face again and again... and their solutions.", + "properties": { + "list": { + "type": "array", + "description": "List of common problems.", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Problem name." + }, + "description": { + "type": "string", + "description": "Explanation of the problem and its solution." + } + } + }, + "required": [ + "name", + "description" + ] + } + }, + "required": [ + "list" + ] + } + }, + "required": [ + "problems" + ] + }, + "alerts": { + "type": "array", + "description": "The list of configured alerts shipped with Netdata for this collector.", + "items": { + "type": "object", + "description": "Information about the configured alert.", + "properties": { + "name": { + "type": "string", + "description": "Alert's 'alarm' or 'template' value (https://learn.netdata.cloud/docs/alerting/health-configuration-reference#alarm-line-alarm-or-template)." + }, + "link": { + "type": "string", + "description": "Link to github .conf file that this alert originates from" + }, + "metric": { + "type": "string", + "description": "Alert's 'on' value (https://learn.netdata.cloud/docs/alerting/health-configuration-reference#alarm-line-on)." + }, + "info": { + "type": "string", + "description": "Alert's 'info' value (https://learn.netdata.cloud/docs/alerting/health-configuration-reference#alarm-line-info)." 
+ }, + "os": { + "type": "string", + "description": "Alert's 'os' value (https://learn.netdata.cloud/docs/alerting/health-configuration-reference#alarm-line-os)." + } + }, + "required": [ + "name", + "link", + "metric", + "info" + ] + } + }, + "metrics": { + "type": "object", + "description": "Collected metrics grouped by scope. The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.", + "properties": { + "folding": { + "$ref": "#/$defs/_folding" + }, + "description": { + "type": "string", + "description": "General description of collected metrics/scopes." + }, + "availability": { + "type": "array", + "description": "Metrics collection availability conditions. Some metrics are only available when certain conditions are met. For example, Apache exposes additional metrics when Extended status is configured, and Consul exposes a different set of metrics depending on its mode. This field should list the available conditions that will later be matched for each of the metrics.", + "items": { + "type": "string", + "description": "Availability condition name." + } + }, + "scopes": { + "type": "array", + "description": "List of scopes and their metrics.", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Scope name." + }, + "description": { + "type": "string", + "description": "Scope description." + }, + "labels": { + "type": "array", + "description": "Label set of the scope.", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Label name." + }, + "description": { + "type": "string", + "description": "Label description." + } + }, + "required": [ + "name", + "description" + ] + } + }, + "metrics": { + "type": "array", + "description": "List of collected metrics (chart contexts) in the scope.", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Metric name (chart context)." 
+ }, + "availability": { + "type": "array", + "description": "Metric collection availability conditions. An empty list means that it is available for all conditions defined in 'metrics.availability'.", + "items": { + "type": "string", + "description": "Availability condition name." + } + }, + "description": { + "type": "string", + "description": "Metric description (chart title)." + }, + "unit": { + "type": "string", + "description": "Metric unit (chart unit)." + }, + "chart_type": { + "type": "string", + "description": "Metric chart type.", + "enum": [ + "line", + "area", + "stacked" + ] + }, + "dimensions": { + "type": "array", + "description": "List of metric dimensions.", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Dimension name." + } + }, + "required": [ + "name" + ] + } + } + }, + "required": [ + "name", + "description", + "unit", + "chart_type", + "dimensions" + ] + } + } + }, + "required": [ + "name", + "description", + "labels", + "metrics" + ] + } + } + }, + "required": [ + "folding", + "description", + "availability", + "scopes" + ] + } + }, + "required": [ + "meta", + "overview", + "setup", + "troubleshooting", + "alerts", + "metrics" + ], + "$defs": { + "_folding": { + "type": "object", + "description": "Content folding settings.", + "properties": { + "title": { + "description": "Folded content summary title.", + "type": "string" + }, + "enabled": { + "description": "Determines if this content should be folded.", + "type": "boolean" + } + }, + "required": [ + "title", + "enabled" + ] + }, + "_folding_relaxed": { + "type": "object", + "description": "Content folding settings with optional title.", + "properties": { + "title": { + "description": "Folded content summary title.", + "type": "string" + }, + "enabled": { + "description": "Determines if this content should be folded.", + "type": "boolean" + } + }, + "required": [ + "enabled" + ] + } + } +} diff --git 
a/collectors/metadata/single-module-template.yaml b/collectors/metadata/single-module-template.yaml new file mode 100644 index 00000000..7f040350 --- /dev/null +++ b/collectors/metadata/single-module-template.yaml @@ -0,0 +1,97 @@ +meta: + plugin_name: "" + module_name: "" + alternative_monitored_instances: [] + monitored_instance: + name: "" + link: "" + categories: [] + icon_filename: "" + related_resources: + integrations: + list: + - plugin_name: "" + module_name: "" + info_provided_to_referring_integrations: + description: "" + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: "" + method_description: "" + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "" + limits: + description: "" + performance_impact: + description: "" +setup: + prerequisites: + list: + - title: "" + description: "" + configuration: + file: + name: "" + description: "" + options: + description: "" + folding: + title: "" + enabled: true + list: + - name: "" + default_value: "" + description: "" + required: false + examples: + folding: + enabled: true + title: "" + list: + - name: "" + folding: + enabled: false + description: "" + config: "" +troubleshooting: + problems: + list: + - name: "" + description: "" +alerts: + - info: "" + link: "" + metric: "" + name: "" + os: "" +metrics: + folding: + enabled: false + title: "" + description: "" + availability: + - "" + scopes: + - name: "" + description: "" + labels: + - name: "" + description: "" + metrics: + - name: "" + availability: + - "" + description: "" + unit: "" + chart_type: "" + dimensions: + - name: "" diff --git a/collectors/nfacct.plugin/metadata.yaml b/collectors/nfacct.plugin/metadata.yaml new file mode 100644 index 00000000..2dbd31ec --- /dev/null +++ b/collectors/nfacct.plugin/metadata.yaml @@ -0,0 +1,119 @@ +meta: + plugin_name: nfacct.plugin + module_name: 
nfacct.plugin + monitored_instance: + name: Netfilter + link: '' + categories: + - data-collection.networking-stack-and-network-interfaces + icon_filename: 'netfilter.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor Netfilter metrics for optimal packet filtering and manipulation. Keep tabs on packet counts, dropped packets, and error rates to secure network operations.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: netfilter.netlink_new + description: Connection Tracker New Connections + unit: "connections/s" + chart_type: line + dimensions: + - name: new + - name: ignore + - name: invalid + - name: netfilter.netlink_changes + description: Connection Tracker Changes + unit: "changes/s" + chart_type: line + dimensions: + - name: insert + - name: delete + - name: delete_list + - name: netfilter.netlink_search + description: Connection Tracker Searches + unit: "searches/s" + chart_type: line + dimensions: + - name: searched + - name: search_restart + - name: found + - name: netfilter.netlink_errors + description: Connection Tracker Errors + unit: "events/s" + chart_type: line + dimensions: + - name: icmp_error + - name: insert_failed + - name: 
drop + - name: early_drop + - name: netfilter.netlink_expect + description: Connection Tracker Expectations + unit: "expectations/s" + chart_type: line + dimensions: + - name: created + - name: deleted + - name: new + - name: netfilter.nfacct_packets + description: Netfilter Accounting Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: a dimension per nfacct object + - name: netfilter.nfacct_bytes + description: Netfilter Accounting Bandwidth + unit: "kilobytes/s" + chart_type: line + dimensions: + - name: a dimension per nfacct object diff --git a/collectors/perf.plugin/metadata.yaml b/collectors/perf.plugin/metadata.yaml new file mode 100644 index 00000000..a93970e5 --- /dev/null +++ b/collectors/perf.plugin/metadata.yaml @@ -0,0 +1,183 @@ +meta: + plugin_name: perf.plugin + module_name: perf.plugin + monitored_instance: + name: CPU performance + link: '' + categories: + - data-collection.linux-systems + icon_filename: 'bolt.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor CPU performance to ensure optimal computational operations. Monitor core usage, load averages, and thermal throttling for seamless computation tasks.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: perf.cpu_cycles + description: CPU cycles + unit: "cycles/s" + chart_type: line + dimensions: + - name: cpu + - name: ref_cpu + - name: perf.instructions + description: Instructions + unit: "instructions/s" + chart_type: line + dimensions: + - name: instructions + - name: perf.instructions_per_cycle + description: Instructions per Cycle(IPC) + unit: "instructions/cycle" + chart_type: line + dimensions: + - name: ipc + - name: perf.branch_instructions + description: Branch instructions + unit: "instructions/s" + chart_type: line + dimensions: + - name: instructions + - name: misses + - name: perf.cache + description: Cache operations + unit: "operations/s" + chart_type: line + dimensions: + - name: references + - name: misses + - name: perf.bus_cycles + description: Bus cycles + unit: "cycles/s" + chart_type: line + dimensions: + - name: bus + - name: perf.stalled_cycles + description: Stalled frontend and backend cycles + unit: "cycles/s" + chart_type: line + dimensions: + - name: frontend + - name: backend + - name: perf.migrations + description: CPU migrations + unit: "migrations" + chart_type: line + dimensions: + - name: migrations + - name: perf.alignment_faults + description: Alignment faults + unit: "faults" + chart_type: line + dimensions: + - name: 
faults + - name: perf.emulation_faults + description: Emulation faults + unit: "faults" + chart_type: line + dimensions: + - name: faults + - name: perf.l1d_cache + description: L1D cache operations + unit: "events/s" + chart_type: line + dimensions: + - name: read_access + - name: read_misses + - name: write_access + - name: write_misses + - name: perf.l1d_cache_prefetch + description: L1D prefetch cache operations + unit: "prefetches/s" + chart_type: line + dimensions: + - name: prefetches + - name: perf.l1i_cache + description: L1I cache operations + unit: "events/s" + chart_type: line + dimensions: + - name: read_access + - name: read_misses + - name: perf.ll_cache + description: LL cache operations + unit: "events/s" + chart_type: line + dimensions: + - name: read_access + - name: read_misses + - name: write_access + - name: write_misses + - name: perf.dtlb_cache + description: DTLB cache operations + unit: "events/s" + chart_type: line + dimensions: + - name: read_access + - name: read_misses + - name: write_access + - name: write_misses + - name: perf.itlb_cache + description: ITLB cache operations + unit: "events/s" + chart_type: line + dimensions: + - name: read_access + - name: read_misses + - name: perf.pbu_cache + description: PBU cache operations + unit: "events/s" + chart_type: line + dimensions: + - name: read_access diff --git a/collectors/plugins.d/Makefile.am b/collectors/plugins.d/Makefile.am index 59250a99..67fed309 100644 --- a/collectors/plugins.d/Makefile.am +++ b/collectors/plugins.d/Makefile.am @@ -7,5 +7,6 @@ SUBDIRS = \ $(NULL) dist_noinst_DATA = \ + gperf-config.txt \ README.md \ $(NULL) diff --git a/collectors/plugins.d/gperf-config.txt b/collectors/plugins.d/gperf-config.txt new file mode 100644 index 00000000..43be129e --- /dev/null +++ b/collectors/plugins.d/gperf-config.txt @@ -0,0 +1,52 @@ +%struct-type +%omit-struct-type +%define hash-function-name gperf_keyword_hash_function +%define lookup-function-name gperf_lookup_keyword 
+%define word-array-name gperf_keywords +%define constants-prefix GPERF_PARSER_ +%define slot-name keyword +%global-table +%null-strings +PARSER_KEYWORD; +%% +# +# Plugins Only Keywords +# +FLUSH, 97, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 1 +DISABLE, 98, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 2 +EXIT, 99, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 3 +HOST, 71, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 4 +HOST_DEFINE, 72, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 5 +HOST_DEFINE_END, 73, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 6 +HOST_LABEL, 74, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 7 +# +# Common keywords +# +BEGIN, 12, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 8 +CHART, 32, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 9 +CLABEL, 34, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 10 +CLABEL_COMMIT, 35, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 11 +DIMENSION, 31, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 12 +END, 13, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 13 +FUNCTION, 41, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 14 +FUNCTION_RESULT_BEGIN, 42, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 15 +LABEL, 51, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 16 +OVERWRITE, 52, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 17 +SET, 11, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 18 +VARIABLE, 53, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 19 +# +# Streaming only keywords +# +CLAIMED_ID, 61, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 20 +BEGIN2, 2, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 21 +SET2, 1, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 22 +END2, 3, PARSER_INIT_STREAMING, 
WORKER_PARSER_FIRST_JOB + 23 +# +# Streaming Replication keywords +# +CHART_DEFINITION_END, 33, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 24 +RBEGIN, 22, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 25 +RDSTATE, 23, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 26 +REND, 25, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 27 +RSET, 21, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 28 +RSSTATE, 24, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 29 diff --git a/collectors/plugins.d/gperf-hashtable.h b/collectors/plugins.d/gperf-hashtable.h new file mode 100644 index 00000000..b9e58975 --- /dev/null +++ b/collectors/plugins.d/gperf-hashtable.h @@ -0,0 +1,163 @@ +/* ANSI-C code produced by gperf version 3.1 */ +/* Command-line: gperf --multiple-iterations=1000 --output-file=gperf-hashtable.h gperf-config.txt */ +/* Computed positions: -k'1-2' */ + +#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' 
== 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gperf@gnu.org>." 
+#endif + + +#define GPERF_PARSER_TOTAL_KEYWORDS 29 +#define GPERF_PARSER_MIN_WORD_LENGTH 3 +#define GPERF_PARSER_MAX_WORD_LENGTH 21 +#define GPERF_PARSER_MIN_HASH_VALUE 4 +#define GPERF_PARSER_MAX_HASH_VALUE 36 +/* maximum key range = 33, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +gperf_keyword_hash_function (register const char *str, register size_t len) +{ + static unsigned char asso_values[] = + { + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 15, 10, 1, 1, 9, + 4, 37, 0, 20, 37, 37, 9, 37, 14, 0, + 37, 37, 1, 0, 37, 7, 13, 37, 18, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37 + }; + return len + asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]]; +} + +static PARSER_KEYWORD gperf_keywords[] = + { + {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0}, +#line 18 "gperf-config.txt" + {"HOST", 71, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 4}, +#line 51 "gperf-config.txt" + {"RSET", 21, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 28}, +#line 26 "gperf-config.txt" + {"CHART", 32, 
PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 9}, + {(char*)0}, +#line 52 "gperf-config.txt" + {"RSSTATE", 24, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 29}, +#line 49 "gperf-config.txt" + {"RDSTATE", 23, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 26}, +#line 21 "gperf-config.txt" + {"HOST_LABEL", 74, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 7}, +#line 19 "gperf-config.txt" + {"HOST_DEFINE", 72, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 5}, +#line 35 "gperf-config.txt" + {"SET", 11, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 18}, +#line 42 "gperf-config.txt" + {"SET2", 1, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 22}, +#line 50 "gperf-config.txt" + {"REND", 25, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 27}, +#line 20 "gperf-config.txt" + {"HOST_DEFINE_END", 73, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 6}, +#line 27 "gperf-config.txt" + {"CLABEL", 34, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 10}, +#line 48 "gperf-config.txt" + {"RBEGIN", 22, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 25}, +#line 15 "gperf-config.txt" + {"FLUSH", 97, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 1}, +#line 31 "gperf-config.txt" + {"FUNCTION", 41, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 14}, +#line 40 "gperf-config.txt" + {"CLAIMED_ID", 61, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 20}, +#line 47 "gperf-config.txt" + {"CHART_DEFINITION_END", 33, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 24}, +#line 34 "gperf-config.txt" + {"OVERWRITE", 52, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 17}, +#line 28 "gperf-config.txt" + {"CLABEL_COMMIT", 35, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 11}, +#line 25 "gperf-config.txt" + {"BEGIN", 12, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 8}, +#line 41 "gperf-config.txt" + {"BEGIN2", 2, 
PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 21}, +#line 30 "gperf-config.txt" + {"END", 13, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 13}, +#line 43 "gperf-config.txt" + {"END2", 3, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 23}, +#line 16 "gperf-config.txt" + {"DISABLE", 98, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 2}, +#line 33 "gperf-config.txt" + {"LABEL", 51, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 16}, +#line 29 "gperf-config.txt" + {"DIMENSION", 31, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 12}, +#line 17 "gperf-config.txt" + {"EXIT", 99, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 3}, +#line 32 "gperf-config.txt" + {"FUNCTION_RESULT_BEGIN", 42, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 15}, + {(char*)0}, {(char*)0}, {(char*)0}, +#line 36 "gperf-config.txt" + {"VARIABLE", 53, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 19} + }; + +PARSER_KEYWORD * +gperf_lookup_keyword (register const char *str, register size_t len) +{ + if (len <= GPERF_PARSER_MAX_WORD_LENGTH && len >= GPERF_PARSER_MIN_WORD_LENGTH) + { + register unsigned int key = gperf_keyword_hash_function (str, len); + + if (key <= GPERF_PARSER_MAX_HASH_VALUE) + { + register const char *s = gperf_keywords[key].keyword; + + if (s && *str == *s && !strcmp (str + 1, s + 1)) + return &gperf_keywords[key]; + } + } + return 0; +} diff --git a/collectors/plugins.d/local_listeners.c b/collectors/plugins.d/local_listeners.c new file mode 100644 index 00000000..a39de797 --- /dev/null +++ b/collectors/plugins.d/local_listeners.c @@ -0,0 +1,366 @@ +#include "libnetdata/libnetdata.h" +#include "libnetdata/required_dummies.h" + +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <dirent.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> +#include <arpa/inet.h> + +typedef enum { + PROC_NET_PROTOCOL_TCP, + 
PROC_NET_PROTOCOL_TCP6, + PROC_NET_PROTOCOL_UDP, + PROC_NET_PROTOCOL_UDP6, +} PROC_NET_PROTOCOLS; + +#define MAX_ERROR_LOGS 10 + +static size_t pid_fds_processed = 0; +static size_t pid_fds_failed = 0; +static size_t errors_encountered = 0; + +static inline const char *protocol_name(PROC_NET_PROTOCOLS protocol) { + switch(protocol) { + default: + case PROC_NET_PROTOCOL_TCP: + return "TCP"; + + case PROC_NET_PROTOCOL_UDP: + return "UDP"; + + case PROC_NET_PROTOCOL_TCP6: + return "TCP6"; + + case PROC_NET_PROTOCOL_UDP6: + return "UDP6"; + } +} + +static inline int read_cmdline(pid_t pid, char* buffer, size_t bufferSize) { + char path[FILENAME_MAX + 1]; + snprintfz(path, FILENAME_MAX, "%s/proc/%d/cmdline", netdata_configured_host_prefix, pid); + + FILE* file = fopen(path, "r"); + if (!file) { + if(++errors_encountered < MAX_ERROR_LOGS) + collector_error("LOCAL-LISTENERS: error opening file: %s\n", path); + + return -1; + } + + size_t bytesRead = fread(buffer, 1, bufferSize - 1, file); + buffer[bytesRead] = '\0'; // Ensure null-terminated + + // Replace null characters in cmdline with spaces + for (size_t i = 0; i < bytesRead; i++) { + if (buffer[i] == '\0') { + buffer[i] = ' '; + } + } + + fclose(file); + return 0; +} + +static inline void fix_cmdline(char* str) { + if (str == NULL) + return; + + char *s = str; + + do { + if(*s == '|' || iscntrl(*s)) + *s = '_'; + + } while(*++s); + + + while(s > str && *(s-1) == ' ') + *--s = '\0'; +} + +// ---------------------------------------------------------------------------- + +#define HASH_TABLE_SIZE 100000 + +typedef struct Node { + unsigned int inode; // key + + // values + unsigned int port; + char local_address[INET6_ADDRSTRLEN]; + PROC_NET_PROTOCOLS protocol; + bool processed; + + // linking + struct Node *prev, *next; +} Node; + +typedef struct HashTable { + Node *table[HASH_TABLE_SIZE]; +} HashTable; + +static HashTable *hashTable_key_inode_port_value = NULL; + +static inline void generate_output(const char *protocol, 
const char *address, unsigned int port, const char *cmdline) { + printf("%s|%s|%u|%s\n", protocol, address, port, cmdline); +} + +HashTable* createHashTable() { + HashTable *hashTable = (HashTable*)mallocz(sizeof(HashTable)); + memset(hashTable, 0, sizeof(HashTable)); + return hashTable; +} + +static inline unsigned int hashFunction(unsigned int inode) { + return inode % HASH_TABLE_SIZE; +} + +static inline void insertHashTable(HashTable *hashTable, unsigned int inode, unsigned int port, PROC_NET_PROTOCOLS protocol, char *local_address) { + unsigned int index = hashFunction(inode); + Node *newNode = (Node*)mallocz(sizeof(Node)); + newNode->inode = inode; + newNode->port = port; + newNode->protocol = protocol; + strncpyz(newNode->local_address, local_address, INET6_ADDRSTRLEN - 1); + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(hashTable->table[index], newNode, prev, next); +} + +static inline bool lookupHashTable_and_execute(HashTable *hashTable, unsigned int inode, pid_t pid) { + unsigned int index = hashFunction(inode); + for(Node *node = hashTable->table[index], *next = NULL ; node ; node = next) { + next = node->next; + + if(node->inode == inode && node->port) { + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(hashTable->table[index], node, prev, next); + char cmdline[8192] = ""; + read_cmdline(pid, cmdline, sizeof(cmdline)); + fix_cmdline(cmdline); + generate_output(protocol_name(node->protocol), node->local_address, node->port, cmdline); + freez(node); + return true; + } + } + + return false; +} + +void freeHashTable(HashTable *hashTable) { + for (unsigned int i = 0; i < HASH_TABLE_SIZE; i++) { + while(hashTable->table[i]) { + Node *tmp = hashTable->table[i]; + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(hashTable->table[i], tmp, prev, next); + generate_output(protocol_name(tmp->protocol), tmp->local_address, tmp->port, ""); + freez(tmp); + } + } + freez(hashTable); +} + +// ---------------------------------------------------------------------------- + +static inline void 
found_this_socket_inode(pid_t pid, unsigned int inode) { + lookupHashTable_and_execute(hashTable_key_inode_port_value, inode, pid); +} + +bool find_all_sockets_in_proc(const char *proc_filename) { + DIR *proc_dir, *fd_dir; + struct dirent *proc_entry, *fd_entry; + char path_buffer[FILENAME_MAX + 1]; + + proc_dir = opendir(proc_filename); + if (proc_dir == NULL) { + if(++errors_encountered < MAX_ERROR_LOGS) + collector_error("LOCAL-LISTENERS: cannot opendir() '%s'", proc_filename); + + pid_fds_failed++; + return false; + } + + while ((proc_entry = readdir(proc_dir)) != NULL) { + // Check if directory entry is a PID by seeing if the name is made up of digits only + int is_pid = 1; + for (char *c = proc_entry->d_name; *c != '\0'; c++) { + if (*c < '0' || *c > '9') { + is_pid = 0; + break; + } + } + + if (!is_pid) + continue; + + // Build the path to the fd directory of the process + snprintfz(path_buffer, FILENAME_MAX, "%s/%s/fd/", proc_filename, proc_entry->d_name); + + fd_dir = opendir(path_buffer); + if (fd_dir == NULL) { + if(++errors_encountered < MAX_ERROR_LOGS) + collector_error("LOCAL-LISTENERS: cannot opendir() '%s'", path_buffer); + + pid_fds_failed++; + continue; + } + + while ((fd_entry = readdir(fd_dir)) != NULL) { + if(!strcmp(fd_entry->d_name, ".") || !strcmp(fd_entry->d_name, "..")) + continue; + + char link_path[FILENAME_MAX + 1]; + char link_target[FILENAME_MAX + 1]; + int inode; + + // Build the path to the file descriptor link + snprintfz(link_path, FILENAME_MAX, "%s/%s", path_buffer, fd_entry->d_name); + + ssize_t len = readlink(link_path, link_target, sizeof(link_target) - 1); + if (len == -1) { + if(++errors_encountered < MAX_ERROR_LOGS) + collector_error("LOCAL-LISTENERS: cannot read link '%s'", link_path); + + pid_fds_failed++; + continue; + } + link_target[len] = '\0'; + + pid_fds_processed++; + + // If the link target indicates a socket, print its inode number + if (sscanf(link_target, "socket:[%d]", &inode) == 1) + 
found_this_socket_inode((pid_t)strtoul(proc_entry->d_name, NULL, 10), inode); + } + + closedir(fd_dir); + } + + closedir(proc_dir); + return true; +} + +// ---------------------------------------------------------------------------- + +static inline void add_port_and_inode(PROC_NET_PROTOCOLS protocol, unsigned int port, unsigned int inode, char *local_address) { + insertHashTable(hashTable_key_inode_port_value, inode, port, protocol, local_address); +} + +static inline void print_ipv6_address(const char *ipv6_str, char *dst) { + unsigned k; + char buf[9]; + struct sockaddr_in6 sa; + + // Initialize sockaddr_in6 + memset(&sa, 0, sizeof(struct sockaddr_in6)); + sa.sin6_family = AF_INET6; + sa.sin6_port = htons(0); // replace 0 with your port number + + // Convert hex string to byte array + for (k = 0; k < 4; ++k) + { + memset(buf, 0, 9); + memcpy(buf, ipv6_str + (k * 8), 8); + sa.sin6_addr.s6_addr32[k] = strtoul(buf, NULL, 16); + } + + // Convert to human-readable format + if (inet_ntop(AF_INET6, &(sa.sin6_addr), dst, INET6_ADDRSTRLEN) == NULL) + *dst = '\0'; +} + +static inline void print_ipv4_address(uint32_t address, char *dst) { + uint8_t octets[4]; + octets[0] = address & 0xFF; + octets[1] = (address >> 8) & 0xFF; + octets[2] = (address >> 16) & 0xFF; + octets[3] = (address >> 24) & 0xFF; + sprintf(dst, "%u.%u.%u.%u", octets[0], octets[1], octets[2], octets[3]); +} + +bool read_proc_net_x(const char *filename, PROC_NET_PROTOCOLS protocol) { + FILE *fp; + char *line = NULL; + size_t len = 0; + ssize_t read; + char address[INET6_ADDRSTRLEN]; + + ssize_t min_line_length = (protocol == PROC_NET_PROTOCOL_TCP || protocol == PROC_NET_PROTOCOL_UDP) ? 
105 : 155; + + fp = fopen(filename, "r"); + if (fp == NULL) + return false; + + // Read line by line + while ((read = getline(&line, &len, fp)) != -1) { + if(read < min_line_length) continue; + + char local_address6[33], rem_address6[33]; + unsigned int local_address, local_port, state, rem_address, rem_port, inode; + + switch(protocol) { + case PROC_NET_PROTOCOL_TCP: + if(line[34] != '0' || line[35] != 'A') + continue; + // fall-through + + case PROC_NET_PROTOCOL_UDP: + if (sscanf(line, "%*d: %X:%X %X:%X %X %*X:%*X %*X:%*X %*X %*d %*d %u", + &local_address, &local_port, &rem_address, &rem_port, &state, &inode) != 6) + continue; + + print_ipv4_address(local_address, address); + break; + + case PROC_NET_PROTOCOL_TCP6: + if(line[82] != '0' || line[83] != 'A') + continue; + // fall-through + + case PROC_NET_PROTOCOL_UDP6: + if(sscanf(line, "%*d: %32[0-9A-Fa-f]:%X %32[0-9A-Fa-f]:%X %X %*X:%*X %*X:%*X %*X %*d %*d %u", + local_address6, &local_port, rem_address6, &rem_port, &state, &inode) != 6) + continue; + + print_ipv6_address(local_address6, address); + break; + } + + add_port_and_inode(protocol, local_port, inode, address); + } + + fclose(fp); + if (line) + free(line); + + return true; +} + +// ---------------------------------------------------------------------------- + +int main(int argc __maybe_unused, char **argv __maybe_unused) { + char path[FILENAME_MAX + 1]; + hashTable_key_inode_port_value = createHashTable(); + + netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX"); + if(!netdata_configured_host_prefix) netdata_configured_host_prefix = ""; + + snprintfz(path, FILENAME_MAX, "%s/proc/net/tcp", netdata_configured_host_prefix); + read_proc_net_x(path, PROC_NET_PROTOCOL_TCP); + + snprintfz(path, FILENAME_MAX, "%s/proc/net/udp", netdata_configured_host_prefix); + read_proc_net_x(path, PROC_NET_PROTOCOL_UDP); + + snprintfz(path, FILENAME_MAX, "%s/proc/net/tcp6", netdata_configured_host_prefix); + read_proc_net_x(path, PROC_NET_PROTOCOL_TCP6); + + 
snprintfz(path, FILENAME_MAX, "%s/proc/net/udp6", netdata_configured_host_prefix); + read_proc_net_x(path, PROC_NET_PROTOCOL_UDP6); + + snprintfz(path, FILENAME_MAX, "%s/proc", netdata_configured_host_prefix); + find_all_sockets_in_proc(path); + + freeHashTable(hashTable_key_inode_port_value); + return 0; +} diff --git a/collectors/plugins.d/plugins_d.c b/collectors/plugins.d/plugins_d.c index da5226a5..6a235b4e 100644 --- a/collectors/plugins.d/plugins_d.c +++ b/collectors/plugins.d/plugins_d.c @@ -3,7 +3,7 @@ #include "plugins_d.h" #include "pluginsd_parser.h" -char *plugin_directories[PLUGINSD_MAX_DIRECTORIES] = { NULL }; +char *plugin_directories[PLUGINSD_MAX_DIRECTORIES] = { [0] = PLUGINS_DIR, }; struct plugind *pluginsd_root = NULL; inline size_t pluginsd_initialize_plugin_directories() @@ -18,32 +18,32 @@ inline size_t pluginsd_initialize_plugin_directories() } // Parse it and store it to plugin directories - return quoted_strings_splitter(plugins_dir_list, plugin_directories, PLUGINSD_MAX_DIRECTORIES, config_isspace); + return quoted_strings_splitter_config(plugins_dir_list, plugin_directories, PLUGINSD_MAX_DIRECTORIES); } static inline void plugin_set_disabled(struct plugind *cd) { - netdata_spinlock_lock(&cd->unsafe.spinlock); + spinlock_lock(&cd->unsafe.spinlock); cd->unsafe.enabled = false; - netdata_spinlock_unlock(&cd->unsafe.spinlock); + spinlock_unlock(&cd->unsafe.spinlock); } bool plugin_is_enabled(struct plugind *cd) { - netdata_spinlock_lock(&cd->unsafe.spinlock); + spinlock_lock(&cd->unsafe.spinlock); bool ret = cd->unsafe.enabled; - netdata_spinlock_unlock(&cd->unsafe.spinlock); + spinlock_unlock(&cd->unsafe.spinlock); return ret; } static inline void plugin_set_running(struct plugind *cd) { - netdata_spinlock_lock(&cd->unsafe.spinlock); + spinlock_lock(&cd->unsafe.spinlock); cd->unsafe.running = true; - netdata_spinlock_unlock(&cd->unsafe.spinlock); + spinlock_unlock(&cd->unsafe.spinlock); } static inline bool plugin_is_running(struct plugind 
*cd) { - netdata_spinlock_lock(&cd->unsafe.spinlock); + spinlock_lock(&cd->unsafe.spinlock); bool ret = cd->unsafe.running; - netdata_spinlock_unlock(&cd->unsafe.spinlock); + spinlock_unlock(&cd->unsafe.spinlock); return ret; } @@ -53,7 +53,7 @@ static void pluginsd_worker_thread_cleanup(void *arg) worker_unregister(); - netdata_spinlock_lock(&cd->unsafe.spinlock); + spinlock_lock(&cd->unsafe.spinlock); cd->unsafe.running = false; cd->unsafe.thread = 0; @@ -61,15 +61,15 @@ static void pluginsd_worker_thread_cleanup(void *arg) pid_t pid = cd->unsafe.pid; cd->unsafe.pid = 0; - netdata_spinlock_unlock(&cd->unsafe.spinlock); + spinlock_unlock(&cd->unsafe.spinlock); if (pid) { siginfo_t info; - info("PLUGINSD: 'host:%s', killing data collection child process with pid %d", + netdata_log_info("PLUGINSD: 'host:%s', killing data collection child process with pid %d", rrdhost_hostname(cd->host), pid); if (killpid(pid) != -1) { - info("PLUGINSD: 'host:%s', waiting for data collection child process pid %d to exit...", + netdata_log_info("PLUGINSD: 'host:%s', waiting for data collection child process pid %d to exit...", rrdhost_hostname(cd->host), pid); netdata_waitid(P_PID, (id_t)pid, &info, WEXITED); @@ -85,7 +85,7 @@ static void pluginsd_worker_thread_handle_success(struct plugind *cd) { } if (likely(cd->serial_failures <= SERIAL_FAILURES_THRESHOLD)) { - info("PLUGINSD: 'host:%s', '%s' (pid %d) does not generate useful output but it reports success (exits with 0). %s.", + netdata_log_info("PLUGINSD: 'host:%s', '%s' (pid %d) does not generate useful output but it reports success (exits with 0). %s.", rrdhost_hostname(cd->host), cd->fullfilename, cd->unsafe.pid, plugin_is_enabled(cd) ? "Waiting a bit before starting it again." 
: "Will not start it again - it is now disabled."); @@ -94,7 +94,7 @@ static void pluginsd_worker_thread_handle_success(struct plugind *cd) { } if (cd->serial_failures > SERIAL_FAILURES_THRESHOLD) { - error("PLUGINSD: 'host:'%s', '%s' (pid %d) does not generate useful output, " + netdata_log_error("PLUGINSD: 'host:'%s', '%s' (pid %d) does not generate useful output, " "although it reports success (exits with 0)." "We have tried to collect something %zu times - unsuccessfully. Disabling it.", rrdhost_hostname(cd->host), cd->fullfilename, cd->unsafe.pid, cd->serial_failures); @@ -105,21 +105,21 @@ static void pluginsd_worker_thread_handle_success(struct plugind *cd) { static void pluginsd_worker_thread_handle_error(struct plugind *cd, int worker_ret_code) { if (worker_ret_code == -1) { - info("PLUGINSD: 'host:%s', '%s' (pid %d) was killed with SIGTERM. Disabling it.", + netdata_log_info("PLUGINSD: 'host:%s', '%s' (pid %d) was killed with SIGTERM. Disabling it.", rrdhost_hostname(cd->host), cd->fullfilename, cd->unsafe.pid); plugin_set_disabled(cd); return; } if (!cd->successful_collections) { - error("PLUGINSD: 'host:%s', '%s' (pid %d) exited with error code %d and haven't collected any data. Disabling it.", + netdata_log_error("PLUGINSD: 'host:%s', '%s' (pid %d) exited with error code %d and haven't collected any data. Disabling it.", rrdhost_hostname(cd->host), cd->fullfilename, cd->unsafe.pid, worker_ret_code); plugin_set_disabled(cd); return; } if (cd->serial_failures <= SERIAL_FAILURES_THRESHOLD) { - error("PLUGINSD: 'host:%s', '%s' (pid %d) exited with error code %d, but has given useful output in the past (%zu times). %s", + netdata_log_error("PLUGINSD: 'host:%s', '%s' (pid %d) exited with error code %d, but has given useful output in the past (%zu times). %s", rrdhost_hostname(cd->host), cd->fullfilename, cd->unsafe.pid, worker_ret_code, cd->successful_collections, plugin_is_enabled(cd) ? "Waiting a bit before starting it again." 
: "Will not start it again - it is disabled."); sleep((unsigned int)(cd->update_every * 10)); @@ -127,7 +127,7 @@ static void pluginsd_worker_thread_handle_error(struct plugind *cd, int worker_r } if (cd->serial_failures > SERIAL_FAILURES_THRESHOLD) { - error("PLUGINSD: 'host:%s', '%s' (pid %d) exited with error code %d, but has given useful output in the past (%zu times)." + netdata_log_error("PLUGINSD: 'host:%s', '%s' (pid %d) exited with error code %d, but has given useful output in the past (%zu times)." "We tried to restart it %zu times, but it failed to generate data. Disabling it.", rrdhost_hostname(cd->host), cd->fullfilename, cd->unsafe.pid, worker_ret_code, cd->successful_collections, cd->serial_failures); @@ -153,16 +153,16 @@ static void *pluginsd_worker_thread(void *arg) { FILE *fp_child_output = netdata_popen(cd->cmd, &cd->unsafe.pid, &fp_child_input); if (unlikely(!fp_child_input || !fp_child_output)) { - error("PLUGINSD: 'host:%s', cannot popen(\"%s\", \"r\").", rrdhost_hostname(cd->host), cd->cmd); + netdata_log_error("PLUGINSD: 'host:%s', cannot popen(\"%s\", \"r\").", rrdhost_hostname(cd->host), cd->cmd); break; } - info("PLUGINSD: 'host:%s' connected to '%s' running on pid %d", + netdata_log_info("PLUGINSD: 'host:%s' connected to '%s' running on pid %d", rrdhost_hostname(cd->host), cd->fullfilename, cd->unsafe.pid); count = pluginsd_process(cd->host, cd, fp_child_input, fp_child_output, 0); - info("PLUGINSD: 'host:%s', '%s' (pid %d) disconnected after %zu successful data collections (ENDs).", + netdata_log_info("PLUGINSD: 'host:%s', '%s' (pid %d) disconnected after %zu successful data collections (ENDs).", rrdhost_hostname(cd->host), cd->fullfilename, cd->unsafe.pid, count); killpid(cd->unsafe.pid); @@ -186,21 +186,21 @@ static void *pluginsd_worker_thread(void *arg) { static void pluginsd_main_cleanup(void *data) { struct netdata_static_thread *static_thread = (struct netdata_static_thread *)data; static_thread->enabled = 
NETDATA_MAIN_THREAD_EXITING; - info("PLUGINSD: cleaning up..."); + netdata_log_info("PLUGINSD: cleaning up..."); struct plugind *cd; for (cd = pluginsd_root; cd; cd = cd->next) { - netdata_spinlock_lock(&cd->unsafe.spinlock); + spinlock_lock(&cd->unsafe.spinlock); if (cd->unsafe.enabled && cd->unsafe.running && cd->unsafe.thread != 0) { - info("PLUGINSD: 'host:%s', stopping plugin thread: %s", + netdata_log_info("PLUGINSD: 'host:%s', stopping plugin thread: %s", rrdhost_hostname(cd->host), cd->id); netdata_thread_cancel(cd->unsafe.thread); } - netdata_spinlock_unlock(&cd->unsafe.spinlock); + spinlock_unlock(&cd->unsafe.spinlock); } - info("PLUGINSD: cleanup completed."); + netdata_log_info("PLUGINSD: cleanup completed."); static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; worker_unregister(); @@ -235,7 +235,7 @@ void *pluginsd_main(void *ptr) if (unlikely(!dir)) { if (directory_errors[idx] != errno) { directory_errors[idx] = errno; - error("cannot open plugins directory '%s'", directory_name); + netdata_log_error("cannot open plugins directory '%s'", directory_name); } continue; } @@ -245,7 +245,7 @@ void *pluginsd_main(void *ptr) if (unlikely(!service_running(SERVICE_COLLECTORS))) break; - debug(D_PLUGINSD, "examining file '%s'", file->d_name); + netdata_log_debug(D_PLUGINSD, "examining file '%s'", file->d_name); if (unlikely(strcmp(file->d_name, ".") == 0 || strcmp(file->d_name, "..") == 0)) continue; @@ -254,7 +254,7 @@ void *pluginsd_main(void *ptr) if (unlikely(len <= (int)PLUGINSD_FILE_SUFFIX_LEN)) continue; if (unlikely(strcmp(PLUGINSD_FILE_SUFFIX, &file->d_name[len - (int)PLUGINSD_FILE_SUFFIX_LEN]) != 0)) { - debug(D_PLUGINSD, "file '%s' does not end in '%s'", file->d_name, PLUGINSD_FILE_SUFFIX); + netdata_log_debug(D_PLUGINSD, "file '%s' does not end in '%s'", file->d_name, PLUGINSD_FILE_SUFFIX); continue; } @@ -263,7 +263,7 @@ void *pluginsd_main(void *ptr) int enabled = config_get_boolean(CONFIG_SECTION_PLUGINS, pluginname, automatic_run); if 
(unlikely(!enabled)) { - debug(D_PLUGINSD, "plugin '%s' is not enabled", file->d_name); + netdata_log_debug(D_PLUGINSD, "plugin '%s' is not enabled", file->d_name); continue; } @@ -274,7 +274,7 @@ void *pluginsd_main(void *ptr) break; if (likely(cd && plugin_is_running(cd))) { - debug(D_PLUGINSD, "plugin '%s' is already running", cd->filename); + netdata_log_debug(D_PLUGINSD, "plugin '%s' is already running", cd->filename); continue; } diff --git a/collectors/plugins.d/pluginsd_parser.c b/collectors/plugins.d/pluginsd_parser.c index 097e5ea6..cda17710 100644 --- a/collectors/plugins.d/pluginsd_parser.c +++ b/collectors/plugins.d/pluginsd_parser.c @@ -4,90 +4,103 @@ #define LOG_FUNCTIONS false -static int send_to_plugin(const char *txt, void *data) { +static ssize_t send_to_plugin(const char *txt, void *data) { PARSER *parser = data; if(!txt || !*txt) return 0; + errno = 0; + spinlock_lock(&parser->writer.spinlock); + ssize_t bytes = -1; + #ifdef ENABLE_HTTPS NETDATA_SSL *ssl = parser->ssl_output; if(ssl) { + if(SSL_connection(ssl)) - return (int)netdata_ssl_write(ssl, (void *)txt, strlen(txt)); + bytes = netdata_ssl_write(ssl, (void *) txt, strlen(txt)); - error("PLUGINSD: cannot send command (SSL)"); - return -1; + else + netdata_log_error("PLUGINSD: cannot send command (SSL)"); + + spinlock_unlock(&parser->writer.spinlock); + return bytes; } #endif if(parser->fp_output) { - int bytes = fprintf(parser->fp_output, "%s", txt); + + bytes = fprintf(parser->fp_output, "%s", txt); if(bytes <= 0) { - error("PLUGINSD: cannot send command (FILE)"); - return -2; + netdata_log_error("PLUGINSD: cannot send command (FILE)"); + bytes = -2; } - fflush(parser->fp_output); + else + fflush(parser->fp_output); + + spinlock_unlock(&parser->writer.spinlock); return bytes; } if(parser->fd != -1) { - size_t bytes = 0; - size_t total = strlen(txt); + bytes = 0; + ssize_t total = (ssize_t)strlen(txt); ssize_t sent; do { sent = write(parser->fd, &txt[bytes], total - bytes); if(sent <= 0) { 
- error("PLUGINSD: cannot send command (fd)"); + netdata_log_error("PLUGINSD: cannot send command (fd)"); + spinlock_unlock(&parser->writer.spinlock); return -3; } bytes += sent; } while(bytes < total); + spinlock_unlock(&parser->writer.spinlock); return (int)bytes; } - error("PLUGINSD: cannot send command (no output socket/pipe/file given to plugins.d parser)"); + spinlock_unlock(&parser->writer.spinlock); + netdata_log_error("PLUGINSD: cannot send command (no output socket/pipe/file given to plugins.d parser)"); return -4; } -static inline RRDHOST *pluginsd_require_host_from_parent(void *user, const char *cmd) { - RRDHOST *host = ((PARSER_USER_OBJECT *) user)->host; +static inline RRDHOST *pluginsd_require_host_from_parent(PARSER *parser, const char *cmd) { + RRDHOST *host = parser->user.host; if(unlikely(!host)) - error("PLUGINSD: command %s requires a host, but is not set.", cmd); + netdata_log_error("PLUGINSD: command %s requires a host, but is not set.", cmd); return host; } -static inline RRDSET *pluginsd_require_chart_from_parent(void *user, const char *cmd, const char *parent_cmd) { - RRDSET *st = ((PARSER_USER_OBJECT *) user)->st; +static inline RRDSET *pluginsd_require_chart_from_parent(PARSER *parser, const char *cmd, const char *parent_cmd) { + RRDSET *st = parser->user.st; if(unlikely(!st)) - error("PLUGINSD: command %s requires a chart defined via command %s, but is not set.", cmd, parent_cmd); + netdata_log_error("PLUGINSD: command %s requires a chart defined via command %s, but is not set.", cmd, parent_cmd); return st; } -static inline RRDSET *pluginsd_get_chart_from_parent(void *user) { - return ((PARSER_USER_OBJECT *) user)->st; +static inline RRDSET *pluginsd_get_chart_from_parent(PARSER *parser) { + return parser->user.st; } -static inline void pluginsd_lock_rrdset_data_collection(void *user) { - PARSER_USER_OBJECT *u = (PARSER_USER_OBJECT *) user; - if(u->st && !u->v2.locked_data_collection) { - 
netdata_spinlock_lock(&u->st->data_collection_lock); - u->v2.locked_data_collection = true; +static inline void pluginsd_lock_rrdset_data_collection(PARSER *parser) { + if(parser->user.st && !parser->user.v2.locked_data_collection) { + spinlock_lock(&parser->user.st->data_collection_lock); + parser->user.v2.locked_data_collection = true; } } -static inline bool pluginsd_unlock_rrdset_data_collection(void *user) { - PARSER_USER_OBJECT *u = (PARSER_USER_OBJECT *) user; - if(u->st && u->v2.locked_data_collection) { - netdata_spinlock_unlock(&u->st->data_collection_lock); - u->v2.locked_data_collection = false; +static inline bool pluginsd_unlock_rrdset_data_collection(PARSER *parser) { + if(parser->user.st && parser->user.v2.locked_data_collection) { + spinlock_unlock(&parser->user.st->data_collection_lock); + parser->user.v2.locked_data_collection = false; return true; } @@ -108,29 +121,29 @@ void pluginsd_rrdset_cleanup(RRDSET *st) { st->pluginsd.pos = 0; } -static inline void pluginsd_unlock_previous_chart(void *user, const char *keyword, bool stale) { - PARSER_USER_OBJECT *u = (PARSER_USER_OBJECT *) user; - - if(unlikely(pluginsd_unlock_rrdset_data_collection(user))) { +static inline void pluginsd_unlock_previous_chart(PARSER *parser, const char *keyword, bool stale) { + if(unlikely(pluginsd_unlock_rrdset_data_collection(parser))) { if(stale) - error("PLUGINSD: 'host:%s/chart:%s/' stale data collection lock found during %s; it has been unlocked", - rrdhost_hostname(u->st->rrdhost), rrdset_id(u->st), keyword); + netdata_log_error("PLUGINSD: 'host:%s/chart:%s/' stale data collection lock found during %s; it has been unlocked", + rrdhost_hostname(parser->user.st->rrdhost), + rrdset_id(parser->user.st), + keyword); } - if(unlikely(u->v2.ml_locked)) { - ml_chart_update_end(u->st); - u->v2.ml_locked = false; + if(unlikely(parser->user.v2.ml_locked)) { + ml_chart_update_end(parser->user.st); + parser->user.v2.ml_locked = false; if(stale) - error("PLUGINSD: 
'host:%s/chart:%s/' stale ML lock found during %s, it has been unlocked", - rrdhost_hostname(u->st->rrdhost), rrdset_id(u->st), keyword); + netdata_log_error("PLUGINSD: 'host:%s/chart:%s/' stale ML lock found during %s, it has been unlocked", + rrdhost_hostname(parser->user.st->rrdhost), + rrdset_id(parser->user.st), + keyword); } } -static inline void pluginsd_set_chart_from_parent(void *user, RRDSET *st, const char *keyword) { - PARSER_USER_OBJECT *u = (PARSER_USER_OBJECT *) user; - - pluginsd_unlock_previous_chart(user, keyword, true); +static inline void pluginsd_set_chart_from_parent(PARSER *parser, RRDSET *st, const char *keyword) { + pluginsd_unlock_previous_chart(parser, keyword, true); if(st) { size_t dims = dictionary_entries(st->rrddim_root_index); @@ -145,13 +158,13 @@ static inline void pluginsd_set_chart_from_parent(void *user, RRDSET *st, const st->pluginsd.pos = 0; } - u->st = st; + parser->user.st = st; } static inline RRDDIM *pluginsd_acquire_dimension(RRDHOST *host, RRDSET *st, const char *dimension, const char *cmd) { if (unlikely(!dimension || !*dimension)) { - error("PLUGINSD: 'host:%s/chart:%s' got a %s, without a dimension.", - rrdhost_hostname(host), rrdset_id(st), cmd); + netdata_log_error("PLUGINSD: 'host:%s/chart:%s' got a %s, without a dimension.", + rrdhost_hostname(host), rrdset_id(st), cmd); return NULL; } @@ -172,8 +185,8 @@ static inline RRDDIM *pluginsd_acquire_dimension(RRDHOST *host, RRDSET *st, cons rda = rrddim_find_and_acquire(st, dimension); if (unlikely(!rda)) { - error("PLUGINSD: 'host:%s/chart:%s/dim:%s' got a %s but dimension does not exist.", - rrdhost_hostname(host), rrdset_id(st), dimension, cmd); + netdata_log_error("PLUGINSD: 'host:%s/chart:%s/dim:%s' got a %s but dimension does not exist.", + rrdhost_hostname(host), rrdset_id(st), dimension, cmd); return NULL; } @@ -186,21 +199,21 @@ static inline RRDDIM *pluginsd_acquire_dimension(RRDHOST *host, RRDSET *st, cons static inline RRDSET *pluginsd_find_chart(RRDHOST 
*host, const char *chart, const char *cmd) { if (unlikely(!chart || !*chart)) { - error("PLUGINSD: 'host:%s' got a %s without a chart id.", - rrdhost_hostname(host), cmd); + netdata_log_error("PLUGINSD: 'host:%s' got a %s without a chart id.", + rrdhost_hostname(host), cmd); return NULL; } RRDSET *st = rrdset_find(host, chart); if (unlikely(!st)) - error("PLUGINSD: 'host:%s/chart:%s' got a %s but chart does not exist.", - rrdhost_hostname(host), chart, cmd); + netdata_log_error("PLUGINSD: 'host:%s/chart:%s' got a %s but chart does not exist.", + rrdhost_hostname(host), chart, cmd); return st; } -static inline PARSER_RC PLUGINSD_DISABLE_PLUGIN(void *user, const char *keyword, const char *msg) { - ((PARSER_USER_OBJECT *) user)->enabled = 0; +static inline PARSER_RC PLUGINSD_DISABLE_PLUGIN(PARSER *parser, const char *keyword, const char *msg) { + parser->user.enabled = 0; if(keyword && msg) { error_limit_static_global_var(erl, 1, 0); @@ -210,22 +223,21 @@ static inline PARSER_RC PLUGINSD_DISABLE_PLUGIN(void *user, const char *keyword, return PARSER_RC_ERROR; } -PARSER_RC pluginsd_set(char **words, size_t num_words, void *user) -{ +static inline PARSER_RC pluginsd_set(char **words, size_t num_words, PARSER *parser) { char *dimension = get_word(words, num_words, 1); char *value = get_word(words, num_words, 2); - RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_SET); - if(!host) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDHOST *host = pluginsd_require_host_from_parent(parser, PLUGINSD_KEYWORD_SET); + if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_SET, PLUGINSD_KEYWORD_CHART); - if(!st) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDSET *st = pluginsd_require_chart_from_parent(parser, PLUGINSD_KEYWORD_SET, PLUGINSD_KEYWORD_CHART); + if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); RRDDIM *rd = pluginsd_acquire_dimension(host, st, 
dimension, PLUGINSD_KEYWORD_SET); - if(!rd) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + if(!rd) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); if (unlikely(rrdset_flag_check(st, RRDSET_FLAG_DEBUG))) - debug(D_PLUGINSD, "PLUGINSD: 'host:%s/chart:%s/dim:%s' SET is setting value to '%s'", + netdata_log_debug(D_PLUGINSD, "PLUGINSD: 'host:%s/chart:%s/dim:%s' SET is setting value to '%s'", rrdhost_hostname(host), rrdset_id(st), dimension, value && *value ? value : "UNSET"); if (value && *value) @@ -234,18 +246,17 @@ PARSER_RC pluginsd_set(char **words, size_t num_words, void *user) return PARSER_RC_OK; } -PARSER_RC pluginsd_begin(char **words, size_t num_words, void *user) -{ +static inline PARSER_RC pluginsd_begin(char **words, size_t num_words, PARSER *parser) { char *id = get_word(words, num_words, 1); char *microseconds_txt = get_word(words, num_words, 2); - RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_BEGIN); - if(!host) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDHOST *host = pluginsd_require_host_from_parent(parser, PLUGINSD_KEYWORD_BEGIN); + if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); RRDSET *st = pluginsd_find_chart(host, id, PLUGINSD_KEYWORD_BEGIN); - if(!st) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - pluginsd_set_chart_from_parent(user, st, PLUGINSD_KEYWORD_BEGIN); + pluginsd_set_chart_from_parent(parser, st, PLUGINSD_KEYWORD_BEGIN); usec_t microseconds = 0; if (microseconds_txt && *microseconds_txt) { @@ -270,7 +281,7 @@ PARSER_RC pluginsd_begin(char **words, size_t num_words, void *user) if (likely(st->counter_done)) { if (likely(microseconds)) { - if (((PARSER_USER_OBJECT *)user)->trust_durations) + if (parser->user.trust_durations) rrdset_next_usec_unfiltered(st, microseconds); else rrdset_next_usec(st, microseconds); @@ -281,22 +292,21 @@ PARSER_RC pluginsd_begin(char **words, size_t num_words, void *user) return 
PARSER_RC_OK; } -PARSER_RC pluginsd_end(char **words, size_t num_words, void *user) -{ +static inline PARSER_RC pluginsd_end(char **words, size_t num_words, PARSER *parser) { UNUSED(words); UNUSED(num_words); - RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_END); - if(!host) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDHOST *host = pluginsd_require_host_from_parent(parser, PLUGINSD_KEYWORD_END); + if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_END, PLUGINSD_KEYWORD_BEGIN); - if(!st) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDSET *st = pluginsd_require_chart_from_parent(parser, PLUGINSD_KEYWORD_END, PLUGINSD_KEYWORD_BEGIN); + if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); if (unlikely(rrdset_flag_check(st, RRDSET_FLAG_DEBUG))) - debug(D_PLUGINSD, "requested an END on chart '%s'", rrdset_id(st)); + netdata_log_debug(D_PLUGINSD, "requested an END on chart '%s'", rrdset_id(st)); - pluginsd_set_chart_from_parent(user, NULL, PLUGINSD_KEYWORD_END); - ((PARSER_USER_OBJECT *) user)->data_collections_count++; + pluginsd_set_chart_from_parent(parser, NULL, PLUGINSD_KEYWORD_END); + parser->user.data_collections_count++; struct timeval now; now_realtime_timeval(&now); @@ -305,15 +315,13 @@ PARSER_RC pluginsd_end(char **words, size_t num_words, void *user) return PARSER_RC_OK; } -static void pluginsd_host_define_cleanup(void *user) { - PARSER_USER_OBJECT *u = user; +static void pluginsd_host_define_cleanup(PARSER *parser) { + string_freez(parser->user.host_define.hostname); + dictionary_destroy(parser->user.host_define.rrdlabels); - string_freez(u->host_define.hostname); - dictionary_destroy(u->host_define.rrdlabels); - - u->host_define.hostname = NULL; - u->host_define.rrdlabels = NULL; - u->host_define.parsing_host = false; + parser->user.host_define.hostname = NULL; + parser->user.host_define.rrdlabels = NULL; + 
parser->user.host_define.parsing_host = false; } static inline bool pluginsd_validate_machine_guid(const char *guid, uuid_t *uuid, char *output) { @@ -325,61 +333,56 @@ static inline bool pluginsd_validate_machine_guid(const char *guid, uuid_t *uuid return true; } -static PARSER_RC pluginsd_host_define(char **words, size_t num_words, void *user) { - PARSER_USER_OBJECT *u = user; - +static inline PARSER_RC pluginsd_host_define(char **words, size_t num_words, PARSER *parser) { char *guid = get_word(words, num_words, 1); char *hostname = get_word(words, num_words, 2); if(unlikely(!guid || !*guid || !hostname || !*hostname)) - return PLUGINSD_DISABLE_PLUGIN(user, PLUGINSD_KEYWORD_HOST_DEFINE, "missing parameters"); + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_HOST_DEFINE, "missing parameters"); - if(unlikely(u->host_define.parsing_host)) - return PLUGINSD_DISABLE_PLUGIN(user, PLUGINSD_KEYWORD_HOST_DEFINE, + if(unlikely(parser->user.host_define.parsing_host)) + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_HOST_DEFINE, "another host definition is already open - did you send " PLUGINSD_KEYWORD_HOST_DEFINE_END "?"); - if(!pluginsd_validate_machine_guid(guid, &u->host_define.machine_guid, u->host_define.machine_guid_str)) - return PLUGINSD_DISABLE_PLUGIN(user, PLUGINSD_KEYWORD_HOST_DEFINE, "cannot parse MACHINE_GUID - is it a valid UUID?"); + if(!pluginsd_validate_machine_guid(guid, &parser->user.host_define.machine_guid, parser->user.host_define.machine_guid_str)) + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_HOST_DEFINE, "cannot parse MACHINE_GUID - is it a valid UUID?"); - u->host_define.hostname = string_strdupz(hostname); - u->host_define.rrdlabels = rrdlabels_create(); - u->host_define.parsing_host = true; + parser->user.host_define.hostname = string_strdupz(hostname); + parser->user.host_define.rrdlabels = rrdlabels_create(); + parser->user.host_define.parsing_host = true; return PARSER_RC_OK; } -static inline PARSER_RC 
pluginsd_host_dictionary(char **words, size_t num_words, void *user, DICTIONARY *dict, const char *keyword) { - PARSER_USER_OBJECT *u = user; - +static inline PARSER_RC pluginsd_host_dictionary(char **words, size_t num_words, PARSER *parser, DICTIONARY *dict, const char *keyword) { char *name = get_word(words, num_words, 1); char *value = get_word(words, num_words, 2); if(!name || !*name || !value) - return PLUGINSD_DISABLE_PLUGIN(user, keyword, "missing parameters"); + return PLUGINSD_DISABLE_PLUGIN(parser, keyword, "missing parameters"); - if(!u->host_define.parsing_host || !dict) - return PLUGINSD_DISABLE_PLUGIN(user, keyword, "host is not defined, send " PLUGINSD_KEYWORD_HOST_DEFINE " before this"); + if(!parser->user.host_define.parsing_host || !dict) + return PLUGINSD_DISABLE_PLUGIN(parser, keyword, "host is not defined, send " PLUGINSD_KEYWORD_HOST_DEFINE " before this"); rrdlabels_add(dict, name, value, RRDLABEL_SRC_CONFIG); return PARSER_RC_OK; } -static PARSER_RC pluginsd_host_labels(char **words, size_t num_words, void *user) { - PARSER_USER_OBJECT *u = user; - return pluginsd_host_dictionary(words, num_words, user, u->host_define.rrdlabels, PLUGINSD_KEYWORD_HOST_LABEL); +static inline PARSER_RC pluginsd_host_labels(char **words, size_t num_words, PARSER *parser) { + return pluginsd_host_dictionary(words, num_words, parser, + parser->user.host_define.rrdlabels, + PLUGINSD_KEYWORD_HOST_LABEL); } -static PARSER_RC pluginsd_host_define_end(char **words __maybe_unused, size_t num_words __maybe_unused, void *user) { - PARSER_USER_OBJECT *u = user; - - if(!u->host_define.parsing_host) - return PLUGINSD_DISABLE_PLUGIN(user, PLUGINSD_KEYWORD_HOST_DEFINE_END, "missing initialization, send " PLUGINSD_KEYWORD_HOST_DEFINE " before this"); +static inline PARSER_RC pluginsd_host_define_end(char **words __maybe_unused, size_t num_words __maybe_unused, PARSER *parser) { + if(!parser->user.host_define.parsing_host) + return PLUGINSD_DISABLE_PLUGIN(parser, 
PLUGINSD_KEYWORD_HOST_DEFINE_END, "missing initialization, send " PLUGINSD_KEYWORD_HOST_DEFINE " before this"); RRDHOST *host = rrdhost_find_or_create( - string2str(u->host_define.hostname), - string2str(u->host_define.hostname), - u->host_define.machine_guid_str, + string2str(parser->user.host_define.hostname), + string2str(parser->user.host_define.hostname), + parser->user.host_define.machine_guid_str, "Netdata Virtual Host 1.0", netdata_configured_timezone, netdata_configured_abbrev_timezone, @@ -398,22 +401,24 @@ static PARSER_RC pluginsd_host_define_end(char **words __maybe_unused, size_t nu default_rrdpush_enable_replication, default_rrdpush_seconds_to_replicate, default_rrdpush_replication_step, - rrdhost_labels_to_system_info(u->host_define.rrdlabels), + rrdhost_labels_to_system_info(parser->user.host_define.rrdlabels), false ); + rrdhost_option_set(host, RRDHOST_OPTION_VIRTUAL_HOST); + if(host->rrdlabels) { - rrdlabels_migrate_to_these(host->rrdlabels, u->host_define.rrdlabels); + rrdlabels_migrate_to_these(host->rrdlabels, parser->user.host_define.rrdlabels); } else { - host->rrdlabels = u->host_define.rrdlabels; - u->host_define.rrdlabels = NULL; + host->rrdlabels = parser->user.host_define.rrdlabels; + parser->user.host_define.rrdlabels = NULL; } - pluginsd_host_define_cleanup(user); + pluginsd_host_define_cleanup(parser); - u->host = host; - pluginsd_set_chart_from_parent(user, NULL, PLUGINSD_KEYWORD_HOST_DEFINE_END); + parser->user.host = host; + pluginsd_set_chart_from_parent(parser, NULL, PLUGINSD_KEYWORD_HOST_DEFINE_END); rrdhost_flag_clear(host, RRDHOST_FLAG_ORPHAN); rrdcontext_host_child_connected(host); @@ -422,34 +427,31 @@ static PARSER_RC pluginsd_host_define_end(char **words __maybe_unused, size_t nu return PARSER_RC_OK; } -static PARSER_RC pluginsd_host(char **words, size_t num_words, void *user) { - PARSER_USER_OBJECT *u = user; - +static inline PARSER_RC pluginsd_host(char **words, size_t num_words, PARSER *parser) { char *guid = 
get_word(words, num_words, 1); if(!guid || !*guid || strcmp(guid, "localhost") == 0) { - u->host = localhost; + parser->user.host = localhost; return PARSER_RC_OK; } uuid_t uuid; char uuid_str[UUID_STR_LEN]; if(!pluginsd_validate_machine_guid(guid, &uuid, uuid_str)) - return PLUGINSD_DISABLE_PLUGIN(user, PLUGINSD_KEYWORD_HOST, "cannot parse MACHINE_GUID - is it a valid UUID?"); + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_HOST, "cannot parse MACHINE_GUID - is it a valid UUID?"); RRDHOST *host = rrdhost_find_by_guid(uuid_str); if(unlikely(!host)) - return PLUGINSD_DISABLE_PLUGIN(user, PLUGINSD_KEYWORD_HOST, "cannot find a host with this machine guid - have you created it?"); + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_HOST, "cannot find a host with this machine guid - have you created it?"); - u->host = host; + parser->user.host = host; return PARSER_RC_OK; } -PARSER_RC pluginsd_chart(char **words, size_t num_words, void *user) -{ - RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_CHART); - if(!host) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); +static inline PARSER_RC pluginsd_chart(char **words, size_t num_words, PARSER *parser) { + RRDHOST *host = pluginsd_require_host_from_parent(parser, PLUGINSD_KEYWORD_CHART); + if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); char *type = get_word(words, num_words, 1); char *name = get_word(words, num_words, 2); @@ -473,7 +475,7 @@ PARSER_RC pluginsd_chart(char **words, size_t num_words, void *user) // make sure we have the required variables if (unlikely((!type || !*type || !id || !*id))) - return PLUGINSD_DISABLE_PLUGIN(user, PLUGINSD_KEYWORD_CHART, "missing parameters"); + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_CHART, "missing parameters"); // parse the name, and make sure it does not include 'type.' 
if (unlikely(name && *name)) { @@ -494,11 +496,11 @@ PARSER_RC pluginsd_chart(char **words, size_t num_words, void *user) if (likely(priority_s && *priority_s)) priority = str2i(priority_s); - int update_every = ((PARSER_USER_OBJECT *) user)->cd->update_every; + int update_every = parser->user.cd->update_every; if (likely(update_every_s && *update_every_s)) update_every = str2i(update_every_s); if (unlikely(!update_every)) - update_every = ((PARSER_USER_OBJECT *) user)->cd->update_every; + update_every = parser->user.cd->update_every; RRDSET_TYPE chart_type = RRDSET_TYPE_LINE; if (unlikely(chart)) @@ -515,7 +517,7 @@ PARSER_RC pluginsd_chart(char **words, size_t num_words, void *user) if (unlikely(!units)) units = "unknown"; - debug( + netdata_log_debug( D_PLUGINSD, "creating chart type='%s', id='%s', name='%s', family='%s', context='%s', chart='%s', priority=%d, update_every=%d", type, id, name ? name : "", family ? family : "", context ? context : "", rrdset_type_name(chart_type), @@ -525,14 +527,16 @@ PARSER_RC pluginsd_chart(char **words, size_t num_words, void *user) st = rrdset_create( host, type, id, name, family, context, title, units, - (plugin && *plugin) ? plugin : ((PARSER_USER_OBJECT *)user)->cd->filename, + (plugin && *plugin) ? 
plugin : parser->user.cd->filename, module, priority, update_every, chart_type); if (likely(st)) { if (options && *options) { - if (strstr(options, "obsolete")) + if (strstr(options, "obsolete")) { + pluginsd_rrdset_cleanup(st); rrdset_is_obsolete(st); + } else rrdset_isnot_obsolete(st); @@ -556,22 +560,21 @@ PARSER_RC pluginsd_chart(char **words, size_t num_words, void *user) rrdset_flag_clear(st, RRDSET_FLAG_STORE_FIRST); } } - pluginsd_set_chart_from_parent(user, st, PLUGINSD_KEYWORD_CHART); + pluginsd_set_chart_from_parent(parser, st, PLUGINSD_KEYWORD_CHART); return PARSER_RC_OK; } -PARSER_RC pluginsd_chart_definition_end(char **words, size_t num_words, void *user) -{ +static inline PARSER_RC pluginsd_chart_definition_end(char **words, size_t num_words, PARSER *parser) { const char *first_entry_txt = get_word(words, num_words, 1); const char *last_entry_txt = get_word(words, num_words, 2); const char *wall_clock_time_txt = get_word(words, num_words, 3); - RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_CHART_DEFINITION_END); - if(!host) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDHOST *host = pluginsd_require_host_from_parent(parser, PLUGINSD_KEYWORD_CHART_DEFINITION_END); + if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_CHART_DEFINITION_END, PLUGINSD_KEYWORD_CHART); - if(!st) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDSET *st = pluginsd_require_chart_from_parent(parser, PLUGINSD_KEYWORD_CHART_DEFINITION_END, PLUGINSD_KEYWORD_CHART); + if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); time_t first_entry_child = (first_entry_txt && *first_entry_txt) ? (time_t)str2ul(first_entry_txt) : 0; time_t last_entry_child = (last_entry_txt && *last_entry_txt) ? 
(time_t)str2ul(last_entry_txt) : 0; @@ -590,7 +593,6 @@ PARSER_RC pluginsd_chart_definition_end(char **words, size_t num_words, void *us rrdset_flag_clear(st, RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED); rrdhost_receiver_replicating_charts_plus_one(st->rrdhost); - PARSER *parser = ((PARSER_USER_OBJECT *)user)->parser; ok = replicate_chart_request(send_to_plugin, parser, host, st, first_entry_child, last_entry_child, child_wall_clock_time, 0, 0); @@ -605,8 +607,7 @@ PARSER_RC pluginsd_chart_definition_end(char **words, size_t num_words, void *us return ok ? PARSER_RC_OK : PARSER_RC_ERROR; } -PARSER_RC pluginsd_dimension(char **words, size_t num_words, void *user) -{ +static inline PARSER_RC pluginsd_dimension(char **words, size_t num_words, PARSER *parser) { char *id = get_word(words, num_words, 1); char *name = get_word(words, num_words, 2); char *algorithm = get_word(words, num_words, 3); @@ -614,14 +615,14 @@ PARSER_RC pluginsd_dimension(char **words, size_t num_words, void *user) char *divisor_s = get_word(words, num_words, 5); char *options = get_word(words, num_words, 6); - RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_DIMENSION); - if(!host) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDHOST *host = pluginsd_require_host_from_parent(parser, PLUGINSD_KEYWORD_DIMENSION); + if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_DIMENSION, PLUGINSD_KEYWORD_CHART); - if(!st) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDSET *st = pluginsd_require_chart_from_parent(parser, PLUGINSD_KEYWORD_DIMENSION, PLUGINSD_KEYWORD_CHART); + if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); if (unlikely(!id)) - return PLUGINSD_DISABLE_PLUGIN(user, PLUGINSD_KEYWORD_DIMENSION, "missing dimension id"); + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DIMENSION, "missing dimension id"); long multiplier = 1; if (multiplier_s && *multiplier_s) { 
@@ -641,7 +642,7 @@ PARSER_RC pluginsd_dimension(char **words, size_t num_words, void *user) algorithm = "absolute"; if (unlikely(st && rrdset_flag_check(st, RRDSET_FLAG_DEBUG))) - debug( + netdata_log_debug( D_PLUGINSD, "creating dimension in chart %s, id='%s', name='%s', algorithm='%s', multiplier=%ld, divisor=%ld, hidden='%s'", rrdset_id(st), id, name ? name : "", rrd_algorithm_name(rrd_algorithm_id(algorithm)), multiplier, divisor, @@ -720,7 +721,7 @@ static void inflight_functions_insert_callback(const DICTIONARY_ITEM *item, void pf->sent_ut = now_realtime_usec(); if(ret < 0) { - error("FUNCTION: failed to send function to plugin, error %d", ret); + netdata_log_error("FUNCTION: failed to send function to plugin, error %d", ret); rrd_call_function_error(pf->destination_wb, "Failed to communicate with collector", HTTP_RESP_BACKEND_FETCH_FAILED); } else { @@ -734,7 +735,7 @@ static void inflight_functions_insert_callback(const DICTIONARY_ITEM *item, void static bool inflight_functions_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *func __maybe_unused, void *new_func, void *parser_ptr __maybe_unused) { struct inflight_function *pf = new_func; - error("PLUGINSD_PARSER: duplicate UUID on pending function '%s' detected. Ignoring the second one.", string2str(pf->function)); + netdata_log_error("PLUGINSD_PARSER: duplicate UUID on pending function '%s' detected. 
Ignoring the second one.", string2str(pf->function)); pf->code = rrd_call_function_error(pf->destination_wb, "This request is already in progress", HTTP_RESP_BAD_REQUEST); pf->callback(pf->destination_wb, pf->code, pf->callback_data); string_freez(pf->function); @@ -825,8 +826,7 @@ static int pluginsd_execute_function_callback(BUFFER *destination_wb, int timeou return HTTP_RESP_OK; } -PARSER_RC pluginsd_function(char **words, size_t num_words, void *user) -{ +static inline PARSER_RC pluginsd_function(char **words, size_t num_words, PARSER *parser) { bool global = false; size_t i = 1; if(num_words >= 2 && strcmp(get_word(words, num_words, 1), "GLOBAL") == 0) { @@ -838,21 +838,21 @@ PARSER_RC pluginsd_function(char **words, size_t num_words, void *user) char *timeout_s = get_word(words, num_words, i++); char *help = get_word(words, num_words, i++); - RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_FUNCTION); + RRDHOST *host = pluginsd_require_host_from_parent(parser, PLUGINSD_KEYWORD_FUNCTION); if(!host) return PARSER_RC_ERROR; - RRDSET *st = (global)?NULL:pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_FUNCTION, PLUGINSD_KEYWORD_CHART); + RRDSET *st = (global)?NULL:pluginsd_require_chart_from_parent(parser, PLUGINSD_KEYWORD_FUNCTION, PLUGINSD_KEYWORD_CHART); if(!st) global = true; if (unlikely(!timeout_s || !name || !help || (!global && !st))) { - error("PLUGINSD: 'host:%s/chart:%s' got a FUNCTION, without providing the required data (global = '%s', name = '%s', timeout = '%s', help = '%s'). Ignoring it.", - rrdhost_hostname(host), - st?rrdset_id(st):"(unset)", - global?"yes":"no", - name?name:"(unset)", - timeout_s?timeout_s:"(unset)", - help?help:"(unset)" - ); + netdata_log_error("PLUGINSD: 'host:%s/chart:%s' got a FUNCTION, without providing the required data (global = '%s', name = '%s', timeout = '%s', help = '%s'). 
Ignoring it.", + rrdhost_hostname(host), + st?rrdset_id(st):"(unset)", + global?"yes":"no", + name?name:"(unset)", + timeout_s?timeout_s:"(unset)", + help?help:"(unset)" + ); return PARSER_RC_ERROR; } @@ -863,7 +863,6 @@ PARSER_RC pluginsd_function(char **words, size_t num_words, void *user) timeout = PLUGINS_FUNCTIONS_TIMEOUT_DEFAULT; } - PARSER *parser = ((PARSER_USER_OBJECT *) user)->parser; rrd_collector_add_function(host, st, name, timeout, help, false, pluginsd_execute_function_callback, parser); return PARSER_RC_OK; @@ -876,15 +875,14 @@ static void pluginsd_function_result_end(struct parser *parser, void *action_dat string_freez(key); } -PARSER_RC pluginsd_function_result_begin(char **words, size_t num_words, void *user) -{ +static inline PARSER_RC pluginsd_function_result_begin(char **words, size_t num_words, PARSER *parser) { char *key = get_word(words, num_words, 1); char *status = get_word(words, num_words, 2); char *format = get_word(words, num_words, 3); char *expires = get_word(words, num_words, 4); if (unlikely(!key || !*key || !status || !*status || !format || !*format || !expires || !*expires)) { - error("got a " PLUGINSD_KEYWORD_FUNCTION_RESULT_BEGIN " without providing the required data (key = '%s', status = '%s', format = '%s', expires = '%s')." + netdata_log_error("got a " PLUGINSD_KEYWORD_FUNCTION_RESULT_BEGIN " without providing the required data (key = '%s', status = '%s', format = '%s', expires = '%s')." , key ? key : "(unset)" , status ? status : "(unset)" , format ? format : "(unset)" @@ -898,15 +896,13 @@ PARSER_RC pluginsd_function_result_begin(char **words, size_t num_words, void *u time_t expiration = (expires && *expires) ? 
str2l(expires) : 0; - PARSER *parser = ((PARSER_USER_OBJECT *) user)->parser; - struct inflight_function *pf = NULL; if(key && *key) pf = (struct inflight_function *)dictionary_get(parser->inflight.functions, key); if(!pf) { - error("got a " PLUGINSD_KEYWORD_FUNCTION_RESULT_BEGIN " for transaction '%s', but the transaction is not found.", key?key:"(unset)"); + netdata_log_error("got a " PLUGINSD_KEYWORD_FUNCTION_RESULT_BEGIN " for transaction '%s', but the transaction is not found.", key?key:"(unset)"); } else { if(format && *format) @@ -932,16 +928,15 @@ PARSER_RC pluginsd_function_result_begin(char **words, size_t num_words, void *u // ---------------------------------------------------------------------------- -PARSER_RC pluginsd_variable(char **words, size_t num_words, void *user) -{ +static inline PARSER_RC pluginsd_variable(char **words, size_t num_words, PARSER *parser) { char *name = get_word(words, num_words, 1); char *value = get_word(words, num_words, 2); NETDATA_DOUBLE v; - RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_VARIABLE); - if(!host) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDHOST *host = pluginsd_require_host_from_parent(parser, PLUGINSD_KEYWORD_VARIABLE); + if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - RRDSET *st = pluginsd_get_chart_from_parent(user); + RRDSET *st = pluginsd_get_chart_from_parent(parser); int global = (st) ? 0 : 1; @@ -958,39 +953,39 @@ PARSER_RC pluginsd_variable(char **words, size_t num_words, void *user) } if (unlikely(!name || !*name)) - return PLUGINSD_DISABLE_PLUGIN(user, PLUGINSD_KEYWORD_VARIABLE, "missing variable name"); + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_VARIABLE, "missing variable name"); if (unlikely(!value || !*value)) value = NULL; if (unlikely(!value)) { - error("PLUGINSD: 'host:%s/chart:%s' cannot set %s VARIABLE '%s' to an empty value", - rrdhost_hostname(host), - st ? rrdset_id(st):"UNSET", - (global) ? 
"HOST" : "CHART", - name); + netdata_log_error("PLUGINSD: 'host:%s/chart:%s' cannot set %s VARIABLE '%s' to an empty value", + rrdhost_hostname(host), + st ? rrdset_id(st):"UNSET", + (global) ? "HOST" : "CHART", + name); return PARSER_RC_OK; } if (!global && !st) - return PLUGINSD_DISABLE_PLUGIN(user, PLUGINSD_KEYWORD_VARIABLE, "no chart is defined and no GLOBAL is given"); + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_VARIABLE, "no chart is defined and no GLOBAL is given"); char *endptr = NULL; v = (NETDATA_DOUBLE) str2ndd_encoded(value, &endptr); if (unlikely(endptr && *endptr)) { if (endptr == value) - error("PLUGINSD: 'host:%s/chart:%s' the value '%s' of VARIABLE '%s' cannot be parsed as a number", - rrdhost_hostname(host), - st ? rrdset_id(st):"UNSET", - value, - name); + netdata_log_error("PLUGINSD: 'host:%s/chart:%s' the value '%s' of VARIABLE '%s' cannot be parsed as a number", + rrdhost_hostname(host), + st ? rrdset_id(st):"UNSET", + value, + name); else - error("PLUGINSD: 'host:%s/chart:%s' the value '%s' of VARIABLE '%s' has leftovers: '%s'", - rrdhost_hostname(host), - st ? rrdset_id(st):"UNSET", - value, - name, - endptr); + netdata_log_error("PLUGINSD: 'host:%s/chart:%s' the value '%s' of VARIABLE '%s' has leftovers: '%s'", + rrdhost_hostname(host), + st ? 
rrdset_id(st):"UNSET", + value, + name, + endptr); } if (global) { @@ -1000,9 +995,9 @@ PARSER_RC pluginsd_variable(char **words, size_t num_words, void *user) rrdvar_custom_host_variable_release(host, rva); } else - error("PLUGINSD: 'host:%s' cannot find/create HOST VARIABLE '%s'", - rrdhost_hostname(host), - name); + netdata_log_error("PLUGINSD: 'host:%s' cannot find/create HOST VARIABLE '%s'", + rrdhost_hostname(host), + name); } else { const RRDSETVAR_ACQUIRED *rsa = rrdsetvar_custom_chart_variable_add_and_acquire(st, name); if (rsa) { @@ -1010,39 +1005,36 @@ PARSER_RC pluginsd_variable(char **words, size_t num_words, void *user) rrdsetvar_custom_chart_variable_release(st, rsa); } else - error("PLUGINSD: 'host:%s/chart:%s' cannot find/create CHART VARIABLE '%s'", - rrdhost_hostname(host), rrdset_id(st), name); + netdata_log_error("PLUGINSD: 'host:%s/chart:%s' cannot find/create CHART VARIABLE '%s'", + rrdhost_hostname(host), rrdset_id(st), name); } return PARSER_RC_OK; } -PARSER_RC pluginsd_flush(char **words __maybe_unused, size_t num_words __maybe_unused, void *user) -{ - debug(D_PLUGINSD, "requested a " PLUGINSD_KEYWORD_FLUSH); - pluginsd_set_chart_from_parent(user, NULL, PLUGINSD_KEYWORD_FLUSH); - ((PARSER_USER_OBJECT *) user)->replay.start_time = 0; - ((PARSER_USER_OBJECT *) user)->replay.end_time = 0; - ((PARSER_USER_OBJECT *) user)->replay.start_time_ut = 0; - ((PARSER_USER_OBJECT *) user)->replay.end_time_ut = 0; +static inline PARSER_RC pluginsd_flush(char **words __maybe_unused, size_t num_words __maybe_unused, PARSER *parser) { + netdata_log_debug(D_PLUGINSD, "requested a " PLUGINSD_KEYWORD_FLUSH); + pluginsd_set_chart_from_parent(parser, NULL, PLUGINSD_KEYWORD_FLUSH); + parser->user.replay.start_time = 0; + parser->user.replay.end_time = 0; + parser->user.replay.start_time_ut = 0; + parser->user.replay.end_time_ut = 0; return PARSER_RC_OK; } -PARSER_RC pluginsd_disable(char **words __maybe_unused, size_t num_words __maybe_unused, void *user 
__maybe_unused) -{ - info("PLUGINSD: plugin called DISABLE. Disabling it."); - ((PARSER_USER_OBJECT *) user)->enabled = 0; +static inline PARSER_RC pluginsd_disable(char **words __maybe_unused, size_t num_words __maybe_unused, PARSER *parser) { + netdata_log_info("PLUGINSD: plugin called DISABLE. Disabling it."); + parser->user.enabled = 0; return PARSER_RC_STOP; } -PARSER_RC pluginsd_label(char **words, size_t num_words, void *user) -{ +static inline PARSER_RC pluginsd_label(char **words, size_t num_words, PARSER *parser) { const char *name = get_word(words, num_words, 1); const char *label_source = get_word(words, num_words, 2); const char *value = get_word(words, num_words, 3); if (!name || !label_source || !value) - return PLUGINSD_DISABLE_PLUGIN(user, PLUGINSD_KEYWORD_LABEL, "missing parameters"); + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_LABEL, "missing parameters"); char *store = (char *)value; bool allocated_store = false; @@ -1071,13 +1063,10 @@ PARSER_RC pluginsd_label(char **words, size_t num_words, void *user) } } - if(unlikely(!((PARSER_USER_OBJECT *) user)->new_host_labels)) - ((PARSER_USER_OBJECT *) user)->new_host_labels = rrdlabels_create(); + if(unlikely(!(parser->user.new_host_labels))) + parser->user.new_host_labels = rrdlabels_create(); - rrdlabels_add(((PARSER_USER_OBJECT *)user)->new_host_labels, - name, - store, - str2l(label_source)); + rrdlabels_add(parser->user.new_host_labels, name, store, str2l(label_source)); if (allocated_store) freez(store); @@ -1085,90 +1074,84 @@ PARSER_RC pluginsd_label(char **words, size_t num_words, void *user) return PARSER_RC_OK; } -PARSER_RC pluginsd_overwrite(char **words __maybe_unused, size_t num_words __maybe_unused, void *user) -{ - RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_OVERWRITE); - if(!host) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); +static inline PARSER_RC pluginsd_overwrite(char **words __maybe_unused, size_t num_words __maybe_unused, PARSER 
*parser) { + RRDHOST *host = pluginsd_require_host_from_parent(parser, PLUGINSD_KEYWORD_OVERWRITE); + if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - debug(D_PLUGINSD, "requested to OVERWRITE host labels"); + netdata_log_debug(D_PLUGINSD, "requested to OVERWRITE host labels"); if(unlikely(!host->rrdlabels)) host->rrdlabels = rrdlabels_create(); - rrdlabels_migrate_to_these(host->rrdlabels, (DICTIONARY *) (((PARSER_USER_OBJECT *)user)->new_host_labels)); + rrdlabels_migrate_to_these(host->rrdlabels, parser->user.new_host_labels); rrdhost_flag_set(host, RRDHOST_FLAG_METADATA_LABELS | RRDHOST_FLAG_METADATA_UPDATE); - rrdlabels_destroy(((PARSER_USER_OBJECT *)user)->new_host_labels); - ((PARSER_USER_OBJECT *)user)->new_host_labels = NULL; + rrdlabels_destroy(parser->user.new_host_labels); + parser->user.new_host_labels = NULL; return PARSER_RC_OK; } - -PARSER_RC pluginsd_clabel(char **words, size_t num_words, void *user) -{ +static inline PARSER_RC pluginsd_clabel(char **words, size_t num_words, PARSER *parser) { const char *name = get_word(words, num_words, 1); const char *value = get_word(words, num_words, 2); const char *label_source = get_word(words, num_words, 3); if (!name || !value || !*label_source) { - error("Ignoring malformed or empty CHART LABEL command."); - return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + netdata_log_error("Ignoring malformed or empty CHART LABEL command."); + return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); } - if(unlikely(!((PARSER_USER_OBJECT *) user)->chart_rrdlabels_linked_temporarily)) { - RRDSET *st = pluginsd_get_chart_from_parent(user); - ((PARSER_USER_OBJECT *)user)->chart_rrdlabels_linked_temporarily = st->rrdlabels; - rrdlabels_unmark_all(((PARSER_USER_OBJECT *)user)->chart_rrdlabels_linked_temporarily); + if(unlikely(!parser->user.chart_rrdlabels_linked_temporarily)) { + RRDSET *st = pluginsd_get_chart_from_parent(parser); + parser->user.chart_rrdlabels_linked_temporarily = st->rrdlabels; + 
rrdlabels_unmark_all(parser->user.chart_rrdlabels_linked_temporarily); } - rrdlabels_add(((PARSER_USER_OBJECT *)user)->chart_rrdlabels_linked_temporarily, - name, value, str2l(label_source)); + rrdlabels_add(parser->user.chart_rrdlabels_linked_temporarily, name, value, str2l(label_source)); return PARSER_RC_OK; } -PARSER_RC pluginsd_clabel_commit(char **words __maybe_unused, size_t num_words __maybe_unused, void *user) -{ - RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_CLABEL_COMMIT); - if(!host) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); +static inline PARSER_RC pluginsd_clabel_commit(char **words __maybe_unused, size_t num_words __maybe_unused, PARSER *parser) { + RRDHOST *host = pluginsd_require_host_from_parent(parser, PLUGINSD_KEYWORD_CLABEL_COMMIT); + if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_CLABEL_COMMIT, PLUGINSD_KEYWORD_BEGIN); - if(!st) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDSET *st = pluginsd_require_chart_from_parent(parser, PLUGINSD_KEYWORD_CLABEL_COMMIT, PLUGINSD_KEYWORD_BEGIN); + if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - debug(D_PLUGINSD, "requested to commit chart labels"); + netdata_log_debug(D_PLUGINSD, "requested to commit chart labels"); - if(!((PARSER_USER_OBJECT *)user)->chart_rrdlabels_linked_temporarily) { - error("PLUGINSD: 'host:%s' got CLABEL_COMMIT, without a CHART or BEGIN. Ignoring it.", - rrdhost_hostname(host)); - return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + if(!parser->user.chart_rrdlabels_linked_temporarily) { + netdata_log_error("PLUGINSD: 'host:%s' got CLABEL_COMMIT, without a CHART or BEGIN. 
Ignoring it.", rrdhost_hostname(host)); + return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); } - rrdlabels_remove_all_unmarked(((PARSER_USER_OBJECT *)user)->chart_rrdlabels_linked_temporarily); + rrdlabels_remove_all_unmarked(parser->user.chart_rrdlabels_linked_temporarily); rrdset_flag_set(st, RRDSET_FLAG_METADATA_UPDATE); rrdhost_flag_set(st->rrdhost, RRDHOST_FLAG_METADATA_UPDATE); - ((PARSER_USER_OBJECT *)user)->chart_rrdlabels_linked_temporarily = NULL; + parser->user.chart_rrdlabels_linked_temporarily = NULL; return PARSER_RC_OK; } -PARSER_RC pluginsd_replay_begin(char **words, size_t num_words, void *user) { +static inline PARSER_RC pluginsd_replay_begin(char **words, size_t num_words, PARSER *parser) { char *id = get_word(words, num_words, 1); char *start_time_str = get_word(words, num_words, 2); char *end_time_str = get_word(words, num_words, 3); char *child_now_str = get_word(words, num_words, 4); - RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_REPLAY_BEGIN); - if(!host) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDHOST *host = pluginsd_require_host_from_parent(parser, PLUGINSD_KEYWORD_REPLAY_BEGIN); + if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); RRDSET *st; if (likely(!id || !*id)) - st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_REPLAY_BEGIN, PLUGINSD_KEYWORD_REPLAY_BEGIN); + st = pluginsd_require_chart_from_parent(parser, PLUGINSD_KEYWORD_REPLAY_BEGIN, PLUGINSD_KEYWORD_REPLAY_BEGIN); else st = pluginsd_find_chart(host, id, PLUGINSD_KEYWORD_REPLAY_BEGIN); - if(!st) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); - pluginsd_set_chart_from_parent(user, st, PLUGINSD_KEYWORD_REPLAY_BEGIN); + if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); + pluginsd_set_chart_from_parent(parser, st, PLUGINSD_KEYWORD_REPLAY_BEGIN); if(start_time_str && end_time_str) { time_t start_time = (time_t) str2ull_encoded(start_time_str); @@ -1216,36 +1199,37 @@ PARSER_RC pluginsd_replay_begin(char 
**words, size_t num_words, void *user) { st->counter_done++; // these are only needed for db mode RAM, SAVE, MAP, ALLOC - st->current_entry++; - if(st->current_entry >= st->entries) - st->current_entry -= st->entries; + st->db.current_entry++; + if(st->db.current_entry >= st->db.entries) + st->db.current_entry -= st->db.entries; - ((PARSER_USER_OBJECT *) user)->replay.start_time = start_time; - ((PARSER_USER_OBJECT *) user)->replay.end_time = end_time; - ((PARSER_USER_OBJECT *) user)->replay.start_time_ut = (usec_t) start_time * USEC_PER_SEC; - ((PARSER_USER_OBJECT *) user)->replay.end_time_ut = (usec_t) end_time * USEC_PER_SEC; - ((PARSER_USER_OBJECT *) user)->replay.wall_clock_time = wall_clock_time; - ((PARSER_USER_OBJECT *) user)->replay.rset_enabled = true; + parser->user.replay.start_time = start_time; + parser->user.replay.end_time = end_time; + parser->user.replay.start_time_ut = (usec_t) start_time * USEC_PER_SEC; + parser->user.replay.end_time_ut = (usec_t) end_time * USEC_PER_SEC; + parser->user.replay.wall_clock_time = wall_clock_time; + parser->user.replay.rset_enabled = true; return PARSER_RC_OK; } - error("PLUGINSD REPLAY ERROR: 'host:%s/chart:%s' got a " PLUGINSD_KEYWORD_REPLAY_BEGIN - " from %ld to %ld, but timestamps are invalid " - "(now is %ld [%s], tolerance %ld). Ignoring " PLUGINSD_KEYWORD_REPLAY_SET, - rrdhost_hostname(st->rrdhost), rrdset_id(st), start_time, end_time, - wall_clock_time, wall_clock_comes_from_child ? "child wall clock" : "parent wall clock", tolerance); + netdata_log_error("PLUGINSD REPLAY ERROR: 'host:%s/chart:%s' got a " PLUGINSD_KEYWORD_REPLAY_BEGIN + " from %ld to %ld, but timestamps are invalid " + "(now is %ld [%s], tolerance %ld). Ignoring " PLUGINSD_KEYWORD_REPLAY_SET, + rrdhost_hostname(st->rrdhost), rrdset_id(st), start_time, end_time, + wall_clock_time, wall_clock_comes_from_child ? 
"child wall clock" : "parent wall clock", + tolerance); } // the child sends an RBEGIN without any parameters initially // setting rset_enabled to false, means the RSET should not store any metrics // to store metrics, the RBEGIN needs to have timestamps - ((PARSER_USER_OBJECT *) user)->replay.start_time = 0; - ((PARSER_USER_OBJECT *) user)->replay.end_time = 0; - ((PARSER_USER_OBJECT *) user)->replay.start_time_ut = 0; - ((PARSER_USER_OBJECT *) user)->replay.end_time_ut = 0; - ((PARSER_USER_OBJECT *) user)->replay.wall_clock_time = 0; - ((PARSER_USER_OBJECT *) user)->replay.rset_enabled = false; + parser->user.replay.start_time = 0; + parser->user.replay.end_time = 0; + parser->user.replay.start_time_ut = 0; + parser->user.replay.end_time_ut = 0; + parser->user.replay.wall_clock_time = 0; + parser->user.replay.rset_enabled = false; return PARSER_RC_OK; } @@ -1276,20 +1260,18 @@ static inline SN_FLAGS pluginsd_parse_storage_number_flags(const char *flags_str return flags; } -PARSER_RC pluginsd_replay_set(char **words, size_t num_words, void *user) -{ +static inline PARSER_RC pluginsd_replay_set(char **words, size_t num_words, PARSER *parser) { char *dimension = get_word(words, num_words, 1); char *value_str = get_word(words, num_words, 2); char *flags_str = get_word(words, num_words, 3); - RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_REPLAY_SET); - if(!host) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDHOST *host = pluginsd_require_host_from_parent(parser, PLUGINSD_KEYWORD_REPLAY_SET); + if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_REPLAY_SET, PLUGINSD_KEYWORD_REPLAY_BEGIN); - if(!st) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDSET *st = pluginsd_require_chart_from_parent(parser, PLUGINSD_KEYWORD_REPLAY_SET, PLUGINSD_KEYWORD_REPLAY_BEGIN); + if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - PARSER_USER_OBJECT *u = 
user; - if(!u->replay.rset_enabled) { + if(!parser->user.replay.rset_enabled) { error_limit_static_thread_var(erl, 1, 0); error_limit(&erl, "PLUGINSD: 'host:%s/chart:%s' got a %s but it is disabled by %s errors", rrdhost_hostname(host), rrdset_id(st), PLUGINSD_KEYWORD_REPLAY_SET, PLUGINSD_KEYWORD_REPLAY_BEGIN); @@ -1299,18 +1281,18 @@ PARSER_RC pluginsd_replay_set(char **words, size_t num_words, void *user) } RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, PLUGINSD_KEYWORD_REPLAY_SET); - if(!rd) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + if(!rd) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - if (unlikely(!u->replay.start_time || !u->replay.end_time)) { - error("PLUGINSD: 'host:%s/chart:%s/dim:%s' got a %s with invalid timestamps %ld to %ld from a %s. Disabling it.", + if (unlikely(!parser->user.replay.start_time || !parser->user.replay.end_time)) { + netdata_log_error("PLUGINSD: 'host:%s/chart:%s/dim:%s' got a %s with invalid timestamps %ld to %ld from a %s. 
Disabling it.", rrdhost_hostname(host), rrdset_id(st), dimension, PLUGINSD_KEYWORD_REPLAY_SET, - u->replay.start_time, - u->replay.end_time, + parser->user.replay.start_time, + parser->user.replay.end_time, PLUGINSD_KEYWORD_REPLAY_BEGIN); - return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); } if (unlikely(!value_str || !*value_str)) @@ -1331,10 +1313,10 @@ PARSER_RC pluginsd_replay_set(char **words, size_t num_words, void *user) flags = SN_EMPTY_SLOT; } - rrddim_store_metric(rd, u->replay.end_time_ut, value, flags); - rd->last_collected_time.tv_sec = u->replay.end_time; - rd->last_collected_time.tv_usec = 0; - rd->collections_counter++; + rrddim_store_metric(rd, parser->user.replay.end_time_ut, value, flags); + rd->collector.last_collected_time.tv_sec = parser->user.replay.end_time; + rd->collector.last_collected_time.tv_usec = 0; + rd->collector.counter++; } else { error_limit_static_global_var(erl, 1, 0); @@ -1346,9 +1328,8 @@ PARSER_RC pluginsd_replay_set(char **words, size_t num_words, void *user) return PARSER_RC_OK; } -PARSER_RC pluginsd_replay_rrddim_collection_state(char **words, size_t num_words, void *user) -{ - if(((PARSER_USER_OBJECT *) user)->replay.rset_enabled == false) +static inline PARSER_RC pluginsd_replay_rrddim_collection_state(char **words, size_t num_words, PARSER *parser) { + if(parser->user.replay.rset_enabled == false) return PARSER_RC_OK; char *dimension = get_word(words, num_words, 1); @@ -1357,42 +1338,41 @@ PARSER_RC pluginsd_replay_rrddim_collection_state(char **words, size_t num_words char *last_calculated_value_str = get_word(words, num_words, 4); char *last_stored_value_str = get_word(words, num_words, 5); - RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE); - if(!host) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDHOST *host = pluginsd_require_host_from_parent(parser, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE); + if(!host) return 
PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE, PLUGINSD_KEYWORD_REPLAY_BEGIN); - if(!st) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDSET *st = pluginsd_require_chart_from_parent(parser, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE, PLUGINSD_KEYWORD_REPLAY_BEGIN); + if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE); - if(!rd) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + if(!rd) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - usec_t dim_last_collected_ut = (usec_t)rd->last_collected_time.tv_sec * USEC_PER_SEC + (usec_t)rd->last_collected_time.tv_usec; + usec_t dim_last_collected_ut = (usec_t)rd->collector.last_collected_time.tv_sec * USEC_PER_SEC + (usec_t)rd->collector.last_collected_time.tv_usec; usec_t last_collected_ut = last_collected_ut_str ? str2ull_encoded(last_collected_ut_str) : 0; if(last_collected_ut > dim_last_collected_ut) { - rd->last_collected_time.tv_sec = (time_t)(last_collected_ut / USEC_PER_SEC); - rd->last_collected_time.tv_usec = (last_collected_ut % USEC_PER_SEC); + rd->collector.last_collected_time.tv_sec = (time_t)(last_collected_ut / USEC_PER_SEC); + rd->collector.last_collected_time.tv_usec = (last_collected_ut % USEC_PER_SEC); } - rd->last_collected_value = last_collected_value_str ? str2ll_encoded(last_collected_value_str) : 0; - rd->last_calculated_value = last_calculated_value_str ? str2ndd_encoded(last_calculated_value_str, NULL) : 0; - rd->last_stored_value = last_stored_value_str ? str2ndd_encoded(last_stored_value_str, NULL) : 0.0; + rd->collector.last_collected_value = last_collected_value_str ? str2ll_encoded(last_collected_value_str) : 0; + rd->collector.last_calculated_value = last_calculated_value_str ? str2ndd_encoded(last_calculated_value_str, NULL) : 0; + rd->collector.last_stored_value = last_stored_value_str ? 
str2ndd_encoded(last_stored_value_str, NULL) : 0.0; return PARSER_RC_OK; } -PARSER_RC pluginsd_replay_rrdset_collection_state(char **words, size_t num_words, void *user) -{ - if(((PARSER_USER_OBJECT *) user)->replay.rset_enabled == false) +static inline PARSER_RC pluginsd_replay_rrdset_collection_state(char **words, size_t num_words, PARSER *parser) { + if(parser->user.replay.rset_enabled == false) return PARSER_RC_OK; char *last_collected_ut_str = get_word(words, num_words, 1); char *last_updated_ut_str = get_word(words, num_words, 2); - RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_REPLAY_RRDSET_STATE); - if(!host) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDHOST *host = pluginsd_require_host_from_parent(parser, PLUGINSD_KEYWORD_REPLAY_RRDSET_STATE); + if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_REPLAY_RRDSET_STATE, PLUGINSD_KEYWORD_REPLAY_BEGIN); - if(!st) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDSET *st = pluginsd_require_chart_from_parent(parser, PLUGINSD_KEYWORD_REPLAY_RRDSET_STATE, PLUGINSD_KEYWORD_REPLAY_BEGIN); + if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); usec_t chart_last_collected_ut = (usec_t)st->last_collected_time.tv_sec * USEC_PER_SEC + (usec_t)st->last_collected_time.tv_usec; usec_t last_collected_ut = last_collected_ut_str ? 
str2ull_encoded(last_collected_ut_str) : 0; @@ -1414,10 +1394,9 @@ PARSER_RC pluginsd_replay_rrdset_collection_state(char **words, size_t num_words return PARSER_RC_OK; } -PARSER_RC pluginsd_replay_end(char **words, size_t num_words, void *user) -{ +static inline PARSER_RC pluginsd_replay_end(char **words, size_t num_words, PARSER *parser) { if (num_words < 7) { // accepts 7, but the 7th is optional - error("REPLAY: malformed " PLUGINSD_KEYWORD_REPLAY_END " command"); + netdata_log_error("REPLAY: malformed " PLUGINSD_KEYWORD_REPLAY_END " command"); return PARSER_RC_ERROR; } @@ -1441,13 +1420,11 @@ PARSER_RC pluginsd_replay_end(char **words, size_t num_words, void *user) time_t child_world_time = (child_world_time_txt && *child_world_time_txt) ? (time_t) str2ull_encoded( child_world_time_txt) : now_realtime_sec(); - PARSER_USER_OBJECT *user_object = user; - - RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_REPLAY_END); - if(!host) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDHOST *host = pluginsd_require_host_from_parent(parser, PLUGINSD_KEYWORD_REPLAY_END); + if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_REPLAY_END, PLUGINSD_KEYWORD_REPLAY_BEGIN); - if(!st) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDSET *st = pluginsd_require_chart_from_parent(parser, PLUGINSD_KEYWORD_REPLAY_END, PLUGINSD_KEYWORD_REPLAY_BEGIN); + if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); #ifdef NETDATA_LOG_REPLICATION_REQUESTS internal_error(true, @@ -1460,12 +1437,12 @@ PARSER_RC pluginsd_replay_end(char **words, size_t num_words, void *user) ); #endif - ((PARSER_USER_OBJECT *) user)->data_collections_count++; + parser->user.data_collections_count++; - if(((PARSER_USER_OBJECT *) user)->replay.rset_enabled && st->rrdhost->receiver) { + if(parser->user.replay.rset_enabled && st->rrdhost->receiver) { time_t now = now_realtime_sec(); time_t started = 
st->rrdhost->receiver->replication_first_time_t; - time_t current = ((PARSER_USER_OBJECT *) user)->replay.end_time; + time_t current = parser->user.replay.end_time; if(started && current > started) { host->rrdpush_receiver_replication_percent = (NETDATA_DOUBLE) (current - started) * 100.0 / (NETDATA_DOUBLE) (now - started); @@ -1474,12 +1451,12 @@ PARSER_RC pluginsd_replay_end(char **words, size_t num_words, void *user) } } - ((PARSER_USER_OBJECT *) user)->replay.start_time = 0; - ((PARSER_USER_OBJECT *) user)->replay.end_time = 0; - ((PARSER_USER_OBJECT *) user)->replay.start_time_ut = 0; - ((PARSER_USER_OBJECT *) user)->replay.end_time_ut = 0; - ((PARSER_USER_OBJECT *) user)->replay.wall_clock_time = 0; - ((PARSER_USER_OBJECT *) user)->replay.rset_enabled = false; + parser->user.replay.start_time = 0; + parser->user.replay.end_time = 0; + parser->user.replay.start_time_ut = 0; + parser->user.replay.end_time_ut = 0; + parser->user.replay.wall_clock_time = 0; + parser->user.replay.rset_enabled = false; st->counter++; st->counter_done++; @@ -1509,7 +1486,7 @@ PARSER_RC pluginsd_replay_end(char **words, size_t num_words, void *user) rrdhost_hostname(host), rrdset_id(st)); #endif - pluginsd_set_chart_from_parent(user, NULL, PLUGINSD_KEYWORD_REPLAY_END); + pluginsd_set_chart_from_parent(parser, NULL, PLUGINSD_KEYWORD_REPLAY_END); host->rrdpush_receiver_replication_percent = 100.0; worker_set_metric(WORKER_RECEIVER_JOB_REPLICATION_COMPLETION, host->rrdpush_receiver_replication_percent); @@ -1517,17 +1494,17 @@ PARSER_RC pluginsd_replay_end(char **words, size_t num_words, void *user) return PARSER_RC_OK; } - pluginsd_set_chart_from_parent(user, NULL, PLUGINSD_KEYWORD_REPLAY_END); + pluginsd_set_chart_from_parent(parser, NULL, PLUGINSD_KEYWORD_REPLAY_END); rrdcontext_updated_retention_rrdset(st); - bool ok = replicate_chart_request(send_to_plugin, user_object->parser, host, st, + bool ok = replicate_chart_request(send_to_plugin, parser, host, st, first_entry_child, 
last_entry_child, child_world_time, first_entry_requested, last_entry_requested); return ok ? PARSER_RC_OK : PARSER_RC_ERROR; } -PARSER_RC pluginsd_begin_v2(char **words, size_t num_words, void *user) { +static inline PARSER_RC pluginsd_begin_v2(char **words, size_t num_words, PARSER *parser) { timing_init(); char *id = get_word(words, num_words, 1); @@ -1536,17 +1513,17 @@ PARSER_RC pluginsd_begin_v2(char **words, size_t num_words, void *user) { char *wall_clock_time_str = get_word(words, num_words, 4); if(unlikely(!id || !update_every_str || !end_time_str || !wall_clock_time_str)) - return PLUGINSD_DISABLE_PLUGIN(user, PLUGINSD_KEYWORD_BEGIN_V2, "missing parameters"); + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_BEGIN_V2, "missing parameters"); - RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_BEGIN_V2); - if(unlikely(!host)) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDHOST *host = pluginsd_require_host_from_parent(parser, PLUGINSD_KEYWORD_BEGIN_V2); + if(unlikely(!host)) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); timing_step(TIMING_STEP_BEGIN2_PREPARE); RRDSET *st = pluginsd_find_chart(host, id, PLUGINSD_KEYWORD_BEGIN_V2); - if(unlikely(!st)) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + if(unlikely(!st)) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - pluginsd_set_chart_from_parent(user, st, PLUGINSD_KEYWORD_BEGIN_V2); + pluginsd_set_chart_from_parent(parser, st, PLUGINSD_KEYWORD_BEGIN_V2); if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE | RRDSET_FLAG_ARCHIVED))) rrdset_isnot_obsolete(st); @@ -1573,32 +1550,31 @@ PARSER_RC pluginsd_begin_v2(char **words, size_t num_words, void *user) { // ------------------------------------------------------------------------ // prepare our state - pluginsd_lock_rrdset_data_collection(user); + pluginsd_lock_rrdset_data_collection(parser); - PARSER_USER_OBJECT *u = (PARSER_USER_OBJECT *) user; - u->v2.update_every = update_every; - u->v2.end_time = 
end_time; - u->v2.wall_clock_time = wall_clock_time; - u->v2.ml_locked = ml_chart_update_begin(st); + parser->user.v2.update_every = update_every; + parser->user.v2.end_time = end_time; + parser->user.v2.wall_clock_time = wall_clock_time; + parser->user.v2.ml_locked = ml_chart_update_begin(st); timing_step(TIMING_STEP_BEGIN2_ML); // ------------------------------------------------------------------------ // propagate it forward in v2 - if(!u->v2.stream_buffer.wb && rrdhost_has_rrdpush_sender_enabled(st->rrdhost)) - u->v2.stream_buffer = rrdset_push_metric_initialize(u->st, wall_clock_time); + if(!parser->user.v2.stream_buffer.wb && rrdhost_has_rrdpush_sender_enabled(st->rrdhost)) + parser->user.v2.stream_buffer = rrdset_push_metric_initialize(parser->user.st, wall_clock_time); - if(u->v2.stream_buffer.v2 && u->v2.stream_buffer.wb) { + if(parser->user.v2.stream_buffer.v2 && parser->user.v2.stream_buffer.wb) { // check if receiver and sender have the same number parsing capabilities - bool can_copy = stream_has_capability(u, STREAM_CAP_IEEE754) == stream_has_capability(&u->v2.stream_buffer, STREAM_CAP_IEEE754); - NUMBER_ENCODING encoding = stream_has_capability(&u->v2.stream_buffer, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_HEX; + bool can_copy = stream_has_capability(&parser->user, STREAM_CAP_IEEE754) == stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_IEEE754); + NUMBER_ENCODING encoding = stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_IEEE754) ? 
NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_HEX; - BUFFER *wb = u->v2.stream_buffer.wb; + BUFFER *wb = parser->user.v2.stream_buffer.wb; buffer_need_bytes(wb, 1024); - if(unlikely(u->v2.stream_buffer.begin_v2_added)) + if(unlikely(parser->user.v2.stream_buffer.begin_v2_added)) buffer_fast_strcat(wb, PLUGINSD_KEYWORD_END_V2 "\n", sizeof(PLUGINSD_KEYWORD_END_V2) - 1 + 1); buffer_fast_strcat(wb, PLUGINSD_KEYWORD_BEGIN_V2 " '", sizeof(PLUGINSD_KEYWORD_BEGIN_V2) - 1 + 2); @@ -1626,8 +1602,8 @@ PARSER_RC pluginsd_begin_v2(char **words, size_t num_words, void *user) { buffer_fast_strcat(wb, "\n", 1); - u->v2.stream_buffer.last_point_end_time_s = end_time; - u->v2.stream_buffer.begin_v2_added = true; + parser->user.v2.stream_buffer.last_point_end_time_s = end_time; + parser->user.v2.stream_buffer.begin_v2_added = true; } timing_step(TIMING_STEP_BEGIN2_PROPAGATE); @@ -1643,16 +1619,16 @@ PARSER_RC pluginsd_begin_v2(char **words, size_t num_words, void *user) { st->counter_done++; // these are only needed for db mode RAM, SAVE, MAP, ALLOC - st->current_entry++; - if(st->current_entry >= st->entries) - st->current_entry -= st->entries; + st->db.current_entry++; + if(st->db.current_entry >= st->db.entries) + st->db.current_entry -= st->db.entries; timing_step(TIMING_STEP_BEGIN2_STORE); return PARSER_RC_OK; } -PARSER_RC pluginsd_set_v2(char **words, size_t num_words, void *user) { +static inline PARSER_RC pluginsd_set_v2(char **words, size_t num_words, PARSER *parser) { timing_init(); char *dimension = get_word(words, num_words, 1); @@ -1661,20 +1637,18 @@ PARSER_RC pluginsd_set_v2(char **words, size_t num_words, void *user) { char *flags_str = get_word(words, num_words, 4); if(unlikely(!dimension || !collected_str || !value_str || !flags_str)) - return PLUGINSD_DISABLE_PLUGIN(user, PLUGINSD_KEYWORD_SET_V2, "missing parameters"); - - PARSER_USER_OBJECT *u = (PARSER_USER_OBJECT *) user; + return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_SET_V2, "missing parameters"); - 
RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_SET_V2); - if(unlikely(!host)) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDHOST *host = pluginsd_require_host_from_parent(parser, PLUGINSD_KEYWORD_SET_V2); + if(unlikely(!host)) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_SET_V2, PLUGINSD_KEYWORD_BEGIN_V2); - if(unlikely(!st)) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDSET *st = pluginsd_require_chart_from_parent(parser, PLUGINSD_KEYWORD_SET_V2, PLUGINSD_KEYWORD_BEGIN_V2); + if(unlikely(!st)) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); timing_step(TIMING_STEP_SET2_PREPARE); RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, PLUGINSD_KEYWORD_SET_V2); - if(unlikely(!rd)) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + if(unlikely(!rd)) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); if(unlikely(rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE | RRDDIM_FLAG_ARCHIVED))) rrddim_isnot_obsolete(st, rd); @@ -1703,11 +1677,11 @@ PARSER_RC pluginsd_set_v2(char **words, size_t num_words, void *user) { value = NAN; flags = SN_EMPTY_SLOT; - if(u->v2.ml_locked) - ml_dimension_is_anomalous(rd, u->v2.end_time, 0, false); + if(parser->user.v2.ml_locked) + ml_dimension_is_anomalous(rd, parser->user.v2.end_time, 0, false); } - else if(u->v2.ml_locked) { - if (ml_dimension_is_anomalous(rd, u->v2.end_time, value, true)) { + else if(parser->user.v2.ml_locked) { + if (ml_dimension_is_anomalous(rd, parser->user.v2.end_time, value, true)) { // clear anomaly bit: 0 -> is anomalous, 1 -> not anomalous flags &= ~((storage_number) SN_FLAG_NOT_ANOMALOUS); } @@ -1720,13 +1694,13 @@ PARSER_RC pluginsd_set_v2(char **words, size_t num_words, void *user) { // ------------------------------------------------------------------------ // propagate it forward in v2 - if(u->v2.stream_buffer.v2 && u->v2.stream_buffer.begin_v2_added && u->v2.stream_buffer.wb) { + 
if(parser->user.v2.stream_buffer.v2 && parser->user.v2.stream_buffer.begin_v2_added && parser->user.v2.stream_buffer.wb) { // check if receiver and sender have the same number parsing capabilities - bool can_copy = stream_has_capability(u, STREAM_CAP_IEEE754) == stream_has_capability(&u->v2.stream_buffer, STREAM_CAP_IEEE754); - NUMBER_ENCODING integer_encoding = stream_has_capability(&u->v2.stream_buffer, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_HEX; - NUMBER_ENCODING doubles_encoding = stream_has_capability(&u->v2.stream_buffer, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_DECIMAL; + bool can_copy = stream_has_capability(&parser->user, STREAM_CAP_IEEE754) == stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_IEEE754); + NUMBER_ENCODING integer_encoding = stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_HEX; + NUMBER_ENCODING doubles_encoding = stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_IEEE754) ? 
NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_DECIMAL; - BUFFER *wb = u->v2.stream_buffer.wb; + BUFFER *wb = parser->user.v2.stream_buffer.wb; buffer_need_bytes(wb, 1024); buffer_fast_strcat(wb, PLUGINSD_KEYWORD_SET_V2 " '", sizeof(PLUGINSD_KEYWORD_SET_V2) - 1 + 2); buffer_fast_strcat(wb, rrddim_id(rd), string_strlen(rd->id)); @@ -1750,51 +1724,50 @@ PARSER_RC pluginsd_set_v2(char **words, size_t num_words, void *user) { // ------------------------------------------------------------------------ // store it - rrddim_store_metric(rd, u->v2.end_time * USEC_PER_SEC, value, flags); - rd->last_collected_time.tv_sec = u->v2.end_time; - rd->last_collected_time.tv_usec = 0; - rd->last_collected_value = collected_value; - rd->last_stored_value = value; - rd->last_calculated_value = value; - rd->collections_counter++; - rd->updated = true; + rrddim_store_metric(rd, parser->user.v2.end_time * USEC_PER_SEC, value, flags); + rd->collector.last_collected_time.tv_sec = parser->user.v2.end_time; + rd->collector.last_collected_time.tv_usec = 0; + rd->collector.last_collected_value = collected_value; + rd->collector.last_stored_value = value; + rd->collector.last_calculated_value = value; + rd->collector.counter++; + rrddim_set_updated(rd); timing_step(TIMING_STEP_SET2_STORE); return PARSER_RC_OK; } -void pluginsd_cleanup_v2(void *user) { +void pluginsd_cleanup_v2(PARSER *parser) { // this is called when the thread is stopped while processing - pluginsd_set_chart_from_parent(user, NULL, "THREAD CLEANUP"); + pluginsd_set_chart_from_parent(parser, NULL, "THREAD CLEANUP"); } -PARSER_RC pluginsd_end_v2(char **words __maybe_unused, size_t num_words __maybe_unused, void *user) { +static inline PARSER_RC pluginsd_end_v2(char **words __maybe_unused, size_t num_words __maybe_unused, PARSER *parser) { timing_init(); - RRDHOST *host = pluginsd_require_host_from_parent(user, PLUGINSD_KEYWORD_END_V2); - if(unlikely(!host)) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDHOST *host = 
pluginsd_require_host_from_parent(parser, PLUGINSD_KEYWORD_END_V2); + if(unlikely(!host)) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - RRDSET *st = pluginsd_require_chart_from_parent(user, PLUGINSD_KEYWORD_END_V2, PLUGINSD_KEYWORD_BEGIN_V2); - if(unlikely(!st)) return PLUGINSD_DISABLE_PLUGIN(user, NULL, NULL); + RRDSET *st = pluginsd_require_chart_from_parent(parser, PLUGINSD_KEYWORD_END_V2, PLUGINSD_KEYWORD_BEGIN_V2); + if(unlikely(!st)) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL); - PARSER_USER_OBJECT *u = (PARSER_USER_OBJECT *) user; - u->data_collections_count++; + parser->user.data_collections_count++; timing_step(TIMING_STEP_END2_PREPARE); // ------------------------------------------------------------------------ // propagate the whole chart update in v1 - if(unlikely(!u->v2.stream_buffer.v2 && !u->v2.stream_buffer.begin_v2_added && u->v2.stream_buffer.wb)) - rrdset_push_metrics_v1(&u->v2.stream_buffer, st); + if(unlikely(!parser->user.v2.stream_buffer.v2 && !parser->user.v2.stream_buffer.begin_v2_added && parser->user.v2.stream_buffer.wb)) + rrdset_push_metrics_v1(&parser->user.v2.stream_buffer, st); timing_step(TIMING_STEP_END2_PUSH_V1); // ------------------------------------------------------------------------ // unblock data collection - pluginsd_unlock_previous_chart(user, PLUGINSD_KEYWORD_END_V2, false); + pluginsd_unlock_previous_chart(parser, PLUGINSD_KEYWORD_END_V2, false); rrdcontext_collected_rrdset(st); store_metric_collection_completed(); @@ -1803,7 +1776,7 @@ PARSER_RC pluginsd_end_v2(char **words __maybe_unused, size_t num_words __maybe_ // ------------------------------------------------------------------------ // propagate it forward - rrdset_push_metrics_finished(&u->v2.stream_buffer, st); + rrdset_push_metrics_finished(&parser->user.v2.stream_buffer, st); timing_step(TIMING_STEP_END2_PROPAGATE); @@ -1812,16 +1785,16 @@ PARSER_RC pluginsd_end_v2(char **words __maybe_unused, size_t num_words __maybe_ RRDDIM *rd; 
rrddim_foreach_read(rd, st) { - rd->calculated_value = 0; - rd->collected_value = 0; - rd->updated = false; - } + rd->collector.calculated_value = 0; + rd->collector.collected_value = 0; + rrddim_clear_updated(rd); + } rrddim_foreach_done(rd); // ------------------------------------------------------------------------ // reset state - u->v2 = (struct parser_user_object_v2){ 0 }; + parser->user.v2 = (struct parser_user_object_v2){ 0 }; timing_step(TIMING_STEP_END2_STORE); timing_report(); @@ -1829,19 +1802,126 @@ PARSER_RC pluginsd_end_v2(char **words __maybe_unused, size_t num_words __maybe_ return PARSER_RC_OK; } +static inline PARSER_RC pluginsd_exit(char **words __maybe_unused, size_t num_words __maybe_unused, PARSER *parser __maybe_unused) { + netdata_log_info("PLUGINSD: plugin called EXIT."); + return PARSER_RC_STOP; +} + +static inline PARSER_RC streaming_claimed_id(char **words, size_t num_words, PARSER *parser) +{ + const char *host_uuid_str = get_word(words, num_words, 1); + const char *claim_id_str = get_word(words, num_words, 2); + + if (!host_uuid_str || !claim_id_str) { + netdata_log_error("Command CLAIMED_ID came malformed, uuid = '%s', claim_id = '%s'", + host_uuid_str ? host_uuid_str : "[unset]", + claim_id_str ? claim_id_str : "[unset]"); + return PARSER_RC_ERROR; + } + + uuid_t uuid; + RRDHOST *host = parser->user.host; + + // We don't need the parsed UUID + // just do it to check the format + if(uuid_parse(host_uuid_str, uuid)) { + netdata_log_error("1st parameter (host GUID) to CLAIMED_ID command is not valid GUID. Received: \"%s\".", host_uuid_str); + return PARSER_RC_ERROR; + } + if(uuid_parse(claim_id_str, uuid) && strcmp(claim_id_str, "NULL") != 0) { + netdata_log_error("2nd parameter (Claim ID) to CLAIMED_ID command is not valid GUID. 
Received: \"%s\".", claim_id_str); + return PARSER_RC_ERROR; + } + + if(strcmp(host_uuid_str, host->machine_guid) != 0) { + netdata_log_error("Claim ID is for host \"%s\" but it came over connection for \"%s\"", host_uuid_str, host->machine_guid); + return PARSER_RC_OK; //the message is OK problem must be somewhere else + } + + rrdhost_aclk_state_lock(host); + + if (host->aclk_state.claimed_id) + freez(host->aclk_state.claimed_id); + + host->aclk_state.claimed_id = strcmp(claim_id_str, "NULL") ? strdupz(claim_id_str) : NULL; + + rrdhost_aclk_state_unlock(host); + + rrdhost_flag_set(host, RRDHOST_FLAG_METADATA_CLAIMID |RRDHOST_FLAG_METADATA_UPDATE); + + rrdpush_send_claimed_id(host); + + return PARSER_RC_OK; +} + +// ---------------------------------------------------------------------------- + +static inline bool buffered_reader_read(struct buffered_reader *reader, int fd) { +#ifdef NETDATA_INTERNAL_CHECKS + if(reader->read_buffer[reader->read_len] != '\0') + fatal("%s(): read_buffer does not start with zero", __FUNCTION__ ); +#endif + + ssize_t bytes_read = read(fd, reader->read_buffer + reader->read_len, sizeof(reader->read_buffer) - reader->read_len - 1); + if(unlikely(bytes_read <= 0)) + return false; + + reader->read_len += bytes_read; + reader->read_buffer[reader->read_len] = '\0'; + + return true; +} + +static inline bool buffered_reader_read_timeout(struct buffered_reader *reader, int fd, int timeout_ms) { + errno = 0; + struct pollfd fds[1]; + + fds[0].fd = fd; + fds[0].events = POLLIN; + + int ret = poll(fds, 1, timeout_ms); + + if (ret > 0) { + /* There is data to read */ + if (fds[0].revents & POLLIN) + return buffered_reader_read(reader, fd); + + else if(fds[0].revents & POLLERR) { + netdata_log_error("PARSER: read failed: POLLERR."); + return false; + } + else if(fds[0].revents & POLLHUP) { + netdata_log_error("PARSER: read failed: POLLHUP."); + return false; + } + else if(fds[0].revents & POLLNVAL) { + netdata_log_error("PARSER: read failed: 
POLLNVAL."); + return false; + } + + netdata_log_error("PARSER: poll() returned positive number, but POLLIN|POLLERR|POLLHUP|POLLNVAL are not set."); + return false; + } + else if (ret == 0) { + netdata_log_error("PARSER: timeout while waiting for data."); + return false; + } + + netdata_log_error("PARSER: poll() failed with code %d.", ret); + return false; +} + void pluginsd_process_thread_cleanup(void *ptr) { PARSER *parser = (PARSER *)ptr; - pluginsd_cleanup_v2(parser->user); - pluginsd_host_define_cleanup(parser->user); + pluginsd_cleanup_v2(parser); + pluginsd_host_define_cleanup(parser); rrd_collector_finished(); parser_destroy(parser); } -// New plugins.d parser - inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp_plugin_input, FILE *fp_plugin_output, int trust_durations) { int enabled = cd->unsafe.enabled; @@ -1852,13 +1932,13 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp_plugi } if (unlikely(fileno(fp_plugin_input) == -1)) { - error("input file descriptor given is not a valid stream"); + netdata_log_error("input file descriptor given is not a valid stream"); cd->serial_failures++; return 0; } if (unlikely(fileno(fp_plugin_output) == -1)) { - error("output file descriptor given is not a valid stream"); + netdata_log_error("output file descriptor given is not a valid stream"); cd->serial_failures++; return 0; } @@ -1866,38 +1946,42 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp_plugi clearerr(fp_plugin_input); clearerr(fp_plugin_output); - PARSER_USER_OBJECT user = { - .enabled = cd->unsafe.enabled, - .host = host, - .cd = cd, - .trust_durations = trust_durations - }; + PARSER *parser; + { + PARSER_USER_OBJECT user = { + .enabled = cd->unsafe.enabled, + .host = host, + .cd = cd, + .trust_durations = trust_durations + }; - // fp_plugin_output = our input; fp_plugin_input = our output - PARSER *parser = parser_init(&user, fp_plugin_output, fp_plugin_input, -1, - 
PARSER_INPUT_SPLIT, NULL); + // fp_plugin_output = our input; fp_plugin_input = our output + parser = parser_init(&user, fp_plugin_output, fp_plugin_input, -1, PARSER_INPUT_SPLIT, NULL); + } pluginsd_keywords_init(parser, PARSER_INIT_PLUGINSD); rrd_collector_started(); + size_t count = 0; + // this keeps the parser with its current value // so, parser needs to be allocated before pushing it netdata_thread_cleanup_push(pluginsd_process_thread_cleanup, parser); - user.parser = parser; - char buffer[PLUGINSD_LINE_MAX + 1]; - - while (likely(!parser_next(parser, buffer, PLUGINSD_LINE_MAX))) { - if (unlikely(!service_running(SERVICE_COLLECTORS) || parser_action(parser, buffer))) + buffered_reader_init(&parser->reader); + char buffer[PLUGINSD_LINE_MAX + 2]; + while(likely(service_running(SERVICE_COLLECTORS))) { + if (unlikely(!buffered_reader_next_line(&parser->reader, buffer, PLUGINSD_LINE_MAX + 2))) { + if(unlikely(!buffered_reader_read_timeout(&parser->reader, fileno((FILE *)parser->fp_input), 2 * 60 * MSEC_PER_SEC))) + break; + } + else if(unlikely(parser_action(parser, buffer))) break; } - // free parser with the pop function - netdata_thread_cleanup_pop(1); - - cd->unsafe.enabled = user.enabled; - size_t count = user.data_collections_count; + cd->unsafe.enabled = parser->user.enabled; + count = parser->user.data_collections_count; if (likely(count)) { cd->successful_collections += count; @@ -1906,143 +1990,187 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp_plugi else cd->serial_failures++; + // free parser with the pop function + netdata_thread_cleanup_pop(1); + return count; } -PARSER_RC pluginsd_exit(char **words __maybe_unused, size_t num_words __maybe_unused, void *user __maybe_unused) -{ - info("PLUGINSD: plugin called EXIT."); - return PARSER_RC_STOP; +void pluginsd_keywords_init(PARSER *parser, PARSER_REPERTOIRE repertoire) { + parser_init_repertoire(parser, repertoire); + + if (repertoire & (PARSER_INIT_PLUGINSD | 
PARSER_INIT_STREAMING)) + inflight_functions_init(parser); } -static void pluginsd_keywords_init_internal(PARSER *parser, PLUGINSD_KEYWORDS types, void (*add_func)(PARSER *parser, char *keyword, keyword_function func)) { +PARSER *parser_init(struct parser_user_object *user, FILE *fp_input, FILE *fp_output, int fd, + PARSER_INPUT_TYPE flags, void *ssl __maybe_unused) { + PARSER *parser; - if (types & PARSER_INIT_PLUGINSD) { - add_func(parser, PLUGINSD_KEYWORD_FLUSH, pluginsd_flush); - add_func(parser, PLUGINSD_KEYWORD_DISABLE, pluginsd_disable); + parser = callocz(1, sizeof(*parser)); + if(user) + parser->user = *user; + parser->fd = fd; + parser->fp_input = fp_input; + parser->fp_output = fp_output; +#ifdef ENABLE_HTTPS + parser->ssl_output = ssl; +#endif + parser->flags = flags; - add_func(parser, PLUGINSD_KEYWORD_HOST_DEFINE, pluginsd_host_define); - add_func(parser, PLUGINSD_KEYWORD_HOST_DEFINE_END, pluginsd_host_define_end); - add_func(parser, PLUGINSD_KEYWORD_HOST_LABEL, pluginsd_host_labels); - add_func(parser, PLUGINSD_KEYWORD_HOST, pluginsd_host); + spinlock_init(&parser->writer.spinlock); + return parser; +} - add_func(parser, PLUGINSD_KEYWORD_EXIT, pluginsd_exit); - } +PARSER_RC parser_execute(PARSER *parser, PARSER_KEYWORD *keyword, char **words, size_t num_words) { + switch(keyword->id) { + case 1: + return pluginsd_set_v2(words, num_words, parser); - if (types & (PARSER_INIT_PLUGINSD | PARSER_INIT_STREAMING)) { - // plugins.d plugins and streaming - add_func(parser, PLUGINSD_KEYWORD_CHART, pluginsd_chart); - add_func(parser, PLUGINSD_KEYWORD_DIMENSION, pluginsd_dimension); - add_func(parser, PLUGINSD_KEYWORD_VARIABLE, pluginsd_variable); - add_func(parser, PLUGINSD_KEYWORD_LABEL, pluginsd_label); - add_func(parser, PLUGINSD_KEYWORD_OVERWRITE, pluginsd_overwrite); - add_func(parser, PLUGINSD_KEYWORD_CLABEL_COMMIT, pluginsd_clabel_commit); - add_func(parser, PLUGINSD_KEYWORD_CLABEL, pluginsd_clabel); - add_func(parser, PLUGINSD_KEYWORD_FUNCTION, 
pluginsd_function); - add_func(parser, PLUGINSD_KEYWORD_FUNCTION_RESULT_BEGIN, pluginsd_function_result_begin); + case 2: + return pluginsd_begin_v2(words, num_words, parser); - add_func(parser, PLUGINSD_KEYWORD_BEGIN, pluginsd_begin); - add_func(parser, PLUGINSD_KEYWORD_SET, pluginsd_set); - add_func(parser, PLUGINSD_KEYWORD_END, pluginsd_end); + case 3: + return pluginsd_end_v2(words, num_words, parser); - inflight_functions_init(parser); - } + case 11: + return pluginsd_set(words, num_words, parser); - if (types & PARSER_INIT_STREAMING) { - add_func(parser, PLUGINSD_KEYWORD_CHART_DEFINITION_END, pluginsd_chart_definition_end); + case 12: + return pluginsd_begin(words, num_words, parser); - // replication - add_func(parser, PLUGINSD_KEYWORD_REPLAY_BEGIN, pluginsd_replay_begin); - add_func(parser, PLUGINSD_KEYWORD_REPLAY_SET, pluginsd_replay_set); - add_func(parser, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE, pluginsd_replay_rrddim_collection_state); - add_func(parser, PLUGINSD_KEYWORD_REPLAY_RRDSET_STATE, pluginsd_replay_rrdset_collection_state); - add_func(parser, PLUGINSD_KEYWORD_REPLAY_END, pluginsd_replay_end); + case 13: + return pluginsd_end(words, num_words, parser); - // streaming metrics v2 - add_func(parser, PLUGINSD_KEYWORD_BEGIN_V2, pluginsd_begin_v2); - add_func(parser, PLUGINSD_KEYWORD_SET_V2, pluginsd_set_v2); - add_func(parser, PLUGINSD_KEYWORD_END_V2, pluginsd_end_v2); - } -} + case 21: + return pluginsd_replay_set(words, num_words, parser); -void pluginsd_keywords_init(PARSER *parser, PLUGINSD_KEYWORDS types) { - pluginsd_keywords_init_internal(parser, types, parser_add_keyword); -} + case 22: + return pluginsd_replay_begin(words, num_words, parser); -struct pluginsd_user_unittest { - size_t size; - const char **hashtable; - uint32_t (*hash)(const char *s); - size_t collisions; -}; + case 23: + return pluginsd_replay_rrddim_collection_state(words, num_words, parser); + + case 24: + return pluginsd_replay_rrdset_collection_state(words, num_words, 
parser); + + case 25: + return pluginsd_replay_end(words, num_words, parser); + + case 31: + return pluginsd_dimension(words, num_words, parser); + + case 32: + return pluginsd_chart(words, num_words, parser); + + case 33: + return pluginsd_chart_definition_end(words, num_words, parser); + + case 34: + return pluginsd_clabel(words, num_words, parser); + + case 35: + return pluginsd_clabel_commit(words, num_words, parser); + + case 41: + return pluginsd_function(words, num_words, parser); + + case 42: + return pluginsd_function_result_begin(words, num_words, parser); + + case 51: + return pluginsd_label(words, num_words, parser); + + case 52: + return pluginsd_overwrite(words, num_words, parser); + + case 53: + return pluginsd_variable(words, num_words, parser); + + case 61: + return streaming_claimed_id(words, num_words, parser); + + case 71: + return pluginsd_host(words, num_words, parser); + + case 72: + return pluginsd_host_define(words, num_words, parser); + + case 73: + return pluginsd_host_define_end(words, num_words, parser); -void pluginsd_keyword_collision_check(PARSER *parser, char *keyword, keyword_function func __maybe_unused) { - struct pluginsd_user_unittest *u = parser->user; + case 74: + return pluginsd_host_labels(words, num_words, parser); - uint32_t hash = u->hash(keyword); - uint32_t slot = hash % u->size; + case 97: + return pluginsd_flush(words, num_words, parser); - if(u->hashtable[slot]) - u->collisions++; + case 98: + return pluginsd_disable(words, num_words, parser); - u->hashtable[slot] = keyword; + case 99: + return pluginsd_exit(words, num_words, parser); + + default: + fatal("Unknown keyword '%s' with id %zu", keyword->keyword, keyword->id); + } } -static struct { - const char *name; - uint32_t (*hash)(const char *s); - size_t slots_needed; -} hashers[] = { - { .name = "djb2_hash32(s)", djb2_hash32, .slots_needed = 0, }, - { .name = "fnv1_hash32(s)", fnv1_hash32, .slots_needed = 0, }, - { .name = "fnv1a_hash32(s)", fnv1a_hash32, 
.slots_needed = 0, }, - { .name = "larson_hash32(s)", larson_hash32, .slots_needed = 0, }, - { .name = "pluginsd_parser_hash32(s)", pluginsd_parser_hash32, .slots_needed = 0, }, - - // terminator - { .name = NULL, NULL, .slots_needed = 0, }, -}; +#include "gperf-hashtable.h" + +void parser_init_repertoire(PARSER *parser, PARSER_REPERTOIRE repertoire) { + parser->repertoire = repertoire; + + for(size_t i = GPERF_PARSER_MIN_HASH_VALUE ; i <= GPERF_PARSER_MAX_HASH_VALUE ;i++) { + if(gperf_keywords[i].keyword && *gperf_keywords[i].keyword && (parser->repertoire & gperf_keywords[i].repertoire)) + worker_register_job_name(gperf_keywords[i].worker_job_id, gperf_keywords[i].keyword); + } +} + +void parser_destroy(PARSER *parser) { + if (unlikely(!parser)) + return; + + dictionary_destroy(parser->inflight.functions); + freez(parser); +} int pluginsd_parser_unittest(void) { - PARSER *p; - size_t slots_to_check = 1000; - size_t i, h; - - // check for hashtable collisions - for(h = 0; hashers[h].name ;h++) { - hashers[h].slots_needed = slots_to_check * 1000000; - - for (i = 10; i < slots_to_check; i++) { - struct pluginsd_user_unittest user = { - .hash = hashers[h].hash, - .size = i, - .hashtable = callocz(i, sizeof(const char *)), - .collisions = 0, - }; - - p = parser_init(&user, NULL, NULL, -1, PARSER_INPUT_SPLIT, NULL); - pluginsd_keywords_init_internal(p, PARSER_INIT_PLUGINSD | PARSER_INIT_STREAMING, - pluginsd_keyword_collision_check); - parser_destroy(p); - - freez(user.hashtable); - - if (!user.collisions) { - hashers[h].slots_needed = i; - break; - } + PARSER *p = parser_init(NULL, NULL, NULL, -1, PARSER_INPUT_SPLIT, NULL); + pluginsd_keywords_init(p, PARSER_INIT_PLUGINSD | PARSER_INIT_STREAMING); + + char *lines[] = { + "BEGIN2 abcdefghijklmnopqr 123", + "SET2 abcdefg 0x12345678 0 0", + "SET2 hijklmnoqr 0x12345678 0 0", + "SET2 stuvwxyz 0x12345678 0 0", + "END2", + NULL, + }; + + char *words[PLUGINSD_MAX_WORDS]; + size_t iterations = 1000000; + size_t count = 0; + 
char input[PLUGINSD_LINE_MAX + 1]; + + usec_t started = now_realtime_usec(); + while(--iterations) { + for(size_t line = 0; lines[line] ;line++) { + strncpyz(input, lines[line], PLUGINSD_LINE_MAX); + size_t num_words = quoted_strings_splitter_pluginsd(input, words, PLUGINSD_MAX_WORDS); + const char *command = get_word(words, num_words, 0); + PARSER_KEYWORD *keyword = parser_find_keyword(p, command); + if(unlikely(!keyword)) + fatal("Cannot parse the line '%s'", lines[line]); + count++; } } + usec_t ended = now_realtime_usec(); - for(h = 0; hashers[h].name ;h++) { - if(hashers[h].slots_needed > 1000) - info("PARSER: hash function '%s' cannot be used without collisions under %zu slots", hashers[h].name, slots_to_check); - else - info("PARSER: hash function '%s' needs PARSER_KEYWORDS_HASHTABLE_SIZE (in parser.h) set to %zu", hashers[h].name, hashers[h].slots_needed); - } + netdata_log_info("Parsed %zu lines in %0.2f secs, %0.2f klines/sec", count, + (double)(ended - started) / (double)USEC_PER_SEC, + (double)count / ((double)(ended - started) / (double)USEC_PER_SEC) / 1000.0); - p = parser_init(NULL, NULL, NULL, -1, PARSER_INPUT_SPLIT, NULL); - pluginsd_keywords_init(p, PARSER_INIT_PLUGINSD | PARSER_INIT_STREAMING); parser_destroy(p); return 0; } diff --git a/collectors/plugins.d/pluginsd_parser.h b/collectors/plugins.d/pluginsd_parser.h index 1fdc23a0..5e1ea124 100644 --- a/collectors/plugins.d/pluginsd_parser.h +++ b/collectors/plugins.d/pluginsd_parser.h @@ -5,13 +5,39 @@ #include "daemon/common.h" +#define WORKER_PARSER_FIRST_JOB 3 + +// this has to be in-sync with the same at receiver.c +#define WORKER_RECEIVER_JOB_REPLICATION_COMPLETION (WORKER_PARSER_FIRST_JOB - 3) + +// PARSER return codes +typedef enum __attribute__ ((__packed__)) parser_rc { + PARSER_RC_OK, // Callback was successful, go on + PARSER_RC_STOP, // Callback says STOP + PARSER_RC_ERROR // Callback failed (abort rest of callbacks) +} PARSER_RC; + +typedef enum __attribute__ ((__packed__)) 
parser_input_type { + PARSER_INPUT_SPLIT = (1 << 1), + PARSER_DEFER_UNTIL_KEYWORD = (1 << 2), +} PARSER_INPUT_TYPE; + typedef enum __attribute__ ((__packed__)) { PARSER_INIT_PLUGINSD = (1 << 1), PARSER_INIT_STREAMING = (1 << 2), -} PLUGINSD_KEYWORDS; +} PARSER_REPERTOIRE; + +struct parser; +typedef PARSER_RC (*keyword_function)(char **words, size_t num_words, struct parser *parser); + +typedef struct parser_keyword { + char *keyword; + size_t id; + PARSER_REPERTOIRE repertoire; + size_t worker_job_id; +} PARSER_KEYWORD; typedef struct parser_user_object { - PARSER *parser; RRDSET *st; RRDHOST *host; void *opaque; @@ -54,9 +80,142 @@ typedef struct parser_user_object { } v2; } PARSER_USER_OBJECT; -PARSER_RC pluginsd_function(char **words, size_t num_words, void *user); -PARSER_RC pluginsd_function_result_begin(char **words, size_t num_words, void *user); +typedef struct parser { + uint8_t version; // Parser version + PARSER_REPERTOIRE repertoire; + uint32_t flags; + int fd; // Socket + size_t line; + FILE *fp_input; // Input source e.g. 
stream + FILE *fp_output; // Stream to send commands to plugin + +#ifdef ENABLE_HTTPS + NETDATA_SSL *ssl_output; +#endif + + PARSER_USER_OBJECT user; // User defined structure to hold extra state between calls + + struct buffered_reader reader; + + struct { + const char *end_keyword; + BUFFER *response; + void (*action)(struct parser *parser, void *action_data); + void *action_data; + } defer; + + struct { + DICTIONARY *functions; + usec_t smaller_timeout; + } inflight; + + struct { + SPINLOCK spinlock; + } writer; + +} PARSER; + +PARSER *parser_init(struct parser_user_object *user, FILE *fp_input, FILE *fp_output, int fd, PARSER_INPUT_TYPE flags, void *ssl); +void parser_init_repertoire(PARSER *parser, PARSER_REPERTOIRE repertoire); +void parser_destroy(PARSER *working_parser); +void pluginsd_cleanup_v2(PARSER *parser); void inflight_functions_init(PARSER *parser); -void pluginsd_keywords_init(PARSER *parser, PLUGINSD_KEYWORDS types); +void pluginsd_keywords_init(PARSER *parser, PARSER_REPERTOIRE repertoire); +PARSER_RC parser_execute(PARSER *parser, PARSER_KEYWORD *keyword, char **words, size_t num_words); + +static inline int find_first_keyword(const char *src, char *dst, int dst_size, bool *isspace_map) { + const char *s = src, *keyword_start; + + while (unlikely(isspace_map[(uint8_t)*s])) s++; + keyword_start = s; + + while (likely(*s && !isspace_map[(uint8_t)*s]) && dst_size > 1) { + *dst++ = *s++; + dst_size--; + } + *dst = '\0'; + return dst_size == 0 ? 
0 : (int) (s - keyword_start); +} + +PARSER_KEYWORD *gperf_lookup_keyword(register const char *str, register size_t len); + +static inline PARSER_KEYWORD *parser_find_keyword(PARSER *parser, const char *command) { + PARSER_KEYWORD *t = gperf_lookup_keyword(command, strlen(command)); + if(t && (t->repertoire & parser->repertoire)) + return t; + + return NULL; +} + +static inline int parser_action(PARSER *parser, char *input) { + parser->line++; + + if(unlikely(parser->flags & PARSER_DEFER_UNTIL_KEYWORD)) { + char command[PLUGINSD_LINE_MAX + 1]; + bool has_keyword = find_first_keyword(input, command, PLUGINSD_LINE_MAX, isspace_map_pluginsd); + + if(!has_keyword || strcmp(command, parser->defer.end_keyword) != 0) { + if(parser->defer.response) { + buffer_strcat(parser->defer.response, input); + if(buffer_strlen(parser->defer.response) > 10 * 1024 * 1024) { + // more than 10MB of data + // a bad plugin that did not send the end_keyword + internal_error(true, "PLUGINSD: deferred response is too big (%zu bytes). 
Stopping this plugin.", buffer_strlen(parser->defer.response)); + return 1; + } + } + return 0; + } + else { + // call the action + parser->defer.action(parser, parser->defer.action_data); + + // empty everything + parser->defer.action = NULL; + parser->defer.action_data = NULL; + parser->defer.end_keyword = NULL; + parser->defer.response = NULL; + parser->flags &= ~PARSER_DEFER_UNTIL_KEYWORD; + } + return 0; + } + + char *words[PLUGINSD_MAX_WORDS]; + size_t num_words = quoted_strings_splitter_pluginsd(input, words, PLUGINSD_MAX_WORDS); + const char *command = get_word(words, num_words, 0); + + if(unlikely(!command)) + return 0; + + PARSER_RC rc; + PARSER_KEYWORD *t = parser_find_keyword(parser, command); + if(likely(t)) { + worker_is_busy(t->worker_job_id); + rc = parser_execute(parser, t, words, num_words); + // rc = (*t->func)(words, num_words, parser); + worker_is_idle(); + } + else + rc = PARSER_RC_ERROR; + + if(rc == PARSER_RC_ERROR) { + BUFFER *wb = buffer_create(PLUGINSD_LINE_MAX, NULL); + for(size_t i = 0; i < num_words ;i++) { + if(i) buffer_fast_strcat(wb, " ", 1); + + buffer_fast_strcat(wb, "\"", 1); + const char *s = get_word(words, num_words, i); + buffer_strcat(wb, s?s:""); + buffer_fast_strcat(wb, "\"", 1); + } + + netdata_log_error("PLUGINSD: parser_action('%s') failed on line %zu: { %s } (quotes added to show parsing)", + command, parser->line, buffer_tostring(wb)); + + buffer_free(wb); + } + + return (rc == PARSER_RC_ERROR || rc == PARSER_RC_STOP); +} #endif //NETDATA_PLUGINSD_PARSER_H diff --git a/collectors/proc.plugin/multi_metadata.yaml b/collectors/proc.plugin/multi_metadata.yaml new file mode 100644 index 00000000..e78ec795 --- /dev/null +++ b/collectors/proc.plugin/multi_metadata.yaml @@ -0,0 +1,4716 @@ +name: proc.plugin +modules: + - meta: + plugin_name: proc.plugin + module_name: /proc/stat + monitored_instance: + name: proc /proc/stat + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + 
info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: 10min_cpu_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf + metric: system.cpu + info: average CPU utilization over the last 10 minutes (excluding iowait, nice and steal) + os: "linux" + - name: 10min_cpu_iowait + link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf + metric: system.cpu + info: average CPU iowait time over the last 10 minutes + os: "linux" + - name: 20min_steal_cpu + link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf + metric: system.cpu + info: average CPU steal time over the last 20 minutes + os: "linux" + - name: 10min_cpu_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf + metric: system.cpu + info: average CPU utilization over the last 10 minutes (excluding nice) + os: "freebsd" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.cpu + description: Total CPU utilization + unit: "percentage" + chart_type: stacked + dimensions: + - name: guest_nice + - name: guest + - name: steal + - name: softirq + - name: irq + - name: user + - name: system + - name: nice + - name: iowait + - name: idle + - 
name: system.intr + description: CPU Interrupts + unit: "interrupts/s" + chart_type: line + dimensions: + - name: interrupts + - name: system.ctxt + description: CPU Context Switches + unit: "context switches/s" + chart_type: line + dimensions: + - name: switches + - name: system.forks + description: Started Processes + unit: "processes/s" + chart_type: line + dimensions: + - name: started + - name: system.processes + description: System Processes + unit: "processes" + chart_type: line + dimensions: + - name: running + - name: blocked + - name: cpu.core_throttling + description: Core Thermal Throttling Events + unit: "events/s" + chart_type: line + dimensions: + - name: a dimension per cpu core + - name: cpu.package_throttling + description: Package Thermal Throttling Events + unit: "events/s" + chart_type: line + dimensions: + - name: a dimension per package + - name: cpu.cpufreq + description: Current CPU Frequency + unit: "MHz" + chart_type: line + dimensions: + - name: a dimension per cpu core + - name: cpu core + description: "" + labels: + - name: cpu + description: TBD + metrics: + - name: cpu.cpu + description: Core utilization + unit: "percentage" + chart_type: stacked + dimensions: + - name: guest_nice + - name: guest + - name: steal + - name: softirq + - name: irq + - name: user + - name: system + - name: nice + - name: iowait + - name: idle + - name: cpuidle.cpu_cstate_residency_time + description: C-state residency time + unit: "percentage" + chart_type: stacked + dimensions: + - name: a dimension per c-state + - meta: + plugin_name: proc.plugin + module_name: /proc/sys/kernel/random/entropy_avail + monitored_instance: + name: proc /proc/sys/kernel/random/entropy_avail + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + 
supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: lowest_entropy + link: https://github.com/netdata/netdata/blob/master/health/health.d/entropy.conf + metric: system.entropy + info: minimum number of entries in the random numbers pool in the last 5 minutes + os: "linux" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.entropy + description: Available Entropy + unit: "entropy" + chart_type: line + dimensions: + - name: entropy + - meta: + plugin_name: proc.plugin + module_name: /proc/uptime + monitored_instance: + name: proc /proc/uptime + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: 
Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.uptime + description: System Uptime + unit: "seconds" + chart_type: line + dimensions: + - name: uptime + - meta: + plugin_name: proc.plugin + module_name: /proc/vmstat + monitored_instance: + name: proc /proc/vmstat + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: 30min_ram_swapped_out + link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf + metric: system.swapio + info: percentage of the system RAM swapped in the last 30 minutes + os: "linux freebsd" + - name: oom_kill + link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf + metric: mem.oom_kill + info: number of out of memory kills in the last 30 minutes + os: "linux" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.swapio + description: Swap I/O + unit: "KiB/s" + chart_type: area + dimensions: + - name: in + - name: out + - name: system.pgpgio + description: Memory Paged from/to disk + unit: "KiB/s" + chart_type: area + dimensions: + - 
name: in + - name: out + - name: system.pgfaults + description: Memory Page Faults + unit: "faults/s" + chart_type: line + dimensions: + - name: minor + - name: major + - name: mem.balloon + description: Memory Ballooning Operations + unit: "KiB/s" + chart_type: line + dimensions: + - name: inflate + - name: deflate + - name: migrate + - name: mem.zswapio + description: ZSwap I/O + unit: "KiB/s" + chart_type: area + dimensions: + - name: in + - name: out + - name: mem.ksm_cow + description: KSM Copy On Write Operations + unit: "KiB/s" + chart_type: line + dimensions: + - name: swapin + - name: write + - name: mem.thp_faults + description: Transparent Huge Page Fault Allocations + unit: "events/s" + chart_type: line + dimensions: + - name: alloc + - name: fallback + - name: fallback_charge + - name: mem.thp_file + description: Transparent Huge Page File Allocations + unit: "events/s" + chart_type: line + dimensions: + - name: alloc + - name: fallback + - name: mapped + - name: fallback_charge + - name: mem.thp_zero + description: Transparent Huge Zero Page Allocations + unit: "events/s" + chart_type: line + dimensions: + - name: alloc + - name: failed + - name: mem.thp_collapse + description: Transparent Huge Pages Collapsed by khugepaged + unit: "events/s" + chart_type: line + dimensions: + - name: alloc + - name: failed + - name: mem.thp_split + description: Transparent Huge Page Splits + unit: "events/s" + chart_type: line + dimensions: + - name: split + - name: failed + - name: split_pmd + - name: split_deferred + - name: mem.thp_swapout + description: Transparent Huge Pages Swap Out + unit: "events/s" + chart_type: line + dimensions: + - name: swapout + - name: fallback + - name: mem.thp_compact + description: Transparent Huge Pages Compaction + unit: "events/s" + chart_type: line + dimensions: + - name: success + - name: fail + - name: stall + - name: mem.oom_kill + description: Out of Memory Kills + unit: "kills/s" + chart_type: line + dimensions: + - name: 
kills + - name: mem.numa + description: NUMA events + unit: "events/s" + chart_type: line + dimensions: + - name: local + - name: foreign + - name: interleave + - name: other + - name: pte_updates + - name: huge_pte_updates + - name: hint_faults + - name: hint_faults_local + - name: pages_migrated + - meta: + plugin_name: proc.plugin + module_name: /proc/interrupts + monitored_instance: + name: proc /proc/interrupts + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.interrupts + description: System interrupts + unit: "interrupts/s" + chart_type: stacked + dimensions: + - name: a dimension per device + - name: cpu core + description: "" + labels: + - name: cpu + description: TBD + metrics: + - name: cpu.interrupts + description: CPU interrupts + unit: "interrupts/s" + chart_type: stacked + dimensions: + - name: a dimension per device + - meta: + plugin_name: proc.plugin + module_name: /proc/loadavg + monitored_instance: + name: proc /proc/loadavg + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + 
info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: load_cpu_number + link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf + metric: system.load + info: number of active CPU cores in the system + os: "linux" + - name: load_average_15 + link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf + metric: system.load + info: system fifteen-minute load average + os: "linux" + - name: load_average_5 + link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf + metric: system.load + info: system five-minute load average + os: "linux" + - name: load_average_1 + link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf + metric: system.load + info: system one-minute load average + os: "linux" + - name: active_processes + link: https://github.com/netdata/netdata/blob/master/health/health.d/processes.conf + metric: system.active_processes + info: system process IDs (PID) space utilization + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.load + description: System Load Average + unit: "load" + chart_type: line + dimensions: + - name: load1 + - name: load5 + - name: load15 + - name: system.active_processes + 
description: System Active Processes + unit: "processes" + chart_type: line + dimensions: + - name: active + - meta: + plugin_name: proc.plugin + module_name: /proc/pressure + monitored_instance: + name: proc /proc/pressure + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.cpu_some_pressure + description: CPU some pressure + unit: "percentage" + chart_type: line + dimensions: + - name: some10 + - name: some60 + - name: some300 + - name: system.cpu_some_pressure_stall_time + description: CPU some pressure stall time + unit: "ms" + chart_type: line + dimensions: + - name: time + - name: system.cpu_full_pressure + description: CPU full pressure + unit: "percentage" + chart_type: line + dimensions: + - name: some10 + - name: some60 + - name: some300 + - name: system.cpu_full_pressure_stall_time + description: CPU full pressure stall time + unit: "ms" + chart_type: line + dimensions: + - name: time + - name: system.memory_some_pressure + description: Memory some pressure + unit: "percentage" + chart_type: line + dimensions: + - name: some10 + - name: 
some60 + - name: some300 + - name: system.memory_some_pressure_stall_time + description: Memory some pressure stall time + unit: "ms" + chart_type: line + dimensions: + - name: time + - name: system.memory_full_pressure + description: Memory full pressure + unit: "percentage" + chart_type: line + dimensions: + - name: some10 + - name: some60 + - name: some300 + - name: system.memory_full_pressure_stall_time + description: Memory full pressure stall time + unit: "ms" + chart_type: line + dimensions: + - name: time + - name: system.io_some_pressure + description: I/O some pressure + unit: "percentage" + chart_type: line + dimensions: + - name: some10 + - name: some60 + - name: some300 + - name: system.io_some_pressure_stall_time + description: I/O some pressure stall time + unit: "ms" + chart_type: line + dimensions: + - name: time + - name: system.io_full_pressure + description: I/O full pressure + unit: "percentage" + chart_type: line + dimensions: + - name: some10 + - name: some60 + - name: some300 + - name: system.io_full_pressure_stall_time + description: I/O full pressure stall time + unit: "ms" + chart_type: line + dimensions: + - name: time + - meta: + plugin_name: proc.plugin + module_name: /proc/softirqs + monitored_instance: + name: proc /proc/softirqs + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: 
enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.softirqs + description: System softirqs + unit: "softirqs/s" + chart_type: stacked + dimensions: + - name: a dimension per softirq + - name: cpu core + description: "" + labels: + - name: cpu + description: TBD + metrics: + - name: cpu.softirqs + description: CPU softirqs + unit: "softirqs/s" + chart_type: stacked + dimensions: + - name: a dimension per softirq + - meta: + plugin_name: proc.plugin + module_name: /proc/net/softnet_stat + monitored_instance: + name: proc /proc/net/softnet_stat + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: 1min_netdev_backlog_exceeded + link: https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf + metric: system.softnet_stat + info: average number of dropped packets in the last minute due to exceeded net.core.netdev_max_backlog + os: "linux" + - name: 1min_netdev_budget_ran_outs + link: https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf + metric: system.softnet_stat + 
info: average number of times ksoftirq ran out of sysctl net.core.netdev_budget or net.core.netdev_budget_usecs with work remaining over the last minute (this can be a cause for dropped packets) + os: "linux" + - name: 10min_netisr_backlog_exceeded + link: https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf + metric: system.softnet_stat + info: average number of drops in the last minute due to exceeded sysctl net.route.netisr_maxqlen (this can be a cause for dropped packets) + os: "freebsd" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.softnet_stat + description: System softnet_stat + unit: "events/s" + chart_type: line + dimensions: + - name: processed + - name: dropped + - name: squeezed + - name: received_rps + - name: flow_limit_count + - name: cpu core + description: "" + labels: [] + metrics: + - name: cpu.softnet_stat + description: CPU softnet_stat + unit: "events/s" + chart_type: line + dimensions: + - name: processed + - name: dropped + - name: squeezed + - name: received_rps + - name: flow_limit_count + - meta: + plugin_name: proc.plugin + module_name: /proc/meminfo + monitored_instance: + name: proc /proc/meminfo + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: 
+ folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: ram_in_use + link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf + metric: system.ram + info: system memory utilization + os: "linux" + - name: ram_in_use + link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf + metric: system.ram + info: system memory utilization + os: "freebsd" + - name: ram_available + link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf + metric: mem.available + info: percentage of estimated amount of RAM available for userspace processes, without causing swapping + os: "linux" + - name: ram_available + link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf + metric: mem.available + info: percentage of estimated amount of RAM available for userspace processes, without causing swapping + os: "freebsd" + - name: used_swap + link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf + metric: system.swap + info: swap memory utilization + os: "linux freebsd" + - name: 1hour_memory_hw_corrupted + link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf + metric: mem.hwcorrupt + info: amount of memory corrupted due to a hardware failure + os: "linux" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.ram + description: System RAM + unit: "MiB" + chart_type: stacked + dimensions: + - name: free + - name: used + - name: cached + - name: buffers + - name: mem.available + description: Available RAM for applications + unit: "MiB" + chart_type: area + dimensions: + - name: avail + - name: system.swap + description: System Swap + unit: "MiB" + chart_type: stacked + dimensions: + - name: free + - name: used + - name: mem.hwcorrupt + description: Corrupted Memory detected by ECC + 
unit: "MiB" + chart_type: line + dimensions: + - name: HardwareCorrupted + - name: mem.committed + description: Committed (Allocated) Memory + unit: "MiB" + chart_type: area + dimensions: + - name: Committed_AS + - name: mem.writeback + description: Writeback Memory + unit: "MiB" + chart_type: line + dimensions: + - name: Dirty + - name: Writeback + - name: FuseWriteback + - name: NfsWriteback + - name: Bounce + - name: mem.kernel + description: Memory Used by Kernel + unit: "MiB" + chart_type: stacked + dimensions: + - name: Slab + - name: KernelStack + - name: PageTables + - name: VmallocUsed + - name: Percpu + - name: mem.slab + description: Reclaimable Kernel Memory + unit: "MiB" + chart_type: stacked + dimensions: + - name: reclaimable + - name: unreclaimable + - name: mem.hugepage + description: Dedicated HugePages Memory + unit: "MiB" + chart_type: stacked + dimensions: + - name: free + - name: used + - name: surplus + - name: reserved + - name: mem.transparent_hugepages + description: Transparent HugePages Memory + unit: "MiB" + chart_type: stacked + dimensions: + - name: anonymous + - name: shmem + - meta: + plugin_name: proc.plugin + module_name: /proc/pagetypeinfo + monitored_instance: + name: proc /proc/pagetypeinfo + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +
troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: mem.pagetype_global + description: System orders available + unit: "B" + chart_type: stacked + dimensions: + - name: a dimension per pagesize + - name: node, zone, type + description: "" + labels: + - name: node_id + description: TBD + - name: node_zone + description: TBD + - name: node_type + description: TBD + metrics: + - name: mem.pagetype + description: pagetype_Node{node}_{zone}_{type} + unit: "B" + chart_type: stacked + dimensions: + - name: a dimension per pagesize + - meta: + plugin_name: proc.plugin + module_name: /sys/devices/system/edac/mc + monitored_instance: + name: proc /sys/devices/system/edac/mc + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: 1hour_ecc_memory_correctable + link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf + metric: mem.ecc_ce + info: number of ECC correctable errors in the last 10 minutes + os: "linux" + - name: 1hour_ecc_memory_uncorrectable + link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf + 
metric: mem.ecc_ue + info: number of ECC uncorrectable errors in the last 10 minutes + os: "linux" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: mem.ecc_ce + description: ECC Memory Correctable Errors + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per mem controller + - name: mem.ecc_ue + description: ECC Memory Uncorrectable Errors + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per mem controller + - meta: + plugin_name: proc.plugin + module_name: /sys/devices/system/node + monitored_instance: + name: proc /sys/devices/system/node + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: numa node + description: "" + labels: + - name: numa_node + description: TBD + metrics: + - name: mem.numa_nodes + description: NUMA events + unit: "events/s" + chart_type: line + dimensions: + - name: hit + - name: miss + - name: local + - name: foreign + - name: interleave + - name: other + - meta: + plugin_name: proc.plugin + module_name: /sys/kernel/mm/ksm + 
monitored_instance: + name: proc /sys/kernel/mm/ksm + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: mem.ksm + description: Kernel Same Page Merging + unit: "MiB" + chart_type: stacked + dimensions: + - name: shared + - name: unshared + - name: sharing + - name: volatile + - name: mem.ksm_savings + description: Kernel Same Page Merging Savings + unit: "MiB" + chart_type: area + dimensions: + - name: savings + - name: offered + - name: mem.ksm_ratios + description: Kernel Same Page Merging Effectiveness + unit: "percentage" + chart_type: line + dimensions: + - name: savings + - meta: + plugin_name: proc.plugin + module_name: /sys/block/zram + monitored_instance: + name: proc /sys/block/zram + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + 
additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: zram device + description: "" + labels: + - name: device + description: TBD + metrics: + - name: mem.zram_usage + description: ZRAM Memory Usage + unit: "MiB" + chart_type: area + dimensions: + - name: compressed + - name: metadata + - name: mem.zram_savings + description: ZRAM Memory Savings + unit: "MiB" + chart_type: area + dimensions: + - name: savings + - name: original + - name: mem.zram_ratio + description: ZRAM Compression Ratio (original to compressed) + unit: "ratio" + chart_type: line + dimensions: + - name: ratio + - name: mem.zram_efficiency + description: ZRAM Efficiency + unit: "percentage" + chart_type: line + dimensions: + - name: percent + - meta: + plugin_name: proc.plugin + module_name: ipc + monitored_instance: + name: proc ipc + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: 
true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: semaphores_used + link: https://github.com/netdata/netdata/blob/master/health/health.d/ipc.conf + metric: system.ipc_semaphores + info: IPC semaphore utilization + os: "linux" + - name: semaphore_arrays_used + link: https://github.com/netdata/netdata/blob/master/health/health.d/ipc.conf + metric: system.ipc_semaphore_arrays + info: IPC semaphore arrays utilization + os: "linux" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.ipc_semaphores + description: IPC Semaphores + unit: "semaphores" + chart_type: area + dimensions: + - name: semaphores + - name: system.ipc_semaphore_arrays + description: IPC Semaphore Arrays + unit: "arrays" + chart_type: area + dimensions: + - name: arrays + - name: system.message_queue_message + description: IPC Message Queue Number of Messages + unit: "messages" + chart_type: stacked + dimensions: + - name: a dimension per queue + - name: system.message_queue_bytes + description: IPC Message Queue Used Bytes + unit: "bytes" + chart_type: stacked + dimensions: + - name: a dimension per queue + - name: system.shared_memory_segments + description: IPC Shared Memory Number of Segments + unit: "segments" + chart_type: stacked + dimensions: + - name: segments + - name: system.shared_memory_bytes + description: IPC Shared Memory Used Bytes + unit: "bytes" + chart_type: stacked + dimensions: + - name: bytes + - meta: + plugin_name: proc.plugin + module_name: /proc/diskstats + monitored_instance: + name: proc /proc/diskstats + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + 
method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: 10min_disk_backlog + link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf + metric: disk.backlog + info: average backlog size of the ${label:device} disk over the last 10 minutes + os: "linux" + - name: 10min_disk_utilization + link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf + metric: disk.util + info: average percentage of time ${label:device} disk was busy over the last 10 minutes + os: "linux freebsd" + - name: bcache_cache_dirty + link: https://github.com/netdata/netdata/blob/master/health/health.d/bcache.conf + metric: disk.bcache_cache_alloc + info: percentage of cache space used for dirty data and metadata (this usually means your SSD cache is too small) + - name: bcache_cache_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/bcache.conf + metric: disk.bcache_cache_read_races + info: number of times data was read from the cache, the bucket was reused and invalidated in the last 10 minutes (when this occurs the data is reread from the backing device) + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.io + description: Disk I/O + unit: "KiB/s" + chart_type: area + dimensions: + - name: in + - name: out + - name: disk + description: "" + labels: + - name: device + description: TBD + - name: mount_point 
+ description: TBD + - name: device_type + description: TBD + metrics: + - name: disk.io + description: Disk I/O Bandwidth + unit: "KiB/s" + chart_type: area + dimensions: + - name: reads + - name: writes + - name: disk_ext.io + description: Amount of Discarded Data + unit: "KiB/s" + chart_type: area + dimensions: + - name: discards + - name: disk.ops + description: Disk Completed I/O Operations + unit: "operations/s" + chart_type: line + dimensions: + - name: reads + - name: writes + - name: disk_ext.ops + description: Disk Completed Extended I/O Operations + unit: "operations/s" + chart_type: line + dimensions: + - name: discards + - name: flushes + - name: disk.qops + description: Disk Current I/O Operations + unit: "operations" + chart_type: line + dimensions: + - name: operations + - name: disk.backlog + description: Disk Backlog + unit: "milliseconds" + chart_type: area + dimensions: + - name: backlog + - name: disk.busy + description: Disk Busy Time + unit: "milliseconds" + chart_type: area + dimensions: + - name: busy + - name: disk.util + description: Disk Utilization Time + unit: "% of time working" + chart_type: area + dimensions: + - name: utilization + - name: disk.mops + description: Disk Merged Operations + unit: "merged operations/s" + chart_type: line + dimensions: + - name: reads + - name: writes + - name: disk_ext.mops + description: Disk Merged Discard Operations + unit: "merged operations/s" + chart_type: line + dimensions: + - name: discards + - name: disk.iotime + description: Disk Total I/O Time + unit: "milliseconds/s" + chart_type: line + dimensions: + - name: reads + - name: writes + - name: disk_ext.iotime + description: Disk Total I/O Time for Extended Operations + unit: "milliseconds/s" + chart_type: line + dimensions: + - name: discards + - name: flushes + - name: disk.await + description: Average Completed I/O Operation Time + unit: "milliseconds/operation" + chart_type: line + dimensions: + - name: reads + - name: writes + - name: 
disk_ext.await + description: Average Completed Extended I/O Operation Time + unit: "milliseconds/operation" + chart_type: line + dimensions: + - name: discards + - name: flushes + - name: disk.avgsz + description: Average Completed I/O Operation Bandwidth + unit: "KiB/operation" + chart_type: area + dimensions: + - name: reads + - name: writes + - name: disk_ext.avgsz + description: Average Amount of Discarded Data + unit: "KiB/operation" + chart_type: area + dimensions: + - name: discards + - name: disk.svctm + description: Average Service Time + unit: "milliseconds/operation" + chart_type: line + dimensions: + - name: svctm + - name: disk.bcache_cache_alloc + description: BCache Cache Allocations + unit: "percentage" + chart_type: stacked + dimensions: + - name: unused + - name: dirty + - name: clean + - name: metadata + - name: undefined + - name: disk.bcache_hit_ratio + description: BCache Cache Hit Ratio + unit: "percentage" + chart_type: line + dimensions: + - name: 5min + - name: 1hour + - name: 1day + - name: ever + - name: disk.bcache_rates + description: BCache Rates + unit: "KiB/s" + chart_type: area + dimensions: + - name: congested + - name: writeback + - name: disk.bcache_size + description: BCache Cache Sizes + unit: "MiB" + chart_type: area + dimensions: + - name: dirty + - name: disk.bcache_usage + description: BCache Cache Usage + unit: "percentage" + chart_type: area + dimensions: + - name: avail + - name: disk.bcache_cache_read_races + description: BCache Cache Read Races + unit: "operations/s" + chart_type: line + dimensions: + - name: races + - name: errors + - name: disk.bcache + description: BCache Cache I/O Operations + unit: "operations/s" + chart_type: line + dimensions: + - name: hits + - name: misses + - name: collisions + - name: readaheads + - name: disk.bcache_bypass + description: BCache Cache Bypass I/O Operations + unit: "operations/s" + chart_type: line + dimensions: + - name: hits + - name: misses + - meta: + plugin_name:
proc.plugin + module_name: /proc/mdstat + monitored_instance: + name: proc /proc/mdstat + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: mdstat_last_collected + link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf + metric: md.disks + info: number of seconds since the last successful data collection + - name: mdstat_disks + link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf + metric: md.disks + info: number of devices in the down state for the ${label:device} ${label:raid_level} array. Any number > 0 indicates that the array is degraded. 
+ - name: mdstat_mismatch_cnt + link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf + metric: md.mismatch_cnt + info: number of unsynchronized blocks for the ${label:device} ${label:raid_level} array + - name: mdstat_nonredundant_last_collected + link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf + metric: md.nonredundant + info: number of seconds since the last successful data collection + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: md.health + description: Faulty Devices In MD + unit: "failed disks" + chart_type: line + dimensions: + - name: a dimension per md array + - name: md array + description: "" + labels: + - name: device + description: TBD + - name: raid_level + description: TBD + metrics: + - name: md.disks + description: Disks Stats + unit: "disks" + chart_type: stacked + dimensions: + - name: inuse + - name: down + - name: md.mismatch_cnt + description: Mismatch Count + unit: "unsynchronized blocks" + chart_type: line + dimensions: + - name: count + - name: md.status + description: Current Status + unit: "percent" + chart_type: line + dimensions: + - name: check + - name: resync + - name: recovery + - name: reshape + - name: md.expected_time_until_operation_finish + description: Approximate Time Until Finish + unit: "seconds" + chart_type: line + dimensions: + - name: finish_in + - name: md.operation_speed + description: Operation Speed + unit: "KiB/s" + chart_type: line + dimensions: + - name: speed + - name: md.nonredundant + description: Nonredundant Array Availability + unit: "boolean" + chart_type: line + dimensions: + - name: available + - meta: + plugin_name: proc.plugin + module_name: /proc/net/dev + monitored_instance: + name: proc /proc/net/dev + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + 
info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: interface_speed + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.net + info: network interface ${label:device} current speed + os: "*" + - name: 1m_received_traffic_overflow + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.net + info: average inbound utilization for the network interface ${label:device} over the last minute + os: "linux" + - name: 1m_sent_traffic_overflow + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.net + info: average outbound utilization for the network interface ${label:device} over the last minute + os: "linux" + - name: inbound_packets_dropped_ratio + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.packets + info: ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes + os: "linux" + - name: outbound_packets_dropped_ratio + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.packets + info: ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes + os: "linux" + - name: wifi_inbound_packets_dropped_ratio + link: 
https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.packets + info: ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes + os: "linux" + - name: wifi_outbound_packets_dropped_ratio + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.packets + info: ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes + os: "linux" + - name: 1m_received_packets_rate + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.packets + info: average number of packets received by the network interface ${label:device} over the last minute + os: "linux freebsd" + - name: 10s_received_packets_storm + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.packets + info: ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute + os: "linux freebsd" + - name: interface_inbound_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.errors + info: number of inbound errors for the network interface ${label:device} in the last 10 minutes + os: "freebsd" + - name: interface_outbound_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.errors + info: number of outbound errors for the network interface ${label:device} in the last 10 minutes + os: "freebsd" + - name: inbound_packets_dropped + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.drops + info: number of inbound dropped packets for the network interface ${label:device} in the last 10 minutes + os: "linux" + - name: outbound_packets_dropped + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.drops + info: number of outbound 
dropped packets for the network interface ${label:device} in the last 10 minutes + os: "linux" + - name: 10min_fifo_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf + metric: net.fifo + info: number of FIFO errors for the network interface ${label:device} in the last 10 minutes + os: "linux" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.net + description: Physical Network Interfaces Aggregated Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: received + - name: sent + - name: network device + description: "" + labels: + - name: interface_type + description: TBD + - name: device + description: TBD + metrics: + - name: net.net + description: Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: received + - name: sent + - name: net.speed + description: Interface Speed + unit: "kilobits/s" + chart_type: line + dimensions: + - name: speed + - name: net.duplex + description: Interface Duplex State + unit: "state" + chart_type: line + dimensions: + - name: full + - name: half + - name: unknown + - name: net.operstate + description: Interface Operational State + unit: "state" + chart_type: line + dimensions: + - name: up + - name: down + - name: notpresent + - name: lowerlayerdown + - name: testing + - name: dormant + - name: unknown + - name: net.carrier + description: Interface Physical Link State + unit: "state" + chart_type: line + dimensions: + - name: up + - name: down + - name: net.mtu + description: Interface MTU + unit: "octets" + chart_type: line + dimensions: + - name: mtu + - name: net.packets + description: Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: multicast + - name: net.errors + description: Interface Errors + unit: "errors/s" + chart_type: line + dimensions: + - name: inbound + - 
name: outbound + - name: net.drops + description: Interface Drops + unit: "drops/s" + chart_type: line + dimensions: + - name: inbound + - name: outbound + - name: net.fifo + description: Interface FIFO Buffer Errors + unit: "errors" + chart_type: line + dimensions: + - name: receive + - name: transmit + - name: net.compressed + description: Compressed Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: net.events + description: Network Interface Events + unit: "events/s" + chart_type: line + dimensions: + - name: frames + - name: collisions + - name: carrier + - meta: + plugin_name: proc.plugin + module_name: /proc/net/wireless + monitored_instance: + name: proc /proc/net/wireless + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: wireless device + description: "" + labels: [] + metrics: + - name: wireless.status + description: Internal status reported by interface. + unit: "status" + chart_type: line + dimensions: + - name: status + - name: wireless.link_quality + description: Overall quality of the link. 
This is an aggregate value, and depends on the driver and hardware. + unit: "value" + chart_type: line + dimensions: + - name: link_quality + - name: wireless.signal_level + description: The signal level is the wireless signal power level received by the wireless client. The closer the value is to 0, the stronger the signal. + unit: "dBm" + chart_type: line + dimensions: + - name: signal_level + - name: wireless.noise_level + description: The noise level indicates the amount of background noise in your environment. The closer the value to 0, the greater the noise level. + unit: "dBm" + chart_type: line + dimensions: + - name: noise_level + - name: wireless.discarded_packets + description: Packet discarded in the wireless adapter due to wireless specific problems. + unit: "packets/s" + chart_type: line + dimensions: + - name: nwid + - name: crypt + - name: frag + - name: retry + - name: misc + - name: wireless.missed_beacons + description: Number of missed beacons. + unit: "frames/s" + chart_type: line + dimensions: + - name: missed_beacons + - meta: + plugin_name: proc.plugin + module_name: /sys/class/infiniband + monitored_instance: + name: proc /sys/class/infiniband + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + 
metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: infiniband port + description: "" + labels: [] + metrics: + - name: ib.bytes + description: Bandwidth usage + unit: "kilobits/s" + chart_type: area + dimensions: + - name: Received + - name: Sent + - name: ib.packets + description: Packets Statistics + unit: "packets/s" + chart_type: area + dimensions: + - name: Received + - name: Sent + - name: Mcast_rcvd + - name: Mcast_sent + - name: Ucast_rcvd + - name: Ucast_sent + - name: ib.errors + description: Error Counters + unit: "errors/s" + chart_type: line + dimensions: + - name: Pkts_malformated + - name: Pkts_rcvd_discarded + - name: Pkts_sent_discarded + - name: Tick_Wait_to_send + - name: Pkts_missed_resource + - name: Buffer_overrun + - name: Link_Downed + - name: Link_recovered + - name: Link_integrity_err + - name: Link_minor_errors + - name: Pkts_rcvd_with_EBP + - name: Pkts_rcvd_discarded_by_switch + - name: Pkts_sent_discarded_by_switch + - name: ib.hwerrors + description: Hardware Errors + unit: "errors/s" + chart_type: line + dimensions: + - name: Duplicated_packets + - name: Pkt_Seq_Num_gap + - name: Ack_timer_expired + - name: Drop_missing_buffer + - name: Drop_out_of_sequence + - name: NAK_sequence_rcvd + - name: CQE_err_Req + - name: CQE_err_Resp + - name: CQE_Flushed_err_Req + - name: CQE_Flushed_err_Resp + - name: Remote_access_err_Req + - name: Remote_access_err_Resp + - name: Remote_invalid_req + - name: Local_length_err_Resp + - name: RNR_NAK_Packets + - name: CNP_Pkts_ignored + - name: RoCE_ICRC_Errors + - name: ib.hwpackets + description: Hardware Packets Statistics + unit: "packets/s" + chart_type: line + dimensions: + - name: RoCEv2_Congestion_sent + - name: RoCEv2_Congestion_rcvd + - name: IB_Congestion_handled + - name: ATOMIC_req_rcvd + - name: Connection_req_rcvd + - name: Read_req_rcvd + - name: Write_req_rcvd + - name: RoCE_retrans_adaptive + - name: RoCE_retrans_timeout + - 
name: RoCE_slow_restart + - name: RoCE_slow_restart_congestion + - name: RoCE_slow_restart_count + - meta: + plugin_name: proc.plugin + module_name: /proc/net/netstat + monitored_instance: + name: proc /proc/net/netstat + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: 1m_tcp_syn_queue_drops + link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_listen.conf + metric: ip.tcp_syn_queue + info: average number of SYN requests dropped due to the full TCP SYN queue over the last minute (SYN cookies were not enabled) + os: "linux" + - name: 1m_tcp_syn_queue_cookies + link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_listen.conf + metric: ip.tcp_syn_queue + info: average number of sent SYN cookies due to the full TCP SYN queue over the last minute + os: "linux" + - name: 1m_tcp_accept_queue_overflows + link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_listen.conf + metric: ip.tcp_accept_queue + info: average number of overflows in the TCP accept queue over the last minute + os: "linux" + - name: 1m_tcp_accept_queue_drops + link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_listen.conf + metric: ip.tcp_accept_queue + info: average
number of dropped packets in the TCP accept queue over the last minute + os: "linux" + - name: tcp_connections + link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_conn.conf + metric: ipv4.tcpsock + info: IPv4 TCP connections utilization + os: "linux" + - name: 1m_ipv4_tcp_resets_sent + link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf + metric: ipv4.tcphandshake + info: average number of sent TCP RESETS over the last minute + os: "linux" + - name: 10s_ipv4_tcp_resets_sent + link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf + metric: ipv4.tcphandshake + info: average number of sent TCP RESETS over the last 10 seconds. This can indicate a port scan, or that a service running on this host has crashed. Netdata will not send a clear notification for this alarm. + os: "linux" + - name: 1m_ipv4_tcp_resets_received + link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf + metric: ipv4.tcphandshake + info: average number of received TCP RESETS over the last minute + os: "linux freebsd" + - name: 10s_ipv4_tcp_resets_received + link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf + metric: ipv4.tcphandshake + info: average number of received TCP RESETS over the last 10 seconds. This can be an indication that a service this host needs has crashed. Netdata will not send a clear notification for this alarm. 
+ os: "linux freebsd" + - name: 1m_ipv4_udp_receive_buffer_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf + metric: ipv4.udperrors + info: average number of UDP receive buffer errors over the last minute + os: "linux freebsd" + - name: 1m_ipv4_udp_send_buffer_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf + metric: ipv4.udperrors + info: average number of UDP send buffer errors over the last minute + os: "linux" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.ip + description: IP Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: received + - name: sent + - name: ip.inerrors + description: IP Input Errors + unit: "packets/s" + chart_type: line + dimensions: + - name: noroutes + - name: truncated + - name: checksum + - name: ip.mcast + description: IP Multicast Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: received + - name: sent + - name: ip.bcast + description: IP Broadcast Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: received + - name: sent + - name: ip.mcastpkts + description: IP Multicast Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ip.bcastpkts + description: IP Broadcast Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ip.ecnpkts + description: IP ECN Statistics + unit: "packets/s" + chart_type: line + dimensions: + - name: CEP + - name: NoECTP + - name: ECTP0 + - name: ECTP1 + - name: ip.tcpmemorypressures + description: TCP Memory Pressures + unit: "events/s" + chart_type: line + dimensions: + - name: pressures + - name: ip.tcpconnaborts + description: TCP Connection Aborts + unit: "connections/s" + chart_type: line + dimensions: + - name: 
baddata + - name: userclosed + - name: nomemory + - name: timeout + - name: linger + - name: failed + - name: ip.tcpreorders + description: TCP Reordered Packets by Detection Method + unit: "packets/s" + chart_type: line + dimensions: + - name: timestamp + - name: sack + - name: fack + - name: reno + - name: ip.tcpofo + description: TCP Out-Of-Order Queue + unit: "packets/s" + chart_type: line + dimensions: + - name: inqueue + - name: dropped + - name: merged + - name: pruned + - name: ip.tcpsyncookies + description: TCP SYN Cookies + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: failed + - name: ip.tcp_syn_queue + description: TCP SYN Queue Issues + unit: "packets/s" + chart_type: line + dimensions: + - name: drops + - name: cookies + - name: ip.tcp_accept_queue + description: TCP Accept Queue Issues + unit: "packets/s" + chart_type: line + dimensions: + - name: overflows + - name: drops + - name: ipv4.packets + description: IPv4 Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: forwarded + - name: delivered + - name: ipv4.fragsout + description: IPv4 Fragments Sent + unit: "packets/s" + chart_type: line + dimensions: + - name: ok + - name: failed + - name: created + - name: ipv4.fragsin + description: IPv4 Fragments Reassembly + unit: "packets/s" + chart_type: line + dimensions: + - name: ok + - name: failed + - name: all + - name: ipv4.errors + description: IPv4 Errors + unit: "packets/s" + chart_type: line + dimensions: + - name: InDiscards + - name: OutDiscards + - name: InHdrErrors + - name: OutNoRoutes + - name: InAddrErrors + - name: InUnknownProtos + - name: ipv4.icmp + description: IPv4 ICMP Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipv4.icmp_errors + description: IPv4 ICMP Errors + unit: "packets/s" + chart_type: line + dimensions: + - name: InErrors + - name: OutErrors + - name: 
InCsumErrors + - name: ipv4.icmpmsg + description: IPv4 ICMP Messages + unit: "packets/s" + chart_type: line + dimensions: + - name: InEchoReps + - name: OutEchoReps + - name: InDestUnreachs + - name: OutDestUnreachs + - name: InRedirects + - name: OutRedirects + - name: InEchos + - name: OutEchos + - name: InRouterAdvert + - name: OutRouterAdvert + - name: InRouterSelect + - name: OutRouterSelect + - name: InTimeExcds + - name: OutTimeExcds + - name: InParmProbs + - name: OutParmProbs + - name: InTimestamps + - name: OutTimestamps + - name: InTimestampReps + - name: OutTimestampReps + - name: ipv4.tcpsock + description: IPv4 TCP Connections + unit: "active connections" + chart_type: line + dimensions: + - name: connections + - name: ipv4.tcppackets + description: IPv4 TCP Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipv4.tcperrors + description: IPv4 TCP Errors + unit: "packets/s" + chart_type: line + dimensions: + - name: InErrs + - name: InCsumErrors + - name: RetransSegs + - name: ipv4.tcpopens + description: IPv4 TCP Opens + unit: "connections/s" + chart_type: line + dimensions: + - name: active + - name: passive + - name: ipv4.tcphandshake + description: IPv4 TCP Handshake Issues + unit: "events/s" + chart_type: line + dimensions: + - name: EstabResets + - name: OutRsts + - name: AttemptFails + - name: SynRetrans + - name: ipv4.udppackets + description: IPv4 UDP Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipv4.udperrors + description: IPv4 UDP Errors + unit: "events/s" + chart_type: line + dimensions: + - name: RcvbufErrors + - name: SndbufErrors + - name: InErrors + - name: NoPorts + - name: InCsumErrors + - name: IgnoredMulti + - name: ipv4.udplite + description: IPv4 UDPLite Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipv4.udplite_errors + description: IPv4 UDPLite Errors + 
unit: "packets/s" + chart_type: line + dimensions: + - name: RcvbufErrors + - name: SndbufErrors + - name: InErrors + - name: NoPorts + - name: InCsumErrors + - name: IgnoredMulti + - name: system.ipv6 + description: IPv6 Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: received + - name: sent + - name: ipv6.packets + description: IPv6 Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: forwarded + - name: delivers + - name: ipv6.fragsout + description: IPv6 Fragments Sent + unit: "packets/s" + chart_type: line + dimensions: + - name: ok + - name: failed + - name: all + - name: ipv6.fragsin + description: IPv6 Fragments Reassembly + unit: "packets/s" + chart_type: line + dimensions: + - name: ok + - name: failed + - name: timeout + - name: all + - name: ipv6.errors + description: IPv6 Errors + unit: "packets/s" + chart_type: line + dimensions: + - name: InDiscards + - name: OutDiscards + - name: InHdrErrors + - name: InAddrErrors + - name: InUnknownProtos + - name: InTooBigErrors + - name: InTruncatedPkts + - name: InNoRoutes + - name: OutNoRoutes + - name: ipv6.udppackets + description: IPv6 UDP Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipv6.udperrors + description: IPv6 UDP Errors + unit: "events/s" + chart_type: line + dimensions: + - name: RcvbufErrors + - name: SndbufErrors + - name: InErrors + - name: NoPorts + - name: InCsumErrors + - name: IgnoredMulti + - name: ipv6.udplitepackets + description: IPv6 UDPlite Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipv6.udpliteerrors + description: IPv6 UDP Lite Errors + unit: "events/s" + chart_type: line + dimensions: + - name: RcvbufErrors + - name: SndbufErrors + - name: InErrors + - name: NoPorts + - name: InCsumErrors + - name: ipv6.mcast + description: IPv6 Multicast Bandwidth + unit: "kilobits/s" + chart_type: area
+ dimensions: + - name: received + - name: sent + - name: ipv6.bcast + description: IPv6 Broadcast Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: received + - name: sent + - name: ipv6.mcastpkts + description: IPv6 Multicast Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipv6.icmp + description: IPv6 ICMP Messages + unit: "messages/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipv6.icmpredir + description: IPv6 ICMP Redirects + unit: "redirects/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipv6.icmperrors + description: IPv6 ICMP Errors + unit: "errors/s" + chart_type: line + dimensions: + - name: InErrors + - name: OutErrors + - name: InCsumErrors + - name: InDestUnreachs + - name: InPktTooBigs + - name: InTimeExcds + - name: InParmProblems + - name: OutDestUnreachs + - name: OutPktTooBigs + - name: OutTimeExcds + - name: OutParmProblems + - name: ipv6.icmpechos + description: IPv6 ICMP Echo + unit: "messages/s" + chart_type: line + dimensions: + - name: InEchos + - name: OutEchos + - name: InEchoReplies + - name: OutEchoReplies + - name: ipv6.groupmemb + description: IPv6 ICMP Group Membership + unit: "messages/s" + chart_type: line + dimensions: + - name: InQueries + - name: OutQueries + - name: InResponses + - name: OutResponses + - name: InReductions + - name: OutReductions + - name: ipv6.icmprouter + description: IPv6 Router Messages + unit: "messages/s" + chart_type: line + dimensions: + - name: InSolicits + - name: OutSolicits + - name: InAdvertisements + - name: OutAdvertisements + - name: ipv6.icmpneighbor + description: IPv6 Neighbor Messages + unit: "messages/s" + chart_type: line + dimensions: + - name: InSolicits + - name: OutSolicits + - name: InAdvertisements + - name: OutAdvertisements + - name: ipv6.icmpmldv2 + description: IPv6 ICMP MLDv2 Reports + unit: "reports/s" + chart_type: line + 
dimensions: + - name: received + - name: sent + - name: ipv6.icmptypes + description: IPv6 ICMP Types + unit: "messages/s" + chart_type: line + dimensions: + - name: InType1 + - name: InType128 + - name: InType129 + - name: InType136 + - name: OutType1 + - name: OutType128 + - name: OutType129 + - name: OutType133 + - name: OutType135 + - name: OutType143 + - name: ipv6.ect + description: IPv6 ECT Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: InNoECTPkts + - name: InECT1Pkts + - name: InECT0Pkts + - name: InCEPkts + - name: ipv6.ect + description: IPv6 ECT Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: InNoECTPkts + - name: InECT1Pkts + - name: InECT0Pkts + - name: InCEPkts + - meta: + plugin_name: proc.plugin + module_name: /proc/net/sockstat + monitored_instance: + name: proc /proc/net/sockstat + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: tcp_orphans + link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_orphans.conf + metric: ipv4.sockstat_tcp_sockets + info: orphan IPv4 TCP sockets utilization + os: "linux" + - name: tcp_memory + link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_mem.conf + metric: ipv4.sockstat_tcp_mem 
+ info: TCP memory utilization + os: "linux" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: ipv4.sockstat_sockets + description: IPv4 Sockets Used + unit: "sockets" + chart_type: line + dimensions: + - name: used + - name: ipv4.sockstat_tcp_sockets + description: IPv4 TCP Sockets + unit: "sockets" + chart_type: line + dimensions: + - name: alloc + - name: orphan + - name: inuse + - name: timewait + - name: ipv4.sockstat_tcp_mem + description: IPv4 TCP Sockets Memory + unit: "KiB" + chart_type: area + dimensions: + - name: mem + - name: ipv4.sockstat_udp_sockets + description: IPv4 UDP Sockets + unit: "sockets" + chart_type: line + dimensions: + - name: inuse + - name: ipv4.sockstat_udp_mem + description: IPv4 UDP Sockets Memory + unit: "KiB" + chart_type: line + dimensions: + - name: mem + - name: ipv4.sockstat_udplite_sockets + description: IPv4 UDPLITE Sockets + unit: "sockets" + chart_type: line + dimensions: + - name: inuse + - name: ipv4.sockstat_raw_sockets + description: IPv4 RAW Sockets + unit: "sockets" + chart_type: line + dimensions: + - name: inuse + - name: ipv4.sockstat_frag_sockets + description: IPv4 FRAG Sockets + unit: "fragments" + chart_type: line + dimensions: + - name: inuse + - name: ipv4.sockstat_frag_mem + description: IPv4 FRAG Sockets Memory + unit: "KiB" + chart_type: area + dimensions: + - name: mem + - meta: + plugin_name: proc.plugin + module_name: /proc/net/sockstat6 + monitored_instance: + name: proc /proc/net/sockstat6 + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: ''
+ default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: ipv6.sockstat6_tcp_sockets + description: IPv6 TCP Sockets + unit: "sockets" + chart_type: line + dimensions: + - name: inuse + - name: ipv6.sockstat6_udp_sockets + description: IPv6 UDP Sockets + unit: "sockets" + chart_type: line + dimensions: + - name: inuse + - name: ipv6.sockstat6_udplite_sockets + description: IPv6 UDPLITE Sockets + unit: "sockets" + chart_type: line + dimensions: + - name: inuse + - name: ipv6.sockstat6_raw_sockets + description: IPv6 RAW Sockets + unit: "sockets" + chart_type: line + dimensions: + - name: inuse + - name: ipv6.sockstat6_frag_sockets + description: IPv6 FRAG Sockets + unit: "fragments" + chart_type: line + dimensions: + - name: inuse + - meta: + plugin_name: proc.plugin + module_name: /proc/net/ip_vs_stats + monitored_instance: + name: proc /proc/net/ip_vs_stats + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + 
description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: ipvs.sockets + description: IPVS New Connections + unit: "connections/s" + chart_type: line + dimensions: + - name: connections + - name: ipvs.packets + description: IPVS Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: ipvs.net + description: IPVS Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: received + - name: sent + - meta: + plugin_name: proc.plugin + module_name: /proc/net/rpc/nfs + monitored_instance: + name: proc /proc/net/rpc/nfs + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: nfs.net + description: NFS Client Network + unit: "operations/s" + chart_type: stacked + dimensions: + - name: 
udp + - name: tcp + - name: nfs.rpc + description: NFS Client Remote Procedure Calls Statistics + unit: "calls/s" + chart_type: line + dimensions: + - name: calls + - name: retransmits + - name: auth_refresh + - name: nfs.proc2 + description: NFS v2 Client Remote Procedure Calls + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per proc2 call + - name: nfs.proc3 + description: NFS v3 Client Remote Procedure Calls + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per proc3 call + - name: nfs.proc4 + description: NFS v4 Client Remote Procedure Calls + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per proc4 call + - meta: + plugin_name: proc.plugin + module_name: /proc/net/rpc/nfsd + monitored_instance: + name: proc /proc/net/rpc/nfsd + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: nfsd.readcache + description: NFS Server Read Cache + unit: "reads/s" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: nocache + - name: nfsd.filehandles + description: 
NFS Server File Handles + unit: "handles/s" + chart_type: line + dimensions: + - name: stale + - name: nfsd.io + description: NFS Server I/O + unit: "kilobytes/s" + chart_type: area + dimensions: + - name: read + - name: write + - name: nfsd.threads + description: NFS Server Threads + unit: "threads" + chart_type: line + dimensions: + - name: threads + - name: nfsd.net + description: NFS Server Network Statistics + unit: "packets/s" + chart_type: line + dimensions: + - name: udp + - name: tcp + - name: nfsd.rpc + description: NFS Server Remote Procedure Calls Statistics + unit: "calls/s" + chart_type: line + dimensions: + - name: calls + - name: bad_format + - name: bad_auth + - name: nfsd.proc2 + description: NFS v2 Server Remote Procedure Calls + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per proc2 call + - name: nfsd.proc3 + description: NFS v3 Server Remote Procedure Calls + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per proc3 call + - name: nfsd.proc4 + description: NFS v4 Server Remote Procedure Calls + unit: "calls/s" + chart_type: stacked + dimensions: + - name: a dimension per proc4 call + - name: nfsd.proc4ops + description: NFS v4 Server Operations + unit: "operations/s" + chart_type: stacked + dimensions: + - name: a dimension per proc4 operation + - meta: + plugin_name: proc.plugin + module_name: /proc/net/sctp/snmp + monitored_instance: + name: proc /proc/net/sctp/snmp + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + 
prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: sctp.established + description: SCTP current total number of established associations + unit: "associations" + chart_type: line + dimensions: + - name: established + - name: sctp.transitions + description: SCTP Association Transitions + unit: "transitions/s" + chart_type: line + dimensions: + - name: active + - name: passive + - name: aborted + - name: shutdown + - name: sctp.packets + description: SCTP Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: sctp.packet_errors + description: SCTP Packet Errors + unit: "packets/s" + chart_type: line + dimensions: + - name: invalid + - name: checksum + - name: sctp.fragmentation + description: SCTP Fragmentation + unit: "packets/s" + chart_type: line + dimensions: + - name: reassembled + - name: fragmented + - meta: + plugin_name: proc.plugin + module_name: /proc/net/stat/nf_conntrack + monitored_instance: + name: proc /proc/net/stat/nf_conntrack + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + 
description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: netfilter_conntrack_full + link: https://github.com/netdata/netdata/blob/master/health/health.d/netfilter.conf + metric: netfilter.conntrack_sockets + info: netfilter connection tracker table size utilization + os: "linux" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: netfilter.conntrack_sockets + description: Connection Tracker Connections + unit: "active connections" + chart_type: line + dimensions: + - name: connections + - name: netfilter.conntrack_new + description: Connection Tracker New Connections + unit: "connections/s" + chart_type: line + dimensions: + - name: new + - name: ignore + - name: invalid + - name: netfilter.conntrack_changes + description: Connection Tracker Changes + unit: "changes/s" + chart_type: line + dimensions: + - name: inserted + - name: deleted + - name: delete_list + - name: netfilter.conntrack_expect + description: Connection Tracker Expectations + unit: "expectations/s" + chart_type: line + dimensions: + - name: created + - name: deleted + - name: new + - name: netfilter.conntrack_search + description: Connection Tracker Searches + unit: "searches/s" + chart_type: line + dimensions: + - name: searched + - name: restarted + - name: found + - name: netfilter.conntrack_errors + description: Connection Tracker Errors + unit: "events/s" + chart_type: line + dimensions: + - name: icmp_error + - name: error_failed + - name: drop + - name: early_drop + - meta: + plugin_name: proc.plugin + module_name: /proc/net/stat/synproxy + monitored_instance: + name: proc /proc/net/stat/synproxy + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + 
info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: netfilter.synproxy_syn_received + description: SYNPROXY SYN Packets received + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: netfilter.synproxy_conn_reopened + description: SYNPROXY Connections Reopened + unit: "connections/s" + chart_type: line + dimensions: + - name: reopened + - name: netfilter.synproxy_cookies + description: SYNPROXY TCP Cookies + unit: "cookies/s" + chart_type: line + dimensions: + - name: valid + - name: invalid + - name: retransmits + - meta: + plugin_name: proc.plugin + module_name: /proc/spl/kstat/zfs + monitored_instance: + name: proc /proc/spl/kstat/zfs + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + 
performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: zfs_pool_state_warn + link: https://github.com/netdata/netdata/blob/master/health/health.d/zfs.conf + metric: zfspool.state + info: ZFS pool ${label:pool} state is degraded + - name: zfs_pool_state_crit + link: https://github.com/netdata/netdata/blob/master/health/health.d/zfs.conf + metric: zfspool.state + info: ZFS pool ${label:pool} state is faulted or unavail + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: zfs pool + description: "" + labels: + - name: pool + description: TBD + metrics: + - name: zfspool.state + description: ZFS pool state + unit: "boolean" + chart_type: line + dimensions: + - name: online + - name: degraded + - name: faulted + - name: offline + - name: removed + - name: unavail + - name: suspended + - meta: + plugin_name: proc.plugin + module_name: /proc/spl/kstat/zfs/arcstats + monitored_instance: + name: proc /proc/spl/kstat/zfs/arcstats + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + 
title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: zfs_memory_throttle + link: https://github.com/netdata/netdata/blob/master/health/health.d/zfs.conf + metric: zfs.memory_ops + info: number of times ZFS had to limit the ARC growth in the last 10 minutes + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: zfs.arc_size + description: ZFS ARC Size + unit: "MiB" + chart_type: area + dimensions: + - name: arcsz + - name: target + - name: min + - name: max + - name: zfs.l2_size + description: ZFS L2 ARC Size + unit: "MiB" + chart_type: area + dimensions: + - name: actual + - name: size + - name: zfs.reads + description: ZFS Reads + unit: "reads/s" + chart_type: area + dimensions: + - name: arc + - name: demand + - name: prefetch + - name: metadata + - name: l2 + - name: zfs.bytes + description: ZFS ARC L2 Read/Write Rate + unit: "KiB/s" + chart_type: area + dimensions: + - name: read + - name: write + - name: zfs.hits + description: ZFS ARC Hits + unit: "percentage" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.hits_rate + description: ZFS ARC Hits Rate + unit: "events/s" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.dhits + description: ZFS Demand Hits + unit: "percentage" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.dhits_rate + description: ZFS Demand Hits Rate + unit: "events/s" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.phits + description: ZFS Prefetch Hits + unit: "percentage" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.phits_rate + description: ZFS Prefetch Hits Rate + unit: "events/s" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.mhits + description: ZFS Metadata Hits + unit: "percentage" + 
chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.mhits_rate + description: ZFS Metadata Hits Rate + unit: "events/s" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.l2hits + description: ZFS L2 Hits + unit: "percentage" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.l2hits_rate + description: ZFS L2 Hits Rate + unit: "events/s" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.list_hits + description: ZFS List Hits + unit: "hits/s" + chart_type: area + dimensions: + - name: mfu + - name: mfu_ghost + - name: mru + - name: mru_ghost + - name: zfs.arc_size_breakdown + description: ZFS ARC Size Breakdown + unit: "percentage" + chart_type: stacked + dimensions: + - name: recent + - name: frequent + - name: zfs.memory_ops + description: ZFS Memory Operations + unit: "operations/s" + chart_type: line + dimensions: + - name: direct + - name: throttled + - name: indirect + - name: zfs.important_ops + description: ZFS Important Operations + unit: "operations/s" + chart_type: line + dimensions: + - name: evict_skip + - name: deleted + - name: mutex_miss + - name: hash_collisions + - name: zfs.actual_hits + description: ZFS Actual Cache Hits + unit: "percentage" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.actual_hits_rate + description: ZFS Actual Cache Hits Rate + unit: "events/s" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.demand_data_hits + description: ZFS Data Demand Efficiency + unit: "percentage" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.demand_data_hits_rate + description: ZFS Data Demand Efficiency Rate + unit: "events/s" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.prefetch_data_hits + description: ZFS Data Prefetch Efficiency + unit: "percentage" + chart_type: stacked + dimensions: + - 
name: hits + - name: misses + - name: zfs.prefetch_data_hits_rate + description: ZFS Data Prefetch Efficiency Rate + unit: "events/s" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: zfs.hash_elements + description: ZFS ARC Hash Elements + unit: "elements" + chart_type: line + dimensions: + - name: current + - name: max + - name: zfs.hash_chains + description: ZFS ARC Hash Chains + unit: "chains" + chart_type: line + dimensions: + - name: current + - name: max + - meta: + plugin_name: proc.plugin + module_name: /sys/fs/btrfs + monitored_instance: + name: proc /sys/fs/btrfs + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: btrfs_allocated + link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf + metric: btrfs.disk + info: percentage of allocated BTRFS physical disk space + os: "*" + - name: btrfs_data + link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf + metric: btrfs.data + info: utilization of BTRFS data space + os: "*" + - name: btrfs_metadata + link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf + metric: btrfs.metadata + info: utilization of BTRFS metadata space + os: "*" + - name: btrfs_system + link: 
https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf + metric: btrfs.system + info: utilization of BTRFS system space + os: "*" + - name: btrfs_device_read_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf + metric: btrfs.device_errors + info: number of encountered BTRFS read errors + os: "*" + - name: btrfs_device_write_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf + metric: btrfs.device_errors + info: number of encountered BTRFS write errors + os: "*" + - name: btrfs_device_flush_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf + metric: btrfs.device_errors + info: number of encountered BTRFS flush errors + os: "*" + - name: btrfs_device_corruption_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf + metric: btrfs.device_errors + info: number of encountered BTRFS corruption errors + os: "*" + - name: btrfs_device_generation_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf + metric: btrfs.device_errors + info: number of encountered BTRFS generation errors + os: "*" + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: btrfs filesystem + description: "" + labels: + - name: filesystem_uuid + description: TBD + - name: filesystem_label + description: TBD + metrics: + - name: btrfs.disk + description: BTRFS Physical Disk Allocation + unit: "MiB" + chart_type: stacked + dimensions: + - name: unallocated + - name: data_free + - name: data_used + - name: meta_free + - name: meta_used + - name: sys_free + - name: sys_used + - name: btrfs.data + description: BTRFS Data Allocation + unit: "MiB" + chart_type: stacked + dimensions: + - name: free + - name: used + - name: btrfs.metadata + description: BTRFS Metadata Allocation + unit: "MiB" + chart_type: stacked + dimensions: + - name: free + - name: 
used + - name: reserved + - name: btrfs.system + description: BTRFS System Allocation + unit: "MiB" + chart_type: stacked + dimensions: + - name: free + - name: used + - name: btrfs.commits + description: BTRFS Commits + unit: "commits" + chart_type: line + dimensions: + - name: commits + - name: btrfs.commits_perc_time + description: BTRFS Commits Time Share + unit: "percentage" + chart_type: line + dimensions: + - name: commits + - name: btrfs.commit_timings + description: BTRFS Commit Timings + unit: "ms" + chart_type: line + dimensions: + - name: last + - name: max + - name: btrfs device + description: "" + labels: + - name: device_id + description: TBD + - name: filesystem_uuid + description: TBD + - name: filesystem_label + description: TBD + metrics: + - name: btrfs.device_errors + description: BTRFS Device Errors + unit: "errors" + chart_type: line + dimensions: + - name: write_errs + - name: read_errs + - name: flush_errs + - name: corruption_errs + - name: generation_errs + - meta: + plugin_name: proc.plugin + module_name: /sys/class/power_supply + monitored_instance: + name: proc /sys/class/power_supply + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false + overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' + setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] + troubleshooting: + problems: + list: [] + alerts: + - name: linux_power_supply_capacity + link: 
https://github.com/netdata/netdata/blob/master/health/health.d/linux_power_supply.conf + metric: powersupply.capacity + info: percentage of remaining power supply capacity + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: power device + description: "" + labels: + - name: device + description: TBD + metrics: + - name: powersupply.capacity + description: Battery capacity + unit: "percentage" + chart_type: line + dimensions: + - name: capacity + - name: powersupply.charge + description: Battery charge + unit: "Ah" + chart_type: line + dimensions: + - name: empty_design + - name: empty + - name: now + - name: full + - name: full_design + - name: powersupply.energy + description: Battery energy + unit: "Wh" + chart_type: line + dimensions: + - name: empty_design + - name: empty + - name: now + - name: full + - name: full_design + - name: powersupply.voltage + description: Power supply voltage + unit: "V" + chart_type: line + dimensions: + - name: min_design + - name: min + - name: now + - name: max + - name: max_design diff --git a/collectors/proc.plugin/plugin_proc.c b/collectors/proc.plugin/plugin_proc.c index 1f52713c..c1a3293f 100644 --- a/collectors/proc.plugin/plugin_proc.c +++ b/collectors/proc.plugin/plugin_proc.c @@ -18,6 +18,7 @@ static struct proc_module { {.name = "/proc/stat", .dim = "stat", .func = do_proc_stat}, {.name = "/proc/uptime", .dim = "uptime", .func = do_proc_uptime}, {.name = "/proc/loadavg", .dim = "loadavg", .func = do_proc_loadavg}, + {.name = "/proc/sys/fs/file-nr", .dim = "file-nr", .func = do_proc_sys_fs_file_nr}, {.name = "/proc/sys/kernel/random/entropy_avail", .dim = "entropy", .func = do_proc_sys_kernel_random_entropy_avail}, // pressure metrics @@ -139,7 +140,7 @@ void *proc_main(void *ptr) if (config_get_boolean("plugin:proc", "/proc/net/dev", CONFIG_BOOLEAN_YES)) { netdev_thread = mallocz(sizeof(netdata_thread_t)); - debug(D_SYSTEM, "Starting thread %s.", 
THREAD_NETDEV_NAME); + netdata_log_debug(D_SYSTEM, "Starting thread %s.", THREAD_NETDEV_NAME); netdata_thread_create( netdev_thread, THREAD_NETDEV_NAME, NETDATA_THREAD_OPTION_JOINABLE, netdev_main, netdev_thread); } @@ -180,7 +181,7 @@ void *proc_main(void *ptr) if (unlikely(!pm->enabled)) continue; - debug(D_PROCNETDEV_LOOP, "PROC calling %s.", pm->name); + netdata_log_debug(D_PROCNETDEV_LOOP, "PROC calling %s.", pm->name); worker_is_busy(i); pm->enabled = !pm->func(localhost->rrd_update_every, hb_dt); diff --git a/collectors/proc.plugin/plugin_proc.h b/collectors/proc.plugin/plugin_proc.h index e8746ba3..2b2cabca 100644 --- a/collectors/proc.plugin/plugin_proc.h +++ b/collectors/proc.plugin/plugin_proc.h @@ -22,6 +22,7 @@ int do_proc_meminfo(int update_every, usec_t dt); int do_proc_vmstat(int update_every, usec_t dt); int do_proc_net_rpc_nfs(int update_every, usec_t dt); int do_proc_net_rpc_nfsd(int update_every, usec_t dt); +int do_proc_sys_fs_file_nr(int update_every, usec_t dt); int do_proc_sys_kernel_random_entropy_avail(int update_every, usec_t dt); int do_proc_interrupts(int update_every, usec_t dt); int do_proc_softirqs(int update_every, usec_t dt); diff --git a/collectors/proc.plugin/proc_sys_fs_file_nr.c b/collectors/proc.plugin/proc_sys_fs_file_nr.c new file mode 100644 index 00000000..570945d0 --- /dev/null +++ b/collectors/proc.plugin/proc_sys_fs_file_nr.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +int do_proc_sys_fs_file_nr(int update_every, usec_t dt) { + (void)dt; + + static procfile *ff = NULL; + + if(unlikely(!ff)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/sys/fs/file-nr"); + ff = procfile_open(config_get("plugin:proc:/proc/sys/fs/file-nr", "filename to monitor", filename), "", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return 1; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) return 0; // we return 0, so that 
we will retry to open it next time + + uint64_t allocated = str2ull(procfile_lineword(ff, 0, 0), NULL); + uint64_t unused = str2ull(procfile_lineword(ff, 0, 1), NULL); + uint64_t max = str2ull(procfile_lineword(ff, 0, 2), NULL); + + uint64_t used = allocated - unused; + + static RRDSET *st_files = NULL; + static RRDDIM *rd_used = NULL; + + if(unlikely(!st_files)) { + st_files = rrdset_create_localhost( + "system" + , "file_nr_used" + , NULL + , "files" + , NULL + , "File Descriptors" + , "files" + , PLUGIN_PROC_NAME + , "/proc/sys/fs/file-nr" + , NETDATA_CHART_PRIO_SYSTEM_FILES_NR + , update_every + , RRDSET_TYPE_LINE + ); + + rd_used = rrddim_add(st_files, "used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_files, rd_used, (collected_number )used); + rrdset_done(st_files); + + static RRDSET *st_files_utilization = NULL; + static RRDDIM *rd_utilization = NULL; + + if(unlikely(!st_files_utilization)) { + st_files_utilization = rrdset_create_localhost( + "system" + , "file_nr_utilization" + , NULL + , "files" + , NULL + , "File Descriptors Utilization" + , "percentage" + , PLUGIN_PROC_NAME + , "/proc/sys/fs/file-nr" + , NETDATA_CHART_PRIO_SYSTEM_FILES_NR + 1 + , update_every + , RRDSET_TYPE_LINE + ); + + rd_utilization = rrddim_add(st_files_utilization, "utilization", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + } + + NETDATA_DOUBLE d_used = (NETDATA_DOUBLE)used; + NETDATA_DOUBLE d_max = (NETDATA_DOUBLE)max; + NETDATA_DOUBLE percent = d_used * 100.0 / d_max; + + rrddim_set_by_pointer(st_files_utilization, rd_utilization, (collected_number)(percent * 10000)); + rrdset_done(st_files_utilization); + + return 0; +} diff --git a/collectors/profile.plugin/Makefile.am b/collectors/profile.plugin/Makefile.am new file mode 100644 index 00000000..161784b8 --- /dev/null +++ b/collectors/profile.plugin/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in 
+ +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/collectors/profile.plugin/README.md b/collectors/profile.plugin/README.md new file mode 100644 index 00000000..1f200fc3 --- /dev/null +++ b/collectors/profile.plugin/README.md @@ -0,0 +1,34 @@ +# profile.plugin + +This plugin allows someone to backfill an agent with random data. + +A user can specify: + + - The number of charts they want, + - the number of dimensions per chart, + - the desired `update every` collection frequency, + - the number of seconds to backfill, + - the number of collection threads. + +## Configuration + +Edit the `netdata.conf` configuration file using [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#use-edit-config-to-edit-configuration-files) from the [Netdata config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory), which is typically at `/etc/netdata`. + +Scroll down to the `[plugin:profile]` section to find the available options: + +``` +[plugin:profile] + update every = 5 + number of charts = 200 + number of dimensions per chart = 5 + seconds to backfill = 86400 + number of threads = 16 +``` + +The `number of threads` option will create the specified number of collection +threads. The rest of the options apply to each thread individually, e.g. the +above configuration will create 3200 charts, 16000 dimensions in total, which will be +backfilled for the duration of 1 day. + +Note that all but the 1st chart created in each thread will be marked as hidden +in order to ease the load on the dashboard's UI. 
diff --git a/collectors/profile.plugin/plugin_profile.cc b/collectors/profile.plugin/plugin_profile.cc new file mode 100644 index 00000000..5f7b22d2 --- /dev/null +++ b/collectors/profile.plugin/plugin_profile.cc @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifdef __cplusplus +extern "C" { +#endif + +#include "daemon/common.h" + +#ifdef __cplusplus +} +#endif + +#include <random> +#include <thread> +#include <vector> + +#define PLUGIN_PROFILE_NAME "profile.plugin" + +#define CONFIG_SECTION_PROFILE "plugin:profile" + +class Generator { +public: + Generator(size_t N) : Offset(0) { + std::random_device RandDev; + std::mt19937 Gen(RandDev()); + std::uniform_int_distribution<int> D(-16, 16); + + V.reserve(N); + for (size_t Idx = 0; Idx != N; Idx++) + V.push_back(D(Gen)); + } + + double getRandValue() { + return V[Offset++ % V.size()]; + } + +private: + size_t Offset; + std::vector<double> V; +}; + +class Profiler { +public: + Profiler(size_t ID, size_t NumCharts, size_t NumDimsPerChart, time_t SecondsToBackfill, int UpdateEvery) : + ID(ID), + NumCharts(NumCharts), + NumDimsPerChart(NumDimsPerChart), + SecondsToBackfill(SecondsToBackfill), + UpdateEvery(UpdateEvery), + Gen(1024 * 1024) + {} + + void create() { + char ChartId[1024]; + char DimId[1024]; + + Charts.reserve(NumCharts); + for (size_t I = 0; I != NumCharts; I++) { + size_t CID = ID + Charts.size() + 1; + + snprintfz(ChartId, 1024 - 1, "chart_%zu", CID); + + RRDSET *RS = rrdset_create_localhost( + "profile", // type + ChartId, // id + nullptr, // name, + "profile_family", // family + "profile_context", // context + "profile_title", // title + "profile_units", // units + "profile_plugin", // plugin + "profile_module", // module + 12345678 + CID, // priority + UpdateEvery, // update_every + RRDSET_TYPE_LINE // chart_type + ); + if (I != 0) + rrdset_flag_set(RS, RRDSET_FLAG_HIDDEN); + Charts.push_back(RS); + + Dimensions.reserve(NumDimsPerChart); + for (size_t J = 0; J != NumDimsPerChart; 
J++) { + snprintfz(DimId, 1024 - 1, "dim_%zu", J); + + RRDDIM *RD = rrddim_add( + RS, // st + DimId, // id + nullptr, // name + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE // algorithm + ); + + Dimensions.push_back(RD); + } + } + } + + void update(const struct timeval &Now) { + for (RRDSET *RS: Charts) { + for (RRDDIM *RD : Dimensions) { + rrddim_timed_set_by_pointer(RS, RD, Now, Gen.getRandValue()); + } + + rrdset_timed_done(RS, Now, RS->counter_done != 0); + } + } + + void run() { + #define WORKER_JOB_CREATE_CHARTS 0 + #define WORKER_JOB_UPDATE_CHARTS 1 + #define WORKER_JOB_METRIC_DURATION_TO_BACKFILL 2 + #define WORKER_JOB_METRIC_POINTS_BACKFILLED 3 + + worker_register("PROFILER"); + worker_register_job_name(WORKER_JOB_CREATE_CHARTS, "create charts"); + worker_register_job_name(WORKER_JOB_UPDATE_CHARTS, "update charts"); + worker_register_job_custom_metric(WORKER_JOB_METRIC_DURATION_TO_BACKFILL, "duration to backfill", "seconds", WORKER_METRIC_ABSOLUTE); + worker_register_job_custom_metric(WORKER_JOB_METRIC_POINTS_BACKFILLED, "points backfilled", "points", WORKER_METRIC_ABSOLUTE); + + heartbeat_t HB; + heartbeat_init(&HB); + + worker_is_busy(WORKER_JOB_CREATE_CHARTS); + create(); + + struct timeval CollectionTV; + now_realtime_timeval(&CollectionTV); + + if (SecondsToBackfill) { + CollectionTV.tv_sec -= SecondsToBackfill; + CollectionTV.tv_sec -= (CollectionTV.tv_sec % UpdateEvery); + + CollectionTV.tv_usec = 0; + } + + size_t BackfilledPoints = 0; + struct timeval NowTV, PrevTV; + now_realtime_timeval(&NowTV); + PrevTV = NowTV; + + while (service_running(SERVICE_COLLECTORS)) { + worker_is_busy(WORKER_JOB_UPDATE_CHARTS); + + update(CollectionTV); + CollectionTV.tv_sec += UpdateEvery; + + now_realtime_timeval(&NowTV); + + ++BackfilledPoints; + if (NowTV.tv_sec > PrevTV.tv_sec) { + PrevTV = NowTV; + worker_set_metric(WORKER_JOB_METRIC_POINTS_BACKFILLED, BackfilledPoints * NumCharts * NumDimsPerChart); + BackfilledPoints = 0; + } + + size_t 
RemainingSeconds = (CollectionTV.tv_sec >= NowTV.tv_sec) ? 0 : (NowTV.tv_sec - CollectionTV.tv_sec); + worker_set_metric(WORKER_JOB_METRIC_DURATION_TO_BACKFILL, RemainingSeconds); + + if (CollectionTV.tv_sec >= NowTV.tv_sec) { + worker_is_idle(); + heartbeat_next(&HB, UpdateEvery * USEC_PER_SEC); + } + } + } + +private: + size_t ID; + size_t NumCharts; + size_t NumDimsPerChart; + size_t SecondsToBackfill; + int UpdateEvery; + + Generator Gen; + std::vector<RRDSET *> Charts; + std::vector<RRDDIM *> Dimensions; +}; + +static void *subprofile_main(void* Arg) { + Profiler *P = reinterpret_cast<Profiler *>(Arg); + P->run(); + return nullptr; +} + +static void profile_main_cleanup(void *ptr) { + struct netdata_static_thread *static_thread = (struct netdata_static_thread *) ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + netdata_log_info("cleaning up..."); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +extern "C" void *profile_main(void *ptr) { + netdata_thread_cleanup_push(profile_main_cleanup, ptr); + + int UpdateEvery = (int) config_get_number(CONFIG_SECTION_PROFILE, "update every", 1); + if (UpdateEvery < localhost->rrd_update_every) + UpdateEvery = localhost->rrd_update_every; + + // pick low-default values, in case this plugin is ever enabled accidentaly. 
+ size_t NumThreads = config_get_number(CONFIG_SECTION_PROFILE, "number of threads", 2); + size_t NumCharts = config_get_number(CONFIG_SECTION_PROFILE, "number of charts", 2); + size_t NumDimsPerChart = config_get_number(CONFIG_SECTION_PROFILE, "number of dimensions per chart", 2); + size_t SecondsToBackfill = config_get_number(CONFIG_SECTION_PROFILE, "seconds to backfill", 10 * 60); + + std::vector<Profiler> Profilers; + + for (size_t Idx = 0; Idx != NumThreads; Idx++) { + Profiler P(1e8 + Idx * 1e6, NumCharts, NumDimsPerChart, SecondsToBackfill, UpdateEvery); + Profilers.push_back(P); + } + + std::vector<netdata_thread_t> Threads(NumThreads); + + for (size_t Idx = 0; Idx != NumThreads; Idx++) { + char Tag[NETDATA_THREAD_TAG_MAX + 1]; + + snprintfz(Tag, NETDATA_THREAD_TAG_MAX, "PROFILER[%zu]", Idx); + netdata_thread_create(&Threads[Idx], Tag, NETDATA_THREAD_OPTION_JOINABLE, subprofile_main, static_cast<void *>(&Profilers[Idx])); + } + + for (size_t Idx = 0; Idx != NumThreads; Idx++) + netdata_thread_join(Threads[Idx], nullptr); + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/collectors/python.d.plugin/adaptec_raid/metadata.yaml b/collectors/python.d.plugin/adaptec_raid/metadata.yaml new file mode 100644 index 00000000..5986aed6 --- /dev/null +++ b/collectors/python.d.plugin/adaptec_raid/metadata.yaml @@ -0,0 +1,98 @@ +meta: + plugin_name: python.d.plugin + module_name: adaptec_raid + monitored_instance: + name: AdaptecRAID + link: '' + categories: + - data-collection.storage-mount-points-and-filesystems + icon_filename: 'adaptec.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Assess Adaptec RAID hardware storage controllers with Netdata for RAID controller performance and operational metrics. Improve your RAID controller performance with comprehensive dashboards and anomaly detection.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: +- name: adaptec_raid_ld_status + link: https://github.com/netdata/netdata/blob/master/health/health.d/adaptec_raid.conf + metric: adaptec_raid.ld_status + info: logical device status is failed or degraded +- name: adaptec_raid_pd_state + link: https://github.com/netdata/netdata/blob/master/health/health.d/adaptec_raid.conf + metric: adaptec_raid.pd_state + info: physical device state is not online +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: adaptec_raid.ld_status + description: 'Status of logical devices (1: Failed or Degraded)' + unit: "bool" + chart_type: line + dimensions: + - name: a dimension per logical device + - name: adaptec_raid.pd_state + description: 'State of physical devices (1: not Online)' + unit: "bool" + chart_type: line + dimensions: + - name: a dimension per physical device + - name: adaptec_raid.smart_warnings + description: S.M.A.R.T warnings + unit: "count" + chart_type: line + dimensions: + - name: a dimension per physical device + - name: adaptec_raid.temperature + description: Temperature + unit: "celsius" + chart_type: line + dimensions: + - name: a dimension per physical device diff --git a/collectors/python.d.plugin/alarms/metadata.yaml b/collectors/python.d.plugin/alarms/metadata.yaml new file mode 100644 index 00000000..81afd5cc --- /dev/null +++ 
b/collectors/python.d.plugin/alarms/metadata.yaml @@ -0,0 +1,77 @@ +meta: + plugin_name: python.d.plugin + module_name: alarms + monitored_instance: + name: python.d alarms + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: alarms.status + description: Alarms ({status mapping}) + unit: "status" + chart_type: line + dimensions: + - name: a dimension per alarm + - name: alarms.status + description: Alarm Values + unit: "value" + chart_type: line + dimensions: + - name: a dimension per alarm diff --git a/collectors/python.d.plugin/am2320/metadata.yaml b/collectors/python.d.plugin/am2320/metadata.yaml new file mode 100644 index 00000000..88e86fb2 --- /dev/null +++ b/collectors/python.d.plugin/am2320/metadata.yaml @@ -0,0 +1,78 @@ +meta: + plugin_name: python.d.plugin + module_name: am2320 + monitored_instance: + name: AM2320 + link: '' + categories: + - data-collection.hardware-devices-and-sensors + icon_filename: 'microchip.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + 
most_popular: false +overview: + data_collection: + metrics_description: 'Monitor AM2320 metrics with Netdata for optimal temperature and humidity sensor performance. Improve your sensor performance with comprehensive dashboards and anomaly detection.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: am2320.temperature + description: Temperature + unit: "celsius" + chart_type: line + dimensions: + - name: temperature + - name: am2320.humidity + description: Relative Humidity + unit: "percentage" + chart_type: line + dimensions: + - name: humidity diff --git a/collectors/python.d.plugin/anomalies/metadata.yaml b/collectors/python.d.plugin/anomalies/metadata.yaml new file mode 100644 index 00000000..7bcac646 --- /dev/null +++ b/collectors/python.d.plugin/anomalies/metadata.yaml @@ -0,0 +1,85 @@ +meta: + plugin_name: python.d.plugin + module_name: anomalies + monitored_instance: + name: python.d anomalies + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + 
default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: +- name: anomalies_anomaly_probabilities + link: https://github.com/netdata/netdata/blob/master/health/health.d/anomalies.conf + metric: anomalies.probability + info: average anomaly probability over the last 2 minutes +- name: anomalies_anomaly_flags + link: https://github.com/netdata/netdata/blob/master/health/health.d/anomalies.conf + metric: anomalies.anomaly + info: number of anomalies in the last 2 minutes +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: anomalies.probability + description: Anomaly Probability + unit: "probability" + chart_type: line + dimensions: + - name: a dimension per probability + - name: anomalies.anomaly + description: Anomaly + unit: "count" + chart_type: stacked + dimensions: + - name: a dimension per anomaly diff --git a/collectors/python.d.plugin/beanstalk/metadata.yaml b/collectors/python.d.plugin/beanstalk/metadata.yaml new file mode 100644 index 00000000..d8730bb8 --- /dev/null +++ b/collectors/python.d.plugin/beanstalk/metadata.yaml @@ -0,0 +1,194 @@ +meta: + plugin_name: python.d.plugin + module_name: beanstalk + monitored_instance: + name: Beanstalk + link: '' + categories: + - data-collection.message-brokers + icon_filename: 'beanstalk.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor Beanstalk metrics to enhance job queueing and processing 
efficiency. Track job rates, processing times, and queue lengths for better task management.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: +- name: beanstalk_server_buried_jobs + link: https://github.com/netdata/netdata/blob/master/health/health.d/beanstalkd.conf + metric: beanstalk.current_jobs + info: number of buried jobs across all tubes. You need to manually kick them so they can be processed. Presence of buried jobs in a tube does not affect new jobs. +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: beanstalk.cpu_usage + description: Cpu Usage + unit: "cpu time" + chart_type: area + dimensions: + - name: user + - name: system + - name: beanstalk.jobs_rate + description: Jobs Rate + unit: "jobs/s" + chart_type: line + dimensions: + - name: total + - name: timeouts + - name: beanstalk.connections_rate + description: Connections Rate + unit: "connections/s" + chart_type: area + dimensions: + - name: connections + - name: beanstalk.commands_rate + description: Commands Rate + unit: "commands/s" + chart_type: stacked + dimensions: + - name: put + - name: peek + - name: peek-ready + - name: peek-delayed + - name: peek-buried + - name: reserve + - name: use + - name: watch + - name: ignore + - name: delete + - name: bury + - name: kick + - name: stats + - name: stats-job + - name: stats-tube + - name: list-tubes + - name: list-tube-used + - name: 
list-tubes-watched + - name: pause-tube + - name: beanstalk.connections_rate + description: Current Tubes + unit: "tubes" + chart_type: area + dimensions: + - name: tubes + - name: beanstalk.current_jobs + description: Current Jobs + unit: "jobs" + chart_type: stacked + dimensions: + - name: urgent + - name: ready + - name: reserved + - name: delayed + - name: buried + - name: beanstalk.current_connections + description: Current Connections + unit: "connections" + chart_type: line + dimensions: + - name: written + - name: producers + - name: workers + - name: waiting + - name: beanstalk.binlog + description: Binlog + unit: "records/s" + chart_type: line + dimensions: + - name: written + - name: migrated + - name: beanstalk.uptime + description: seconds + unit: "seconds" + chart_type: line + dimensions: + - name: uptime + - name: tube + description: "" + labels: [] + metrics: + - name: beanstalk.jobs_rate + description: Jobs Rate + unit: "jobs/s" + chart_type: area + dimensions: + - name: jobs + - name: beanstalk.jobs + description: Jobs + unit: "jobs" + chart_type: stacked + dimensions: + - name: urgent + - name: ready + - name: reserved + - name: delayed + - name: buried + - name: beanstalk.connections + description: Connections + unit: "connections" + chart_type: stacked + dimensions: + - name: using + - name: waiting + - name: watching + - name: beanstalk.commands + description: Commands + unit: "commands/s" + chart_type: stacked + dimensions: + - name: deletes + - name: pauses + - name: beanstalk.pause + description: Pause + unit: "seconds" + chart_type: stacked + dimensions: + - name: since + - name: left diff --git a/collectors/python.d.plugin/bind_rndc/metadata.yaml b/collectors/python.d.plugin/bind_rndc/metadata.yaml new file mode 100644 index 00000000..72c3acff --- /dev/null +++ b/collectors/python.d.plugin/bind_rndc/metadata.yaml @@ -0,0 +1,105 @@ +meta: + plugin_name: python.d.plugin + module_name: bind_rndc + monitored_instance: + name: ISCBind (RNDC) + 
link: '' + categories: + - data-collection.dns-and-dhcp-servers + icon_filename: 'isc.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor ISCBind (RNDC) performance for optimal DNS server operations. Monitor query rates, response times, and error rates to ensure reliable DNS service delivery.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: +- name: bind_rndc_stats_file_size + link: https://github.com/netdata/netdata/blob/master/health/health.d/bind_rndc.conf + metric: bind_rndc.stats_size + info: BIND statistics-file size +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: bind_rndc.name_server_statistics + description: Name Server Statistics + unit: "stats" + chart_type: line + dimensions: + - name: requests + - name: rejected_queries + - name: success + - name: failure + - name: responses + - name: duplicate + - name: recursion + - name: nxrrset + - name: nxdomain + - name: non_auth_answer + - name: auth_answer + - name: dropped_queries + - name: bind_rndc.incoming_queries + description: Incoming queries + unit: "queries" + chart_type: line + dimensions: + - name: a dimension per incoming query type + - name: bind_rndc.outgoing_queries + description: Outgoing queries + unit: "queries" + 
chart_type: line + dimensions: + - name: a dimension per outgoing query type + - name: bind_rndc.stats_size + description: Named Stats File Size + unit: "MiB" + chart_type: line + dimensions: + - name: stats_size diff --git a/collectors/python.d.plugin/boinc/metadata.yaml b/collectors/python.d.plugin/boinc/metadata.yaml new file mode 100644 index 00000000..a0a7bb32 --- /dev/null +++ b/collectors/python.d.plugin/boinc/metadata.yaml @@ -0,0 +1,125 @@ +meta: + plugin_name: python.d.plugin + module_name: boinc + monitored_instance: + name: BOINC + link: '' + categories: + - data-collection.distributed-computing-systems + icon_filename: 'bolt.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Study BOINC metrics to gain insights into volunteer computing projects. Examine computation times, task completion rates, and project statuses to enhance volunteer computing efforts.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: +- name: boinc_total_tasks + link: https://github.com/netdata/netdata/blob/master/health/health.d/boinc.conf + metric: boinc.tasks + info: average number of total tasks over the last 10 minutes + os: "*" +- name: boinc_active_tasks + link: https://github.com/netdata/netdata/blob/master/health/health.d/boinc.conf + metric: boinc.tasks + info: average number of active tasks over the last 10 minutes + os: "*" +- name: boinc_compute_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/boinc.conf + metric: boinc.states + info: average number of compute errors over the last 10 minutes + os: "*" +- name: boinc_upload_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/boinc.conf + metric: boinc.states + info: average number of failed uploads over the last 10 minutes + os: "*" +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: boinc.tasks + description: Overall Tasks + unit: "tasks" + chart_type: line + dimensions: + - name: Total + - name: Active + - name: boinc.states + description: Tasks per State + unit: "tasks" + chart_type: line + dimensions: + - name: New + - name: Downloading + - name: Ready to Run + - name: Compute Errors + - name: Uploading + - name: Uploaded + - name: Aborted + - name: Failed Uploads + - name: boinc.sched + description: Tasks per Scheduler 
State + unit: "tasks" + chart_type: line + dimensions: + - name: Uninitialized + - name: Preempted + - name: Scheduled + - name: boinc.process + description: Tasks per Process State + unit: "tasks" + chart_type: line + dimensions: + - name: Uninitialized + - name: Executing + - name: Suspended + - name: Aborted + - name: Quit + - name: Copy Pending diff --git a/collectors/python.d.plugin/ceph/metadata.yaml b/collectors/python.d.plugin/ceph/metadata.yaml new file mode 100644 index 00000000..eabf6b26 --- /dev/null +++ b/collectors/python.d.plugin/ceph/metadata.yaml @@ -0,0 +1,164 @@ +meta: + plugin_name: python.d.plugin + module_name: ceph + monitored_instance: + name: Ceph + link: '' + categories: + - data-collection.storage-mount-points-and-filesystems + icon_filename: 'ceph.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor Ceph metrics for efficient distributed storage system performance. Keep tabs on cluster health, data redundancy, and latency to ensure reliable storage operations.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: +- name: ceph_cluster_space_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/ceph.conf + metric: ceph.general_usage + info: cluster disk space utilization +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: ceph.general_usage + description: Ceph General Space + unit: "KiB" + chart_type: stacked + dimensions: + - name: avail + - name: used + - name: ceph.general_objects + description: Ceph General Objects + unit: "objects" + chart_type: area + dimensions: + - name: cluster + - name: ceph.general_bytes + description: Ceph General Read/Write Data/s + unit: "KiB/s" + chart_type: area + dimensions: + - name: read + - name: write + - name: ceph.general_operations + description: Ceph General Read/Write Operations/s + unit: "operations" + chart_type: area + dimensions: + - name: read + - name: write + - name: ceph.general_latency + description: Ceph General Apply/Commit latency + unit: "milliseconds" + chart_type: area + dimensions: + - name: apply + - name: commit + - name: ceph.pool_usage + description: Ceph Pools + unit: "KiB" + chart_type: line + dimensions: + - name: a dimension per Ceph Pool + - name: ceph.pool_objects + description: Ceph Pools + unit: "objects" + chart_type: line + dimensions: + - name: a dimension per Ceph Pool + - name: ceph.pool_read_bytes + description: Ceph Read 
Pool Data/s + unit: "KiB/s" + chart_type: area + dimensions: + - name: a dimension per Ceph Pool + - name: ceph.pool_write_bytes + description: Ceph Write Pool Data/s + unit: "KiB/s" + chart_type: area + dimensions: + - name: a dimension per Ceph Pool + - name: ceph.pool_read_operations + description: Ceph Read Pool Operations/s + unit: "operations" + chart_type: area + dimensions: + - name: a dimension per Ceph Pool + - name: ceph.pool_write_operations + description: Ceph Write Pool Operations/s + unit: "operations" + chart_type: area + dimensions: + - name: a dimension per Ceph Pool + - name: ceph.osd_usage + description: Ceph OSDs + unit: "KiB" + chart_type: line + dimensions: + - name: a dimension per Ceph OSD + - name: ceph.osd_size + description: Ceph OSDs size + unit: "KiB" + chart_type: line + dimensions: + - name: a dimension per Ceph OSD + - name: ceph.apply_latency + description: Ceph OSDs apply latency + unit: "milliseconds" + chart_type: line + dimensions: + - name: a dimension per Ceph OSD + - name: ceph.commit_latency + description: Ceph OSDs commit latency + unit: "milliseconds" + chart_type: line + dimensions: + - name: a dimension per Ceph OSD diff --git a/collectors/python.d.plugin/changefinder/metadata.yaml b/collectors/python.d.plugin/changefinder/metadata.yaml new file mode 100644 index 00000000..99b28e48 --- /dev/null +++ b/collectors/python.d.plugin/changefinder/metadata.yaml @@ -0,0 +1,77 @@ +meta: + plugin_name: python.d.plugin + module_name: changefinder + monitored_instance: + name: python.d changefinder + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + 
description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: changefinder.scores + description: ChangeFinder + unit: "score" + chart_type: line + dimensions: + - name: a dimension per chart + - name: changefinder.flags + description: ChangeFinder + unit: "flag" + chart_type: stacked + dimensions: + - name: a dimension per chart diff --git a/collectors/python.d.plugin/dovecot/metadata.yaml b/collectors/python.d.plugin/dovecot/metadata.yaml new file mode 100644 index 00000000..35d820ef --- /dev/null +++ b/collectors/python.d.plugin/dovecot/metadata.yaml @@ -0,0 +1,146 @@ +meta: + plugin_name: python.d.plugin + module_name: dovecot + monitored_instance: + name: Dovecot + link: '' + categories: + - data-collection.mail-servers + icon_filename: 'dovecot.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Examine Dovecot metrics for insights into IMAP and POP3 server operations. Analyze connection statuses, mailbox operations, and error rates for efficient mail server operations.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: dovecot.sessions + description: Dovecot Active Sessions + unit: "number" + chart_type: line + dimensions: + - name: active sessions + - name: dovecot.logins + description: Dovecot Logins + unit: "number" + chart_type: line + dimensions: + - name: logins + - name: dovecot.commands + description: Dovecot Commands + unit: "commands" + chart_type: line + dimensions: + - name: commands + - name: dovecot.faults + description: Dovecot Page Faults + unit: "faults" + chart_type: line + dimensions: + - name: minor + - name: major + - name: dovecot.context_switches + description: Dovecot Context Switches + unit: "switches" + chart_type: line + dimensions: + - name: voluntary + - name: involuntary + - name: dovecot.io + description: Dovecot Disk I/O + unit: "KiB/s" + chart_type: area + dimensions: + - name: read + - name: write + - name: dovecot.net + description: Dovecot Network Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: read + - name: write + - name: dovecot.syscalls + description: Dovecot Number of SysCalls + unit: "syscalls/s" + chart_type: line + dimensions: + - name: read + - name: write + - name: dovecot.lookup + description: Dovecot Lookups + unit: "number/s" + chart_type: stacked + dimensions: + - name: path + - name: attr + - 
name: dovecot.cache + description: Dovecot Cache Hits + unit: "hits/s" + chart_type: line + dimensions: + - name: hits + - name: dovecot.auth + description: Dovecot Authentications + unit: "attempts" + chart_type: stacked + dimensions: + - name: ok + - name: failed + - name: dovecot.auth_cache + description: Dovecot Authentication Cache + unit: "number" + chart_type: stacked + dimensions: + - name: hit + - name: miss diff --git a/collectors/python.d.plugin/exim/metadata.yaml b/collectors/python.d.plugin/exim/metadata.yaml new file mode 100644 index 00000000..092479a0 --- /dev/null +++ b/collectors/python.d.plugin/exim/metadata.yaml @@ -0,0 +1,72 @@ +meta: + plugin_name: python.d.plugin + module_name: exim + monitored_instance: + name: Exim + link: '' + categories: + - data-collection.mail-servers + icon_filename: 'exim.jpg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor Exim metrics for efficient mail transfer' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: exim.qemails + description: Exim Queue Emails + unit: "emails" + chart_type: line + dimensions: + - name: emails diff --git a/collectors/python.d.plugin/fail2ban/metadata.yaml 
b/collectors/python.d.plugin/fail2ban/metadata.yaml new file mode 100644 index 00000000..1c906c67 --- /dev/null +++ b/collectors/python.d.plugin/fail2ban/metadata.yaml @@ -0,0 +1,84 @@ +meta: + plugin_name: python.d.plugin + module_name: fail2ban + monitored_instance: + name: Fail2ban + link: '' + categories: + - data-collection.authentication-and-authorization + icon_filename: 'fail2ban.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor Fail2ban performance for prime intrusion prevention operations. Monitor ban counts, jail statuses, and failed login attempts to ensure robust network security.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: fail2ban.faile_attempts + description: Failed attempts + unit: "attempts/s" + chart_type: line + dimensions: + - name: a dimension per jail + - name: fail2ban.bans + description: Bans + unit: "bans/s" + chart_type: line + dimensions: + - name: a dimension per jail + - name: fail2ban.banned_ips + description: Banned IP addresses (since the last restart of netdata) + unit: "ips" + chart_type: line + dimensions: + - name: a dimension per jail diff --git a/collectors/python.d.plugin/gearman/metadata.yaml 
b/collectors/python.d.plugin/gearman/metadata.yaml new file mode 100644 index 00000000..49d4e27b --- /dev/null +++ b/collectors/python.d.plugin/gearman/metadata.yaml @@ -0,0 +1,89 @@ +meta: + plugin_name: python.d.plugin + module_name: gearman + monitored_instance: + name: Gearman + link: '' + categories: + - data-collection.distributed-computing-systems + icon_filename: 'gearman.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor Gearman metrics for proficient system task distribution. Track job counts, worker statuses, and queue lengths for effective distributed task management.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: +- name: gearman_workers_queued + link: https://github.com/netdata/netdata/blob/master/health/health.d/gearman.conf + metric: gearman.single_job + info: average number of queued jobs over the last 10 minutes +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: gearman.total_jobs + description: Total Jobs + unit: "Jobs" + chart_type: line + dimensions: + - name: Pending + - name: Running + - name: gearman job + description: "" + labels: [] + metrics: + - name: gearman.single_job + description: '{job_name}' + unit: "Jobs" + chart_type: stacked + dimensions: + - name: Pending + - 
name: Idle + - name: Runnning diff --git a/collectors/python.d.plugin/go_expvar/metadata.yaml b/collectors/python.d.plugin/go_expvar/metadata.yaml new file mode 100644 index 00000000..31b85fa1 --- /dev/null +++ b/collectors/python.d.plugin/go_expvar/metadata.yaml @@ -0,0 +1,109 @@ +meta: + plugin_name: python.d.plugin + module_name: go_expvar + monitored_instance: + name: Go applications + link: '' + categories: + - data-collection.apm + icon_filename: 'go.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor Go applications performance for optimal Go language software operations. Monitor runtime statistics, garbage collection, and memory usage to enhance Go application performance.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: expvar.memstats.heap + description: 'memory: size of heap memory structures' + unit: "KiB" + chart_type: line + dimensions: + - name: alloc + - name: inuse + - name: expvar.memstats.stack + description: 'memory: size of stack memory structures' + unit: "KiB" + chart_type: line + dimensions: + - name: inuse + - name: expvar.memstats.mspan + description: 'memory: size of mspan memory structures' + unit: "KiB" + chart_type: 
line + dimensions: + - name: inuse + - name: expvar.memstats.mcache + description: 'memory: size of mcache memory structures' + unit: "KiB" + chart_type: line + dimensions: + - name: inuse + - name: expvar.memstats.live_objects + description: 'memory: number of live objects' + unit: "objects" + chart_type: line + dimensions: + - name: live + - name: expvar.memstats.sys + description: 'memory: size of reserved virtual address space' + unit: "KiB" + chart_type: line + dimensions: + - name: sys + - name: expvar.memstats.gc_pauses + description: 'memory: average duration of GC pauses' + unit: "ns" + chart_type: line + dimensions: + - name: avg diff --git a/collectors/python.d.plugin/haproxy/metadata.yaml b/collectors/python.d.plugin/haproxy/metadata.yaml new file mode 100644 index 00000000..401313e9 --- /dev/null +++ b/collectors/python.d.plugin/haproxy/metadata.yaml @@ -0,0 +1,254 @@ +meta: + plugin_name: python.d.plugin + module_name: haproxy + monitored_instance: + name: HAProxy + link: '' + categories: + - data-collection.web-servers-and-web-proxies + icon_filename: 'haproxy.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor HAProxy performance for ideal load balancing operations. Monitor session rates, queue lengths, and error rates to maintain balanced network traffic.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: +- name: haproxy_backend_server_status + link: https://github.com/netdata/netdata/blob/master/health/health.d/haproxy.conf + metric: haproxy_hs.down + info: average number of failed haproxy backend servers over the last 10 seconds +- name: haproxy_backend_status + link: https://github.com/netdata/netdata/blob/master/health/health.d/haproxy.conf + metric: haproxy_hb.down + info: average number of failed haproxy backends over the last 10 seconds +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: haproxy_f.bin + description: Kilobytes In + unit: "KiB/s" + chart_type: line + dimensions: + - name: a dimension per frontend server + - name: haproxy_f.bout + description: Kilobytes Out + unit: "KiB/s" + chart_type: line + dimensions: + - name: a dimension per frontend server + - name: haproxy_f.scur + description: Sessions Active + unit: "sessions" + chart_type: line + dimensions: + - name: a dimension per frontend server + - name: haproxy_f.qcur + description: Session In Queue + unit: "sessions" + chart_type: line + dimensions: + - name: a dimension per frontend server + - name: haproxy_f.hrsp_1xx + description: HTTP responses with 1xx code + unit: "responses/s" + chart_type: line + dimensions: + - name: a dimension per frontend server + - name: haproxy_f.hrsp_2xx + description: HTTP responses with 2xx code + unit: 
"responses/s" + chart_type: line + dimensions: + - name: a dimension per frontend server + - name: haproxy_f.hrsp_3xx + description: HTTP responses with 3xx code + unit: "responses/s" + chart_type: line + dimensions: + - name: a dimension per frontend server + - name: haproxy_f.hrsp_4xx + description: HTTP responses with 4xx code + unit: "responses/s" + chart_type: line + dimensions: + - name: a dimension per frontend server + - name: haproxy_f.hrsp_5xx + description: HTTP responses with 5xx code + unit: "responses/s" + chart_type: line + dimensions: + - name: a dimension per frontend server + - name: haproxy_f.hrsp_other + description: HTTP responses with other codes (protocol error) + unit: "responses/s" + chart_type: line + dimensions: + - name: a dimension per frontend server + - name: haproxy_f.hrsp_total + description: HTTP responses + unit: "responses" + chart_type: line + dimensions: + - name: a dimension per frontend server + - name: haproxy_b.bin + description: Kilobytes In + unit: "KiB/s" + chart_type: line + dimensions: + - name: a dimension per backend server + - name: haproxy_b.bout + description: Kilobytes Out + unit: "KiB/s" + chart_type: line + dimensions: + - name: a dimension per backend server + - name: haproxy_b.scur + description: Sessions Active + unit: "sessions" + chart_type: line + dimensions: + - name: a dimension per backend server + - name: haproxy_b.qcur + description: Sessions In Queue + unit: "sessions" + chart_type: line + dimensions: + - name: a dimension per backend server + - name: haproxy_b.hrsp_1xx + description: HTTP responses with 1xx code + unit: "responses/s" + chart_type: line + dimensions: + - name: a dimension per backend server + - name: haproxy_b.hrsp_2xx + description: HTTP responses with 2xx code + unit: "responses/s" + chart_type: line + dimensions: + - name: a dimension per backend server + - name: haproxy_b.hrsp_3xx + description: HTTP responses with 3xx code + unit: "responses/s" + chart_type: line + dimensions: 
+ - name: a dimension per backend server + - name: haproxy_b.hrsp_4xx + description: HTTP responses with 4xx code + unit: "responses/s" + chart_type: line + dimensions: + - name: a dimension per backend server + - name: haproxy_b.hrsp_5xx + description: HTTP responses with 5xx code + unit: "responses/s" + chart_type: line + dimensions: + - name: a dimension per backend server + - name: haproxy_b.hrsp_other + description: HTTP responses with other codes (protocol error) + unit: "responses/s" + chart_type: line + dimensions: + - name: a dimension per backend server + - name: haproxy_b.hrsp_total + description: HTTP responses (total) + unit: "responses/s" + chart_type: line + dimensions: + - name: a dimension per backend server + - name: haproxy_b.qtime + description: The average queue time over the 1024 last requests + unit: "milliseconds" + chart_type: line + dimensions: + - name: a dimension per backend server + - name: haproxy_b.ctime + description: The average connect time over the 1024 last requests + unit: "milliseconds" + chart_type: line + dimensions: + - name: a dimension per backend server + - name: haproxy_b.rtime + description: The average response time over the 1024 last requests + unit: "milliseconds" + chart_type: line + dimensions: + - name: a dimension per backend server + - name: haproxy_b.ttime + description: The average total session time over the 1024 last requests + unit: "milliseconds" + chart_type: line + dimensions: + - name: a dimension per backend server + - name: haproxy_hs.down + description: Backend Servers In DOWN State + unit: "failed servers" + chart_type: line + dimensions: + - name: a dimension per backend server + - name: haproxy_hs.up + description: Backend Servers In UP State + unit: "health servers" + chart_type: line + dimensions: + - name: a dimension per backend server + - name: haproxy_hb.down + description: Is Backend Failed? 
+ unit: "boolean" + chart_type: line + dimensions: + - name: a dimension per backend server + - name: haproxy.idle + description: The Ratio Of Polling Time Vs Total Time + unit: "percentage" + chart_type: line + dimensions: + - name: idle diff --git a/collectors/python.d.plugin/hddtemp/metadata.yaml b/collectors/python.d.plugin/hddtemp/metadata.yaml new file mode 100644 index 00000000..7c78a752 --- /dev/null +++ b/collectors/python.d.plugin/hddtemp/metadata.yaml @@ -0,0 +1,72 @@ +meta: + plugin_name: python.d.plugin + module_name: hddtemp + monitored_instance: + name: HDD temperature + link: '' + categories: + - data-collection.hardware-devices-and-sensors + icon_filename: 'hard-drive.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor HDD temperature metrics for maintaining optimal hard drive health. Keep tabs on drive temperatures, read/write speeds, and error rates to ensure hard drive longevity.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: hddtemp.temperatures + description: Disk Temperatures + unit: "Celsius" + chart_type: line + dimensions: + - name: a dimension per disk diff --git a/collectors/python.d.plugin/hpssa/metadata.yaml b/collectors/python.d.plugin/hpssa/metadata.yaml new file mode 100644 index 00000000..cc340780 --- /dev/null +++ b/collectors/python.d.plugin/hpssa/metadata.yaml @@ -0,0 +1,99 @@ +meta: + plugin_name: python.d.plugin + module_name: hpssa + monitored_instance: + name: HP Smart Storage Arrays + link: '' + categories: + - data-collection.storage-mount-points-and-filesystems + icon_filename: 'hp.jpeg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Examine HP Smart Storage Arrays metrics with Netdata for efficient storage management. Improve your storage efficiency with real-time performance insights.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: hpssa.ctrl_status + description: Status 1 is OK, Status 0 is not OK + unit: "Status" + chart_type: line + dimensions: + - name: ctrl_{adapter slot}_status + - name: cache_{adapter slot}_status + - name: battery_{adapter slot}_status per adapter + - name: hpssa.ctrl_temperature + description: Temperature + unit: "Celsius" + chart_type: line + dimensions: + - name: ctrl_{adapter slot}_temperature + - name: cache_{adapter slot}_temperature per adapter + - name: hpssa.ld_status + description: Status 1 is OK, Status 0 is not OK + unit: "Status" + chart_type: line + dimensions: + - name: a dimension per logical drive + - name: hpssa.pd_status + description: Status 1 is OK, Status 0 is not OK + unit: "Status" + chart_type: line + dimensions: + - name: a dimension per physical drive + - name: hpssa.pd_temperature + description: Temperature + unit: "Celsius" + chart_type: line + dimensions: + - name: a dimension per physical drive diff --git a/collectors/python.d.plugin/icecast/metadata.yaml b/collectors/python.d.plugin/icecast/metadata.yaml new file mode 100644 index 00000000..7b71360d --- /dev/null +++ b/collectors/python.d.plugin/icecast/metadata.yaml @@ -0,0 +1,72 @@ +meta: + plugin_name: python.d.plugin + module_name: icecast + monitored_instance: + name: 
Icecast + link: '' + categories: + - data-collection.media-streaming-servers + icon_filename: 'icecast.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Examine Icecast metrics for insights into media streaming server operations. Study listener counts, bitrate, and connection statuses for smooth streaming services.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: icecast.listeners + description: Number Of Listeners + unit: "listeners" + chart_type: line + dimensions: + - name: a dimension for each active source diff --git a/collectors/python.d.plugin/ipfs/metadata.yaml b/collectors/python.d.plugin/ipfs/metadata.yaml new file mode 100644 index 00000000..51e50e2a --- /dev/null +++ b/collectors/python.d.plugin/ipfs/metadata.yaml @@ -0,0 +1,98 @@ +meta: + plugin_name: python.d.plugin + module_name: ipfs + monitored_instance: + name: IPFS + link: '' + categories: + - data-collection.storage-mount-points-and-filesystems + icon_filename: 'ipfs.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Examine IPFS 
metrics for insights into distributed file system operations. Analyze node connectivity, data replication, and retrieval times for efficient distributed file handling.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: +- name: ipfs_datastore_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/ipfs.conf + metric: ipfs.repo_size + info: IPFS datastore utilization +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: ipfs.bandwidth + description: IPFS Bandwidth + unit: "kilobits/s" + chart_type: line + dimensions: + - name: in + - name: out + - name: ipfs.peers + description: IPFS Peers + unit: "peers" + chart_type: line + dimensions: + - name: peers + - name: ipfs.repo_size + description: IPFS Repo Size + unit: "GiB" + chart_type: area + dimensions: + - name: avail + - name: size + - name: ipfs.repo_objects + description: IPFS Repo Objects + unit: "objects" + chart_type: line + dimensions: + - name: objects + - name: pinned + - name: recursive_pins diff --git a/collectors/python.d.plugin/litespeed/metadata.yaml b/collectors/python.d.plugin/litespeed/metadata.yaml new file mode 100644 index 00000000..43a26775 --- /dev/null +++ b/collectors/python.d.plugin/litespeed/metadata.yaml @@ -0,0 +1,124 @@ +meta: + plugin_name: python.d.plugin + module_name: litespeed + monitored_instance: + name: Litespeed + link: '' + categories: + - 
data-collection.web-servers-and-web-proxies + icon_filename: 'litespeed.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Examine Litespeed metrics for insights into web server operations. Analyze request rates, response times, and error rates for efficient web service delivery.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: litespeed.net_throughput + description: Network Throughput HTTP + unit: "kilobits/s" + chart_type: area + dimensions: + - name: in + - name: out + - name: litespeed.net_throughput + description: Network Throughput HTTPS + unit: "kilobits/s" + chart_type: area + dimensions: + - name: in + - name: out + - name: litespeed.connections + description: Connections HTTP + unit: "conns" + chart_type: stacked + dimensions: + - name: free + - name: used + - name: litespeed.connections + description: Connections HTTPS + unit: "conns" + chart_type: stacked + dimensions: + - name: free + - name: used + - name: litespeed.requests + description: Requests + unit: "requests/s" + chart_type: line + dimensions: + - name: requests + - name: litespeed.requests_processing + description: Requests In Processing + unit: "requests" + chart_type: line + 
dimensions: + - name: processing + - name: litespeed.cache + description: Public Cache Hits + unit: "hits/s" + chart_type: line + dimensions: + - name: hits + - name: litespeed.cache + description: Private Cache Hits + unit: "hits/s" + chart_type: line + dimensions: + - name: hits + - name: litespeed.static + description: Static Hits + unit: "hits/s" + chart_type: line + dimensions: + - name: hits diff --git a/collectors/python.d.plugin/megacli/metadata.yaml b/collectors/python.d.plugin/megacli/metadata.yaml new file mode 100644 index 00000000..75238dc8 --- /dev/null +++ b/collectors/python.d.plugin/megacli/metadata.yaml @@ -0,0 +1,120 @@ +meta: + plugin_name: python.d.plugin + module_name: megacli + monitored_instance: + name: MegaCLI + link: '' + categories: + - data-collection.storage-mount-points-and-filesystems + icon_filename: 'hard-drive.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Examine MegaCLI metrics with Netdata for insights into RAID controller performance. Improve your RAID controller efficiency with real-time MegaCLI metrics.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: +- name: megacli_adapter_state + link: https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf + metric: megacli.adapter_degraded + info: 'adapter is in the degraded state (0: false, 1: true)' +- name: megacli_pd_media_errors + link: https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf + metric: megacli.pd_media_error + info: number of physical drive media errors +- name: megacli_pd_predictive_failures + link: https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf + metric: megacli.pd_predictive_failure + info: number of physical drive predictive failures +- name: megacli_bbu_relative_charge + link: https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf + metric: megacli.bbu_relative_charge + info: average battery backup unit (BBU) relative state of charge over the last 10 seconds +- name: megacli_bbu_cycle_count + link: https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf + metric: megacli.bbu_cycle_count + info: average battery backup unit (BBU) charge cycles count over the last 10 seconds +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: megacli.adapter_degraded + description: Adapter State + unit: "is degraded" + chart_type: line + dimensions: + - name: a dimension per adapter + - name: 
megacli.pd_media_error + description: Physical Drives Media Errors + unit: "errors/s" + chart_type: line + dimensions: + - name: a dimension per physical drive + - name: megacli.pd_predictive_failure + description: Physical Drives Predictive Failures + unit: "failures/s" + chart_type: line + dimensions: + - name: a dimension per physical drive + - name: battery + description: "" + labels: [] + metrics: + - name: megacli.bbu_relative_charge + description: Relative State of Charge + unit: "percentage" + chart_type: line + dimensions: + - name: adapter {battery id} + - name: megacli.bbu_cycle_count + description: Cycle Count + unit: "cycle count" + chart_type: line + dimensions: + - name: adapter {battery id} diff --git a/collectors/python.d.plugin/memcached/metadata.yaml b/collectors/python.d.plugin/memcached/metadata.yaml new file mode 100644 index 00000000..46195a46 --- /dev/null +++ b/collectors/python.d.plugin/memcached/metadata.yaml @@ -0,0 +1,175 @@ +meta: + plugin_name: python.d.plugin + module_name: memcached + monitored_instance: + name: Memcached + link: '' + categories: + - data-collection.database-servers + icon_filename: 'memcached.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor Memcached metrics for proficient in-memory key-value store operations. Track cache hits, misses, and memory usage for efficient data caching.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: +- name: memcached_cache_memory_usage + link: https://github.com/netdata/netdata/blob/master/health/health.d/memcached.conf + metric: memcached.cache + info: cache memory utilization +- name: memcached_cache_fill_rate + link: https://github.com/netdata/netdata/blob/master/health/health.d/memcached.conf + metric: memcached.cache + info: average rate the cache fills up (positive), or frees up (negative) space over the last hour +- name: memcached_out_of_cache_space_time + link: https://github.com/netdata/netdata/blob/master/health/health.d/memcached.conf + metric: memcached.cache + info: estimated time the cache will run out of space if the system continues to add data at the same rate as the past hour +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: memcached.cache + description: Cache Size + unit: "MiB" + chart_type: stacked + dimensions: + - name: available + - name: used + - name: memcached.net + description: Network + unit: "kilobits/s" + chart_type: area + dimensions: + - name: in + - name: out + - name: memcached.connections + description: Connections + unit: "connections/s" + chart_type: line + dimensions: + - name: current + - name: rejected + - name: total + - name: memcached.items + description: Items + unit: "items" + chart_type: line + dimensions: + - name: current + - name: total + - name: 
memcached.evicted_reclaimed + description: Evicted and Reclaimed Items + unit: "items" + chart_type: line + dimensions: + - name: reclaimed + - name: evicted + - name: memcached.get + description: Get Requests + unit: "requests" + chart_type: stacked + dimensions: + - name: hints + - name: misses + - name: memcached.get_rate + description: Get Request Rate + unit: "requests/s" + chart_type: line + dimensions: + - name: rate + - name: memcached.set_rate + description: Set Request Rate + unit: "requests/s" + chart_type: line + dimensions: + - name: rate + - name: memcached.delete + description: Delete Requests + unit: "requests" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: memcached.cas + description: Check and Set Requests + unit: "requests" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: bad value + - name: memcached.increment + description: Increment Requests + unit: "requests" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: memcached.decrement + description: Decrement Requests + unit: "requests" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: memcached.touch + description: Touch Requests + unit: "requests" + chart_type: stacked + dimensions: + - name: hits + - name: misses + - name: memcached.touch_rate + description: Touch Request Rate + unit: "requests/s" + chart_type: line + dimensions: + - name: rate diff --git a/collectors/python.d.plugin/memcached/metrics.csv b/collectors/python.d.plugin/memcached/metrics.csv index c7362075..8016a9d6 100644 --- a/collectors/python.d.plugin/memcached/metrics.csv +++ b/collectors/python.d.plugin/memcached/metrics.csv @@ -3,7 +3,7 @@ memcached.cache,,"available, used",MiB,Cache Size,stacked,,python.d.plugin,memca memcached.net,,"in, out",kilobits/s,Network,area,,python.d.plugin,memcached memcached.connections,,"current, rejected, total",connections/s,Connections,line,,python.d.plugin,memcached 
memcached.items,,"current,total",items,Items,line,,python.d.plugin,memcached -memcached.evicted_reclaimed,,"reclaimed, evicted", items,Evicted and Reclaimed Items,line,,python.d.plugin,memcached +memcached.evicted_reclaimed,,"reclaimed, evicted",items,Evicted and Reclaimed Items,line,,python.d.plugin,memcached memcached.get,,"hints, misses",requests,Get Requests,stacked,,python.d.plugin,memcached memcached.get_rate,,rate,requests/s,Get Request Rate,line,,python.d.plugin,memcached memcached.set_rate,,rate,requests/s,Set Request Rate,line,,python.d.plugin,memcached diff --git a/collectors/python.d.plugin/monit/metadata.yaml b/collectors/python.d.plugin/monit/metadata.yaml new file mode 100644 index 00000000..bfa3e621 --- /dev/null +++ b/collectors/python.d.plugin/monit/metadata.yaml @@ -0,0 +1,138 @@ +meta: + plugin_name: python.d.plugin + module_name: monit + monitored_instance: + name: Monit + link: '' + categories: + - data-collection.synthetic-checks + icon_filename: 'monit.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor Monit performance for optimal system monitoring operations. Monitor system status, process health, and error rates to maintain system stability.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: monit.filesystems + description: Filesystems + unit: "filesystems" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.directories + description: Directories + unit: "directories" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.files + description: Files + unit: "files" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.fifos + description: Pipes (fifo) + unit: "pipes" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.programs + description: Programs statuses + unit: "programs" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.services + description: Processes statuses + unit: "processes" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.process_uptime + description: Processes uptime + unit: "seconds" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.process_threads + description: Processes threads + unit: "threads" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.process_childrens + description: Child processes + unit: "children" + chart_type: line + dimensions: + - name: a dimension per target + - name: 
monit.hosts + description: Hosts + unit: "hosts" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.host_latency + description: Hosts latency + unit: "milliseconds" + chart_type: line + dimensions: + - name: a dimension per target + - name: monit.networks + description: Network interfaces and addresses + unit: "interfaces" + chart_type: line + dimensions: + - name: a dimension per target diff --git a/collectors/python.d.plugin/nsd/metadata.yaml b/collectors/python.d.plugin/nsd/metadata.yaml new file mode 100644 index 00000000..ce4ce35b --- /dev/null +++ b/collectors/python.d.plugin/nsd/metadata.yaml @@ -0,0 +1,124 @@ +meta: + plugin_name: python.d.plugin + module_name: nsd + monitored_instance: + name: NSD + link: '' + categories: + - data-collection.dns-and-dhcp-servers + icon_filename: 'nsd.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor NSD performance for optimal authoritative DNS server operations. Monitor query rates, response times, and error rates to ensure reliable DNS service delivery.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: nsd.queries + description: queries + unit: "queries/s" + chart_type: line + dimensions: + - name: queries + - name: nsd.zones + description: zones + unit: "zones" + chart_type: stacked + dimensions: + - name: master + - name: slave + - name: nsd.protocols + description: protocol + unit: "queries/s" + chart_type: stacked + dimensions: + - name: udp + - name: udp6 + - name: tcp + - name: tcp6 + - name: nsd.type + description: query type + unit: "queries/s" + chart_type: stacked + dimensions: + - name: A + - name: NS + - name: CNAME + - name: SOA + - name: PTR + - name: HINFO + - name: MX + - name: NAPTR + - name: TXT + - name: AAAA + - name: SRV + - name: ANY + - name: nsd.transfer + description: transfer + unit: "queries/s" + chart_type: stacked + dimensions: + - name: NOTIFY + - name: AXFR + - name: nsd.rcode + description: return code + unit: "queries/s" + chart_type: stacked + dimensions: + - name: NOERROR + - name: FORMERR + - name: SERVFAIL + - name: NXDOMAIN + - name: NOTIMP + - name: REFUSED + - name: YXDOMAIN diff --git a/collectors/python.d.plugin/nvidia_smi/metadata.yaml b/collectors/python.d.plugin/nvidia_smi/metadata.yaml new file mode 100644 index 00000000..fc0c90d5 --- /dev/null +++ b/collectors/python.d.plugin/nvidia_smi/metadata.yaml @@ -0,0 
+1,163 @@ +meta: + plugin_name: python.d.plugin + module_name: nvidia_smi + monitored_instance: + name: python.d nvidia_smi + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: GPU + description: "" + labels: [] + metrics: + - name: nvidia_smi.pci_bandwidth + description: PCI Express Bandwidth Utilization + unit: "KiB/s" + chart_type: area + dimensions: + - name: rx + - name: tx + - name: nvidia_smi.pci_bandwidth_percent + description: PCI Express Bandwidth Percent + unit: "percentage" + chart_type: area + dimensions: + - name: rx_percent + - name: tx_percent + - name: nvidia_smi.fan_speed + description: Fan Speed + unit: "percentage" + chart_type: line + dimensions: + - name: speed + - name: nvidia_smi.gpu_utilization + description: GPU Utilization + unit: "percentage" + chart_type: line + dimensions: + - name: utilization + - name: nvidia_smi.mem_utilization + description: Memory Bandwidth Utilization + unit: "percentage" + chart_type: line + dimensions: + - name: utilization + - name: nvidia_smi.encoder_utilization + description: Encoder/Decoder Utilization + unit: "percentage" + chart_type: line + dimensions: + - 
name: encoder + - name: decoder + - name: nvidia_smi.memory_allocated + description: Memory Usage + unit: "MiB" + chart_type: stacked + dimensions: + - name: free + - name: used + - name: nvidia_smi.bar1_memory_usage + description: Bar1 Memory Usage + unit: "MiB" + chart_type: stacked + dimensions: + - name: free + - name: used + - name: nvidia_smi.temperature + description: Temperature + unit: "celsius" + chart_type: line + dimensions: + - name: temp + - name: nvidia_smi.clocks + description: Clock Frequencies + unit: "MHz" + chart_type: line + dimensions: + - name: graphics + - name: video + - name: sm + - name: mem + - name: nvidia_smi.power + description: Power Utilization + unit: "Watts" + chart_type: line + dimensions: + - name: power + - name: nvidia_smi.power_state + description: Power State + unit: "state" + chart_type: line + dimensions: + - name: a dimension per {power_state} + - name: nvidia_smi.processes_mem + description: Memory Used by Each Process + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per process + - name: nvidia_smi.user_mem + description: Memory Used by Each User + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per user + - name: nvidia_smi.user_num + description: Number of User on GPU + unit: "num" + chart_type: line + dimensions: + - name: users diff --git a/collectors/python.d.plugin/openldap/metadata.yaml b/collectors/python.d.plugin/openldap/metadata.yaml new file mode 100644 index 00000000..413aaf1d --- /dev/null +++ b/collectors/python.d.plugin/openldap/metadata.yaml @@ -0,0 +1,116 @@ +meta: + plugin_name: python.d.plugin + module_name: openldap + monitored_instance: + name: OpenLDAP + link: '' + categories: + - data-collection.authentication-and-authorization + icon_filename: 'statsd.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 
'Examine OpenLDAP metrics for insights into directory service operations. Analyze query rates, response times, and error rates for efficient directory services.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: openldap.total_connections + description: Total Connections + unit: "connections/s" + chart_type: line + dimensions: + - name: connections + - name: openldap.traffic_stats + description: Traffic + unit: "KiB/s" + chart_type: line + dimensions: + - name: sent + - name: openldap.operations_status + description: Operations Status + unit: "ops/s" + chart_type: line + dimensions: + - name: completed + - name: initiated + - name: openldap.referrals + description: Referrals + unit: "referrals/s" + chart_type: line + dimensions: + - name: sent + - name: openldap.entries + description: Entries + unit: "entries/s" + chart_type: line + dimensions: + - name: sent + - name: openldap.ldap_operations + description: Operations + unit: "ops/s" + chart_type: line + dimensions: + - name: bind + - name: search + - name: unbind + - name: add + - name: delete + - name: modify + - name: compare + - name: openldap.waiters + description: Waiters + unit: "waiters/s" + chart_type: line + dimensions: + - name: write + - name: read diff --git a/collectors/python.d.plugin/oracledb/metadata.yaml 
b/collectors/python.d.plugin/oracledb/metadata.yaml new file mode 100644 index 00000000..7c530aa5 --- /dev/null +++ b/collectors/python.d.plugin/oracledb/metadata.yaml @@ -0,0 +1,216 @@ +meta: + plugin_name: python.d.plugin + module_name: oracledb + monitored_instance: + name: OracleDB + link: '' + categories: + - data-collection.database-servers + icon_filename: 'oracle.jpeg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor OracleDB performance for efficient database operations and resource management. Netdata provides real-time insights and alerts for optimal database management.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: oracledb.session_count + description: Session Count + unit: "sessions" + chart_type: line + dimensions: + - name: total + - name: active + - name: oracledb.session_limit_usage + description: Session Limit Usage + unit: "%" + chart_type: area + dimensions: + - name: usage + - name: oracledb.logons + description: Logons + unit: "events/s" + chart_type: area + dimensions: + - name: logons + - name: oracledb.physical_disk_read_writes + description: Physical Disk Reads/Writes + unit: "events/s" + chart_type: area + dimensions: 
+ - name: reads + - name: writes + - name: oracledb.sorts_on_disks + description: Sorts On Disk + unit: "events/s" + chart_type: line + dimensions: + - name: sorts + - name: oracledb.full_table_scans + description: Full Table Scans + unit: "events/s" + chart_type: line + dimensions: + - name: full table scans + - name: oracledb.database_wait_time_ratio + description: Database Wait Time Ratio + unit: "%" + chart_type: line + dimensions: + - name: wait time ratio + - name: oracledb.shared_pool_free_memory + description: Shared Pool Free Memory + unit: "%" + chart_type: line + dimensions: + - name: free memory + - name: oracledb.in_memory_sorts_ratio + description: In-Memory Sorts Ratio + unit: "%" + chart_type: line + dimensions: + - name: in-memory sorts + - name: oracledb.sql_service_response_time + description: SQL Service Response Time + unit: "seconds" + chart_type: line + dimensions: + - name: time + - name: oracledb.user_rollbacks + description: User Rollbacks + unit: "events/s" + chart_type: line + dimensions: + - name: rollbacks + - name: oracledb.enqueue_timeouts + description: Enqueue Timeouts + unit: "events/s" + chart_type: line + dimensions: + - name: enqueue timeouts + - name: oracledb.cache_hit_ration + description: Cache Hit Ratio + unit: "%" + chart_type: stacked + dimensions: + - name: buffer + - name: cursor + - name: library + - name: row + - name: oracledb.global_cache_blocks + description: Global Cache Blocks Events + unit: "events/s" + chart_type: area + dimensions: + - name: corrupted + - name: lost + - name: oracledb.activity + description: Activities + unit: "events/s" + chart_type: stacked + dimensions: + - name: parse count + - name: execute count + - name: user commits + - name: user rollbacks + - name: oracledb.wait_time + description: Wait Time + unit: "ms" + chart_type: stacked + dimensions: + - name: application + - name: configuration + - name: administrative + - name: concurrency + - name: commit + - name: network + - name: user 
I/O + - name: system I/O + - name: scheduler + - name: other + - name: oracledb.tablespace_size + description: Size + unit: "KiB" + chart_type: line + dimensions: + - name: a dimension per active tablespace + - name: oracledb.tablespace_usage + description: Usage + unit: "KiB" + chart_type: line + dimensions: + - name: a dimension per active tablespace + - name: oracledb.tablespace_usage_in_percent + description: Usage + unit: "%" + chart_type: line + dimensions: + - name: a dimension per active tablespace + - name: oracledb.allocated_size + description: Size + unit: "B" + chart_type: line + dimensions: + - name: a dimension per active tablespace + - name: oracledb.allocated_usage + description: Usage + unit: "B" + chart_type: line + dimensions: + - name: a dimension per active tablespace + - name: oracledb.allocated_usage_in_percent + description: Usage + unit: "%" + chart_type: line + dimensions: + - name: a dimension per active tablespace diff --git a/collectors/python.d.plugin/postfix/metadata.yaml b/collectors/python.d.plugin/postfix/metadata.yaml new file mode 100644 index 00000000..3c1275ed --- /dev/null +++ b/collectors/python.d.plugin/postfix/metadata.yaml @@ -0,0 +1,78 @@ +meta: + plugin_name: python.d.plugin + module_name: postfix + monitored_instance: + name: Postfix + link: '' + categories: + - data-collection.mail-servers + icon_filename: 'postfix.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Keep an eye on Postfix metrics for efficient mail server operations. Improve your mail server performance with Netdata''s real-time metrics and built-in alerts.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: postfix.qemails + description: Postfix Queue Emails + unit: "emails" + chart_type: line + dimensions: + - name: emails + - name: postfix.qsize + description: Postfix Queue Emails Size + unit: "KiB" + chart_type: area + dimensions: + - name: size diff --git a/collectors/python.d.plugin/puppet/metadata.yaml b/collectors/python.d.plugin/puppet/metadata.yaml new file mode 100644 index 00000000..fc162746 --- /dev/null +++ b/collectors/python.d.plugin/puppet/metadata.yaml @@ -0,0 +1,93 @@ +meta: + plugin_name: python.d.plugin + module_name: puppet + monitored_instance: + name: Puppet + link: '' + categories: + - data-collection.provisioning-systems + icon_filename: 'puppet.jpeg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor Puppet configurations with Netdata for changes, errors, and performance metrics. Enhance configuration management and troubleshoot faster with real-time insights and built-in alerts.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: puppet.jvm + description: JVM Heap + unit: "MiB" + chart_type: area + dimensions: + - name: committed + - name: used + - name: puppet.jvm + description: JVM Non-Heap + unit: "MiB" + chart_type: area + dimensions: + - name: committed + - name: used + - name: puppet.cpu + description: CPU usage + unit: "percentage" + chart_type: stacked + dimensions: + - name: execution + - name: GC + - name: puppet.fdopen + description: File Descriptors + unit: "descriptors" + chart_type: line + dimensions: + - name: used diff --git a/collectors/python.d.plugin/rethinkdbs/metadata.yaml b/collectors/python.d.plugin/rethinkdbs/metadata.yaml new file mode 100644 index 00000000..53959b89 --- /dev/null +++ b/collectors/python.d.plugin/rethinkdbs/metadata.yaml @@ -0,0 +1,121 @@ +meta: + plugin_name: python.d.plugin + module_name: rethinkdbs + monitored_instance: + name: RethinkDB + link: '' + categories: + - data-collection.database-servers + icon_filename: 'rethinkdb.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor RethinkDB performance for real-time database operations and performance. 
Improve your database operations with Netdata''s real-time metrics and built-in alerts.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: rethinkdb.cluster_connected_servers + description: Connected Servers + unit: "servers" + chart_type: stacked + dimensions: + - name: connected + - name: missing + - name: rethinkdb.cluster_clients_active + description: Active Clients + unit: "clients" + chart_type: line + dimensions: + - name: active + - name: rethinkdb.cluster_queries + description: Queries + unit: "queries/s" + chart_type: line + dimensions: + - name: queries + - name: rethinkdb.cluster_documents + description: Documents + unit: "documents/s" + chart_type: line + dimensions: + - name: reads + - name: writes + - name: database server + description: "" + labels: [] + metrics: + - name: rethinkdb.client_connections + description: Client Connections + unit: "connections" + chart_type: line + dimensions: + - name: connections + - name: rethinkdb.clients_active + description: Active Clients + unit: "clients" + chart_type: line + dimensions: + - name: active + - name: rethinkdb.queries + description: Queries + unit: "queries/s" + chart_type: line + dimensions: + - name: queries + - name: rethinkdb.documents + description: Documents + unit: "documents/s" + chart_type: line + dimensions: + - name: reads + - 
name: writes diff --git a/collectors/python.d.plugin/retroshare/metadata.yaml b/collectors/python.d.plugin/retroshare/metadata.yaml new file mode 100644 index 00000000..b847bb6f --- /dev/null +++ b/collectors/python.d.plugin/retroshare/metadata.yaml @@ -0,0 +1,91 @@ +meta: + plugin_name: python.d.plugin + module_name: retroshare + monitored_instance: + name: RetroShare + link: '' + categories: + - data-collection.p2p + icon_filename: 'retroshare.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor RetroShare metrics for efficient peer-to-peer communication and file sharing. Enhance your communication and file sharing performance with real-time insights and alerts from Netdata.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: +- name: retroshare_dht_working + link: https://github.com/netdata/netdata/blob/master/health/health.d/retroshare.conf + metric: retroshare.dht + info: number of DHT peers +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: retroshare.bandwidth + description: RetroShare Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: Upload + - name: Download + - name: retroshare.peers + description: RetroShare Peers + unit: "peers" + chart_type: line + dimensions: + - 
name: All friends + - name: Connected friends + - name: retroshare.dht + description: Retroshare DHT + unit: "peers" + chart_type: line + dimensions: + - name: DHT nodes estimated + - name: RS nodes estimated diff --git a/collectors/python.d.plugin/riakkv/metadata.yaml b/collectors/python.d.plugin/riakkv/metadata.yaml new file mode 100644 index 00000000..795763eb --- /dev/null +++ b/collectors/python.d.plugin/riakkv/metadata.yaml @@ -0,0 +1,300 @@ +meta: + plugin_name: python.d.plugin + module_name: riakkv + monitored_instance: + name: RiakKV + link: '' + categories: + - data-collection.database-servers + icon_filename: 'riak.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Examine RiakKV metrics for optimal key-value database performance. Netdata provides comprehensive dashboards and anomaly detection for efficient database operations.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: +- name: riakkv_1h_kv_get_mean_latency + link: https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf + metric: riak.kv.latency.get + info: average time between reception of client GET request and subsequent response to client over the last hour +- name: riakkv_kv_get_slow + link: https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf + metric: riak.kv.latency.get + info: average time between reception of client GET request and subsequent response to the client over the last 3 minutes, compared to the average over the last hour +- name: riakkv_1h_kv_put_mean_latency + link: https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf + metric: riak.kv.latency.put + info: average time between reception of client PUT request and subsequent response to the client over the last hour +- name: riakkv_kv_put_slow + link: https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf + metric: riak.kv.latency.put + info: average time between reception of client PUT request and subsequent response to the client over the last 3 minutes, compared to the average over the last hour +- name: riakkv_vm_high_process_count + link: https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf + metric: riak.vm + info: number of processes running in the Erlang VM +- name: riakkv_list_keys_active + link: 
https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf + metric: riak.core.fsm_active + info: number of currently running list keys finite state machines +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: riak.kv.throughput + description: Reads & writes coordinated by this node + unit: "operations/s" + chart_type: line + dimensions: + - name: gets + - name: puts + - name: riak.dt.vnode_updates + description: Update operations coordinated by local vnodes by data type + unit: "operations/s" + chart_type: line + dimensions: + - name: counters + - name: sets + - name: maps + - name: riak.search + description: Search queries on the node + unit: "queries/s" + chart_type: line + dimensions: + - name: queries + - name: riak.search.documents + description: Documents indexed by search + unit: "documents/s" + chart_type: line + dimensions: + - name: indexed + - name: riak.consistent.operations + description: Consistent node operations + unit: "operations/s" + chart_type: line + dimensions: + - name: gets + - name: puts + - name: riak.kv.latency.get + description: Time between reception of a client GET request and subsequent response to client + unit: "ms" + chart_type: line + dimensions: + - name: mean + - name: median + - name: '95' + - name: '99' + - name: '100' + - name: riak.kv.latency.put + description: Time between reception of a client PUT request and subsequent response to client + unit: "ms" + chart_type: line + dimensions: + - name: mean + - name: median + - name: '95' + - name: '99' + - name: '100' + - name: riak.dt.latency.counter_merge + description: Time it takes to perform an Update Counter operation + unit: "ms" + chart_type: line + dimensions: + - name: mean + - name: median + - name: '95' + - name: '99' + - name: '100' + - name: riak.dt.latency.set_merge + description: Time it takes to perform an Update Set operation + 
unit: "ms" + chart_type: line + dimensions: + - name: mean + - name: median + - name: '95' + - name: '99' + - name: '100' + - name: riak.dt.latency.map_merge + description: Time it takes to perform an Update Map operation + unit: "ms" + chart_type: line + dimensions: + - name: mean + - name: median + - name: '95' + - name: '99' + - name: '100' + - name: riak.search.latency.query + description: Search query latency + unit: "ms" + chart_type: line + dimensions: + - name: median + - name: min + - name: '95' + - name: '99' + - name: '999' + - name: max + - name: riak.search.latency.index + description: Time it takes Search to index a new document + unit: "ms" + chart_type: line + dimensions: + - name: median + - name: min + - name: '95' + - name: '99' + - name: '999' + - name: max + - name: riak.consistent.latency.get + description: Strongly consistent read latency + unit: "ms" + chart_type: line + dimensions: + - name: mean + - name: median + - name: '95' + - name: '99' + - name: '100' + - name: riak.consistent.latency.put + description: Strongly consistent write latency + unit: "ms" + chart_type: line + dimensions: + - name: mean + - name: median + - name: '95' + - name: '99' + - name: '100' + - name: riak.vm + description: Total processes running in the Erlang VM + unit: "total" + chart_type: line + dimensions: + - name: processes + - name: riak.vm.memory.processes + description: Memory allocated & used by Erlang processes + unit: "MB" + chart_type: line + dimensions: + - name: allocated + - name: used + - name: riak.kv.siblings_encountered.get + description: Number of siblings encountered during GET operations by this node during the past minute + unit: "siblings" + chart_type: line + dimensions: + - name: mean + - name: median + - name: '95' + - name: '99' + - name: '100' + - name: riak.kv.objsize.get + description: Object size encountered by this node during the past minute + unit: "KB" + chart_type: line + dimensions: + - name: mean + - name: median + - name: 
'95' + - name: '99' + - name: '100' + - name: riak.search.vnodeq_size + description: Number of unprocessed messages in the vnode message queues of Search on this node in the past minute + unit: "messages" + chart_type: line + dimensions: + - name: mean + - name: median + - name: '95' + - name: '99' + - name: '100' + - name: riak.search.index + description: Number of document index errors encountered by Search + unit: "errors" + chart_type: line + dimensions: + - name: errors + - name: riak.core.protobuf_connections + description: Protocol buffer connections by status + unit: "connections" + chart_type: line + dimensions: + - name: active + - name: riak.core.repairs + description: Number of repair operations this node has coordinated + unit: "repairs" + chart_type: line + dimensions: + - name: read + - name: riak.core.fsm_active + description: Active finite state machines by kind + unit: "fsms" + chart_type: line + dimensions: + - name: get + - name: put + - name: secondary index + - name: list keys + - name: riak.core.fsm_rejected + description: Finite state machines being rejected by Sidejobs overload protection + unit: "fsms" + chart_type: line + dimensions: + - name: get + - name: put + - name: riak.search.index + description: Number of writes to Search failed due to bad data format by reason + unit: "writes" + chart_type: line + dimensions: + - name: bad_entry + - name: extract_fail diff --git a/collectors/python.d.plugin/samba/metadata.yaml b/collectors/python.d.plugin/samba/metadata.yaml new file mode 100644 index 00000000..d9231a1f --- /dev/null +++ b/collectors/python.d.plugin/samba/metadata.yaml @@ -0,0 +1,123 @@ +meta: + plugin_name: python.d.plugin + module_name: samba + monitored_instance: + name: Samba + link: '' + categories: + - data-collection.storage-mount-points-and-filesystems + icon_filename: 'samba.jpg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: 
false +overview: + data_collection: + metrics_description: 'Monitor Samba performance for optimal network file sharing operations. Netdata provides real-time insights and alerts for efficient file sharing.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: syscall.rw + description: R/Ws + unit: "KiB/s" + chart_type: area + dimensions: + - name: sendfile + - name: recvfile + - name: smb2.rw + description: R/Ws + unit: "KiB/s" + chart_type: area + dimensions: + - name: readout + - name: writein + - name: readin + - name: writeout + - name: smb2.create_close + description: Create/Close + unit: "operations/s" + chart_type: line + dimensions: + - name: create + - name: close + - name: smb2.get_set_info + description: Info + unit: "operations/s" + chart_type: line + dimensions: + - name: getinfo + - name: setinfo + - name: smb2.find + description: Find + unit: "operations/s" + chart_type: line + dimensions: + - name: find + - name: smb2.notify + description: Notify + unit: "operations/s" + chart_type: line + dimensions: + - name: notify + - name: smb2.sm_counters + description: Lesser Ops + unit: "count" + chart_type: stacked + dimensions: + - name: tcon + - name: negprot + - name: tdis + - name: cancel + - name: logoff + - name: flush + - name: lock + - name: keepalive + - name: break + - name: sessetup diff 
--git a/collectors/python.d.plugin/sensors/metadata.yaml b/collectors/python.d.plugin/sensors/metadata.yaml new file mode 100644 index 00000000..1c01554a --- /dev/null +++ b/collectors/python.d.plugin/sensors/metadata.yaml @@ -0,0 +1,108 @@ +meta: + plugin_name: python.d.plugin + module_name: sensors + monitored_instance: + name: Linux Sensors (lm-sensors) + link: '' + categories: + - data-collection.hardware-devices-and-sensors + icon_filename: 'microchip.svg' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Examine Linux Sensors metrics with Netdata for insights into hardware health and performance. Enhance your system''s reliability with real-time hardware health insights.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: chip + description: "" + labels: [] + metrics: + - name: sensors.temperature + description: Temperature + unit: "Celsius" + chart_type: line + dimensions: + - name: a dimension per sensor + - name: sensors.voltage + description: Voltage + unit: "Volts" + chart_type: line + dimensions: + - name: a dimension per sensor + - name: sensors.current + description: Current + unit: "Ampere" + chart_type: line + dimensions: + - name: a dimension per sensor + - name: sensors.power + description: Power + 
unit: "Watt" + chart_type: line + dimensions: + - name: a dimension per sensor + - name: sensors.fan + description: Fans speed + unit: "Rotations/min" + chart_type: line + dimensions: + - name: a dimension per sensor + - name: sensors.energy + description: Energy + unit: "Joule" + chart_type: line + dimensions: + - name: a dimension per sensor + - name: sensors.humidity + description: Humidity + unit: "Percent" + chart_type: line + dimensions: + - name: a dimension per sensor diff --git a/collectors/python.d.plugin/smartd_log/metadata.yaml b/collectors/python.d.plugin/smartd_log/metadata.yaml new file mode 100644 index 00000000..334fb90c --- /dev/null +++ b/collectors/python.d.plugin/smartd_log/metadata.yaml @@ -0,0 +1,276 @@ +meta: + plugin_name: python.d.plugin + module_name: smartd_log + monitored_instance: + name: S.M.A.R.T. + link: '' + categories: + - data-collection.hardware-devices-and-sensors + icon_filename: 'smart.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor S.M.A.R.T. metrics for insights into your hard drive health and performance. Enhance your hard drive performance and reliability with real-time insights and alerts from Netdata.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: smartd_log.read_error_rate + description: Read Error Rate + unit: "value" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.seek_error_rate + description: Seek Error Rate + unit: "value" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.soft_read_error_rate + description: Soft Read Error Rate + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.write_error_rate + description: Write Error Rate + unit: "value" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.read_total_err_corrected + description: Read Error Corrected + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.read_total_unc_errors + description: Read Error Uncorrected + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.write_total_err_corrected + description: Write Error Corrected + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.write_total_unc_errors + description: Write Error Uncorrected + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: 
smartd_log.verify_total_err_corrected + description: Verify Error Corrected + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.verify_total_unc_errors + description: Verify Error Uncorrected + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.sata_interface_downshift + description: SATA Interface Downshift + unit: "events" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.udma_crc_error_count + description: UDMA CRC Error Count + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.throughput_performance + description: Throughput Performance + unit: "value" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.seek_time_performance + description: Seek Time Performance + unit: "value" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.start_stop_count + description: Start/Stop Count + unit: "events" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.power_on_hours_count + description: Power-On Hours Count + unit: "hours" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.power_cycle_count + description: Power Cycle Count + unit: "events" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.unexpected_power_loss + description: Unexpected Power Loss + unit: "events" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.spin_up_time + description: Spin-Up Time + unit: "ms" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.spin_up_retries + description: Spin-up Retries + unit: "retries" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.calibration_retries + description: Calibration Retries + unit: 
"retries" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.airflow_temperature_celsius + description: Airflow Temperature Celsius + unit: "celsius" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.temperature_celsius + description: Temperature + unit: "celsius" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.reallocated_sectors_count + description: Reallocated Sectors Count + unit: "sectors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.reserved_block_count + description: Reserved Block Count + unit: "percentage" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.program_fail_count + description: Program Fail Count + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.erase_fail_count + description: Erase Fail Count + unit: "failures" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.wear_leveller_worst_case_erase_count + description: Wear Leveller Worst Case Erase Count + unit: "erases" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.unused_reserved_nand_blocks + description: Unused Reserved NAND Blocks + unit: "blocks" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.reallocation_event_count + description: Reallocation Event Count + unit: "events" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.current_pending_sector_count + description: Current Pending Sector Count + unit: "sectors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.offline_uncorrectable_sector_count + description: Offline Uncorrectable Sector Count + unit: "sectors" + chart_type: line + dimensions: + - name: a dimension per device + - name: 
smartd_log.percent_lifetime_used + description: Percent Lifetime Used + unit: "percentage" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.media_wearout_indicator + description: Media Wearout Indicator + unit: "percentage" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.nand_writes_1gib + description: NAND Writes + unit: "GiB" + chart_type: line + dimensions: + - name: a dimension per device diff --git a/collectors/python.d.plugin/spigotmc/metadata.yaml b/collectors/python.d.plugin/spigotmc/metadata.yaml new file mode 100644 index 00000000..5446bb30 --- /dev/null +++ b/collectors/python.d.plugin/spigotmc/metadata.yaml @@ -0,0 +1,88 @@ +meta: + plugin_name: python.d.plugin + module_name: spigotmc + monitored_instance: + name: SpigotMC + link: '' + categories: + - data-collection.gaming + icon_filename: 'spigot.jfif' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor SpigotMC performance with Netdata for optimal Minecraft server operations. Improve your gaming experience with real-time server performance insights.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: spigotmc.tps + description: Spigot Ticks Per Second + unit: "ticks" + chart_type: line + dimensions: + - name: 1 Minute Average + - name: 5 Minute Average + - name: 15 Minute Average + - name: spigotmc.users + description: Minecraft Users + unit: "users" + chart_type: area + dimensions: + - name: Users + - name: spigotmc.mem + description: Minecraft Memory Usage + unit: "MiB" + chart_type: line + dimensions: + - name: used + - name: allocated + - name: max diff --git a/collectors/python.d.plugin/squid/metadata.yaml b/collectors/python.d.plugin/squid/metadata.yaml new file mode 100644 index 00000000..736a2204 --- /dev/null +++ b/collectors/python.d.plugin/squid/metadata.yaml @@ -0,0 +1,96 @@ +meta: + plugin_name: python.d.plugin + module_name: squid + monitored_instance: + name: Squid + link: '' + categories: + - data-collection.web-servers-and-web-proxies + icon_filename: 'squid.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor Squid metrics with Netdata for efficient caching proxy for the Web performance. Improve your web caching efficiency with real-time Squid metrics.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: squid instance + description: "" + labels: [] + metrics: + - name: squid.clients_net + description: Squid Client Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: in + - name: out + - name: hits + - name: squid.clients_requests + description: Squid Client Requests + unit: "requests/s" + chart_type: line + dimensions: + - name: requests + - name: hits + - name: errors + - name: squid.servers_net + description: Squid Server Bandwidth + unit: "kilobits/s" + chart_type: area + dimensions: + - name: in + - name: out + - name: squid.servers_requests + description: Squid Server Requests + unit: "requests/s" + chart_type: line + dimensions: + - name: requests + - name: errors diff --git a/collectors/python.d.plugin/tomcat/metadata.yaml b/collectors/python.d.plugin/tomcat/metadata.yaml new file mode 100644 index 00000000..4f2a2b0e --- /dev/null +++ b/collectors/python.d.plugin/tomcat/metadata.yaml @@ -0,0 +1,129 @@ +meta: + plugin_name: python.d.plugin + module_name: tomcat + monitored_instance: + name: Tomcat + link: '' + categories: + - data-collection.web-servers-and-web-proxies + icon_filename: 'tomcat.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + 
metrics_description: 'Monitor Tomcat performance with Netdata for optimal Java servlet container operations. Improve your web application performance with real-time Tomcat metrics.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: tomcat.accesses + description: Requests + unit: "requests/s" + chart_type: area + dimensions: + - name: accesses + - name: errors + - name: tomcat.bandwidth + description: Bandwidth + unit: "KiB/s" + chart_type: area + dimensions: + - name: sent + - name: received + - name: tomcat.processing_time + description: processing time + unit: "seconds" + chart_type: area + dimensions: + - name: processing time + - name: tomcat.threads + description: Threads + unit: "current threads" + chart_type: area + dimensions: + - name: current + - name: busy + - name: tomcat.jvm + description: JVM Memory Pool Usage + unit: "MiB" + chart_type: stacked + dimensions: + - name: free + - name: eden + - name: survivor + - name: tenured + - name: code cache + - name: compressed + - name: metaspace + - name: tomcat.jvm_eden + description: Eden Memory Usage + unit: "MiB" + chart_type: area + dimensions: + - name: used + - name: committed + - name: max + - name: tomcat.jvm_survivor + description: Survivor Memory Usage + unit: "MiB" + chart_type: area + dimensions: + - name: used + - name: committed + - 
name: max + - name: tomcat.jvm_tenured + description: Tenured Memory Usage + unit: "MiB" + chart_type: area + dimensions: + - name: used + - name: committed + - name: max diff --git a/collectors/python.d.plugin/tor/metadata.yaml b/collectors/python.d.plugin/tor/metadata.yaml new file mode 100644 index 00000000..7d02b2d7 --- /dev/null +++ b/collectors/python.d.plugin/tor/metadata.yaml @@ -0,0 +1,73 @@ +meta: + plugin_name: python.d.plugin + module_name: tor + monitored_instance: + name: Tor + link: '' + categories: + - data-collection.vpns + icon_filename: 'tor.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor Tor metrics with Netdata for efficient anonymous communication operations. Enhance your anonymous communication with real-time insights and alerts.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: tor.traffic + description: Tor Traffic + unit: "KiB/s" + chart_type: area + dimensions: + - name: read + - name: write diff --git a/collectors/python.d.plugin/traefik/metadata.yaml b/collectors/python.d.plugin/traefik/metadata.yaml new file mode 100644 index 00000000..b817d422 --- /dev/null +++ b/collectors/python.d.plugin/traefik/metadata.yaml 
@@ -0,0 +1,122 @@ +meta: + plugin_name: python.d.plugin + module_name: traefik + monitored_instance: + name: python.d traefik + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: traefik.response_statuses + description: Response statuses + unit: "requests/s" + chart_type: stacked + dimensions: + - name: success + - name: error + - name: redirect + - name: bad + - name: other + - name: traefik.response_codes + description: Responses by codes + unit: "requests/s" + chart_type: stacked + dimensions: + - name: 2xx + - name: 5xx + - name: 3xx + - name: 4xx + - name: 1xx + - name: other + - name: traefik.detailed_response_codes + description: Detailed response codes + unit: "requests/s" + chart_type: stacked + dimensions: + - name: a dimension for each response code family + - name: traefik.requests + description: Requests + unit: "requests/s" + chart_type: line + dimensions: + - name: requests + - name: traefik.total_response_time + description: Total response time + unit: "seconds" + chart_type: line + dimensions: + - name: response + - name: 
traefik.average_response_time + description: Average response time + unit: "milliseconds" + chart_type: line + dimensions: + - name: response + - name: traefik.average_response_time_per_iteration + description: Average response time per iteration + unit: "milliseconds" + chart_type: line + dimensions: + - name: response + - name: traefik.uptime + description: Uptime + unit: "seconds" + chart_type: line + dimensions: + - name: uptime diff --git a/collectors/python.d.plugin/uwsgi/metadata.yaml b/collectors/python.d.plugin/uwsgi/metadata.yaml new file mode 100644 index 00000000..3447f532 --- /dev/null +++ b/collectors/python.d.plugin/uwsgi/metadata.yaml @@ -0,0 +1,114 @@ +meta: + plugin_name: python.d.plugin + module_name: uwsgi + monitored_instance: + name: uWSGI + link: '' + categories: + - data-collection.web-servers-and-web-proxies + icon_filename: 'uwsgi.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor uWSGI performance for optimal application server operations. Monitor request rates, worker statuses, and error rates to ensure efficient application delivery.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: uwsgi.requests + description: Requests + unit: "requests/s" + chart_type: stacked + dimensions: + - name: a dimension per worker + - name: uwsgi.tx + description: Transmitted data + unit: "KiB/s" + chart_type: stacked + dimensions: + - name: a dimension per worker + - name: uwsgi.avg_rt + description: Average request time + unit: "milliseconds" + chart_type: line + dimensions: + - name: a dimension per worker + - name: uwsgi.memory_rss + description: RSS (Resident Set Size) + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per worker + - name: uwsgi.memory_vsz + description: VSZ (Virtual Memory Size) + unit: "MiB" + chart_type: stacked + dimensions: + - name: a dimension per worker + - name: uwsgi.exceptions + description: Exceptions + unit: "exceptions" + chart_type: line + dimensions: + - name: exceptions + - name: uwsgi.harakiris + description: Harakiris + unit: "harakiris" + chart_type: line + dimensions: + - name: harakiris + - name: uwsgi.respawns + description: Respawns + unit: "respawns" + chart_type: line + dimensions: + - name: respawns diff --git a/collectors/python.d.plugin/varnish/metadata.yaml b/collectors/python.d.plugin/varnish/metadata.yaml new file mode 100644 index 00000000..267279fa --- /dev/null +++ 
b/collectors/python.d.plugin/varnish/metadata.yaml @@ -0,0 +1,192 @@ +meta: + plugin_name: python.d.plugin + module_name: varnish + monitored_instance: + name: Varnish + link: '' + categories: + - data-collection.web-servers-and-web-proxies + icon_filename: 'varnish.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor Varnish metrics with Netdata for efficient HTTP accelerator performance. Enhance your web performance with real-time Varnish metrics.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: varnish.session_connection + description: Connections Statistics + unit: "connections/s" + chart_type: line + dimensions: + - name: accepted + - name: dropped + - name: varnish.client_requests + description: Client Requests + unit: "requests/s" + chart_type: line + dimensions: + - name: received + - name: varnish.all_time_hit_rate + description: All History Hit Rate Ratio + unit: "percentage" + chart_type: stacked + dimensions: + - name: hit + - name: miss + - name: hitpass + - name: varnish.current_poll_hit_rate + description: Current Poll Hit Rate Ratio + unit: "percentage" + chart_type: stacked + dimensions: + - name: hit + - name: miss + - name: 
hitpass + - name: varnish.cached_objects_expired + description: Expired Objects + unit: "expired/s" + chart_type: line + dimensions: + - name: objects + - name: varnish.cached_objects_nuked + description: Least Recently Used Nuked Objects + unit: "nuked/s" + chart_type: line + dimensions: + - name: objects + - name: varnish.threads_total + description: Number Of Threads In All Pools + unit: "number" + chart_type: line + dimensions: + - name: None + - name: varnish.threads_statistics + description: Threads Statistics + unit: "threads/s" + chart_type: line + dimensions: + - name: created + - name: failed + - name: limited + - name: varnish.threads_queue_len + description: Current Queue Length + unit: "requests" + chart_type: line + dimensions: + - name: in queue + - name: varnish.backend_connections + description: Backend Connections Statistics + unit: "connections/s" + chart_type: line + dimensions: + - name: successful + - name: unhealthy + - name: reused + - name: closed + - name: recycled + - name: failed + - name: varnish.backend_requests + description: Requests To The Backend + unit: "requests/s" + chart_type: line + dimensions: + - name: sent + - name: varnish.esi_statistics + description: ESI Statistics + unit: "problems/s" + chart_type: line + dimensions: + - name: errors + - name: warnings + - name: varnish.memory_usage + description: Memory Usage + unit: "MiB" + chart_type: stacked + dimensions: + - name: free + - name: allocated + - name: varnish.uptime + description: Uptime + unit: "seconds" + chart_type: line + dimensions: + - name: uptime + - name: Backend + description: "" + labels: [] + metrics: + - name: varnish.backend + description: Backend {backend_name} + unit: "kilobits/s" + chart_type: area + dimensions: + - name: header + - name: body + - name: Storage + description: "" + labels: [] + metrics: + - name: varnish.storage_usage + description: Storage {storage_name} Usage + unit: "KiB" + chart_type: stacked + dimensions: + - name: free + - name: 
allocated + - name: varnish.storage_alloc_objs + description: Storage {storage_name} Allocated Objects + unit: "objects" + chart_type: line + dimensions: + - name: allocated diff --git a/collectors/python.d.plugin/w1sensor/metadata.yaml b/collectors/python.d.plugin/w1sensor/metadata.yaml new file mode 100644 index 00000000..5d495fe5 --- /dev/null +++ b/collectors/python.d.plugin/w1sensor/metadata.yaml @@ -0,0 +1,72 @@ +meta: + plugin_name: python.d.plugin + module_name: w1sensor + monitored_instance: + name: 1-Wire Sensors + link: '' + categories: + - data-collection.hardware-devices-and-sensors + icon_filename: '1-wire.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor 1-Wire Sensors metrics with Netdata for optimal environmental conditions monitoring. Enhance your environmental monitoring with real-time insights and alerts.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: w1sensor.temp + description: 1-Wire Temperature Sensor + unit: "Celsius" + chart_type: line + dimensions: + - name: a dimension per sensor diff --git a/collectors/python.d.plugin/zscores/metadata.yaml b/collectors/python.d.plugin/zscores/metadata.yaml new file mode 100644 
index 00000000..740d91e4 --- /dev/null +++ b/collectors/python.d.plugin/zscores/metadata.yaml @@ -0,0 +1,77 @@ +meta: + plugin_name: python.d.plugin + module_name: zscores + monitored_instance: + name: python.d zscores + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: zscores.z + description: Z Score + unit: "z" + chart_type: line + dimensions: + - name: a dimension per chart or dimension + - name: zscores.3stddev + description: Z Score >3 + unit: "count" + chart_type: stacked + dimensions: + - name: a dimension per chart or dimension diff --git a/collectors/slabinfo.plugin/metadata.yaml b/collectors/slabinfo.plugin/metadata.yaml new file mode 100644 index 00000000..4da1a198 --- /dev/null +++ b/collectors/slabinfo.plugin/metadata.yaml @@ -0,0 +1,83 @@ +meta: + plugin_name: slabinfo.plugin + module_name: slabinfo.plugin + monitored_instance: + name: slabinfo + link: '' + categories: [] + icon_filename: '' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + 
data_collection: + metrics_description: '' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: mem.slabmemory + description: Memory Usage + unit: "B" + chart_type: line + dimensions: + - name: a dimension per cache + - name: mem.slabfilling + description: Object Filling + unit: "%" + chart_type: line + dimensions: + - name: a dimension per cache + - name: mem.slabwaste + description: Memory waste + unit: "B" + chart_type: line + dimensions: + - name: a dimension per cache diff --git a/collectors/statsd.plugin/statsd.c b/collectors/statsd.plugin/statsd.c index 1425d0a9..5422d290 100644 --- a/collectors/statsd.plugin/statsd.c +++ b/collectors/statsd.plugin/statsd.c @@ -34,7 +34,7 @@ typedef struct statsd_metric_gauge { } STATSD_METRIC_GAUGE; typedef struct statsd_metric_counter { // counter and meter - long long value; + collected_number value; } STATSD_METRIC_COUNTER; typedef struct statsd_histogram_extensions { @@ -57,8 +57,8 @@ typedef struct statsd_histogram_extensions { RRDDIM *rd_stddev; //RRDDIM *rd_sum; - size_t size; - size_t used; + uint32_t size; + uint32_t used; NETDATA_DOUBLE *values; // dynamic array of values collected } STATSD_METRIC_HISTOGRAM_EXTENSIONS; @@ -68,24 +68,22 @@ typedef struct statsd_metric_histogram { // histogram and timer typedef struct statsd_metric_set { DICTIONARY *dict; - 
size_t unique; } STATSD_METRIC_SET; typedef struct statsd_metric_dictionary_item { - size_t count; + uint32_t count; RRDDIM *rd; } STATSD_METRIC_DICTIONARY_ITEM; typedef struct statsd_metric_dictionary { DICTIONARY *dict; - size_t unique; } STATSD_METRIC_DICTIONARY; // -------------------------------------------------------------------------------------------------------------------- // this is a metric - for all types of metrics -typedef enum statsd_metric_options { +typedef enum __attribute__((packed)) statsd_metric_options { STATSD_METRIC_OPTION_NONE = 0x00000000, // no options set STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED = 0x00000001, // do not update the chart dimension, when this metric is not collected STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED = 0x00000002, // render a private chart for this metric @@ -99,7 +97,7 @@ typedef enum statsd_metric_options { STATSD_METRIC_OPTION_UPDATED_CHART_METADATA = 0x00000200, // set when the private chart metadata have been updated via tags } STATS_METRIC_OPTIONS; -typedef enum statsd_metric_type { +typedef enum __attribute__((packed)) statsd_metric_type { STATSD_METRIC_TYPE_GAUGE, STATSD_METRIC_TYPE_COUNTER, STATSD_METRIC_TYPE_METER, @@ -118,7 +116,7 @@ typedef struct statsd_metric { // metadata about data collection collected_number events; // the number of times this metric has been collected (never resets) - size_t count; // the number of times this metric has been collected since the last flush + uint32_t count; // the number of times this metric has been collected since the last flush // the actual collected data union { @@ -151,22 +149,21 @@ typedef struct statsd_metric { typedef struct statsd_index { char *name; // the name of the index of metrics - size_t events; // the number of events processed for this index - size_t metrics; // the number of metrics in this index - size_t useful; // the number of useful metrics in this index + uint32_t events; // the number of events processed for this index + uint32_t 
metrics; // the number of metrics in this index + uint32_t useful; // the number of useful metrics in this index + STATS_METRIC_OPTIONS default_options; // default options for all metrics in this index STATSD_METRIC_TYPE type; // the type of index DICTIONARY *dict; STATSD_METRIC *first_useful; // the linked list of useful metrics (new metrics are added in front) - - STATS_METRIC_OPTIONS default_options; // default options for all metrics in this index } STATSD_INDEX; // -------------------------------------------------------------------------------------------------------------------- // synthetic charts -typedef enum statsd_app_chart_dimension_value_type { +typedef enum __attribute__((packed)) statsd_app_chart_dimension_value_type { STATSD_APP_CHART_DIM_VALUE_TYPE_EVENTS, STATSD_APP_CHART_DIM_VALUE_TYPE_LAST, STATSD_APP_CHART_DIM_VALUE_TYPE_AVERAGE, @@ -183,18 +180,18 @@ typedef struct statsd_app_chart_dimension { const char *metric; // the source metric name of this dimension uint32_t metric_hash; // hash for fast string comparisons - SIMPLE_PATTERN *metric_pattern; // set when the 'metric' is a simple pattern - - collected_number multiplier; // the multiplier of the dimension - collected_number divisor; // the divisor of the dimension + int32_t multiplier; // the multiplier of the dimension + int32_t divisor; // the divisor of the dimension RRDDIM_FLAGS flags; // the RRDDIM flags for this dimension RRDDIM_OPTIONS options; // the RRDDIM options for this dimension + RRD_ALGORITHM algorithm; // the algorithm of this dimension STATSD_APP_CHART_DIM_VALUE_TYPE value_type; // which value to use of the source metric + SIMPLE_PATTERN *metric_pattern; // set when the 'metric' is a simple pattern + RRDDIM *rd; // a pointer to the RRDDIM that has been created for this dimension collected_number *value_ptr; // a pointer to the source metric value - RRD_ALGORITHM algorithm; // the algorithm of this dimension struct statsd_app_chart_dimension *next; // the next dimension for 
this chart } STATSD_APP_CHART_DIM; @@ -207,11 +204,11 @@ typedef struct statsd_app_chart { const char *context; const char *units; const char *module; - long priority; + int32_t priority; RRDSET_TYPE chart_type; STATSD_APP_CHART_DIM *dimensions; - size_t dimensions_count; - size_t dimensions_linked_count; + uint32_t dimensions_count; + uint32_t dimensions_linked_count; RRDSET *st; struct statsd_app_chart *next; @@ -222,8 +219,8 @@ typedef struct statsd_app { SIMPLE_PATTERN *metrics; STATS_METRIC_OPTIONS default_options; RRD_MEMORY_MODE rrd_memory_mode; + int32_t rrd_history_entries; DICTIONARY *dict; - long rrd_history_entries; const char *source; STATSD_APP_CHART *charts; @@ -236,7 +233,7 @@ typedef struct statsd_app { struct collection_thread_status { SPINLOCK spinlock; bool running; - size_t max_sockets; + uint32_t max_sockets; netdata_thread_t thread; }; @@ -262,23 +259,22 @@ static struct statsd { size_t udp_packets_received; size_t udp_bytes_read; - int enabled; - int update_every; + int32_t update_every; + bool enabled; + bool private_charts_hidden; SIMPLE_PATTERN *charts_for; - size_t tcp_idle_timeout; + uint32_t tcp_idle_timeout; collected_number decimal_detail; - size_t private_charts; - size_t max_private_charts_hard; - long private_charts_rrd_history_entries; - unsigned int private_charts_hidden:1; + uint32_t private_charts; + uint32_t max_private_charts_hard; STATSD_APP *apps; - size_t recvmmsg_size; - size_t histogram_increase_step; + uint32_t recvmmsg_size; + uint32_t histogram_increase_step; + uint32_t dictionary_max_unique; double histogram_percentile; char *histogram_percentile_str; - size_t dictionary_max_unique; int threads; struct collection_thread_status *collection_threads_status; @@ -287,7 +283,7 @@ static struct statsd { } statsd = { .enabled = 1, .max_private_charts_hard = 1000, - .private_charts_hidden = 0, + .private_charts_hidden = false, .recvmmsg_size = 10, .decimal_detail = STATSD_DECIMAL_DETAIL, @@ -374,7 +370,7 @@ static void 
dictionary_metric_insert_callback(const DICTIONARY_ITEM *item, void STATSD_METRIC *m = (STATSD_METRIC *)value; const char *name = dictionary_acquired_item_name(item); - debug(D_STATSD, "Creating new %s metric '%s'", index->name, name); + netdata_log_debug(D_STATSD, "Creating new %s metric '%s'", index->name, name); m->name = name; m->hash = simple_hash(name); @@ -405,7 +401,7 @@ static void dictionary_metric_delete_callback(const DICTIONARY_ITEM *item, void } static inline STATSD_METRIC *statsd_find_or_add_metric(STATSD_INDEX *index, const char *name) { - debug(D_STATSD, "searching for metric '%s' under '%s'", name, index->name); + netdata_log_debug(D_STATSD, "searching for metric '%s' under '%s'", name, index->name); #ifdef STATSD_MULTITHREADED // avoid the write lock of dictionary_set() for existing metrics @@ -571,18 +567,11 @@ static inline void statsd_process_histogram_or_timer(STATSD_METRIC *m, const cha #define statsd_process_timer(m, value, sampling) statsd_process_histogram_or_timer(m, value, sampling, "timer") #define statsd_process_histogram(m, value, sampling) statsd_process_histogram_or_timer(m, value, sampling, "histogram") -static void dictionary_metric_set_value_insert_callback(const DICTIONARY_ITEM *item, void *value, void *data) { - (void)item; - (void)value; - STATSD_METRIC *m = (STATSD_METRIC *)data; - m->set.unique++; -} - static inline void statsd_process_set(STATSD_METRIC *m, const char *value) { if(!is_metric_useful_for_collection(m)) return; if(unlikely(!value || !*value)) { - error("STATSD: metric of type set, with empty value is ignored."); + netdata_log_error("STATSD: metric of type set, with empty value is ignored."); return; } @@ -594,11 +583,8 @@ static inline void statsd_process_set(STATSD_METRIC *m, const char *value) { statsd_reset_metric(m); } - if (unlikely(!m->set.dict)) { - m->set.dict = dictionary_create_advanced(STATSD_DICTIONARY_OPTIONS, &dictionary_stats_category_collectors, 0); - 
dictionary_register_insert_callback(m->set.dict, dictionary_metric_set_value_insert_callback, m); - m->set.unique = 0; - } + if (unlikely(!m->set.dict)) + m->set.dict = dictionary_create_advanced(STATSD_DICTIONARY_OPTIONS, &dictionary_stats_category_collectors, 0); if(unlikely(value_is_zinit(value))) { // magic loading of metric, without affecting anything @@ -616,29 +602,19 @@ static inline void statsd_process_set(STATSD_METRIC *m, const char *value) { } } -static void dictionary_metric_dict_value_insert_callback(const DICTIONARY_ITEM *item, void *value, void *data) { - (void)item; - (void)value; - STATSD_METRIC *m = (STATSD_METRIC *)data; - m->dictionary.unique++; -} - static inline void statsd_process_dictionary(STATSD_METRIC *m, const char *value) { if(!is_metric_useful_for_collection(m)) return; if(unlikely(!value || !*value)) { - error("STATSD: metric of type set, with empty value is ignored."); + netdata_log_error("STATSD: metric of type set, with empty value is ignored."); return; } if(unlikely(m->reset)) statsd_reset_metric(m); - if (unlikely(!m->dictionary.dict)) { - m->dictionary.dict = dictionary_create_advanced(STATSD_DICTIONARY_OPTIONS, &dictionary_stats_category_collectors, 0); - dictionary_register_insert_callback(m->dictionary.dict, dictionary_metric_dict_value_insert_callback, m); - m->dictionary.unique = 0; - } + if (unlikely(!m->dictionary.dict)) + m->dictionary.dict = dictionary_create_advanced(STATSD_DICTIONARY_OPTIONS, &dictionary_stats_category_collectors, 0); if(unlikely(value_is_zinit(value))) { // magic loading of metric, without affecting anything @@ -647,7 +623,7 @@ static inline void statsd_process_dictionary(STATSD_METRIC *m, const char *value STATSD_METRIC_DICTIONARY_ITEM *t = (STATSD_METRIC_DICTIONARY_ITEM *)dictionary_get(m->dictionary.dict, value); if (unlikely(!t)) { - if(!t && m->dictionary.unique >= statsd.dictionary_max_unique) + if(!t && dictionary_entries(m->dictionary.dict) >= statsd.dictionary_max_unique) value = "other"; 
t = (STATSD_METRIC_DICTIONARY_ITEM *)dictionary_set(m->dictionary.dict, value, NULL, sizeof(STATSD_METRIC_DICTIONARY_ITEM)); @@ -697,7 +673,7 @@ static inline const char *statsd_parse_field_trim(const char *start, char *end) } static void statsd_process_metric(const char *name, const char *value, const char *type, const char *sampling, const char *tags) { - debug(D_STATSD, "STATSD: raw metric '%s', value '%s', type '%s', sampling '%s', tags '%s'", name?name:"(null)", value?value:"(null)", type?type:"(null)", sampling?sampling:"(null)", tags?tags:"(null)"); + netdata_log_debug(D_STATSD, "STATSD: raw metric '%s', value '%s', type '%s', sampling '%s', tags '%s'", name?name:"(null)", value?value:"(null)", type?type:"(null)", sampling?sampling:"(null)", tags?tags:"(null)"); if(unlikely(!name || !*name)) return; if(unlikely(!type || !*type)) type = "m"; @@ -744,7 +720,7 @@ static void statsd_process_metric(const char *name, const char *value, const cha } else { statsd.unknown_types++; - error("STATSD: metric '%s' with value '%s' is sent with unknown metric type '%s'", name, value?value:"", type); + netdata_log_error("STATSD: metric '%s' with value '%s' is sent with unknown metric type '%s'", name, value?value:"", type); } if(m && tags && *tags) { @@ -792,7 +768,7 @@ static void statsd_process_metric(const char *name, const char *value, const cha static inline size_t statsd_process(char *buffer, size_t size, int require_newlines) { buffer[size] = '\0'; - debug(D_STATSD, "RECEIVED: %zu bytes: '%s'", size, buffer); + netdata_log_debug(D_STATSD, "RECEIVED: %zu bytes: '%s'", size, buffer); const char *s = buffer; while(*s) { @@ -916,14 +892,14 @@ static void statsd_del_callback(POLLINFO *pi) { if(t->type == STATSD_SOCKET_DATA_TYPE_TCP) { if(t->len != 0) { statsd.socket_errors++; - error("STATSD: client is probably sending unterminated metrics. Closed socket left with '%s'. 
Trying to process it.", t->buffer); + netdata_log_error("STATSD: client is probably sending unterminated metrics. Closed socket left with '%s'. Trying to process it.", t->buffer); statsd_process(t->buffer, t->len, 0); } statsd.tcp_socket_disconnects++; statsd.tcp_socket_connected--; } else - error("STATSD: internal error: received socket data type is %d, but expected %d", (int)t->type, (int)STATSD_SOCKET_DATA_TYPE_TCP); + netdata_log_error("STATSD: internal error: received socket data type is %d, but expected %d", (int)t->type, (int)STATSD_SOCKET_DATA_TYPE_TCP); freez(t); } @@ -944,7 +920,7 @@ static int statsd_rcv_callback(POLLINFO *pi, short int *events) { case SOCK_STREAM: { struct statsd_tcp *d = (struct statsd_tcp *)pi->data; if(unlikely(!d)) { - error("STATSD: internal error: expected TCP data pointer is NULL"); + netdata_log_error("STATSD: internal error: expected TCP data pointer is NULL"); statsd.socket_errors++; retval = -1; goto cleanup; @@ -952,7 +928,7 @@ static int statsd_rcv_callback(POLLINFO *pi, short int *events) { #ifdef NETDATA_INTERNAL_CHECKS if(unlikely(d->type != STATSD_SOCKET_DATA_TYPE_TCP)) { - error("STATSD: internal error: socket data type should be %d, but it is %d", (int)STATSD_SOCKET_DATA_TYPE_TCP, (int)d->type); + netdata_log_error("STATSD: internal error: socket data type should be %d, but it is %d", (int)STATSD_SOCKET_DATA_TYPE_TCP, (int)d->type); statsd.socket_errors++; retval = -1; goto cleanup; @@ -966,14 +942,14 @@ static int statsd_rcv_callback(POLLINFO *pi, short int *events) { if (rc < 0) { // read failed if (errno != EWOULDBLOCK && errno != EAGAIN && errno != EINTR) { - error("STATSD: recv() on TCP socket %d failed.", fd); + netdata_log_error("STATSD: recv() on TCP socket %d failed.", fd); statsd.socket_errors++; ret = -1; } } else if (!rc) { // connection closed - debug(D_STATSD, "STATSD: client disconnected."); + netdata_log_debug(D_STATSD, "STATSD: client disconnected."); ret = -1; } else { @@ -1000,7 +976,7 @@ static int 
statsd_rcv_callback(POLLINFO *pi, short int *events) { case SOCK_DGRAM: { struct statsd_udp *d = (struct statsd_udp *)pi->data; if(unlikely(!d)) { - error("STATSD: internal error: expected UDP data pointer is NULL"); + netdata_log_error("STATSD: internal error: expected UDP data pointer is NULL"); statsd.socket_errors++; retval = -1; goto cleanup; @@ -1008,7 +984,7 @@ static int statsd_rcv_callback(POLLINFO *pi, short int *events) { #ifdef NETDATA_INTERNAL_CHECKS if(unlikely(d->type != STATSD_SOCKET_DATA_TYPE_UDP)) { - error("STATSD: internal error: socket data should be %d, but it is %d", (int)d->type, (int)STATSD_SOCKET_DATA_TYPE_UDP); + netdata_log_error("STATSD: internal error: socket data should be %d, but it is %d", (int)d->type, (int)STATSD_SOCKET_DATA_TYPE_UDP); statsd.socket_errors++; retval = -1; goto cleanup; @@ -1022,7 +998,7 @@ static int statsd_rcv_callback(POLLINFO *pi, short int *events) { if (rc < 0) { // read failed if (errno != EWOULDBLOCK && errno != EAGAIN && errno != EINTR) { - error("STATSD: recvmmsg() on UDP socket %d failed.", fd); + netdata_log_error("STATSD: recvmmsg() on UDP socket %d failed.", fd); statsd.socket_errors++; retval = -1; goto cleanup; @@ -1048,7 +1024,7 @@ static int statsd_rcv_callback(POLLINFO *pi, short int *events) { if (rc < 0) { // read failed if (errno != EWOULDBLOCK && errno != EAGAIN && errno != EINTR) { - error("STATSD: recv() on UDP socket %d failed.", fd); + netdata_log_error("STATSD: recv() on UDP socket %d failed.", fd); statsd.socket_errors++; retval = -1; goto cleanup; @@ -1067,7 +1043,7 @@ static int statsd_rcv_callback(POLLINFO *pi, short int *events) { } default: { - error("STATSD: internal error: unknown socktype %d on socket %d", pi->socktype, fd); + netdata_log_error("STATSD: internal error: unknown socktype %d on socket %d", pi->socktype, fd); statsd.socket_errors++; retval = -1; goto cleanup; @@ -1085,7 +1061,7 @@ static int statsd_snd_callback(POLLINFO *pi, short int *events) { (void)events; 
worker_is_busy(WORKER_JOB_TYPE_SND_DATA); - error("STATSD: snd_callback() called, but we never requested to send data to statsd clients."); + netdata_log_error("STATSD: snd_callback() called, but we never requested to send data to statsd clients."); worker_is_idle(); return -1; @@ -1096,9 +1072,9 @@ static int statsd_snd_callback(POLLINFO *pi, short int *events) { void statsd_collector_thread_cleanup(void *data) { struct statsd_udp *d = data; - netdata_spinlock_lock(&d->status->spinlock); + spinlock_lock(&d->status->spinlock); d->status->running = false; - netdata_spinlock_unlock(&d->status->spinlock); + spinlock_unlock(&d->status->spinlock); collector_info("cleaning up..."); @@ -1121,9 +1097,9 @@ static bool statsd_should_stop(void) { void *statsd_collector_thread(void *ptr) { struct collection_thread_status *status = ptr; - netdata_spinlock_lock(&status->spinlock); + spinlock_lock(&status->spinlock); status->running = true; - netdata_spinlock_unlock(&status->spinlock); + spinlock_unlock(&status->spinlock); worker_register("STATSD"); worker_register_job_name(WORKER_JOB_TYPE_TCP_CONNECTED, "tcp connect"); @@ -1193,7 +1169,7 @@ static STATSD_APP_CHART_DIM_VALUE_TYPE string2valuetype(const char *type, size_t else if(!strcmp(type, "stddev")) return STATSD_APP_CHART_DIM_VALUE_TYPE_STDDEV; else if(!strcmp(type, "percentile")) return STATSD_APP_CHART_DIM_VALUE_TYPE_PERCENTILE; - error("STATSD: invalid type '%s' at line %zu of file '%s'. Using 'last'.", type, line, filename); + netdata_log_error("STATSD: invalid type '%s' at line %zu of file '%s'. 
Using 'last'.", type, line, filename); return STATSD_APP_CHART_DIM_VALUE_TYPE_LAST; } @@ -1255,20 +1231,20 @@ static STATSD_APP_CHART_DIM *add_dimension_to_app_chart( } chart->dimensions_count++; - debug(D_STATSD, "Added dimension '%s' to chart '%s' of app '%s', for metric '%s', with type %u, multiplier " COLLECTED_NUMBER_FORMAT ", divisor " COLLECTED_NUMBER_FORMAT, + netdata_log_debug(D_STATSD, "Added dimension '%s' to chart '%s' of app '%s', for metric '%s', with type %u, multiplier %d, divisor %d", dim->name, chart->id, app->name, dim->metric, dim->value_type, dim->multiplier, dim->divisor); return dim; } static int statsd_readfile(const char *filename, STATSD_APP *app, STATSD_APP_CHART *chart, DICTIONARY *dict) { - debug(D_STATSD, "STATSD configuration reading file '%s'", filename); + netdata_log_debug(D_STATSD, "STATSD configuration reading file '%s'", filename); char *buffer = mallocz(STATSD_CONF_LINE_MAX + 1); FILE *fp = fopen(filename, "r"); if(!fp) { - error("STATSD: cannot open file '%s'.", filename); + netdata_log_error("STATSD: cannot open file '%s'.", filename); freez(buffer); return -1; } @@ -1281,11 +1257,11 @@ static int statsd_readfile(const char *filename, STATSD_APP *app, STATSD_APP_CHA s = trim(buffer); if (!s || *s == '#') { - debug(D_STATSD, "STATSD: ignoring line %zu of file '%s', it is empty.", line, filename); + netdata_log_debug(D_STATSD, "STATSD: ignoring line %zu of file '%s', it is empty.", line, filename); continue; } - debug(D_STATSD, "STATSD: processing line %zu of file '%s': %s", line, filename, buffer); + netdata_log_debug(D_STATSD, "STATSD: processing line %zu of file '%s': %s", line, filename, buffer); if(*s == 'i' && strncmp(s, "include", 7) == 0) { s = trim(&s[7]); @@ -1305,7 +1281,7 @@ static int statsd_readfile(const char *filename, STATSD_APP *app, STATSD_APP_CHA freez(tmp); } else - error("STATSD: ignoring line %zu of file '%s', include filename is empty", line, filename); + netdata_log_error("STATSD: ignoring line %zu of 
file '%s', include filename is empty", line, filename); continue; } @@ -1372,20 +1348,20 @@ static int statsd_readfile(const char *filename, STATSD_APP *app, STATSD_APP_CHA } } else - error("STATSD: ignoring line %zu ('%s') of file '%s', [app] is not defined.", line, s, filename); + netdata_log_error("STATSD: ignoring line %zu ('%s') of file '%s', [app] is not defined.", line, s, filename); continue; } if(!app) { - error("STATSD: ignoring line %zu ('%s') of file '%s', it is outside all sections.", line, s, filename); + netdata_log_error("STATSD: ignoring line %zu ('%s') of file '%s', it is outside all sections.", line, s, filename); continue; } char *name = s; char *value = strchr(s, '='); if(!value) { - error("STATSD: ignoring line %zu ('%s') of file '%s', there is no = in it.", line, s, filename); + netdata_log_error("STATSD: ignoring line %zu ('%s') of file '%s', there is no = in it.", line, s, filename); continue; } *value = '\0'; @@ -1395,11 +1371,11 @@ static int statsd_readfile(const char *filename, STATSD_APP *app, STATSD_APP_CHA value = trim(value); if(!name || *name == '#') { - error("STATSD: ignoring line %zu of file '%s', name is empty.", line, filename); + netdata_log_error("STATSD: ignoring line %zu of file '%s', name is empty.", line, filename); continue; } if(!value) { - debug(D_CONFIG, "STATSD: ignoring line %zu of file '%s', value is empty.", line, filename); + netdata_log_debug(D_CONFIG, "STATSD: ignoring line %zu of file '%s', value is empty.", line, filename); continue; } @@ -1442,7 +1418,7 @@ static int statsd_readfile(const char *filename, STATSD_APP *app, STATSD_APP_CHA app->rrd_history_entries = 5; } else { - error("STATSD: ignoring line %zu ('%s') of file '%s'. Unknown keyword for the [app] section.", line, name, filename); + netdata_log_error("STATSD: ignoring line %zu ('%s') of file '%s'. 
Unknown keyword for the [app] section.", line, name, filename); continue; } } @@ -1480,7 +1456,7 @@ static int statsd_readfile(const char *filename, STATSD_APP *app, STATSD_APP_CHA else if (!strcmp(name, "dimension")) { // metric [name [type [multiplier [divisor]]]] char *words[10] = { NULL }; - size_t num_words = pluginsd_split_words(value, words, 10); + size_t num_words = quoted_strings_splitter_pluginsd(value, words, 10); int pattern = 0; size_t i = 0; @@ -1536,7 +1512,7 @@ static int statsd_readfile(const char *filename, STATSD_APP *app, STATSD_APP_CHA dim->metric_pattern = simple_pattern_create(dim->metric, NULL, SIMPLE_PATTERN_EXACT, true); } else { - error("STATSD: ignoring line %zu ('%s') of file '%s'. Unknown keyword for the [%s] section.", line, name, filename, chart->id); + netdata_log_error("STATSD: ignoring line %zu ('%s') of file '%s'. Unknown keyword for the [%s] section.", line, name, filename, chart->id); continue; } } @@ -1649,7 +1625,7 @@ static inline RRDSET *statsd_private_rrdset_create( } static inline void statsd_private_chart_gauge(STATSD_METRIC *m) { - debug(D_STATSD, "updating private chart for gauge metric '%s'", m->name); + netdata_log_debug(D_STATSD, "updating private chart for gauge metric '%s'", m->name); if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; @@ -1689,7 +1665,7 @@ static inline void statsd_private_chart_gauge(STATSD_METRIC *m) { } static inline void statsd_private_chart_counter_or_meter(STATSD_METRIC *m, const char *dim, const char *family) { - debug(D_STATSD, "updating private chart for %s metric '%s'", dim, m->name); + netdata_log_debug(D_STATSD, "updating private chart for %s metric '%s'", dim, m->name); if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; @@ -1729,7 +1705,7 @@ static inline void 
statsd_private_chart_counter_or_meter(STATSD_METRIC *m, const } static inline void statsd_private_chart_set(STATSD_METRIC *m) { - debug(D_STATSD, "updating private chart for set metric '%s'", m->name); + netdata_log_debug(D_STATSD, "updating private chart for set metric '%s'", m->name); if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; @@ -1769,7 +1745,7 @@ static inline void statsd_private_chart_set(STATSD_METRIC *m) { } static inline void statsd_private_chart_dictionary(STATSD_METRIC *m) { - debug(D_STATSD, "updating private chart for dictionary metric '%s'", m->name); + netdata_log_debug(D_STATSD, "updating private chart for dictionary metric '%s'", m->name); if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; @@ -1812,7 +1788,7 @@ static inline void statsd_private_chart_dictionary(STATSD_METRIC *m) { } static inline void statsd_private_chart_timer_or_histogram(STATSD_METRIC *m, const char *dim, const char *family, const char *units) { - debug(D_STATSD, "updating private chart for %s metric '%s'", dim, m->name); + netdata_log_debug(D_STATSD, "updating private chart for %s metric '%s'", dim, m->name); if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; @@ -1867,7 +1843,7 @@ static inline void statsd_private_chart_timer_or_histogram(STATSD_METRIC *m, con // statsd flush metrics static inline void statsd_flush_gauge(STATSD_METRIC *m) { - debug(D_STATSD, "flushing gauge metric '%s'", m->name); + netdata_log_debug(D_STATSD, "flushing gauge metric '%s'", m->name); int updated = 0; if(unlikely(!m->reset && m->count)) { @@ -1882,7 +1858,7 @@ static inline void statsd_flush_gauge(STATSD_METRIC *m) { } static inline void statsd_flush_counter_or_meter(STATSD_METRIC *m, const char *dim, const char 
*family) { - debug(D_STATSD, "flushing %s metric '%s'", dim, m->name); + netdata_log_debug(D_STATSD, "flushing %s metric '%s'", dim, m->name); int updated = 0; if(unlikely(!m->reset && m->count)) { @@ -1905,11 +1881,11 @@ static inline void statsd_flush_meter(STATSD_METRIC *m) { } static inline void statsd_flush_set(STATSD_METRIC *m) { - debug(D_STATSD, "flushing set metric '%s'", m->name); + netdata_log_debug(D_STATSD, "flushing set metric '%s'", m->name); int updated = 0; if(unlikely(!m->reset && m->count)) { - m->last = (collected_number)m->set.unique; + m->last = (collected_number)dictionary_entries(m->set.dict); m->reset = 1; updated = 1; @@ -1923,11 +1899,11 @@ static inline void statsd_flush_set(STATSD_METRIC *m) { } static inline void statsd_flush_dictionary(STATSD_METRIC *m) { - debug(D_STATSD, "flushing dictionary metric '%s'", m->name); + netdata_log_debug(D_STATSD, "flushing dictionary metric '%s'", m->name); int updated = 0; if(unlikely(!m->reset && m->count)) { - m->last = (collected_number)m->dictionary.unique; + m->last = (collected_number)dictionary_entries(m->dictionary.dict); m->reset = 1; updated = 1; @@ -1939,19 +1915,19 @@ static inline void statsd_flush_dictionary(STATSD_METRIC *m) { if(unlikely(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && (updated || !(m->options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED)))) statsd_private_chart_dictionary(m); - if(m->dictionary.unique >= statsd.dictionary_max_unique) { + if(dictionary_entries(m->dictionary.dict) >= statsd.dictionary_max_unique) { if(!(m->options & STATSD_METRIC_OPTION_COLLECTION_FULL_LOGGED)) { m->options |= STATSD_METRIC_OPTION_COLLECTION_FULL_LOGGED; collector_info( "STATSD dictionary '%s' reach max of %zu items - try increasing 'dictionaries max unique dimensions' in netdata.conf", m->name, - m->dictionary.unique); + dictionary_entries(m->dictionary.dict)); } } } static inline void statsd_flush_timer_or_histogram(STATSD_METRIC *m, const char *dim, const char 
*family, const char *units) { - debug(D_STATSD, "flushing %s metric '%s'", dim, m->name); + netdata_log_debug(D_STATSD, "flushing %s metric '%s'", dim, m->name); int updated = 0; if(unlikely(!m->reset && m->count && m->histogram.ext->used > 0)) { @@ -1976,7 +1952,7 @@ static inline void statsd_flush_timer_or_histogram(STATSD_METRIC *m, const char netdata_mutex_unlock(&m->histogram.ext->mutex); - debug(D_STATSD, "STATSD %s metric %s: min " COLLECTED_NUMBER_FORMAT ", max " COLLECTED_NUMBER_FORMAT ", last " COLLECTED_NUMBER_FORMAT ", pcent " COLLECTED_NUMBER_FORMAT ", median " COLLECTED_NUMBER_FORMAT ", stddev " COLLECTED_NUMBER_FORMAT ", sum " COLLECTED_NUMBER_FORMAT, + netdata_log_debug(D_STATSD, "STATSD %s metric %s: min " COLLECTED_NUMBER_FORMAT ", max " COLLECTED_NUMBER_FORMAT ", last " COLLECTED_NUMBER_FORMAT ", pcent " COLLECTED_NUMBER_FORMAT ", median " COLLECTED_NUMBER_FORMAT ", stddev " COLLECTED_NUMBER_FORMAT ", sum " COLLECTED_NUMBER_FORMAT, dim, m->name, m->histogram.ext->last_min, m->histogram.ext->last_max, m->last, m->histogram.ext->last_percentile, m->histogram.ext->last_median, m->histogram.ext->last_stddev, m->histogram.ext->last_sum); m->histogram.ext->zeroed = 0; @@ -2073,7 +2049,7 @@ static inline void link_metric_to_app_dimension(STATSD_APP *app, STATSD_METRIC * } else { if (dim->value_type != STATSD_APP_CHART_DIM_VALUE_TYPE_LAST) - error("STATSD: unsupported value type for dimension '%s' of chart '%s' of app '%s' on metric '%s'", dim->name, chart->id, app->name, m->name); + netdata_log_error("STATSD: unsupported value type for dimension '%s' of chart '%s' of app '%s' on metric '%s'", dim->name, chart->id, app->name, m->name); dim->value_ptr = &m->last; dim->algorithm = statsd_algorithm_for_metric(m); @@ -2090,7 +2066,7 @@ static inline void link_metric_to_app_dimension(STATSD_APP *app, STATSD_METRIC * chart->dimensions_linked_count++; m->options |= STATSD_METRIC_OPTION_USED_IN_APPS; - debug(D_STATSD, "metric '%s' of type %u linked with app 
'%s', chart '%s', dimension '%s', algorithm '%s'", m->name, m->type, app->name, chart->id, dim->name, rrd_algorithm_name(dim->algorithm)); + netdata_log_debug(D_STATSD, "metric '%s' of type %u linked with app '%s', chart '%s', dimension '%s', algorithm '%s'", m->name, m->type, app->name, chart->id, dim->name, rrd_algorithm_name(dim->algorithm)); } static inline void check_if_metric_is_for_app(STATSD_INDEX *index, STATSD_METRIC *m) { @@ -2099,7 +2075,7 @@ static inline void check_if_metric_is_for_app(STATSD_INDEX *index, STATSD_METRIC STATSD_APP *app; for(app = statsd.apps; app ;app = app->next) { if(unlikely(simple_pattern_matches(app->metrics, m->name))) { - debug(D_STATSD, "metric '%s' matches app '%s'", m->name, app->name); + netdata_log_debug(D_STATSD, "metric '%s' matches app '%s'", m->name, app->name); // the metric should get the options from the app @@ -2213,18 +2189,18 @@ static inline RRDDIM *statsd_add_dim_to_app_chart(STATSD_APP *app, STATSD_APP_CH dim->rd = rrddim_add(chart->st, metric, dim->name, dim->multiplier, dim->divisor, dim->algorithm); if(dim->flags != RRDDIM_FLAG_NONE) dim->rd->flags |= dim->flags; - if(dim->options != RRDDIM_OPTION_NONE) dim->rd->options |= dim->options; + if(dim->options != RRDDIM_OPTION_NONE) dim->rd->collector.options |= dim->options; return dim->rd; } dim->rd = rrddim_add(chart->st, dim->metric, dim->name, dim->multiplier, dim->divisor, dim->algorithm); if(dim->flags != RRDDIM_FLAG_NONE) dim->rd->flags |= dim->flags; - if(dim->options != RRDDIM_OPTION_NONE) dim->rd->options |= dim->options; + if(dim->options != RRDDIM_OPTION_NONE) dim->rd->collector.options |= dim->options; return dim->rd; } static inline void statsd_update_app_chart(STATSD_APP *app, STATSD_APP_CHART *chart) { - debug(D_STATSD, "updating chart '%s' for app '%s'", chart->id, app->name); + netdata_log_debug(D_STATSD, "updating chart '%s' for app '%s'", chart->id, app->name); if(!chart->st) { chart->st = rrdset_create_custom( @@ -2256,22 +2232,22 @@ static 
inline void statsd_update_app_chart(STATSD_APP *app, STATSD_APP_CHART *ch statsd_add_dim_to_app_chart(app, chart, dim); if (unlikely(dim->value_ptr)) { - debug(D_STATSD, "updating dimension '%s' (%s) of chart '%s' (%s) for app '%s' with value " COLLECTED_NUMBER_FORMAT, dim->name, rrddim_id(dim->rd), chart->id, rrdset_id(chart->st), app->name, *dim->value_ptr); + netdata_log_debug(D_STATSD, "updating dimension '%s' (%s) of chart '%s' (%s) for app '%s' with value " COLLECTED_NUMBER_FORMAT, dim->name, rrddim_id(dim->rd), chart->id, rrdset_id(chart->st), app->name, *dim->value_ptr); rrddim_set_by_pointer(chart->st, dim->rd, *dim->value_ptr); } } } rrdset_done(chart->st); - debug(D_STATSD, "completed update of chart '%s' for app '%s'", chart->id, app->name); + netdata_log_debug(D_STATSD, "completed update of chart '%s' for app '%s'", chart->id, app->name); } static inline void statsd_update_all_app_charts(void) { - // debug(D_STATSD, "updating app charts"); + // netdata_log_debug(D_STATSD, "updating app charts"); STATSD_APP *app; for(app = statsd.apps; app ;app = app->next) { - // debug(D_STATSD, "updating charts for app '%s'", app->name); + // netdata_log_debug(D_STATSD, "updating charts for app '%s'", app->name); STATSD_APP_CHART *chart; for(chart = app->charts; chart ;chart = chart->next) { @@ -2281,7 +2257,7 @@ static inline void statsd_update_all_app_charts(void) { } } - // debug(D_STATSD, "completed update of app charts"); + // netdata_log_debug(D_STATSD, "completed update of app charts"); } const char *statsd_metric_type_string(STATSD_METRIC_TYPE type) { @@ -2307,23 +2283,28 @@ static inline void statsd_flush_index_metrics(STATSD_INDEX *index, void (*flush_ if(unlikely(is_metric_checked(m))) break; if(unlikely(!(m->options & STATSD_METRIC_OPTION_CHECKED_IN_APPS))) { - log_access("NEW STATSD METRIC '%s': '%s'", statsd_metric_type_string(m->type), m->name); + netdata_log_access("NEW STATSD METRIC '%s': '%s'", statsd_metric_type_string(m->type), m->name); 
check_if_metric_is_for_app(index, m); m->options |= STATSD_METRIC_OPTION_CHECKED_IN_APPS; } if(unlikely(!(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_CHECKED))) { if(unlikely(statsd.private_charts >= statsd.max_private_charts_hard)) { - debug(D_STATSD, "STATSD: metric '%s' will not be charted, because the hard limit of the maximum number of charts has been reached.", m->name); - collector_info("STATSD: metric '%s' will not be charted, because the hard limit of the maximum number of charts (%zu) has been reached. Increase the number of charts by editing netdata.conf, [statsd] section.", m->name, statsd.max_private_charts_hard); + netdata_log_debug(D_STATSD, "STATSD: metric '%s' will not be charted, because the hard limit of the maximum number " + "of charts has been reached.", m->name); + + collector_info("STATSD: metric '%s' will not be charted, because the hard limit of the maximum number " + "of charts (%u) has been reached. Increase the number of charts by editing netdata.conf, " + "[statsd] section.", m->name, statsd.max_private_charts_hard); + m->options &= ~STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED; } else { if (simple_pattern_matches(statsd.charts_for, m->name)) { - debug(D_STATSD, "STATSD: metric '%s' will be charted.", m->name); + netdata_log_debug(D_STATSD, "STATSD: metric '%s' will be charted.", m->name); m->options |= STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED; } else { - debug(D_STATSD, "STATSD: metric '%s' will not be charted.", m->name); + netdata_log_debug(D_STATSD, "STATSD: metric '%s' will not be charted.", m->name); m->options &= ~STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED; } } @@ -2366,7 +2347,7 @@ static void statsd_main_cleanup(void *data) { if (statsd.collection_threads_status) { int i; for (i = 0; i < statsd.threads; i++) { - netdata_spinlock_lock(&statsd.collection_threads_status[i].spinlock); + spinlock_lock(&statsd.collection_threads_status[i].spinlock); if(statsd.collection_threads_status[i].running) { collector_info("STATSD: 
stopping data collection thread %d...", i + 1); netdata_thread_cancel(statsd.collection_threads_status[i].thread); @@ -2374,7 +2355,7 @@ static void statsd_main_cleanup(void *data) { else { collector_info("STATSD: data collection thread %d found stopped.", i + 1); } - netdata_spinlock_unlock(&statsd.collection_threads_status[i].spinlock); + spinlock_unlock(&statsd.collection_threads_status[i].spinlock); } } @@ -2466,7 +2447,6 @@ void *statsd_main(void *ptr) { config_get(CONFIG_SECTION_STATSD, "create private charts for metrics matching", "*"), NULL, SIMPLE_PATTERN_EXACT, true); statsd.max_private_charts_hard = (size_t)config_get_number(CONFIG_SECTION_STATSD, "max private charts hard limit", (long long)statsd.max_private_charts_hard); - statsd.private_charts_rrd_history_entries = (int)config_get_number(CONFIG_SECTION_STATSD, "private charts history", default_rrd_history_entries); statsd.decimal_detail = (collected_number)config_get_number(CONFIG_SECTION_STATSD, "decimal detail", (long long int)statsd.decimal_detail); statsd.tcp_idle_timeout = (size_t) config_get_number(CONFIG_SECTION_STATSD, "disconnect idle tcp clients after seconds", (long long int)statsd.tcp_idle_timeout); statsd.private_charts_hidden = (unsigned int)config_get_boolean(CONFIG_SECTION_STATSD, "private charts hidden", statsd.private_charts_hidden); @@ -2549,7 +2529,7 @@ void *statsd_main(void *ptr) { statsd.collection_threads_status[i].max_sockets = max_sockets / statsd.threads; char tag[NETDATA_THREAD_TAG_MAX + 1]; snprintfz(tag, NETDATA_THREAD_TAG_MAX, "STATSD_IN[%d]", i + 1); - netdata_spinlock_init(&statsd.collection_threads_status[i].spinlock); + spinlock_init(&statsd.collection_threads_status[i].spinlock); netdata_thread_create(&statsd.collection_threads_status[i].thread, tag, NETDATA_THREAD_OPTION_DEFAULT, statsd_collector_thread, &statsd.collection_threads_status[i]); } diff --git a/collectors/tc.plugin/metadata.yaml b/collectors/tc.plugin/metadata.yaml new file mode 100644 index 
00000000..2fca8d1e --- /dev/null +++ b/collectors/tc.plugin/metadata.yaml @@ -0,0 +1,102 @@ +meta: + plugin_name: tc.plugin + module_name: tc.plugin + monitored_instance: + name: tc + link: '' + categories: + - data-collection.networking-stack-and-network-interfaces + icon_filename: 'freeradius.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Examine tc metrics to gain insights into Linux traffic control operations. Study packet flow rates, queue lengths, and drop rates to optimize network traffic flow.' + method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: network device, direction + description: "" + labels: + - name: device + description: TBD + - name: name + description: TBD + - name: family + description: TBD + metrics: + - name: tc.qos + description: Class Usage + unit: "kilobits/s" + chart_type: stacked + dimensions: + - name: a dimension per class + - name: tc.qos_packets + description: Class Packets + unit: "packets/s" + chart_type: stacked + dimensions: + - name: a dimension per class + - name: tc.qos_dropped + description: Class Dropped Packets + unit: "packets/s" + chart_type: stacked + dimensions: + - name: a dimension per class + - name: tc.qos_tokens + description: Class Tokens + unit: "tokens" + 
chart_type: line + dimensions: + - name: a dimension per class + - name: tc.qos_ctokens + description: Class cTokens + unit: "ctokens" + chart_type: line + dimensions: + - name: a dimension per class diff --git a/collectors/tc.plugin/plugin_tc.c b/collectors/tc.plugin/plugin_tc.c index b833fd3c..eae70453 100644 --- a/collectors/tc.plugin/plugin_tc.c +++ b/collectors/tc.plugin/plugin_tc.c @@ -174,7 +174,7 @@ static inline struct tc_device *tc_device_index_find(const char *id) { // ---------------------------------------------------------------------------- static inline void tc_class_free(struct tc_device *n, struct tc_class *c) { - debug(D_TC_LOOP, "Removing from device '%s' class '%s', parentid '%s', leafid '%s', unused=%d", + netdata_log_debug(D_TC_LOOP, "Removing from device '%s' class '%s', parentid '%s', leafid '%s', unused=%d", string2str(n->id), string2str(c->id), string2str(c->parentid), string2str(c->leafid), c->unupdated); @@ -271,7 +271,7 @@ static inline void tc_device_commit(struct tc_device *d) { dfe_done(c); if(unlikely(!d->enabled || (!updated_classes && !updated_qdiscs))) { - debug(D_TC_LOOP, "TC: Ignoring TC device '%s'. It is not enabled/updated.", string2str(d->name?d->name:d->id)); + netdata_log_debug(D_TC_LOOP, "TC: Ignoring TC device '%s'. 
It is not enabled/updated.", string2str(d->name?d->name:d->id)); tc_device_classes_cleanup(d); return; } @@ -308,7 +308,7 @@ static inline void tc_device_commit(struct tc_device *d) { if(unlikely(!c->updated)) continue; - //debug(D_TC_LOOP, "TC: In device '%s', %s '%s' has leafid: '%s' and parentid '%s'.", + //netdata_log_debug(D_TC_LOOP, "TC: In device '%s', %s '%s' has leafid: '%s' and parentid '%s'.", // d->id, // c->isqdisc?"qdisc":"class", // c->id, @@ -326,7 +326,7 @@ static inline void tc_device_commit(struct tc_device *d) { if((x->parentid && c->id == x->parentid) || (c->leafid && x->parentid && c->leafid == x->parentid)) { - // debug(D_TC_LOOP, "TC: In device '%s', %s '%s' (leafid: '%s') has as leaf %s '%s' (parentid: '%s').", d->name?d->name:d->id, c->isqdisc?"qdisc":"class", c->name?c->name:c->id, c->leafid?c->leafid:c->id, x->isqdisc?"qdisc":"class", x->name?x->name:x->id, x->parentid?x->parentid:x->id); + // netdata_log_debug(D_TC_LOOP, "TC: In device '%s', %s '%s' (leafid: '%s') has as leaf %s '%s' (parentid: '%s').", d->name?d->name:d->id, c->isqdisc?"qdisc":"class", c->name?c->name:c->id, c->leafid?c->leafid:c->id, x->isqdisc?"qdisc":"class", x->name?x->name:x->id, x->parentid?x->parentid:x->id); c->isleaf = false; x->hasparent = true; } @@ -340,7 +340,7 @@ static inline void tc_device_commit(struct tc_device *d) { if(unlikely(!c->updated)) continue; - // debug(D_TC_LOOP, "TC: device '%s', %s '%s' isleaf=%d, hasparent=%d", d->id, (c->isqdisc)?"qdisc":"class", c->id, c->isleaf, c->hasparent); + // netdata_log_debug(D_TC_LOOP, "TC: device '%s', %s '%s' isleaf=%d, hasparent=%d", d->id, (c->isqdisc)?"qdisc":"class", c->id, c->isleaf, c->hasparent); if(unlikely((c->isleaf && c->hasparent) || d->enabled_all_classes_qdiscs)) { c->render = true; @@ -355,7 +355,7 @@ static inline void tc_device_commit(struct tc_device *d) { //if(unlikely(!c->hasparent)) { // if(root) collector_error("TC: multiple root class/qdisc for device '%s' (old: '%s', new: '%s')", 
d->id, root->id, c->id); // root = c; - // debug(D_TC_LOOP, "TC: found root class/qdisc '%s'", root->id); + // netdata_log_debug(D_TC_LOOP, "TC: found root class/qdisc '%s'", root->id); //} } dfe_done(c); @@ -365,8 +365,8 @@ static inline void tc_device_commit(struct tc_device *d) { if(unlikely(debug_flags & D_TC_LOOP)) { dfe_start_read(d->classes, c) { - if(c->render) debug(D_TC_LOOP, "TC: final nodes dump for '%s': class %s, OK", string2str(d->name), string2str(c->id)); - else debug(D_TC_LOOP, "TC: final nodes dump for '%s': class '%s', IGNORE (updated: %d, isleaf: %d, hasparent: %d, parent: '%s')", + if(c->render) netdata_log_debug(D_TC_LOOP, "TC: final nodes dump for '%s': class %s, OK", string2str(d->name), string2str(c->id)); + else netdata_log_debug(D_TC_LOOP, "TC: final nodes dump for '%s': class '%s', IGNORE (updated: %d, isleaf: %d, hasparent: %d, parent: '%s')", string2str(d->name?d->name:d->id), string2str(c->id), c->updated, c->isleaf, c->hasparent, string2str(c->parentid)); } dfe_done(c); @@ -374,12 +374,12 @@ static inline void tc_device_commit(struct tc_device *d) { #endif if(unlikely(!active_nodes)) { - debug(D_TC_LOOP, "TC: Ignoring TC device '%s'. No useful classes/qdiscs.", string2str(d->name?d->name:d->id)); + netdata_log_debug(D_TC_LOOP, "TC: Ignoring TC device '%s'. No useful classes/qdiscs.", string2str(d->name?d->name:d->id)); tc_device_classes_cleanup(d); return; } - debug(D_TC_LOOP, "TC: evaluating TC device '%s'. enabled = %d/%d (bytes: %d/%d, packets: %d/%d, dropped: %d/%d, tokens: %d/%d, ctokens: %d/%d, all_classes_qdiscs: %d/%d), classes: (bytes = %llu, packets = %llu, dropped = %llu, tokens = %llu, ctokens = %llu).", + netdata_log_debug(D_TC_LOOP, "TC: evaluating TC device '%s'. 
enabled = %d/%d (bytes: %d/%d, packets: %d/%d, dropped: %d/%d, tokens: %d/%d, ctokens: %d/%d, all_classes_qdiscs: %d/%d), classes: (bytes = %llu, packets = %llu, dropped = %llu, tokens = %llu, ctokens = %llu).", string2str(d->name?d->name:d->id), d->enabled, enable_new_interfaces, d->enabled_bytes, enable_bytes, @@ -418,18 +418,18 @@ static inline void tc_device_commit(struct tc_device *d) { d->enabled_all_classes_qdiscs ? RRDSET_TYPE_LINE : RRDSET_TYPE_STACKED); rrdlabels_add(d->st_bytes->rrdlabels, "device", string2str(d->id), RRDLABEL_SRC_AUTO); - rrdlabels_add(d->st_bytes->rrdlabels, "name", string2str(d->name?d->name:d->id), RRDLABEL_SRC_AUTO); - rrdlabels_add(d->st_bytes->rrdlabels, "family", string2str(d->family?d->family:d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_bytes->rrdlabels, "device_name", string2str(d->name?d->name:d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_bytes->rrdlabels, "device_group", string2str(d->family?d->family:d->id), RRDLABEL_SRC_AUTO); } else { if(unlikely(d->name_updated)) rrdset_reset_name(d->st_bytes, string2str(d->name)); if(d->name && d->name_updated) - rrdlabels_add(d->st_bytes->rrdlabels, "name", string2str(d->name), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_bytes->rrdlabels, "device_name", string2str(d->name), RRDLABEL_SRC_AUTO); if(d->family && d->family_updated) - rrdlabels_add(d->st_bytes->rrdlabels, "family", string2str(d->family), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_bytes->rrdlabels, "device_group", string2str(d->family), RRDLABEL_SRC_AUTO); // TODO // update the family @@ -479,8 +479,8 @@ static inline void tc_device_commit(struct tc_device *d) { d->enabled_all_classes_qdiscs ? 
RRDSET_TYPE_LINE : RRDSET_TYPE_STACKED); rrdlabels_add(d->st_packets->rrdlabels, "device", string2str(d->id), RRDLABEL_SRC_AUTO); - rrdlabels_add(d->st_packets->rrdlabels, "name", string2str(d->name?d->name:d->id), RRDLABEL_SRC_AUTO); - rrdlabels_add(d->st_packets->rrdlabels, "family", string2str(d->family?d->family:d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_packets->rrdlabels, "device_name", string2str(d->name?d->name:d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_packets->rrdlabels, "device_group", string2str(d->family?d->family:d->id), RRDLABEL_SRC_AUTO); } else { if(unlikely(d->name_updated)) { @@ -490,10 +490,10 @@ static inline void tc_device_commit(struct tc_device *d) { } if(d->name && d->name_updated) - rrdlabels_add(d->st_packets->rrdlabels, "name", string2str(d->name), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_packets->rrdlabels, "device_name", string2str(d->name), RRDLABEL_SRC_AUTO); if(d->family && d->family_updated) - rrdlabels_add(d->st_packets->rrdlabels, "family", string2str(d->family), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_packets->rrdlabels, "device_group", string2str(d->family), RRDLABEL_SRC_AUTO); // TODO // update the family @@ -543,8 +543,8 @@ static inline void tc_device_commit(struct tc_device *d) { d->enabled_all_classes_qdiscs ? 
RRDSET_TYPE_LINE : RRDSET_TYPE_STACKED); rrdlabels_add(d->st_dropped->rrdlabels, "device", string2str(d->id), RRDLABEL_SRC_AUTO); - rrdlabels_add(d->st_dropped->rrdlabels, "name", string2str(d->name?d->name:d->id), RRDLABEL_SRC_AUTO); - rrdlabels_add(d->st_dropped->rrdlabels, "family", string2str(d->family?d->family:d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_dropped->rrdlabels, "device_name", string2str(d->name?d->name:d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_dropped->rrdlabels, "device_group", string2str(d->family?d->family:d->id), RRDLABEL_SRC_AUTO); } else { if(unlikely(d->name_updated)) { @@ -554,10 +554,10 @@ static inline void tc_device_commit(struct tc_device *d) { } if(d->name && d->name_updated) - rrdlabels_add(d->st_dropped->rrdlabels, "name", string2str(d->name), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_dropped->rrdlabels, "device_name", string2str(d->name), RRDLABEL_SRC_AUTO); if(d->family && d->family_updated) - rrdlabels_add(d->st_dropped->rrdlabels, "family", string2str(d->family), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_dropped->rrdlabels, "device_group", string2str(d->family), RRDLABEL_SRC_AUTO); // TODO // update the family @@ -607,8 +607,8 @@ static inline void tc_device_commit(struct tc_device *d) { RRDSET_TYPE_LINE); rrdlabels_add(d->st_tokens->rrdlabels, "device", string2str(d->id), RRDLABEL_SRC_AUTO); - rrdlabels_add(d->st_tokens->rrdlabels, "name", string2str(d->name?d->name:d->id), RRDLABEL_SRC_AUTO); - rrdlabels_add(d->st_tokens->rrdlabels, "family", string2str(d->family?d->family:d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_tokens->rrdlabels, "device_name", string2str(d->name?d->name:d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_tokens->rrdlabels, "device_group", string2str(d->family?d->family:d->id), RRDLABEL_SRC_AUTO); } else { if(unlikely(d->name_updated)) { @@ -618,10 +618,10 @@ static inline void tc_device_commit(struct tc_device *d) { } if(d->name && d->name_updated) - 
rrdlabels_add(d->st_tokens->rrdlabels, "name", string2str(d->name), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_tokens->rrdlabels, "device_name", string2str(d->name), RRDLABEL_SRC_AUTO); if(d->family && d->family_updated) - rrdlabels_add(d->st_tokens->rrdlabels, "family", string2str(d->family), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_tokens->rrdlabels, "device_group", string2str(d->family), RRDLABEL_SRC_AUTO); // TODO // update the family @@ -672,11 +672,11 @@ static inline void tc_device_commit(struct tc_device *d) { RRDSET_TYPE_LINE); rrdlabels_add(d->st_ctokens->rrdlabels, "device", string2str(d->id), RRDLABEL_SRC_AUTO); - rrdlabels_add(d->st_ctokens->rrdlabels, "name", string2str(d->name?d->name:d->id), RRDLABEL_SRC_AUTO); - rrdlabels_add(d->st_ctokens->rrdlabels, "family", string2str(d->family?d->family:d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_ctokens->rrdlabels, "device_name", string2str(d->name?d->name:d->id), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_ctokens->rrdlabels, "device_group", string2str(d->family?d->family:d->id), RRDLABEL_SRC_AUTO); } else { - debug(D_TC_LOOP, "TC: Updating _ctokens chart for device '%s'", string2str(d->name?d->name:d->id)); + netdata_log_debug(D_TC_LOOP, "TC: Updating _ctokens chart for device '%s'", string2str(d->name?d->name:d->id)); if(unlikely(d->name_updated)) { char name[RRD_ID_LENGTH_MAX + 1]; @@ -685,10 +685,10 @@ static inline void tc_device_commit(struct tc_device *d) { } if(d->name && d->name_updated) - rrdlabels_add(d->st_ctokens->rrdlabels, "name", string2str(d->name), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_ctokens->rrdlabels, "device_name", string2str(d->name), RRDLABEL_SRC_AUTO); if(d->family && d->family_updated) - rrdlabels_add(d->st_ctokens->rrdlabels, "family", string2str(d->family), RRDLABEL_SRC_AUTO); + rrdlabels_add(d->st_ctokens->rrdlabels, "device_group", string2str(d->family), RRDLABEL_SRC_AUTO); // TODO // update the family @@ -724,7 +724,7 @@ static inline void 
tc_device_set_class_name(struct tc_device *d, char *id, char } if(likely(name && *name && strcmp(string2str(c->id), name) != 0)) { - debug(D_TC_LOOP, "TC: Setting device '%s', class '%s' name to '%s'", string2str(d->id), id, name); + netdata_log_debug(D_TC_LOOP, "TC: Setting device '%s', class '%s' name to '%s'", string2str(d->id), id, name); c->name = string_strdupz(name); c->name_updated = true; } @@ -741,7 +741,7 @@ static inline void tc_device_set_device_name(struct tc_device *d, char *name) { } if(likely(name && *name && strcmp(string2str(d->id), name) != 0)) { - debug(D_TC_LOOP, "TC: Setting device '%s' name to '%s'", string2str(d->id), name); + netdata_log_debug(D_TC_LOOP, "TC: Setting device '%s' name to '%s'", string2str(d->id), name); d->name = string_strdupz(name); d->name_updated = true; } @@ -752,7 +752,7 @@ static inline void tc_device_set_device_family(struct tc_device *d, char *family d->family = NULL; if(likely(family && *family && strcmp(string2str(d->id), family) != 0)) { - debug(D_TC_LOOP, "TC: Setting device '%s' family to '%s'", string2str(d->id), family); + netdata_log_debug(D_TC_LOOP, "TC: Setting device '%s' family to '%s'", string2str(d->id), family); d->family = string_strdupz(family); d->family_updated = true; } @@ -763,7 +763,7 @@ static inline struct tc_device *tc_device_create(char *id) { struct tc_device *d = tc_device_index_find(id); if(!d) { - debug(D_TC_LOOP, "TC: Creating device '%s'", id); + netdata_log_debug(D_TC_LOOP, "TC: Creating device '%s'", id); struct tc_device tmp = { .id = string_strdupz(id), @@ -779,7 +779,7 @@ static inline struct tc_class *tc_class_add(struct tc_device *n, char *id, bool struct tc_class *c = tc_class_index_find(n, id); if(!c) { - debug(D_TC_LOOP, "TC: Creating in device '%s', class id '%s', parentid '%s', leafid '%s'", + netdata_log_debug(D_TC_LOOP, "TC: Creating in device '%s', class id '%s', parentid '%s', leafid '%s'", string2str(n->id), id, parentid?parentid:"", leafid?leafid:""); struct 
tc_class tmp = { @@ -936,7 +936,7 @@ void *tc_main(void *ptr) { struct tc_class *class = NULL; snprintfz(command, TC_LINE_MAX, "exec %s %d", tc_script, localhost->rrd_update_every); - debug(D_TC_LOOP, "executing '%s'", command); + netdata_log_debug(D_TC_LOOP, "executing '%s'", command); fp_child_output = netdata_popen(command, (pid_t *)&tc_child_pid, &fp_child_input); if(unlikely(!fp_child_output)) { @@ -949,23 +949,23 @@ void *tc_main(void *ptr) { if(unlikely(!service_running(SERVICE_COLLECTORS))) break; buffer[TC_LINE_MAX] = '\0'; - // debug(D_TC_LOOP, "TC: read '%s'", buffer); + // netdata_log_debug(D_TC_LOOP, "TC: read '%s'", buffer); tc_split_words(buffer, words, PLUGINSD_MAX_WORDS); if(unlikely(!words[0] || !*words[0])) { - // debug(D_TC_LOOP, "empty line"); + // netdata_log_debug(D_TC_LOOP, "empty line"); worker_is_idle(); continue; } - // else debug(D_TC_LOOP, "First word is '%s'", words[0]); + // else netdata_log_debug(D_TC_LOOP, "First word is '%s'", words[0]); first_hash = simple_hash(words[0]); if(unlikely(device && ((first_hash == CLASS_HASH && strcmp(words[0], "class") == 0) || (first_hash == QDISC_HASH && strcmp(words[0], "qdisc") == 0)))) { worker_is_busy(WORKER_TC_CLASS); - // debug(D_TC_LOOP, "CLASS line on class id='%s', parent='%s', parentid='%s', leaf='%s', leafid='%s'", words[2], words[3], words[4], words[5], words[6]); + // netdata_log_debug(D_TC_LOOP, "CLASS line on class id='%s', parent='%s', parentid='%s', leaf='%s', leafid='%s'", words[2], words[3], words[4], words[5], words[6]); char *type = words[1]; // the class/qdisc type: htb, fq_codel, etc char *id = words[2]; // the class/qdisc major:minor @@ -1033,7 +1033,7 @@ void *tc_main(void *ptr) { else if(unlikely(first_hash == END_HASH && strcmp(words[0], "END") == 0)) { worker_is_busy(WORKER_TC_END); - // debug(D_TC_LOOP, "END line"); + // netdata_log_debug(D_TC_LOOP, "END line"); if(likely(device)) { netdata_thread_disable_cancelability(); @@ -1048,7 +1048,7 @@ void *tc_main(void *ptr) { 
else if(unlikely(first_hash == BEGIN_HASH && strcmp(words[0], "BEGIN") == 0)) { worker_is_busy(WORKER_TC_BEGIN); - // debug(D_TC_LOOP, "BEGIN line on device '%s'", words[1]); + // netdata_log_debug(D_TC_LOOP, "BEGIN line on device '%s'", words[1]); if(likely(words[1] && *words[1])) { device = tc_device_create(words[1]); @@ -1063,7 +1063,7 @@ void *tc_main(void *ptr) { else if(unlikely(device && class && first_hash == SENT_HASH && strcmp(words[0], "Sent") == 0)) { worker_is_busy(WORKER_TC_SENT); - // debug(D_TC_LOOP, "SENT line '%s'", words[1]); + // netdata_log_debug(D_TC_LOOP, "SENT line '%s'", words[1]); if(likely(words[1] && *words[1])) { class->bytes = str2ull(words[1], NULL); class->updated = true; @@ -1087,7 +1087,7 @@ void *tc_main(void *ptr) { else if(unlikely(device && class && class->updated && first_hash == LENDED_HASH && strcmp(words[0], "lended:") == 0)) { worker_is_busy(WORKER_TC_LENDED); - // debug(D_TC_LOOP, "LENDED line '%s'", words[1]); + // netdata_log_debug(D_TC_LOOP, "LENDED line '%s'", words[1]); //if(likely(words[1] && *words[1])) // class->lended = str2ull(words[1]); @@ -1100,7 +1100,7 @@ void *tc_main(void *ptr) { else if(unlikely(device && class && class->updated && first_hash == TOKENS_HASH && strcmp(words[0], "tokens:") == 0)) { worker_is_busy(WORKER_TC_TOKENS); - // debug(D_TC_LOOP, "TOKENS line '%s'", words[1]); + // netdata_log_debug(D_TC_LOOP, "TOKENS line '%s'", words[1]); if(likely(words[1] && *words[1])) class->tokens = str2ull(words[1], NULL); @@ -1110,21 +1110,21 @@ void *tc_main(void *ptr) { else if(unlikely(device && first_hash == SETDEVICENAME_HASH && strcmp(words[0], "SETDEVICENAME") == 0)) { worker_is_busy(WORKER_TC_SETDEVICENAME); - // debug(D_TC_LOOP, "SETDEVICENAME line '%s'", words[1]); + // netdata_log_debug(D_TC_LOOP, "SETDEVICENAME line '%s'", words[1]); if(likely(words[1] && *words[1])) tc_device_set_device_name(device, words[1]); } else if(unlikely(device && first_hash == SETDEVICEGROUP_HASH && strcmp(words[0], 
"SETDEVICEGROUP") == 0)) { worker_is_busy(WORKER_TC_SETDEVICEGROUP); - // debug(D_TC_LOOP, "SETDEVICEGROUP line '%s'", words[1]); + // netdata_log_debug(D_TC_LOOP, "SETDEVICEGROUP line '%s'", words[1]); if(likely(words[1] && *words[1])) tc_device_set_device_family(device, words[1]); } else if(unlikely(device && first_hash == SETCLASSNAME_HASH && strcmp(words[0], "SETCLASSNAME") == 0)) { worker_is_busy(WORKER_TC_SETCLASSNAME); - // debug(D_TC_LOOP, "SETCLASSNAME line '%s' '%s'", words[1], words[2]); + // netdata_log_debug(D_TC_LOOP, "SETCLASSNAME line '%s' '%s'", words[1], words[2]); char *id = words[1]; char *path = words[2]; if(likely(id && *id && path && *path)) @@ -1147,7 +1147,7 @@ void *tc_main(void *ptr) { worker_set_metric(WORKER_TC_CLASSES, number_of_classes); } //else { - // debug(D_TC_LOOP, "IGNORED line"); + // netdata_log_debug(D_TC_LOOP, "IGNORED line"); //} worker_is_idle(); diff --git a/collectors/timex.plugin/metadata.yaml b/collectors/timex.plugin/metadata.yaml new file mode 100644 index 00000000..27a54575 --- /dev/null +++ b/collectors/timex.plugin/metadata.yaml @@ -0,0 +1,90 @@ +meta: + plugin_name: timex.plugin + module_name: timex.plugin + monitored_instance: + name: Timex + link: '' + categories: + - data-collection.system-clock-and-ntp + icon_filename: 'syslog.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Examine Timex metrics to gain insights into system clock operations. Study time sync status, clock drift, and adjustments to ensure accurate system timekeeping.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: +- name: system_clock_sync_state + link: https://github.com/netdata/netdata/blob/master/health/health.d/timex.conf + metric: system.clock_sync_state + info: when set to 0, the system kernel believes the system clock is not properly synchronized to a reliable server + os: "linux" +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: system.clock_sync_state + description: System Clock Synchronization State + unit: "state" + chart_type: line + dimensions: + - name: state + - name: system.clock_status + description: System Clock Status + unit: "status" + chart_type: line + dimensions: + - name: unsync + - name: clockerr + - name: system.clock_sync_offset + description: Computed Time Offset Between Local System and Reference Clock + unit: "milliseconds" + chart_type: line + dimensions: + - name: offset diff --git a/collectors/timex.plugin/plugin_timex.c b/collectors/timex.plugin/plugin_timex.c index 84147c85..025b699a 100644 --- a/collectors/timex.plugin/plugin_timex.c +++ b/collectors/timex.plugin/plugin_timex.c @@ -37,7 +37,7 @@ static void timex_main_cleanup(void *ptr) struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; - info("cleaning up..."); + netdata_log_info("cleaning up..."); static_thread->enabled = 
NETDATA_MAIN_THREAD_EXITED; } @@ -57,7 +57,7 @@ void *timex_main(void *ptr) int do_offset = config_get_boolean(CONFIG_SECTION_TIMEX, "time offset", CONFIG_BOOLEAN_YES); if (unlikely(do_sync == CONFIG_BOOLEAN_NO && do_offset == CONFIG_BOOLEAN_NO)) { - info("No charts to show"); + netdata_log_info("No charts to show"); goto exit; } @@ -79,7 +79,7 @@ void *timex_main(void *ptr) prev_sync_state = sync_state; if (non_seq_failure) { - error("Cannot get clock synchronization state"); + netdata_log_error("Cannot get clock synchronization state"); continue; } diff --git a/collectors/xenstat.plugin/metadata.yaml b/collectors/xenstat.plugin/metadata.yaml new file mode 100644 index 00000000..610435a3 --- /dev/null +++ b/collectors/xenstat.plugin/metadata.yaml @@ -0,0 +1,181 @@ +meta: + plugin_name: xenstat.plugin + module_name: xenstat.plugin + monitored_instance: + name: Xen/XCP-ng + link: '' + categories: + - data-collection.containers-and-vms + icon_filename: 'xen.png' + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: '' + keywords: [] + most_popular: false +overview: + data_collection: + metrics_description: 'Monitor Xen/XCP-ng with Netdata for streamlined virtual machine performance and resource management. Optimize virtualization operations with real-time insights, built-in alerts, and anomaly advisor.' 
+ method_description: '' + supported_platforms: + include: [] + exclude: [] + multi-instance: true + additional_permissions: + description: '' + default_behavior: + auto_detection: + description: '' + limits: + description: '' + performance_impact: + description: '' +setup: + prerequisites: + list: [] + configuration: + file: + name: '' + description: '' + options: + description: '' + folding: + title: '' + enabled: true + list: [] + examples: + folding: + enabled: true + title: '' + list: [] +troubleshooting: + problems: + list: [] +alerts: [] +metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "" + labels: [] + metrics: + - name: xenstat.mem + description: Memory Usage + unit: "MiB" + chart_type: stacked + dimensions: + - name: free + - name: used + - name: xenstat.domains + description: Number of Domains + unit: "domains" + chart_type: line + dimensions: + - name: domains + - name: xenstat.cpus + description: Number of CPUs + unit: "cpus" + chart_type: line + dimensions: + - name: cpus + - name: xenstat.cpu_freq + description: CPU Frequency + unit: "MHz" + chart_type: line + dimensions: + - name: frequency + - name: xendomain + description: "" + labels: [] + metrics: + - name: xendomain.states + description: Domain States + unit: "boolean" + chart_type: line + dimensions: + - name: running + - name: blocked + - name: paused + - name: shutdown + - name: crashed + - name: dying + - name: xendomain.cpu + description: CPU Usage (100% = 1 core) + unit: "percentage" + chart_type: line + dimensions: + - name: used + - name: xendomain.mem + description: Memory Reservation + unit: "MiB" + chart_type: line + dimensions: + - name: maximum + - name: current + - name: xendomain.vcpu + description: CPU Usage per VCPU + unit: "percentage" + chart_type: line + dimensions: + - name: a dimension per vcpu + - name: xendomain, vbd + description: "" + labels: [] + metrics: + - name: 
xendomain.oo_req_vbd + description: VBD{%u} Out Of Requests + unit: "requests/s" + chart_type: line + dimensions: + - name: requests + - name: xendomain.requests_vbd + description: VBD{%u} Requests + unit: "requests/s" + chart_type: line + dimensions: + - name: read + - name: write + - name: xendomain.sectors_vbd + description: VBD{%u} Read/Written Sectors + unit: "sectors/s" + chart_type: line + dimensions: + - name: read + - name: write + - name: xendomain, network + description: "" + labels: [] + metrics: + - name: xendomain.bytes_network + description: Network{%u} Received/Sent Bytes + unit: "kilobits/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: xendomain.packets_network + description: Network{%u} Received/Sent Packets + unit: "packets/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: xendomain.errors_network + description: Network{%u} Receive/Transmit Errors + unit: "errors/s" + chart_type: line + dimensions: + - name: received + - name: sent + - name: xendomain.drops_network + description: Network{%u} Receive/Transmit Drops + unit: "drops/s" + chart_type: line + dimensions: + - name: received + - name: sent diff --git a/collectors/xenstat.plugin/xenstat_plugin.c b/collectors/xenstat.plugin/xenstat_plugin.c index b0cfa0b2..acd07260 100644 --- a/collectors/xenstat.plugin/xenstat_plugin.c +++ b/collectors/xenstat.plugin/xenstat_plugin.c @@ -178,7 +178,7 @@ static struct domain_metrics *domain_metrics_free(struct domain_metrics *d) { } if(unlikely(!cur)) { - error("XENSTAT: failed to free domain metrics."); + netdata_log_error("XENSTAT: failed to free domain metrics."); return NULL; } @@ -242,7 +242,7 @@ static int vcpu_metrics_collect(struct domain_metrics *d, xenstat_domain *domain vcpu = xenstat_domain_vcpu(domain, i); if(unlikely(!vcpu)) { - error("XENSTAT: cannot get VCPU statistics."); + netdata_log_error("XENSTAT: cannot get VCPU statistics."); return 1; } @@ -288,7 +288,7 @@ static int 
vbd_metrics_collect(struct domain_metrics *d, xenstat_domain *domain) vbd = xenstat_domain_vbd(domain, i); if(unlikely(!vbd)) { - error("XENSTAT: cannot get VBD statistics."); + netdata_log_error("XENSTAT: cannot get VBD statistics."); return 1; } @@ -336,7 +336,7 @@ static int network_metrics_collect(struct domain_metrics *d, xenstat_domain *dom network = xenstat_domain_network(domain, i); if(unlikely(!network)) { - error("XENSTAT: cannot get network statistics."); + netdata_log_error("XENSTAT: cannot get network statistics."); return 1; } @@ -368,7 +368,7 @@ static int xenstat_collect(xenstat_handle *xhandle, libxl_ctx *ctx, libxl_dominf xenstat_node *node = xenstat_get_node(xhandle, XENSTAT_ALL); if (unlikely(!node)) { - error("XENSTAT: failed to retrieve statistics from libxenstat."); + netdata_log_error("XENSTAT: failed to retrieve statistics from libxenstat."); return 1; } @@ -388,7 +388,7 @@ static int xenstat_collect(xenstat_handle *xhandle, libxl_ctx *ctx, libxl_dominf // get domain UUID unsigned int id = xenstat_domain_id(domain); if(unlikely(libxl_domain_info(ctx, info, id))) { - error("XENSTAT: cannot get domain info."); + netdata_log_error("XENSTAT: cannot get domain info."); } else { snprintfz(uuid, LIBXL_UUID_FMTLEN, LIBXL_UUID_FMT "\n", LIBXL_UUID_BYTES(info->uuid)); @@ -989,7 +989,7 @@ int main(int argc, char **argv) { exit(1); } - error("xenstat.plugin: ignoring parameter '%s'", argv[i]); + netdata_log_error("xenstat.plugin: ignoring parameter '%s'", argv[i]); } errno = 0; @@ -997,7 +997,7 @@ int main(int argc, char **argv) { if(freq >= netdata_update_every) netdata_update_every = freq; else if(freq) - error("update frequency %d seconds is too small for XENSTAT. Using %d.", freq, netdata_update_every); + netdata_log_error("update frequency %d seconds is too small for XENSTAT. 
Using %d.", freq, netdata_update_every); // ------------------------------------------------------------------------ // initialize xen API handles @@ -1008,13 +1008,13 @@ int main(int argc, char **argv) { if(unlikely(debug)) fprintf(stderr, "xenstat.plugin: calling xenstat_init()\n"); xhandle = xenstat_init(); if (xhandle == NULL) { - error("XENSTAT: failed to initialize xenstat library."); + netdata_log_error("XENSTAT: failed to initialize xenstat library."); return 1; } if(unlikely(debug)) fprintf(stderr, "xenstat.plugin: calling libxl_ctx_alloc()\n"); if (libxl_ctx_alloc(&ctx, LIBXL_VERSION, 0, NULL)) { - error("XENSTAT: failed to initialize xl context."); + netdata_log_error("XENSTAT: failed to initialize xl context."); xenstat_uninit(xhandle); return 1; } @@ -1066,7 +1066,7 @@ int main(int argc, char **argv) { libxl_ctx_free(ctx); xenstat_uninit(xhandle); - info("XENSTAT process exiting"); + netdata_log_info("XENSTAT process exiting"); return 0; } |