diff options
Diffstat (limited to '')
-rw-r--r-- | collectors/apps.plugin/apps_plugin.c (renamed from src/apps_plugin.c) | 712 |
1 files changed, 436 insertions, 276 deletions
diff --git a/src/apps_plugin.c b/collectors/apps.plugin/apps_plugin.c index 8595da6c2..f592e9fc8 100644 --- a/src/apps_plugin.c +++ b/collectors/apps.plugin/apps_plugin.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-3.0-or-later /* * netdata apps.plugin @@ -5,7 +6,61 @@ * Released under GPL v3+ */ -#include "common.h" +#include "../../libnetdata/libnetdata.h" + +// ---------------------------------------------------------------------------- + +// callback required by fatal() +void netdata_cleanup_and_exit(int ret) { + exit(ret); +} + +// callbacks required by popen() +void signals_block(void) {}; +void signals_unblock(void) {}; +void signals_reset(void) {}; + +// callback required by eval() +int health_variable_lookup(const char *variable, uint32_t hash, struct rrdcalc *rc, calculated_number *result) { + (void)variable; + (void)hash; + (void)rc; + (void)result; + return 0; +}; + +// required by get_system_cpus() +char *netdata_configured_host_prefix = ""; + + +// ---------------------------------------------------------------------------- +// debugging + +static int debug_enabled = 0; +static inline void debug_log_int(const char *fmt, ... ) { + va_list args; + + fprintf( stderr, "apps.plugin: "); + va_start( args, fmt ); + vfprintf( stderr, fmt, args ); + va_end( args ); + + fputc('\n', stderr); +} + +#ifdef NETDATA_INTERNAL_CHECKS + +#define debug_log(fmt, args...) do { if(unlikely(debug_enabled)) debug_log_int(fmt, ##args); } while(0) + +#else + +static inline void debug_log_dummy(void) {} +#define debug_log(fmt, args...) debug_log_dummy() + +#endif + + +// ---------------------------------------------------------------------------- #ifdef __FreeBSD__ #include <sys/user.h> @@ -57,21 +112,21 @@ // command line options static int - debug = 0, update_every = 1, enable_guest_charts = 0, #ifdef __FreeBSD__ enable_file_charts = 0, #else enable_file_charts = 1, + max_fds_cache_seconds = 60, #endif enable_users_charts = 1, enable_groups_charts = 1, include_exited_childs = 1; - -// will be changed to getenv(NETDATA_CONFIG_DIR) if it exists -static char *config_dir = CONFIG_DIR; +// will be changed to getenv(NETDATA_USER_CONFIG_DIR) if it exists +static char *user_config_dir = CONFIG_DIR; +static char *stock_config_dir = LIBCONFIG_DIR; // ---------------------------------------------------------------------------- // internal flags @@ -90,6 +145,9 @@ static size_t global_iterations_counter = 1, calls_counter = 0, file_counter = 0, + filenames_allocated_counter = 0, + inodes_changed_counter = 0, + links_changed_counter = 0, targets_assignment_counter = 0; @@ -193,7 +251,7 @@ struct target { unsigned int processes; // how many processes have been merged to this int exposed; // if set, we have sent this to netdata int hidden; // if set, we set the hidden flag on the dimension - int debug; + int debug_enabled; int ends_with; int starts_with; // if set, the compare string matches only the // beginning of the command @@ -218,6 +276,18 @@ size_t // structure to store data for each process running // see: man proc for the description of the fields +struct pid_fd { + int fd; + +#ifndef __FreeBSD__ + ino_t inode; + char *filename; + uint32_t link_hash; + size_t cache_iterations_counter; + size_t cache_iterations_reset; +#endif +}; + struct pid_stat { int32_t pid; char comm[MAX_COMPARE_NAME + 1]; @@ -312,15 +382,15 @@ struct pid_stat { kernel_uint_t io_storage_bytes_written; // kernel_uint_t io_cancelled_write_bytes; - int *fds; // array of fds it uses - int fds_size; // the size of the fds array + struct pid_fd *fds; // array of fds it uses + size_t fds_size; // the size of the fds array int children_count; // number of processes directly referencing this - char keep:1; // 1 when we need to keep this process in memory even after it exited + unsigned char keep:1; // 1 when we need to keep this process in memory even after it exited int keeploops; // increases by 1 every time keep is 1 and updated 0 - char updated:1; // 1 when the process is currently running - char merged:1; // 1 when it has been merged to its parent - char read:1; // 1 when we have already read this process for this iteration + unsigned char updated:1; // 1 when the process is currently running + unsigned char merged:1; // 1 when it has been merged to its parent + unsigned char read:1; // 1 when we have already read this process for this iteration int sortlist; // higher numbers = top on the process tree // each process gets a unique number @@ -416,13 +486,6 @@ static int all_files_size = 0; // ---------------------------------------------------------------------------- -// callback required by fatal() - -void netdata_cleanup_and_exit(int ret) { - exit(ret); -} - -// ---------------------------------------------------------------------------- // apps_groups.conf // aggregate all processes in groups, to have a limited number of dimensions @@ -452,8 +515,7 @@ static struct target *get_users_target(uid_t uid) { w->next = users_root_target; users_root_target = w; - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: added uid %u ('%s') target\n", w->uid, w->name); + debug_log("added uid %u ('%s') target", w->uid, w->name); return w; } @@ -485,8 +547,7 @@ struct target *get_groups_target(gid_t gid) w->next = groups_root_target; groups_root_target = w; - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: added gid %u ('%s') target\n", w->gid, w->name); + debug_log("added gid %u ('%s') target", w->gid, w->name); return w; } @@ -527,11 +588,11 @@ static struct target *get_apps_groups_target(const char *id, struct target *targ break; } - if(unlikely(debug)) { + if(unlikely(debug_enabled)) { if(unlikely(target)) - fprintf(stderr, "apps.plugin: REUSING TARGET NAME '%s' on ID '%s'\n", target->name, target->id); + debug_log("REUSING TARGET NAME '%s' on ID '%s'", target->name, target->id); else - fprintf(stderr, "apps.plugin: NEW TARGET NAME '%s' on ID '%s'\n", name, id); + debug_log("NEW TARGET NAME '%s' on ID '%s'", name, id); } } @@ -564,34 +625,37 @@ static struct target *get_apps_groups_target(const char *id, struct target *targ w->comparelen = strlen(w->compare); w->hidden = thidden; - w->debug = tdebug; +#ifdef NETDATA_INTERNAL_CHECKS + w->debug_enabled = tdebug; +#else + if(tdebug) + fprintf(stderr, "apps.plugin has been compiled without debugging\n"); +#endif w->target = target; // append it, to maintain the order in apps_groups.conf if(last) last->next = w; else apps_groups_root_target = w; - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: ADDING TARGET ID '%s', process name '%s' (%s), aggregated on target '%s', options: %s %s\n" - , w->id - , w->compare, (w->starts_with && w->ends_with)?"substring":((w->starts_with)?"prefix":((w->ends_with)?"suffix":"exact")) - , w->target?w->target->name:w->name - , (w->hidden)?"hidden":"-" - , (w->debug)?"debug":"-" - ); + debug_log("ADDING TARGET ID '%s', process name '%s' (%s), aggregated on target '%s', options: %s %s" + , w->id + , w->compare, (w->starts_with && w->ends_with)?"substring":((w->starts_with)?"prefix":((w->ends_with)?"suffix":"exact")) + , w->target?w->target->name:w->name + , (w->hidden)?"hidden":"-" + , (w->debug_enabled)?"debug":"-" + ); return w; } // read the apps_groups.conf file -static int read_apps_groups_conf(const char *file) +static int read_apps_groups_conf(const char *path, const char *file) { char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/apps_%s.conf", config_dir, file); + snprintfz(filename, FILENAME_MAX, "%s/apps_%s.conf", path, file); - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: process groups file: '%s'\n", filename); + debug_log("process groups file: '%s'", filename); // ---------------------------------------- @@ -654,14 +718,16 @@ static int read_apps_groups_conf(const char *file) // ---------------------------------------------------------------------------- // struct pid_stat management +static inline void init_pid_fds(struct pid_stat *p, size_t first, size_t size); static inline struct pid_stat *get_pid_entry(pid_t pid) { if(unlikely(all_pids[pid])) return all_pids[pid]; struct pid_stat *p = callocz(sizeof(struct pid_stat), 1); - p->fds = callocz(sizeof(int), MAX_SPARE_FDS); + p->fds = mallocz(sizeof(struct pid_fd) * MAX_SPARE_FDS); p->fds_size = MAX_SPARE_FDS; + init_pid_fds(p, 0, p->fds_size); if(likely(root_of_pids)) root_of_pids->prev = p; @@ -685,8 +751,7 @@ static inline void del_pid_entry(pid_t pid) { return; } - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: process %d %s exited, deleting it.\n", pid, p->comm); + debug_log("process %d %s exited, deleting it.", pid, p->comm); if(root_of_pids == p) root_of_pids = p->next; @@ -694,7 +759,17 @@ static inline void del_pid_entry(pid_t pid) { if(p->next) p->next->prev = p->prev; if(p->prev) p->prev->next = p->next; + // free the filename +#ifndef __FreeBSD__ + { + size_t i; + for(i = 0; i < p->fds_size; i++) + if(p->fds[i].filename) + freez(p->fds[i].filename); + } +#endif freez(p->fds); + freez(p->fds_dirname); freez(p->stat_filename); freez(p->status_filename); @@ -716,8 +791,8 @@ static inline int managed_log(struct pid_stat *p, uint32_t log, int status) { if(unlikely(!status)) { // error("command failed log %u, errno %d", log, errno); - if(unlikely(debug || errno != ENOENT)) { - if(unlikely(debug || !(p->log_thrown & log))) { + if(unlikely(debug_enabled || errno != ENOENT)) { + if(unlikely(debug_enabled || !(p->log_thrown & log))) { p->log_thrown |= log; switch(log) { case PID_LOG_IO: @@ -779,7 +854,7 @@ static inline void assign_target_to_pid(struct pid_stat *p) { struct target *w; for(w = apps_groups_root_target; w ; w = w->next) { - // if(debug || (p->target && p->target->debug)) fprintf(stderr, "apps.plugin: \t\tcomparing '%s' with '%s'\n", w->compare, p->comm); + // if(debug_enabled || (p->target && p->target->debug_enabled)) debug_log_int("\t\tcomparing '%s' with '%s'", w->compare, p->comm); // find it - 4 cases: // 1. the target is not a pattern @@ -796,8 +871,8 @@ static inline void assign_target_to_pid(struct pid_stat *p) { if(w->target) p->target = w->target; else p->target = w; - if(debug || (p->target && p->target->debug)) - fprintf(stderr, "apps.plugin: \t\t%s linked to target %s\n", p->comm, p->target->name); + if(debug_enabled || (p->target && p->target->debug_enabled)) + debug_log_int("%s linked to target %s", p->comm, p->target->name); break; } @@ -828,7 +903,7 @@ static inline int read_proc_pid_cmdline(struct pid_stat *p) { p->cmdline_filename = strdupz(filename); } - int fd = open(p->cmdline_filename, O_RDONLY, 0666); + int fd = open(p->cmdline_filename, procfile_open_flags, 0666); if(unlikely(fd == -1)) goto cleanup; ssize_t i, bytes = read(fd, cmdline, MAX_CMDLINE); @@ -838,18 +913,20 @@ static inline int read_proc_pid_cmdline(struct pid_stat *p) { #endif cmdline[bytes] = '\0'; - for(i = 0; i < bytes ; i++) + for(i = 0; i < bytes ; i++) { if(unlikely(!cmdline[i])) cmdline[i] = ' '; + } + if(p->cmdline) freez(p->cmdline); p->cmdline = strdupz(cmdline); - if(unlikely(debug)) - fprintf(stderr, "Read file '%s' contents: %s\n", p->cmdline_filename, p->cmdline); + debug_log("Read file '%s' contents: %s", p->cmdline_filename, p->cmdline); return 1; cleanup: // copy the command to the command line + if(p->cmdline) freez(p->cmdline); p->cmdline = strdupz(p->comm); return 0; } @@ -963,7 +1040,7 @@ static inline int read_proc_pid_status(struct pid_stat *p, void *ptr) { p->gid = proc_info->ki_groups[0]; p->status_vmsize = proc_info->ki_size / 1024; // in kB p->status_vmrss = proc_info->ki_rssize * pagesize / 1024; // in kB - // FIXME: what about shared and swap memory on FreeBSD? + // TODO: what about shared and swap memory on FreeBSD? return 1; #else (void)ptr; @@ -1004,7 +1081,7 @@ static inline int read_proc_pid_status(struct pid_stat *p, void *ptr) { arl_begin(p->status_arl); for(l = 0; l < lines ;l++) { - // fprintf(stderr, "CHECK: line %zu of %zu, key '%s' = '%s'\n", l, lines, procfile_lineword(ff, l, 0), procfile_lineword(ff, l, 1)); + // debug_log("CHECK: line %zu of %zu, key '%s' = '%s'", l, lines, procfile_lineword(ff, l, 0), procfile_lineword(ff, l, 1)); arl_ptr.line = l; if(unlikely(arl_check(p->status_arl, procfile_lineword(ff, l, 0), @@ -1013,7 +1090,7 @@ static inline int read_proc_pid_status(struct pid_stat *p, void *ptr) { p->status_vmshared = p->status_rssfile + p->status_rssshmem; - // fprintf(stderr, "%s uid %d, gid %d, VmSize %zu, VmRSS %zu, RssFile %zu, RssShmem %zu, shared %zu\n", p->comm, (int)p->uid, (int)p->gid, p->status_vmsize, p->status_vmrss, p->status_rssfile, p->status_rssshmem, p->status_vmshared); + // debug_log("%s uid %d, gid %d, VmSize %zu, VmRSS %zu, RssFile %zu, RssShmem %zu, shared %zu", p->comm, (int)p->uid, (int)p->gid, p->status_vmsize, p->status_vmrss, p->status_rssfile, p->status_rssshmem, p->status_vmshared); return 1; #endif @@ -1072,11 +1149,11 @@ static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) { #endif if(strcmp(p->comm, comm) != 0) { - if(unlikely(debug)) { + if(unlikely(debug_enabled)) { if(p->comm[0]) - fprintf(stderr, "apps.plugin: \tpid %d (%s) changed name to '%s'\n", p->pid, p->comm, comm); + debug_log("\tpid %d (%s) changed name to '%s'", p->pid, p->comm, comm); else - fprintf(stderr, "apps.plugin: \tJust added %d (%s)\n", p->pid, comm); + debug_log("\tJust added %d (%s)", p->pid, comm); } strncpyz(p->comm, comm, MAX_COMPARE_NAME); @@ -1152,8 +1229,8 @@ static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) { } #endif - if(unlikely(debug || (p->target && p->target->debug))) - fprintf(stderr, "apps.plugin: READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu) VALUES: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT ", threads=%d\n", netdata_configured_host_prefix, p->pid, p->comm, (p->target)?p->target->name:"UNSET", p->stat_collected_usec - p->last_stat_collected_usec, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->num_threads); + if(unlikely(debug_enabled || (p->target && p->target->debug_enabled))) + debug_log_int("READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu) VALUES: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT ", threads=%d", netdata_configured_host_prefix, p->pid, p->comm, (p->target)?p->target->name:"UNSET", p->stat_collected_usec - p->last_stat_collected_usec, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->num_threads); if(unlikely(global_iterations_counter == 1)) { p->minflt = 0; @@ -1332,7 +1409,7 @@ int file_descriptor_compare(void* a, void* b) { return strcmp(((struct file_descriptor *)a)->name, ((struct file_descriptor *)b)->name); } -int file_descriptor_iterator(avl *a) { if(a) {}; return 0; } +// int file_descriptor_iterator(avl *a) { if(a) {}; return 0; } avl_tree all_files_index = { NULL, @@ -1368,15 +1445,13 @@ static inline void file_descriptor_not_used(int id) } #endif /* NETDATA_INTERNAL_CHECKS */ - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: decreasing slot %d (count = %d).\n", id, all_files[id].count); + debug_log("decreasing slot %d (count = %d).", id, all_files[id].count); if(all_files[id].count > 0) { all_files[id].count--; if(!all_files[id].count) { - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: >> slot %d is empty.\n", id); + debug_log(" >> slot %d is empty.", id); if(unlikely(file_descriptor_remove(&all_files[id]) != (void *)&all_files[id])) error("INTERNAL ERROR: removal of unused fd from index, removed a different fd"); @@ -1398,8 +1473,7 @@ static inline void all_files_grow() { int i; // there is no empty slot - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: extending fd array to %d entries\n", all_files_size + FILE_DESCRIPTORS_INCREASE_STEP); + debug_log("extending fd array to %d entries", all_files_size + FILE_DESCRIPTORS_INCREASE_STEP); all_files = reallocz(all_files, (all_files_size + FILE_DESCRIPTORS_INCREASE_STEP) * sizeof(struct file_descriptor)); @@ -1407,8 +1481,7 @@ static inline void all_files_grow() { // since all pointers are now invalid if(unlikely(old && old != (void *)all_files)) { - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: >> re-indexing.\n"); + debug_log(" >> re-indexing."); all_files_index.root = NULL; for(i = 0; i < all_files_size; i++) { @@ -1417,8 +1490,7 @@ static inline void all_files_grow() { error("INTERNAL ERROR: duplicate indexing of fd during realloc."); } - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: >> re-indexing done.\n"); + debug_log(" >> re-indexing done."); } // initialize the newly added entries @@ -1441,8 +1513,7 @@ static inline int file_descriptor_set_on_empty_slot(const char *name, uint32_t h if(!all_files || all_files_len == all_files_size) all_files_grow(); - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: >> searching for empty slot.\n"); + debug_log(" >> searching for empty slot."); // search for an empty slot @@ -1453,16 +1524,14 @@ static inline int file_descriptor_set_on_empty_slot(const char *name, uint32_t h if(c == 0) continue; if(!all_files[c].count) { - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: >> Examining slot %d.\n", c); + debug_log(" >> Examining slot %d.", c); #ifdef NETDATA_INTERNAL_CHECKS if(all_files[c].magic == 0x0BADCAFE && all_files[c].name && file_descriptor_find(all_files[c].name, all_files[c].hash)) - error("fd on position %d is not cleared properly. It still has %s in it.\n", c, all_files[c].name); + error("fd on position %d is not cleared properly. It still has %s in it.", c, all_files[c].name); #endif /* NETDATA_INTERNAL_CHECKS */ - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: >> %s fd position %d for %s (last name: %s)\n", all_files[c].name?"re-using":"using", c, name, all_files[c].name); + debug_log(" >> %s fd position %d for %s (last name: %s)", all_files[c].name?"re-using":"using", c, name, all_files[c].name); freez((void *)all_files[c].name); all_files[c].name = NULL; @@ -1479,8 +1548,7 @@ static inline int file_descriptor_set_on_empty_slot(const char *name, uint32_t h } // else we have an empty slot in 'c' - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: >> updating slot %d.\n", c); + debug_log(" >> updating slot %d.", c); all_files[c].name = strdupz(name); all_files[c].hash = hash; @@ -1493,24 +1561,21 @@ static inline int file_descriptor_set_on_empty_slot(const char *name, uint32_t h if(unlikely(file_descriptor_add(&all_files[c]) != (void *)&all_files[c])) error("INTERNAL ERROR: duplicate indexing of fd."); - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: using fd position %d (name: %s)\n", c, all_files[c].name); + debug_log("using fd position %d (name: %s)", c, all_files[c].name); return c; } -static inline int file_descriptor_find_or_add(const char *name) -{ - uint32_t hash = simple_hash(name); +static inline int file_descriptor_find_or_add(const char *name, uint32_t hash) { + if(unlikely(!hash)) + hash = simple_hash(name); - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: adding or finding name '%s' with hash %u\n", name, hash); + debug_log("adding or finding name '%s' with hash %u", name, hash); struct file_descriptor *fd = file_descriptor_find(name, hash); if(fd) { // found - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: >> found on slot %d\n", fd->pos); + debug_log(" >> found on slot %d", fd->pos); fd->count++; return fd->pos; @@ -1530,47 +1595,65 @@ static inline int file_descriptor_find_or_add(const char *name) else if(strcmp(t, "[timerfd]") == 0) type = FILETYPE_TIMERFD; else if(strcmp(t, "[signalfd]") == 0) type = FILETYPE_SIGNALFD; else { - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: FIXME: unknown anonymous inode: %s\n", name); - + debug_log("UNKNOWN anonymous inode: %s", name); type = FILETYPE_OTHER; } } else if(likely(strcmp(name, "inotify") == 0)) type = FILETYPE_INOTIFY; else { - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: FIXME: cannot understand linkname: %s\n", name); - + debug_log("UNKNOWN linkname: %s", name); type = FILETYPE_OTHER; } return file_descriptor_set_on_empty_slot(name, hash, type); } +static inline void clear_pid_fd(struct pid_fd *pfd) { + pfd->fd = 0; + + #ifndef __FreeBSD__ + pfd->link_hash = 0; + pfd->inode = 0; + pfd->cache_iterations_counter = 0; + pfd->cache_iterations_reset = 0; +#endif +} + static inline void make_all_pid_fds_negative(struct pid_stat *p) { - int *fd = p->fds, *end = &p->fds[p->fds_size]; - while(fd < end) { - *fd = -(*fd); - fd++; + struct pid_fd *pfd = p->fds, *pfdend = &p->fds[p->fds_size]; + while(pfd < pfdend) { + pfd->fd = -(pfd->fd); + pfd++; } } static inline void cleanup_negative_pid_fds(struct pid_stat *p) { - int *fd = p->fds, *fdend = &p->fds[p->fds_size]; + struct pid_fd *pfd = p->fds, *pfdend = &p->fds[p->fds_size]; + + while(pfd < pfdend) { + int fd = pfd->fd; - while(fd < fdend) { - if(unlikely(*fd < 0)) { - file_descriptor_not_used(-(*fd)); - *fd++ = 0; + if(unlikely(fd < 0)) { + file_descriptor_not_used(-(fd)); + clear_pid_fd(pfd); } - else - fd++; + + pfd++; } } -static inline void zero_pid_fds(struct pid_stat *p, int first, int size) { - int *fd = &p->fds[first], *end = &p->fds[first + size]; - while(fd < end) *fd++ = 0; +static inline void init_pid_fds(struct pid_stat *p, size_t first, size_t size) { + struct pid_fd *pfd = &p->fds[first], *pfdend = &p->fds[first + size]; + size_t i = first; + + while(pfd < pfdend) { +#ifndef __FreeBSD__ + pfd->filename = NULL; +#endif + clear_pid_fd(pfd); + pfd++; + i++; + } } static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) { @@ -1625,17 +1708,16 @@ static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) { if (unlikely(fdid >= p->fds_size)) { // it is small, extend it - if (unlikely(debug)) - fprintf(stderr, "apps.plugin: extending fd memory slots for %s from %d to %d\n", p->comm, p->fds_size, fdid + MAX_SPARE_FDS); + debug_log("extending fd memory slots for %s from %d to %d", p->comm, p->fds_size, fdid + MAX_SPARE_FDS); - p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(int)); + p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(struct pid_fd)); // and initialize it - zero_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size); + init_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size); p->fds_size = fdid + MAX_SPARE_FDS; } - if (unlikely(p->fds[fdid] == 0)) { + if (unlikely(p->fds[fdid].fd == 0)) { // we don't know this fd, get it switch (fds->kf_type) { @@ -1691,15 +1773,14 @@ static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) { // if another process already has this, we will get // the same id - p->fds[fdid] = file_descriptor_find_or_add(fdsname); + p->fds[fdid].fd = file_descriptor_find_or_add(fdsname, 0); } // else make it positive again, we need it - // of course, the actual file may have changed, but we don't care so much - // FIXME: we could compare the inode as returned by readdir dirent structure + // of course, the actual file may have changed else - p->fds[fdid] = -p->fds[fdid]; + p->fds[fdid].fd = -p->fds[fdid].fd; bfdsbuf += fds->kf_structsize; } @@ -1714,7 +1795,6 @@ static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) { if(unlikely(!fds)) return 0; struct dirent *de; - char fdname[FILENAME_MAX + 1]; char linkname[FILENAME_MAX + 1]; // we make all pid fds negative, so that @@ -1733,53 +1813,103 @@ static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) { if(unlikely(fdid < 0)) continue; // check if the fds array is small - if(unlikely(fdid >= p->fds_size)) { + if(unlikely((size_t)fdid >= p->fds_size)) { // it is small, extend it - if(unlikely(debug)) - fprintf(stderr - , "apps.plugin: extending fd memory slots for %s from %d to %d\n" - , p->comm - , p->fds_size - , fdid + MAX_SPARE_FDS - ); + debug_log("extending fd memory slots for %s from %d to %d" + , p->comm + , p->fds_size + , fdid + MAX_SPARE_FDS + ); - p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(int)); + p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(struct pid_fd)); // and initialize it - zero_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size); - p->fds_size = fdid + MAX_SPARE_FDS; + init_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size); + p->fds_size = (size_t)fdid + MAX_SPARE_FDS; } - if(unlikely(p->fds[fdid] == 0)) { - // we don't know this fd, get it + if(unlikely(p->fds[fdid].fd < 0 && de->d_ino != p->fds[fdid].inode)) { + // inodes do not match, clear the previous entry + inodes_changed_counter++; + file_descriptor_not_used(-p->fds[fdid].fd); + clear_pid_fd(&p->fds[fdid]); + } - sprintf(fdname, "%s/proc/%d/fd/%s", netdata_configured_host_prefix, p->pid, de->d_name); - ssize_t l = readlink(fdname, linkname, FILENAME_MAX); - if(unlikely(l == -1)) { - if(debug || (p->target && p->target->debug)) { - if(debug || (p->target && p->target->debug)) - error("Cannot read link %s", fdname); - } - continue; + if(p->fds[fdid].fd < 0 && p->fds[fdid].cache_iterations_counter > 0) { + p->fds[fdid].fd = -p->fds[fdid].fd; + p->fds[fdid].cache_iterations_counter--; + continue; + } + + if(unlikely(!p->fds[fdid].filename)) { + filenames_allocated_counter++; + char fdname[FILENAME_MAX + 1]; + snprintfz(fdname, FILENAME_MAX, "%s/proc/%d/fd/%s", netdata_configured_host_prefix, p->pid, de->d_name); + p->fds[fdid].filename = strdupz(fdname); + } + + file_counter++; + ssize_t l = readlink(p->fds[fdid].filename, linkname, FILENAME_MAX); + if(unlikely(l == -1)) { + // cannot read the link + + if(debug_enabled || (p->target && p->target->debug_enabled)) + error("Cannot read link %s", p->fds[fdid].filename); + + if(unlikely(p->fds[fdid].fd < 0)) { + file_descriptor_not_used(-p->fds[fdid].fd); + clear_pid_fd(&p->fds[fdid]); } - else - linkname[l] = '\0'; - file_counter++; + continue; + } + else + linkname[l] = '\0'; + + uint32_t link_hash = simple_hash(linkname); + + if(unlikely(p->fds[fdid].fd < 0 && p->fds[fdid].link_hash != link_hash)) { + // the link changed + links_changed_counter++; + file_descriptor_not_used(-p->fds[fdid].fd); + clear_pid_fd(&p->fds[fdid]); + } + + if(unlikely(p->fds[fdid].fd == 0)) { + // we don't know this fd, get it // if another process already has this, we will get // the same id - p->fds[fdid] = file_descriptor_find_or_add(linkname); + p->fds[fdid].fd = file_descriptor_find_or_add(linkname, link_hash); + p->fds[fdid].inode = de->d_ino; + p->fds[fdid].link_hash = link_hash; } - + else { // else make it positive again, we need it - // of course, the actual file may have changed, but we don't care so much - // FIXME: we could compare the inode as returned by readdir dirent structure - // UPDATE: no we cannot use inodes - under /proc inodes don't change when the link is changed + p->fds[fdid].fd = -p->fds[fdid].fd; + } - else - p->fds[fdid] = -p->fds[fdid]; + // caching control + // without this we read all the files on every iteration + if(max_fds_cache_seconds > 0) { + size_t spread = ((size_t)max_fds_cache_seconds > 10) ? 10 : (size_t)max_fds_cache_seconds; + + // cache it for a few iterations + size_t max = ((size_t) max_fds_cache_seconds + (fdid % spread)) / (size_t) update_every; + p->fds[fdid].cache_iterations_reset++; + + if(unlikely(p->fds[fdid].cache_iterations_reset % spread == (size_t) fdid % spread)) + p->fds[fdid].cache_iterations_reset++; + + if(unlikely((fdid <= 2 && p->fds[fdid].cache_iterations_reset > 5) || + p->fds[fdid].cache_iterations_reset > max)) { + // for stdin, stdout, stderr (fdid <= 2) we have checked a few times, or if it goes above the max, goto max + p->fds[fdid].cache_iterations_reset = max; + } + + p->fds[fdid].cache_iterations_counter = p->fds[fdid].cache_iterations_reset; + } } closedir(fds); @@ -1791,12 +1921,12 @@ static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) { // ---------------------------------------------------------------------------- -static inline int print_process_and_parents(struct pid_stat *p, usec_t time) { +static inline int debug_print_process_and_parents(struct pid_stat *p, usec_t time) { char *prefix = "\\_ "; int indent = 0; if(p->parent) - indent = print_process_and_parents(p->parent, p->stat_collected_usec); + indent = debug_print_process_and_parents(p->parent, p->stat_collected_usec); else prefix = " > "; @@ -1830,12 +1960,12 @@ static inline int print_process_and_parents(struct pid_stat *p, usec_t time) { return indent + 1; } -static inline void print_process_tree(struct pid_stat *p, char *msg) { - fprintf(stderr, "%s: process %s (%d, %s) with parents:\n", msg, p->comm, p->pid, p->updated?"running":"exited"); - print_process_and_parents(p, p->stat_collected_usec); +static inline void debug_print_process_tree(struct pid_stat *p, char *msg) { + debug_log("%s: process %s (%d, %s) with parents:", msg, p->comm, p->pid, p->updated?"running":"exited"); + debug_print_process_and_parents(p, p->stat_collected_usec); } -static inline void find_lost_child_debug(struct pid_stat *pe, kernel_uint_t lost, int type) { +static inline void debug_find_lost_child(struct pid_stat *pe, kernel_uint_t lost, int type) { int found = 0; struct pid_stat *p = NULL; @@ -1938,8 +2068,8 @@ static inline void process_exited_processes() { if(utime + stime + gtime + minflt + majflt == 0) continue; - if(unlikely(debug)) { - fprintf(stderr, "Absorb %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")\n" + if(unlikely(debug_enabled)) { + debug_log("Absorb %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")" , p->comm , p->pid , p->updated?"running":"exited" @@ -1949,7 +2079,7 @@ static inline void process_exited_processes() { , minflt , majflt ); - print_process_tree(p, "Searching parents"); + debug_print_process_tree(p, "Searching parents"); } struct pid_stat *pp; @@ -1958,59 +2088,57 @@ static inline void process_exited_processes() { kernel_uint_t absorbed; absorbed = remove_exited_child_from_parent(&utime, &pp->cutime); - if(unlikely(debug && absorbed)) - fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " utime (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, utime); + if(unlikely(debug_enabled && absorbed)) + debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " utime (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, utime); absorbed = remove_exited_child_from_parent(&stime, &pp->cstime); - if(unlikely(debug && absorbed)) - fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " stime (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, stime); + if(unlikely(debug_enabled && absorbed)) + debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " stime (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, stime); absorbed = remove_exited_child_from_parent(>ime, &pp->cgtime); - if(unlikely(debug && absorbed)) - fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " gtime (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, gtime); + if(unlikely(debug_enabled && absorbed)) + debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " gtime (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, gtime); absorbed = remove_exited_child_from_parent(&minflt, &pp->cminflt); - if(unlikely(debug && absorbed)) - fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " minflt (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, minflt); + if(unlikely(debug_enabled && absorbed)) + debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " minflt (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, minflt); absorbed = remove_exited_child_from_parent(&majflt, &pp->cmajflt); - if(unlikely(debug && absorbed)) - fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " majflt (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, majflt); + if(unlikely(debug_enabled && absorbed)) + debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " majflt (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, majflt); } if(unlikely(utime + stime + gtime + minflt + majflt > 0)) { - if(unlikely(debug)) { - if(utime) find_lost_child_debug(p, utime, 3); - if(stime) find_lost_child_debug(p, stime, 4); - if(gtime) find_lost_child_debug(p, gtime, 5); - if(minflt) find_lost_child_debug(p, minflt, 1); - if(majflt) find_lost_child_debug(p, majflt, 2); + if(unlikely(debug_enabled)) { + if(utime) debug_find_lost_child(p, utime, 3); + if(stime) debug_find_lost_child(p, stime, 4); + if(gtime) debug_find_lost_child(p, gtime, 5); + if(minflt) debug_find_lost_child(p, minflt, 1); + if(majflt) debug_find_lost_child(p, majflt, 2); } p->keep = 1; - if(unlikely(debug)) - fprintf(stderr, " > remaining resources - KEEP - for another loop: %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")\n" - , p->comm - , p->pid - , p->updated?"running":"exited" - , utime - , stime - , gtime - , minflt - , majflt - ); + debug_log(" > remaining resources - KEEP - for another loop: %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")" + , p->comm + , p->pid + , p->updated?"running":"exited" + , utime + , stime + , gtime + , minflt + , majflt + ); for(pp = p->parent; pp ; pp = pp->parent) { if(pp->updated) break; pp->keep = 1; - if(unlikely(debug)) - fprintf(stderr, " > - KEEP - parent for another loop: %s (%d %s)\n" - , pp->comm - , pp->pid - , pp->updated?"running":"exited" - ); + debug_log(" > - KEEP - parent for another loop: %s (%d %s)" + , pp->comm + , pp->pid + , pp->updated?"running":"exited" + ); } p->utime_raw = utime * (p->stat_collected_usec - p->last_stat_collected_usec) / (USEC_PER_SEC * RATES_DETAIL); @@ -2020,16 +2148,14 @@ static inline void process_exited_processes() { p->majflt_raw = majflt * (p->stat_collected_usec - p->last_stat_collected_usec) / (USEC_PER_SEC * RATES_DETAIL); p->cutime_raw = p->cstime_raw = p->cgtime_raw = p->cminflt_raw = p->cmajflt_raw = 0; - if(unlikely(debug)) - fprintf(stderr, "\n"); + debug_log(" "); } - else if(unlikely(debug)) { - fprintf(stderr, " > totally absorbed - DONE - %s (%d %s)\n" + else + debug_log(" > totally absorbed - DONE - %s (%d %s)" , p->comm , p->pid , p->updated?"running":"exited" - ); - } + ); } } @@ -2054,8 +2180,8 @@ static inline void link_all_processes_to_their_parents(void) { p->parent = pp; pp->children_count++; - if(unlikely(debug || (p->target && p->target->debug))) - fprintf(stderr, "apps.plugin: \tchild %d (%s, %s) on target '%s' has parent %d (%s, %s). Parent: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", gtime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", cgtime=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT "\n", p->pid, p->comm, p->updated?"running":"exited", (p->target)?p->target->name:"UNSET", pp->pid, pp->comm, pp->updated?"running":"exited", pp->utime, pp->stime, pp->gtime, pp->minflt, pp->majflt, pp->cutime, pp->cstime, pp->cgtime, pp->cminflt, pp->cmajflt); + if(unlikely(debug_enabled || (p->target && p->target->debug_enabled))) + debug_log_int("child %d (%s, %s) on target '%s' has parent %d (%s, %s). Parent: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", gtime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", cgtime=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT "", p->pid, p->comm, p->updated?"running":"exited", (p->target)?p->target->name:"UNSET", pp->pid, pp->comm, pp->updated?"running":"exited", pp->utime, pp->stime, pp->gtime, pp->minflt, pp->majflt, pp->cutime, pp->cstime, pp->cgtime, pp->cminflt, pp->cmajflt); } else { p->parent = NULL; @@ -2105,7 +2231,7 @@ static inline int collect_data_for_pid(pid_t pid, void *ptr) { if(unlikely(!p || p->read)) return 0; p->read = 1; - // fprintf(stderr, "Reading process %d (%s), sortlist %d\n", p->pid, p->comm, p->sortlist); + // debug_log("Reading process %d (%s), sortlist %d", p->pid, p->comm, p->sortlist); // -------------------------------------------------------------------- // /proc/<pid>/stat @@ -2141,8 +2267,8 @@ static inline int collect_data_for_pid(pid_t pid, void *ptr) { // -------------------------------------------------------------------- // done! - if(unlikely(debug && include_exited_childs && all_pids_count && p->ppid && all_pids[p->ppid] && !all_pids[p->ppid]->read)) - fprintf(stderr, "Read process %d (%s) sortlisted %d, but its parent %d (%s) sortlisted %d, is not read\n", p->pid, p->comm, p->sortlist, all_pids[p->ppid]->pid, all_pids[p->ppid]->comm, all_pids[p->ppid]->sortlist); + if(unlikely(debug_enabled && include_exited_childs && all_pids_count && p->ppid && all_pids[p->ppid] && !all_pids[p->ppid]->read)) + debug_log("Read process %d (%s) sortlisted %d, but its parent %d (%s) sortlisted %d, is not read", p->pid, p->comm, p->sortlist, all_pids[p->ppid]->pid, all_pids[p->ppid]->comm, all_pids[p->ppid]->sortlist); // mark it as updated p->updated = 1; @@ -2302,18 +2428,18 @@ static int collect_data_for_all_processes(void) { // check: update_apps_groups_statistics() static void cleanup_exited_pids(void) { - int c; + size_t c; struct pid_stat *p = NULL; for(p = root_of_pids; p ;) { if(!p->updated && (!p->keep || p->keeploops > 0)) { - if(unlikely(debug && (p->keep || p->keeploops))) - fprintf(stderr, " > CLEANUP cannot keep exited process %d (%s) anymore - removing it.\n", p->pid, p->comm); + if(unlikely(debug_enabled && (p->keep || p->keeploops))) + debug_log(" > CLEANUP cannot keep exited process %d (%s) anymore - removing it.", p->pid, p->comm); for(c = 0; c < p->fds_size; c++) - if(p->fds[c] > 0) { - file_descriptor_not_used(p->fds[c]); - p->fds[c] = 0; + if(p->fds[c].fd > 0) { + file_descriptor_not_used(p->fds[c].fd); + clear_pid_fd(&p->fds[c]); } pid_t r = p->pid; @@ -2335,7 +2461,7 @@ static void apply_apps_groups_targets_inheritance(void) { // inherit their target from their parent int found = 1, loops = 0; while(found) { - if(unlikely(debug)) loops++; + if(unlikely(debug_enabled)) loops++; found = 0; for(p = root_of_pids; p ; p = p->next) { // if this process does not have a target @@ -2346,8 +2472,8 @@ static void apply_apps_groups_targets_inheritance(void) { p->target = p->parent->target; found++; - if(debug || (p->target && p->target->debug)) - fprintf(stderr, "apps.plugin: \t\tTARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s).\n", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm); + if(debug_enabled || (p->target && p->target->debug_enabled)) + debug_log_int("TARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s).", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm); } } } @@ -2357,7 +2483,7 @@ static void apply_apps_groups_targets_inheritance(void) { int sortlist = 1; found = 1; while(found) { - if(unlikely(debug)) loops++; + if(unlikely(debug_enabled)) loops++; found = 0; for(p = root_of_pids; p ; p = p->next) { @@ -2382,16 +2508,15 @@ static void apply_apps_groups_targets_inheritance(void) { if(unlikely(p->target && !p->parent->target)) { p->parent->target = p->target; - if(debug || (p->target && p->target->debug)) - fprintf(stderr, "apps.plugin: \t\tTARGET INHERITANCE: %s is inherited by %d (%s) from its child %d (%s).\n", p->target->name, p->parent->pid, p->parent->comm, p->pid, p->comm); + if(debug_enabled || (p->target && p->target->debug_enabled)) + debug_log_int("TARGET INHERITANCE: %s is inherited by %d (%s) from its child %d (%s).", p->target->name, p->parent->pid, p->parent->comm, p->pid, p->comm); } found++; } } - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: TARGET INHERITANCE: merged %d processes\n", found); + debug_log("TARGET INHERITANCE: merged %d processes", found); } // init goes always to default target @@ -2403,7 +2528,7 @@ static void apply_apps_groups_targets_inheritance(void) { all_pids[0]->target = apps_groups_default_target; // give a default target on all top level processes - if(unlikely(debug)) loops++; + if(unlikely(debug_enabled)) loops++; for(p = root_of_pids; p ; p = p->next) { // if the process is not merged itself // then is is a top level process @@ -2421,21 +2546,20 @@ static void apply_apps_groups_targets_inheritance(void) { // give a target to all merged child processes found = 1; while(found) { - if(unlikely(debug)) loops++; + if(unlikely(debug_enabled)) loops++; found = 0; for(p = root_of_pids; p ; p = p->next) { if(unlikely(!p->target && p->merged && p->parent && p->parent->target)) { p->target = p->parent->target; found++; - if(debug || (p->target && p->target->debug)) - fprintf(stderr, "apps.plugin: \t\tTARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s) at phase 2.\n", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm); + if(debug_enabled || (p->target && p->target->debug_enabled)) + debug_log_int("TARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s) at phase 2.", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm); } } } - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: apply_apps_groups_targets_inheritance() made %d loops on the process tree\n", loops); + debug_log("apply_apps_groups_targets_inheritance() made %d loops on the process tree", loops); } static size_t zero_all_targets(struct target *root) { @@ -2570,9 +2694,10 @@ static inline void aggregate_pid_fds_on_targets(struct pid_stat *p) { reallocate_target_fds(u); reallocate_target_fds(g); - int c, size = p->fds_size, *fds = p->fds; + size_t c, size = p->fds_size; + struct pid_fd *fds = p->fds; for(c = 0; c < size ;c++) { - int fd = fds[c]; + int fd = fds[c].fd; if(likely(fd <= 0 || fd >= all_files_size)) continue; @@ -2628,8 +2753,8 @@ static inline void aggregate_pid_on_target(struct target *w, struct pid_stat *p, w->processes++; w->num_threads += p->num_threads; - if(unlikely(debug || w->debug)) - fprintf(stderr, "apps.plugin: \taggregating '%s' pid %d on target '%s' utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", gtime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", cgtime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT "\n", p->comm, p->pid, w->name, p->utime, p->stime, p->gtime, p->cutime, p->cstime, p->cgtime, p->minflt, p->majflt, p->cminflt, p->cmajflt); + if(unlikely(debug_enabled || w->debug_enabled)) + debug_log_int("aggregating '%s' pid %d on target '%s' utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", gtime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", cgtime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT "", p->comm, p->pid, w->name, p->utime, p->stime, p->gtime, p->cutime, p->cstime, p->cgtime, p->minflt, p->majflt, p->cminflt, p->cmajflt); } static void calculate_netdata_statistics(void) { @@ -2660,8 +2785,8 @@ static void calculate_netdata_statistics(void) { if(likely(p->user_target && p->user_target->uid == p->uid)) w = p->user_target; else { - if(unlikely(debug && p->user_target)) - fprintf(stderr, "apps.plugin: \t\tpid %d (%s) switched user from %u (%s) to %u.\n", p->pid, p->comm, p->user_target->uid, p->user_target->name, p->uid); + if(unlikely(debug_enabled && p->user_target)) + debug_log("pid %d (%s) switched user from %u (%s) to %u.", p->pid, p->comm, p->user_target->uid, p->user_target->name, p->uid); w = p->user_target = get_users_target(p->uid); } @@ -2676,8 +2801,8 @@ static void calculate_netdata_statistics(void) { if(likely(p->group_target && p->group_target->gid == p->gid)) w = p->group_target; else { - if(unlikely(debug && p->group_target)) - fprintf(stderr, "apps.plugin: \t\tpid %d (%s) switched group from %u (%s) to %u.\n", p->pid, p->comm, p->group_target->gid, p->group_target->name, p->gid); + if(unlikely(debug_enabled && p->group_target)) + debug_log("pid %d (%s) switched group from %u (%s) to %u.", p->pid, p->comm, p->group_target->gid, p->group_target->name, p->gid); w = p->group_target = get_groups_target(p->gid); } @@ -2698,8 +2823,6 @@ static void calculate_netdata_statistics(void) { // ---------------------------------------------------------------------------- // update chart dimensions -int print_calculated_number(char *str, calculated_number value) { (void)str; (void)value; return 0; } - static inline void send_BEGIN(const char *type, const char *id, usec_t usec) { fprintf(stdout, "BEGIN %s.%s %llu\n", type, id, usec); } @@ -2751,6 +2874,9 @@ void send_resource_usage_to_netdata(usec_t dt) { "CHART netdata.apps_sizes '' 'Apps Plugin Files' 'files/s' apps.plugin netdata.apps_sizes line 140001 %1$d\n" "DIMENSION calls '' incremental 1 1\n" "DIMENSION files '' incremental 1 1\n" + "DIMENSION filenames '' incremental 1 1\n" + "DIMENSION inode_changes '' incremental 1 1\n" + "DIMENSION link_changes '' incremental 1 1\n" "DIMENSION pids '' absolute 1 1\n" "DIMENSION fds '' absolute 1 1\n" "DIMENSION targets '' absolute 1 1\n" @@ -2793,6 +2919,9 @@ void send_resource_usage_to_netdata(usec_t dt) { "BEGIN netdata.apps_sizes %llu\n" "SET calls = %zu\n" "SET files = %zu\n" + "SET filenames = %zu\n" + "SET inode_changes = %zu\n" + "SET link_changes = %zu\n" "SET pids = %zu\n" "SET fds = %d\n" "SET targets = %zu\n" @@ -2804,6 +2933,9 @@ void send_resource_usage_to_netdata(usec_t dt) { , dt , calls_counter , file_counter + , filenames_allocated_counter + , inodes_changed_counter + , links_changed_counter , all_pids_count , all_files_len , apps_groups_targets_count @@ -2854,7 +2986,7 @@ static void normalize_utilization(struct target *root) { // here we try to eliminate them by disabling childs processing either for specific dimensions // or entirely. Of course, either way, we disable it just a single iteration. - kernel_uint_t max_time = processors * hz * RATES_DETAIL; + kernel_uint_t max_time = processors * system_hz * RATES_DETAIL; kernel_uint_t utime = 0, cutime = 0, stime = 0, cstime = 0, gtime = 0, cgtime = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0; if(global_utime > max_time) global_utime = max_time; @@ -2932,7 +3064,7 @@ static void normalize_utilization(struct target *root) { // if(gtime_fix_ratio < 0.0) gtime_fix_ratio = 0.0; // if(cgtime_fix_ratio < 0.0) cgtime_fix_ratio = 0.0; - // FIXME + // TODO // we use cpu time to normalize page faults // the problem is that to find the proper max values // for page faults we have to parse /proc/vmstat @@ -2957,14 +3089,12 @@ static void normalize_utilization(struct target *root) { // the report - if(unlikely(debug)) { - fprintf(stderr, + debug_log( "SYSTEM: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " " "COLLECTED: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " cu=" KERNEL_UINT_FORMAT " cs=" KERNEL_UINT_FORMAT " cg=" KERNEL_UINT_FORMAT " " "DELTA: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " " "FIX: u=%0.2f s=%0.2f g=%0.2f cu=%0.2f cs=%0.2f cg=%0.2f " "FINALLY: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " cu=" KERNEL_UINT_FORMAT " cs=" KERNEL_UINT_FORMAT " cg=" KERNEL_UINT_FORMAT " " - "\n" , global_utime , global_stime , global_gtime @@ -2989,8 +3119,7 @@ static void normalize_utilization(struct target *root) { , (kernel_uint_t)(cutime * cutime_fix_ratio) , (kernel_uint_t)(cstime * cstime_fix_ratio) , (kernel_uint_t)(cgtime * cgtime_fix_ratio) - ); - } + ); } #else // ALL_PIDS_ARE_READ_INSTANTLY == 1 static void normalize_utilization(struct target *root) { @@ -3151,7 +3280,8 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type if (!w->exposed && w->processes) { newly_added++; w->exposed = 1; - if (debug || w->debug) fprintf(stderr, "apps.plugin: %s just added - regenerating charts.\n", w->name); + if (debug_enabled || w->debug_enabled) + debug_log_int("%s just added - regenerating charts.", w->name); } } @@ -3163,7 +3293,7 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type fprintf(stdout, "CHART %s.cpu '' '%s CPU Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu stacked 20001 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) - fprintf(stdout, "DIMENSION %s '' absolute 1 %llu %s\n", w->name, hz * RATES_DETAIL / 100, w->hidden ? "hidden" : ""); + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu %s\n", w->name, system_hz * RATES_DETAIL / 100, w->hidden ? "hidden" : ""); } fprintf(stdout, "CHART %s.mem '' '%s Real Memory (w/o shared)' 'MB' mem %s.mem stacked 20003 %d\n", type, title, type, update_every); @@ -3193,20 +3323,20 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type fprintf(stdout, "CHART %s.cpu_user '' '%s CPU User Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu_user stacked 20020 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) - fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, hz * RATES_DETAIL / 100LLU); + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, system_hz * RATES_DETAIL / 100LLU); } fprintf(stdout, "CHART %s.cpu_system '' '%s CPU System Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu_system stacked 20021 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) - fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, hz * RATES_DETAIL / 100LLU); + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, system_hz * RATES_DETAIL / 100LLU); } if(show_guest_time) { fprintf(stdout, "CHART %s.cpu_guest '' '%s CPU Guest Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu_system stacked 20022 %d\n", type, title, (processors * 100), processors, (processors > 1) ? "s" : "", type, update_every); for (w = root; w; w = w->next) { if(unlikely(w->exposed)) - fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, hz * RATES_DETAIL / 100LLU); + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, system_hz * RATES_DETAIL / 100LLU); } } @@ -3315,7 +3445,6 @@ cleanup: static void parse_args(int argc, char **argv) { int i, freq = 0; - char *name = NULL; for(i = 1; i < argc; i++) { if(!freq) { @@ -3341,10 +3470,26 @@ static void parse_args(int argc, char **argv) } if(strcmp("debug", argv[i]) == 0) { - debug = 1; - // debug_flags = 0xffffffff; +#ifdef NETDATA_INTERNAL_CHECKS + debug_enabled = 1; +#else + fprintf(stderr, "apps.plugin has been compiled without debugging\n"); +#endif + continue; + } + +#ifndef __FreeBSD__ + if(strcmp("fds-cache-secs", argv[i]) == 0) { + if(argc <= i + 1) { + fprintf(stderr, "Parameter 'fds-cache-secs' requires a number as argument.\n"); + exit(1); + } + i++; + max_fds_cache_seconds = str2i(argv[i]); + if(max_fds_cache_seconds < 0) max_fds_cache_seconds = 0; continue; } +#endif if(strcmp("no-childs", argv[i]) == 0 || strcmp("without-childs", argv[i]) == 0) { include_exited_childs = 0; @@ -3415,44 +3560,54 @@ static void parse_args(int argc, char **argv) " without-files enable / disable reporting files, sockets, pipes\n" " (default is enabled)\n" "\n" - " NAME read apps_NAME.conf instead of\n" - " apps_groups.conf\n" - " (default NAME=groups)\n" +#ifndef __FreeBSD__ + " fds-cache-secs N cache the files of processed for N seconds\n" + " caching is adaptive per file (when a file\n" + " is found, it starts at 0 and while the file\n" + " remains open, it is incremented up to the\n" + " max given)\n" + " (default is %d seconds)\n" "\n" +#endif " version or -v or -V print program version and exit\n" "\n" , VERSION +#ifndef __FreeBSD__ + , max_fds_cache_seconds +#endif ); exit(1); } - if(!name) { - name = argv[i]; - continue; - } - error("Cannot understand option %s", argv[i]); exit(1); } if(freq > 0) update_every = freq; - if(!name) name = "groups"; - if(read_apps_groups_conf(name)) { - error("Cannot read process groups '%s/apps_%s.conf'. There are no internal defaults. Failing.", config_dir, name); - exit(1); + if(read_apps_groups_conf(user_config_dir, "groups")) { + info("Cannot read process groups configuration file '%s/apps_groups.conf'. Will try '%s/apps_groups.conf'", user_config_dir, stock_config_dir); + + if(read_apps_groups_conf(stock_config_dir, "groups")) { + error("Cannot read process groups '%s/apps_groups.conf'. There are no internal defaults. Failing.", stock_config_dir); + exit(1); + } + else + info("Loaded config file '%s/apps_groups.conf'", stock_config_dir); } + else + info("Loaded config file '%s/apps_groups.conf'", user_config_dir); } static int am_i_running_as_root() { uid_t uid = getuid(), euid = geteuid(); if(uid == 0 || euid == 0) { - if(debug) info("I am running with escalated privileges, uid = %u, euid = %u.", uid, euid); + if(debug_enabled) info("I am running with escalated privileges, uid = %u, euid = %u.", uid, euid); return 1; } - if(debug) info("I am not running with escalated privileges, uid = %u, euid = %u.", uid, euid); + if(debug_enabled) info("I am not running with escalated privileges, uid = %u, euid = %u.", uid, euid); return 0; } @@ -3463,7 +3618,7 @@ static int check_capabilities() { error("Cannot get current capabilities."); return 0; } - else if(debug) + else if(debug_enabled) info("Received my capabilities from the system."); int ret = 1; @@ -3478,7 +3633,7 @@ static int check_capabilities() { error("apps.plugin should run with CAP_DAC_READ_SEARCH."); ret = 0; } - else if(debug) + else if(debug_enabled) info("apps.plugin runs with CAP_DAC_READ_SEARCH."); } @@ -3492,7 +3647,7 @@ static int check_capabilities() { error("apps.plugin should run with CAP_SYS_PTRACE."); ret = 0; } - else if(debug) + else if(debug_enabled) info("apps.plugin runs with CAP_SYS_PTRACE."); } @@ -3521,19 +3676,25 @@ int main(int argc, char **argv) { error_log_errors_per_period = 100; error_log_throttle_period = 3600; + // since apps.plugin runs as root, prevent it from opening symbolic links + procfile_open_flags = O_RDONLY|O_NOFOLLOW; + netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX"); - if(netdata_configured_host_prefix == NULL) { - // info("NETDATA_HOST_PREFIX is not passed from netdata"); - netdata_configured_host_prefix = ""; + if(verify_netdata_host_prefix() == -1) exit(1); + + user_config_dir = getenv("NETDATA_USER_CONFIG_DIR"); + if(user_config_dir == NULL) { + // info("NETDATA_CONFIG_DIR is not passed from netdata"); + user_config_dir = CONFIG_DIR; } - // else info("Found NETDATA_HOST_PREFIX='%s'", netdata_configured_host_prefix); + // else info("Found NETDATA_USER_CONFIG_DIR='%s'", user_config_dir); - config_dir = getenv("NETDATA_CONFIG_DIR"); - if(config_dir == NULL) { + stock_config_dir = getenv("NETDATA_STOCK_CONFIG_DIR"); + if(stock_config_dir == NULL) { // info("NETDATA_CONFIG_DIR is not passed from netdata"); - config_dir = CONFIG_DIR; + stock_config_dir = LIBCONFIG_DIR; } - // else info("Found NETDATA_CONFIG_DIR='%s'", config_dir); + // else info("Found NETDATA_USER_CONFIG_DIR='%s'", user_config_dir); #ifdef NETDATA_INTERNAL_CHECKS if(debug_flags != 0) { @@ -3561,14 +3722,14 @@ int main(int argc, char **argv) { error("apps.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. " "Without these, apps.plugin cannot report disk I/O utilization of other processes. " "To enable capabilities run: sudo setcap cap_dac_read_search,cap_sys_ptrace+ep %s; " - "To enable setuid to root run: sudo chown root %s; sudo chmod 4755 %s; " + "To enable setuid to root run: sudo chown root:netdata %s; sudo chmod 4750 %s; " , uid, euid, argv[0], argv[0], argv[0] ); #else error("apps.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. " "Without these, apps.plugin cannot report disk I/O utilization of other processes. " "Your system does not support capabilities. " - "To enable setuid to root run: sudo chown root %s; sudo chmod 4755 %s; " + "To enable setuid to root run: sudo chown root:netdata %s; sudo chmod 4750 %s; " , uid, euid, argv[0], argv[0] ); #endif @@ -3630,8 +3791,7 @@ int main(int argc, char **argv) { show_guest_time_old = show_guest_time; - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: done Loop No %zu\n", global_iterations_counter); + debug_log("done Loop No %zu", global_iterations_counter); // restart check (14400 seconds) if(now_monotonic_sec() - started_t > 14400) exit(0); |