summary | refs | log | tree | commit | diff | stats
path: root/collectors/apps.plugin/apps_plugin.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- collectors/apps.plugin/apps_plugin.c (renamed from src/apps_plugin.c) 712
1 files changed, 436 insertions, 276 deletions
diff --git a/src/apps_plugin.c b/collectors/apps.plugin/apps_plugin.c
index 8595da6c2..f592e9fc8 100644
--- a/src/apps_plugin.c
+++ b/collectors/apps.plugin/apps_plugin.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
/*
* netdata apps.plugin
@@ -5,7 +6,61 @@
* Released under GPL v3+
*/
-#include "common.h"
+#include "../../libnetdata/libnetdata.h"
+
+// ----------------------------------------------------------------------------
+
+// callback required by fatal(): here it only terminates the process with exit code 'ret'
+void netdata_cleanup_and_exit(int ret) {
+ exit(ret);
+}
+
+// callbacks required by popen(): no-op stubs in this plugin
+void signals_block(void) {}
+void signals_unblock(void) {}
+void signals_reset(void) {}
+
+// callback required by eval(): stub that ignores its arguments, never writes *result and always returns 0
+int health_variable_lookup(const char *variable, uint32_t hash, struct rrdcalc *rc, calculated_number *result) {
+ (void)variable;
+ (void)hash;
+ (void)rc;
+ (void)result;
+ return 0;
+}
+
+// required by get_system_cpus()
+char *netdata_configured_host_prefix = "";
+
+
+// ----------------------------------------------------------------------------
+// debugging
+
+static int debug_enabled = 0; // global debug switch; tested by debug_log() and by the callers of debug_log_int()
+static inline void debug_log_int(const char *fmt, ... ) { // prints one printf-style message to stderr, without checking debug_enabled
+ va_list args;
+ // every message is prefixed with the plugin name
+ fprintf( stderr, "apps.plugin: ");
+ va_start( args, fmt );
+ vfprintf( stderr, fmt, args );
+ va_end( args );
+ // the newline is appended here, so callers pass format strings without a trailing '\n'
+ fputc('\n', stderr);
+}
+
+#ifdef NETDATA_INTERNAL_CHECKS
+// with internal checks: debug_log() prints through debug_log_int(), but only while debug_enabled is set
+#define debug_log(fmt, args...) do { if(unlikely(debug_enabled)) debug_log_int(fmt, ##args); } while(0)
+
+#else
+// without internal checks: debug_log() expands to a call to an empty function, so fmt and args are dropped and never evaluated
+static inline void debug_log_dummy(void) {}
+#define debug_log(fmt, args...) debug_log_dummy()
+
+#endif
+
+
+// ----------------------------------------------------------------------------
#ifdef __FreeBSD__
#include <sys/user.h>
@@ -57,21 +112,21 @@
// command line options
static int
- debug = 0,
update_every = 1,
enable_guest_charts = 0,
#ifdef __FreeBSD__
enable_file_charts = 0,
#else
enable_file_charts = 1,
+ max_fds_cache_seconds = 60,
#endif
enable_users_charts = 1,
enable_groups_charts = 1,
include_exited_childs = 1;
-
-// will be changed to getenv(NETDATA_CONFIG_DIR) if it exists
-static char *config_dir = CONFIG_DIR;
+// will be changed to getenv(NETDATA_USER_CONFIG_DIR) if it exists
+static char *user_config_dir = CONFIG_DIR;
+static char *stock_config_dir = LIBCONFIG_DIR;
// ----------------------------------------------------------------------------
// internal flags
@@ -90,6 +145,9 @@ static size_t
global_iterations_counter = 1,
calls_counter = 0,
file_counter = 0,
+ filenames_allocated_counter = 0,
+ inodes_changed_counter = 0,
+ links_changed_counter = 0,
targets_assignment_counter = 0;
@@ -193,7 +251,7 @@ struct target {
unsigned int processes; // how many processes have been merged to this
int exposed; // if set, we have sent this to netdata
int hidden; // if set, we set the hidden flag on the dimension
- int debug;
+ int debug_enabled;
int ends_with;
int starts_with; // if set, the compare string matches only the
// beginning of the command
@@ -218,6 +276,18 @@ size_t
// structure to store data for each process running
// see: man proc for the description of the fields
+struct pid_fd { // state kept for one open file descriptor of a process; stored in pid_stat.fds[]
+ int fd; // value returned by file_descriptor_find_or_add(); 0 = unknown, negated while the fd has not been seen in the current scan
+
+#ifndef __FreeBSD__
+ ino_t inode; // d_ino of the /proc/<pid>/fd directory entry at the time the link was resolved
+ char *filename; // allocated path of the /proc/<pid>/fd/<n> link; built once and then reused
+ uint32_t link_hash; // simple_hash() of the link target, compared on re-read to detect a changed link
+ size_t cache_iterations_counter; // scans left during which the link is not re-read
+ size_t cache_iterations_reset; // value cache_iterations_counter is reloaded with after a re-read
+#endif
+};
+
struct pid_stat {
int32_t pid;
char comm[MAX_COMPARE_NAME + 1];
@@ -312,15 +382,15 @@ struct pid_stat {
kernel_uint_t io_storage_bytes_written;
// kernel_uint_t io_cancelled_write_bytes;
- int *fds; // array of fds it uses
- int fds_size; // the size of the fds array
+ struct pid_fd *fds; // array of fds it uses
+ size_t fds_size; // the size of the fds array
int children_count; // number of processes directly referencing this
- char keep:1; // 1 when we need to keep this process in memory even after it exited
+ unsigned char keep:1; // 1 when we need to keep this process in memory even after it exited
int keeploops; // increases by 1 every time keep is 1 and updated 0
- char updated:1; // 1 when the process is currently running
- char merged:1; // 1 when it has been merged to its parent
- char read:1; // 1 when we have already read this process for this iteration
+ unsigned char updated:1; // 1 when the process is currently running
+ unsigned char merged:1; // 1 when it has been merged to its parent
+ unsigned char read:1; // 1 when we have already read this process for this iteration
int sortlist; // higher numbers = top on the process tree
// each process gets a unique number
@@ -416,13 +486,6 @@ static int
all_files_size = 0;
// ----------------------------------------------------------------------------
-// callback required by fatal()
-
-void netdata_cleanup_and_exit(int ret) {
- exit(ret);
-}
-
-// ----------------------------------------------------------------------------
// apps_groups.conf
// aggregate all processes in groups, to have a limited number of dimensions
@@ -452,8 +515,7 @@ static struct target *get_users_target(uid_t uid) {
w->next = users_root_target;
users_root_target = w;
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: added uid %u ('%s') target\n", w->uid, w->name);
+ debug_log("added uid %u ('%s') target", w->uid, w->name);
return w;
}
@@ -485,8 +547,7 @@ struct target *get_groups_target(gid_t gid)
w->next = groups_root_target;
groups_root_target = w;
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: added gid %u ('%s') target\n", w->gid, w->name);
+ debug_log("added gid %u ('%s') target", w->gid, w->name);
return w;
}
@@ -527,11 +588,11 @@ static struct target *get_apps_groups_target(const char *id, struct target *targ
break;
}
- if(unlikely(debug)) {
+ if(unlikely(debug_enabled)) {
if(unlikely(target))
- fprintf(stderr, "apps.plugin: REUSING TARGET NAME '%s' on ID '%s'\n", target->name, target->id);
+ debug_log("REUSING TARGET NAME '%s' on ID '%s'", target->name, target->id);
else
- fprintf(stderr, "apps.plugin: NEW TARGET NAME '%s' on ID '%s'\n", name, id);
+ debug_log("NEW TARGET NAME '%s' on ID '%s'", name, id);
}
}
@@ -564,34 +625,37 @@ static struct target *get_apps_groups_target(const char *id, struct target *targ
w->comparelen = strlen(w->compare);
w->hidden = thidden;
- w->debug = tdebug;
+#ifdef NETDATA_INTERNAL_CHECKS
+ w->debug_enabled = tdebug;
+#else
+ if(tdebug)
+ fprintf(stderr, "apps.plugin has been compiled without debugging\n");
+#endif
w->target = target;
// append it, to maintain the order in apps_groups.conf
if(last) last->next = w;
else apps_groups_root_target = w;
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: ADDING TARGET ID '%s', process name '%s' (%s), aggregated on target '%s', options: %s %s\n"
- , w->id
- , w->compare, (w->starts_with && w->ends_with)?"substring":((w->starts_with)?"prefix":((w->ends_with)?"suffix":"exact"))
- , w->target?w->target->name:w->name
- , (w->hidden)?"hidden":"-"
- , (w->debug)?"debug":"-"
- );
+ debug_log("ADDING TARGET ID '%s', process name '%s' (%s), aggregated on target '%s', options: %s %s"
+ , w->id
+ , w->compare, (w->starts_with && w->ends_with)?"substring":((w->starts_with)?"prefix":((w->ends_with)?"suffix":"exact"))
+ , w->target?w->target->name:w->name
+ , (w->hidden)?"hidden":"-"
+ , (w->debug_enabled)?"debug":"-"
+ );
return w;
}
// read the apps_groups.conf file
-static int read_apps_groups_conf(const char *file)
+static int read_apps_groups_conf(const char *path, const char *file)
{
char filename[FILENAME_MAX + 1];
- snprintfz(filename, FILENAME_MAX, "%s/apps_%s.conf", config_dir, file);
+ snprintfz(filename, FILENAME_MAX, "%s/apps_%s.conf", path, file);
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: process groups file: '%s'\n", filename);
+ debug_log("process groups file: '%s'", filename);
// ----------------------------------------
@@ -654,14 +718,16 @@ static int read_apps_groups_conf(const char *file)
// ----------------------------------------------------------------------------
// struct pid_stat management
+static inline void init_pid_fds(struct pid_stat *p, size_t first, size_t size);
static inline struct pid_stat *get_pid_entry(pid_t pid) {
if(unlikely(all_pids[pid]))
return all_pids[pid];
struct pid_stat *p = callocz(sizeof(struct pid_stat), 1);
- p->fds = callocz(sizeof(int), MAX_SPARE_FDS);
+ p->fds = mallocz(sizeof(struct pid_fd) * MAX_SPARE_FDS);
p->fds_size = MAX_SPARE_FDS;
+ init_pid_fds(p, 0, p->fds_size);
if(likely(root_of_pids))
root_of_pids->prev = p;
@@ -685,8 +751,7 @@ static inline void del_pid_entry(pid_t pid) {
return;
}
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: process %d %s exited, deleting it.\n", pid, p->comm);
+ debug_log("process %d %s exited, deleting it.", pid, p->comm);
if(root_of_pids == p)
root_of_pids = p->next;
@@ -694,7 +759,17 @@ static inline void del_pid_entry(pid_t pid) {
if(p->next) p->next->prev = p->prev;
if(p->prev) p->prev->next = p->next;
+ // free the filename
+#ifndef __FreeBSD__
+ {
+ size_t i;
+ for(i = 0; i < p->fds_size; i++)
+ if(p->fds[i].filename)
+ freez(p->fds[i].filename);
+ }
+#endif
freez(p->fds);
+
freez(p->fds_dirname);
freez(p->stat_filename);
freez(p->status_filename);
@@ -716,8 +791,8 @@ static inline int managed_log(struct pid_stat *p, uint32_t log, int status) {
if(unlikely(!status)) {
// error("command failed log %u, errno %d", log, errno);
- if(unlikely(debug || errno != ENOENT)) {
- if(unlikely(debug || !(p->log_thrown & log))) {
+ if(unlikely(debug_enabled || errno != ENOENT)) {
+ if(unlikely(debug_enabled || !(p->log_thrown & log))) {
p->log_thrown |= log;
switch(log) {
case PID_LOG_IO:
@@ -779,7 +854,7 @@ static inline void assign_target_to_pid(struct pid_stat *p) {
struct target *w;
for(w = apps_groups_root_target; w ; w = w->next) {
- // if(debug || (p->target && p->target->debug)) fprintf(stderr, "apps.plugin: \t\tcomparing '%s' with '%s'\n", w->compare, p->comm);
+ // if(debug_enabled || (p->target && p->target->debug_enabled)) debug_log_int("\t\tcomparing '%s' with '%s'", w->compare, p->comm);
// find it - 4 cases:
// 1. the target is not a pattern
@@ -796,8 +871,8 @@ static inline void assign_target_to_pid(struct pid_stat *p) {
if(w->target) p->target = w->target;
else p->target = w;
- if(debug || (p->target && p->target->debug))
- fprintf(stderr, "apps.plugin: \t\t%s linked to target %s\n", p->comm, p->target->name);
+ if(debug_enabled || (p->target && p->target->debug_enabled))
+ debug_log_int("%s linked to target %s", p->comm, p->target->name);
break;
}
@@ -828,7 +903,7 @@ static inline int read_proc_pid_cmdline(struct pid_stat *p) {
p->cmdline_filename = strdupz(filename);
}
- int fd = open(p->cmdline_filename, O_RDONLY, 0666);
+ int fd = open(p->cmdline_filename, procfile_open_flags, 0666);
if(unlikely(fd == -1)) goto cleanup;
ssize_t i, bytes = read(fd, cmdline, MAX_CMDLINE);
@@ -838,18 +913,20 @@ static inline int read_proc_pid_cmdline(struct pid_stat *p) {
#endif
cmdline[bytes] = '\0';
- for(i = 0; i < bytes ; i++)
+ for(i = 0; i < bytes ; i++) {
if(unlikely(!cmdline[i])) cmdline[i] = ' ';
+ }
+ if(p->cmdline) freez(p->cmdline);
p->cmdline = strdupz(cmdline);
- if(unlikely(debug))
- fprintf(stderr, "Read file '%s' contents: %s\n", p->cmdline_filename, p->cmdline);
+ debug_log("Read file '%s' contents: %s", p->cmdline_filename, p->cmdline);
return 1;
cleanup:
// copy the command to the command line
+ if(p->cmdline) freez(p->cmdline);
p->cmdline = strdupz(p->comm);
return 0;
}
@@ -963,7 +1040,7 @@ static inline int read_proc_pid_status(struct pid_stat *p, void *ptr) {
p->gid = proc_info->ki_groups[0];
p->status_vmsize = proc_info->ki_size / 1024; // in kB
p->status_vmrss = proc_info->ki_rssize * pagesize / 1024; // in kB
- // FIXME: what about shared and swap memory on FreeBSD?
+ // TODO: what about shared and swap memory on FreeBSD?
return 1;
#else
(void)ptr;
@@ -1004,7 +1081,7 @@ static inline int read_proc_pid_status(struct pid_stat *p, void *ptr) {
arl_begin(p->status_arl);
for(l = 0; l < lines ;l++) {
- // fprintf(stderr, "CHECK: line %zu of %zu, key '%s' = '%s'\n", l, lines, procfile_lineword(ff, l, 0), procfile_lineword(ff, l, 1));
+ // debug_log("CHECK: line %zu of %zu, key '%s' = '%s'", l, lines, procfile_lineword(ff, l, 0), procfile_lineword(ff, l, 1));
arl_ptr.line = l;
if(unlikely(arl_check(p->status_arl,
procfile_lineword(ff, l, 0),
@@ -1013,7 +1090,7 @@ static inline int read_proc_pid_status(struct pid_stat *p, void *ptr) {
p->status_vmshared = p->status_rssfile + p->status_rssshmem;
- // fprintf(stderr, "%s uid %d, gid %d, VmSize %zu, VmRSS %zu, RssFile %zu, RssShmem %zu, shared %zu\n", p->comm, (int)p->uid, (int)p->gid, p->status_vmsize, p->status_vmrss, p->status_rssfile, p->status_rssshmem, p->status_vmshared);
+ // debug_log("%s uid %d, gid %d, VmSize %zu, VmRSS %zu, RssFile %zu, RssShmem %zu, shared %zu", p->comm, (int)p->uid, (int)p->gid, p->status_vmsize, p->status_vmrss, p->status_rssfile, p->status_rssshmem, p->status_vmshared);
return 1;
#endif
@@ -1072,11 +1149,11 @@ static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) {
#endif
if(strcmp(p->comm, comm) != 0) {
- if(unlikely(debug)) {
+ if(unlikely(debug_enabled)) {
if(p->comm[0])
- fprintf(stderr, "apps.plugin: \tpid %d (%s) changed name to '%s'\n", p->pid, p->comm, comm);
+ debug_log("\tpid %d (%s) changed name to '%s'", p->pid, p->comm, comm);
else
- fprintf(stderr, "apps.plugin: \tJust added %d (%s)\n", p->pid, comm);
+ debug_log("\tJust added %d (%s)", p->pid, comm);
}
strncpyz(p->comm, comm, MAX_COMPARE_NAME);
@@ -1152,8 +1229,8 @@ static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) {
}
#endif
- if(unlikely(debug || (p->target && p->target->debug)))
- fprintf(stderr, "apps.plugin: READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu) VALUES: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT ", threads=%d\n", netdata_configured_host_prefix, p->pid, p->comm, (p->target)?p->target->name:"UNSET", p->stat_collected_usec - p->last_stat_collected_usec, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->num_threads);
+ if(unlikely(debug_enabled || (p->target && p->target->debug_enabled)))
+ debug_log_int("READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu) VALUES: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT ", threads=%d", netdata_configured_host_prefix, p->pid, p->comm, (p->target)?p->target->name:"UNSET", p->stat_collected_usec - p->last_stat_collected_usec, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->num_threads);
if(unlikely(global_iterations_counter == 1)) {
p->minflt = 0;
@@ -1332,7 +1409,7 @@ int file_descriptor_compare(void* a, void* b) {
return strcmp(((struct file_descriptor *)a)->name, ((struct file_descriptor *)b)->name);
}
-int file_descriptor_iterator(avl *a) { if(a) {}; return 0; }
+// int file_descriptor_iterator(avl *a) { if(a) {}; return 0; }
avl_tree all_files_index = {
NULL,
@@ -1368,15 +1445,13 @@ static inline void file_descriptor_not_used(int id)
}
#endif /* NETDATA_INTERNAL_CHECKS */
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: decreasing slot %d (count = %d).\n", id, all_files[id].count);
+ debug_log("decreasing slot %d (count = %d).", id, all_files[id].count);
if(all_files[id].count > 0) {
all_files[id].count--;
if(!all_files[id].count) {
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: >> slot %d is empty.\n", id);
+ debug_log(" >> slot %d is empty.", id);
if(unlikely(file_descriptor_remove(&all_files[id]) != (void *)&all_files[id]))
error("INTERNAL ERROR: removal of unused fd from index, removed a different fd");
@@ -1398,8 +1473,7 @@ static inline void all_files_grow() {
int i;
// there is no empty slot
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: extending fd array to %d entries\n", all_files_size + FILE_DESCRIPTORS_INCREASE_STEP);
+ debug_log("extending fd array to %d entries", all_files_size + FILE_DESCRIPTORS_INCREASE_STEP);
all_files = reallocz(all_files, (all_files_size + FILE_DESCRIPTORS_INCREASE_STEP) * sizeof(struct file_descriptor));
@@ -1407,8 +1481,7 @@ static inline void all_files_grow() {
// since all pointers are now invalid
if(unlikely(old && old != (void *)all_files)) {
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: >> re-indexing.\n");
+ debug_log(" >> re-indexing.");
all_files_index.root = NULL;
for(i = 0; i < all_files_size; i++) {
@@ -1417,8 +1490,7 @@ static inline void all_files_grow() {
error("INTERNAL ERROR: duplicate indexing of fd during realloc.");
}
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: >> re-indexing done.\n");
+ debug_log(" >> re-indexing done.");
}
// initialize the newly added entries
@@ -1441,8 +1513,7 @@ static inline int file_descriptor_set_on_empty_slot(const char *name, uint32_t h
if(!all_files || all_files_len == all_files_size)
all_files_grow();
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: >> searching for empty slot.\n");
+ debug_log(" >> searching for empty slot.");
// search for an empty slot
@@ -1453,16 +1524,14 @@ static inline int file_descriptor_set_on_empty_slot(const char *name, uint32_t h
if(c == 0) continue;
if(!all_files[c].count) {
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: >> Examining slot %d.\n", c);
+ debug_log(" >> Examining slot %d.", c);
#ifdef NETDATA_INTERNAL_CHECKS
if(all_files[c].magic == 0x0BADCAFE && all_files[c].name && file_descriptor_find(all_files[c].name, all_files[c].hash))
- error("fd on position %d is not cleared properly. It still has %s in it.\n", c, all_files[c].name);
+ error("fd on position %d is not cleared properly. It still has %s in it.", c, all_files[c].name);
#endif /* NETDATA_INTERNAL_CHECKS */
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: >> %s fd position %d for %s (last name: %s)\n", all_files[c].name?"re-using":"using", c, name, all_files[c].name);
+ debug_log(" >> %s fd position %d for %s (last name: %s)", all_files[c].name?"re-using":"using", c, name, all_files[c].name);
freez((void *)all_files[c].name);
all_files[c].name = NULL;
@@ -1479,8 +1548,7 @@ static inline int file_descriptor_set_on_empty_slot(const char *name, uint32_t h
}
// else we have an empty slot in 'c'
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: >> updating slot %d.\n", c);
+ debug_log(" >> updating slot %d.", c);
all_files[c].name = strdupz(name);
all_files[c].hash = hash;
@@ -1493,24 +1561,21 @@ static inline int file_descriptor_set_on_empty_slot(const char *name, uint32_t h
if(unlikely(file_descriptor_add(&all_files[c]) != (void *)&all_files[c]))
error("INTERNAL ERROR: duplicate indexing of fd.");
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: using fd position %d (name: %s)\n", c, all_files[c].name);
+ debug_log("using fd position %d (name: %s)", c, all_files[c].name);
return c;
}
-static inline int file_descriptor_find_or_add(const char *name)
-{
- uint32_t hash = simple_hash(name);
+static inline int file_descriptor_find_or_add(const char *name, uint32_t hash) {
+ if(unlikely(!hash))
+ hash = simple_hash(name);
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: adding or finding name '%s' with hash %u\n", name, hash);
+ debug_log("adding or finding name '%s' with hash %u", name, hash);
struct file_descriptor *fd = file_descriptor_find(name, hash);
if(fd) {
// found
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: >> found on slot %d\n", fd->pos);
+ debug_log(" >> found on slot %d", fd->pos);
fd->count++;
return fd->pos;
@@ -1530,47 +1595,65 @@ static inline int file_descriptor_find_or_add(const char *name)
else if(strcmp(t, "[timerfd]") == 0) type = FILETYPE_TIMERFD;
else if(strcmp(t, "[signalfd]") == 0) type = FILETYPE_SIGNALFD;
else {
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: FIXME: unknown anonymous inode: %s\n", name);
-
+ debug_log("UNKNOWN anonymous inode: %s", name);
type = FILETYPE_OTHER;
}
}
else if(likely(strcmp(name, "inotify") == 0)) type = FILETYPE_INOTIFY;
else {
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: FIXME: cannot understand linkname: %s\n", name);
-
+ debug_log("UNKNOWN linkname: %s", name);
type = FILETYPE_OTHER;
}
return file_descriptor_set_on_empty_slot(name, hash, type);
}
+static inline void clear_pid_fd(struct pid_fd *pfd) { // marks the slot as unknown (fd = 0) and resets its link-tracking and caching state
+ pfd->fd = 0;
+ // pfd->filename is not touched here: it is neither freed nor reset
+ #ifndef __FreeBSD__
+ pfd->link_hash = 0;
+ pfd->inode = 0;
+ pfd->cache_iterations_counter = 0;
+ pfd->cache_iterations_reset = 0;
+#endif
+}
+
static inline void make_all_pid_fds_negative(struct pid_stat *p) {
- int *fd = p->fds, *end = &p->fds[p->fds_size];
- while(fd < end) {
- *fd = -(*fd);
- fd++;
+ struct pid_fd *pfd = p->fds, *pfdend = &p->fds[p->fds_size]; // walk every slot of the process' fds array
+ while(pfd < pfdend) {
+ pfd->fd = -(pfd->fd); // negate the stored id; unknown slots (0) stay 0
+ pfd++;
}
}
static inline void cleanup_negative_pid_fds(struct pid_stat *p) {
- int *fd = p->fds, *fdend = &p->fds[p->fds_size];
+ struct pid_fd *pfd = p->fds, *pfdend = &p->fds[p->fds_size];
+ // a slot that is still negative here was not made positive again since make_all_pid_fds_negative()
+ while(pfd < pfdend) {
+ int fd = pfd->fd;
- while(fd < fdend) {
- if(unlikely(*fd < 0)) {
- file_descriptor_not_used(-(*fd));
- *fd++ = 0;
+ if(unlikely(fd < 0)) {
+ file_descriptor_not_used(-(fd)); // drop this process' reference on the shared file entry
+ clear_pid_fd(pfd); // and forget the fd in this slot
}
- else
- fd++;
+
+ pfd++;
}
}
-static inline void zero_pid_fds(struct pid_stat *p, int first, int size) {
- int *fd = &p->fds[first], *end = &p->fds[first + size];
- while(fd < end) *fd++ = 0;
+static inline void init_pid_fds(struct pid_stat *p, size_t first, size_t size) {
+ struct pid_fd *pfd = &p->fds[first], *pfdend = &p->fds[first + size];
+
+ // initializes 'size' slots of p->fds starting at index 'first'; unlike clear_pid_fd() this also
+ // sets filename to NULL without freeing it, so it is only for slots that do not own a filename yet
+ while(pfd < pfdend) {
+#ifndef __FreeBSD__
+ pfd->filename = NULL;
+#endif
+ clear_pid_fd(pfd);
+ pfd++;
+ }
+}
static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) {
@@ -1625,17 +1708,16 @@ static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) {
if (unlikely(fdid >= p->fds_size)) {
// it is small, extend it
- if (unlikely(debug))
- fprintf(stderr, "apps.plugin: extending fd memory slots for %s from %d to %d\n", p->comm, p->fds_size, fdid + MAX_SPARE_FDS);
+ debug_log("extending fd memory slots for %s from %d to %d", p->comm, p->fds_size, fdid + MAX_SPARE_FDS);
- p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(int));
+ p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(struct pid_fd));
// and initialize it
- zero_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size);
+ init_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size);
p->fds_size = fdid + MAX_SPARE_FDS;
}
- if (unlikely(p->fds[fdid] == 0)) {
+ if (unlikely(p->fds[fdid].fd == 0)) {
// we don't know this fd, get it
switch (fds->kf_type) {
@@ -1691,15 +1773,14 @@ static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) {
// if another process already has this, we will get
// the same id
- p->fds[fdid] = file_descriptor_find_or_add(fdsname);
+ p->fds[fdid].fd = file_descriptor_find_or_add(fdsname, 0);
}
// else make it positive again, we need it
- // of course, the actual file may have changed, but we don't care so much
- // FIXME: we could compare the inode as returned by readdir dirent structure
+ // of course, the actual file may have changed
else
- p->fds[fdid] = -p->fds[fdid];
+ p->fds[fdid].fd = -p->fds[fdid].fd;
bfdsbuf += fds->kf_structsize;
}
@@ -1714,7 +1795,6 @@ static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) {
if(unlikely(!fds)) return 0;
struct dirent *de;
- char fdname[FILENAME_MAX + 1];
char linkname[FILENAME_MAX + 1];
// we make all pid fds negative, so that
@@ -1733,53 +1813,103 @@ static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) {
if(unlikely(fdid < 0)) continue;
// check if the fds array is small
- if(unlikely(fdid >= p->fds_size)) {
+ if(unlikely((size_t)fdid >= p->fds_size)) {
// it is small, extend it
- if(unlikely(debug))
- fprintf(stderr
- , "apps.plugin: extending fd memory slots for %s from %d to %d\n"
- , p->comm
- , p->fds_size
- , fdid + MAX_SPARE_FDS
- );
+ debug_log("extending fd memory slots for %s from %d to %d"
+ , p->comm
+ , p->fds_size
+ , fdid + MAX_SPARE_FDS
+ );
- p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(int));
+ p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(struct pid_fd));
// and initialize it
- zero_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size);
- p->fds_size = fdid + MAX_SPARE_FDS;
+ init_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size);
+ p->fds_size = (size_t)fdid + MAX_SPARE_FDS;
}
- if(unlikely(p->fds[fdid] == 0)) {
- // we don't know this fd, get it
+ if(unlikely(p->fds[fdid].fd < 0 && de->d_ino != p->fds[fdid].inode)) {
+ // inodes do not match, clear the previous entry
+ inodes_changed_counter++;
+ file_descriptor_not_used(-p->fds[fdid].fd);
+ clear_pid_fd(&p->fds[fdid]);
+ }
- sprintf(fdname, "%s/proc/%d/fd/%s", netdata_configured_host_prefix, p->pid, de->d_name);
- ssize_t l = readlink(fdname, linkname, FILENAME_MAX);
- if(unlikely(l == -1)) {
- if(debug || (p->target && p->target->debug)) {
- if(debug || (p->target && p->target->debug))
- error("Cannot read link %s", fdname);
- }
- continue;
+ if(p->fds[fdid].fd < 0 && p->fds[fdid].cache_iterations_counter > 0) {
+ p->fds[fdid].fd = -p->fds[fdid].fd;
+ p->fds[fdid].cache_iterations_counter--;
+ continue;
+ }
+
+ if(unlikely(!p->fds[fdid].filename)) {
+ filenames_allocated_counter++;
+ char fdname[FILENAME_MAX + 1];
+ snprintfz(fdname, FILENAME_MAX, "%s/proc/%d/fd/%s", netdata_configured_host_prefix, p->pid, de->d_name);
+ p->fds[fdid].filename = strdupz(fdname);
+ }
+
+ file_counter++;
+ ssize_t l = readlink(p->fds[fdid].filename, linkname, FILENAME_MAX);
+ if(unlikely(l == -1)) {
+ // cannot read the link
+
+ if(debug_enabled || (p->target && p->target->debug_enabled))
+ error("Cannot read link %s", p->fds[fdid].filename);
+
+ if(unlikely(p->fds[fdid].fd < 0)) {
+ file_descriptor_not_used(-p->fds[fdid].fd);
+ clear_pid_fd(&p->fds[fdid]);
}
- else
- linkname[l] = '\0';
- file_counter++;
+ continue;
+ }
+ else
+ linkname[l] = '\0';
+
+ uint32_t link_hash = simple_hash(linkname);
+
+ if(unlikely(p->fds[fdid].fd < 0 && p->fds[fdid].link_hash != link_hash)) {
+ // the link changed
+ links_changed_counter++;
+ file_descriptor_not_used(-p->fds[fdid].fd);
+ clear_pid_fd(&p->fds[fdid]);
+ }
+
+ if(unlikely(p->fds[fdid].fd == 0)) {
+ // we don't know this fd, get it
// if another process already has this, we will get
// the same id
- p->fds[fdid] = file_descriptor_find_or_add(linkname);
+ p->fds[fdid].fd = file_descriptor_find_or_add(linkname, link_hash);
+ p->fds[fdid].inode = de->d_ino;
+ p->fds[fdid].link_hash = link_hash;
}
-
+ else {
// else make it positive again, we need it
- // of course, the actual file may have changed, but we don't care so much
- // FIXME: we could compare the inode as returned by readdir dirent structure
- // UPDATE: no we cannot use inodes - under /proc inodes don't change when the link is changed
+ p->fds[fdid].fd = -p->fds[fdid].fd;
+ }
- else
- p->fds[fdid] = -p->fds[fdid];
+ // caching control
+ // without this we read all the files on every iteration
+ if(max_fds_cache_seconds > 0) {
+ size_t spread = ((size_t)max_fds_cache_seconds > 10) ? 10 : (size_t)max_fds_cache_seconds;
+
+ // cache it for a few iterations
+ size_t max = ((size_t) max_fds_cache_seconds + (fdid % spread)) / (size_t) update_every;
+ p->fds[fdid].cache_iterations_reset++;
+
+ if(unlikely(p->fds[fdid].cache_iterations_reset % spread == (size_t) fdid % spread))
+ p->fds[fdid].cache_iterations_reset++;
+
+ if(unlikely((fdid <= 2 && p->fds[fdid].cache_iterations_reset > 5) ||
+ p->fds[fdid].cache_iterations_reset > max)) {
+ // for stdin, stdout, stderr (fdid <= 2) we have checked a few times, or if it goes above the max, goto max
+ p->fds[fdid].cache_iterations_reset = max;
+ }
+
+ p->fds[fdid].cache_iterations_counter = p->fds[fdid].cache_iterations_reset;
+ }
}
closedir(fds);
@@ -1791,12 +1921,12 @@ static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) {
// ----------------------------------------------------------------------------
-static inline int print_process_and_parents(struct pid_stat *p, usec_t time) {
+static inline int debug_print_process_and_parents(struct pid_stat *p, usec_t time) {
char *prefix = "\\_ ";
int indent = 0;
if(p->parent)
- indent = print_process_and_parents(p->parent, p->stat_collected_usec);
+ indent = debug_print_process_and_parents(p->parent, p->stat_collected_usec);
else
prefix = " > ";
@@ -1830,12 +1960,12 @@ static inline int print_process_and_parents(struct pid_stat *p, usec_t time) {
return indent + 1;
}
-static inline void print_process_tree(struct pid_stat *p, char *msg) {
- fprintf(stderr, "%s: process %s (%d, %s) with parents:\n", msg, p->comm, p->pid, p->updated?"running":"exited");
- print_process_and_parents(p, p->stat_collected_usec);
+static inline void debug_print_process_tree(struct pid_stat *p, char *msg) {
+ debug_log("%s: process %s (%d, %s) with parents:", msg, p->comm, p->pid, p->updated?"running":"exited");
+ debug_print_process_and_parents(p, p->stat_collected_usec);
}
-static inline void find_lost_child_debug(struct pid_stat *pe, kernel_uint_t lost, int type) {
+static inline void debug_find_lost_child(struct pid_stat *pe, kernel_uint_t lost, int type) {
int found = 0;
struct pid_stat *p = NULL;
@@ -1938,8 +2068,8 @@ static inline void process_exited_processes() {
if(utime + stime + gtime + minflt + majflt == 0)
continue;
- if(unlikely(debug)) {
- fprintf(stderr, "Absorb %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")\n"
+ if(unlikely(debug_enabled)) {
+ debug_log("Absorb %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")"
, p->comm
, p->pid
, p->updated?"running":"exited"
@@ -1949,7 +2079,7 @@ static inline void process_exited_processes() {
, minflt
, majflt
);
- print_process_tree(p, "Searching parents");
+ debug_print_process_tree(p, "Searching parents");
}
struct pid_stat *pp;
@@ -1958,59 +2088,57 @@ static inline void process_exited_processes() {
kernel_uint_t absorbed;
absorbed = remove_exited_child_from_parent(&utime, &pp->cutime);
- if(unlikely(debug && absorbed))
- fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " utime (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, utime);
+ if(unlikely(debug_enabled && absorbed))
+ debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " utime (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, utime);
absorbed = remove_exited_child_from_parent(&stime, &pp->cstime);
- if(unlikely(debug && absorbed))
- fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " stime (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, stime);
+ if(unlikely(debug_enabled && absorbed))
+ debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " stime (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, stime);
absorbed = remove_exited_child_from_parent(&gtime, &pp->cgtime);
- if(unlikely(debug && absorbed))
- fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " gtime (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, gtime);
+ if(unlikely(debug_enabled && absorbed))
+ debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " gtime (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, gtime);
absorbed = remove_exited_child_from_parent(&minflt, &pp->cminflt);
- if(unlikely(debug && absorbed))
- fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " minflt (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, minflt);
+ if(unlikely(debug_enabled && absorbed))
+ debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " minflt (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, minflt);
absorbed = remove_exited_child_from_parent(&majflt, &pp->cmajflt);
- if(unlikely(debug && absorbed))
- fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " majflt (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, majflt);
+ if(unlikely(debug_enabled && absorbed))
+ debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " majflt (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, majflt);
}
if(unlikely(utime + stime + gtime + minflt + majflt > 0)) {
- if(unlikely(debug)) {
- if(utime) find_lost_child_debug(p, utime, 3);
- if(stime) find_lost_child_debug(p, stime, 4);
- if(gtime) find_lost_child_debug(p, gtime, 5);
- if(minflt) find_lost_child_debug(p, minflt, 1);
- if(majflt) find_lost_child_debug(p, majflt, 2);
+ if(unlikely(debug_enabled)) {
+ if(utime) debug_find_lost_child(p, utime, 3);
+ if(stime) debug_find_lost_child(p, stime, 4);
+ if(gtime) debug_find_lost_child(p, gtime, 5);
+ if(minflt) debug_find_lost_child(p, minflt, 1);
+ if(majflt) debug_find_lost_child(p, majflt, 2);
}
p->keep = 1;
- if(unlikely(debug))
- fprintf(stderr, " > remaining resources - KEEP - for another loop: %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")\n"
- , p->comm
- , p->pid
- , p->updated?"running":"exited"
- , utime
- , stime
- , gtime
- , minflt
- , majflt
- );
+ debug_log(" > remaining resources - KEEP - for another loop: %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")"
+ , p->comm
+ , p->pid
+ , p->updated?"running":"exited"
+ , utime
+ , stime
+ , gtime
+ , minflt
+ , majflt
+ );
for(pp = p->parent; pp ; pp = pp->parent) {
if(pp->updated) break;
pp->keep = 1;
- if(unlikely(debug))
- fprintf(stderr, " > - KEEP - parent for another loop: %s (%d %s)\n"
- , pp->comm
- , pp->pid
- , pp->updated?"running":"exited"
- );
+ debug_log(" > - KEEP - parent for another loop: %s (%d %s)"
+ , pp->comm
+ , pp->pid
+ , pp->updated?"running":"exited"
+ );
}
p->utime_raw = utime * (p->stat_collected_usec - p->last_stat_collected_usec) / (USEC_PER_SEC * RATES_DETAIL);
@@ -2020,16 +2148,14 @@ static inline void process_exited_processes() {
p->majflt_raw = majflt * (p->stat_collected_usec - p->last_stat_collected_usec) / (USEC_PER_SEC * RATES_DETAIL);
p->cutime_raw = p->cstime_raw = p->cgtime_raw = p->cminflt_raw = p->cmajflt_raw = 0;
- if(unlikely(debug))
- fprintf(stderr, "\n");
+ debug_log(" ");
}
- else if(unlikely(debug)) {
- fprintf(stderr, " > totally absorbed - DONE - %s (%d %s)\n"
+ else
+ debug_log(" > totally absorbed - DONE - %s (%d %s)"
, p->comm
, p->pid
, p->updated?"running":"exited"
- );
- }
+ );
}
}
@@ -2054,8 +2180,8 @@ static inline void link_all_processes_to_their_parents(void) {
p->parent = pp;
pp->children_count++;
- if(unlikely(debug || (p->target && p->target->debug)))
- fprintf(stderr, "apps.plugin: \tchild %d (%s, %s) on target '%s' has parent %d (%s, %s). Parent: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", gtime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", cgtime=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT "\n", p->pid, p->comm, p->updated?"running":"exited", (p->target)?p->target->name:"UNSET", pp->pid, pp->comm, pp->updated?"running":"exited", pp->utime, pp->stime, pp->gtime, pp->minflt, pp->majflt, pp->cutime, pp->cstime, pp->cgtime, pp->cminflt, pp->cmajflt);
+ if(unlikely(debug_enabled || (p->target && p->target->debug_enabled)))
+ debug_log_int("child %d (%s, %s) on target '%s' has parent %d (%s, %s). Parent: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", gtime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", cgtime=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT "", p->pid, p->comm, p->updated?"running":"exited", (p->target)?p->target->name:"UNSET", pp->pid, pp->comm, pp->updated?"running":"exited", pp->utime, pp->stime, pp->gtime, pp->minflt, pp->majflt, pp->cutime, pp->cstime, pp->cgtime, pp->cminflt, pp->cmajflt);
}
else {
p->parent = NULL;
@@ -2105,7 +2231,7 @@ static inline int collect_data_for_pid(pid_t pid, void *ptr) {
if(unlikely(!p || p->read)) return 0;
p->read = 1;
- // fprintf(stderr, "Reading process %d (%s), sortlist %d\n", p->pid, p->comm, p->sortlist);
+ // debug_log("Reading process %d (%s), sortlist %d", p->pid, p->comm, p->sortlist);
// --------------------------------------------------------------------
// /proc/<pid>/stat
@@ -2141,8 +2267,8 @@ static inline int collect_data_for_pid(pid_t pid, void *ptr) {
// --------------------------------------------------------------------
// done!
- if(unlikely(debug && include_exited_childs && all_pids_count && p->ppid && all_pids[p->ppid] && !all_pids[p->ppid]->read))
- fprintf(stderr, "Read process %d (%s) sortlisted %d, but its parent %d (%s) sortlisted %d, is not read\n", p->pid, p->comm, p->sortlist, all_pids[p->ppid]->pid, all_pids[p->ppid]->comm, all_pids[p->ppid]->sortlist);
+ if(unlikely(debug_enabled && include_exited_childs && all_pids_count && p->ppid && all_pids[p->ppid] && !all_pids[p->ppid]->read))
+ debug_log("Read process %d (%s) sortlisted %d, but its parent %d (%s) sortlisted %d, is not read", p->pid, p->comm, p->sortlist, all_pids[p->ppid]->pid, all_pids[p->ppid]->comm, all_pids[p->ppid]->sortlist);
// mark it as updated
p->updated = 1;
@@ -2302,18 +2428,18 @@ static int collect_data_for_all_processes(void) {
// check: update_apps_groups_statistics()
static void cleanup_exited_pids(void) {
- int c;
+ size_t c;
struct pid_stat *p = NULL;
for(p = root_of_pids; p ;) {
if(!p->updated && (!p->keep || p->keeploops > 0)) {
- if(unlikely(debug && (p->keep || p->keeploops)))
- fprintf(stderr, " > CLEANUP cannot keep exited process %d (%s) anymore - removing it.\n", p->pid, p->comm);
+ if(unlikely(debug_enabled && (p->keep || p->keeploops)))
+ debug_log(" > CLEANUP cannot keep exited process %d (%s) anymore - removing it.", p->pid, p->comm);
for(c = 0; c < p->fds_size; c++)
- if(p->fds[c] > 0) {
- file_descriptor_not_used(p->fds[c]);
- p->fds[c] = 0;
+ if(p->fds[c].fd > 0) {
+ file_descriptor_not_used(p->fds[c].fd);
+ clear_pid_fd(&p->fds[c]);
}
pid_t r = p->pid;
@@ -2335,7 +2461,7 @@ static void apply_apps_groups_targets_inheritance(void) {
// inherit their target from their parent
int found = 1, loops = 0;
while(found) {
- if(unlikely(debug)) loops++;
+ if(unlikely(debug_enabled)) loops++;
found = 0;
for(p = root_of_pids; p ; p = p->next) {
// if this process does not have a target
@@ -2346,8 +2472,8 @@ static void apply_apps_groups_targets_inheritance(void) {
p->target = p->parent->target;
found++;
- if(debug || (p->target && p->target->debug))
- fprintf(stderr, "apps.plugin: \t\tTARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s).\n", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm);
+ if(debug_enabled || (p->target && p->target->debug_enabled))
+ debug_log_int("TARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s).", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm);
}
}
}
@@ -2357,7 +2483,7 @@ static void apply_apps_groups_targets_inheritance(void) {
int sortlist = 1;
found = 1;
while(found) {
- if(unlikely(debug)) loops++;
+ if(unlikely(debug_enabled)) loops++;
found = 0;
for(p = root_of_pids; p ; p = p->next) {
@@ -2382,16 +2508,15 @@ static void apply_apps_groups_targets_inheritance(void) {
if(unlikely(p->target && !p->parent->target)) {
p->parent->target = p->target;
- if(debug || (p->target && p->target->debug))
- fprintf(stderr, "apps.plugin: \t\tTARGET INHERITANCE: %s is inherited by %d (%s) from its child %d (%s).\n", p->target->name, p->parent->pid, p->parent->comm, p->pid, p->comm);
+ if(debug_enabled || (p->target && p->target->debug_enabled))
+ debug_log_int("TARGET INHERITANCE: %s is inherited by %d (%s) from its child %d (%s).", p->target->name, p->parent->pid, p->parent->comm, p->pid, p->comm);
}
found++;
}
}
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: TARGET INHERITANCE: merged %d processes\n", found);
+ debug_log("TARGET INHERITANCE: merged %d processes", found);
}
// init goes always to default target
@@ -2403,7 +2528,7 @@ static void apply_apps_groups_targets_inheritance(void) {
all_pids[0]->target = apps_groups_default_target;
// give a default target on all top level processes
- if(unlikely(debug)) loops++;
+ if(unlikely(debug_enabled)) loops++;
for(p = root_of_pids; p ; p = p->next) {
// if the process is not merged itself
// then is is a top level process
@@ -2421,21 +2546,20 @@ static void apply_apps_groups_targets_inheritance(void) {
// give a target to all merged child processes
found = 1;
while(found) {
- if(unlikely(debug)) loops++;
+ if(unlikely(debug_enabled)) loops++;
found = 0;
for(p = root_of_pids; p ; p = p->next) {
if(unlikely(!p->target && p->merged && p->parent && p->parent->target)) {
p->target = p->parent->target;
found++;
- if(debug || (p->target && p->target->debug))
- fprintf(stderr, "apps.plugin: \t\tTARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s) at phase 2.\n", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm);
+ if(debug_enabled || (p->target && p->target->debug_enabled))
+ debug_log_int("TARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s) at phase 2.", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm);
}
}
}
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: apply_apps_groups_targets_inheritance() made %d loops on the process tree\n", loops);
+ debug_log("apply_apps_groups_targets_inheritance() made %d loops on the process tree", loops);
}
static size_t zero_all_targets(struct target *root) {
@@ -2570,9 +2694,10 @@ static inline void aggregate_pid_fds_on_targets(struct pid_stat *p) {
reallocate_target_fds(u);
reallocate_target_fds(g);
- int c, size = p->fds_size, *fds = p->fds;
+ size_t c, size = p->fds_size;
+ struct pid_fd *fds = p->fds;
for(c = 0; c < size ;c++) {
- int fd = fds[c];
+ int fd = fds[c].fd;
if(likely(fd <= 0 || fd >= all_files_size))
continue;
@@ -2628,8 +2753,8 @@ static inline void aggregate_pid_on_target(struct target *w, struct pid_stat *p,
w->processes++;
w->num_threads += p->num_threads;
- if(unlikely(debug || w->debug))
- fprintf(stderr, "apps.plugin: \taggregating '%s' pid %d on target '%s' utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", gtime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", cgtime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT "\n", p->comm, p->pid, w->name, p->utime, p->stime, p->gtime, p->cutime, p->cstime, p->cgtime, p->minflt, p->majflt, p->cminflt, p->cmajflt);
+ if(unlikely(debug_enabled || w->debug_enabled))
+ debug_log_int("aggregating '%s' pid %d on target '%s' utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", gtime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", cgtime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT "", p->comm, p->pid, w->name, p->utime, p->stime, p->gtime, p->cutime, p->cstime, p->cgtime, p->minflt, p->majflt, p->cminflt, p->cmajflt);
}
static void calculate_netdata_statistics(void) {
@@ -2660,8 +2785,8 @@ static void calculate_netdata_statistics(void) {
if(likely(p->user_target && p->user_target->uid == p->uid))
w = p->user_target;
else {
- if(unlikely(debug && p->user_target))
- fprintf(stderr, "apps.plugin: \t\tpid %d (%s) switched user from %u (%s) to %u.\n", p->pid, p->comm, p->user_target->uid, p->user_target->name, p->uid);
+ if(unlikely(debug_enabled && p->user_target))
+ debug_log("pid %d (%s) switched user from %u (%s) to %u.", p->pid, p->comm, p->user_target->uid, p->user_target->name, p->uid);
w = p->user_target = get_users_target(p->uid);
}
@@ -2676,8 +2801,8 @@ static void calculate_netdata_statistics(void) {
if(likely(p->group_target && p->group_target->gid == p->gid))
w = p->group_target;
else {
- if(unlikely(debug && p->group_target))
- fprintf(stderr, "apps.plugin: \t\tpid %d (%s) switched group from %u (%s) to %u.\n", p->pid, p->comm, p->group_target->gid, p->group_target->name, p->gid);
+ if(unlikely(debug_enabled && p->group_target))
+ debug_log("pid %d (%s) switched group from %u (%s) to %u.", p->pid, p->comm, p->group_target->gid, p->group_target->name, p->gid);
w = p->group_target = get_groups_target(p->gid);
}
@@ -2698,8 +2823,6 @@ static void calculate_netdata_statistics(void) {
// ----------------------------------------------------------------------------
// update chart dimensions
-int print_calculated_number(char *str, calculated_number value) { (void)str; (void)value; return 0; }
-
static inline void send_BEGIN(const char *type, const char *id, usec_t usec) {
fprintf(stdout, "BEGIN %s.%s %llu\n", type, id, usec);
}
@@ -2751,6 +2874,9 @@ void send_resource_usage_to_netdata(usec_t dt) {
"CHART netdata.apps_sizes '' 'Apps Plugin Files' 'files/s' apps.plugin netdata.apps_sizes line 140001 %1$d\n"
"DIMENSION calls '' incremental 1 1\n"
"DIMENSION files '' incremental 1 1\n"
+ "DIMENSION filenames '' incremental 1 1\n"
+ "DIMENSION inode_changes '' incremental 1 1\n"
+ "DIMENSION link_changes '' incremental 1 1\n"
"DIMENSION pids '' absolute 1 1\n"
"DIMENSION fds '' absolute 1 1\n"
"DIMENSION targets '' absolute 1 1\n"
@@ -2793,6 +2919,9 @@ void send_resource_usage_to_netdata(usec_t dt) {
"BEGIN netdata.apps_sizes %llu\n"
"SET calls = %zu\n"
"SET files = %zu\n"
+ "SET filenames = %zu\n"
+ "SET inode_changes = %zu\n"
+ "SET link_changes = %zu\n"
"SET pids = %zu\n"
"SET fds = %d\n"
"SET targets = %zu\n"
@@ -2804,6 +2933,9 @@ void send_resource_usage_to_netdata(usec_t dt) {
, dt
, calls_counter
, file_counter
+ , filenames_allocated_counter
+ , inodes_changed_counter
+ , links_changed_counter
, all_pids_count
, all_files_len
, apps_groups_targets_count
@@ -2854,7 +2986,7 @@ static void normalize_utilization(struct target *root) {
// here we try to eliminate them by disabling childs processing either for specific dimensions
// or entirely. Of course, either way, we disable it just a single iteration.
- kernel_uint_t max_time = processors * hz * RATES_DETAIL;
+ kernel_uint_t max_time = processors * system_hz * RATES_DETAIL;
kernel_uint_t utime = 0, cutime = 0, stime = 0, cstime = 0, gtime = 0, cgtime = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0;
if(global_utime > max_time) global_utime = max_time;
@@ -2932,7 +3064,7 @@ static void normalize_utilization(struct target *root) {
// if(gtime_fix_ratio < 0.0) gtime_fix_ratio = 0.0;
// if(cgtime_fix_ratio < 0.0) cgtime_fix_ratio = 0.0;
- // FIXME
+ // TODO
// we use cpu time to normalize page faults
// the problem is that to find the proper max values
// for page faults we have to parse /proc/vmstat
@@ -2957,14 +3089,12 @@ static void normalize_utilization(struct target *root) {
// the report
- if(unlikely(debug)) {
- fprintf(stderr,
+ debug_log(
"SYSTEM: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " "
"COLLECTED: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " cu=" KERNEL_UINT_FORMAT " cs=" KERNEL_UINT_FORMAT " cg=" KERNEL_UINT_FORMAT " "
"DELTA: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " "
"FIX: u=%0.2f s=%0.2f g=%0.2f cu=%0.2f cs=%0.2f cg=%0.2f "
"FINALLY: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " cu=" KERNEL_UINT_FORMAT " cs=" KERNEL_UINT_FORMAT " cg=" KERNEL_UINT_FORMAT " "
- "\n"
, global_utime
, global_stime
, global_gtime
@@ -2989,8 +3119,7 @@ static void normalize_utilization(struct target *root) {
, (kernel_uint_t)(cutime * cutime_fix_ratio)
, (kernel_uint_t)(cstime * cstime_fix_ratio)
, (kernel_uint_t)(cgtime * cgtime_fix_ratio)
- );
- }
+ );
}
#else // ALL_PIDS_ARE_READ_INSTANTLY == 1
static void normalize_utilization(struct target *root) {
@@ -3151,7 +3280,8 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type
if (!w->exposed && w->processes) {
newly_added++;
w->exposed = 1;
- if (debug || w->debug) fprintf(stderr, "apps.plugin: %s just added - regenerating charts.\n", w->name);
+ if (debug_enabled || w->debug_enabled)
+ debug_log_int("%s just added - regenerating charts.", w->name);
}
}
@@ -3163,7 +3293,7 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type
fprintf(stdout, "CHART %s.cpu '' '%s CPU Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu stacked 20001 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every);
for (w = root; w ; w = w->next) {
if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 %llu %s\n", w->name, hz * RATES_DETAIL / 100, w->hidden ? "hidden" : "");
+ fprintf(stdout, "DIMENSION %s '' absolute 1 %llu %s\n", w->name, system_hz * RATES_DETAIL / 100, w->hidden ? "hidden" : "");
}
fprintf(stdout, "CHART %s.mem '' '%s Real Memory (w/o shared)' 'MB' mem %s.mem stacked 20003 %d\n", type, title, type, update_every);
@@ -3193,20 +3323,20 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type
fprintf(stdout, "CHART %s.cpu_user '' '%s CPU User Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu_user stacked 20020 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every);
for (w = root; w ; w = w->next) {
if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, hz * RATES_DETAIL / 100LLU);
+ fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, system_hz * RATES_DETAIL / 100LLU);
}
fprintf(stdout, "CHART %s.cpu_system '' '%s CPU System Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu_system stacked 20021 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every);
for (w = root; w ; w = w->next) {
if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, hz * RATES_DETAIL / 100LLU);
+ fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, system_hz * RATES_DETAIL / 100LLU);
}
if(show_guest_time) {
fprintf(stdout, "CHART %s.cpu_guest '' '%s CPU Guest Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu_system stacked 20022 %d\n", type, title, (processors * 100), processors, (processors > 1) ? "s" : "", type, update_every);
for (w = root; w; w = w->next) {
if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, hz * RATES_DETAIL / 100LLU);
+ fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, system_hz * RATES_DETAIL / 100LLU);
}
}
@@ -3315,7 +3445,6 @@ cleanup:
static void parse_args(int argc, char **argv)
{
int i, freq = 0;
- char *name = NULL;
for(i = 1; i < argc; i++) {
if(!freq) {
@@ -3341,10 +3470,26 @@ static void parse_args(int argc, char **argv)
}
if(strcmp("debug", argv[i]) == 0) {
- debug = 1;
- // debug_flags = 0xffffffff;
+#ifdef NETDATA_INTERNAL_CHECKS
+ debug_enabled = 1;
+#else
+ fprintf(stderr, "apps.plugin has been compiled without debugging\n");
+#endif
+ continue;
+ }
+
+#ifndef __FreeBSD__
+ if(strcmp("fds-cache-secs", argv[i]) == 0) {
+ if(argc <= i + 1) {
+ fprintf(stderr, "Parameter 'fds-cache-secs' requires a number as argument.\n");
+ exit(1);
+ }
+ i++;
+ max_fds_cache_seconds = str2i(argv[i]);
+ if(max_fds_cache_seconds < 0) max_fds_cache_seconds = 0;
continue;
}
+#endif
if(strcmp("no-childs", argv[i]) == 0 || strcmp("without-childs", argv[i]) == 0) {
include_exited_childs = 0;
@@ -3415,44 +3560,54 @@ static void parse_args(int argc, char **argv)
" without-files enable / disable reporting files, sockets, pipes\n"
" (default is enabled)\n"
"\n"
- " NAME read apps_NAME.conf instead of\n"
- " apps_groups.conf\n"
- " (default NAME=groups)\n"
+#ifndef __FreeBSD__
+ " fds-cache-secs N cache the files of processed for N seconds\n"
+ " caching is adaptive per file (when a file\n"
+ " is found, it starts at 0 and while the file\n"
+ " remains open, it is incremented up to the\n"
+ " max given)\n"
+ " (default is %d seconds)\n"
"\n"
+#endif
" version or -v or -V print program version and exit\n"
"\n"
, VERSION
+#ifndef __FreeBSD__
+ , max_fds_cache_seconds
+#endif
);
exit(1);
}
- if(!name) {
- name = argv[i];
- continue;
- }
-
error("Cannot understand option %s", argv[i]);
exit(1);
}
if(freq > 0) update_every = freq;
- if(!name) name = "groups";
- if(read_apps_groups_conf(name)) {
- error("Cannot read process groups '%s/apps_%s.conf'. There are no internal defaults. Failing.", config_dir, name);
- exit(1);
+ if(read_apps_groups_conf(user_config_dir, "groups")) {
+ info("Cannot read process groups configuration file '%s/apps_groups.conf'. Will try '%s/apps_groups.conf'", user_config_dir, stock_config_dir);
+
+ if(read_apps_groups_conf(stock_config_dir, "groups")) {
+ error("Cannot read process groups '%s/apps_groups.conf'. There are no internal defaults. Failing.", stock_config_dir);
+ exit(1);
+ }
+ else
+ info("Loaded config file '%s/apps_groups.conf'", stock_config_dir);
}
+ else
+ info("Loaded config file '%s/apps_groups.conf'", user_config_dir);
}
static int am_i_running_as_root() {
uid_t uid = getuid(), euid = geteuid();
if(uid == 0 || euid == 0) {
- if(debug) info("I am running with escalated privileges, uid = %u, euid = %u.", uid, euid);
+ if(debug_enabled) info("I am running with escalated privileges, uid = %u, euid = %u.", uid, euid);
return 1;
}
- if(debug) info("I am not running with escalated privileges, uid = %u, euid = %u.", uid, euid);
+ if(debug_enabled) info("I am not running with escalated privileges, uid = %u, euid = %u.", uid, euid);
return 0;
}
@@ -3463,7 +3618,7 @@ static int check_capabilities() {
error("Cannot get current capabilities.");
return 0;
}
- else if(debug)
+ else if(debug_enabled)
info("Received my capabilities from the system.");
int ret = 1;
@@ -3478,7 +3633,7 @@ static int check_capabilities() {
error("apps.plugin should run with CAP_DAC_READ_SEARCH.");
ret = 0;
}
- else if(debug)
+ else if(debug_enabled)
info("apps.plugin runs with CAP_DAC_READ_SEARCH.");
}
@@ -3492,7 +3647,7 @@ static int check_capabilities() {
error("apps.plugin should run with CAP_SYS_PTRACE.");
ret = 0;
}
- else if(debug)
+ else if(debug_enabled)
info("apps.plugin runs with CAP_SYS_PTRACE.");
}
@@ -3521,19 +3676,25 @@ int main(int argc, char **argv) {
error_log_errors_per_period = 100;
error_log_throttle_period = 3600;
+ // since apps.plugin runs as root, prevent it from opening symbolic links
+ procfile_open_flags = O_RDONLY|O_NOFOLLOW;
+
netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX");
- if(netdata_configured_host_prefix == NULL) {
- // info("NETDATA_HOST_PREFIX is not passed from netdata");
- netdata_configured_host_prefix = "";
+ if(verify_netdata_host_prefix() == -1) exit(1);
+
+ user_config_dir = getenv("NETDATA_USER_CONFIG_DIR");
+ if(user_config_dir == NULL) {
+ // info("NETDATA_USER_CONFIG_DIR is not passed from netdata");
+ user_config_dir = CONFIG_DIR;
}
- // else info("Found NETDATA_HOST_PREFIX='%s'", netdata_configured_host_prefix);
+ // else info("Found NETDATA_USER_CONFIG_DIR='%s'", user_config_dir);
- config_dir = getenv("NETDATA_CONFIG_DIR");
- if(config_dir == NULL) {
+ stock_config_dir = getenv("NETDATA_STOCK_CONFIG_DIR");
+ if(stock_config_dir == NULL) {
// info("NETDATA_CONFIG_DIR is not passed from netdata");
- config_dir = CONFIG_DIR;
+ stock_config_dir = LIBCONFIG_DIR;
}
- // else info("Found NETDATA_CONFIG_DIR='%s'", config_dir);
+ // else info("Found NETDATA_STOCK_CONFIG_DIR='%s'", stock_config_dir);
#ifdef NETDATA_INTERNAL_CHECKS
if(debug_flags != 0) {
@@ -3561,14 +3722,14 @@ int main(int argc, char **argv) {
error("apps.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. "
"Without these, apps.plugin cannot report disk I/O utilization of other processes. "
"To enable capabilities run: sudo setcap cap_dac_read_search,cap_sys_ptrace+ep %s; "
- "To enable setuid to root run: sudo chown root %s; sudo chmod 4755 %s; "
+ "To enable setuid to root run: sudo chown root:netdata %s; sudo chmod 4750 %s; "
, uid, euid, argv[0], argv[0], argv[0]
);
#else
error("apps.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. "
"Without these, apps.plugin cannot report disk I/O utilization of other processes. "
"Your system does not support capabilities. "
- "To enable setuid to root run: sudo chown root %s; sudo chmod 4755 %s; "
+ "To enable setuid to root run: sudo chown root:netdata %s; sudo chmod 4750 %s; "
, uid, euid, argv[0], argv[0]
);
#endif
@@ -3630,8 +3791,7 @@ int main(int argc, char **argv) {
show_guest_time_old = show_guest_time;
- if(unlikely(debug))
- fprintf(stderr, "apps.plugin: done Loop No %zu\n", global_iterations_counter);
+ debug_log("done Loop No %zu", global_iterations_counter);
// restart check (14400 seconds)
if(now_monotonic_sec() - started_t > 14400) exit(0);