diff options
author | Lennart Weller <lhw@ring0.de> | 2016-04-21 12:27:29 +0000 |
---|---|---|
committer | Lennart Weller <lhw@ring0.de> | 2016-04-21 12:27:29 +0000 |
commit | 30b94862648cdbf4f537337d2e2f01c369a9dee9 (patch) | |
tree | f0219dd8bd744fc09899bd0266402e05a4bff854 /src/apps_plugin.c | |
parent | Imported Upstream version 1.0.0 (diff) | |
download | netdata-30b94862648cdbf4f537337d2e2f01c369a9dee9.tar.xz netdata-30b94862648cdbf4f537337d2e2f01c369a9dee9.zip |
Imported Upstream version 1.1.0upstream/1.1.0
Diffstat (limited to 'src/apps_plugin.c')
-rw-r--r--[-rwxr-xr-x] | src/apps_plugin.c | 1640 |
1 files changed, 1080 insertions, 560 deletions
diff --git a/src/apps_plugin.c b/src/apps_plugin.c index 153687196..e8a6f43ae 100755..100644 --- a/src/apps_plugin.c +++ b/src/apps_plugin.c @@ -13,6 +13,7 @@ #include <sys/types.h> #include <sys/time.h> #include <sys/wait.h> +#include <sys/stat.h> #include <sys/resource.h> #include <sys/stat.h> @@ -27,13 +28,20 @@ #include <dirent.h> #include <arpa/inet.h> +#include <sys/types.h> +#include <pwd.h> +#include <grp.h> + +#include "avl.h" + #include "common.h" #include "log.h" -#include "avl.h" #include "procfile.h" +#include "../config.h" -#define MAX_COMPARE_NAME 15 +#define MAX_COMPARE_NAME 100 #define MAX_NAME 100 +#define MAX_CMDLINE 1024 unsigned long long Hertz = 1; @@ -43,11 +51,15 @@ int debug = 0; int update_every = 1; unsigned long long file_counter = 0; +int proc_pid_cmdline_is_needed = 0; char *host_prefix = ""; +char *config_dir = CONFIG_DIR; +#ifdef NETDATA_INTERNAL_CHECKS // ---------------------------------------------------------------------------- // memory debugger +// do not use in production systems - it mis-aligns allocated memory struct allocations { size_t allocations; @@ -197,23 +209,26 @@ char *strdup_debug(const char *file, int line, const char *function, const char #endif #define strdup(ptr) strdup_debug(__FILE__, __LINE__, __FUNCTION__, (ptr)) +#endif /* NETDATA_INTERNAL_CHECKS */ + // ---------------------------------------------------------------------------- -// helper functions +// system functions +// to retrieve settings of the system -procfile *ff = NULL; +long get_system_cpus(void) { + procfile *ff = NULL; -long get_processors(void) { int processors = 0; char filename[FILENAME_MAX + 1]; snprintf(filename, FILENAME_MAX, "%s/proc/stat", host_prefix); - ff = procfile_reopen(ff, filename, "", PROCFILE_FLAG_DEFAULT); + ff = procfile_open(filename, NULL, PROCFILE_FLAG_DEFAULT); if(!ff) return 1; ff = procfile_readall(ff); if(!ff) { - // procfile_close(ff); + procfile_close(ff); return 1; } @@ -226,32 +241,33 @@ long get_processors(void) { processors--; if(processors < 1) processors = 1; - // procfile_close(ff); + procfile_close(ff); return processors; } -long get_pid_max(void) { +long get_system_pid_max(void) { + procfile *ff = NULL; long mpid = 32768; char filename[FILENAME_MAX + 1]; snprintf(filename, FILENAME_MAX, "%s/proc/sys/kernel/pid_max", host_prefix); - ff = procfile_reopen(ff, filename, "", PROCFILE_FLAG_DEFAULT); + ff = procfile_open(filename, NULL, PROCFILE_FLAG_DEFAULT); if(!ff) return mpid; ff = procfile_readall(ff); if(!ff) { - // procfile_close(ff); + procfile_close(ff); return mpid; } mpid = atol(procfile_lineword(ff, 0, 0)); if(!mpid) mpid = 32768; - // procfile_close(ff); + procfile_close(ff); return mpid; } -unsigned long long get_hertz(void) +unsigned long long get_system_hertz(void) { unsigned long long myhz = 1; @@ -263,10 +279,10 @@ unsigned long long get_hertz(void) #ifdef HZ myhz = HZ; /* <asm/param.h> */ -#else +#else /* HZ */ /* If 32-bit or big-endian (not Alpha or ia64), assume HZ is 100. */ hz = (sizeof(long)==sizeof(int) || htons(999)==999) ? 100UL : 1024UL; -#endif +#endif /* HZ */ error("Unknown HZ value. Assuming %llu.", myhz); return myhz; @@ -275,13 +291,21 @@ unsigned long long get_hertz(void) // ---------------------------------------------------------------------------- // target -// target is the point to aggregate a process tree values +// target is the structure that process data are aggregated struct target { char compare[MAX_COMPARE_NAME + 1]; + uint32_t comparehash; + size_t comparelen; + char id[MAX_NAME + 1]; + uint32_t idhash; + char name[MAX_NAME + 1]; + uid_t uid; + gid_t gid; + unsigned long long minflt; unsigned long long cminflt; unsigned long long majflt; @@ -341,143 +365,248 @@ struct target { int exposed; // if set, we have sent this to netdata int hidden; // if set, we set the hidden flag on the dimension int debug; + int ends_with; + int starts_with; // if set, the compare string matches only the + // beginning of the command - struct target *target; // the one that will be reported to netdata + struct target *target; // the one that will be reported to netdata struct target *next; -} *target_root = NULL, *default_target = NULL; +}; -long targets = 0; -// find or create a target -// there are targets that are just agregated to other target (the second argument) -struct target *get_target(const char *id, struct target *target) +// ---------------------------------------------------------------------------- +// apps_groups.conf +// aggregate all processes in groups, to have a limited number of dimensions + +struct target *apps_groups_root_target = NULL; +struct target *apps_groups_default_target = NULL; +long apps_groups_targets = 0; + +struct target *users_root_target = NULL; +struct target *groups_root_target = NULL; + +struct target *get_users_target(uid_t uid) { - const char *nid = id; - if(nid[0] == '-') nid++; + struct target *w; + for(w = users_root_target ; w ; w = w->next) + if(w->uid == uid) return w; + + w = calloc(sizeof(struct target), 1); + if(unlikely(!w)) { + error("Cannot allocate %lu bytes of memory", (unsigned long)sizeof(struct target)); + return NULL; + } + + snprintf(w->compare, MAX_COMPARE_NAME, "%d", uid); + w->comparehash = simple_hash(w->compare); + w->comparelen = strlen(w->compare); + + snprintf(w->id, MAX_NAME, "%d", uid); + w->idhash = simple_hash(w->id); + + struct passwd *pw = getpwuid(uid); + if(!pw) + snprintf(w->name, MAX_NAME, "%d", uid); + else + snprintf(w->name, MAX_NAME, "%s", pw->pw_name); + + netdata_fix_id(w->name); + + w->uid = uid; + + w->next = users_root_target; + users_root_target = w; + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: added uid %d ('%s') target\n", w->uid, w->name); + + return w; +} + +struct target *get_groups_target(gid_t gid) +{ struct target *w; - for(w = target_root ; w ; w = w->next) - if(strncmp(nid, w->id, MAX_NAME) == 0) return w; + for(w = groups_root_target ; w ; w = w->next) + if(w->gid == gid) return w; w = calloc(sizeof(struct target), 1); - if(!w) { + if(unlikely(!w)) { error("Cannot allocate %lu bytes of memory", (unsigned long)sizeof(struct target)); return NULL; } - strncpy(w->id, nid, MAX_NAME); - strncpy(w->name, nid, MAX_NAME); - strncpy(w->compare, nid, MAX_COMPARE_NAME); - if(id[0] == '-') w->hidden = 1; + snprintf(w->compare, MAX_COMPARE_NAME, "%d", gid); + w->comparehash = simple_hash(w->compare); + w->comparelen = strlen(w->compare); - w->target = target; + snprintf(w->id, MAX_NAME, "%d", gid); + w->idhash = simple_hash(w->id); - w->next = target_root; - target_root = w; + struct group *gr = getgrgid(gid); + if(!gr) + snprintf(w->name, MAX_NAME, "%d", gid); + else + snprintf(w->name, MAX_NAME, "%s", gr->gr_name); - if(debug) fprintf(stderr, "apps.plugin: adding hook for process '%s', compare '%s' on target '%s'\n", w->id, w->compare, w->target?w->target->id:""); + netdata_fix_id(w->name); + + w->gid = gid; + + w->next = groups_root_target; + groups_root_target = w; + + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: added gid %d ('%s') target\n", w->gid, w->name); return w; } -// read the process groups file -int read_process_groups(const char *name) +// find or create a new target +// there are targets that are just aggregated to other target (the second argument) +struct target *get_apps_groups_target(const char *id, struct target *target) { - char buffer[4096+1]; - char filename[FILENAME_MAX + 1]; + int tdebug = 0, thidden = 0, ends_with = 0; + const char *nid = id; - snprintf(filename, FILENAME_MAX, "%s/apps_%s.conf", CONFIG_DIR, name); + while(nid[0] == '-' || nid[0] == '+' || nid[0] == '*') { + if(nid[0] == '-') thidden = 1; + if(nid[0] == '+') tdebug = 1; + if(nid[0] == '*') ends_with = 1; + nid++; + } + uint32_t hash = simple_hash(id); - if(debug) fprintf(stderr, "apps.plugin: process groups file: '%s'\n", filename); - FILE *fp = fopen(filename, "r"); - if(!fp) { - error("Cannot open file '%s'", filename); - return 1; + struct target *w; + for(w = apps_groups_root_target ; w ; w = w->next) { + if(w->idhash == hash && strncmp(nid, w->id, MAX_NAME) == 0) + return w; } - long line = 0; - while(fgets(buffer, 4096, fp) != NULL) { - int whidden = 0, wdebug = 0; - line++; + w = calloc(sizeof(struct target), 1); + if(unlikely(!w)) { + error("Cannot allocate %lu bytes of memory", (unsigned long)sizeof(struct target)); + return NULL; + } - // if(debug) fprintf(stderr, "apps.plugin: \tread %s\n", buffer); + strncpy(w->id, nid, MAX_NAME); + w->idhash = simple_hash(w->id); + + strncpy(w->name, nid, MAX_NAME); - char *s = buffer, *t, *p; - s = trim(s); - if(!s || !*s || *s == '#') continue; + strncpy(w->compare, nid, MAX_COMPARE_NAME); + int len = strlen(w->compare); + if(w->compare[len - 1] == '*') { + w->compare[len - 1] = '\0'; + w->starts_with = 1; + } + w->ends_with = ends_with; - if(debug) fprintf(stderr, "apps.plugin: \tread %s\n", s); + if(w->starts_with && w->ends_with) + proc_pid_cmdline_is_needed = 1; - // the target name - t = strsep(&s, ":"); - if(t) t = trim(t); - if(!t || !*t) continue; + w->comparehash = simple_hash(w->compare); + w->comparelen = strlen(w->compare); - while(t[0]) { - int stop = 1; + w->hidden = thidden; + w->debug = tdebug; + w->target = target; - switch(t[0]) { - case '-': - stop = 0; - whidden = 1; - t++; - break; + w->next = apps_groups_root_target; + apps_groups_root_target = w; - case '+': - stop = 0; - wdebug = 1; - t++; - break; - } + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: ADDING TARGET ID '%s', process name '%s' (%s), aggregated on target '%s', options: %s %s\n" + , w->id + , w->compare, (w->starts_with && w->ends_with)?"substring":((w->starts_with)?"prefix":((w->ends_with)?"suffix":"exact")) + , w->target?w->target->id:w->id + , (w->hidden)?"hidden":"-" + , (w->debug)?"debug":"-" + ); - if(stop) break; - } + return w; +} - if(debug) fprintf(stderr, "apps.plugin: \t\ttarget %s\n", t); +// read the apps_groups.conf file +int read_apps_groups_conf(const char *name) +{ + char filename[FILENAME_MAX + 1]; - struct target *w = NULL; - long count = 0; - int blen = 0; - char buffer[4097] = ""; - buffer[4096] = '\0'; + snprintf(filename, FILENAME_MAX, "%s/apps_%s.conf", config_dir, name); + + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: process groups file: '%s'\n", filename); - // the process names - while((p = strsep(&s, " "))) { - p = trim(p); - if(!p || !*p) continue; + // ---------------------------------------- - strncpy(&buffer[blen], p, 4096 - blen); - blen = strlen(buffer); + procfile *ff = procfile_open(filename, " :\t", PROCFILE_FLAG_DEFAULT); + if(!ff) return 1; + + procfile_set_quotes(ff, "'\""); + + ff = procfile_readall(ff); + if(!ff) { + procfile_close(ff); + return 1; + } + + unsigned long line, lines = procfile_lines(ff); + + for(line = 0; line < lines ;line++) { + unsigned long word, words = procfile_linewords(ff, line); + struct target *w = NULL; + + char *t = procfile_lineword(ff, line, 0); + if(!t || !*t) continue; - while(buffer[blen - 1] == '\\') { - buffer[blen - 1] = ' '; + for(word = 0; word < words ;word++) { + char *s = procfile_lineword(ff, line, word); + if(!s || !*s) continue; + if(*s == '#') break; - if((p = strsep(&s, " "))) - p = trim(p); + if(t == s) continue; - if(!p || !*p) p = " "; - strncpy(&buffer[blen], p, 4096 - blen); - blen = strlen(buffer); + struct target *n = get_apps_groups_target(s, w); + if(!n) { + error("Cannot create target '%s' (line %d, word %d)", s, line, word); + continue; } - struct target *n = get_target(buffer, w); - n->hidden = whidden; - n->debug = wdebug; if(!w) w = n; + } - buffer[0] = '\0'; - blen = 0; + if(w) { + int tdebug = 0, thidden = 0; - count++; - } + while(t[0] == '-' || t[0] == '+') { + if(t[0] == '-') thidden = 1; + if(t[0] == '+') tdebug = 1; + t++; + } - if(w) strncpy(w->name, t, MAX_NAME); - if(!count) error("The line %ld on file '%s', for group '%s' does not state any process names.", line, filename, t); + strncpy(w->name, t, MAX_NAME); + w->name[MAX_NAME] = '\0'; + w->hidden = thidden; + w->debug = tdebug; + + if(unlikely(debug)) + fprintf(stderr, "apps.plugin: AGGREGATION TARGET NAME '%s' on ID '%s', process name '%s' (%s), aggregated on target '%s', options: %s %s\n" + , w->name + , w->id + , w->compare, (w->starts_with && w->ends_with)?"substring":((w->starts_with)?"prefix":((w->ends_with)?"suffix":"exact")) + , w->target?w->target->id:w->id + , (w->hidden)?"hidden":"-" + , (w->debug)?"debug":"-" + ); + } } - fclose(fp); - default_target = get_target("+p!o@w#e$i^r&7*5(-i)l-o_", NULL); // match nothing - strncpy(default_target->name, "other", MAX_NAME); + procfile_close(ff); + + apps_groups_default_target = get_apps_groups_target("p+!o@w#e$i^r&7*5(-i)l-o_", NULL); // match nothing + if(!apps_groups_default_target) + error("Cannot create default target"); + else + strncpy(apps_groups_default_target->name, "other", MAX_NAME); return 0; } @@ -490,6 +619,8 @@ int read_process_groups(const char *name) struct pid_stat { int32_t pid; char comm[MAX_COMPARE_NAME + 1]; + char cmdline[MAX_CMDLINE + 1]; + // char state; int32_t ppid; // int32_t pgrp; @@ -533,6 +664,9 @@ struct pid_stat { // uint64_t guest_time; // int64_t cguest_time; + uid_t uid; + gid_t gid; + unsigned long long statm_size; unsigned long long statm_resident; unsigned long long statm_share; @@ -549,7 +683,29 @@ struct pid_stat { unsigned long long io_storage_bytes_written; unsigned long long io_cancelled_write_bytes; -#ifdef INCLUDE_CHILDS + // we need the last values + // for all incremental counters + // so that when a process switches users/groups + // we will subtract these values from the old + // target + unsigned long long last_minflt; + unsigned long long last_cminflt; + unsigned long long last_majflt; + unsigned long long last_cmajflt; + unsigned long long last_utime; + unsigned long long last_stime; + unsigned long long last_cutime; + unsigned long long last_cstime; + + unsigned long long last_io_logical_bytes_read; + unsigned long long last_io_logical_bytes_written; + unsigned long long last_io_read_calls; + unsigned long long last_io_write_calls; + unsigned long long last_io_storage_bytes_read; + unsigned long long last_io_storage_bytes_written; + unsigned long long last_io_cancelled_write_bytes; + +#ifdef AGGREGATE_CHILDREN_TO_PARENTS unsigned long long old_utime; unsigned long long old_stime; unsigned long long old_minflt; @@ -569,19 +725,24 @@ struct pid_stat { unsigned long long diff_cstime; unsigned long long diff_cminflt; unsigned long long diff_cmajflt; -#endif +#endif /* AGGREGATE_CHILDREN_TO_PARENTS */ - int *fds; // array of fds it uses - int fds_size; // the size of the fds array + int *fds; // array of fds it uses + int fds_size; // the size of the fds array - int childs; // number of processes directly referencing this - int updated; // 1 when update - int merged; // 1 when it has been merged to its parent + int children_count; // number of processes directly referencing this + int updated; // 1 when update + int merged; // 1 when it has been merged to its parent int new_entry; - struct target *target; + + struct target *target; // app_groups.conf targets + struct target *user_target; // uid based targets + struct target *group_target; // gid based targets + struct pid_stat *parent; struct pid_stat *prev; struct pid_stat *next; + } *root_of_pids = NULL, **all_pids; long all_pids_count = 0; @@ -633,14 +794,68 @@ void del_pid_entry(pid_t pid) // ---------------------------------------------------------------------------- // update pids from proc +int read_proc_pid_cmdline(struct pid_stat *p) { + char filename[FILENAME_MAX + 1]; + snprintf(filename, FILENAME_MAX, "%s/proc/%d/cmdline", host_prefix, p->pid); + + int fd = open(filename, O_RDONLY, 0666); + if(unlikely(fd == -1)) return 1; + + int i, bytes = read(fd, p->cmdline, MAX_CMDLINE); + close(fd); + + if(bytes <= 0) { + // copy the command to the command line + strncpy(p->cmdline, p->comm, MAX_CMDLINE); + p->cmdline[MAX_CMDLINE] = '\0'; + return 0; + } + + p->cmdline[bytes] = '\0'; + for(i = 0; i < bytes ; i++) + if(!p->cmdline[i]) p->cmdline[i] = ' '; + + if(unlikely(debug)) + fprintf(stderr, "Read file '%s' contents: %s\n", filename, p->cmdline); + + return 0; +} + +int read_proc_pid_ownership(struct pid_stat *p) { + char filename[FILENAME_MAX + 1]; + + snprintf(filename, FILENAME_MAX, "%s/proc/%d", host_prefix, p->pid); + + // ---------------------------------------- + // read uid and gid + + struct stat st; + if(stat(filename, &st) != 0) + return 1; + + p->uid = st.st_uid; + p->gid = st.st_gid; + + return 0; +} + int read_proc_pid_stat(struct pid_stat *p) { + static procfile *ff = NULL; + char filename[FILENAME_MAX + 1]; snprintf(filename, FILENAME_MAX, "%s/proc/%d/stat", host_prefix, p->pid); + // ---------------------------------------- + + int set_quotes = (!ff)?1:0; + ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); if(!ff) return 1; + // if(set_quotes) procfile_set_quotes(ff, "()"); + if(set_quotes) procfile_set_open_close(ff, "(", ")"); + ff = procfile_readall(ff); if(!ff) { // procfile_close(ff); @@ -649,27 +864,10 @@ int read_proc_pid_stat(struct pid_stat *p) { file_counter++; - p->comm[0] = '\0'; - p->comm[MAX_COMPARE_NAME] = '\0'; - size_t blen = 0; - - char *s = procfile_lineword(ff, 0, 1); - if(*s == '(') s++; - size_t len = strlen(s); + // parse the process name unsigned int i = 0; - while(len && s[len - 1] != ')') { - if(blen < MAX_COMPARE_NAME) { - strncpy(&p->comm[blen], s, MAX_COMPARE_NAME - blen); - blen = strlen(p->comm); - } - - i++; - s = procfile_lineword(ff, 0, 1+i); - len = strlen(s); - } - if(len && s[len - 1] == ')') s[len - 1] = '\0'; - if(blen < MAX_COMPARE_NAME) - strncpy(&p->comm[blen], s, MAX_COMPARE_NAME - blen); + strncpy(p->comm, procfile_lineword(ff, 0, 1), MAX_COMPARE_NAME); + p->comm[MAX_COMPARE_NAME] = '\0'; // p->pid = atol(procfile_lineword(ff, 0, 0+i)); // comm is at 1 @@ -716,13 +914,16 @@ int read_proc_pid_stat(struct pid_stat *p) { // p->guest_time = strtoull(procfile_lineword(ff, 0, 42+i), NULL, 10); // p->cguest_time = strtoull(procfile_lineword(ff, 0, 43), NULL, 10); - if(debug || (p->target && p->target->debug)) fprintf(stderr, "apps.plugin: VALUES: %s utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu, threads=%d\n", p->comm, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->num_threads); + if(debug || (p->target && p->target->debug)) + fprintf(stderr, "apps.plugin: READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' VALUES: utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu, threads=%d\n", host_prefix, p->pid, p->comm, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->num_threads); // procfile_close(ff); return 0; } int read_proc_pid_statm(struct pid_stat *p) { + static procfile *ff = NULL; + char filename[FILENAME_MAX + 1]; snprintf(filename, FILENAME_MAX, "%s/proc/%d/statm", host_prefix, p->pid); @@ -751,6 +952,8 @@ int read_proc_pid_statm(struct pid_stat *p) { } int read_proc_pid_io(struct pid_stat *p) { + static procfile *ff = NULL; + char filename[FILENAME_MAX + 1]; snprintf(filename, FILENAME_MAX, "%s/proc/%d/io", host_prefix, p->pid); @@ -780,56 +983,17 @@ int read_proc_pid_io(struct pid_stat *p) { // ---------------------------------------------------------------------------- - -#ifdef INCLUDE_CHILDS -// print a tree view of all processes -int walk_down(pid_t pid, int level) { - struct pid_stat *p = NULL; - char b[level+3]; - int i, ret = 0; - - for(i = 0; i < level; i++) b[i] = '\t'; - b[level] = '|'; - b[level+1] = '-'; - b[level+2] = '\0'; - - for(p = root_of_pids; p ; p = p->next) { - if(p->ppid == pid) { - ret += walk_down(p->pid, level+1); - } - } - - p = all_pids[pid]; - if(p) { - if(!p->updated) ret += 1; - if(ret) fprintf(stderr, "%s %s %d [%s, %s] c=%d u=%llu+%llu, s=%llu+%llu, cu=%llu+%llu, cs=%llu+%llu, n=%llu+%llu, j=%llu+%llu, cn=%llu+%llu, cj=%llu+%llu\n" - , b, p->comm, p->pid, p->updated?"OK":"KILLED", p->target->name, p->childs - , p->utime, p->utime - p->old_utime - , p->stime, p->stime - p->old_stime - , p->cutime, p->cutime - p->old_cutime - , p->cstime, p->cstime - p->old_cstime - , p->minflt, p->minflt - p->old_minflt - , p->majflt, p->majflt - p->old_majflt - , p->cminflt, p->cminflt - p->old_cminflt - , p->cmajflt, p->cmajflt - p->old_cmajflt - ); - } - - return ret; -} -#endif - - -// ---------------------------------------------------------------------------- // file descriptor // this is used to keep a global list of all open files of the system -// it is needed in order to figure out the unique files a process tree has open +// it is needed in order to calculate the unique files processes have open #define FILE_DESCRIPTORS_INCREASE_STEP 100 struct file_descriptor { avl avl; +#ifdef NETDATA_INTERNAL_CHECKS uint32_t magic; +#endif /* NETDATA_INTERNAL_CHECKS */ uint32_t hash; const char *name; int type; @@ -841,25 +1005,33 @@ int all_files_len = 0; int all_files_size = 0; int file_descriptor_compare(void* a, void* b) { +#ifdef NETDATA_INTERNAL_CHECKS if(((struct file_descriptor *)a)->magic != 0x0BADCAFE || ((struct file_descriptor *)b)->magic != 0x0BADCAFE) error("Corrupted index data detected. Please report this."); +#endif /* NETDATA_INTERNAL_CHECKS */ if(((struct file_descriptor *)a)->hash < ((struct file_descriptor *)b)->hash) return -1; + else if(((struct file_descriptor *)a)->hash > ((struct file_descriptor *)b)->hash) return 1; - else return strcmp(((struct file_descriptor *)a)->name, ((struct file_descriptor *)b)->name); + + else + return strcmp(((struct file_descriptor *)a)->name, ((struct file_descriptor *)b)->name); } + int file_descriptor_iterator(avl *a) { if(a) {}; return 0; } avl_tree all_files_index = { NULL, file_descriptor_compare, +#ifndef AVL_WITHOUT_PTHREADS #ifdef AVL_LOCK_WITH_MUTEX PTHREAD_MUTEX_INITIALIZER #else PTHREAD_RWLOCK_INITIALIZER #endif +#endif /* AVL_WITHOUT_PTHREADS */ }; static struct file_descriptor *file_descriptor_find(const char *name, uint32_t hash) { @@ -868,7 +1040,9 @@ static struct file_descriptor *file_descriptor_find(const char *name, uint32_t h tmp.name = name; tmp.count = 0; tmp.pos = 0; +#ifdef NETDATA_INTERNAL_CHECKS tmp.magic = 0x0BADCAFE; +#endif /* NETDATA_INTERNAL_CHECKS */ avl_search(&all_files_index, (avl *)&tmp, file_descriptor_iterator, (avl **)&result); return result; @@ -890,10 +1064,13 @@ static struct file_descriptor *file_descriptor_find(const char *name, uint32_t h void file_descriptor_not_used(int id) { if(id > 0 && id < all_files_size) { + +#ifdef NETDATA_INTERNAL_CHECKS if(all_files[id].magic != 0x0BADCAFE) { error("Ignoring request to remove empty file id %d.", id); return; } +#endif /* NETDATA_INTERNAL_CHECKS */ if(debug) fprintf(stderr, "apps.plugin: decreasing slot %d (count = %d).\n", id, all_files[id].count); @@ -903,7 +1080,9 @@ void file_descriptor_not_used(int id) if(!all_files[id].count) { if(debug) fprintf(stderr, "apps.plugin: >> slot %d is empty.\n", id); file_descriptor_remove(&all_files[id]); +#ifdef NETDATA_INTERNAL_CHECKS all_files[id].magic = 0x00000000; +#endif /* NETDATA_INTERNAL_CHECKS */ all_files_len--; } } @@ -953,7 +1132,9 @@ int file_descriptor_find_or_add(const char *name) for(i = all_files_size; i < (all_files_size + FILE_DESCRIPTORS_INCREASE_STEP); i++) { all_files[i].count = 0; all_files[i].name = NULL; +#ifdef NETDATA_INTERNAL_CHECKS all_files[i].magic = 0x00000000; +#endif /* NETDATA_INTERNAL_CHECKS */ all_files[i].pos = i; } @@ -972,8 +1153,10 @@ int file_descriptor_find_or_add(const char *name) if(!all_files[c].count) { if(debug) fprintf(stderr, "apps.plugin: >> Examining slot %d.\n", c); +#ifdef NETDATA_INTERNAL_CHECKS if(all_files[c].magic == 0x0BADCAFE && all_files[c].name && file_descriptor_find(all_files[c].name, all_files[c].hash)) error("fd on position %d is not cleared properly. It still has %s in it.\n", c, all_files[c].name); +#endif /* NETDATA_INTERNAL_CHECKS */ if(debug) fprintf(stderr, "apps.plugin: >> %s fd position %d for %s (last name: %s)\n", all_files[c].name?"re-using":"using", c, name, all_files[c].name); if(all_files[c].name) free((void *)all_files[c].name); @@ -1015,8 +1198,9 @@ int file_descriptor_find_or_add(const char *name) all_files[c].type = type; all_files[c].pos = c; all_files[c].count = 1; +#ifdef NETDATA_INTERNAL_CHECKS all_files[c].magic = 0x0BADCAFE; - +#endif /* NETDATA_INTERNAL_CHECKS */ file_descriptor_add(&all_files[c]); if(debug) fprintf(stderr, "apps.plugin: using fd position %d (name: %s)\n", c, all_files[c].name); @@ -1024,6 +1208,81 @@ int file_descriptor_find_or_add(const char *name) return c; } +int read_pid_file_descriptors(struct pid_stat *p) { + char dirname[FILENAME_MAX+1]; + + snprintf(dirname, FILENAME_MAX, "%s/proc/%d/fd", host_prefix, p->pid); + DIR *fds = opendir(dirname); + if(fds) { + int c; + struct dirent *de; + char fdname[FILENAME_MAX + 1]; + char linkname[FILENAME_MAX + 1]; + + // make the array negative + for(c = 0 ; c < p->fds_size ; c++) + p->fds[c] = -p->fds[c]; + + while((de = readdir(fds))) { + if(strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) + continue; + + // check if the fds array is small + int fdid = atoi(de->d_name); + if(fdid < 0) continue; + if(fdid >= p->fds_size) { + // it is small, extend it + if(debug) fprintf(stderr, "apps.plugin: extending fd memory slots for %s from %d to %d\n", p->comm, p->fds_size, fdid + 100); + p->fds = realloc(p->fds, (fdid + 100) * sizeof(int)); + if(!p->fds) { + error("Cannot re-allocate fds for %s", p->comm); + break; + } + + // and initialize it + for(c = p->fds_size ; c < (fdid + 100) ; c++) p->fds[c] = 0; + p->fds_size = fdid + 100; + } + + if(p->fds[fdid] == 0) { + // we don't know this fd, get it + + sprintf(fdname, "%s/proc/%d/fd/%s", host_prefix, p->pid, de->d_name); + ssize_t l = readlink(fdname, linkname, FILENAME_MAX); + if(l == -1) { + if(debug || (p->target && p->target->debug)) { + if(debug || (p->target && p->target->debug)) + error("Cannot read link %s", fdname); + } + continue; + } + linkname[l] = '\0'; + file_counter++; + + // if another process already has this, we will get + // the same id + p->fds[fdid] = file_descriptor_find_or_add(linkname); + } + + // else make it positive again, we need it + // of course, the actual file may have changed, but we don't care so much + // FIXME: we could compare the inode as returned by readdir direct structure + else p->fds[fdid] = -p->fds[fdid]; + } + closedir(fds); + + // remove all the negative file descriptors + for(c = 0 ; c < p->fds_size ; c++) if(p->fds[c] < 0) { + file_descriptor_not_used(-p->fds[c]); + p->fds[c] = 0; + } + } + else return 1; + + return 0; +} + +// ---------------------------------------------------------------------------- // 1. read all files in /proc // 2. for each numeric directory: @@ -1041,11 +1300,8 @@ int file_descriptor_find_or_add(const char *name) // to avoid filling up all disk space // if debug is enabled, all errors are printed -int update_from_proc(void) +int collect_data_for_all_processes_from_proc(void) { - static long count_errors = 0; - - char filename[FILENAME_MAX+1]; char dirname[FILENAME_MAX + 1]; snprintf(dirname, FILENAME_MAX, "%s/proc", host_prefix); @@ -1061,38 +1317,73 @@ int update_from_proc(void) all_pids_count++; p->parent = NULL; p->updated = 0; - p->childs = 0; + p->children_count = 0; p->merged = 0; p->new_entry = 0; + + p->last_minflt = p->minflt; + p->last_cminflt = p->cminflt; + p->last_majflt = p->majflt; + p->last_cmajflt = p->cmajflt; + p->last_utime = p->utime; + p->last_stime = p->stime; + p->last_cutime = p->cutime; + p->last_cstime = p->cstime; + + p->last_io_logical_bytes_read = p->io_logical_bytes_read; + p->last_io_logical_bytes_written = p->io_logical_bytes_written; + p->last_io_read_calls = p->io_read_calls; + p->last_io_write_calls = p->io_write_calls; + p->last_io_storage_bytes_read = p->io_storage_bytes_read; + p->last_io_storage_bytes_written = p->io_storage_bytes_written; + p->last_io_cancelled_write_bytes = p->io_cancelled_write_bytes; } while((file = readdir(dir))) { char *endptr = file->d_name; pid_t pid = (pid_t) strtoul(file->d_name, &endptr, 10); - if(pid <= 0 || pid > pid_max || endptr == file->d_name || *endptr != '\0') continue; + + // make sure we read a valid number + if(unlikely(pid <= 0 || pid > pid_max || endptr == file->d_name || *endptr != '\0')) + continue; p = get_pid_entry(pid); - if(!p) continue; + if(unlikely(!p)) continue; + // -------------------------------------------------------------------- // /proc/<pid>/stat - if(read_proc_pid_stat(p)) { - if(!count_errors++ || debug || (p->target && p->target->debug)) + if(unlikely(read_proc_pid_stat(p))) { error("Cannot process %s/proc/%d/stat", host_prefix, pid); + // there is no reason to proceed if we cannot get its status continue; } - if(p->ppid < 0 || p->ppid > pid_max) p->ppid = 0; + // check its parent pid + if(unlikely(p->ppid < 0 || p->ppid > pid_max)) { + error("Pid %d states invalid parent pid %d. Using 0.", pid, p->ppid); + + p->ppid = 0; + } + + // -------------------------------------------------------------------- + // /proc/<pid>/cmdline + + if(proc_pid_cmdline_is_needed) { + if(unlikely(read_proc_pid_cmdline(p))) { + error("Cannot process %s/proc/%d/cmdline", host_prefix, pid); + } + } // -------------------------------------------------------------------- // /proc/<pid>/statm - if(read_proc_pid_statm(p)) { - if(!count_errors++ || debug || (p->target && p->target->debug)) + if(unlikely(read_proc_pid_statm(p))) { error("Cannot process %s/proc/%d/statm", host_prefix, pid); + // there is no reason to proceed if we cannot get its memory status continue; } @@ -1100,11 +1391,19 @@ int update_from_proc(void) // -------------------------------------------------------------------- // /proc/<pid>/io - if(read_proc_pid_io(p)) { - if(!count_errors++ || debug || (p->target && p->target->debug)) + if(unlikely(read_proc_pid_io(p))) { error("Cannot process %s/proc/%d/io", host_prefix, pid); - continue; + // on systems without /proc/X/io + // allow proceeding without I/O information + // continue; + } + + // -------------------------------------------------------------------- + // <pid> ownership + + if(unlikely(read_proc_pid_ownership(p))) { + error("Cannot stat %s/proc/%d", host_prefix, pid); } // -------------------------------------------------------------------- @@ -1112,18 +1411,30 @@ int update_from_proc(void) // check if it is target // we do this only once, the first time this pid is loaded - if(p->new_entry) { + if(unlikely(p->new_entry)) { if(debug) fprintf(stderr, "apps.plugin: \tJust added %s\n", p->comm); + uint32_t hash = simple_hash(p->comm); + size_t pclen = strlen(p->comm); struct target *w; - for(w = target_root; w ; w = w->next) { + for(w = apps_groups_root_target; w ; w = w->next) { // if(debug || (p->target && p->target->debug)) fprintf(stderr, "apps.plugin: \t\tcomparing '%s' with '%s'\n", w->compare, p->comm); - if(strcmp(w->compare, p->comm) == 0) { + // find it - 4 cases: + // 1. the target is not a pattern + // 2. the target has the prefix + // 3. the target has the suffix + // 4. the target is something inside cmdline + if( (!w->starts_with && !w->ends_with && w->comparehash == hash && !strcmp(w->compare, p->comm)) + || (w->starts_with && !w->ends_with && !strncmp(w->compare, p->comm, w->comparelen)) + || (!w->starts_with && w->ends_with && pclen >= w->comparelen && !strcmp(w->compare, &p->comm[pclen - w->comparelen])) + || (proc_pid_cmdline_is_needed && w->starts_with && w->ends_with && strstr(p->cmdline, w->compare)) + ) { if(w->target) p->target = w->target; else p->target = w; - if(debug || (p->target && p->target->debug)) fprintf(stderr, "apps.plugin: \t\t%s linked to target %s\n", p->comm, p->target->name); + if(debug || (p->target && p->target->debug)) + fprintf(stderr, "apps.plugin: \t\t%s linked to target %s\n", p->comm, p->target->name); } } } @@ -1131,69 +1442,8 @@ int update_from_proc(void) // -------------------------------------------------------------------- // /proc/<pid>/fd - snprintf(filename, FILENAME_MAX, "%s/proc/%s/fd", host_prefix, file->d_name); - DIR *fds = opendir(filename); - if(fds) { - int c; - struct dirent *de; - char fdname[FILENAME_MAX + 1]; - char linkname[FILENAME_MAX + 1]; - - // make the array negative - for(c = 0 ; c < p->fds_size ; c++) p->fds[c] = -p->fds[c]; - - while((de = readdir(fds))) { - if(strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) continue; - - // check if the fds array is small - int fdid = atoi(de->d_name); - if(fdid < 0) continue; - if(fdid >= p->fds_size) { - // it is small, extend it - if(debug) fprintf(stderr, "apps.plugin: extending fd memory slots for %s from %d to %d\n", p->comm, p->fds_size, fdid + 100); - p->fds = realloc(p->fds, (fdid + 100) * sizeof(int)); - if(!p->fds) { - error("Cannot re-allocate fds for %s", p->comm); - break; - } - - // and initialize it - for(c = p->fds_size ; c < (fdid + 100) ; c++) p->fds[c] = 0; - p->fds_size = fdid + 100; - } - - if(p->fds[fdid] == 0) { - // we don't know this fd, get it - - sprintf(fdname, "%s/proc/%s/fd/%s", host_prefix, file->d_name, de->d_name); - ssize_t l = readlink(fdname, linkname, FILENAME_MAX); - if(l == -1) { - if(debug || (p->target && p->target->debug)) { - if(!count_errors++ || debug || (p->target && p->target->debug)) - error("Cannot read link %s", fdname); - } - continue; - } - linkname[l] = '\0'; - file_counter++; - - // if another process already has this, we will get - // the same id - p->fds[fdid] = file_descriptor_find_or_add(linkname); - } - - // else make it positive again, we need it - // of course, the actual file may have changed, but we don't care so much - // FIXME: we could compare the inode as returned by readdir direct structure - else p->fds[fdid] = -p->fds[fdid]; - } - closedir(fds); - - // remove all the negative file descriptors - for(c = 0 ; c < p->fds_size ; c++) if(p->fds[c] < 0) { - file_descriptor_not_used(-p->fds[c]); - p->fds[c] = 0; - } + if(unlikely(read_pid_file_descriptors(p))) { + error("Cannot process entries in %s/proc/%d/fd", host_prefix, pid); } // -------------------------------------------------------------------- @@ -1202,10 +1452,6 @@ int update_from_proc(void) // mark it as updated p->updated = 1; } - if(count_errors > 1000) { - error("%ld more errors encountered\n", count_errors - 1); - count_errors = 0; - } closedir(dir); @@ -1214,6 +1460,48 @@ int update_from_proc(void) // ---------------------------------------------------------------------------- + +#ifdef AGGREGATE_CHILDREN_TO_PARENTS +// print a tree view of all processes +int debug_childrens_aggregations(pid_t pid, int level) { + struct pid_stat *p = NULL; + char b[level+3]; + int i, ret = 0; + + for(i = 0; i < level; i++) b[i] = '\t'; + b[level] = '|'; + b[level+1] = '-'; + b[level+2] = '\0'; + + for(p = root_of_pids; p ; p = p->next) { + if(p->ppid == pid) { + ret += debug_childrens_aggregations(p->pid, level+1); + } + } + + p = all_pids[pid]; + if(p) { + if(!p->updated) ret += 1; + if(ret) fprintf(stderr, "%s %s %d [%s, %s] c=%d u=%llu+%llu, s=%llu+%llu, cu=%llu+%llu, cs=%llu+%llu, n=%llu+%llu, j=%llu+%llu, cn=%llu+%llu, cj=%llu+%llu\n" + , b, p->comm, p->pid, p->updated?"OK":"KILLED", p->target->name, p->children_count + , p->utime, p->utime - p->old_utime + , p->stime, p->stime - p->old_stime + , p->cutime, p->cutime - p->old_cutime + , p->cstime, p->cstime - p->old_cstime + , p->minflt, p->minflt - p->old_minflt + , p->majflt, p->majflt - p->old_majflt + , p->cminflt, p->cminflt - p->old_cminflt + , p->cmajflt, p->cmajflt - p->old_cmajflt + ); + } + + return ret; +} +#endif /* AGGREGATE_CHILDREN_TO_PARENTS */ + + + +// ---------------------------------------------------------------------------- // update statistics on the targets // 1. link all childs to their parents @@ -1223,87 +1511,41 @@ int update_from_proc(void) // 4. go from top to bottom, linking all childs without a target, to their parent target // after this step, all processes have a target // [5. for each killed pid (updated = 0), remove its usage from its target] -// 6. zero all targets -// 7. concentrate all values on the targets +// 6. zero all apps_groups_targets +// 7. concentrate all values on the apps_groups_targets // 8. remove all killed processes // 9. find the unique file count for each target +// check: update_apps_groups_statistics() -void update_statistics(void) -{ - int c; +void link_all_processes_to_their_parents(void) { struct pid_stat *p = NULL; - // link all parents and update childs count + // link all children to their parents + // and update children count on parents for(p = root_of_pids; p ; p = p->next) { - if(p->ppid > 0 && p->ppid <= pid_max && all_pids[p->ppid]) { - if(debug || (p->target && p->target->debug)) fprintf(stderr, "apps.plugin: \tparent of %d %s is %d %s\n", p->pid, p->comm, p->ppid, all_pids[p->ppid]->comm); + // for each process found running - p->parent = all_pids[p->ppid]; - p->parent->childs++; - } - else if(p->ppid != 0) error("pid %d %s states parent %d, but the later does not exist.", p->pid, p->comm, p->ppid); - } + if(p->ppid > 0 + && p->ppid <= pid_max + && all_pids[p->ppid] + ) { + // for valid processes - // find all the procs with 0 childs and merge them to their parents - // repeat, until nothing more can be done. - int found = 1; - while(found) { - found = 0; - for(p = root_of_pids; p ; p = p->next) { - // if this process does not have any childs, and - // is not already merged, and - // its parent has childs waiting to be merged, and - // the target of this process and its parent is the same, or the parent does not have a target, or this process does not have a parent - // and its parent is not init - // then... merge them! - if(!p->childs && !p->merged && p->parent && p->parent->childs && (p->target == p->parent->target || !p->parent->target || !p->target) && p->ppid != 1) { - p->parent->childs--; - p->merged = 1; - - // the parent inherits the child's target, if it does not have a target itself - if(p->target && !p->parent->target) { - p->parent->target = p->target; - if(debug || (p->target && p->target->debug)) fprintf(stderr, "apps.plugin: \t\ttarget %s is inherited by %d %s from its child %d %s.\n", p->target->name, p->parent->pid, p->parent->comm, p->pid, p->comm); - } + if(debug || (p->target && p->target->debug)) + fprintf(stderr, "apps.plugin: \tparent of %d (%s) is %d (%s)\n", p->pid, p->comm, p->ppid, all_pids[p->ppid]->comm); - found++; - } + p->parent = all_pids[p->ppid]; + p->parent->children_count++; } - if(debug) fprintf(stderr, "apps.plugin: merged %d processes\n", found); - } - - // give a default target on all top level processes - // init goes always to default target - if(all_pids[1]) all_pids[1]->target = default_target; - - for(p = root_of_pids; p ; p = p->next) { - // if the process is not merged itself - // then is is a top level process - if(!p->merged && !p->target) p->target = default_target; - -#ifdef INCLUDE_CHILDS - // by the way, update the diffs - // will be used later for substracting killed process times - p->diff_cutime = p->utime - p->cutime; - p->diff_cstime = p->stime - p->cstime; - p->diff_cminflt = p->minflt - p->cminflt; - p->diff_cmajflt = p->majflt - p->cmajflt; -#endif + else if(p->ppid != 0) + error("pid %d %s states parent %d, but the later does not exist.", p->pid, p->comm, p->ppid); } +} - // give a target to all merged child processes - found = 1; - while(found) { - found = 0; - for(p = root_of_pids; p ; p = p->next) { - if(!p->target && p->merged && p->parent && p->parent->target) { - p->target = p->parent->target; - found++; - } - } - } +#ifdef AGGREGATE_CHILDREN_TO_PARENTS +void aggregate_children_to_parents(void) { + struct pid_stat *p = NULL; -#ifdef INCLUDE_CHILDS // for each killed process, remove its values from the parents // sums (we had already added them in a previous loop) for(p = root_of_pids; p ; p = p->next) { @@ -1356,17 +1598,140 @@ void update_statistics(void) if(diff_minflt) error("Cannot fix up minflt %llu", diff_minflt); if(diff_majflt) error("Cannot fix up majflt %llu", diff_majflt); } -#endif +} +#endif /* AGGREGATE_CHILDREN_TO_PARENTS */ + +void cleanup_non_existing_pids(void) { + int c; + struct pid_stat *p = NULL; + + for(p = root_of_pids; p ;) { + if(!p->updated) { +// fprintf(stderr, "\tEXITED %d %s [parent %d %s, target %s] utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu\n", p->pid, p->comm, p->parent->pid, p->parent->comm, p->target->name, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt); + + for(c = 0 ; c < p->fds_size ; c++) if(p->fds[c] > 0) { + file_descriptor_not_used(p->fds[c]); + p->fds[c] = 0; + } + + pid_t r = p->pid; + p = p->next; + del_pid_entry(r); + } + else p = p->next; + } +} + +void apply_apps_groups_targets_inheritance(void) { + struct pid_stat *p = NULL; + + // children that do not have a target + // inherit their target from their parent + int found = 1; + while(found) { + found = 0; + for(p = root_of_pids; p ; p = p->next) { + // if this process does not have a target + // and it has a parent + // and its parent has a target + // then, set the parent's target to this process + if(unlikely(!p->target && p->parent && p->parent->target)) { + p->target = p->parent->target; + found++; + + if(debug || (p->target && p->target->debug)) + fprintf(stderr, "apps.plugin: \t\tTARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s).\n", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm); + } + } + } + + + // find all the procs with 0 childs and merge them to their parents + // repeat, until nothing more can be done. + found = 1; + while(found) { + found = 0; + for(p = root_of_pids; p ; p = p->next) { + // if this process does not have any children + // and is not already merged + // and has a parent + // and its parent has children + // and the target of this process and its parent is the same, or the parent does not have a target + // and its parent is not init + // then, mark them as merged. + if(unlikely( + !p->children_count + && !p->merged + && p->parent + && p->parent->children_count + && (p->target == p->parent->target || !p->parent->target) + && p->ppid != 1 + )) { + p->parent->children_count--; + p->merged = 1; - // zero all the targets - targets = 0; + // the parent inherits the child's target, if it does not have a target itself + if(unlikely(p->target && !p->parent->target)) { + p->parent->target = p->target; + + if(debug || (p->target && p->target->debug)) + fprintf(stderr, "apps.plugin: \t\tTARGET INHERITANCE: %s is inherited by %d (%s) from its child %d (%s).\n", p->target->name, p->parent->pid, p->parent->comm, p->pid, p->comm); + } + + found++; + } + } + + if(debug) + fprintf(stderr, "apps.plugin: merged %d processes\n", found); + } + + // init goes always to default target + if(all_pids[1]) + all_pids[1]->target = apps_groups_default_target; + + // give a default target on all top level processes + for(p = root_of_pids; p ; p = p->next) { + // if the process is not merged itself + // then is is a top level process + if(!p->merged && !p->target) + p->target = apps_groups_default_target; + +#ifdef AGGREGATE_CHILDREN_TO_PARENTS + // by the way, update the diffs + // will be used later for subtracting killed process times + p->diff_cutime = p->utime - p->cutime; + p->diff_cstime = p->stime - p->cstime; + p->diff_cminflt = p->minflt - p->cminflt; + p->diff_cmajflt = p->majflt - p->cmajflt; +#endif /* AGGREGATE_CHILDREN_TO_PARENTS */ + } + + // give a target to all merged child processes + found = 1; + while(found) { + found = 0; + for(p = root_of_pids; p ; p = p->next) { + if(unlikely(!p->target && p->merged && p->parent && p->parent->target)) { + p->target = p->parent->target; + found++; + + if(debug || (p->target && p->target->debug)) + fprintf(stderr, "apps.plugin: \t\tTARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s) at phase 2.\n", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm); + } + } + } +} + +long zero_all_targets(struct target *root) { struct target *w; - for (w = target_root; w ; w = w->next) { - targets++; + long count = 0; - w->fds = calloc(sizeof(int), (size_t) all_files_size); - if(!w->fds) - error("Cannot allocate memory for fds in %s", w->name); + for (w = root; w ; w = w->next) { + count++; + + if(w->fds) free(w->fds); + w->fds = NULL; w->minflt = 0; w->majflt = 0; @@ -1397,131 +1762,175 @@ void update_statistics(void) w->io_cancelled_write_bytes = 0; } -#ifdef INCLUDE_CHILDS - if(debug) walk_down(0, 1); -#endif + return count; +} - // concentrate everything on the targets - for(p = root_of_pids; p ; p = p->next) { - if(!p->target) { - error("pid %d %s was left without a target!", p->pid, p->comm); - continue; - } +void aggregate_pid_on_target(struct target *w, struct pid_stat *p, struct target *o) { + if(unlikely(!w->fds)) { + w->fds = calloc(sizeof(int), (size_t) all_files_size); + if(unlikely(!w->fds)) + error("Cannot allocate memory for fds in %s", w->name); + } - if(p->updated) { - p->target->cutime += p->cutime; // - p->fix_cutime; - p->target->cstime += p->cstime; // - p->fix_cstime; - p->target->cminflt += p->cminflt; // - p->fix_cminflt; - p->target->cmajflt += p->cmajflt; // - p->fix_cmajflt; + if(likely(p->updated)) { + w->cutime += p->cutime; // - p->fix_cutime; + w->cstime += p->cstime; // - p->fix_cstime; + w->cminflt += p->cminflt; // - p->fix_cminflt; + w->cmajflt += p->cmajflt; // - p->fix_cmajflt; - p->target->utime += p->utime; //+ (p->pid != 1)?(p->cutime - p->fix_cutime):0; - p->target->stime += p->stime; //+ (p->pid != 1)?(p->cstime - p->fix_cstime):0; - p->target->minflt += p->minflt; //+ (p->pid != 1)?(p->cminflt - p->fix_cminflt):0; - p->target->majflt += p->majflt; //+ (p->pid != 1)?(p->cmajflt - p->fix_cmajflt):0; + w->utime += p->utime; //+ (p->pid != 1)?(p->cutime - p->fix_cutime):0; + w->stime += p->stime; //+ (p->pid != 1)?(p->cstime - p->fix_cstime):0; + w->minflt += p->minflt; //+ (p->pid != 1)?(p->cminflt - p->fix_cminflt):0; + w->majflt += p->majflt; //+ (p->pid != 1)?(p->cmajflt - p->fix_cmajflt):0; - //if(p->num_threads < 0) - // error("Negative threads number for pid '%s' (%d): %d", p->comm, p->pid, p->num_threads); + //if(p->num_threads < 0) + // error("Negative threads number for pid '%s' (%d): %d", p->comm, p->pid, p->num_threads); - //if(p->num_threads > 10000) - // error("Excessive threads number for pid '%s' (%d): %d", p->comm, p->pid, p->num_threads); + //if(p->num_threads > 10000) + // error("Excessive threads number for pid '%s' (%d): %d", p->comm, p->pid, p->num_threads); - p->target->num_threads += p->num_threads; - p->target->rss += p->rss; + w->num_threads += p->num_threads; + w->rss += p->rss; - p->target->statm_size += p->statm_size; - p->target->statm_resident += p->statm_resident; - p->target->statm_share += p->statm_share; - p->target->statm_text += p->statm_text; - p->target->statm_lib += p->statm_lib; - p->target->statm_data += p->statm_data; - p->target->statm_dirty += p->statm_dirty; + w->statm_size += p->statm_size; + w->statm_resident += p->statm_resident; + w->statm_share += p->statm_share; + w->statm_text += p->statm_text; + w->statm_lib += p->statm_lib; + w->statm_data += p->statm_data; + w->statm_dirty += p->statm_dirty; - p->target->io_logical_bytes_read += p->io_logical_bytes_read; - p->target->io_logical_bytes_written += p->io_logical_bytes_written; - p->target->io_read_calls += p->io_read_calls; - p->target->io_write_calls += p->io_write_calls; - p->target->io_storage_bytes_read += p->io_storage_bytes_read; - p->target->io_storage_bytes_written += p->io_storage_bytes_written; - p->target->io_cancelled_write_bytes += p->io_cancelled_write_bytes; + w->io_logical_bytes_read += p->io_logical_bytes_read; + w->io_logical_bytes_written += p->io_logical_bytes_written; + w->io_read_calls += p->io_read_calls; + w->io_write_calls += p->io_write_calls; + w->io_storage_bytes_read += p->io_storage_bytes_read; + w->io_storage_bytes_written += p->io_storage_bytes_written; + w->io_cancelled_write_bytes += p->io_cancelled_write_bytes; - p->target->processes++; + w->processes++; + if(likely(w->fds)) { + int c; for(c = 0; c < p->fds_size ;c++) { if(p->fds[c] == 0) continue; - if(p->fds[c] < all_files_size) { - if(p->target->fds) p->target->fds[p->fds[c]]++; + + if(likely(p->fds[c] < all_files_size)) { + if(w->fds) w->fds[p->fds[c]]++; } else error("Invalid fd number %d", p->fds[c]); } + } - if(debug || p->target->debug) fprintf(stderr, "apps.plugin: \tAgregating %s pid %d on %s utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu\n", p->comm, p->pid, p->target->name, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt); - -/* if(p->utime - p->old_utime > 100) fprintf(stderr, "BIG CHANGE: %d %s utime increased by %llu from %llu to %llu\n", p->pid, p->comm, p->utime - p->old_utime, p->old_utime, p->utime); - if(p->cutime - p->old_cutime > 100) fprintf(stderr, "BIG CHANGE: %d %s cutime increased by %llu from %llu to %llu\n", p->pid, p->comm, p->cutime - p->old_cutime, p->old_cutime, p->cutime); - if(p->stime - p->old_stime > 100) fprintf(stderr, "BIG CHANGE: %d %s stime increased by %llu from %llu to %llu\n", p->pid, p->comm, p->stime - p->old_stime, p->old_stime, p->stime); - if(p->cstime - p->old_cstime > 100) fprintf(stderr, "BIG CHANGE: %d %s cstime increased by %llu from %llu to %llu\n", p->pid, p->comm, p->cstime - p->old_cstime, p->old_cstime, p->cstime); - if(p->minflt - p->old_minflt > 5000) fprintf(stderr, "BIG CHANGE: %d %s minflt increased by %llu from %llu to %llu\n", p->pid, p->comm, p->minflt - p->old_minflt, p->old_minflt, p->minflt); - if(p->majflt - p->old_majflt > 5000) fprintf(stderr, "BIG CHANGE: %d %s majflt increased by %llu from %llu to %llu\n", p->pid, p->comm, p->majflt - p->old_majflt, p->old_majflt, p->majflt); - if(p->cminflt - p->old_cminflt > 15000) fprintf(stderr, "BIG CHANGE: %d %s cminflt increased by %llu from %llu to %llu\n", p->pid, p->comm, p->cminflt - p->old_cminflt, p->old_cminflt, p->cminflt); - if(p->cmajflt - p->old_cmajflt > 15000) fprintf(stderr, "BIG CHANGE: %d %s cmajflt increased by %llu from %llu to %llu\n", p->pid, p->comm, p->cmajflt - p->old_cmajflt, p->old_cmajflt, p->cmajflt); + if(unlikely(debug || w->debug)) + fprintf(stderr, "apps.plugin: \tAgregating %s pid %d on %s utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu\n", p->comm, p->pid, w->name, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt); + +/* if(p->utime - p->old_utime > 100) fprintf(stderr, "BIG CHANGE: %d %s utime increased by %llu from %llu to %llu\n", p->pid, p->comm, p->utime - p->old_utime, p->old_utime, p->utime); + if(p->cutime - p->old_cutime > 100) fprintf(stderr, "BIG CHANGE: %d %s cutime increased by %llu from %llu to %llu\n", p->pid, p->comm, p->cutime - p->old_cutime, p->old_cutime, p->cutime); + if(p->stime - p->old_stime > 100) fprintf(stderr, "BIG CHANGE: %d %s stime increased by %llu from %llu to %llu\n", p->pid, p->comm, p->stime - p->old_stime, p->old_stime, p->stime); + if(p->cstime - p->old_cstime > 100) fprintf(stderr, "BIG CHANGE: %d %s cstime increased by %llu from %llu to %llu\n", p->pid, p->comm, p->cstime - p->old_cstime, p->old_cstime, p->cstime); + if(p->minflt - p->old_minflt > 5000) fprintf(stderr, "BIG CHANGE: %d %s minflt increased by %llu from %llu to %llu\n", p->pid, p->comm, p->minflt - p->old_minflt, p->old_minflt, p->minflt); + if(p->majflt - p->old_majflt > 5000) fprintf(stderr, "BIG CHANGE: %d %s majflt increased by %llu from %llu to %llu\n", p->pid, p->comm, p->majflt - p->old_majflt, p->old_majflt, p->majflt); + if(p->cminflt - p->old_cminflt > 15000) fprintf(stderr, "BIG CHANGE: %d %s cminflt increased by %llu from %llu to %llu\n", p->pid, p->comm, p->cminflt - p->old_cminflt, p->old_cminflt, p->cminflt); + if(p->cmajflt - p->old_cmajflt > 15000) fprintf(stderr, "BIG CHANGE: %d %s cmajflt increased by %llu from %llu to %llu\n", p->pid, p->comm, p->cmajflt - p->old_cmajflt, p->old_cmajflt, p->cmajflt); */ -#ifdef INCLUDE_CHILDS - p->old_utime = p->utime; - p->old_cutime = p->cutime; - p->old_stime = p->stime; - p->old_cstime = p->cstime; - p->old_minflt = p->minflt; - p->old_majflt = p->majflt; - p->old_cminflt = p->cminflt; - p->old_cmajflt = p->cmajflt; -#endif - } - else { - // since the process has exited, the user - // will see a drop in our charts, because the incremental - // values of this process will not be there - - // add them to the fix_* values and they will be added to - // the reported values, so that the report goes steady - p->target->fix_minflt += p->minflt; - p->target->fix_majflt += p->majflt; - p->target->fix_utime += p->utime; - p->target->fix_stime += p->stime; - p->target->fix_cminflt += p->cminflt; - p->target->fix_cmajflt += p->cmajflt; - p->target->fix_cutime += p->cutime; - p->target->fix_cstime += p->cstime; - - p->target->fix_io_logical_bytes_read += p->io_logical_bytes_read; - p->target->fix_io_logical_bytes_written += p->io_logical_bytes_written; - p->target->fix_io_read_calls += p->io_read_calls; - p->target->fix_io_write_calls += p->io_write_calls; - p->target->fix_io_storage_bytes_read += p->io_storage_bytes_read; - p->target->fix_io_storage_bytes_written += p->io_storage_bytes_written; - p->target->fix_io_cancelled_write_bytes += p->io_cancelled_write_bytes; +#ifdef AGGREGATE_CHILDREN_TO_PARENTS + p->old_utime = p->utime; + p->old_cutime = p->cutime; + p->old_stime = p->stime; + p->old_cstime = p->cstime; + p->old_minflt = p->minflt; + p->old_majflt = p->majflt; + p->old_cminflt = p->cminflt; + p->old_cmajflt = p->cmajflt; +#endif /* AGGREGATE_CHILDREN_TO_PARENTS */ + + if(o) { + // since the process switched target + // for all incremental values + // we have to subtract its OLD values from the new target + // and add its OLD values to the old target + + // IMPORTANT + // We add/subtract the last/OLD values we added to the target + + w->fix_cutime -= p->last_cutime; + w->fix_cstime -= p->last_cstime; + w->fix_cminflt -= p->last_cminflt; + w->fix_cmajflt -= p->last_cmajflt; + + w->fix_utime -= p->last_utime; + w->fix_stime -= p->last_stime; + w->fix_minflt -= p->last_minflt; + w->fix_majflt -= p->last_majflt; + + + w->fix_io_logical_bytes_read -= p->last_io_logical_bytes_read; + w->fix_io_logical_bytes_written -= p->last_io_logical_bytes_written; + w->fix_io_read_calls -= p->last_io_read_calls; + w->fix_io_write_calls -= p->last_io_write_calls; + w->fix_io_storage_bytes_read -= p->last_io_storage_bytes_read; + w->fix_io_storage_bytes_written -= p->last_io_storage_bytes_written; + w->fix_io_cancelled_write_bytes -= p->last_io_cancelled_write_bytes; + + // --- + + o->fix_cutime += p->last_cutime; + o->fix_cstime += p->last_cstime; + o->fix_cminflt += p->last_cminflt; + o->fix_cmajflt += p->last_cmajflt; + + o->fix_utime += p->last_utime; + o->fix_stime += p->last_stime; + o->fix_minflt += p->last_minflt; + o->fix_majflt += p->last_majflt; + + o->fix_io_logical_bytes_read += p->last_io_logical_bytes_read; + o->fix_io_logical_bytes_written += p->last_io_logical_bytes_written; + o->fix_io_read_calls += p->last_io_read_calls; + o->fix_io_write_calls += p->last_io_write_calls; + o->fix_io_storage_bytes_read += p->last_io_storage_bytes_read; + o->fix_io_storage_bytes_written += p->last_io_storage_bytes_written; + o->fix_io_cancelled_write_bytes += p->last_io_cancelled_write_bytes; } } + else { + // if(o) fprintf(stderr, "apps.plugin: \t\tpid %d (%s) is not updated by OLD target %s (%s) is present.\n", p->pid, p->comm, o->id, o->name); + + // since the process has exited, the user + // will see a drop in our charts, because the incremental + // values of this process will not be there + + // add them to the fix_* values and they will be added to + // the reported values, so that the report goes steady + w->fix_minflt += p->minflt; + w->fix_majflt += p->majflt; + w->fix_utime += p->utime; + w->fix_stime += p->stime; + w->fix_cminflt += p->cminflt; + w->fix_cmajflt += p->cmajflt; + w->fix_cutime += p->cutime; + w->fix_cstime += p->cstime; + + w->fix_io_logical_bytes_read += p->io_logical_bytes_read; + w->fix_io_logical_bytes_written += p->io_logical_bytes_written; + w->fix_io_read_calls += p->io_read_calls; + w->fix_io_write_calls += p->io_write_calls; + w->fix_io_storage_bytes_read += p->io_storage_bytes_read; + w->fix_io_storage_bytes_written += p->io_storage_bytes_written; + w->fix_io_cancelled_write_bytes += p->io_cancelled_write_bytes; + } -// fprintf(stderr, "\n"); - // cleanup all un-updated processed (exited, killed, etc) - for(p = root_of_pids; p ;) { - if(!p->updated) { -// fprintf(stderr, "\tEXITED %d %s [parent %d %s, target %s] utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu\n", p->pid, p->comm, p->parent->pid, p->parent->comm, p->target->name, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt); +} - for(c = 0 ; c < p->fds_size ; c++) if(p->fds[c] > 0) { - file_descriptor_not_used(p->fds[c]); - p->fds[c] = 0; - } +void count_targets_fds(struct target *root) { + int c; + struct target *w; - pid_t r = p->pid; - p = p->next; - del_pid_entry(r); - } - else p = p->next; - } + for (w = root; w ; w = w->next) { + if(!w->fds) continue; - for (w = target_root; w ; w = w->next) { w->openfiles = 0; w->openpipes = 0; w->opensockets = 0; @@ -1533,7 +1942,8 @@ void update_statistics(void) w->openother = 0; for(c = 1; c < all_files_size ;c++) { - if(w->fds && w->fds[c] > 0) switch(all_files[c].type) { + if(w->fds[c] > 0) + switch(all_files[c].type) { case FILETYPE_FILE: w->openfiles++; break; @@ -1576,15 +1986,90 @@ void update_statistics(void) } } +void calculate_netdata_statistics(void) +{ + link_all_processes_to_their_parents(); + apply_apps_groups_targets_inheritance(); + +#ifdef AGGREGATE_CHILDREN_TO_PARENTS + aggregate_children_to_parents(); +#endif /* AGGREGATE_CHILDREN_TO_PARENTS */ + + zero_all_targets(users_root_target); + zero_all_targets(groups_root_target); + apps_groups_targets = zero_all_targets(apps_groups_root_target); + +#ifdef AGGREGATE_CHILDREN_TO_PARENTS + if(debug) + debug_childrens_aggregations(0, 1); +#endif /* AGGREGATE_CHILDREN_TO_PARENTS */ + + // this has to be done, before the cleanup + struct pid_stat *p = NULL; + struct target *w = NULL, *o = NULL; + + // concentrate everything on the apps_groups_targets + for(p = root_of_pids; p ; p = p->next) { + + // -------------------------------------------------------------------- + // apps_groups targets + if(likely(p->target)) + aggregate_pid_on_target(p->target, p, NULL); + else + error("pid %d %s was left without a target!", p->pid, p->comm); + + + // -------------------------------------------------------------------- + // user targets + o = p->user_target; + if(likely(p->user_target && p->user_target->uid == p->uid)) + w = p->user_target; + else { + if(unlikely(debug && p->user_target)) + fprintf(stderr, "apps.plugin: \t\tpid %d (%s) switched user from %d (%s) to %d.\n", p->pid, p->comm, p->user_target->uid, p->user_target->name, p->uid); + + w = p->user_target = get_users_target(p->uid); + } + + if(likely(w)) + aggregate_pid_on_target(w, p, o); + else + error("pid %d %s was left without a user target!", p->pid, p->comm); + + + // -------------------------------------------------------------------- + // group targets + o = p->group_target; + if(likely(p->group_target && p->group_target->gid == p->gid)) + w = p->group_target; + else { + if(unlikely(debug && p->group_target)) + fprintf(stderr, "apps.plugin: \t\tpid %d (%s) switched group from %d (%s) to %d.\n", p->pid, p->comm, p->group_target->gid, p->group_target->name, p->gid); + + w = p->group_target = get_groups_target(p->gid); + } + + if(likely(w)) + aggregate_pid_on_target(w, p, o); + else + error("pid %d %s was left without a group target!", p->pid, p->comm); + + } + + count_targets_fds(apps_groups_root_target); + count_targets_fds(users_root_target); + count_targets_fds(groups_root_target); + + cleanup_non_existing_pids(); +} + // ---------------------------------------------------------------------------- // update chart dimensions -void show_dimensions(void) -{ +unsigned long long send_resource_usage_to_netdata() { static struct timeval last = { 0, 0 }; static struct rusage me_last; - struct target *w; struct timeval now; struct rusage me; @@ -1615,138 +2100,145 @@ void show_dimensions(void) bcopy(&me, &me_last, sizeof(struct rusage)); } - fprintf(stdout, "BEGIN apps.cpu %llu\n", usec); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "BEGIN netdata.apps_cpu %llu\n", usec); + fprintf(stdout, "SET user = %llu\n", cpuuser); + fprintf(stdout, "SET system = %llu\n", cpusyst); + fprintf(stdout, "END\n"); + + fprintf(stdout, "BEGIN netdata.apps_files %llu\n", usec); + fprintf(stdout, "SET files = %llu\n", file_counter); + fprintf(stdout, "SET pids = %ld\n", all_pids_count); + fprintf(stdout, "SET fds = %d\n", all_files_len); + fprintf(stdout, "SET targets = %ld\n", apps_groups_targets); + fprintf(stdout, "END\n"); + + return usec; +} + +void send_collected_data_to_netdata(struct target *root, const char *type, unsigned long long usec) +{ + struct target *w; + + fprintf(stdout, "BEGIN %s.cpu %llu\n", type, usec); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "SET %s = %llu\n", w->name, w->utime + w->stime + w->fix_utime + w->fix_stime); } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.cpu_user %llu\n", usec); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "BEGIN %s.cpu_user %llu\n", type, usec); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "SET %s = %llu\n", w->name, w->utime + w->fix_utime); } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.cpu_system %llu\n", usec); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "BEGIN %s.cpu_system %llu\n", type, usec); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "SET %s = %llu\n", w->name, w->stime + w->fix_stime); } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.threads %llu\n", usec); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "BEGIN %s.threads %llu\n", type, usec); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "SET %s = %llu\n", w->name, w->num_threads); } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.processes %llu\n", usec); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "BEGIN %s.processes %llu\n", type, usec); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "SET %s = %lu\n", w->name, w->processes); } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.mem %llu\n", usec); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "BEGIN %s.mem %llu\n", type, usec); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "SET %s = %lld\n", w->name, (long long)w->statm_resident - (long long)w->statm_share); } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.minor_faults %llu\n", usec); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "BEGIN %s.minor_faults %llu\n", type, usec); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "SET %s = %llu\n", w->name, w->minflt + w->fix_minflt); } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.major_faults %llu\n", usec); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "BEGIN %s.major_faults %llu\n", type, usec); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "SET %s = %llu\n", w->name, w->majflt + w->fix_majflt); } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.lreads %llu\n", usec); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "BEGIN %s.lreads %llu\n", type, usec); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; - fprintf(stdout, "SET %s = %llu\n", w->name, w->io_logical_bytes_read); + fprintf(stdout, "SET %s = %llu\n", w->name, w->io_logical_bytes_read + w->fix_io_logical_bytes_read); } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.lwrites %llu\n", usec); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "BEGIN %s.lwrites %llu\n", type, usec); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; - fprintf(stdout, "SET %s = %llu\n", w->name, w->io_logical_bytes_written); + fprintf(stdout, "SET %s = %llu\n", w->name, w->io_logical_bytes_written + w->fix_io_logical_bytes_written); } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.preads %llu\n", usec); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "BEGIN %s.preads %llu\n", type, usec); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; - fprintf(stdout, "SET %s = %llu\n", w->name, w->io_storage_bytes_read); + fprintf(stdout, "SET %s = %llu\n", w->name, w->io_storage_bytes_read + w->fix_io_storage_bytes_read); } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.pwrites %llu\n", usec); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "BEGIN %s.pwrites %llu\n", type, usec); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; - fprintf(stdout, "SET %s = %llu\n", w->name, w->io_storage_bytes_written); + fprintf(stdout, "SET %s = %llu\n", w->name, w->io_storage_bytes_written + w->fix_io_storage_bytes_written); } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.files %llu\n", usec); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "BEGIN %s.files %llu\n", type, usec); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "SET %s = %llu\n", w->name, w->openfiles); } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.sockets %llu\n", usec); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "BEGIN %s.sockets %llu\n", type, usec); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "SET %s = %llu\n", w->name, w->opensockets); } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.pipes %llu\n", usec); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "BEGIN %s.pipes %llu\n", type, usec); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "SET %s = %llu\n", w->name, w->openpipes); } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN netdata.apps_cpu %llu\n", usec); - fprintf(stdout, "SET user = %llu\n", cpuuser); - fprintf(stdout, "SET system = %llu\n", cpusyst); - fprintf(stdout, "END\n"); - - fprintf(stdout, "BEGIN netdata.apps_files %llu\n", usec); - fprintf(stdout, "SET files = %llu\n", file_counter); - fprintf(stdout, "SET pids = %ld\n", all_pids_count); - fprintf(stdout, "SET fds = %d\n", all_files_len); - fprintf(stdout, "SET targets = %ld\n", targets); - fprintf(stdout, "END\n"); - fflush(stdout); } @@ -1754,12 +2246,12 @@ void show_dimensions(void) // ---------------------------------------------------------------------------- // generate the charts -void show_charts(void) +void send_charts_updates_to_netdata(struct target *root, const char *type, const char *title) { struct target *w; int newly_added = 0; - for(w = target_root ; w ; w = w->next) + for(w = root ; w ; w = w->next) if(!w->exposed && w->processes) { newly_added++; w->exposed = 1; @@ -1771,122 +2263,110 @@ void show_charts(void) // we have something new to show // update the charts - fprintf(stdout, "CHART apps.cpu '' 'Apps CPU Time (%ld%% = %ld core%s)' 'cpu time %%' cpu apps.cpu stacked 20001 %d\n", (processors * 100), processors, (processors>1)?"s":"", update_every); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "CHART %s.cpu '' '%s CPU Time (%ld%% = %ld core%s)' 'cpu time %%' cpu %s.cpu stacked 20001 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "DIMENSION %s '' incremental 100 %llu %s\n", w->name, Hertz, w->hidden ? "hidden,noreset" : "noreset"); } - fprintf(stdout, "CHART apps.mem '' 'Apps Dedicated Memory (w/o shared)' 'MB' mem apps.mem stacked 20003 %d\n", update_every); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "CHART %s.mem '' '%s Dedicated Memory (w/o shared)' 'MB' mem %s.mem stacked 20003 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "DIMENSION %s '' absolute %ld %ld noreset\n", w->name, sysconf(_SC_PAGESIZE), 1024L*1024L); } - fprintf(stdout, "CHART apps.threads '' 'Apps Threads' 'threads' processes apps.threads stacked 20005 %d\n", update_every); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "CHART %s.threads '' '%s Threads' 'threads' processes %s.threads stacked 20005 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "DIMENSION %s '' absolute 1 1 noreset\n", w->name); } - fprintf(stdout, "CHART apps.processes '' 'Apps Processes' 'processes' processes apps.processes stacked 20004 %d\n", update_every); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "CHART %s.processes '' '%s Processes' 'processes' processes %s.processes stacked 20004 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "DIMENSION %s '' absolute 1 1 noreset\n", w->name); } - fprintf(stdout, "CHART apps.cpu_user '' 'Apps CPU User Time (%ld%% = %ld core%s)' 'cpu time %%' cpu apps.cpu_user stacked 20020 %d\n", (processors * 100), processors, (processors>1)?"s":"", update_every); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "CHART %s.cpu_user '' '%s CPU User Time (%ld%% = %ld core%s)' 'cpu time %%' cpu %s.cpu_user stacked 20020 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "DIMENSION %s '' incremental 100 %llu noreset\n", w->name, Hertz * processors); } - fprintf(stdout, "CHART apps.cpu_system '' 'Apps CPU System Time (%ld%% = %ld core%s)' 'cpu time %%' cpu apps.cpu_system stacked 20021 %d\n", (processors * 100), processors, (processors>1)?"s":"", update_every); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "CHART %s.cpu_system '' '%s CPU System Time (%ld%% = %ld core%s)' 'cpu time %%' cpu %s.cpu_system stacked 20021 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "DIMENSION %s '' incremental 100 %llu noreset\n", w->name, Hertz * processors); } - fprintf(stdout, "CHART apps.major_faults '' 'Apps Major Page Faults (swap read)' 'page faults/s' swap apps.major_faults stacked 20010 %d\n", update_every); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "CHART %s.major_faults '' '%s Major Page Faults (swap read)' 'page faults/s' swap %s.major_faults stacked 20010 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "DIMENSION %s '' incremental 1 1 noreset\n", w->name); } - fprintf(stdout, "CHART apps.minor_faults '' 'Apps Minor Page Faults' 'page faults/s' mem apps.minor_faults stacked 20011 %d\n", update_every); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "CHART %s.minor_faults '' '%s Minor Page Faults' 'page faults/s' mem %s.minor_faults stacked 20011 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "DIMENSION %s '' incremental 1 1 noreset\n", w->name); } - fprintf(stdout, "CHART apps.lreads '' 'Apps Disk Logical Reads' 'kilobytes/s' disk apps.lreads stacked 20042 %d\n", update_every); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "CHART %s.lreads '' '%s Disk Logical Reads' 'kilobytes/s' disk %s.lreads stacked 20042 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "DIMENSION %s '' incremental 1 %d noreset\n", w->name, 1024); } - fprintf(stdout, "CHART apps.lwrites '' 'Apps I/O Logical Writes' 'kilobytes/s' disk apps.lwrites stacked 20042 %d\n", update_every); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "CHART %s.lwrites '' '%s I/O Logical Writes' 'kilobytes/s' disk %s.lwrites stacked 20042 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "DIMENSION %s '' incremental 1 %d noreset\n", w->name, 1024); } - fprintf(stdout, "CHART apps.preads '' 'Apps Disk Reads' 'kilobytes/s' disk apps.preads stacked 20002 %d\n", update_every); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "CHART %s.preads '' '%s Disk Reads' 'kilobytes/s' disk %s.preads stacked 20002 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "DIMENSION %s '' incremental 1 %d noreset\n", w->name, 1024); } - fprintf(stdout, "CHART apps.pwrites '' 'Apps Disk Writes' 'kilobytes/s' disk apps.pwrites stacked 20002 %d\n", update_every); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "CHART %s.pwrites '' '%s Disk Writes' 'kilobytes/s' disk %s.pwrites stacked 20002 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "DIMENSION %s '' incremental 1 %d noreset\n", w->name, 1024); } - fprintf(stdout, "CHART apps.files '' 'Apps Open Files' 'open files' disk apps.files stacked 20050 %d\n", update_every); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "CHART %s.files '' '%s Open Files' 'open files' disk %s.files stacked 20050 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "DIMENSION %s '' absolute 1 1 noreset\n", w->name); } - fprintf(stdout, "CHART apps.sockets '' 'Apps Open Sockets' 'open sockets' net apps.sockets stacked 20051 %d\n", update_every); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "CHART %s.sockets '' '%s Open Sockets' 'open sockets' net %s.sockets stacked 20051 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "DIMENSION %s '' absolute 1 1 noreset\n", w->name); } - fprintf(stdout, "CHART apps.pipes '' 'Apps Pipes' 'open pipes' processes apps.pipes stacked 20053 %d\n", update_every); - for (w = target_root; w ; w = w->next) { + fprintf(stdout, "CHART %s.pipes '' '%s Pipes' 'open pipes' processes %s.pipes stacked 20053 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; fprintf(stdout, "DIMENSION %s '' absolute 1 1 noreset\n", w->name); } - - fprintf(stdout, "CHART netdata.apps_cpu '' 'Apps Plugin CPU' 'milliseconds/s' apps.plugin netdata.apps_cpu stacked 140000 %d\n", update_every); - fprintf(stdout, "DIMENSION user '' incremental 1 %d\n", 1000); - fprintf(stdout, "DIMENSION system '' incremental 1 %d\n", 1000); - - fprintf(stdout, "CHART netdata.apps_files '' 'Apps Plugin Files' 'files/s' apps.plugin netdata.apps_files line 140001 %d\n", update_every); - fprintf(stdout, "DIMENSION files '' incremental 1 1\n"); - fprintf(stdout, "DIMENSION pids '' absolute 1 1\n"); - fprintf(stdout, "DIMENSION fds '' absolute 1 1\n"); - fprintf(stdout, "DIMENSION targets '' absolute 1 1\n"); - - fflush(stdout); } @@ -1925,12 +2405,18 @@ void parse_args(int argc, char **argv) if(freq > 0) update_every = freq; if(!name) name = "groups"; - if(read_process_groups(name)) { + if(read_apps_groups_conf(name)) { error("Cannot read process groups %s", name); exit(1); } } +unsigned long long sutime() { + struct timeval now; + gettimeofday(&now, NULL); + return now.tv_sec * 1000000ULL + now.tv_usec; +} + int main(int argc, char **argv) { // debug_flags = D_PROCFILE; @@ -1938,6 +2424,9 @@ int main(int argc, char **argv) // set the name for logging program_name = "apps.plugin"; + // disable syslog for apps.plugin + error_log_syslog = 0; + host_prefix = getenv("NETDATA_HOST_PREFIX"); if(host_prefix == NULL) { info("NETDATA_HOST_PREFIX is not passed from netdata"); @@ -1945,15 +2434,22 @@ int main(int argc, char **argv) } else info("Found NETDATA_HOST_PREFIX='%s'", host_prefix); + config_dir = getenv("NETDATA_CONFIG_DIR"); + if(config_dir == NULL) { + info("NETDATA_CONFIG_DIR is not passed from netdata"); + config_dir = CONFIG_DIR; + } + else info("Found NETDATA_CONFIG_DIR='%s'", config_dir); + info("starting..."); procfile_adaptive_initial_allocation = 1; time_t started_t = time(NULL); time_t current_t; - Hertz = get_hertz(); - pid_max = get_pid_max(); - processors = get_processors(); + Hertz = get_system_hertz(); + pid_max = get_system_pid_max(); + processors = get_system_cpus(); parse_args(argc, argv); @@ -1964,39 +2460,63 @@ int main(int argc, char **argv) exit(1); } - unsigned long long counter = 1; - unsigned long long usec = 0, susec = 0; - struct timeval last, now; - gettimeofday(&last, NULL); + fprintf(stdout, "CHART netdata.apps_cpu '' 'Apps Plugin CPU' 'milliseconds/s' apps.plugin netdata.apps_cpu stacked 140000 %d\n", update_every); + fprintf(stdout, "DIMENSION user '' incremental 1 %d\n", 1000); + fprintf(stdout, "DIMENSION system '' incremental 1 %d\n", 1000); + fprintf(stdout, "CHART netdata.apps_files '' 'Apps Plugin Files' 'files/s' apps.plugin netdata.apps_files line 140001 %d\n", update_every); + fprintf(stdout, "DIMENSION files '' incremental 1 1\n"); + fprintf(stdout, "DIMENSION pids '' absolute 1 1\n"); + fprintf(stdout, "DIMENSION fds '' absolute 1 1\n"); + fprintf(stdout, "DIMENSION targets '' absolute 1 1\n"); + + +#ifndef PROFILING_MODE + unsigned long long sunext = (time(NULL) - (time(NULL) % update_every) + update_every) * 1000000ULL; + unsigned long long sunow; +#endif /* PROFILING_MODE */ + + unsigned long long counter = 1; for(;1; counter++) { - if(!update_from_proc()) { - error("Cannot allocate %lu bytes of memory.", sizeof(struct pid_stat *) * pid_max); +#ifndef PROFILING_MODE + // delay until it is our time to run + while((sunow = sutime()) < sunext) + usleep((useconds_t)(sunext - sunow)); + + // find the next time we need to run + while(sutime() > sunext) + sunext += update_every * 1000000ULL; +#endif /* PROFILING_MODE */ + + if(!collect_data_for_all_processes_from_proc()) { + error("Cannot collect /proc data for running processes. Disabling apps.plugin..."); printf("DISABLE\n"); exit(1); } - update_statistics(); - show_charts(); // this is smart enough to show only newly added apps, when needed - show_dimensions(); + calculate_netdata_statistics(); - if(debug) fprintf(stderr, "apps.plugin: done Loop No %llu\n", counter); - fflush(NULL); + unsigned long long dt = send_resource_usage_to_netdata(); - gettimeofday(&now, NULL); - usec = usecdiff(&now, &last) - susec; - if(debug) fprintf(stderr, "apps.plugin: last loop took %llu usec (worked for %llu, sleeped for %llu).\n", usec + susec, usec, susec); + // this is smart enough to show only newly added apps, when needed + send_charts_updates_to_netdata(apps_groups_root_target, "apps", "Apps"); + send_charts_updates_to_netdata(users_root_target, "users", "Users"); + send_charts_updates_to_netdata(groups_root_target, "groups", "User Groups"); - // if the last loop took less than half the time - // wait the rest of the time - if(usec < (update_every * 1000000ULL / 2)) susec = (update_every * 1000000ULL) - usec; - else susec = update_every * 1000000ULL / 2; + send_collected_data_to_netdata(apps_groups_root_target, "apps", dt); + send_collected_data_to_netdata(users_root_target, "users", dt); + send_collected_data_to_netdata(groups_root_target, "groups", dt); - usleep((__useconds_t) susec); - bcopy(&now, &last, sizeof(struct timeval)); + if(debug) fprintf(stderr, "apps.plugin: done Loop No %llu\n", counter); + fflush(NULL); - // restart once per day (14400 seconds) current_t = time(NULL); + +#ifndef PROFILING_MODE + // restart check (14400 seconds) if(current_t - started_t > 14400) exit(0); +#else + if(current_t - started_t > 10) exit(0); +#endif /* PROFILING_MODE */ } } |