diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2018-11-07 12:19:29 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2018-11-07 12:20:17 +0000 |
commit | a64a253794ac64cb40befee54db53bde17dd0d49 (patch) | |
tree | c1024acc5f6e508814b944d99f112259bb28b1be /collectors/apps.plugin | |
parent | New upstream version 1.10.0+dfsg (diff) | |
download | netdata-upstream/1.11.0+dfsg.tar.xz netdata-upstream/1.11.0+dfsg.zip |
New upstream version 1.11.0+dfsg upstream/1.11.0+dfsg
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | collectors/apps.plugin/Makefile.am | 13 | ||||
-rw-r--r-- | collectors/apps.plugin/Makefile.in | 521 | ||||
-rw-r--r-- | collectors/apps.plugin/README.md | 372 | ||||
-rw-r--r-- | collectors/apps.plugin/apps_groups.conf (renamed from conf.d/apps_groups.conf) | 14 | ||||
-rw-r--r-- | collectors/apps.plugin/apps_plugin.c (renamed from src/apps_plugin.c) | 712 |
5 files changed, 1351 insertions, 281 deletions
diff --git a/collectors/apps.plugin/Makefile.am b/collectors/apps.plugin/Makefile.am new file mode 100644 index 000000000..be0306492 --- /dev/null +++ b/collectors/apps.plugin/Makefile.am @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects + +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) + +dist_libconfig_DATA = \ + apps_groups.conf \ + $(NULL) diff --git a/collectors/apps.plugin/Makefile.in b/collectors/apps.plugin/Makefile.in new file mode 100644 index 000000000..38120c048 --- /dev/null +++ b/collectors/apps.plugin/Makefile.in @@ -0,0 +1,521 @@ +# Makefile.in generated by automake 1.14.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# SPDX-License-Identifier: GPL-3.0-or-later + +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = collectors/apps.plugin +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(dist_libconfig_DATA) $(dist_noinst_DATA) +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = 
$(top_srcdir)/build/m4/ax_c___atomic.m4 \ + $(top_srcdir)/build/m4/ax_c__generic.m4 \ + $(top_srcdir)/build/m4/ax_c_lto.m4 \ + $(top_srcdir)/build/m4/ax_c_mallinfo.m4 \ + $(top_srcdir)/build/m4/ax_c_mallopt.m4 \ + $(top_srcdir)/build/m4/ax_check_compile_flag.m4 \ + $(top_srcdir)/build/m4/ax_gcc_func_attribute.m4 \ + $(top_srcdir)/build/m4/ax_pthread.m4 \ + $(top_srcdir)/build/m4/jemalloc.m4 \ + $(top_srcdir)/build/m4/tcmalloc.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, 
files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libconfigdir)" +DATA = $(dist_libconfig_DATA) $(dist_noinst_DATA) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +IPMIMONITORING_CFLAGS = @IPMIMONITORING_CFLAGS@ +IPMIMONITORING_LIBS = @IPMIMONITORING_LIBS@ +LDFLAGS = @LDFLAGS@ +LIBCAP_CFLAGS = @LIBCAP_CFLAGS@ +LIBCAP_LIBS = @LIBCAP_LIBS@ +LIBMNL_CFLAGS = @LIBMNL_CFLAGS@ +LIBMNL_LIBS = @LIBMNL_LIBS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MATH_CFLAGS = @MATH_CFLAGS@ +MATH_LIBS = @MATH_LIBS@ +MKDIR_P = @MKDIR_P@ +NFACCT_CFLAGS = @NFACCT_CFLAGS@ +NFACCT_LIBS = @NFACCT_LIBS@ +OBJEXT = @OBJEXT@ +OPTIONAL_IPMIMONITORING_CFLAGS = @OPTIONAL_IPMIMONITORING_CFLAGS@ +OPTIONAL_IPMIMONITORING_LIBS = @OPTIONAL_IPMIMONITORING_LIBS@ +OPTIONAL_LIBCAP_CFLAGS = @OPTIONAL_LIBCAP_CFLAGS@ +OPTIONAL_LIBCAP_LIBS = @OPTIONAL_LIBCAP_LIBS@ +OPTIONAL_MATH_CLFAGS = @OPTIONAL_MATH_CLFAGS@ +OPTIONAL_MATH_LIBS = 
@OPTIONAL_MATH_LIBS@ +OPTIONAL_NFACCT_CLFAGS = @OPTIONAL_NFACCT_CLFAGS@ +OPTIONAL_NFACCT_LIBS = @OPTIONAL_NFACCT_LIBS@ +OPTIONAL_UUID_CLFAGS = @OPTIONAL_UUID_CLFAGS@ +OPTIONAL_UUID_LIBS = @OPTIONAL_UUID_LIBS@ +OPTIONAL_ZLIB_CLFAGS = @OPTIONAL_ZLIB_CLFAGS@ +OPTIONAL_ZLIB_LIBS = @OPTIONAL_ZLIB_LIBS@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_RPM_RELEASE = @PACKAGE_RPM_RELEASE@ +PACKAGE_RPM_VERSION = @PACKAGE_RPM_VERSION@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SSE_CANDIDATE = @SSE_CANDIDATE@ +STRIP = @STRIP@ +UUID_CFLAGS = @UUID_CFLAGS@ +UUID_LIBS = @UUID_LIBS@ +VERSION = @VERSION@ +ZLIB_CFLAGS = @ZLIB_CFLAGS@ +ZLIB_LIBS = @ZLIB_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +ax_pthread_config = @ax_pthread_config@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_target = @build_target@ +build_vendor = @build_vendor@ +builddir = @builddir@ +cachedir = @cachedir@ +chartsdir = @chartsdir@ +configdir = @configdir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +has_jemalloc = @has_jemalloc@ +has_tcmalloc = @has_tcmalloc@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ 
+includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libconfigdir = @libconfigdir@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +logdir = @logdir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +nodedir = @nodedir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pluginsdir = @pluginsdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +pythondir = @pythondir@ +registrydir = @registrydir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +varlibdir = @varlibdir@ +webdir = @webdir@ +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in +dist_noinst_DATA = \ + README.md \ + $(NULL) + +dist_libconfig_DATA = \ + apps_groups.conf \ + $(NULL) + +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu collectors/apps.plugin/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu collectors/apps.plugin/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-dist_libconfigDATA: $(dist_libconfig_DATA) + @$(NORMAL_INSTALL) + @list='$(dist_libconfig_DATA)'; test -n "$(libconfigdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(libconfigdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libconfigdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(libconfigdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(libconfigdir)" || exit $$?; \ + done + +uninstall-dist_libconfigDATA: + @$(NORMAL_UNINSTALL) + @list='$(dist_libconfig_DATA)'; test -n "$(libconfigdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(libconfigdir)'; $(am__uninstall_files_from_dir) +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo 
"$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(DATA) +installdirs: + for dir in "$(DESTDIR)$(libconfigdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES) +clean: clean-am + +clean-am: clean-generic mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-dist_libconfigDATA + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-dist_libconfigDATA + +.MAKE: install-am install-strip + +.PHONY: all all-am check check-am clean clean-generic cscopelist-am \ + ctags-am distclean distclean-generic distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dist_libconfigDATA install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-generic pdf pdf-am ps ps-am tags-am uninstall \ + uninstall-am uninstall-dist_libconfigDATA + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/collectors/apps.plugin/README.md b/collectors/apps.plugin/README.md new file mode 100644 index 000000000..05680efe8 --- /dev/null +++ b/collectors/apps.plugin/README.md @@ -0,0 +1,372 @@ +# apps.plugin + +`apps.plugin` breaks down system resource usage to **processes**, **users** and **user groups**. + +To achieve this task, it iterates through the whole process tree, collecting resource usage information +for every process found running. + +Since netdata needs to present this information in charts and track them through time, +instead of presenting a `top` like list, `apps.plugin` uses a pre-defined list of **process groups** +to which it assigns all running processes. This list is [customizable](apps_groups.conf) and netdata +ships with a good default for most cases (to edit it on your system run `/etc/netdata/edit-config apps_groups.conf`). + +So, `apps.plugin` builds a process tree (much like `ps fax` does in Linux), and groups +processes together (evaluating both child and parent processes) so that the result is always a list with +a predefined set of members (of course, only process groups found running are reported). + +> If you find that `apps.plugin` categorizes standard applications as `other`, we would be +> glad to accept pull requests improving the [defaults](apps_groups.conf) shipped with netdata. + +Unlike traditional process monitoring tools (like `top`), `apps.plugin` is able to account for the resource +utilization of exited processes. Their utilization is accounted at their currently running parents. +So, `apps.plugin` is perfectly able to measure the resources used by shell scripts and other processes +that fork/spawn other short lived processes hundreds of times per second. + +## Charts + +`apps.plugin` provides charts for 3 sections: + +1. Per application charts as **Applications** at netdata dashboards +2. Per user charts as **Users** at netdata dashboards +3. 
Per user group charts as **User Groups** at netdata dashboards + +Each of these sections provides the same number of charts: + +- CPU Utilization + - Total CPU usage + - User / System CPU usage +- Disk I/O + - Physical Reads / Writes + - Logical Reads / Writes + - Open Unique Files (if a file is found open multiple times, it is counted just once) +- Memory + - Real Memory Used (non shared) + - Virtual Memory Allocated + - Minor Page Faults (i.e. memory activity) +- Processes + - Threads Running + - Processes Running + - Pipes Open +- Swap Memory + - Swap Memory Used + - Major Page Faults (i.e. swap activity) +- Network + - Sockets Open + +The above are reported: + +- For **Applications** per [target configured](apps_groups.conf). +- For **Users** per username or UID (when the username is not available). +- For **User Groups** per groupname or GID (when groupname is not available). + +## Performance + +`apps.plugin` is a complex piece of software and has a lot of work to do. +We are proud that `apps.plugin` is a lot faster compared to any other similar tool, +while collecting a lot more information for the processes, however the fact is that +this plugin requires more CPU resources than the netdata daemon itself. + +Under Linux, for each process running, `apps.plugin` reads several `/proc` files +per process. Doing this work per-second, especially on hosts with several thousands +of processes, may increase the CPU resources consumed by the plugin. + +In such cases, you may need to lower its data collection frequency. + +To do this, edit `/etc/netdata/netdata.conf` and find this section: + +``` +[plugin:apps] + # update every = 1 + # command options = +``` + +Uncomment the line `update every` and set it to a higher number. If you just set it to ` 2 `, +its CPU resources will be cut in half, and data collection will be once every 2 seconds. + +## Configuration + +The configuration file is `/etc/netdata/apps_groups.conf` (the default is [here](apps_groups.conf)). 
+To edit it on your system run `/etc/netdata/edit-config apps_groups.conf`. + +The configuration file accepts multiple lines, each having this format: + +```txt +group: process1 process2 ... +``` + +Each group can be given multiple times, to add more processes to it. + +For the **Applications** section, only groups configured in this file are reported. +All other processes will be reported as `other`. + +For each process given, its whole process tree will be grouped, not just the process matched. +The plugin will include both parents and children. + +The process names are the ones returned by: + + - `ps -e` or `cat /proc/PID/stat` + - in case of substring mode (see below): `/proc/PID/cmdline` + +To add process names with spaces, enclose them in quotes (single or double) +example: ` 'Plex Media Serv' ` or ` "my other process" `. + +You can add an asterisk ` * ` at the beginning and/or the end of a process: + + - `*name` *suffix* mode: will search for processes ending with `name` (at `/proc/PID/stat`) + - `name*` *prefix* mode: will search for processes beginning with `name` (at `/proc/PID/stat`) + - `*name*` *substring* mode: will search for `name` in the whole command line (at `/proc/PID/cmdline`) + +If you enter even just one *name* (substring), `apps.plugin` will process +`/proc/PID/cmdline` for all processes (of course only once per process: when they are first seen). + +To add processes with single quotes, enclose them in double quotes: ` "process with this ' single quote" ` + +To add processes with double quotes, enclose them in single quotes: ` 'process with this " double quote' ` + +If a group or process name starts with a ` - `, the dimension will be hidden from the chart (cpu chart only). + +If a process starts with a ` + `, debugging will be enabled for it (debugging produces a lot of output - do not enable it in production systems). + +You can add any number of groups. Only the ones found running will affect the charts generated. 
+However, producing charts with hundreds of dimensions may slow down your web browser. + +The order of the entries in this list is important: the first that matches a process is used, so put important +ones at the top. Processes not matched by any row, will inherit it from their parents or children. + +The order also controls the order of the dimensions on the generated charts (although applications started +after apps.plugin is started, will be appended to the existing list of dimensions the netdata daemon maintains). + +## Permissions + +`apps.plugin` requires additional privileges to collect all the information it needs. +The problem is described in issue #157. + +When netdata is installed, `apps.plugin` is given the capabilities `cap_dac_read_search,cap_sys_ptrace+ep`. +If this fails (i.e. `setcap` fails), `apps.plugin` is setuid to `root`. + +#### linux capabilities in containers + +There are a few cases, like `docker` and `virtuozzo` containers, where `setcap` succeeds, but the capabilities +are silently ignored (in `lxc` containers `setcap` fails). + +In these cases (`setcap` succeeds but capabilities do not work), you will have to setuid +to root `apps.plugin` by running these commands: + +```sh +chown root:netdata /usr/libexec/netdata/plugins.d/apps.plugin +chmod 4750 /usr/libexec/netdata/plugins.d/apps.plugin +``` + +You will have to run these, every time you update netdata. + +## Security + +`apps.plugin` performs a hard-coded function of building the process tree in memory, +iterating forever, collecting metrics for each running process and sending them to netdata. +This is a one-way communication, from `apps.plugin` to netdata. + +So, since `apps.plugin` cannot be instructed by netdata for the actions it performs, +we think it is pretty safe to allow it to have these increased privileges. + +Keep in mind that `apps.plugin` will still run without escalated permissions, +but it will not be able to collect all the information. 
+ +## Application Badges + +You can create badges that you can embed anywhere you like, with URLs like this: + +``` +https://your.netdata.ip:19999/api/v1/badge.svg?chart=apps.processes&dimensions=myapp&value_color=green%3E0%7Cred +``` + +The color expression unescaped is this: `value_color=green>0|red`. + +Here is an example for the process group `sql` at `https://registry.my-netdata.io`: + +![image](https://registry.my-netdata.io/api/v1/badge.svg?chart=apps.processes&dimensions=sql&value_color=green%3E0%7Cred) + +Netdata is able to give you a lot more badges for your app. +Examples below for process group `sql`: + +- CPU usage: ![image](http://registry.my-netdata.io/api/v1/badge.svg?chart=apps.cpu&dimensions=sql&value_color=green=0%7Corange%3C50%7Cred) +- Disk Physical Reads ![image](http://registry.my-netdata.io/api/v1/badge.svg?chart=apps.preads&dimensions=sql&value_color=green%3C100%7Corange%3C1000%7Cred) +- Disk Physical Writes ![image](http://registry.my-netdata.io/api/v1/badge.svg?chart=apps.pwrites&dimensions=sql&value_color=green%3C100%7Corange%3C1000%7Cred) +- Disk Logical Reads ![image](http://registry.my-netdata.io/api/v1/badge.svg?chart=apps.lreads&dimensions=sql&value_color=green%3C100%7Corange%3C1000%7Cred) +- Disk Logical Writes ![image](http://registry.my-netdata.io/api/v1/badge.svg?chart=apps.lwrites&dimensions=sql&value_color=green%3C100%7Corange%3C1000%7Cred) +- Open Files ![image](http://registry.my-netdata.io/api/v1/badge.svg?chart=apps.files&dimensions=sql&value_color=green%3E30%7Cred) +- Real Memory ![image](http://registry.my-netdata.io/api/v1/badge.svg?chart=apps.mem&dimensions=sql&value_color=green%3C100%7Corange%3C200%7Cred) +- Virtual Memory ![image](http://registry.my-netdata.io/api/v1/badge.svg?chart=apps.vmem&dimensions=sql&value_color=green%3C100%7Corange%3C1000%7Cred) +- Swap Memory ![image](http://registry.my-netdata.io/api/v1/badge.svg?chart=apps.swap&dimensions=sql&value_color=green=0%7Cred) +- Minor Page Faults 
![image](http://registry.my-netdata.io/api/v1/badge.svg?chart=apps.minor_faults&dimensions=sql&value_color=green%3C100%7Corange%3C1000%7Cred) +- Processes ![image](http://registry.my-netdata.io/api/v1/badge.svg?chart=apps.processes&dimensions=sql&value_color=green%3E0%7Cred) +- Threads ![image](http://registry.my-netdata.io/api/v1/badge.svg?chart=apps.threads&dimensions=sql&value_color=green%3E=28%7Cred) +- Major Faults (swap activity) ![image](http://registry.my-netdata.io/api/v1/badge.svg?chart=apps.major_faults&dimensions=sql&value_color=green=0%7Cred) +- Open Pipes ![image](http://registry.my-netdata.io/api/v1/badge.svg?chart=apps.pipes&dimensions=sql&value_color=green=0%7Cred) +- Open Sockets ![image](http://registry.my-netdata.io/api/v1/badge.svg?chart=apps.sockets&dimensions=sql&value_color=green%3E=3%7Cred) + + +For more information about badges check [Generating Badges](../../web/api/badges) + +## Comparison with console tools + +Ssh to a server running netdata and execute this: + +```sh +while true; do ls -l /var/run >/dev/null; done +``` + +In most systems `/var/run` is a `tmpfs` device, so there is nothing that can stop this command +from consuming entirely one of the CPU cores of the machine. + +As we will see below, **none** of the console performance monitoring tools can report that this +command is using 100% CPU. They do report of course that the CPU is busy, but **they fail to +identify the process that consumes so much CPU**. + +Here is what common Linux console monitoring tools report: + +#### top + +`top` reports that `bash` is using just 14%. + +If you check the total system CPU utilization, it says there is no idle CPU at all, but `top` +fails to provide a breakdown of the CPU consumption in the system. The sum of the CPU utilization +of all processes reported by `top`, is 15.6%. 
+ +``` +top - 18:46:28 up 3 days, 20:14, 2 users, load average: 0.22, 0.05, 0.02 +Tasks: 76 total, 2 running, 74 sleeping, 0 stopped, 0 zombie +%Cpu(s): 32.8 us, 65.6 sy, 0.0 ni, 0.0 id, 0.0 wa, 1.3 hi, 0.3 si, 0.0 st +KiB Mem : 1016576 total, 244112 free, 52012 used, 720452 buff/cache +KiB Swap: 0 total, 0 free, 0 used. 753712 avail Mem + + PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND +12789 root 20 0 14980 4180 3020 S 14.0 0.4 0:02.82 bash + 9 root 20 0 0 0 0 S 1.0 0.0 0:22.36 rcuos/0 + 642 netdata 20 0 132024 20112 2660 S 0.3 2.0 14:26.29 netdata +12522 netdata 20 0 9508 2476 1828 S 0.3 0.2 0:02.26 apps.plugin + 1 root 20 0 67196 10216 7500 S 0.0 1.0 0:04.83 systemd + 2 root 20 0 0 0 0 S 0.0 0.0 0:00.00 kthreadd +``` + +#### htop + +Exactly like `top`, `htop` is providing an incomplete breakdown of the system CPU utilization. + +``` + CPU[||||||||||||||||||||||||100.0%] Tasks: 27, 11 thr; 2 running + Mem[||||||||||||||||||||85.4M/993M] Load average: 1.16 0.88 0.90 + Swp[ 0K/0K] Uptime: 3 days, 21:37:03 + + PID USER PRI NI VIRT RES SHR S CPU% MEM% TIME+ Command +12789 root 20 0 15104 4484 3208 S 14.0 0.4 10:57.15 -bash + 7024 netdata 20 0 9544 2480 1744 S 0.7 0.2 0:00.88 /usr/libexec/netd + 7009 netdata 20 0 138M 21016 2712 S 0.7 2.1 0:00.89 /usr/sbin/netdata + 7012 netdata 20 0 138M 21016 2712 S 0.0 2.1 0:00.31 /usr/sbin/netdata + 563 root 20 0 308M 202M 202M S 0.0 20.4 1:00.81 /usr/lib/systemd/ + 7019 netdata 20 0 138M 21016 2712 S 0.0 2.1 0:00.14 /usr/sbin/netdata +``` + +#### atop + +`atop` also fails to break down CPU usage. 
+ +``` +ATOP - localhost 2016/12/10 20:11:27 ----------- 10s elapsed +PRC | sys 1.13s | user 0.43s | #proc 75 | #zombie 0 | #exit 5383 | +CPU | sys 67% | user 31% | irq 2% | idle 0% | wait 0% | +CPL | avg1 1.34 | avg5 1.05 | avg15 0.96 | csw 51346 | intr 10508 | +MEM | tot 992.8M | free 211.5M | cache 470.0M | buff 87.2M | slab 164.7M | +SWP | tot 0.0M | free 0.0M | | vmcom 207.6M | vmlim 496.4M | +DSK | vda | busy 0% | read 0 | write 4 | avio 1.50 ms | +NET | transport | tcpi 16 | tcpo 15 | udpi 0 | udpo 0 | +NET | network | ipi 16 | ipo 15 | ipfrw 0 | deliv 16 | +NET | eth0 ---- | pcki 16 | pcko 15 | si 1 Kbps | so 4 Kbps | + + PID SYSCPU USRCPU VGROW RGROW RDDSK WRDSK ST EXC S CPU CMD 1/600 +12789 0.98s 0.40s 0K 0K 0K 336K -- - S 14% bash + 9 0.08s 0.00s 0K 0K 0K 0K -- - S 1% rcuos/0 + 7024 0.03s 0.00s 0K 0K 0K 0K -- - S 0% apps.plugin + 7009 0.01s 0.01s 0K 0K 0K 4K -- - S 0% netdata +``` + +#### glances + +And the same is true for `glances`. The system runs at 100%, but `glances` reports only 17% +per process utilization. + +Note also, that being a `python` program, `glances` uses 1.6% CPU while it runs. + + +``` +localhost Uptime: 3 days, 21:42:00 + +CPU [100.0%] CPU 100.0% MEM 23.7% SWAP 0.0% LOAD 1-core +MEM [ 23.7%] user: 30.9% total: 993M total: 0 1 min: 1.18 +SWAP [ 0.0%] system: 67.8% used: 236M used: 0 5 min: 1.08 + idle: 0.0% free: 757M free: 0 15 min: 1.00 + +NETWORK Rx/s Tx/s TASKS 75 (90 thr), 1 run, 74 slp, 0 oth +eth0 168b 2Kb +eth1 0b 0b CPU% MEM% PID USER NI S Command +lo 0b 0b 13.5 0.4 12789 root 0 S -bash + 1.6 2.2 7025 root 0 R /usr/bin/python /u +DISK I/O R/s W/s 1.0 0.0 9 root 0 S rcuos/0 +vda1 0 4K 0.3 0.2 7024 netdata 0 S /usr/libexec/netda + 0.3 0.0 7 root 0 S rcu_sched +FILE SYS Used Total 0.3 2.1 7009 netdata 0 S /usr/sbin/netdata +/ (vda1) 1.56G 29.5G 0.0 0.0 17 root 0 S oom_reaper +``` + +#### why this happens? + +All the console tools report usage based on the processes found running *at the moment they +examine the process tree*. 
So, they see just one `ls` command, which is actually very quick +with minor CPU utilization. But the shell is spawning hundreds of them, one after another +(much like shell scripts do). + +#### what netdata reports? + +The total CPU utilization of the system: + +![image](https://cloud.githubusercontent.com/assets/2662304/21076212/9198e5a6-bf2e-11e6-9bc0-6bdea25befb2.png) +<br/>_**Figure 1**: The system overview section at netdata, just a few seconds after the command was run_ + +And in the Applications section, `apps.plugin` breaks down CPU usage per application: + +![image](https://cloud.githubusercontent.com/assets/2662304/21076220/c9687848-bf2e-11e6-8d81-348592c5aca2.png) +<br/>_**Figure 2**: The Applications section at netdata, just a few seconds after the command was run_ + +So, the `ssh` session is using 95% CPU time. + +Why `ssh`? + +`apps.plugin` groups all processes based on its configuration file +[`/etc/netdata/apps_groups.conf`](apps_groups.conf) +(to edit it on your system run `/etc/netdata/edit-config apps_groups.conf`). +The default configuration has nothing for `bash`, but it has an entry for `sshd`, so netdata accumulates +all ssh sessions to a dimension on the charts, called `ssh`. This includes all the processes in +the process tree of `sshd`, **including the exited children**. + +> Distributions based on `systemd` provide another way to get cpu utilization per user session +> or service running: control groups, or cgroups, commonly used as part of containers. +> `apps.plugin` does not use these mechanisms. The process grouping made by `apps.plugin` works +> on any Linux, `systemd` based or not. + +#### a more technical description of how netdata works + +netdata reads `/proc/<pid>/stat` for all processes, once per second and extracts `utime` and +`stime` (user and system cpu utilization), much like all the console tools do. 
+ +But it [also extracts `cutime` and `cstime`](https://github.com/netdata/netdata/blob/62596cc6b906b1564657510ca9135c08f6d4cdda/src/apps_plugin.c#L636-L642) +that account for the user and system time of the exited children of each process. By keeping a map in +memory of the whole process tree, it is capable of assigning the right time to every process, +taking into account all its exited children. + +It is tricky, since a process may be running for 1 hour and once it exits, its parent should not +receive the whole 1 hour of cpu time in just 1 second - you have to subtract the cpu time that has +been reported for it prior to this iteration. + +It is even trickier, because walking through the entire process tree takes some time itself. So, +if you sum the CPU utilization of all processes, you might have more CPU time than the reported +total cpu time of the system. netdata solves this by adapting the per process cpu utilization to +the total of the system. [Netdata adds charts that document this normalization](https://london.my-netdata.io/default.html#menu_netdata_submenu_apps_plugin). 
diff --git a/conf.d/apps_groups.conf b/collectors/apps.plugin/apps_groups.conf index 4356e4910..c0d22fac9 100644 --- a/conf.d/apps_groups.conf +++ b/collectors/apps.plugin/apps_groups.conf @@ -107,15 +107,16 @@ timedb: prometheus *carbon-cache.py* *carbon-aggregator.py* *graphite/manage.py* # ----------------------------------------------------------------------------- # email servers -email: dovecot imapd pop3d amavis* master zmstat* zmmailboxdmgr qmgr oqmgr saslauthd opendkim clamd freshclam unbound tlsmgr postfwd2 postscreen postfix smtp* lmtp* +email: dovecot imapd pop3d amavis* master zmstat* zmmailboxdmgr qmgr oqmgr saslauthd opendkim clamd freshclam unbound tlsmgr postfwd2 postscreen postfix smtp* lmtp* sendmail # ----------------------------------------------------------------------------- # network, routing, VPN ppp: ppp* vpn: openvpn pptp* cjdroute gvpe tincd -wifi: hostapd wpa_supplicant +wifi: hostapd wpa_supplicant NetworkManager routing: ospfd* ospf6d* bgpd isisd ripd ripngd pimd ldpd zebra vtysh bird* +modem: ModemManager # ----------------------------------------------------------------------------- # high availability and balancers @@ -162,7 +163,7 @@ VMs: vbox* VBox* qemu* # ----------------------------------------------------------------------------- # ssh servers and clients -ssh: ssh* scp +ssh: ssh* scp dropbear # ----------------------------------------------------------------------------- # print servers and clients @@ -172,7 +173,7 @@ print: cups* lpd lpq # ----------------------------------------------------------------------------- # time servers and clients -time: ntp* systemd-timesyncd +time: ntp* systemd-timesyncd chronyd # ----------------------------------------------------------------------------- # dhcp servers and clients @@ -257,13 +258,14 @@ airflow: *airflow* X: X Xorg xinit lightdm xdm pulseaudio gkrellm xfwm4 xfdesktop xfce* Thunar X: xfsettingsd xfconfd gnome-* gdm gconf* dconf* xfconf* *gvfs gvfs* kdm slim X: evolution-* 
firefox chromium opera vivaldi-bin epiphany WebKit* +X: '*systemd --user*' chrome *chrome-sandbox* *google-chrome* *chromium* *firefox* # ----------------------------------------------------------------------------- # Kernel / System ksmd: ksmd -system: systemd* udisks* udevd* *udevd connmand ipv6_addrconf dbus-* rtkit* +system: systemd-* udisks* udevd* *udevd connmand ipv6_addrconf dbus-* rtkit* system: inetd xinetd mdadm polkitd acpid uuidd packagekitd upowerd colord system: accounts-daemon rngd haveged @@ -280,3 +282,5 @@ rabbitmq: *rabbitmq* sidekiq: *sidekiq* java: java ipfs: ipfs + +node: node diff --git a/src/apps_plugin.c b/collectors/apps.plugin/apps_plugin.c index 8595da6c2..f592e9fc8 100644 --- a/src/apps_plugin.c +++ b/collectors/apps.plugin/apps_plugin.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-3.0-or-later /* * netdata apps.plugin @@ -5,7 +6,61 @@ * Released under GPL v3+ */ -#include "common.h" +#include "../../libnetdata/libnetdata.h" + +// ---------------------------------------------------------------------------- + +// callback required by fatal() +void netdata_cleanup_and_exit(int ret) { + exit(ret); +} + +// callbacks required by popen() +void signals_block(void) {}; +void signals_unblock(void) {}; +void signals_reset(void) {}; + +// callback required by eval() +int health_variable_lookup(const char *variable, uint32_t hash, struct rrdcalc *rc, calculated_number *result) { + (void)variable; + (void)hash; + (void)rc; + (void)result; + return 0; +}; + +// required by get_system_cpus() +char *netdata_configured_host_prefix = ""; + + +// ---------------------------------------------------------------------------- +// debugging + +static int debug_enabled = 0; +static inline void debug_log_int(const char *fmt, ... ) { + va_list args; + + fprintf( stderr, "apps.plugin: "); + va_start( args, fmt ); + vfprintf( stderr, fmt, args ); + va_end( args ); + + fputc('\n', stderr); +} + +#ifdef NETDATA_INTERNAL_CHECKS + +#define debug_log(fmt, args...) 
do { if(unlikely(debug_enabled)) debug_log_int(fmt, ##args); } while(0) + +#else + +static inline void debug_log_dummy(void) {} +#define debug_log(fmt, args...) debug_log_dummy() + +#endif + + +// ---------------------------------------------------------------------------- #ifdef __FreeBSD__ #include <sys/user.h> @@ -57,21 +112,21 @@ // command line options static int - debug = 0, update_every = 1, enable_guest_charts = 0, #ifdef __FreeBSD__ enable_file_charts = 0, #else enable_file_charts = 1, + max_fds_cache_seconds = 60, #endif enable_users_charts = 1, enable_groups_charts = 1, include_exited_childs = 1; - -// will be changed to getenv(NETDATA_CONFIG_DIR) if it exists -static char *config_dir = CONFIG_DIR; +// will be changed to getenv(NETDATA_USER_CONFIG_DIR) if it exists +static char *user_config_dir = CONFIG_DIR; +static char *stock_config_dir = LIBCONFIG_DIR; // ---------------------------------------------------------------------------- // internal flags @@ -90,6 +145,9 @@ static size_t global_iterations_counter = 1, calls_counter = 0, file_counter = 0, + filenames_allocated_counter = 0, + inodes_changed_counter = 0, + links_changed_counter = 0, targets_assignment_counter = 0; @@ -193,7 +251,7 @@ struct target { unsigned int processes; // how many processes have been merged to this int exposed; // if set, we have sent this to netdata int hidden; // if set, we set the hidden flag on the dimension - int debug; + int debug_enabled; int ends_with; int starts_with; // if set, the compare string matches only the // beginning of the command @@ -218,6 +276,18 @@ size_t // structure to store data for each process running // see: man proc for the description of the fields +struct pid_fd { + int fd; + +#ifndef __FreeBSD__ + ino_t inode; + char *filename; + uint32_t link_hash; + size_t cache_iterations_counter; + size_t cache_iterations_reset; +#endif +}; + struct pid_stat { int32_t pid; char comm[MAX_COMPARE_NAME + 1]; @@ -312,15 +382,15 @@ struct pid_stat { 
kernel_uint_t io_storage_bytes_written; // kernel_uint_t io_cancelled_write_bytes; - int *fds; // array of fds it uses - int fds_size; // the size of the fds array + struct pid_fd *fds; // array of fds it uses + size_t fds_size; // the size of the fds array int children_count; // number of processes directly referencing this - char keep:1; // 1 when we need to keep this process in memory even after it exited + unsigned char keep:1; // 1 when we need to keep this process in memory even after it exited int keeploops; // increases by 1 every time keep is 1 and updated 0 - char updated:1; // 1 when the process is currently running - char merged:1; // 1 when it has been merged to its parent - char read:1; // 1 when we have already read this process for this iteration + unsigned char updated:1; // 1 when the process is currently running + unsigned char merged:1; // 1 when it has been merged to its parent + unsigned char read:1; // 1 when we have already read this process for this iteration int sortlist; // higher numbers = top on the process tree // each process gets a unique number @@ -416,13 +486,6 @@ static int all_files_size = 0; // ---------------------------------------------------------------------------- -// callback required by fatal() - -void netdata_cleanup_and_exit(int ret) { - exit(ret); -} - -// ---------------------------------------------------------------------------- // apps_groups.conf // aggregate all processes in groups, to have a limited number of dimensions @@ -452,8 +515,7 @@ static struct target *get_users_target(uid_t uid) { w->next = users_root_target; users_root_target = w; - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: added uid %u ('%s') target\n", w->uid, w->name); + debug_log("added uid %u ('%s') target", w->uid, w->name); return w; } @@ -485,8 +547,7 @@ struct target *get_groups_target(gid_t gid) w->next = groups_root_target; groups_root_target = w; - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: added gid %u ('%s') 
target\n", w->gid, w->name); + debug_log("added gid %u ('%s') target", w->gid, w->name); return w; } @@ -527,11 +588,11 @@ static struct target *get_apps_groups_target(const char *id, struct target *targ break; } - if(unlikely(debug)) { + if(unlikely(debug_enabled)) { if(unlikely(target)) - fprintf(stderr, "apps.plugin: REUSING TARGET NAME '%s' on ID '%s'\n", target->name, target->id); + debug_log("REUSING TARGET NAME '%s' on ID '%s'", target->name, target->id); else - fprintf(stderr, "apps.plugin: NEW TARGET NAME '%s' on ID '%s'\n", name, id); + debug_log("NEW TARGET NAME '%s' on ID '%s'", name, id); } } @@ -564,34 +625,37 @@ static struct target *get_apps_groups_target(const char *id, struct target *targ w->comparelen = strlen(w->compare); w->hidden = thidden; - w->debug = tdebug; +#ifdef NETDATA_INTERNAL_CHECKS + w->debug_enabled = tdebug; +#else + if(tdebug) + fprintf(stderr, "apps.plugin has been compiled without debugging\n"); +#endif w->target = target; // append it, to maintain the order in apps_groups.conf if(last) last->next = w; else apps_groups_root_target = w; - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: ADDING TARGET ID '%s', process name '%s' (%s), aggregated on target '%s', options: %s %s\n" - , w->id - , w->compare, (w->starts_with && w->ends_with)?"substring":((w->starts_with)?"prefix":((w->ends_with)?"suffix":"exact")) - , w->target?w->target->name:w->name - , (w->hidden)?"hidden":"-" - , (w->debug)?"debug":"-" - ); + debug_log("ADDING TARGET ID '%s', process name '%s' (%s), aggregated on target '%s', options: %s %s" + , w->id + , w->compare, (w->starts_with && w->ends_with)?"substring":((w->starts_with)?"prefix":((w->ends_with)?"suffix":"exact")) + , w->target?w->target->name:w->name + , (w->hidden)?"hidden":"-" + , (w->debug_enabled)?"debug":"-" + ); return w; } // read the apps_groups.conf file -static int read_apps_groups_conf(const char *file) +static int read_apps_groups_conf(const char *path, const char *file) { char 
filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/apps_%s.conf", config_dir, file); + snprintfz(filename, FILENAME_MAX, "%s/apps_%s.conf", path, file); - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: process groups file: '%s'\n", filename); + debug_log("process groups file: '%s'", filename); // ---------------------------------------- @@ -654,14 +718,16 @@ static int read_apps_groups_conf(const char *file) // ---------------------------------------------------------------------------- // struct pid_stat management +static inline void init_pid_fds(struct pid_stat *p, size_t first, size_t size); static inline struct pid_stat *get_pid_entry(pid_t pid) { if(unlikely(all_pids[pid])) return all_pids[pid]; struct pid_stat *p = callocz(sizeof(struct pid_stat), 1); - p->fds = callocz(sizeof(int), MAX_SPARE_FDS); + p->fds = mallocz(sizeof(struct pid_fd) * MAX_SPARE_FDS); p->fds_size = MAX_SPARE_FDS; + init_pid_fds(p, 0, p->fds_size); if(likely(root_of_pids)) root_of_pids->prev = p; @@ -685,8 +751,7 @@ static inline void del_pid_entry(pid_t pid) { return; } - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: process %d %s exited, deleting it.\n", pid, p->comm); + debug_log("process %d %s exited, deleting it.", pid, p->comm); if(root_of_pids == p) root_of_pids = p->next; @@ -694,7 +759,17 @@ static inline void del_pid_entry(pid_t pid) { if(p->next) p->next->prev = p->prev; if(p->prev) p->prev->next = p->next; + // free the filename +#ifndef __FreeBSD__ + { + size_t i; + for(i = 0; i < p->fds_size; i++) + if(p->fds[i].filename) + freez(p->fds[i].filename); + } +#endif freez(p->fds); + freez(p->fds_dirname); freez(p->stat_filename); freez(p->status_filename); @@ -716,8 +791,8 @@ static inline int managed_log(struct pid_stat *p, uint32_t log, int status) { if(unlikely(!status)) { // error("command failed log %u, errno %d", log, errno); - if(unlikely(debug || errno != ENOENT)) { - if(unlikely(debug || !(p->log_thrown & log))) { + if(unlikely(debug_enabled 
|| errno != ENOENT)) { + if(unlikely(debug_enabled || !(p->log_thrown & log))) { p->log_thrown |= log; switch(log) { case PID_LOG_IO: @@ -779,7 +854,7 @@ static inline void assign_target_to_pid(struct pid_stat *p) { struct target *w; for(w = apps_groups_root_target; w ; w = w->next) { - // if(debug || (p->target && p->target->debug)) fprintf(stderr, "apps.plugin: \t\tcomparing '%s' with '%s'\n", w->compare, p->comm); + // if(debug_enabled || (p->target && p->target->debug_enabled)) debug_log_int("\t\tcomparing '%s' with '%s'", w->compare, p->comm); // find it - 4 cases: // 1. the target is not a pattern @@ -796,8 +871,8 @@ static inline void assign_target_to_pid(struct pid_stat *p) { if(w->target) p->target = w->target; else p->target = w; - if(debug || (p->target && p->target->debug)) - fprintf(stderr, "apps.plugin: \t\t%s linked to target %s\n", p->comm, p->target->name); + if(debug_enabled || (p->target && p->target->debug_enabled)) + debug_log_int("%s linked to target %s", p->comm, p->target->name); break; } @@ -828,7 +903,7 @@ static inline int read_proc_pid_cmdline(struct pid_stat *p) { p->cmdline_filename = strdupz(filename); } - int fd = open(p->cmdline_filename, O_RDONLY, 0666); + int fd = open(p->cmdline_filename, procfile_open_flags, 0666); if(unlikely(fd == -1)) goto cleanup; ssize_t i, bytes = read(fd, cmdline, MAX_CMDLINE); @@ -838,18 +913,20 @@ static inline int read_proc_pid_cmdline(struct pid_stat *p) { #endif cmdline[bytes] = '\0'; - for(i = 0; i < bytes ; i++) + for(i = 0; i < bytes ; i++) { if(unlikely(!cmdline[i])) cmdline[i] = ' '; + } + if(p->cmdline) freez(p->cmdline); p->cmdline = strdupz(cmdline); - if(unlikely(debug)) - fprintf(stderr, "Read file '%s' contents: %s\n", p->cmdline_filename, p->cmdline); + debug_log("Read file '%s' contents: %s", p->cmdline_filename, p->cmdline); return 1; cleanup: // copy the command to the command line + if(p->cmdline) freez(p->cmdline); p->cmdline = strdupz(p->comm); return 0; } @@ -963,7 +1040,7 @@ 
static inline int read_proc_pid_status(struct pid_stat *p, void *ptr) { p->gid = proc_info->ki_groups[0]; p->status_vmsize = proc_info->ki_size / 1024; // in kB p->status_vmrss = proc_info->ki_rssize * pagesize / 1024; // in kB - // FIXME: what about shared and swap memory on FreeBSD? + // TODO: what about shared and swap memory on FreeBSD? return 1; #else (void)ptr; @@ -1004,7 +1081,7 @@ static inline int read_proc_pid_status(struct pid_stat *p, void *ptr) { arl_begin(p->status_arl); for(l = 0; l < lines ;l++) { - // fprintf(stderr, "CHECK: line %zu of %zu, key '%s' = '%s'\n", l, lines, procfile_lineword(ff, l, 0), procfile_lineword(ff, l, 1)); + // debug_log("CHECK: line %zu of %zu, key '%s' = '%s'", l, lines, procfile_lineword(ff, l, 0), procfile_lineword(ff, l, 1)); arl_ptr.line = l; if(unlikely(arl_check(p->status_arl, procfile_lineword(ff, l, 0), @@ -1013,7 +1090,7 @@ static inline int read_proc_pid_status(struct pid_stat *p, void *ptr) { p->status_vmshared = p->status_rssfile + p->status_rssshmem; - // fprintf(stderr, "%s uid %d, gid %d, VmSize %zu, VmRSS %zu, RssFile %zu, RssShmem %zu, shared %zu\n", p->comm, (int)p->uid, (int)p->gid, p->status_vmsize, p->status_vmrss, p->status_rssfile, p->status_rssshmem, p->status_vmshared); + // debug_log("%s uid %d, gid %d, VmSize %zu, VmRSS %zu, RssFile %zu, RssShmem %zu, shared %zu", p->comm, (int)p->uid, (int)p->gid, p->status_vmsize, p->status_vmrss, p->status_rssfile, p->status_rssshmem, p->status_vmshared); return 1; #endif @@ -1072,11 +1149,11 @@ static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) { #endif if(strcmp(p->comm, comm) != 0) { - if(unlikely(debug)) { + if(unlikely(debug_enabled)) { if(p->comm[0]) - fprintf(stderr, "apps.plugin: \tpid %d (%s) changed name to '%s'\n", p->pid, p->comm, comm); + debug_log("\tpid %d (%s) changed name to '%s'", p->pid, p->comm, comm); else - fprintf(stderr, "apps.plugin: \tJust added %d (%s)\n", p->pid, comm); + debug_log("\tJust added %d (%s)", p->pid, 
comm); } strncpyz(p->comm, comm, MAX_COMPARE_NAME); @@ -1152,8 +1229,8 @@ static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) { } #endif - if(unlikely(debug || (p->target && p->target->debug))) - fprintf(stderr, "apps.plugin: READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu) VALUES: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT ", threads=%d\n", netdata_configured_host_prefix, p->pid, p->comm, (p->target)?p->target->name:"UNSET", p->stat_collected_usec - p->last_stat_collected_usec, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->num_threads); + if(unlikely(debug_enabled || (p->target && p->target->debug_enabled))) + debug_log_int("READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu) VALUES: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT ", threads=%d", netdata_configured_host_prefix, p->pid, p->comm, (p->target)?p->target->name:"UNSET", p->stat_collected_usec - p->last_stat_collected_usec, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->num_threads); if(unlikely(global_iterations_counter == 1)) { p->minflt = 0; @@ -1332,7 +1409,7 @@ int file_descriptor_compare(void* a, void* b) { return strcmp(((struct file_descriptor *)a)->name, ((struct file_descriptor *)b)->name); } -int file_descriptor_iterator(avl *a) { if(a) {}; return 0; } +// int file_descriptor_iterator(avl *a) { if(a) {}; return 0; } avl_tree all_files_index = { NULL, @@ -1368,15 +1445,13 @@ static inline void file_descriptor_not_used(int id) } #endif /* 
NETDATA_INTERNAL_CHECKS */ - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: decreasing slot %d (count = %d).\n", id, all_files[id].count); + debug_log("decreasing slot %d (count = %d).", id, all_files[id].count); if(all_files[id].count > 0) { all_files[id].count--; if(!all_files[id].count) { - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: >> slot %d is empty.\n", id); + debug_log(" >> slot %d is empty.", id); if(unlikely(file_descriptor_remove(&all_files[id]) != (void *)&all_files[id])) error("INTERNAL ERROR: removal of unused fd from index, removed a different fd"); @@ -1398,8 +1473,7 @@ static inline void all_files_grow() { int i; // there is no empty slot - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: extending fd array to %d entries\n", all_files_size + FILE_DESCRIPTORS_INCREASE_STEP); + debug_log("extending fd array to %d entries", all_files_size + FILE_DESCRIPTORS_INCREASE_STEP); all_files = reallocz(all_files, (all_files_size + FILE_DESCRIPTORS_INCREASE_STEP) * sizeof(struct file_descriptor)); @@ -1407,8 +1481,7 @@ static inline void all_files_grow() { // since all pointers are now invalid if(unlikely(old && old != (void *)all_files)) { - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: >> re-indexing.\n"); + debug_log(" >> re-indexing."); all_files_index.root = NULL; for(i = 0; i < all_files_size; i++) { @@ -1417,8 +1490,7 @@ static inline void all_files_grow() { error("INTERNAL ERROR: duplicate indexing of fd during realloc."); } - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: >> re-indexing done.\n"); + debug_log(" >> re-indexing done."); } // initialize the newly added entries @@ -1441,8 +1513,7 @@ static inline int file_descriptor_set_on_empty_slot(const char *name, uint32_t h if(!all_files || all_files_len == all_files_size) all_files_grow(); - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: >> searching for empty slot.\n"); + debug_log(" >> searching for empty slot."); // search for an empty slot @@ -1453,16 +1524,14 @@ 
static inline int file_descriptor_set_on_empty_slot(const char *name, uint32_t h if(c == 0) continue; if(!all_files[c].count) { - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: >> Examining slot %d.\n", c); + debug_log(" >> Examining slot %d.", c); #ifdef NETDATA_INTERNAL_CHECKS if(all_files[c].magic == 0x0BADCAFE && all_files[c].name && file_descriptor_find(all_files[c].name, all_files[c].hash)) - error("fd on position %d is not cleared properly. It still has %s in it.\n", c, all_files[c].name); + error("fd on position %d is not cleared properly. It still has %s in it.", c, all_files[c].name); #endif /* NETDATA_INTERNAL_CHECKS */ - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: >> %s fd position %d for %s (last name: %s)\n", all_files[c].name?"re-using":"using", c, name, all_files[c].name); + debug_log(" >> %s fd position %d for %s (last name: %s)", all_files[c].name?"re-using":"using", c, name, all_files[c].name); freez((void *)all_files[c].name); all_files[c].name = NULL; @@ -1479,8 +1548,7 @@ static inline int file_descriptor_set_on_empty_slot(const char *name, uint32_t h } // else we have an empty slot in 'c' - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: >> updating slot %d.\n", c); + debug_log(" >> updating slot %d.", c); all_files[c].name = strdupz(name); all_files[c].hash = hash; @@ -1493,24 +1561,21 @@ static inline int file_descriptor_set_on_empty_slot(const char *name, uint32_t h if(unlikely(file_descriptor_add(&all_files[c]) != (void *)&all_files[c])) error("INTERNAL ERROR: duplicate indexing of fd."); - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: using fd position %d (name: %s)\n", c, all_files[c].name); + debug_log("using fd position %d (name: %s)", c, all_files[c].name); return c; } -static inline int file_descriptor_find_or_add(const char *name) -{ - uint32_t hash = simple_hash(name); +static inline int file_descriptor_find_or_add(const char *name, uint32_t hash) { + if(unlikely(!hash)) + hash = simple_hash(name); - 
if(unlikely(debug)) - fprintf(stderr, "apps.plugin: adding or finding name '%s' with hash %u\n", name, hash); + debug_log("adding or finding name '%s' with hash %u", name, hash); struct file_descriptor *fd = file_descriptor_find(name, hash); if(fd) { // found - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: >> found on slot %d\n", fd->pos); + debug_log(" >> found on slot %d", fd->pos); fd->count++; return fd->pos; @@ -1530,47 +1595,65 @@ static inline int file_descriptor_find_or_add(const char *name) else if(strcmp(t, "[timerfd]") == 0) type = FILETYPE_TIMERFD; else if(strcmp(t, "[signalfd]") == 0) type = FILETYPE_SIGNALFD; else { - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: FIXME: unknown anonymous inode: %s\n", name); - + debug_log("UNKNOWN anonymous inode: %s", name); type = FILETYPE_OTHER; } } else if(likely(strcmp(name, "inotify") == 0)) type = FILETYPE_INOTIFY; else { - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: FIXME: cannot understand linkname: %s\n", name); - + debug_log("UNKNOWN linkname: %s", name); type = FILETYPE_OTHER; } return file_descriptor_set_on_empty_slot(name, hash, type); } +static inline void clear_pid_fd(struct pid_fd *pfd) { + pfd->fd = 0; + + #ifndef __FreeBSD__ + pfd->link_hash = 0; + pfd->inode = 0; + pfd->cache_iterations_counter = 0; + pfd->cache_iterations_reset = 0; +#endif +} + static inline void make_all_pid_fds_negative(struct pid_stat *p) { - int *fd = p->fds, *end = &p->fds[p->fds_size]; - while(fd < end) { - *fd = -(*fd); - fd++; + struct pid_fd *pfd = p->fds, *pfdend = &p->fds[p->fds_size]; + while(pfd < pfdend) { + pfd->fd = -(pfd->fd); + pfd++; } } static inline void cleanup_negative_pid_fds(struct pid_stat *p) { - int *fd = p->fds, *fdend = &p->fds[p->fds_size]; + struct pid_fd *pfd = p->fds, *pfdend = &p->fds[p->fds_size]; + + while(pfd < pfdend) { + int fd = pfd->fd; - while(fd < fdend) { - if(unlikely(*fd < 0)) { - file_descriptor_not_used(-(*fd)); - *fd++ = 0; + if(unlikely(fd < 0)) { + 
file_descriptor_not_used(-(fd)); + clear_pid_fd(pfd); } - else - fd++; + + pfd++; } } -static inline void zero_pid_fds(struct pid_stat *p, int first, int size) { - int *fd = &p->fds[first], *end = &p->fds[first + size]; - while(fd < end) *fd++ = 0; +static inline void init_pid_fds(struct pid_stat *p, size_t first, size_t size) { + struct pid_fd *pfd = &p->fds[first], *pfdend = &p->fds[first + size]; + size_t i = first; + + while(pfd < pfdend) { +#ifndef __FreeBSD__ + pfd->filename = NULL; +#endif + clear_pid_fd(pfd); + pfd++; + i++; + } } static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) { @@ -1625,17 +1708,16 @@ static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) { if (unlikely(fdid >= p->fds_size)) { // it is small, extend it - if (unlikely(debug)) - fprintf(stderr, "apps.plugin: extending fd memory slots for %s from %d to %d\n", p->comm, p->fds_size, fdid + MAX_SPARE_FDS); + debug_log("extending fd memory slots for %s from %d to %d", p->comm, p->fds_size, fdid + MAX_SPARE_FDS); - p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(int)); + p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(struct pid_fd)); // and initialize it - zero_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size); + init_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size); p->fds_size = fdid + MAX_SPARE_FDS; } - if (unlikely(p->fds[fdid] == 0)) { + if (unlikely(p->fds[fdid].fd == 0)) { // we don't know this fd, get it switch (fds->kf_type) { @@ -1691,15 +1773,14 @@ static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) { // if another process already has this, we will get // the same id - p->fds[fdid] = file_descriptor_find_or_add(fdsname); + p->fds[fdid].fd = file_descriptor_find_or_add(fdsname, 0); } // else make it positive again, we need it - // of course, the actual file may have changed, but we don't care so much - // FIXME: we could compare the inode as returned by readdir dirent 
structure + // of course, the actual file may have changed else - p->fds[fdid] = -p->fds[fdid]; + p->fds[fdid].fd = -p->fds[fdid].fd; bfdsbuf += fds->kf_structsize; } @@ -1714,7 +1795,6 @@ static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) { if(unlikely(!fds)) return 0; struct dirent *de; - char fdname[FILENAME_MAX + 1]; char linkname[FILENAME_MAX + 1]; // we make all pid fds negative, so that @@ -1733,53 +1813,103 @@ static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) { if(unlikely(fdid < 0)) continue; // check if the fds array is small - if(unlikely(fdid >= p->fds_size)) { + if(unlikely((size_t)fdid >= p->fds_size)) { // it is small, extend it - if(unlikely(debug)) - fprintf(stderr - , "apps.plugin: extending fd memory slots for %s from %d to %d\n" - , p->comm - , p->fds_size - , fdid + MAX_SPARE_FDS - ); + debug_log("extending fd memory slots for %s from %d to %d" + , p->comm + , p->fds_size + , fdid + MAX_SPARE_FDS + ); - p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(int)); + p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(struct pid_fd)); // and initialize it - zero_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size); - p->fds_size = fdid + MAX_SPARE_FDS; + init_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size); + p->fds_size = (size_t)fdid + MAX_SPARE_FDS; } - if(unlikely(p->fds[fdid] == 0)) { - // we don't know this fd, get it + if(unlikely(p->fds[fdid].fd < 0 && de->d_ino != p->fds[fdid].inode)) { + // inodes do not match, clear the previous entry + inodes_changed_counter++; + file_descriptor_not_used(-p->fds[fdid].fd); + clear_pid_fd(&p->fds[fdid]); + } - sprintf(fdname, "%s/proc/%d/fd/%s", netdata_configured_host_prefix, p->pid, de->d_name); - ssize_t l = readlink(fdname, linkname, FILENAME_MAX); - if(unlikely(l == -1)) { - if(debug || (p->target && p->target->debug)) { - if(debug || (p->target && p->target->debug)) - error("Cannot read link %s", fdname); - } - 
continue; + if(p->fds[fdid].fd < 0 && p->fds[fdid].cache_iterations_counter > 0) { + p->fds[fdid].fd = -p->fds[fdid].fd; + p->fds[fdid].cache_iterations_counter--; + continue; + } + + if(unlikely(!p->fds[fdid].filename)) { + filenames_allocated_counter++; + char fdname[FILENAME_MAX + 1]; + snprintfz(fdname, FILENAME_MAX, "%s/proc/%d/fd/%s", netdata_configured_host_prefix, p->pid, de->d_name); + p->fds[fdid].filename = strdupz(fdname); + } + + file_counter++; + ssize_t l = readlink(p->fds[fdid].filename, linkname, FILENAME_MAX); + if(unlikely(l == -1)) { + // cannot read the link + + if(debug_enabled || (p->target && p->target->debug_enabled)) + error("Cannot read link %s", p->fds[fdid].filename); + + if(unlikely(p->fds[fdid].fd < 0)) { + file_descriptor_not_used(-p->fds[fdid].fd); + clear_pid_fd(&p->fds[fdid]); } - else - linkname[l] = '\0'; - file_counter++; + continue; + } + else + linkname[l] = '\0'; + + uint32_t link_hash = simple_hash(linkname); + + if(unlikely(p->fds[fdid].fd < 0 && p->fds[fdid].link_hash != link_hash)) { + // the link changed + links_changed_counter++; + file_descriptor_not_used(-p->fds[fdid].fd); + clear_pid_fd(&p->fds[fdid]); + } + + if(unlikely(p->fds[fdid].fd == 0)) { + // we don't know this fd, get it // if another process already has this, we will get // the same id - p->fds[fdid] = file_descriptor_find_or_add(linkname); + p->fds[fdid].fd = file_descriptor_find_or_add(linkname, link_hash); + p->fds[fdid].inode = de->d_ino; + p->fds[fdid].link_hash = link_hash; } - + else { // else make it positive again, we need it - // of course, the actual file may have changed, but we don't care so much - // FIXME: we could compare the inode as returned by readdir dirent structure - // UPDATE: no we cannot use inodes - under /proc inodes don't change when the link is changed + p->fds[fdid].fd = -p->fds[fdid].fd; + } - else - p->fds[fdid] = -p->fds[fdid]; + // caching control + // without this we read all the files on every iteration + 
if(max_fds_cache_seconds > 0) { + size_t spread = ((size_t)max_fds_cache_seconds > 10) ? 10 : (size_t)max_fds_cache_seconds; + + // cache it for a few iterations + size_t max = ((size_t) max_fds_cache_seconds + (fdid % spread)) / (size_t) update_every; + p->fds[fdid].cache_iterations_reset++; + + if(unlikely(p->fds[fdid].cache_iterations_reset % spread == (size_t) fdid % spread)) + p->fds[fdid].cache_iterations_reset++; + + if(unlikely((fdid <= 2 && p->fds[fdid].cache_iterations_reset > 5) || + p->fds[fdid].cache_iterations_reset > max)) { + // for stdin, stdout, stderr (fdid <= 2) we have checked a few times, or if it goes above the max, goto max + p->fds[fdid].cache_iterations_reset = max; + } + + p->fds[fdid].cache_iterations_counter = p->fds[fdid].cache_iterations_reset; + } } closedir(fds); @@ -1791,12 +1921,12 @@ static inline int read_pid_file_descriptors(struct pid_stat *p, void *ptr) { // ---------------------------------------------------------------------------- -static inline int print_process_and_parents(struct pid_stat *p, usec_t time) { +static inline int debug_print_process_and_parents(struct pid_stat *p, usec_t time) { char *prefix = "\\_ "; int indent = 0; if(p->parent) - indent = print_process_and_parents(p->parent, p->stat_collected_usec); + indent = debug_print_process_and_parents(p->parent, p->stat_collected_usec); else prefix = " > "; @@ -1830,12 +1960,12 @@ static inline int print_process_and_parents(struct pid_stat *p, usec_t time) { return indent + 1; } -static inline void print_process_tree(struct pid_stat *p, char *msg) { - fprintf(stderr, "%s: process %s (%d, %s) with parents:\n", msg, p->comm, p->pid, p->updated?"running":"exited"); - print_process_and_parents(p, p->stat_collected_usec); +static inline void debug_print_process_tree(struct pid_stat *p, char *msg) { + debug_log("%s: process %s (%d, %s) with parents:", msg, p->comm, p->pid, p->updated?"running":"exited"); + debug_print_process_and_parents(p, p->stat_collected_usec); } 
-static inline void find_lost_child_debug(struct pid_stat *pe, kernel_uint_t lost, int type) { +static inline void debug_find_lost_child(struct pid_stat *pe, kernel_uint_t lost, int type) { int found = 0; struct pid_stat *p = NULL; @@ -1938,8 +2068,8 @@ static inline void process_exited_processes() { if(utime + stime + gtime + minflt + majflt == 0) continue; - if(unlikely(debug)) { - fprintf(stderr, "Absorb %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")\n" + if(unlikely(debug_enabled)) { + debug_log("Absorb %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")" , p->comm , p->pid , p->updated?"running":"exited" @@ -1949,7 +2079,7 @@ static inline void process_exited_processes() { , minflt , majflt ); - print_process_tree(p, "Searching parents"); + debug_print_process_tree(p, "Searching parents"); } struct pid_stat *pp; @@ -1958,59 +2088,57 @@ static inline void process_exited_processes() { kernel_uint_t absorbed; absorbed = remove_exited_child_from_parent(&utime, &pp->cutime); - if(unlikely(debug && absorbed)) - fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " utime (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, utime); + if(unlikely(debug_enabled && absorbed)) + debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " utime (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, utime); absorbed = remove_exited_child_from_parent(&stime, &pp->cstime); - if(unlikely(debug && absorbed)) - fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " stime (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, stime); + if(unlikely(debug_enabled 
&& absorbed)) + debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " stime (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, stime); absorbed = remove_exited_child_from_parent(&gtime, &pp->cgtime); - if(unlikely(debug && absorbed)) - fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " gtime (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, gtime); + if(unlikely(debug_enabled && absorbed)) + debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " gtime (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, gtime); absorbed = remove_exited_child_from_parent(&minflt, &pp->cminflt); - if(unlikely(debug && absorbed)) - fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " minflt (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, minflt); + if(unlikely(debug_enabled && absorbed)) + debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " minflt (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, minflt); absorbed = remove_exited_child_from_parent(&majflt, &pp->cmajflt); - if(unlikely(debug && absorbed)) - fprintf(stderr, " > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " majflt (remaining: " KERNEL_UINT_FORMAT ")\n", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, majflt); + if(unlikely(debug_enabled && absorbed)) + debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " majflt (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, majflt); } if(unlikely(utime + stime + gtime + minflt + majflt > 0)) { - if(unlikely(debug)) { - if(utime) find_lost_child_debug(p, utime, 3); - if(stime) find_lost_child_debug(p, stime, 4); - if(gtime) find_lost_child_debug(p, gtime, 5); - if(minflt) find_lost_child_debug(p, 
minflt, 1); - if(majflt) find_lost_child_debug(p, majflt, 2); + if(unlikely(debug_enabled)) { + if(utime) debug_find_lost_child(p, utime, 3); + if(stime) debug_find_lost_child(p, stime, 4); + if(gtime) debug_find_lost_child(p, gtime, 5); + if(minflt) debug_find_lost_child(p, minflt, 1); + if(majflt) debug_find_lost_child(p, majflt, 2); } p->keep = 1; - if(unlikely(debug)) - fprintf(stderr, " > remaining resources - KEEP - for another loop: %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")\n" - , p->comm - , p->pid - , p->updated?"running":"exited" - , utime - , stime - , gtime - , minflt - , majflt - ); + debug_log(" > remaining resources - KEEP - for another loop: %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")" + , p->comm + , p->pid + , p->updated?"running":"exited" + , utime + , stime + , gtime + , minflt + , majflt + ); for(pp = p->parent; pp ; pp = pp->parent) { if(pp->updated) break; pp->keep = 1; - if(unlikely(debug)) - fprintf(stderr, " > - KEEP - parent for another loop: %s (%d %s)\n" - , pp->comm - , pp->pid - , pp->updated?"running":"exited" - ); + debug_log(" > - KEEP - parent for another loop: %s (%d %s)" + , pp->comm + , pp->pid + , pp->updated?"running":"exited" + ); } p->utime_raw = utime * (p->stat_collected_usec - p->last_stat_collected_usec) / (USEC_PER_SEC * RATES_DETAIL); @@ -2020,16 +2148,14 @@ static inline void process_exited_processes() { p->majflt_raw = majflt * (p->stat_collected_usec - p->last_stat_collected_usec) / (USEC_PER_SEC * RATES_DETAIL); p->cutime_raw = p->cstime_raw = p->cgtime_raw = p->cminflt_raw = p->cmajflt_raw = 0; - if(unlikely(debug)) - fprintf(stderr, "\n"); + debug_log(" "); } - else if(unlikely(debug)) { - fprintf(stderr, " > totally absorbed - DONE - %s (%d %s)\n" + 
else + debug_log(" > totally absorbed - DONE - %s (%d %s)" , p->comm , p->pid , p->updated?"running":"exited" - ); - } + ); } } @@ -2054,8 +2180,8 @@ static inline void link_all_processes_to_their_parents(void) { p->parent = pp; pp->children_count++; - if(unlikely(debug || (p->target && p->target->debug))) - fprintf(stderr, "apps.plugin: \tchild %d (%s, %s) on target '%s' has parent %d (%s, %s). Parent: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", gtime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", cgtime=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT "\n", p->pid, p->comm, p->updated?"running":"exited", (p->target)?p->target->name:"UNSET", pp->pid, pp->comm, pp->updated?"running":"exited", pp->utime, pp->stime, pp->gtime, pp->minflt, pp->majflt, pp->cutime, pp->cstime, pp->cgtime, pp->cminflt, pp->cmajflt); + if(unlikely(debug_enabled || (p->target && p->target->debug_enabled))) + debug_log_int("child %d (%s, %s) on target '%s' has parent %d (%s, %s). 
Parent: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", gtime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", cgtime=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT "", p->pid, p->comm, p->updated?"running":"exited", (p->target)?p->target->name:"UNSET", pp->pid, pp->comm, pp->updated?"running":"exited", pp->utime, pp->stime, pp->gtime, pp->minflt, pp->majflt, pp->cutime, pp->cstime, pp->cgtime, pp->cminflt, pp->cmajflt); } else { p->parent = NULL; @@ -2105,7 +2231,7 @@ static inline int collect_data_for_pid(pid_t pid, void *ptr) { if(unlikely(!p || p->read)) return 0; p->read = 1; - // fprintf(stderr, "Reading process %d (%s), sortlist %d\n", p->pid, p->comm, p->sortlist); + // debug_log("Reading process %d (%s), sortlist %d", p->pid, p->comm, p->sortlist); // -------------------------------------------------------------------- // /proc/<pid>/stat @@ -2141,8 +2267,8 @@ static inline int collect_data_for_pid(pid_t pid, void *ptr) { // -------------------------------------------------------------------- // done! 
- if(unlikely(debug && include_exited_childs && all_pids_count && p->ppid && all_pids[p->ppid] && !all_pids[p->ppid]->read)) - fprintf(stderr, "Read process %d (%s) sortlisted %d, but its parent %d (%s) sortlisted %d, is not read\n", p->pid, p->comm, p->sortlist, all_pids[p->ppid]->pid, all_pids[p->ppid]->comm, all_pids[p->ppid]->sortlist); + if(unlikely(debug_enabled && include_exited_childs && all_pids_count && p->ppid && all_pids[p->ppid] && !all_pids[p->ppid]->read)) + debug_log("Read process %d (%s) sortlisted %d, but its parent %d (%s) sortlisted %d, is not read", p->pid, p->comm, p->sortlist, all_pids[p->ppid]->pid, all_pids[p->ppid]->comm, all_pids[p->ppid]->sortlist); // mark it as updated p->updated = 1; @@ -2302,18 +2428,18 @@ static int collect_data_for_all_processes(void) { // check: update_apps_groups_statistics() static void cleanup_exited_pids(void) { - int c; + size_t c; struct pid_stat *p = NULL; for(p = root_of_pids; p ;) { if(!p->updated && (!p->keep || p->keeploops > 0)) { - if(unlikely(debug && (p->keep || p->keeploops))) - fprintf(stderr, " > CLEANUP cannot keep exited process %d (%s) anymore - removing it.\n", p->pid, p->comm); + if(unlikely(debug_enabled && (p->keep || p->keeploops))) + debug_log(" > CLEANUP cannot keep exited process %d (%s) anymore - removing it.", p->pid, p->comm); for(c = 0; c < p->fds_size; c++) - if(p->fds[c] > 0) { - file_descriptor_not_used(p->fds[c]); - p->fds[c] = 0; + if(p->fds[c].fd > 0) { + file_descriptor_not_used(p->fds[c].fd); + clear_pid_fd(&p->fds[c]); } pid_t r = p->pid; @@ -2335,7 +2461,7 @@ static void apply_apps_groups_targets_inheritance(void) { // inherit their target from their parent int found = 1, loops = 0; while(found) { - if(unlikely(debug)) loops++; + if(unlikely(debug_enabled)) loops++; found = 0; for(p = root_of_pids; p ; p = p->next) { // if this process does not have a target @@ -2346,8 +2472,8 @@ static void apply_apps_groups_targets_inheritance(void) { p->target = p->parent->target; 
found++; - if(debug || (p->target && p->target->debug)) - fprintf(stderr, "apps.plugin: \t\tTARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s).\n", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm); + if(debug_enabled || (p->target && p->target->debug_enabled)) + debug_log_int("TARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s).", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm); } } } @@ -2357,7 +2483,7 @@ static void apply_apps_groups_targets_inheritance(void) { int sortlist = 1; found = 1; while(found) { - if(unlikely(debug)) loops++; + if(unlikely(debug_enabled)) loops++; found = 0; for(p = root_of_pids; p ; p = p->next) { @@ -2382,16 +2508,15 @@ static void apply_apps_groups_targets_inheritance(void) { if(unlikely(p->target && !p->parent->target)) { p->parent->target = p->target; - if(debug || (p->target && p->target->debug)) - fprintf(stderr, "apps.plugin: \t\tTARGET INHERITANCE: %s is inherited by %d (%s) from its child %d (%s).\n", p->target->name, p->parent->pid, p->parent->comm, p->pid, p->comm); + if(debug_enabled || (p->target && p->target->debug_enabled)) + debug_log_int("TARGET INHERITANCE: %s is inherited by %d (%s) from its child %d (%s).", p->target->name, p->parent->pid, p->parent->comm, p->pid, p->comm); } found++; } } - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: TARGET INHERITANCE: merged %d processes\n", found); + debug_log("TARGET INHERITANCE: merged %d processes", found); } // init goes always to default target @@ -2403,7 +2528,7 @@ static void apply_apps_groups_targets_inheritance(void) { all_pids[0]->target = apps_groups_default_target; // give a default target on all top level processes - if(unlikely(debug)) loops++; + if(unlikely(debug_enabled)) loops++; for(p = root_of_pids; p ; p = p->next) { // if the process is not merged itself // then is is a top level process @@ -2421,21 +2546,20 @@ static void apply_apps_groups_targets_inheritance(void) { // give a 
target to all merged child processes found = 1; while(found) { - if(unlikely(debug)) loops++; + if(unlikely(debug_enabled)) loops++; found = 0; for(p = root_of_pids; p ; p = p->next) { if(unlikely(!p->target && p->merged && p->parent && p->parent->target)) { p->target = p->parent->target; found++; - if(debug || (p->target && p->target->debug)) - fprintf(stderr, "apps.plugin: \t\tTARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s) at phase 2.\n", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm); + if(debug_enabled || (p->target && p->target->debug_enabled)) + debug_log_int("TARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s) at phase 2.", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm); } } } - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: apply_apps_groups_targets_inheritance() made %d loops on the process tree\n", loops); + debug_log("apply_apps_groups_targets_inheritance() made %d loops on the process tree", loops); } static size_t zero_all_targets(struct target *root) { @@ -2570,9 +2694,10 @@ static inline void aggregate_pid_fds_on_targets(struct pid_stat *p) { reallocate_target_fds(u); reallocate_target_fds(g); - int c, size = p->fds_size, *fds = p->fds; + size_t c, size = p->fds_size; + struct pid_fd *fds = p->fds; for(c = 0; c < size ;c++) { - int fd = fds[c]; + int fd = fds[c].fd; if(likely(fd <= 0 || fd >= all_files_size)) continue; @@ -2628,8 +2753,8 @@ static inline void aggregate_pid_on_target(struct target *w, struct pid_stat *p, w->processes++; w->num_threads += p->num_threads; - if(unlikely(debug || w->debug)) - fprintf(stderr, "apps.plugin: \taggregating '%s' pid %d on target '%s' utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", gtime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", cgtime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" 
KERNEL_UINT_FORMAT "\n", p->comm, p->pid, w->name, p->utime, p->stime, p->gtime, p->cutime, p->cstime, p->cgtime, p->minflt, p->majflt, p->cminflt, p->cmajflt); + if(unlikely(debug_enabled || w->debug_enabled)) + debug_log_int("aggregating '%s' pid %d on target '%s' utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", gtime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", cgtime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT "", p->comm, p->pid, w->name, p->utime, p->stime, p->gtime, p->cutime, p->cstime, p->cgtime, p->minflt, p->majflt, p->cminflt, p->cmajflt); } static void calculate_netdata_statistics(void) { @@ -2660,8 +2785,8 @@ static void calculate_netdata_statistics(void) { if(likely(p->user_target && p->user_target->uid == p->uid)) w = p->user_target; else { - if(unlikely(debug && p->user_target)) - fprintf(stderr, "apps.plugin: \t\tpid %d (%s) switched user from %u (%s) to %u.\n", p->pid, p->comm, p->user_target->uid, p->user_target->name, p->uid); + if(unlikely(debug_enabled && p->user_target)) + debug_log("pid %d (%s) switched user from %u (%s) to %u.", p->pid, p->comm, p->user_target->uid, p->user_target->name, p->uid); w = p->user_target = get_users_target(p->uid); } @@ -2676,8 +2801,8 @@ static void calculate_netdata_statistics(void) { if(likely(p->group_target && p->group_target->gid == p->gid)) w = p->group_target; else { - if(unlikely(debug && p->group_target)) - fprintf(stderr, "apps.plugin: \t\tpid %d (%s) switched group from %u (%s) to %u.\n", p->pid, p->comm, p->group_target->gid, p->group_target->name, p->gid); + if(unlikely(debug_enabled && p->group_target)) + debug_log("pid %d (%s) switched group from %u (%s) to %u.", p->pid, p->comm, p->group_target->gid, p->group_target->name, p->gid); w = p->group_target = get_groups_target(p->gid); } @@ -2698,8 +2823,6 @@ static void 
calculate_netdata_statistics(void) { // ---------------------------------------------------------------------------- // update chart dimensions -int print_calculated_number(char *str, calculated_number value) { (void)str; (void)value; return 0; } - static inline void send_BEGIN(const char *type, const char *id, usec_t usec) { fprintf(stdout, "BEGIN %s.%s %llu\n", type, id, usec); } @@ -2751,6 +2874,9 @@ void send_resource_usage_to_netdata(usec_t dt) { "CHART netdata.apps_sizes '' 'Apps Plugin Files' 'files/s' apps.plugin netdata.apps_sizes line 140001 %1$d\n" "DIMENSION calls '' incremental 1 1\n" "DIMENSION files '' incremental 1 1\n" + "DIMENSION filenames '' incremental 1 1\n" + "DIMENSION inode_changes '' incremental 1 1\n" + "DIMENSION link_changes '' incremental 1 1\n" "DIMENSION pids '' absolute 1 1\n" "DIMENSION fds '' absolute 1 1\n" "DIMENSION targets '' absolute 1 1\n" @@ -2793,6 +2919,9 @@ void send_resource_usage_to_netdata(usec_t dt) { "BEGIN netdata.apps_sizes %llu\n" "SET calls = %zu\n" "SET files = %zu\n" + "SET filenames = %zu\n" + "SET inode_changes = %zu\n" + "SET link_changes = %zu\n" "SET pids = %zu\n" "SET fds = %d\n" "SET targets = %zu\n" @@ -2804,6 +2933,9 @@ void send_resource_usage_to_netdata(usec_t dt) { , dt , calls_counter , file_counter + , filenames_allocated_counter + , inodes_changed_counter + , links_changed_counter , all_pids_count , all_files_len , apps_groups_targets_count @@ -2854,7 +2986,7 @@ static void normalize_utilization(struct target *root) { // here we try to eliminate them by disabling childs processing either for specific dimensions // or entirely. Of course, either way, we disable it just a single iteration. 
- kernel_uint_t max_time = processors * hz * RATES_DETAIL; + kernel_uint_t max_time = processors * system_hz * RATES_DETAIL; kernel_uint_t utime = 0, cutime = 0, stime = 0, cstime = 0, gtime = 0, cgtime = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0; if(global_utime > max_time) global_utime = max_time; @@ -2932,7 +3064,7 @@ static void normalize_utilization(struct target *root) { // if(gtime_fix_ratio < 0.0) gtime_fix_ratio = 0.0; // if(cgtime_fix_ratio < 0.0) cgtime_fix_ratio = 0.0; - // FIXME + // TODO // we use cpu time to normalize page faults // the problem is that to find the proper max values // for page faults we have to parse /proc/vmstat @@ -2957,14 +3089,12 @@ static void normalize_utilization(struct target *root) { // the report - if(unlikely(debug)) { - fprintf(stderr, + debug_log( "SYSTEM: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " " "COLLECTED: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " cu=" KERNEL_UINT_FORMAT " cs=" KERNEL_UINT_FORMAT " cg=" KERNEL_UINT_FORMAT " " "DELTA: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " " "FIX: u=%0.2f s=%0.2f g=%0.2f cu=%0.2f cs=%0.2f cg=%0.2f " "FINALLY: u=" KERNEL_UINT_FORMAT " s=" KERNEL_UINT_FORMAT " g=" KERNEL_UINT_FORMAT " cu=" KERNEL_UINT_FORMAT " cs=" KERNEL_UINT_FORMAT " cg=" KERNEL_UINT_FORMAT " " - "\n" , global_utime , global_stime , global_gtime @@ -2989,8 +3119,7 @@ static void normalize_utilization(struct target *root) { , (kernel_uint_t)(cutime * cutime_fix_ratio) , (kernel_uint_t)(cstime * cstime_fix_ratio) , (kernel_uint_t)(cgtime * cgtime_fix_ratio) - ); - } + ); } #else // ALL_PIDS_ARE_READ_INSTANTLY == 1 static void normalize_utilization(struct target *root) { @@ -3151,7 +3280,8 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type if (!w->exposed && w->processes) { newly_added++; w->exposed = 1; - if (debug || w->debug) fprintf(stderr, "apps.plugin: %s just added - 
regenerating charts.\n", w->name); + if (debug_enabled || w->debug_enabled) + debug_log_int("%s just added - regenerating charts.", w->name); } } @@ -3163,7 +3293,7 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type fprintf(stdout, "CHART %s.cpu '' '%s CPU Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu stacked 20001 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) - fprintf(stdout, "DIMENSION %s '' absolute 1 %llu %s\n", w->name, hz * RATES_DETAIL / 100, w->hidden ? "hidden" : ""); + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu %s\n", w->name, system_hz * RATES_DETAIL / 100, w->hidden ? "hidden" : ""); } fprintf(stdout, "CHART %s.mem '' '%s Real Memory (w/o shared)' 'MB' mem %s.mem stacked 20003 %d\n", type, title, type, update_every); @@ -3193,20 +3323,20 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type fprintf(stdout, "CHART %s.cpu_user '' '%s CPU User Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu_user stacked 20020 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) - fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, hz * RATES_DETAIL / 100LLU); + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, system_hz * RATES_DETAIL / 100LLU); } fprintf(stdout, "CHART %s.cpu_system '' '%s CPU System Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu_system stacked 20021 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) - fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, hz * RATES_DETAIL / 100LLU); + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, system_hz * RATES_DETAIL / 100LLU); } if(show_guest_time) { fprintf(stdout, "CHART 
%s.cpu_guest '' '%s CPU Guest Time (%d%% = %d core%s)' 'cpu time %%' cpu %s.cpu_system stacked 20022 %d\n", type, title, (processors * 100), processors, (processors > 1) ? "s" : "", type, update_every); for (w = root; w; w = w->next) { if(unlikely(w->exposed)) - fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, hz * RATES_DETAIL / 100LLU); + fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, system_hz * RATES_DETAIL / 100LLU); } } @@ -3315,7 +3445,6 @@ cleanup: static void parse_args(int argc, char **argv) { int i, freq = 0; - char *name = NULL; for(i = 1; i < argc; i++) { if(!freq) { @@ -3341,10 +3470,26 @@ static void parse_args(int argc, char **argv) } if(strcmp("debug", argv[i]) == 0) { - debug = 1; - // debug_flags = 0xffffffff; +#ifdef NETDATA_INTERNAL_CHECKS + debug_enabled = 1; +#else + fprintf(stderr, "apps.plugin has been compiled without debugging\n"); +#endif + continue; + } + +#ifndef __FreeBSD__ + if(strcmp("fds-cache-secs", argv[i]) == 0) { + if(argc <= i + 1) { + fprintf(stderr, "Parameter 'fds-cache-secs' requires a number as argument.\n"); + exit(1); + } + i++; + max_fds_cache_seconds = str2i(argv[i]); + if(max_fds_cache_seconds < 0) max_fds_cache_seconds = 0; continue; } +#endif if(strcmp("no-childs", argv[i]) == 0 || strcmp("without-childs", argv[i]) == 0) { include_exited_childs = 0; @@ -3415,44 +3560,54 @@ static void parse_args(int argc, char **argv) " without-files enable / disable reporting files, sockets, pipes\n" " (default is enabled)\n" "\n" - " NAME read apps_NAME.conf instead of\n" - " apps_groups.conf\n" - " (default NAME=groups)\n" +#ifndef __FreeBSD__ + " fds-cache-secs N cache the files of processed for N seconds\n" + " caching is adaptive per file (when a file\n" + " is found, it starts at 0 and while the file\n" + " remains open, it is incremented up to the\n" + " max given)\n" + " (default is %d seconds)\n" "\n" +#endif " version or -v or -V print program version and exit\n" "\n" , VERSION +#ifndef 
__FreeBSD__ + , max_fds_cache_seconds +#endif ); exit(1); } - if(!name) { - name = argv[i]; - continue; - } - error("Cannot understand option %s", argv[i]); exit(1); } if(freq > 0) update_every = freq; - if(!name) name = "groups"; - if(read_apps_groups_conf(name)) { - error("Cannot read process groups '%s/apps_%s.conf'. There are no internal defaults. Failing.", config_dir, name); - exit(1); + if(read_apps_groups_conf(user_config_dir, "groups")) { + info("Cannot read process groups configuration file '%s/apps_groups.conf'. Will try '%s/apps_groups.conf'", user_config_dir, stock_config_dir); + + if(read_apps_groups_conf(stock_config_dir, "groups")) { + error("Cannot read process groups '%s/apps_groups.conf'. There are no internal defaults. Failing.", stock_config_dir); + exit(1); + } + else + info("Loaded config file '%s/apps_groups.conf'", stock_config_dir); } + else + info("Loaded config file '%s/apps_groups.conf'", user_config_dir); } static int am_i_running_as_root() { uid_t uid = getuid(), euid = geteuid(); if(uid == 0 || euid == 0) { - if(debug) info("I am running with escalated privileges, uid = %u, euid = %u.", uid, euid); + if(debug_enabled) info("I am running with escalated privileges, uid = %u, euid = %u.", uid, euid); return 1; } - if(debug) info("I am not running with escalated privileges, uid = %u, euid = %u.", uid, euid); + if(debug_enabled) info("I am not running with escalated privileges, uid = %u, euid = %u.", uid, euid); return 0; } @@ -3463,7 +3618,7 @@ static int check_capabilities() { error("Cannot get current capabilities."); return 0; } - else if(debug) + else if(debug_enabled) info("Received my capabilities from the system."); int ret = 1; @@ -3478,7 +3633,7 @@ static int check_capabilities() { error("apps.plugin should run with CAP_DAC_READ_SEARCH."); ret = 0; } - else if(debug) + else if(debug_enabled) info("apps.plugin runs with CAP_DAC_READ_SEARCH."); } @@ -3492,7 +3647,7 @@ static int check_capabilities() { error("apps.plugin should run 
with CAP_SYS_PTRACE."); ret = 0; } - else if(debug) + else if(debug_enabled) info("apps.plugin runs with CAP_SYS_PTRACE."); } @@ -3521,19 +3676,25 @@ int main(int argc, char **argv) { error_log_errors_per_period = 100; error_log_throttle_period = 3600; + // since apps.plugin runs as root, prevent it from opening symbolic links + procfile_open_flags = O_RDONLY|O_NOFOLLOW; + netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX"); - if(netdata_configured_host_prefix == NULL) { - // info("NETDATA_HOST_PREFIX is not passed from netdata"); - netdata_configured_host_prefix = ""; + if(verify_netdata_host_prefix() == -1) exit(1); + + user_config_dir = getenv("NETDATA_USER_CONFIG_DIR"); + if(user_config_dir == NULL) { + // info("NETDATA_CONFIG_DIR is not passed from netdata"); + user_config_dir = CONFIG_DIR; } - // else info("Found NETDATA_HOST_PREFIX='%s'", netdata_configured_host_prefix); + // else info("Found NETDATA_USER_CONFIG_DIR='%s'", user_config_dir); - config_dir = getenv("NETDATA_CONFIG_DIR"); - if(config_dir == NULL) { + stock_config_dir = getenv("NETDATA_STOCK_CONFIG_DIR"); + if(stock_config_dir == NULL) { // info("NETDATA_CONFIG_DIR is not passed from netdata"); - config_dir = CONFIG_DIR; + stock_config_dir = LIBCONFIG_DIR; } - // else info("Found NETDATA_CONFIG_DIR='%s'", config_dir); + // else info("Found NETDATA_USER_CONFIG_DIR='%s'", user_config_dir); #ifdef NETDATA_INTERNAL_CHECKS if(debug_flags != 0) { @@ -3561,14 +3722,14 @@ int main(int argc, char **argv) { error("apps.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. " "Without these, apps.plugin cannot report disk I/O utilization of other processes. 
" "To enable capabilities run: sudo setcap cap_dac_read_search,cap_sys_ptrace+ep %s; " - "To enable setuid to root run: sudo chown root %s; sudo chmod 4755 %s; " + "To enable setuid to root run: sudo chown root:netdata %s; sudo chmod 4750 %s; " , uid, euid, argv[0], argv[0], argv[0] ); #else error("apps.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. " "Without these, apps.plugin cannot report disk I/O utilization of other processes. " "Your system does not support capabilities. " - "To enable setuid to root run: sudo chown root %s; sudo chmod 4755 %s; " + "To enable setuid to root run: sudo chown root:netdata %s; sudo chmod 4750 %s; " , uid, euid, argv[0], argv[0] ); #endif @@ -3630,8 +3791,7 @@ int main(int argc, char **argv) { show_guest_time_old = show_guest_time; - if(unlikely(debug)) - fprintf(stderr, "apps.plugin: done Loop No %zu\n", global_iterations_counter); + debug_log("done Loop No %zu", global_iterations_counter); // restart check (14400 seconds) if(now_monotonic_sec() - started_t > 14400) exit(0); |